bio 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -0,0 +1,64 @@
1
+ #
2
+ # = bio/sequence/aa.rb - amino acid sequence class
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id: aa.rb,v 1.2 2006/02/06 14:11:31 k Exp $
9
+ #
10
+
11
+ require 'bio/sequence/common'
12
+
13
+ module Bio
14
+
15
+ autoload :AminoAcid, 'bio/data/aa'
16
+
17
+ class Sequence
18
+
19
+
20
# Amino acid sequence: a String subclass that mixes in the shared
# Bio::Sequence::Common methods and adds protein specific helpers.
class AA < String

  include Bio::Sequence::Common

  # Generate an amino acid sequence object from a string.  The stored
  # text is upper-cased and spaces, tabs, LF and CR are removed.
  def initialize(str)
    super
    upcase!
    delete!(" \t\n\r")
  end

  # Estimate the weight of this protein (delegates to
  # Bio::AminoAcid.weight).
  def molecular_weight
    Bio::AminoAcid.weight(self)
  end

  # Delegates to Bio::AminoAcid.to_re with this sequence.
  def to_re
    Bio::AminoAcid.to_re(self)
  end

  # Generate the list of the names of each residue along the sequence
  # (3 letters code).  Every byte of the sequence is looked up in
  # Bio::AminoAcid.names.
  def codes
    bytes.map { |byte| Bio::AminoAcid.names[byte.chr] }
  end

  # Similar to codes but returns long names: each entry of #codes is
  # looked up once more in Bio::AminoAcid.names.
  def names
    codes.map { |code| Bio::AminoAcid.names[code] }
  end

end # AA
60
+
61
+ end # Sequence
62
+
63
+ end # Bio
64
+
@@ -0,0 +1,175 @@
1
+ #
2
+ # = bio/sequence/common.rb - common methods for biological sequence
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id: common.rb,v 1.2 2006/02/06 14:16:17 k Exp $
9
+ #
10
+
11
+ module Bio
12
+
13
+ autoload :Locations, 'bio/location'
14
+
15
+ class Sequence
16
+
17
+ # This module provides common methods for biological sequence classes
18
+ # which must inherit String.
19
+ module Common
20
+
21
# Returns the content of the sequence as a plain String object
# (a copy built with String.new, not an instance of the sequence class).
def to_s
  String.new(self)
end
# to_str lets the sequence be used where an implicit String is expected.
alias to_str to_s
25
+
26
# Returns a new object of the same class built from self, so that the
# class' initialize runs again on the content (clean up such as white
# space removal and case unification, where the class implements it).
def seq
  klass = self.class
  klass.new(self)
end
31
+
32
# Destructive counterpart of 'seq': runs initialize again on the
# receiver itself and returns the receiver.
def normalize!
  send(:initialize, self)
  self
end
alias seq! normalize!
38
+
39
# Appends to the receiver.  The arguments are first turned into an
# object of the receiver's class (so its initialize is applied to them)
# and the result is handed to the inherited implementation.
def <<(*arg)
  addition = self.class.new(*arg)
  super(addition)
end
alias concat <<
43
+
44
# Concatenates using the inherited implementation and wraps the result
# in a new object of the receiver's class.
def +(*arg)
  joined = super(*arg)
  self.class.new(joined)
end
47
+
48
# Returns the subsequence of the self string.
#
# s:: start position, counted from 1 (default: 1)
# e:: end position, counted from 1 and inclusive (default: length)
#
# Raises RuntimeError unless both positions are greater than zero.
def subseq(s = 1, e = self.length)
  unless s > 0 && e > 0
    raise "Error: start/end position must be a positive integer"
  end
  # Convert the 1-based inclusive positions to a 0-based range.
  self[(s - 1)..(e - 1)]
end
55
+
56
# This method iterates on sub strings with the specified length
# 'window_size'.  By specifying 'step_size', codon sized shifting or
# splitting a genome sequence with overlapping ends can easily be
# yielded.
#
# window_size:: length of each yielded sub string
# step_size::   distance between the starts of two windows (default: 1)
#
# The remainder sequence at the terminal end (everything after the last
# yielded window) will be returned.
#
# Example:
#   # prints average GC% on each 100bp
#   seq.window_search(100) do |subseq|
#     puts subseq.gc
#   end
#   # prints every translated peptide (length 5aa) in the same frame
#   seq.window_search(15, 3) do |subseq|
#     puts subseq.translate
#   end
#   # split genome sequence by 10000bp with 1000bp overlap in fasta format
#   i = 1
#   remainder = seq.window_search(10000, 9000) do |subseq|
#     puts subseq.to_fasta("segment #{i}", 60)
#     i += 1
#   end
#   puts remainder.to_fasta("segment #{i}", 60)
#
def window_search(window_size, step_size = 1)
  # Block parameters are local to the block, so the offset of the last
  # window has to be recorded explicitly to compute the remainder.
  last_step = 0
  0.step(self.length - window_size, step_size) do |i|
    yield self[i, window_size]
    last_step = i
  end
  self[last_step + window_size .. -1]
end
86
+
87
# This method receives a hash mapping residues/bases to particular
# values, and sums up the values along the self sequence.  Especially
# useful together with the window_search method and amino acid indices
# etc.
#
# hash:: object answering [] with a one-character String key
#
# Residues for which the lookup yields nil or false count as 0.0.  The
# given hash is not modified.
#
# Returns the sum; it starts from the Float 0.0.
def total(hash)
  sum = 0.0
  each_byte do |byte|
    # Fall back locally instead of assigning hash.default, which would
    # alter the caller's object (and fail on a frozen hash).
    sum += hash[byte.chr] || 0.0
  end
  sum
end
100
+
101
# Returns a hash of the occurrence counts for each residue or base.
# Characters are collected with scan(/./); the returned Hash answers 0
# for characters that did not occur.
def composition
  scan(/./).each_with_object(Hash.new(0)) do |residue, tally|
    tally[residue] += 1
  end
end
109
+
110
# Returns a randomized sequence keeping its composition by default.
# The argument is required when generating a random sequence from the
# empty sequence (used by the class methods NA.randomize, AA.randomize).
#
# hash:: optional Hash of residue/base => count.  When given, a copy of
#        it is used as the pool and the sum of its values is added to
#        the number of residues drawn.
#
# If the block is given, yields each random residue/base instead of
# collecting it (the returned sequence is then built from an empty
# string).
def randomize(hash = nil)
  pool = hash ? hash.clone : self.composition
  draws = self.length
  pool.each_value { |n| draws += n } if hash

  picked = ''
  draws.times do
    # Weight every residue by its remaining count times a random factor
    # and take the heaviest one.
    weights = {}
    pool.each_pair { |residue, remaining| weights[residue] = remaining * rand }
    winner = weights.max { |a, b| a[1] <=> b[1] }.first
    pool[winner] -= 1

    if block_given?
      yield winner
    else
      picked += winner
    end
  end
  self.class.new(picked)
end
140
+
141
# Generate a new random sequence with the given frequency of bases
# or residues.  The sequence length is determined by the sum of each
# base/residue occurrences.  All arguments and the block are passed on
# to the instance method randomize of a sequence created from ''.
#
# NOTE(review): this is written as 'def self.' inside the module, so it
# becomes a singleton method of the module itself rather than of the
# classes including it -- confirm that callers go through NA.randomize /
# AA.randomize instead.
def self.randomize(*arg, &block)
  empty = new('')
  empty.randomize(*arg, &block)
end
147
+
148
# Receive a GenBank style position string and convert it to the Locations
# objects to splice the sequence itself.  See also: bio/location.rb
#
# position:: a Locations object, or anything Locations.new accepts
#
# For every location either its own 'sequence' is used or, when that is
# not set, the matching subseq of self (complemented in place when the
# location's strand is negative).  The pieces are joined and returned
# as a new object of the receiver's class.
def splice(position)
  position = Locations.new(position) unless position.is_a?(Locations)

  spliced = ''
  position.each do |location|
    if location.sequence
      spliced << location.sequence
      next
    end

    exon = self.subseq(location.from, location.to)
    begin
      exon.complement! if location.strand < 0
    rescue NameError
      # Deliberately ignored: the exon is appended as it is.
    end
    spliced << exon
  end
  self.class.new(spliced)
end
alias splicing splice
170
+
171
+ end # Common
172
+
173
+ end # Sequence
174
+
175
+ end # Bio
@@ -0,0 +1,68 @@
1
+ #
2
+ # = bio/sequence/compat.rb - methods for backward compatibility
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id: compat.rb,v 1.2 2006/02/06 14:18:03 k Exp $
9
+ #
10
+
11
+
12
+ module Bio
13
+
14
+ class Sequence
15
+
16
+ autoload :Common, 'bio/sequence/common'
17
+ autoload :NA, 'bio/sequence/na'
18
+ autoload :AA, 'bio/sequence/aa'
19
+
20
# Returns a plain String built from the @seq instance variable.
def to_s
  String.new(@seq)
end
# to_str exposes the same String for implicit conversions.
alias to_str to_s
24
+
25
+
26
+ module Common
27
+
28
# Output the FASTA format string of the sequence.
#
# header:: text placed after '>' on the comment line (default: '')
# width::  when given, the sequence is folded into lines of at most
#          this many characters
#
# Prints an obsolescence warning when $DEBUG is set.
def to_fasta(header = '', width = nil)
  warn "Bio::Sequence#to_fasta is obsolete. Use Bio::Sequence#output(:fasta) instead" if $DEBUG

  body =
    if width
      to_s.gsub(/.{1,#{width}}/, "\\0\n")
    else
      to_s + "\n"
    end
  ">#{header}\n" + body
end
40
+
41
+ end # Common
42
+
43
+
44
class NA

  # Class level shortcut: creates an empty sequence and forwards all
  # arguments and the block to its randomize method.
  def self.randomize(*arg, &block)
    empty = new('')
    empty.randomize(*arg, &block)
  end

  # Translates the letters a, t, g, c of the dna form into p, i, k, a.
  def pikachu
    dna.tr('atgc', 'pika') # joke, of course :-)
  end

end # NA
55
+
56
+
57
class AA

  # Class level shortcut: creates an empty sequence and forwards all
  # arguments and the block to its randomize method.
  def self.randomize(*arg, &block)
    empty = new('')
    empty.randomize(*arg, &block)
  end

end # AA
64
+
65
+
66
+ end # Sequence
67
+
68
+ end # Bio
@@ -0,0 +1,134 @@
1
+ #
2
+ # = bio/sequence/format.rb - various output format of the biological sequence
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Toshiaki Katayama <k@bioruby.org>,
6
+ # Naohisa Goto <ng@bioruby.org>
7
+ # License:: Ruby's
8
+ #
9
+ # = TODO
10
+ #
11
+ # porting from N. Goto's feature-output.rb on BioRuby list.
12
+ #
13
+ # $Id: format.rb,v 1.2 2006/02/06 14:20:35 k Exp $
14
+ #
15
+
16
+
17
+ module Bio
18
+
19
+ autoload :Sequence, 'bio/sequence'
20
+
21
+ class Sequence
22
+
23
+ module Format
24
+
25
# Output the FASTA format string of the sequence held in @seq.
#
# header:: text for the comment line; when nil, "@entry_id @definition"
#          is used
# width::  when given (integer), the sequence is folded into lines of at
#          most this many characters
def format_fasta(header = nil, width = nil)
  header ||= "#{@entry_id} #{@definition}"

  body =
    if width
      @seq.to_s.gsub(/.{1,#{width}}/, "\\0\n")
    else
      @seq.to_s + "\n"
    end
  ">#{header}\n" + body
end
38
+
39
# GFF output is not available yet: always raises NotImplementedError.
def format_gff
  raise(NotImplementedError)
end
42
+
43
# Formats the features through format_features using the GenBank layout
# parameters: a blank prefix, an indent of the prefix plus 16 blanks and
# a text width of 79 minus the indent length.
def format_genbank
  key_prefix = ' ' * 5
  continuation = key_prefix + ' ' * 16

  format_features(key_prefix, continuation, 79 - continuation.length)
end
50
+
51
# Formats the features through format_features using the EMBL layout
# parameters: an 'FT' prefix, an indent of the prefix plus 16 blanks and
# a text width of 80 minus the indent length.
def format_embl
  key_prefix = 'FT '
  continuation = key_prefix + ' ' * 16

  format_features(key_prefix, continuation, 80 - continuation.length)
end
58
+
59
+
60
+ private
61
+
62
# Renders every entry of @features as feature table text.
#
# prefix:: string written in front of each feature key
# indent:: string written in front of continuation lines of the
#          location and handed on to format_qualifiers
# width::  maximum width used when wrapping location and qualifiers
#
# Each feature starts with the prefix and the feature key padded to 16
# columns, followed by the (wrapped) location and the formatted
# qualifiers.  Returns the whole table as one String.
def format_features(prefix, indent, width)
  result = String.new
  @features.each do |feature|
    result << prefix << sprintf("%-16s", feature.feature)

    position = feature.position
    #position = feature.locations.to_s

    # Only the first location line follows the key directly; the
    # following ones start with the indent.
    head = ''
    wrap(position, width).each_line do |line|
      result << head << line
      head = indent
    end
    # wrap leaves its last line unterminated, so close it before the
    # qualifier lines are appended.
    result << "\n"

    result << format_qualifiers(feature.qualifiers, indent, width)
  end
  result
end
80
+
81
# Renders a list of qualifiers as indented '/name=value' lines.
#
# qualifiers:: objects answering 'qualifier' (name) and 'value'
# indent::     string written in front of every output line
# width::      maximum width used when wrapping or folding a qualifier
#
# * a value of true produces a bare '/name'
# * 'translation' is folded at a fixed width instead of being wrapped
# * any other value containing a non-digit is quoted, with embedded
#   double quotes doubled
#
# The stored qualifier values are not modified.  Returns a String with
# one newline terminated line per output row ('' for no qualifiers).
def format_qualifiers(qualifiers, indent, width)
  result = String.new
  qualifiers.each do |qualifier|
    q = qualifier.qualifier
    v = qualifier.value

    if v == true
      lines = wrap('/' + q, width)
    elsif q == 'translation'
      lines = fold('/' + q + '=' + v.to_s, width)
    else
      v = v.to_s
      if v[/\D/]
        #v.delete!("\x00-\x1f\x7f-\xff")
        # Non-destructive gsub keeps the feature's own value intact.
        v = '"' + v.gsub(/"/, '""') + '"'
      end
      lines = wrap('/' + q + '=' + v, width)
    end

    # fold terminates every line while wrap leaves the last one open;
    # normalize so that each row ends with exactly one newline.
    lines.each_line do |line|
      result << indent << line.chomp << "\n"
    end
  end
  result
end
102
+
103
# Cuts str into pieces of at most 'width' characters and terminates
# every piece with a newline.
def fold(str, width)
  str.gsub(/.{1,#{width}}/) { |chunk| chunk + "\n" }
end
106
+
107
# Breaks str into lines no longer than 'width' and joins them with
# newlines (no trailing newline).
#
# A line is broken at the right-most position within the width that is
# a blank or that follows a ',' or ';'.  Blanks around the break are
# dropped.  When no such position exists the line is cut hard at
# 'width'.
def wrap(str, width)
  lines = []
  rest = str.dup
  while rest and rest.length > width
    # Search from the right for a usable break position.
    cut = width.downto(1).find do |i|
      rest[i, 1] == ' ' || /[,;]/ =~ rest[i - 1, 1]
    end

    if cut
      lines << rest[0..(cut - 1)].sub(/ +\z/, '')
      rest = rest[cut..-1].sub(/\A +/, '')
    else
      lines << rest[0..(width - 1)]
      rest = rest[width..-1]
    end
  end
  lines << rest if rest
  lines.join("\n")
end
128
+
129
+ end # Format
130
+
131
+ end # Sequence
132
+
133
+ end # Bio
134
+
@@ -0,0 +1,24 @@
1
+ #
2
+ # = bio/sequence/generic.rb - generic sequence class to store an intact string
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Toshiaki Katayama <k@bioruby.org>
6
+ # License:: Ruby's
7
+ #
8
+ # $Id: generic.rb,v 1.3 2006/02/06 14:26:04 k Exp $
9
+ #
10
+
11
+ require 'bio/sequence/common'
12
+
13
+ module Bio
14
+ class Sequence
15
+
16
# Generic sequence class to store an intact string: a String subclass
# that only mixes in the shared Bio::Sequence::Common methods.
class Generic < String
  include Bio::Sequence::Common
end # Generic
21
+
22
+ end # Sequence
23
+ end # Bio
24
+