bio 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,166 @@
1
+ #
2
+ # = bio/appl/sosui/report.rb - SOSUI report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: report.rb,v 1.9 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Example
10
+ #
11
+ # == References
12
+ # * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html
13
+ #--
14
+ #
15
+ # This library is free software; you can redistribute it and/or
16
+ # modify it under the terms of the GNU Lesser General Public
17
+ # License as published by the Free Software Foundation; either
18
+ # version 2 of the License, or (at your option) any later version.
19
+ #
20
+ # This library is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
+ # Lesser General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU Lesser General Public
26
+ # License along with this library; if not, write to the Free Software
27
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28
+ #
29
+ #++
30
+ #
31
+
32
+
33
module Bio

  class SOSUI

    # = SOSUI output report parsing class
    #
    # Parses the text of a single SOSUI entry: the first line is taken as
    # the query identifier, the second line as the prediction, and, when
    # the prediction mentions "MEMBRANE", the remaining lines are scanned
    # for transmembrane helix records.
    #
    # == References
    # * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html
    class Report

      # Delimiter between entries.
      DELIMITER = "\n>"
      RS = DELIMITER

      # Query entry_id (first line of the report without the leading ">").
      attr_reader :entry_id

      # Returns the prediction result whether "MEMBRANE PROTEIN" or
      # "SOLUBLE PROTEIN" (second line of the report, stripped).
      attr_reader :prediction

      # Array of Bio::SOSUI::Report::TMH objects, one per parsed
      # transmembrane helix line. Empty unless the prediction matches
      # /MEMBRANE/.
      attr_reader :tmhs

      # Parser for SOSUI output report.
      #
      # output_report:: String holding one entry of SOSUI output.
      #
      # A report that lacks the identifier or the prediction line yields
      # empty strings for the corresponding attributes instead of raising.
      def initialize(output_report)
        entry = output_report.split(/\n/)

        # to_s guards against empty or truncated reports (nil lines).
        @entry_id   = entry[0].to_s.strip.sub(/^>/, '')
        @prediction = entry[1].to_s.strip
        @tms        = 0
        @tmhs       = []
        parse_tmh(entry) if /MEMBRANE/ =~ @prediction
      end

      # = Bio::SOSUI::Report::TMH
      # Container class for transmembrane helix information.
      #
      # TM 1 31- 53 SECONDARY HIRMTFLRKVYSILSLQVLLTTV
      class TMH

        # Returns a Range of the transmembrane helix positions.
        attr_reader :range

        # Returns the grade of the helix as parsed from the report
        # (e.g. "PRIMARY" or "SECONDARY").
        attr_reader :grade

        # Returns the sequence of the transmembrane helix.
        attr_reader :sequence

        # Sets values.
        def initialize(range, grade, sequence)
          @range    = range
          @grade    = grade
          @sequence = sequence
        end
      end

      private

      # Parser for TMH lines.
      #
      # entry:: Array of the report lines.
      #
      # Stores the declared helix count in @tms (as an Integer, matching
      # its initial value of 0) and appends one TMH object to @tmhs for
      # every helix line.
      def parse_tmh(entry)
        entry.each do |line|
          if /NUMBER OF TM HELIX = (\d+)/ =~ line
            @tms = $1.to_i
          elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line
            # $1 is the helix ordinal; it is not stored.
            range = Range.new($2.to_i, $3.to_i)
            @tmhs.push(TMH.new(range, $4, $5))
          end
        end
      end

    end # class Report

  end # class SOSUI

end # module Bio
114
+
115
+
116
+
117
if __FILE__ == $0

  # Use pretty-printing for "p" when the pp library can be loaded;
  # otherwise keep the plain Kernel#p.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end


  # Two sample entries separated by Bio::SOSUI::Report::DELIMITER.
  sample = <<HOGE
>HOGE1
MEMBRANE PROTEIN
NUMBER OF TM HELIX = 6
TM 1 12- 34 SECONDARY LLVPILLPEKCYDQLFVQWDLLH
TM 2 36- 58 PRIMARY PCLKILLSKGLGLGIVAGSLLVK
TM 3 102- 124 SECONDARY SWGEALFLMLQTITICFLVMHYR
TM 4 126- 148 PRIMARY QTVKGVAFLACYGLVLLVLLSPL
TM 5 152- 174 SECONDARY TVVTLLQASNVPAVVVGRLLQAA
TM 6 214- 236 SECONDARY AGTFVVSSLCNGLIAAQLLFYWN

>HOGE2
SOLUBLE PROTEIN

HOGE

  # Prints the raw entry followed by the parsed attributes.
  def hoge(ent)
    puts '==='
    puts ent
    puts '==='
    sosui = Bio::SOSUI::Report.new(ent)
    p [:entry_id, sosui.entry_id]
    p [:prediction, sosui.prediction]
    # Number of parsed helices (previously printed the Symbol's length).
    p [:tmhs_size, sosui.tmhs.size]
    # The reader is "tmhs"; "tmh" does not exist on the report object.
    # "p" is already pp when the library loaded, so no direct pp call.
    p [:tmhs, sosui.tmhs]
  end

  sample.split(/#{Bio::SOSUI::Report::DELIMITER}/).each {|ent|
    hoge(ent)
  }

  # Without file arguments only the built-in sample is shown.
  exit if ARGV.size == 0

  while ent = $<.gets(Bio::SOSUI::Report::DELIMITER)
    hoge(ent)
  end

end
164
+
165
+
166
+
@@ -0,0 +1,604 @@
1
+ #
2
+ # = bio/appl/spidey/report.rb - SPIDEY result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.8 2005/12/18 15:58:41 k Exp $
24
+ #
25
+ # NCBI Spidey result parser.
26
+ # Currently, output of default (-p 0 option) or -p 1 option are supported.
27
+ #
28
+ # == Notes
29
+ #
30
+ # The mRNA sequence is regarded as a query, and
31
+ # the genomic sequence is regarded as a target (subject, hit).
32
+ #
33
+ # == References
34
+ #
35
+ # * Wheelan, S.J., et al., Spidey: a tool for mRNA-to-genomic alignments,
36
+ # Genome Research, 11, 1952--1957, 2001.
37
+ # http://www.genome.org/cgi/content/abstract/11/11/1952
38
+ # * http://www.ncbi.nlm.nih.gov/spidey/
39
+ #
40
+
41
+ require 'bio'
42
+
43
+ module Bio
44
+ class Spidey
45
+
46
+ # Spidey report parser class.
47
+ # Its object may contain some Bio::Spidey::Report::Hit objects.
48
+ class Report #< DB
49
+ #--
50
+ # File format: -p 0 (default) or -p 1 options
51
+ #++
52
+
53
+ # Delimiter of each entry. Bio::FlatFile uses it.
54
+ DELIMITER = RS = "\n--SPIDEY "
55
+
56
# Creates a new Bio::Spidey::Report object from String.
# You can use Bio::FlatFile to read a file.
#
# Leading whitespace is dropped, anything from a following
# "--SPIDEY " line onwards is cut off and kept in entry_overrun,
# and the rest is split on blank lines. The first paragraph and the
# remaining paragraphs are handed to Hit.new.
def initialize(str)
  text = str.sub(/\A\s+/, '')
  # remove trailing entries for sure
  text.sub!(/\n(^\-\-SPIDEY .*)/m, '')
  @entry_overrun = $1

  paragraphs   = text.split(/\r?\n(?:\r?\n)+/)
  header_lines = paragraphs.shift.to_s.split(/\r?\n/)

  @hit      = Hit.new(paragraphs, header_lines)
  @all_hits = [ @hit ]
  # An empty header or a trailing "No alignment found." line means
  # there is nothing to report as a hit.
  @hits =
    if header_lines.empty? or
        /\ANo alignment found\.\s*\z/ =~ header_lines[-1] then
      []
    else
      [ @hit ]
    end
end

# piece of next entry. Bio::FlatFile uses it.
attr_reader :entry_overrun

# Returns an Array of Bio::Spidey::Report::Hit objects.
# Because current version of SPIDEY supports only 1 genomic sequences,
# the number of hits is 1 or 0.
attr_reader :hits

# Returns an Array of Bio::Spidey::Report::Hit objects.
# Unlike Bio::Spidey::Report#hits, the method returns
# results of all trials of pairwise alignment.
# This would be a Bio::Spidey specific method.
attr_reader :all_hits
85
+
86
# SeqDesc stores sequence information of query or subject:
# an identifier, a definition string and a length.
class SeqDesc
  #--
  # description/definitions of a sequence
  #++

  # Pattern of a "Genomic:" or "mRNA:" description line.
  # Group 2 is the whole definition, group 3 its first word
  # (the identifier) and group 5 the length in bp.
  DESCRIPTION_LINE = /^(Genomic|mRNA)\:\s*(([^\s]*) (.+))\, (\d+) bp\s*$/

  # Identifier of the sequence.
  attr_reader :entry_id

  # Definition of the sequence.
  attr_reader :definition

  # Length of the sequence.
  attr_reader :len

  # Creates a new SeqDesc object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  def initialize(seqid, seqdef, len)
    @entry_id   = seqid
    @definition = seqdef
    @len        = len
  end

  # Parses piece of Spidey result text and creates a new SeqDesc object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # When the text does not look like a description line, every
  # attribute of the returned object is nil.
  def self.parse(str)
    found = DESCRIPTION_LINE.match(str.to_s)
    return self.new(nil, nil, nil) unless found
    self.new(found[3], found[2], found[5].to_i)
  end
end #class SeqDesc
121
+
122
# Sequence segment pair of Spidey result.
# Similar to Bio::Blast::Report::Hsp but lacks many methods.
# For mRNA-genome mapping programs, unlike other homology search
# programs, the class is used not only for exons but also for introns.
# (Note that intron data would not be available according to run-time
# options of the program.)
class SegmentPair
  #--
  # segment pair (like Bio::BLAST::*::Report::Hsp)
  #++

  # Creates a new SegmentPair object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  def initialize(genomic, mrna, midline, aaseqline,
                 percent_identity, mismatches, gaps, splice_site,
                 align_len)
    @genomic          = genomic
    @mrna             = mrna
    @midline          = midline
    @aaseqline        = aaseqline
    @percent_identity = percent_identity
    # The name must match the attr_reader below, otherwise
    # #mismatches and #mismatch_count always return nil.
    @mismatches       = mismatches
    @gaps             = gaps
    @splice_site      = splice_site
    @align_len        = align_len
  end

  # Returns segment informations of the 'Genomic'.
  # Returns a Bio::Spidey::Report::Segment object.
  # This would be a Bio::Spidey specific method.
  attr_reader :genomic

  # Returns segment informations of the 'mRNA'.
  # Returns a Bio::Spidey::Report::Segment object.
  # This would be a Bio::Spidey specific method.
  attr_reader :mrna

  # Returns the middle line of the alignment of the segment pair.
  # Returns nil if no alignment data are available.
  attr_reader :midline

  # Returns amino acid sequence in alignment.
  # Returns String, because white spaces are also important.
  # Returns nil if no alignment data are available.
  attr_reader :aaseqline

  # Returns percent identity of the segment pair.
  # (SegmentPair.parse stores it as the String captured from the text.)
  attr_reader :percent_identity

  # Returns mismatches.
  attr_reader :mismatches
  alias mismatch_count mismatches

  # Returns gaps.
  attr_reader :gaps

  # Returns splice site information.
  # Returns a hash which contains :d and :a for keys and
  # 0, 1, or nil for values.
  # This would be a Bio::Spidey specific method.
  attr_reader :splice_site

  # Returns alignment length of the segment pair.
  # Returns nil if no alignment data are available.
  attr_reader :align_len

  # Creates a new SegmentPair object when the segment pair is an intron.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # aln is indexed as [genomic, midline, mRNA, amino acid] lines.
  def self.new_intron(from, to, strand, aln)
    genomic   = Segment.new(from, to, strand, aln[0])
    mrna      = Segment.new(nil, nil, nil, aln[2])
    midline   = aln[1]
    aaseqline = aln[3]
    self.new(genomic, mrna, midline, aaseqline,
             nil, nil, nil, nil, nil)
  end

  # Parses a piece of Spidey result text and creates a new
  # SegmentPair object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # str::        an "Exon ..." summary line
  # strand::     strand of the hit; 'minus' swaps the genomic ends
  # complement:: true value when the mRNA is reverse-complemented
  # aln::        [genomic, midline, mRNA, amino acid] alignment lines
  #
  # When str does not match the expected pattern, all parsed values
  # are nil.
  def self.parse(str, strand, complement, aln)
    /\AExon\s*\d+(\(\-\))?\:\s*(\d+)\-(\d+)\s*\(gen\)\s+(\d+)\-(\d+)\s*\(mRNA\)\s+id\s*([\d\.]+)\s*\%\s+mismatches\s+(\d+)\s+gaps\s+(\d+)\s+splice site\s*\(d +a\)\s*\:\s*(\d+)\s+(\d+)/ =~ str
    if strand == 'minus' then
      # positions are swapped for the minus strand
      genomic = Segment.new($3, $2, strand, aln[0])
    else
      genomic = Segment.new($2, $3, 'plus', aln[0])
    end
    mrna_strand = (complement ? 'minus' : 'plus')
    mrna = Segment.new($4, $5, mrna_strand, aln[2])
    percent_identity = $6
    mismatches = ($7 ? $7.to_i : nil)
    gaps = ($8 ? $8.to_i : nil)
    splice_site = {
      :d => ($9 ? $9.to_i : nil),
      :a => ($10 ? $10.to_i : nil)
    }
    midline = aln[1]
    aaseqline = aln[3]
    self.new(genomic, mrna, midline, aaseqline,
             percent_identity, mismatches, gaps, splice_site,
             (midline ? midline.length : nil))
  end

  #--
  # Bio::BLAST::*::Report::Hsp compatible methods
  # Methods already defined: midline, percent_identity,
  # gaps, align_len, mismatch_count
  #++

  # Returns start position of the mRNA (query) (the first position is 1).
  def query_from; @mrna.from; end

  # Returns end position (including its position) of the mRNA (query).
  def query_to; @mrna.to; end

  # Returns the sequence (with gaps) of the mRNA (query).
  def qseq; @mrna.seq; end

  # Returns strand information of the mRNA (query).
  # Returns 'plus', 'minus', or nil.
  def query_strand; @mrna.strand; end

  # Returns start position of the genomic (target, hit)
  # (the first position is 1).
  def hit_from; @genomic.from; end

  # Returns end position (including its position) of the
  # genomic (target, hit).
  def hit_to; @genomic.to; end

  # Returns the sequence (with gaps) of the genomic (target, hit).
  def hseq; @genomic.seq; end

  # Returns strand information of the genomic (target, hit).
  # Returns 'plus', 'minus', or nil.
  def hit_strand; @genomic.strand; end
end #class SegmentPair
267
+
268
# Segment informations of a segment pair:
# start/end positions, strand and sequence data of one side.
class Segment
  # start position
  attr_reader :from

  # end position
  attr_reader :to

  # strand information
  attr_reader :strand

  # sequence data
  attr_reader :seq

  # Creates a new Segment object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # Positions are converted with to_i; a nil or false position is
  # stored as nil.
  def initialize(pos_st, pos_ed, strand = nil, seq = nil)
    @from, @to = [ pos_st, pos_ed ].collect do |pos|
      pos ? pos.to_i : nil
    end
    @strand = strand
    @seq    = seq
  end
end #class Segment
293
+
294
# Hit object of Spidey result.
# Similar to Bio::Blast::Report::Hit but lacks many methods.
#
# Values are parsed lazily from the raw text on first access and
# cached in instance variables.
class Hit
  # Creates a new Hit object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # data:: Array of text paragraphs following the summary paragraph
  # d0::   Array of lines of the summary paragraph
  def initialize(data, d0)
    @data = data
    @d0 = d0
  end

  # Fetches fields.
  # Returns the stripped value of the first "t: value" line in ary,
  # or nil when no such line exists.
  def field_fetch(t, ary)
    reg = /^#{Regexp.escape(t)}\:\s*(.+)\s*$/
    if ary.find { |x| reg =~ x }
      $1.strip
    else
      nil
    end
  end
  private :field_fetch

  # Parses information about strand.
  # Sets @strand and @complement from the 'Strand' field.
  def parse_strand
    x = field_fetch('Strand', @d0)
    if x =~ /^(.+)Reverse +complement\s*$/ then
      @strand = $1.strip
      @complement = true
    else
      @strand = x
      @complement = nil
    end
  end
  private :parse_strand

  # Returns strand information of the hit.
  # Returns 'plus', 'minus', or nil.
  # This would be a Bio::Spidey specific method.
  def strand
    unless defined?(@strand); parse_strand; end
    @strand
  end

  # Returns true if the result reports 'Reverse complement'.
  # Otherwise, return false or nil.
  # This would be a Bio::Spidey specific method.
  def complement?
    unless defined?(@complement); parse_strand; end
    @complement
  end

  # Returns number of exons in the hit.
  def number_of_exons
    unless defined?(@number_of_exons)
      @number_of_exons = field_fetch('Number of exons', @d0).to_i
    end
    @number_of_exons
  end

  # Returns number of splice sites of the hit.
  def number_of_splice_sites
    unless defined?(@number_of_splice_sites)
      @number_of_splice_sites =
        field_fetch('Number of splice sites', @d0).to_i
    end
    @number_of_splice_sites
  end

  # Returns overall percent identity of the hit
  # (the numeric part as a String), or nil when not reported.
  def percent_identity
    unless defined?(@percent_identity)
      x = field_fetch('overall percent identity', @d0)
      @percent_identity =
        (/([\d\.]+)\s*\%/ =~ x.to_s) ? $1 : nil
    end
    @percent_identity
  end

  # Returns missing mRNA ends of the hit.
  def missing_mrna_ends
    unless defined?(@missing_mrna_ends)
      @missing_mrna_ends = field_fetch('Missing mRNA ends', @d0)
    end
    @missing_mrna_ends
  end

  # Returns sequence informations of the 'Genomic'.
  # Returns a Bio::Spidey::Report::SeqDesc object.
  # This would be a Bio::Spidey specific method.
  def genomic
    unless defined?(@genomic)
      @genomic = SeqDesc.parse(@d0.find { |x| /^Genomic\:/ =~ x })
    end
    @genomic
  end

  # Returns sequence informations of the mRNA.
  # Returns a Bio::Spidey::Report::SeqDesc object.
  # This would be a Bio::Spidey specific method.
  def mrna
    unless defined?(@mrna)
      @mrna = SeqDesc.parse(@d0.find { |x| /^mRNA\:/ =~ x })
    end
    @mrna
  end

  # Parses segment pairs.
  # Fills @exons, @introns and @segmentpairs in one pass over the
  # "Exon" lines of the summary, pairing each with the next parsed
  # alignment (when one is available).
  def parse_segmentpairs
    aln = self.align.dup
    ex = []
    itr = []
    segpairs = []
    cflag = self.complement?
    strand = self.strand
    # Offsets used to derive the position adjacent to an exon end.
    if strand == 'minus' then
      d_to = 1; d_from = -1
    else
      d_to = -1; d_from = 1
    end
    @d0.each do |x|
      if x =~ /^Exon\s*\d+(\(.*\))?\:/ then
        if a = aln.shift then
          # a is [ left, body, right ] from parse_align_lines
          y = SegmentPair.parse(x, strand, cflag, a[1])
          ex << y
          if a[0][0].to_s.length > 0 then
            # region before the exon
            to = y.genomic.from + d_to
            i0 = SegmentPair.new_intron(nil, to, strand, a[0])
            itr << i0
            segpairs << i0
          end
          segpairs << y
          if a[2][0].to_s.length > 0 then
            # region after the exon
            from = y.genomic.to + d_from
            i2 = SegmentPair.new_intron(from, nil, strand, a[2])
            itr << i2
            segpairs << i2
          end
        else
          # no alignment text for this exon
          y = SegmentPair.parse(x, strand, cflag, [])
          ex << y
          segpairs << y
        end
      end
    end
    @exons = ex
    @introns = itr
    @segmentpairs = segpairs
  end
  private :parse_segmentpairs

  # Returns exons of the hit.
  # Returns an array of Bio::Spidey::Report::SegmentPair object.
  def exons
    unless defined?(@exons); parse_segmentpairs; end
    @exons
  end

  # Returns introns of the hit.
  # Some of them would contain untranscribed regions.
  # Returns an array of Bio::Spidey::Report::SegmentPair objects.
  # (Note that intron data is not always available
  # according to run-time options of the program.)
  def introns
    unless defined?(@introns); parse_segmentpairs; end
    @introns
  end

  # Returns segment pairs (exons and introns) of the hit.
  # Each segment pair is a Bio::Spidey::Report::SegmentPair object.
  # Returns an array of Bio::Spidey::Report::SegmentPair objects.
  # (Note that intron data is not always available
  # according to run-time options of the program.)
  def segmentpairs
    # Checks the variable that parse_segmentpairs actually sets, so
    # the result is parsed once and then reused.
    unless defined?(@segmentpairs); parse_segmentpairs; end
    @segmentpairs
  end

  # Returns alignments.
  # Returns an Array of arrays.
  # This would be a Bio::Spidey specific method.
  def align
    unless defined?(@align); parse_align; end
    @align
  end

  # Parses alignment lines.
  # Each element of data is a paragraph of up to four lines; the
  # lines are concatenated row by row (rows shorter than the first
  # row are padded with spaces). Columns where the third row starts
  # or ends with spaces are split off.
  # Returns [ left, body, right ], each an Array of four Strings
  # (left/right are empty Arrays when there is nothing to split off).
  def parse_align_lines(data)
    misc = [ [], [], [], [] ]
    data.each do |x|
      a = x.split(/\r?\n/)
      if g = a.shift then
        misc[0] << g
        (1..3).each do |i|
          if y = a.shift then
            if y.length < g.length
              y << ' ' * (g.length - y.length)
            end
            misc[i] << y
          else
            misc[i] << ' ' * g.length
          end
        end
      end
    end
    misc.collect! { |x| x.join('') }
    left = []
    if /\A +/ =~ misc[2] then
      len = $&.size
      left = misc.collect { |x| x[0, len] }
      misc.each { |x| x[0, len] = '' }
    end
    right = []
    if / +\z/ =~ misc[2] then
      len = $&.size
      right = misc.collect { |x| x[(-len)..-1] }
      misc.each { |x| x[(-len)..-1] = '' }
    end
    body = misc
    [ left, body, right ]
  end
  private :parse_align_lines

  # Parses alignments.
  # Paragraphs are grouped into runs separated by paragraphs that
  # start a line with "Genomic:" or "Exon N:"; each non-empty run is
  # converted by parse_align_lines.
  def parse_align
    r = []
    # Work on a copy so that the Array given to the constructor
    # is not emptied.
    data = @data.dup
    while !data.empty?
      a = []
      while x = data.shift and !(x =~ /^(Genomic|Exon\s*\d+)\:/)
        a.push x
      end
      r.push parse_align_lines(a) unless a.empty?
    end
    @align = r
  end
  private :parse_align

  #--
  # Bio::BLAST::*::Report::Hit compatible methods
  #++

  # Length of the mRNA (query) sequence.
  # Same as Bio::Spidey::Report#query_len.
  def query_len; mrna.len; end

  # Identifier of the mRNA (query).
  # Same as Bio::Spidey::Report#query_id.
  def query_id; mrna.entry_id; end

  # Definition of the mRNA (query).
  # Same as Bio::Spidey::Report#query_def.
  def query_def; mrna.definition; end

  # The genomic (target) sequence length.
  def target_len; genomic.len; end

  # Identifier of the genomic (target) sequence.
  def target_id; genomic.entry_id; end

  # Definition of the genomic (target) sequence.
  def target_def; genomic.definition; end

  alias hit_id target_id
  alias len target_len
  alias definition target_def

  alias hsps exons

  # Iterates over each exon of the hit.
  # Yields Bio::Spidey::Report::SegmentPair object.
  def each(&x) #:yields: segmentpair
    exons.each(&x)
  end
end #class Hit
570
+
571
# Returns sequence informations of the mRNA.
# Returns a Bio::Spidey::Report::SeqDesc object.
# This would be a Bio::Spidey specific method.
def mrna
  @hit.mrna
end

#--
#Bio::BLAST::*::Report compatible methods
#++

# Returns number of hits.
# Same as hits.size.
def num_hits
  @hits.size
end

# Iterates over each hits.
# Same as hits.each.
# Yields a Bio::Spidey::Report::Hit object.
def each_hit(&x) #:yields: hit
  @hits.each(&x)
end
alias each each_hit

# Returns definition of the mRNA (query) sequence.
def query_def
  query = @hit.mrna
  query.definition
end

# Returns identifier of the mRNA (query) sequence.
def query_id
  query = @hit.mrna
  query.entry_id
end

# Returns the length of the mRNA (query) sequence.
def query_len
  query = @hit.mrna
  query.len
end
600
+ end #class Report
601
+
602
+ end #class Spidey
603
+ end #module Bio
604
+