bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,166 @@
1
+ #
2
+ # = bio/appl/sosui/report.rb - SOSUI report class
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: report.rb,v 1.9 2005/12/18 15:58:41 k Exp $
8
+ #
9
+ # == Example
10
+ #
11
+ # == References
12
+ # * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html
13
+ #--
14
+ #
15
+ # This library is free software; you can redistribute it and/or
16
+ # modify it under the terms of the GNU Lesser General Public
17
+ # License as published by the Free Software Foundation; either
18
+ # version 2 of the License, or (at your option) any later version.
19
+ #
20
+ # This library is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23
+ # Lesser General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU Lesser General Public
26
+ # License along with this library; if not, write to the Free Software
27
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28
+ #
29
+ #++
30
+ #
31
+
32
+
33
+ module Bio
34
+
35
+ class SOSUI
36
+
37
# = SOSUI output report parsing class
#
# Parses the text of a single SOSUI result entry: the query identifier
# (first line), the prediction (second line) and, for membrane proteins,
# the list of predicted transmembrane helices.
#
# == References
# * http://sosui.proteome.bio.tuat.ac.jp/sosui_submit.html
class Report

  # Delimiter
  DELIMITER = "\n>"
  RS = DELIMITER

  # Query entry_id (first line of the report without a leading ">").
  attr_reader :entry_id

  # Returns the prediction result whether "MEMBRANE PROTEIN" or
  # "SOLUBLE PROTEIN" (the stripped second line of the report).
  attr_reader :prediction

  # Array of Bio::SOSUI::Report::TMH objects (empty unless the prediction
  # line contains "MEMBRANE").
  attr_reader :tmhs

  # Parser for SOSUI output report.
  #
  # +output_report+:: String holding one entry of SOSUI output.
  #
  # An empty or truncated report yields empty strings for entry_id and
  # prediction instead of raising NoMethodError.
  def initialize(output_report)
    entry = output_report.split(/\n/)

    # to_s guards against reports with fewer than two lines.
    @entry_id   = entry[0].to_s.strip.sub(/^>/, '')
    @prediction = entry[1].to_s.strip
    @tms        = 0
    @tmhs       = []
    parse_tmh(entry) if /MEMBRANE/ =~ @prediction
  end

  private

  # Parser for TMH lines.
  #
  # Scans every line of the entry, remembering the declared helix count
  # and appending one TMH object per "TM n from- to GRADE SEQUENCE" line.
  def parse_tmh(entry)
    entry.each do |line|
      if /NUMBER OF TM HELIX = (\d+)/ =~ line
        # kept as an Integer, consistent with the initial value 0
        @tms = $1.to_i
      elsif /TM (\d+) +(\d+)- *(\d+) (\w+) +(\w+)/ =~ line
        # $1 is the helix ordinal; it is implied by the position in @tmhs
        range = Range.new($2.to_i, $3.to_i)
        @tmhs.push(TMH.new(range, $4, $5))
      end
    end
  end


  # = Bio::SOSUI::Report::TMH
  # Container class for transmembrane helix information.
  #
  #  TM 1 31- 53 SECONDARY HIRMTFLRKVYSILSLQVLLTTV
  class TMH

    # Returns a Range (start..end position) of the transmembrane helix.
    attr_reader :range

    # Returns ``PRIMARY'' or ``SECONDARY'' of helix.
    attr_reader :grade

    # Returns the sequence of the transmembrane helix.
    attr_reader :sequence

    # Sets values.
    #
    # +range+::    Range of the helix positions
    # +grade+::    grade string taken from the report line
    # +sequence+:: sequence string taken from the report line
    def initialize(range, grade, sequence)
      @range = range
      @grade = grade
      @sequence = sequence
    end
  end

end # class Report
110
+
111
+ end # class SOSUI
112
+
113
+ end # module Bio
114
+
115
+
116
+
117
# Self-test / demo: runs only when this file is executed directly.
# Parses the embedded sample entries, then (if file names are given on the
# command line) every entry read from those files.
if __FILE__ == $0

  # Pretty-print through Kernel#p when the pp library can be loaded.
  begin
    require 'pp'
    alias p pp
  rescue LoadError
  end


  sample = <<HOGE
>HOGE1
MEMBRANE PROTEIN
NUMBER OF TM HELIX = 6
TM 1 12- 34 SECONDARY LLVPILLPEKCYDQLFVQWDLLH
TM 2 36- 58 PRIMARY PCLKILLSKGLGLGIVAGSLLVK
TM 3 102- 124 SECONDARY SWGEALFLMLQTITICFLVMHYR
TM 4 126- 148 PRIMARY QTVKGVAFLACYGLVLLVLLSPL
TM 5 152- 174 SECONDARY TVVTLLQASNVPAVVVGRLLQAA
TM 6 214- 236 SECONDARY AGTFVVSSLCNGLIAAQLLFYWN

>HOGE2
SOLUBLE PROTEIN

HOGE

  # Prints the raw entry followed by the fields parsed from it.
  def hoge(ent)
    puts '==='
    puts ent
    puts '==='
    sosui = Bio::SOSUI::Report.new(ent)
    p [:entry_id, sosui.entry_id]
    p [:prediction, sosui.prediction]
    # number of helices, then the helices themselves; the reader is #tmhs
    p [:tmhs_size, sosui.tmhs.size]
    p [:tmhs, sosui.tmhs]
  end

  sample.split(/#{Bio::SOSUI::Report::DELIMITER}/).each do |ent|
    hoge(ent)
  end

  # Without arguments there is nothing more to read.
  exit if ARGV.empty?

  while ent = $<.gets(Bio::SOSUI::Report::DELIMITER)
    hoge(ent)
  end

end
164
+
165
+
166
+
@@ -0,0 +1,604 @@
1
+ #
2
+ # = bio/appl/spidey/report.rb - SPIDEY result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.8 2005/12/18 15:58:41 k Exp $
24
+ #
25
+ # NCBI Spidey result parser.
26
+ # Currently, output produced with the default (-p 0) option or the -p 1 option is supported.
27
+ #
28
+ # == Notes
29
+ #
30
+ # The mRNA sequence is regarded as a query, and
31
+ # the genomic sequence is regarded as a target (subject, hit).
32
+ #
33
+ # == References
34
+ #
35
+ # * Wheelan, S.J., et al., Spidey: a tool for mRNA-to-genomic alignments,
36
+ # Genome Research, 11, 1952--1957, 2001.
37
+ # http://www.genome.org/cgi/content/abstract/11/11/1952
38
+ # * http://www.ncbi.nlm.nih.gov/spidey/
39
+ #
40
+
41
+ require 'bio'
42
+
43
+ module Bio
44
+ class Spidey
45
+
46
+ # Spidey report parser class.
47
+ # Its object may contain some Bio::Spidey::Report::Hit objects.
48
+ class Report #< DB
49
+ #--
50
+ # File format: -p 0 (default) or -p 1 options
51
+ #++
52
+
53
+ # Delimiter of each entry. Bio::FlatFile uses it.
54
+ DELIMITER = RS = "\n--SPIDEY "
55
+
56
# Creates a new Bio::Spidey::Report object from String.
# You can use Bio::FlatFile to read a file.
#
# The text is split into blank-line separated paragraphs; the lines of
# the first paragraph and the remaining paragraphs are handed to a Hit.
def initialize(str)
  text = str.sub(/\A\s+/, '')
  # remove trailing entries for sure, remembering what was cut off
  text.sub!(/\n(^\-\-SPIDEY .*)/m, '')
  @entry_overrun = $1

  paragraphs = text.split(/\r?\n(?:\r?\n)+/)
  header_lines = paragraphs.shift.to_s.split(/\r?\n/)

  @hit = Hit.new(paragraphs, header_lines)
  @all_hits = [ @hit ]

  # No usable hit when the header is empty or ends with the
  # "No alignment found." message.
  no_alignment = header_lines.empty? ||
                 /\ANo alignment found\.\s*\z/ =~ header_lines[-1]
  @hits = no_alignment ? [] : [ @hit ]
end
72
+ # piece of next entry. Bio::FlatFile uses it.
73
+ attr_reader :entry_overrun
74
+
75
+ # Returns an Array of Bio::Spidey::Report::Hit objects.
76
+ # Because current version of SPIDEY supports only 1 genomic sequences,
77
+ # the number of hits is 1 or 0.
78
+ attr_reader :hits
79
+
80
+ # Returns an Array of Bio::Spidey::Report::Hit objects.
81
+ # Unlike Bio::Spidey::Report#hits, the method returns
82
+ # results of all trials of pairwise alignment.
83
+ # This would be a Bio::Spidey specific method.
84
+ attr_reader :all_hits
85
+
86
# SeqDesc stores sequence information of query or subject:
# an identifier, a definition line and a length.
class SeqDesc
  #--
  # description/definitions of a sequence
  #++

  # Identifier of the sequence.
  attr_reader :entry_id

  # Definition of the sequence.
  attr_reader :definition

  # Length of the sequence.
  attr_reader :len

  # Creates a new SeqDesc object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  def initialize(seqid, seqdef, len)
    @entry_id, @definition, @len = seqid, seqdef, len
  end

  # Parses piece of Spidey result text and creates a new SeqDesc object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # A "Genomic:" or "mRNA:" line of the form "<id> <rest>, <n> bp" is
  # expected; when the text does not match, every field is nil.
  def self.parse(str)
    found = /^(Genomic|mRNA)\:\s*(([^\s]*) (.+))\, (\d+) bp\s*$/.match(str.to_s)
    return new(nil, nil, nil) unless found
    # group 2: whole definition, group 3: its first word, group 5: length
    new(found[3], found[2], found[5].to_i)
  end
end #class SeqDesc
121
+
122
# Sequence segment pair of Spidey result.
# Similar to Bio::Blast::Report::Hsp but lacks many methods.
# For mRNA-genome mapping programs, unlike other homology search
# programs, the class is used not only for exons but also for introns.
# (Note that intron data would not be available according to run-time
# options of the program.)
class SegmentPair
  #--
  # segment pair (like Bio::BLAST::*::Report::Hsp)
  #++

  # Creates a new SegmentPair object.
  # It is designed to be called from Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  def initialize(genomic, mrna, midline, aaseqline,
                 percent_identity, mismatches, gaps, splice_site,
                 align_len)
    @genomic = genomic
    @mrna = mrna
    @midline = midline
    @aaseqline = aaseqline
    @percent_identity = percent_identity
    # must be the variable read by attr_reader :mismatches
    @mismatches = mismatches
    @gaps = gaps
    @splice_site = splice_site
    @align_len = align_len
  end

  # Returns segment informations of the 'Genomic'.
  # Returns a Bio::Spidey::Report::Segment object.
  # This would be a Bio::Spidey specific method.
  attr_reader :genomic

  # Returns segment informations of the 'mRNA'.
  # Returns a Bio::Spidey::Report::Segment object.
  # This would be a Bio::Spidey specific method.
  attr_reader :mrna

  # Returns the middle line of the alignment of the segment pair.
  # Returns nil if no alignment data are available.
  attr_reader :midline

  # Returns amino acid sequence in alignment.
  # Returns String, because white spaces are also important.
  # Returns nil if no alignment data are available.
  attr_reader :aaseqline

  # Returns percent identity of the segment pair
  # (a String as written in the report, or nil).
  attr_reader :percent_identity

  # Returns mismatches.
  attr_reader :mismatches
  alias mismatch_count mismatches

  # Returns gaps.
  attr_reader :gaps

  # Returns splice site information.
  # Returns a hash which contains :d and :a for keys and
  # 0, 1, or nil for values.
  # This would be a Bio::Spidey specific method.
  attr_reader :splice_site

  # Returns alignment length of the segment pair.
  # Returns nil if no alignment data are available.
  attr_reader :align_len

  # Creates a new SegmentPair object when the segment pair is an intron.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # +aln+ is indexed as [genomic, midline, mRNA, amino acid] lines.
  def self.new_intron(from, to, strand, aln)
    genomic = Segment.new(from, to, strand, aln[0])
    mrna    = Segment.new(nil, nil, nil, aln[2])
    self.new(genomic, mrna, aln[1], aln[3],
             nil, nil, nil, nil, nil)
  end

  # Parses a piece of Spidey result text and creates a new
  # SegmentPair object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # +str+::        an "Exon n: ..." summary line
  # +strand+::     strand of the genomic sequence ('minus' swaps from/to)
  # +complement+:: true when the mRNA is reported as reverse complement
  # +aln+::        [genomic, midline, mRNA, amino acid] alignment lines,
  #                or an empty Array when no alignment is available
  def self.parse(str, strand, complement, aln)
    /\AExon\s*\d+(\(\-\))?\:\s*(\d+)\-(\d+)\s*\(gen\)\s+(\d+)\-(\d+)\s*\(mRNA\)\s+id\s*([\d\.]+)\s*\%\s+mismatches\s+(\d+)\s+gaps\s+(\d+)\s+splice site\s*\(d +a\)\s*\:\s*(\d+)\s+(\d+)/ =~ str
    # $2-$3: genomic range, $4-$5: mRNA range (all nil when unmatched)
    if strand == 'minus' then
      genomic = Segment.new($3, $2, strand, aln[0])
    else
      genomic = Segment.new($2, $3, 'plus', aln[0])
    end
    mrna = Segment.new($4, $5, (complement ? 'minus' : 'plus'), aln[2])
    percent_identity = $6
    mismatches = ($7 ? $7.to_i : nil)
    gaps = ($8 ? $8.to_i : nil)
    splice_site = {
      :d => ($9 ? $9.to_i : nil),
      :a => ($10 ? $10.to_i : nil)
    }
    midline = aln[1]
    aaseqline = aln[3]
    self.new(genomic, mrna, midline, aaseqline,
             percent_identity, mismatches, gaps, splice_site,
             (midline ? midline.length : nil))
  end

  #--
  # Bio::BLAST::*::Report::Hsp compatible methods
  # Methods already defined: midline, percent_identity,
  # gaps, align_len, mismatch_count
  #++

  # Returns start position of the mRNA (query) (the first position is 1).
  def query_from; @mrna.from; end

  # Returns end position (including its position) of the mRNA (query).
  def query_to; @mrna.to; end

  # Returns the sequence (with gaps) of the mRNA (query).
  def qseq; @mrna.seq; end

  # Returns strand information of the mRNA (query).
  # Returns 'plus', 'minus', or nil.
  def query_strand; @mrna.strand; end

  # Returns start position of the genomic (target, hit)
  # (the first position is 1).
  def hit_from; @genomic.from; end

  # Returns end position (including its position) of the
  # genomic (target, hit).
  def hit_to; @genomic.to; end

  # Returns the sequence (with gaps) of the genomic (target, hit).
  def hseq; @genomic.seq; end

  # Returns strand information of the genomic (target, hit).
  # Returns 'plus', 'minus', or nil.
  def hit_strand; @genomic.strand; end
end #class SegmentPair
267
+
268
# Segment informations of a segment pair:
# positions, strand and sequence of one side of the pair.
class Segment
  # start position (Integer or nil)
  attr_reader :from

  # end position (Integer or nil)
  attr_reader :to

  # strand information
  attr_reader :strand

  # sequence data
  attr_reader :seq

  # Creates a new Segment object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # Positions are converted with to_i; nil/false positions become nil.
  def initialize(pos_st, pos_ed, strand = nil, seq = nil)
    @from = (pos_st.to_i if pos_st)
    @to = (pos_ed.to_i if pos_ed)
    @strand, @seq = strand, seq
  end
end #class Segment
293
+
294
# Hit object of Spidey result.
# Similar to Bio::Blast::Report::Hit but lacks many methods.
#
# Values are parsed from the stored report text on first access and
# cached in instance variables afterwards.
class Hit
  # Creates a new Hit object.
  # It is designed to be called internally from
  # Bio::Spidey::Report::* classes.
  # Users shall not call it directly.
  #
  # +data+:: Array of Strings holding the alignment paragraphs
  #          (consumed destructively when alignments are parsed)
  # +d0+::   Array of Strings holding the summary lines
  def initialize(data, d0)
    @data = data
    @d0 = d0
  end

  # Fetches fields.
  # Returns the stripped value of the first "<t>: value" line in +ary+,
  # or nil when there is no such line.
  def field_fetch(t, ary)
    reg = /^#{Regexp.escape(t)}\:\s*(.+)\s*$/
    ary.each do |line|
      found = reg.match(line)
      return found[1].strip if found
    end
    nil
  end
  private :field_fetch

  # Parses information about strand.
  # Sets @strand and @complement from the 'Strand' field.
  def parse_strand
    x = field_fetch('Strand', @d0)
    found = x ? /^(.+)Reverse +complement\s*$/.match(x) : nil
    if found
      @strand = found[1].strip
      @complement = true
    else
      @strand = x
      @complement = nil
    end
  end
  private :parse_strand

  # Returns strand information of the hit.
  # Returns 'plus', 'minus', or nil.
  # This would be a Bio::Spidey specific method.
  def strand
    parse_strand unless defined?(@strand)
    @strand
  end

  # Returns true if the result reports 'Reverse complement'.
  # Otherwise, return false or nil.
  # This would be a Bio::Spidey specific method.
  def complement?
    parse_strand unless defined?(@complement)
    @complement
  end

  # Returns number of exons in the hit (0 when the field is missing).
  def number_of_exons
    unless defined?(@number_of_exons)
      @number_of_exons = field_fetch('Number of exons', @d0).to_i
    end
    @number_of_exons
  end

  # Returns number of splice sites of the hit (0 when the field is missing).
  def number_of_splice_sites
    unless defined?(@number_of_splice_sites)
      @number_of_splice_sites =
        field_fetch('Number of splice sites', @d0).to_i
    end
    @number_of_splice_sites
  end

  # Returns overall percent identity of the hit
  # (a String such as "98.7", or nil when not reported).
  def percent_identity
    unless defined?(@percent_identity)
      x = field_fetch('overall percent identity', @d0)
      @percent_identity =
        (/([\d\.]+)\s*\%/ =~ x.to_s) ? $1 : nil
    end
    @percent_identity
  end

  # Returns missing mRNA ends of the hit.
  def missing_mrna_ends
    unless defined?(@missing_mrna_ends)
      @missing_mrna_ends = field_fetch('Missing mRNA ends', @d0)
    end
    @missing_mrna_ends
  end

  # Returns sequence informations of the 'Genomic'.
  # Returns a Bio::Spidey::Report::SeqDesc object.
  # This would be a Bio::Spidey specific method.
  def genomic
    unless defined?(@genomic)
      @genomic = SeqDesc.parse(@d0.find { |x| /^Genomic\:/ =~ x })
    end
    @genomic
  end

  # Returns sequence informations of the mRNA.
  # Returns a Bio::Spidey::Report::SeqDesc object.
  # This would be a Bio::Spidey specific method.
  def mrna
    unless defined?(@mrna)
      @mrna = SeqDesc.parse(@d0.find { |x| /^mRNA\:/ =~ x })
    end
    @mrna
  end

  # Parses segment pairs.
  # Fills @exons, @introns and @segmentpairs from the "Exon" summary
  # lines, pairing each of them with the next parsed alignment (if any).
  def parse_segmentpairs
    aln = self.align.dup
    ex = []
    itr = []
    segpairs = []
    cflag = self.complement?
    strand = self.strand
    # offsets from an exon boundary to the adjacent flanking region
    if strand == 'minus' then
      d_to = 1; d_from = -1
    else
      d_to = -1; d_from = 1
    end
    @d0.each do |x|
      next unless x =~ /^Exon\s*\d+(\(.*\))?\:/
      a = aln.shift
      unless a
        # no alignment text for this exon
        y = SegmentPair.parse(x, strand, cflag, [])
        ex << y
        segpairs << y
        next
      end
      # a is [ left flank, exon body, right flank ]
      y = SegmentPair.parse(x, strand, cflag, a[1])
      ex << y
      unless a[0][0].to_s.empty?
        to = y.genomic.from + d_to
        i0 = SegmentPair.new_intron(nil, to, strand, a[0])
        itr << i0
        segpairs << i0
      end
      segpairs << y
      unless a[2][0].to_s.empty?
        from = y.genomic.to + d_from
        i2 = SegmentPair.new_intron(from, nil, strand, a[2])
        itr << i2
        segpairs << i2
      end
    end
    @exons = ex
    @introns = itr
    @segmentpairs = segpairs
  end
  private :parse_segmentpairs

  # Returns exons of the hit.
  # Returns an array of Bio::Spidey::Report::SegmentPair object.
  def exons
    parse_segmentpairs unless defined?(@exons)
    @exons
  end

  # Returns introns of the hit.
  # Some of them would contain untranscribed regions.
  # Returns an array of Bio::Spidey::Report::SegmentPair objects.
  # (Note that intron data is not always available
  # according to run-time options of the program.)
  def introns
    parse_segmentpairs unless defined?(@introns)
    @introns
  end

  # Returns segment pairs (exons and introns) of the hit.
  # Each segment pair is a Bio::Spidey::Report::SegmentPair object.
  # Returns an array of Bio::Spidey::Report::SegmentPair objects.
  # (Note that intron data is not always available
  # according to run-time options of the program.)
  def segmentpairs
    # parse once; later calls return the cached array
    parse_segmentpairs unless defined?(@segmentpairs)
    @segmentpairs
  end

  # Returns alignments.
  # Returns an Array of arrays.
  # This would be a Bio::Spidey specific method.
  def align
    parse_align unless defined?(@align)
    @align
  end

  # Parses alignment lines.
  # Each element of +data+ is a block of up to four text lines; the
  # blocks are concatenated per line and the result is returned as
  # [ left, body, right ], where left/right are the parts in which the
  # third line consists of spaces only.
  def parse_align_lines(data)
    misc = [ [], [], [], [] ]
    data.each do |x|
      a = x.split(/\r?\n/)
      g = a.shift
      next unless g
      misc[0] << g
      (1..3).each do |i|
        y = a.shift
        if y
          # pad shorter lines to the width of the first line
          y << ' ' * (g.length - y.length) if y.length < g.length
          misc[i] << y
        else
          misc[i] << ' ' * g.length
        end
      end
    end
    misc.collect! { |x| x.join('') }
    left = []
    if /\A +/ =~ misc[2] then
      len = $&.size
      left = misc.collect { |x| x[0, len] }
      misc.each { |x| x[0, len] = '' }
    end
    right = []
    if / +\z/ =~ misc[2] then
      len = $&.size
      right = misc.collect { |x| x[(-len)..-1] }
      misc.each { |x| x[(-len)..-1] = '' }
    end
    [ left, misc, right ]
  end
  private :parse_align_lines

  # Parses alignments.
  # Groups the paragraphs between "Genomic:"/"Exon n:" paragraphs and
  # parses every non-empty group. Note that @data is emptied.
  def parse_align
    r = []
    data = @data
    while !data.empty?
      a = []
      while x = data.shift and !(x =~ /^(Genomic|Exon\s*\d+)\:/)
        a.push x
      end
      r.push parse_align_lines(a) unless a.empty?
    end
    @align = r
  end
  private :parse_align

  #--
  # Bio::BLAST::*::Report::Hit compatible methods
  #++

  # Length of the mRNA (query) sequence.
  # Same as Bio::Spidey::Report#query_len.
  def query_len; mrna.len; end

  # Identifier of the mRNA (query).
  # Same as Bio::Spidey::Report#query_id.
  def query_id; mrna.entry_id; end

  # Definition of the mRNA (query).
  # Same as Bio::Spidey::Report#query_def.
  def query_def; mrna.definition; end

  # The genomic (target) sequence length.
  def target_len; genomic.len; end

  # Identifier of the genomic (target) sequence.
  def target_id; genomic.entry_id; end

  # Definition of the genomic (target) sequence.
  def target_def; genomic.definition; end

  alias hit_id     target_id
  alias len        target_len
  alias definition target_def

  alias hsps exons

  # Iterates over each exon of the hit.
  # Yields Bio::Spidey::Report::SegmentPair object.
  def each(&x) #:yields: segmentpair
    exons.each(&x)
  end
end #class Hit
570
+
571
# Returns sequence information of the mRNA, as reported by the hit.
# Returns a Bio::Spidey::Report::SeqDesc object.
# This would be a Bio::Spidey specific method.
def mrna
  @hit.mrna
end
575
+
576
+ #--
577
+ #Bio::BLAST::*::Report compatible methods
578
+ #++
579
+
580
# Returns number of hits.
# Same as hits.size.
def num_hits
  @hits.size
end
583
+
584
# Iterates over each hit.
# Same as hits.each.
# Yields a Bio::Spidey::Report::Hit object.
def each_hit(&block) #:yields: hit
  @hits.each(&block)
end
alias each each_hit
591
+
592
# Returns definition of the mRNA (query) sequence.
def query_def
  query = @hit.mrna
  query.definition
end
594
+
595
# Returns identifier of the mRNA (query) sequence.
def query_id
  query = @hit.mrna
  query.entry_id
end
597
+
598
# Returns the length of the mRNA (query) sequence.
def query_len
  query = @hit.mrna
  query.len
end
600
+ end #class Report
601
+
602
+ end #class Spidey
603
+ end #module Bio
604
+