bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,134 @@
1
+ #
2
+ # = bio/appl/sim4.rb - sim4 wrapper class
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: sim4.rb,v 1.5 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # The sim4 execution wrapper class.
26
+ #
27
+ # == References
28
+ #
29
+ # * Florea, L., et al., A Computer program for aligning a cDNA sequence
30
+ # with a genomic DNA sequence, Genome Research, 8, 967--974, 1998.
31
+ # http://www.genome.org/cgi/content/abstract/8/9/967
32
+ #
33
+
34
+ require 'open3'
35
+ require 'tempfile'
36
+
37
module Bio

  # The sim4 execution wrapper class.
  #
  # Writes the query sequence(s) to temporary FASTA files, runs the external
  # sim4 program on them and parses its standard output into a
  # Bio::Sim4::Report object.
  class Sim4

    autoload :Report, 'bio/appl/sim4/report'

    # Creates a new sim4 execution wrapper object.
    # [+program+]  Program name. Usually 'sim4' in UNIX.
    # [+database+] Default file name of database('seq2').
    # [+option+]   Options (array of strings).
    def initialize(program = 'sim4', database = nil, option = [])
      @program  = program
      @option   = option
      @database = database #seq2
      @command  = nil
      @output   = nil
      @report   = nil
      @log      = nil
    end

    # default file name of database('seq2')
    attr_accessor :database

    # name of the program (usually 'sim4' in UNIX)
    attr_reader :program

    # options
    attr_reader :option

    # last command-line strings executed by the object
    attr_reader :command

    # last messages of program reported to the STDERR
    attr_reader :log

    # last result text (String)
    attr_reader :output

    # last result. Returns a Bio::Sim4::Report object.
    attr_reader :report

    # Executes the sim4 program.
    # <tt>seq1</tt> shall be a Bio::Sequence object (anything responding to
    # <tt>to_fasta(definition, width)</tt> works).
    # The sequence is aligned against <tt>self.database</tt>.
    # Returns a Bio::Sim4::Report object.
    def query(seq1)
      with_fasta_tempfile('sim4', seq1, 'seq1') do |path1|
        exec_local(path1)
      end
    end

    # Executes the sim4 program.
    # Perform mRNA-genome alignment between given sequences.
    # <tt>seq1</tt> and <tt>seq2</tt> should be Bio::Sequence objects.
    # Returns a Bio::Sim4::Report object.
    def query_pairwise(seq1, seq2)
      with_fasta_tempfile('sim4', seq1, 'seq1') do |path1|
        # The second file must hold seq2 (not seq1 again), otherwise sim4
        # would align seq1 against itself.
        with_fasta_tempfile('seq2', seq2, 'seq2') do |path2|
          exec_local(path1, path2)
        end
      end
    end

    # Executes the sim4 program.
    # Perform mRNA-genome alignment between sequences in given files.
    # <tt>filename1</tt> and <tt>filename2</tt> should be file name strings.
    # If <tt>filename2</tt> is not specified, using <tt>self.database</tt>.
    #
    # Updates #command, #output, #log and #report as side effects and
    # returns the Bio::Sim4::Report object.
    def exec_local(filename1, filename2 = nil)
      @command = [ @program, filename1, (filename2 or @database), *@option ]
      @output = nil
      @log = nil
      @report = nil
      Open3.popen3(*@command) do |din, dout, derr|
        din.close
        derr.sync = true
        # Drain STDERR in a separate thread while STDOUT is read below, so
        # neither read waits for the other to finish first.
        t = Thread.start { @log = derr.read }
        begin
          @output = dout.read
          @report = Bio::Sim4::Report.new(@output)
        ensure
          t.join
        end
      end
      @report
    end
    alias exec exec_local

    private

    # Writes <tt>seq</tt> in FASTA format (definition <tt>label</tt>,
    # 70 columns) to a temporary file whose name starts with
    # <tt>basename</tt>, yields the path of the closed file, and removes the
    # file afterwards even when the block raises.
    # Returns the value of the block.
    def with_fasta_tempfile(basename, seq, label)
      tf = Tempfile.open(basename)
      begin
        tf.print seq.to_fasta(label, 70)
        tf.close(false) # flush and close, but keep the file on disk
        yield tf.path
      ensure
        tf.close(true)  # close (if still open) and unlink
      end
    end

  end #class Sim4
end #module Bio
134
+
@@ -0,0 +1,501 @@
1
+ #
2
+ # = bio/appl/sim4/report.rb - sim4 result parser
3
+ #
4
+ # Copyright:: Copyright (C) 2004 GOTO Naohisa <ng@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ #--
8
+ # This library is free software; you can redistribute it and/or
9
+ # modify it under the terms of the GNU Lesser General Public
10
+ # License as published by the Free Software Foundation; either
11
+ # version 2 of the License, or (at your option) any later version.
12
+ #
13
+ # This library is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16
+ # Lesser General Public License for more details.
17
+ #
18
+ # You should have received a copy of the GNU Lesser General Public
19
+ # License along with this library; if not, write to the Free Software
20
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21
+ #++
22
+ #
23
+ # $Id: report.rb,v 1.7 2005/12/18 15:58:40 k Exp $
24
+ #
25
+ # The sim4 report parser classes.
26
+ #
27
+ # == References
28
+ #
29
+ # * Florea, L., et al., A Computer program for aligning a cDNA sequence
30
+ # with a genomic DNA sequence, Genome Research, 8, 967--974, 1998.
31
+ # http://www.genome.org/cgi/content/abstract/8/9/967
32
+ #
33
+
34
+ module Bio
35
+ class Sim4
36
+
37
+ # Bio::Sim4::Report is the sim4 report parser class.
38
+ # Its object may contain some Bio::Sim4::Report::Hit objects.
39
+ class Report #< DB
40
+ #--
41
+ # format: A=0, A=3, or A=4
42
+ #++
43
+
44
+ # Delimiter of each entry. Bio::FlatFile uses it.
45
+ # In Bio::Sim4::Report, it it nil (1 entry 1 file).
46
+ DELIMITER = RS = nil # 1 entry 1 file
47
+
48
+ # Creates new Bio::Sim4::Report object from String.
49
+ # You can use Bio::FlatFile to read a file.
50
+ # Currently, format A=0, A=3, and A=4 are supported.
51
+ # (A=1, A=2, A=5 are NOT supported yet.)
52
+ #
53
+ # Note that 'seq1' in sim4 result is always regarded as 'query',
54
+ # and 'seq2' is always regarded as 'subject'(target, hit).
55
+ #
56
+ # Note that first 'seq1' informations are used for
57
+ # Bio::Sim4::Report#query_id, #query_def, #query_len, and #seq1 methods.
58
+ def initialize(text)
59
+ @hits = []
60
+ @all_hits = []
61
+ overrun = ''
62
+ text.each("\n\nseq1 = ") do |str|
63
+ str = str.sub(/\A\s+/, '')
64
+ str.sub!(/\n(^seq1 \= .*)/m, "\n") # remove trailing hits for sure
65
+ tmp = $1.to_s
66
+ hit = Hit.new(overrun + str)
67
+ overrun = tmp
68
+ unless hit.instance_eval { @data.empty? } then
69
+ @hits << hit
70
+ end
71
+ @all_hits << hit
72
+ end
73
+ @seq1 = @all_hits[0].seq1
74
+ end
75
+
76
+ # Returns hits of the entry.
77
+ # Unlike Bio::Sim4::Report#all_hits, it returns
78
+ # hits which have alignments.
79
+ # Returns an Array of Bio::Sim4::Report::Hit objects.
80
+ attr_reader :hits
81
+
82
+ # Returns all hits of the entry.
83
+ # Unlike Bio::Sim4::Report#hits, it returns
84
+ # results of all trials of pairwise alignment.
85
+ # This would be a Bio::Sim4 specific method.
86
+ # Returns an Array of Bio::Sim4::Report::Hit objects.
87
+ attr_reader :all_hits
88
+
89
+ # Returns sequence informations of 'seq1'.
90
+ # Returns a Bio::Sim4::Report::SeqDesc object.
91
+ # This would be a Bio::Sim4 specific method.
92
+ attr_reader :seq1
93
+
94
+ # Bio::Sim4::Report::SeqDesc stores sequence information of
95
+ # query or subject of sim4 report.
96
+ class SeqDesc
97
+ #--
98
+ # description/definitions of a sequence
99
+ #++
100
+
101
+ # Creates a new object.
102
+ # It is designed to be called internally from Bio::Sim4::Report object.
103
+ # Users shall not use it directly.
104
+ def initialize(seqid, seqdef, len, filename)
105
+ @entry_id = seqid
106
+ @definition = seqdef
107
+ @len = len
108
+ @filename = filename
109
+ end
110
+ # identifier of the sequence
111
+ attr_reader :entry_id
112
+ # definition of the sequence
113
+ attr_reader :definition
114
+ # sequence length of the sequence
115
+ attr_reader :len
116
+ # filename of the sequence
117
+ attr_reader :filename
118
+
119
+ # Parses part of sim4 result text and creates new SeqDesc object.
120
+ # It is designed to be called internally from Bio::Sim4::Report object.
121
+ # Users shall not use it directly.
122
+ def self.parse(str, str2 = nil)
123
+ /^seq[12] \= (.*)(?: \((.*)\))?\,\s*(\d+)\s*bp\s*$/ =~ str
124
+ seqid = $2
125
+ filename = $1
126
+ len = $3.to_i
127
+ if str2 then
128
+ seqdef = str2.sub(/^\>\s*/, '')
129
+ seqid =seqdef.split(/\s+/, 2)[0] unless seqid
130
+ else
131
+ seqdef = (seqid or filename)
132
+ seqid = filename unless seqid
133
+ end
134
+ self.new(seqid, seqdef, len, filename)
135
+ end
136
+ end #class SeqDesc
137
+
138
+
139
+ # Sequence segment pair of the sim4 result.
140
+ # Similar to Bio::Blast::Report::HSP but lacks many methods.
141
+ # For mRNA-genome mapping programs,
142
+ # unlike other homology search programs,
143
+ # the class is used not only for exons but also for introns.
144
+ # (Note that intron data would not be available according to run-time
145
+ # options of the program.)
146
+ class SegmentPair
147
+ #--
148
+ # segment pair (like Bio::BLAST::*::Report::HSP)
149
+ #++
150
+
151
+ # Creates a new SegmentPair object.
152
+ # It is designed to be called internally from
153
+ # Bio::Sim4::Report::Hit object.
154
+ # Users shall not use it directly.
155
+ def initialize(seq1, seq2, midline = nil,
156
+ percent_identity = nil, direction = nil)
157
+ @seq1 = seq1
158
+ @seq2 = seq2
159
+ @midline = midline
160
+ @percent_identity = percent_identity
161
+ @direction = direction
162
+ end
163
+ # Returns segment informations of 'seq1'.
164
+ # Returns a Bio::Sim4::Report::Segment object.
165
+ # These would be Bio::Sim4 specific methods.
166
+ attr_reader :seq1
167
+ # Returns segment informations of 'seq2'.
168
+ # Returns a Bio::Sim4::Report::Segment object.
169
+ # These would be Bio::Sim4 specific methods.
170
+ attr_reader :seq2
171
+
172
+ # Returns the "midline" of the segment pair.
173
+ # Returns nil if no alignment data are available.
174
+ attr_reader :midline
175
+
176
+ # Returns percent identity of the segment pair.
177
+ attr_reader :percent_identity
178
+
179
+ # Returns directions of mapping.
180
+ # Maybe one of "->", "<-" or "" or nil.
181
+ # This would be a Bio::Sim4 specific method.
182
+ attr_reader :direction
183
+
184
+ # Parses part of sim4 result text and creates a new SegmentPair object.
185
+ # It is designed to be called internally from
186
+ # Bio::Sim4::Report::Hit class.
187
+ # Users shall not use it directly.
188
+ def self.parse(str, aln)
189
+ /^(\d+)\-(\d+)\s*\((\d+)\-(\d+)\)\s*([\d\.]+)\%\s*([\-\<\>]*)/ =~ str
190
+ self.new(Segment.new($1, $2, aln[0]),
191
+ Segment.new($3, $4, aln[2]),
192
+ aln[1], $5, $6)
193
+ end
194
+
195
+ # Parses part of sim4 result text and creates a new SegmentPair
196
+ # object when the seq1 is a intron.
197
+ # It is designed to be called internally from
198
+ # Bio::Sim4::Report::Hit class.
199
+ # Users shall not use it directly.
200
+ def self.seq1_intron(prev_e, e, aln)
201
+ self.new(Segment.new(prev_e.seq1.to+1, e.seq1.from-1, aln[0]),
202
+ Segment.new(nil, nil, aln[2]),
203
+ aln[1])
204
+ end
205
+
206
+ # Parses part of sim4 result text and creates a new SegmentPair
207
+ # object when seq2 is a intron.
208
+ # It is designed to be called internally from
209
+ # Bio::Sim4::Report::Hit class.
210
+ # Users shall not use it directly.
211
+ def self.seq2_intron(prev_e, e, aln)
212
+ self.new(Segment.new(nil, nil, aln[0]),
213
+ Segment.new(prev_e.seq2.to+1, e.seq2.from-1, aln[2]),
214
+ aln[1])
215
+ end
216
+
217
+ #--
218
+ # Bio::BLAST::*::Report::Hsp compatible methods
219
+ # Methods already defined: midline, percent_identity
220
+ #++
221
+
222
+ # start position of the query (the first position is 1)
223
+ def query_from; @seq1.from; end
224
+
225
+ # end position of the query (including its position)
226
+ def query_to; @seq1.to; end
227
+
228
+ # query sequence (with gaps) of the alignment of the segment pair.
229
+ def qseq; @seq1.seq; end
230
+
231
+ # start position of the hit(target) (the first position is 1)
232
+ def hit_from; @seq2.from; end
233
+
234
+ # end position of the hit(target) (including its position)
235
+ def hit_to; @seq2.to; end
236
+
237
+ # hit(target) sequence (with gaps) of the alignment
238
+ # of the segment pair.
239
+ def hseq; @seq2.seq; end
240
+
241
+ # Returns alignment length of the segment pair.
242
+ # Returns nil if no alignment data are available.
243
+ def align_len
244
+ (@midline and @seq1.seq and @seq2.seq) ? @midline.length : nil
245
+ end
246
+ end #class SegmentPair
247
+
248
+ # Segment informations of a segment pair.
249
+ class Segment
250
+ #--
251
+ # the segment of a sequence
252
+ #++
253
+
254
+ # Creates a new Segment object.
255
+ # It is designed to be called internally from
256
+ # Bio::Sim4::Report::SegmentPair class.
257
+ # Users shall not use it directly.
258
+ def initialize(pos_st, pos_ed, seq = nil)
259
+ @from = pos_st.to_i
260
+ @to = pos_ed.to_i
261
+ @seq = seq
262
+ end
263
+ # start position of the segment (the first position is 1)
264
+ attr_reader :from
265
+ # end position of the segment (including its position)
266
+ attr_reader :to
267
+ # sequence (with gaps) of the segment
268
+ attr_reader :seq
269
+ end #class Segment
270
+
271
+ # Hit object of the sim4 result.
272
+ # Similar to Bio::Blast::Report::Hit but lacks many methods.
273
+ class Hit
274
+
275
+ # Parses part of sim4 result text and creates a new Hit object.
276
+ # It is designed to be called internally from Bio::Sim4::Report class.
277
+ # Users shall not use it directly.
278
+ def initialize(str)
279
+ @data = str.split(/\n(?:\r?\n)+/)
280
+ parse_seqdesc
281
+ end
282
+
283
+ # Parses sequence descriptions.
284
+ def parse_seqdesc
285
+ # seq1: query, seq2: target(hit)
286
+ a0 = @data.shift.split(/\r?\n/)
287
+ if @data[0].to_s =~ /^\>/ then
288
+ a1 = @data.shift.split(/\r?\n/)
289
+ else
290
+ a1 = []
291
+ end
292
+ @seq1 = SeqDesc.parse(a0[0], a1[0])
293
+ @seq2 = SeqDesc.parse(a0[1], a1[1])
294
+
295
+ if @data[0].to_s.sub!(/\A\(complement\)\s*$/, '') then
296
+ @complement = true
297
+ @data.shift if @data[0].strip.empty?
298
+ else
299
+ @complement = nil
300
+ end
301
+ end
302
+ private :parse_seqdesc
303
+
304
+ # Returns sequence informations of 'seq1'.
305
+ # Returns a Bio::Sim4::Report::SeqDesc object.
306
+ # This would be Bio::Sim4 specific method.
307
+ attr_reader :seq1
308
+
309
+ # Returns sequence informations of 'seq2'.
310
+ # Returns a Bio::Sim4::Report::SeqDesc object.
311
+ # This would be Bio::Sim4 specific method.
312
+ attr_reader :seq2
313
+
314
+ # Returns true if the hit reports '-'(complemental) strand
315
+ # search result.
316
+ # Otherwise, return false or nil.
317
+ # This would be a Bio::Sim4 specific method.
318
+ def complement?
319
+ @complement
320
+ end
321
+
322
+ # Parses segment pair.
323
+ def parse_segmentpairs
324
+ aln = (self.align ? self.align.dup : [])
325
+ exo = [] #exons
326
+ itr = [] #introns
327
+ sgp = [] #segmentpairs
328
+ prev_e = nil
329
+ return unless @data[0]
330
+ @data[0].split(/\r?\n/).each do |str|
331
+ ai = (prev_e ? aln.shift : nil)
332
+ a = (aln.shift or [])
333
+ e = SegmentPair.parse(str, a)
334
+ exo << e
335
+ if ai then
336
+ # intron data in alignment
337
+ if ai[2].strip.empty? then
338
+ i = SegmentPair.seq1_intron(prev_e, e, ai)
339
+ else
340
+ i = SegmentPair.seq2_intron(prev_e, e, ai)
341
+ end
342
+ itr << i
343
+ sgp << i
344
+ end
345
+ sgp << e
346
+ prev_e = e
347
+ end
348
+ @exons = exo
349
+ @introns = itr
350
+ @segmentpairs = sgp
351
+ end
352
+ private :parse_segmentpairs
353
+
354
+ # Parses alignment.
355
+ def parse_align
356
+ s1 = []; ml = []; s2 = []
357
+ dat = @data[1..-1]
358
+ return unless dat
359
+ dat.each do |str|
360
+ a = str.split(/\r?\n/)
361
+ a.shift
362
+ if /^(\s*\d+\s*)(.+)$/ =~ a[0] then
363
+ range = ($1.length)..($1.length + $2.strip.length - 1)
364
+ a.collect! { |x| x[range] }
365
+ s1 << a.shift
366
+ ml << a.shift
367
+ s2 << a.shift
368
+ end
369
+ end #each
370
+ alx = ml.join('').split(/([\<\>]+\.+[\<\>]+)/)
371
+ seq1 = s1.join(''); seq2 = s2.join('')
372
+ i = 0
373
+ alx.collect! do |x|
374
+ len = x.length
375
+ y = [ seq1[i, len], x, seq2[i, len] ]
376
+ i += len
377
+ y
378
+ end
379
+ @align = alx
380
+ end
381
+ private :parse_align
382
+
383
+ # Returns exons of the hit.
384
+ # Each exon is a Bio::Sim4::Report::SegmentPair object.
385
+ def exons
386
+ unless defined?(@exons); parse_segmentpairs; end
387
+ @exons
388
+ end
389
+
390
+ # Returns segment pairs (exons and introns) of the hit.
391
+ # Each segment pair is a Bio::Sim4::Report::SegmentPair object.
392
+ # Returns an array of Bio::Sim4::Report::SegmentPair objects.
393
+ # (Note that intron data is not always available
394
+ # according to run-time options of the program.)
395
+ def segmentpairs
396
+ unless defined?(@segmentpairs); parse_segmentpairs; end
397
+ @segmentpairs
398
+ end
399
+
400
+ # Returns introns of the hit.
401
+ # Some of them would contain untranscribed regions.
402
+ # Returns an array of Bio::Sim4::Report::SegmentPair objects.
403
+ # (Note that intron data is not always available
404
+ # according to run-time options of the program.)
405
+ def introns
406
+ unless defined?(@introns); parse_segmentpairs; end
407
+ @introns
408
+ end
409
+
410
+ # Returns alignments.
411
+ # Returns an Array of arrays.
412
+ # Each array contains sequence of seq1, midline, sequence of seq2,
413
+ # respectively.
414
+ # This would be a Bio::Sim4 specific method.
415
+ def align
416
+ unless defined?(@align); parse_align; end
417
+ @align
418
+ end
419
+
420
+ #--
421
+ # Bio::BLAST::*::Report::Hit compatible methods
422
+ #++
423
+
424
+ # Length of the query sequence.
425
+ # Same as Bio::Sim4::Report#query_len.
426
+ def query_len; seq1.len; end
427
+
428
+ # Identifier of the query sequence.
429
+ # Same as Bio::Sim4::Report#query_id.
430
+ def query_id; seq1.entry_id; end
431
+
432
+ # Definition of the query sequence
433
+ # Same as Bio::Sim4::Report#query_def.
434
+ def query_def; seq1.definition; end
435
+
436
+ # length of the hit(target) sequence
437
+ def target_len; seq2.len; end
438
+
439
+ # Identifier of the hit(target) sequence
440
+ def target_id; seq2.entry_id; end
441
+
442
+ # Definition of the hit(target) sequence
443
+ def target_def; seq2.definition; end
444
+
445
+ alias hit_id target_id
446
+ alias len target_len
447
+ alias definition target_def
448
+
449
+ alias hsps exons
450
+
451
+ # Iterates over each exon of the hit.
452
+ # Yields a Bio::Sim4::Report::SegmentPair object.
453
+ def each(&x) #:yields: segmentpair
454
+ exons.each(&x)
455
+ end
456
+ end #class Hit
457
+
458
+ #--
459
+ #Bio::BLAST::*::Report compatible methods
460
+ #++
461
+
462
+ # Returns number of hits.
463
+ # Same as hits.size.
464
+ def num_hits; @hits.size; end
465
+
466
+ # Iterates over each hits of the sim4 result.
467
+ # Same as hits.each.
468
+ # Yields a Bio::Sim4::Report::Hit object.
469
+ def each_hit(&x) #:yields: hit
470
+ @hits.each(&x)
471
+ end
472
+ alias each each_hit
473
+
474
+ # Returns the definition of query sequence.
475
+ # The value will be filename or (first word of) sequence definition
476
+ # according to sim4 run-time options.
477
+ def query_def; @seq1.definition; end
478
+
479
+ # Returns the identifier of query sequence.
480
+ # The value will be filename or (first word of) sequence definition
481
+ # according to sim4 run-time options.
482
+ def query_id; @seq1.entry_id; end
483
+
484
+ # Returns the length of query sequence.
485
+ def query_len; @seq1.len; end
486
+ end #class Report
487
+
488
+ end #class Sim4
489
+ end #module Bio
490
+
491
+ =begin
492
+
493
+ = Bio::Sim4::Report
494
+
495
+ = References
496
+
497
+ * ((<URL:http://www.genome.org/cgi/content/abstract/8/9/967>))
498
+ Florea, L., et al., A Computer program for aligning a cDNA sequence
499
+ with a genomic DNA sequence, Genome Research, 8, 967--974, 1998.
500
+
501
+ =end