bio 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (201) hide show
  1. data/bin/bioruby +107 -0
  2. data/bin/br_biofetch.rb +59 -0
  3. data/bin/br_bioflat.rb +294 -0
  4. data/bin/br_biogetseq.rb +57 -0
  5. data/bin/br_pmfetch.rb +431 -0
  6. data/doc/BioRuby.rd.ja +225 -0
  7. data/doc/Changes-0.7.rd +236 -0
  8. data/doc/Design.rd.ja +341 -0
  9. data/doc/KEGG_API.rd +1437 -0
  10. data/doc/KEGG_API.rd.ja +1399 -0
  11. data/doc/TODO.rd.ja +138 -0
  12. data/doc/Tutorial.rd +1138 -0
  13. data/doc/Tutorial.rd.ja +2110 -0
  14. data/etc/bioinformatics/seqdatabase.ini +210 -0
  15. data/lib/bio.rb +256 -0
  16. data/lib/bio/alignment.rb +1906 -0
  17. data/lib/bio/appl/bl2seq/report.rb +350 -0
  18. data/lib/bio/appl/blast.rb +269 -0
  19. data/lib/bio/appl/blast/format0.rb +1402 -0
  20. data/lib/bio/appl/blast/format8.rb +95 -0
  21. data/lib/bio/appl/blast/report.rb +652 -0
  22. data/lib/bio/appl/blast/rexml.rb +151 -0
  23. data/lib/bio/appl/blast/wublast.rb +553 -0
  24. data/lib/bio/appl/blast/xmlparser.rb +222 -0
  25. data/lib/bio/appl/blat/report.rb +392 -0
  26. data/lib/bio/appl/clustalw.rb +191 -0
  27. data/lib/bio/appl/clustalw/report.rb +154 -0
  28. data/lib/bio/appl/emboss.rb +68 -0
  29. data/lib/bio/appl/fasta.rb +262 -0
  30. data/lib/bio/appl/fasta/format10.rb +428 -0
  31. data/lib/bio/appl/fasta/format6.rb +37 -0
  32. data/lib/bio/appl/genscan/report.rb +570 -0
  33. data/lib/bio/appl/hmmer.rb +129 -0
  34. data/lib/bio/appl/hmmer/report.rb +556 -0
  35. data/lib/bio/appl/mafft.rb +222 -0
  36. data/lib/bio/appl/mafft/report.rb +119 -0
  37. data/lib/bio/appl/psort.rb +555 -0
  38. data/lib/bio/appl/psort/report.rb +473 -0
  39. data/lib/bio/appl/sim4.rb +134 -0
  40. data/lib/bio/appl/sim4/report.rb +501 -0
  41. data/lib/bio/appl/sosui/report.rb +166 -0
  42. data/lib/bio/appl/spidey/report.rb +604 -0
  43. data/lib/bio/appl/targetp/report.rb +283 -0
  44. data/lib/bio/appl/tmhmm/report.rb +238 -0
  45. data/lib/bio/command.rb +166 -0
  46. data/lib/bio/data/aa.rb +354 -0
  47. data/lib/bio/data/codontable.rb +740 -0
  48. data/lib/bio/data/na.rb +226 -0
  49. data/lib/bio/db.rb +340 -0
  50. data/lib/bio/db/aaindex.rb +280 -0
  51. data/lib/bio/db/embl/common.rb +332 -0
  52. data/lib/bio/db/embl/embl.rb +446 -0
  53. data/lib/bio/db/embl/sptr.rb +954 -0
  54. data/lib/bio/db/embl/swissprot.rb +32 -0
  55. data/lib/bio/db/embl/trembl.rb +31 -0
  56. data/lib/bio/db/embl/uniprot.rb +32 -0
  57. data/lib/bio/db/fantom.rb +604 -0
  58. data/lib/bio/db/fasta.rb +869 -0
  59. data/lib/bio/db/genbank/common.rb +299 -0
  60. data/lib/bio/db/genbank/ddbj.rb +34 -0
  61. data/lib/bio/db/genbank/genbank.rb +354 -0
  62. data/lib/bio/db/genbank/genpept.rb +73 -0
  63. data/lib/bio/db/genbank/refseq.rb +31 -0
  64. data/lib/bio/db/gff.rb +106 -0
  65. data/lib/bio/db/go.rb +497 -0
  66. data/lib/bio/db/kegg/brite.rb +51 -0
  67. data/lib/bio/db/kegg/cell.rb +88 -0
  68. data/lib/bio/db/kegg/compound.rb +130 -0
  69. data/lib/bio/db/kegg/enzyme.rb +125 -0
  70. data/lib/bio/db/kegg/expression.rb +173 -0
  71. data/lib/bio/db/kegg/genes.rb +293 -0
  72. data/lib/bio/db/kegg/genome.rb +362 -0
  73. data/lib/bio/db/kegg/glycan.rb +213 -0
  74. data/lib/bio/db/kegg/keggtab.rb +418 -0
  75. data/lib/bio/db/kegg/kgml.rb +299 -0
  76. data/lib/bio/db/kegg/ko.rb +178 -0
  77. data/lib/bio/db/kegg/reaction.rb +97 -0
  78. data/lib/bio/db/litdb.rb +131 -0
  79. data/lib/bio/db/medline.rb +317 -0
  80. data/lib/bio/db/nbrf.rb +199 -0
  81. data/lib/bio/db/pdb.rb +38 -0
  82. data/lib/bio/db/pdb/atom.rb +60 -0
  83. data/lib/bio/db/pdb/chain.rb +117 -0
  84. data/lib/bio/db/pdb/model.rb +106 -0
  85. data/lib/bio/db/pdb/pdb.rb +1682 -0
  86. data/lib/bio/db/pdb/residue.rb +122 -0
  87. data/lib/bio/db/pdb/utils.rb +234 -0
  88. data/lib/bio/db/prosite.rb +616 -0
  89. data/lib/bio/db/rebase.rb +417 -0
  90. data/lib/bio/db/transfac.rb +387 -0
  91. data/lib/bio/feature.rb +201 -0
  92. data/lib/bio/io/brdb.rb +103 -0
  93. data/lib/bio/io/das.rb +471 -0
  94. data/lib/bio/io/dbget.rb +212 -0
  95. data/lib/bio/io/ddbjxml.rb +614 -0
  96. data/lib/bio/io/fastacmd.rb +123 -0
  97. data/lib/bio/io/fetch.rb +114 -0
  98. data/lib/bio/io/flatfile.rb +496 -0
  99. data/lib/bio/io/flatfile/bdb.rb +266 -0
  100. data/lib/bio/io/flatfile/index.rb +1308 -0
  101. data/lib/bio/io/flatfile/indexer.rb +778 -0
  102. data/lib/bio/io/higet.rb +92 -0
  103. data/lib/bio/io/keggapi.rb +863 -0
  104. data/lib/bio/io/pubmed.rb +189 -0
  105. data/lib/bio/io/registry.rb +308 -0
  106. data/lib/bio/io/soapwsdl.rb +114 -0
  107. data/lib/bio/io/sql.rb +428 -0
  108. data/lib/bio/location.rb +650 -0
  109. data/lib/bio/pathway.rb +991 -0
  110. data/lib/bio/reference.rb +308 -0
  111. data/lib/bio/sequence.rb +593 -0
  112. data/lib/bio/shell.rb +51 -0
  113. data/lib/bio/shell/core.rb +512 -0
  114. data/lib/bio/shell/plugin/codon.rb +228 -0
  115. data/lib/bio/shell/plugin/entry.rb +85 -0
  116. data/lib/bio/shell/plugin/flatfile.rb +119 -0
  117. data/lib/bio/shell/plugin/keggapi.rb +187 -0
  118. data/lib/bio/shell/plugin/midi.rb +448 -0
  119. data/lib/bio/shell/plugin/obda.rb +63 -0
  120. data/lib/bio/shell/plugin/seq.rb +238 -0
  121. data/lib/bio/shell/session.rb +214 -0
  122. data/lib/bio/util/color_scheme.rb +214 -0
  123. data/lib/bio/util/color_scheme/buried.rb +78 -0
  124. data/lib/bio/util/color_scheme/helix.rb +78 -0
  125. data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
  126. data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
  127. data/lib/bio/util/color_scheme/strand.rb +78 -0
  128. data/lib/bio/util/color_scheme/taylor.rb +69 -0
  129. data/lib/bio/util/color_scheme/turn.rb +78 -0
  130. data/lib/bio/util/color_scheme/zappo.rb +69 -0
  131. data/lib/bio/util/contingency_table.rb +337 -0
  132. data/lib/bio/util/sirna.rb +306 -0
  133. data/lib/bioruby.rb +34 -0
  134. data/sample/biofetch.rb +475 -0
  135. data/sample/color_scheme_na.rb +99 -0
  136. data/sample/dbget +37 -0
  137. data/sample/fasta2tab.rb +99 -0
  138. data/sample/fsplit.rb +51 -0
  139. data/sample/gb2fasta.rb +31 -0
  140. data/sample/gb2tab.rb +325 -0
  141. data/sample/gbtab2mysql.rb +161 -0
  142. data/sample/genes2nuc.rb +33 -0
  143. data/sample/genes2pep.rb +33 -0
  144. data/sample/genes2tab.rb +81 -0
  145. data/sample/genome2rb.rb +29 -0
  146. data/sample/genome2tab.rb +76 -0
  147. data/sample/goslim.rb +311 -0
  148. data/sample/gt2fasta.rb +47 -0
  149. data/sample/pmfetch.rb +42 -0
  150. data/sample/pmsearch.rb +42 -0
  151. data/sample/psortplot_html.rb +222 -0
  152. data/sample/ssearch2tab.rb +96 -0
  153. data/sample/tdiary.rb +158 -0
  154. data/sample/tfastx2tab.rb +100 -0
  155. data/sample/vs-genes.rb +212 -0
  156. data/test/data/SOSUI/sample.report +11 -0
  157. data/test/data/TMHMM/sample.report +21 -0
  158. data/test/data/blast/eco:b0002.faa +15 -0
  159. data/test/data/blast/eco:b0002.faa.m0 +128 -0
  160. data/test/data/blast/eco:b0002.faa.m7 +65 -0
  161. data/test/data/blast/eco:b0002.faa.m8 +1 -0
  162. data/test/data/embl/AB090716.embl +65 -0
  163. data/test/data/genscan/sample.report +63 -0
  164. data/test/data/prosite/prosite.dat +2233 -0
  165. data/test/data/refseq/nm_126355.entret +64 -0
  166. data/test/data/uniprot/p53_human.uniprot +1456 -0
  167. data/test/runner.rb +10 -0
  168. data/test/unit/bio/appl/blast/test_report.rb +427 -0
  169. data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
  170. data/test/unit/bio/appl/genscan/test_report.rb +195 -0
  171. data/test/unit/bio/appl/sosui/test_report.rb +94 -0
  172. data/test/unit/bio/appl/targetp/test_report.rb +159 -0
  173. data/test/unit/bio/appl/test_blast.rb +159 -0
  174. data/test/unit/bio/appl/test_fasta.rb +142 -0
  175. data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
  176. data/test/unit/bio/data/test_aa.rb +103 -0
  177. data/test/unit/bio/data/test_codontable.rb +120 -0
  178. data/test/unit/bio/data/test_na.rb +89 -0
  179. data/test/unit/bio/db/embl/test_common.rb +130 -0
  180. data/test/unit/bio/db/embl/test_embl.rb +227 -0
  181. data/test/unit/bio/db/embl/test_sptr.rb +268 -0
  182. data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
  183. data/test/unit/bio/db/kegg/test_genes.rb +58 -0
  184. data/test/unit/bio/db/test_fasta.rb +263 -0
  185. data/test/unit/bio/db/test_gff.rb +140 -0
  186. data/test/unit/bio/db/test_prosite.rb +1450 -0
  187. data/test/unit/bio/io/test_ddbjxml.rb +87 -0
  188. data/test/unit/bio/io/test_soapwsdl.rb +45 -0
  189. data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
  190. data/test/unit/bio/test_alignment.rb +1028 -0
  191. data/test/unit/bio/test_command.rb +71 -0
  192. data/test/unit/bio/test_db.rb +109 -0
  193. data/test/unit/bio/test_feature.rb +128 -0
  194. data/test/unit/bio/test_location.rb +51 -0
  195. data/test/unit/bio/test_pathway.rb +485 -0
  196. data/test/unit/bio/test_sequence.rb +386 -0
  197. data/test/unit/bio/test_shell.rb +31 -0
  198. data/test/unit/bio/util/test_color_scheme.rb +45 -0
  199. data/test/unit/bio/util/test_contingency_table.rb +106 -0
  200. data/test/unit/bio/util/test_sirna.rb +258 -0
  201. metadata +295 -0
@@ -0,0 +1,473 @@
1
+ #
2
+ # = bio/appl/psort/report.rb - PSORT systems report classes
3
+ #
4
+ # Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
5
+ # License:: LGPL
6
+ #
7
+ # $Id: report.rb,v 1.12 2005/11/03 10:50:58 nakao Exp $
8
+ #
9
+ # == Report classes for PSORT systems
10
+ #
11
+ #--
12
+ #
13
+ # This library is free software; you can redistribute it and/or
14
+ # modify it under the terms of the GNU Lesser General Public
15
+ # License as published by the Free Software Foundation; either
16
+ # version 2 of the License, or (at your option) any later version.
17
+ #
18
+ # This library is distributed in the hope that it will be useful,
19
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
20
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21
+ # Lesser General Public License for more details.
22
+ #
23
+ # You should have received a copy of the GNU Lesser General Public
24
+ # License along with this library; if not, write to the Free Software
25
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
26
+ #
27
+ # ++
28
+ #
29
+
30
+ require 'bio/sequence'
31
+ require 'bio/appl/psort'
32
+
33
+
34
+ module Bio
35
+
36
+ class PSORT
37
+
38
class PSORT1

  # = Bio::PSORT::PSORT1::Report
  #
  # Parser class for PSORT1 output reports.
  #
  # == Example
  #
  #   report = Bio::PSORT::PSORT1::Report.parser(psort1_output)
  #   report.entry_id      # id taken from the ">id" line of the query
  #   report.final_result  # array of prediction/certainty/comment hashes
  class Report

    # Parses +output_report+ (String) and returns a
    # Bio::PSORT::PSORT1::Report. Delegates to default_parser.
    def self.parser(output_report)
      default_parser(output_report)
    end

    # Parses the plain-text PSORT1 report +output_report+ and returns a
    # Bio::PSORT::PSORT1::Report.
    #
    # The "Query Information" and "Final Results" sections are required;
    # ArgumentError is raised when either of them cannot be found.
    # The "Result Information" section is optional and is stored in
    # #result_info when present.
    def self.default_parser(output_report)
      rpt = new
      rpt.raw = output_report
      text = output_report.to_s

      query_block = text[/^Query Information\n\n(.+?)\n\n/m, 1]
      final_block = text[/\n\n----- Final Results -----\n\n(.+?)\n\n\n/m, 1]
      unless query_block && final_block
        raise ArgumentError, "invalid format\n[#{output_report}]"
      end

      result_block = text[/^Result Information\n\n(.+?)\n\n\*/m, 1]
      rpt.result_info = result_block if result_block

      # Query block layout used here: line 0 holds the ORIGIN, line 2 the
      # ">id" header, and every following line is sequence.
      query_info = query_block.split(/\n/)

      # Array#join (not Array#to_s) is used to flatten the scan results:
      # Array#to_s only concatenates the elements on Ruby 1.8.
      rpt.entry_id = query_info[2].to_s.scan(/^>(\S+) */).join
      rpt.origin   = query_info[0].to_s.scan(/ORIGIN\s+(\w+)/).join
      rpt.sequence = Bio::Sequence::AA.new((query_info[3..-1] || []).join)

      rpt.final_result = final_block.split(/\n/).map do |line|
        prediction, detail = line.strip.split(/---/).map { |field| field.strip }
        detail = detail.to_s # a line without '---' has no detail part
        { 'prediction' => prediction,
          'certainty'  => detail.scan(/Certainty=\s*(\d\.\d{3})/).join,
          'comment'    => detail.scan(/\((\w+)\)/).join }
      end

      rpt
    end

    # Identifier of the query sequence.
    attr_accessor :entry_id

    # Origin given in the "Query Information" section.
    attr_accessor :origin

    # Title (not filled in by the parser).
    attr_accessor :title

    # Query sequence.
    attr_accessor :sequence

    # Text of the "Result Information" section.
    attr_accessor :result_info

    # Reasoning steps (not filled in by the parser).
    attr_accessor :reasoning

    # Array of Hashes with the keys 'prediction', 'certainty' and 'comment'.
    attr_accessor :final_result

    # Raw text of the output report.
    attr_accessor :raw

    # Constructs a Bio::PSORT::PSORT1::Report object.
    def initialize(entry_id = '', origin = '', title = '', sequence = '',
                   result_info = '', reasoning = {}, final_result = [])
      @entry_id     = entry_id
      @origin       = origin
      @title        = title
      @sequence     = sequence
      @result_info  = result_info
      @reasoning    = reasoning
      @final_result = final_result
      @raw          = ''
    end

  end # class Report

end # class PSORT1
104
+
105
+
106
+
107
class PSORT2

  # Subcellular localization name codes used by PSORT2
  # (three letter code => long name).
  SclNames = {
    'csk' => 'cytoskeletal',
    'cyt' => 'cytoplasmic',
    'nuc' => 'nuclear',
    'mit' => 'mitochondrial',
    'ves' => 'vesicles of secretory system',
    'end' => 'endoplasmic reticulum',
    'gol' => 'Golgi',
    'vac' => 'vacuolar',
    'pla' => 'plasma membrane',
    'pox' => 'peroxisomal',
    'exc' => 'extracellular, including cell wall',
    '---' => 'other'
  }

  # Feature name codes
  Features = [
    'psg', # PSG: PSG score
    'gvh', # GvH: GvH score
    'alm', # ALOM: $xmax
    'tms', # ALOM: $count
    'top', # MTOP: Charge difference: $mtopscr
    'mit', # MITDISC: Score: $score
    'mip', # Gavel: motif at $isite
    'nuc', # NUCDISC: NLS Score: $score
    'erl', # KDEL: ($seg|none)
    'erm', # ER Membrane Retention Signals: ($cseg|none) $scr
    'pox', # SKL: ($pat|none) $scr
    'px2', # PTS2: (found|none) ($#match < 0) ? 0 : ($#match+1);
    'vac', # VAC: (found|none) ($#match < 0) ? 0 : ($#match+1);
    'rnp', # RNA-binding motif: (found|none) ($#match < 0) ? 0 : ($#match+1);
    'act', # Actinin-type actin-binding motif: (found|none) $hit
    'caa', # Prenylation motif: (2|1|0) CaaX,CXC,CC,nil
    'yqr', # memYQRL: (found|none) $scr
    'tyr', # Tyrosines in the tail: (none|\S+[,])
           # 10 * scalar(@ylist) / ($end - $start + 1);
    'leu', # Dileucine motif in the tail: (none|found) $scr
    'gpi', # >>> Seem to be GPI anchored
    'myr', # NMYR: (none|\w) $scr
    'dna', # checking 63 PROSITE DNA binding motifs: $hit
    'rib', # checking 71 PROSITE ribosomal protein motifs: $hit
    'bac', # checking 33 PROSITE prokaryotic DNA binding motifs: $hit
    'm1a', # $mtype eq '1a'
    'm1b', # $mtype eq '1b'
    'm2',  # $mtype eq '2 '
    'mNt', # $mtype eq 'Nt'
    'm3a', # $mtype eq '3a'
    'm3b', # $mtype eq '3b'
    'm_',  # $mtype eq '__' tms == 0
    'ncn', # NNCN: ($NetOutput[1] > $NetOutput[0]) ? $output : (-$output);
    'lps', # COIL: $count
    'len'  # $leng
  ]

  # Feature name codes (long version).
  FeaturesLong = {
    'psg' => 'PSG',
    'gvh' => 'GvH',
    'tms' => 'ALOM',
    'alm' => 'ALOM',
    'top' => 'MTOP',
    'mit' => 'MITDISC',
    'mip' => 'Gavel',
    'nuc' => 'NUCDISC',
    'erl' => 'KDEL',
    'erm' => 'ER Membrane Retention Signals',
    'pox' => 'SKL',
    'px2' => 'PTS2',
    'vac' => 'VAC',
    'rnp' => 'RNA-binding motif',
    'act' => 'Actinin-type actin-binding motif',
    'caa' => 'Prenylation motif',
    'yqr' => 'memYQRL',
    'tyr' => 'Tyrosines in the tail',
    'leu' => 'Dileucine motif in the tail',
    'gpi' => '>>> Seems to be GPI anchored',
    'myr' => 'NMYR',
    'dna' => 'checking 63 PROSITE DNA binding motifs',
    'rib' => 'checking 71 PROSITE ribosomal protein motifs',
    # Fixed: was 'ochecking ... motifs:' (stray leading "o" and trailing
    # colon), inconsistent with the 'dna' and 'rib' entries.
    'bac' => 'checking 33 PROSITE prokaryotic DNA binding motifs',
    'm1a' => '',
    'm1b' => '',
    'm2'  => '',
    'mNt' => '',
    'm3a' => '',
    'm3b' => '',
    'm_'  => '',
    'ncn' => 'NNCN',
    'lps' => 'COIL',
    'len' => 'AA' # length of input sequence
  }

  # = Bio::PSORT::PSORT2::Report
  #
  # Report parser class for PSORT II (PSORT2).
  #
  # == Example
  #
  #   report = Bio::PSORT::PSORT2::Report.parser(psort2_output)
  #   report.pred  # predicted localization (three letter code)
  #   report.prob  # Hash of localization code => percentage
  class Report

    # Report boundary string.
    BOUNDARY = '-' * 75

    # Report delimiter (record separator between reports).
    RS = DELIMITER = ")\n\n#{BOUNDARY}"

    # entry_id of query sequence.
    attr_accessor :entry_id

    # Given subcellular localization (three letters code).
    attr_accessor :scl

    # Definition of query sequence.
    attr_accessor :definition

    # Sequence of query sequence.
    attr_accessor :seq

    # k parameter of k-nearest neighbors classifier
    # (stored as the String captured from the report).
    attr_accessor :k

    # Feature vector used in the kNN prediction.
    attr_accessor :features

    # Probability vector of kNN prediction.
    attr_accessor :prob

    # Predicted subcellular localization (three letters code).
    attr_accessor :pred

    # Raw text of output report.
    attr_accessor :raw

    # Constructs a Bio::PSORT::PSORT2::Report object.
    def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
                   seq = nil, k = nil, features = {}, prob = {}, pred = nil)
      @entry_id   = entry_id
      @scl        = scl
      @definition = definition
      @seq        = seq
      @features   = features
      @prob       = prob
      @pred       = pred
      @k          = k
      @raw        = raw
    end

    # Parses an output report, detecting the output format automatically.
    #
    # * +str+      - report text
    # * +entry_id+ - optional identifier that overrides the one in the report
    #
    # Returns a Report. Raises ArgumentError when the format is unknown.
    def self.parser(str, entry_id = nil)
      case str
      when /^[ \t]*psg:/         # default report
        default_parser(str, entry_id)
      when /^PSG:/               # -v report
        v_parser(str, entry_id)
      when /: too short length /
        too_short_parser(str, entry_id)
      when /PSORT II server/
        # Server output without any recognizable result section:
        # keep the raw text only.
        new(str, entry_id)
      else
        raise ArgumentError, "invalid format\n[#{str}]"
      end
    end

    # Parser for the ``too short length'' report.
    #
    #   $id: too short length ($leng), skipped\n";
    def self.too_short_parser(ent, entry_id = nil)
      report = new(ent, entry_id)
      if ent =~ /^(.+)?: too short length/
        report.entry_id ||= $1
        report.scl = '---'
      end
      report
    end

    # Parser for the default report format (``psort report'' output).
    #
    # The report is expected to consist of four blank-line separated
    # parts: header line, feature matrix, kNN probabilities, prediction.
    # Raises ArgumentError when parts are missing.
    def self.default_parser(ent, entry_id = nil)
      report = new(ent, entry_id)
      header, matrix, probs, prediction =
        ent.split(/\n\n/).map { |part| part.chomp }
      unless header && matrix && probs
        raise ArgumentError, "invalid format\n[#{ent}]"
      end

      report.set_header_line(header)

      # Feature matrix: "name: value" pairs separated by any whitespace.
      matrix.scan(/(\S+):\s*(\S+)/) do |name, value|
        report.features[name] = value.to_f
      end

      report.prob = set_kNN_prob(probs)
      report.set_prediction(prediction)

      report
    end

    # Sets @entry_id (unless already given), @definition and @scl from
    # the header line +str+. +str+ itself is not modified.
    def set_header_line(str)
      tokens = str.sub(/^-+\n/, '').split(/\t| /)
      # NOTE(review): the leading id token is consumed only when no
      # entry_id was supplied; otherwise it stays in the definition.
      # Kept as in the original -- confirm whether this is intended.
      @entry_id = tokens.shift.to_s.sub(/^-+/, '').strip unless @entry_id

      rest = tokens.join(' ').chomp
      @definition = rest[/\(\d+ aa\) (.+)$/, 1] || rest

      code = @definition.split(' ')[0]
      @scl = code if SclNames.key?(code)
    end

    # Builds the probability Hash (localization code => Float) from the
    # kNN section +str+, whose lines look like "<value> %: <long name>".
    # Every code of SclNames is present in the result (default 0.0);
    # lines whose name is not listed in SclNames are ignored.
    def self.set_kNN_prob(str)
      prob = {}
      SclNames.each_key { |code| prob[code] = 0.0 }
      str.to_s.delete("\t").split(/\n/).each do |line|
        value, name = line.strip.split(/\s*%:\s*/)
        code = SclNames.key(name) # Hash#index was removed in Ruby 3.0
        prob[code] = value.to_f if code
      end
      prob
    end

    # Sets @pred and @k (and @entry_id when it is still unset) from the
    # prediction line +str+. Raises ArgumentError when the line does not
    # have the expected form.
    def set_prediction(str)
      case str
      when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
        @entry_id ||= $1
        @pred = $2
        @k    = $3
      else
        raise ArgumentError,
              "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
      end
    end

    # Parser for the verbose output report format
    # (``psort -v report'' and WWW server output).
    #
    # Raises ArgumentError when the header, the sequence, the boundary
    # line or the prediction section cannot be found.
    def self.v_parser(ent, entry_id = nil)
      report = new(ent, entry_id)
      chunks = merge_continuations(ent.split(/\n\n/).map { |part| part.chomp })

      if /^\s*PSORT II server/ =~ chunks[0] # for WWW version
        chunks.shift
        heading = chunks.reverse.find { |c| /^Results of Subprograms/ =~ c }
        if heading
          # Drop the heading and the chunk right before it.
          at = chunks.index(heading)
          chunks.delete(heading)
          chunks.delete_at(at - 1) if at > 0
        end
      end

      raise ArgumentError, "invalid format\n[#{ent}]" if chunks.size < 2
      report.set_header_line(chunks.shift)
      report.seq = Bio::Sequence::AA.new(chunks.shift)

      fent, pent = divent(chunks)
      raise ArgumentError, "invalid format\n[#{ent}]" if pent.size < 2
      report.set_features(fent)
      report.prob = set_kNN_prob(pent[0].strip)
      report.set_prediction(pent[1].strip)

      report
    end

    # Joins continuation chunks onto the chunk that precedes them.
    # A chunk is a continuation when none of its lines starts with a
    # word character, '-', '>' or a tab, or when one of its lines starts
    # with "none" (psort output bug). A continuation with no predecessor
    # is kept as it is.
    def self.merge_continuations(chunks)
      merged = []
      chunks.each do |chunk|
        continuation = (/^(\w|-|\>|\t)/ !~ chunk) || (/^none/ =~ chunk)
        if continuation && !merged.empty?
          merged[-1] += chunk
        else
          merged << chunk
        end
      end
      merged
    end
    private_class_method :merge_continuations

    # Divides the entry body (Array of chunks) at the BOUNDARY chunk.
    # Returns two Arrays: the chunks before the boundary, and the chunks
    # starting two positions after it (the chunk directly following the
    # boundary is skipped). Raises ArgumentError without a boundary.
    def self.divent(entry)
      boundary = entry.index(BOUNDARY)
      unless boundary
        raise ArgumentError, "invalid format: boundary line not found"
      end
      return entry[0...boundary], (entry[(boundary + 2)..-1] || [])
    end

    # Sets @features values: each chunk is stored under the text that
    # precedes its first ": " (or ":\n"); 'AA' holds the sequence length.
    def set_features(features_ary)
      features_ary.each do |fent|
        key = fent.split(/\:( |\n)/)[0].to_s.strip
        features[key] = fent # unless /^\>/ =~ key
      end
      features['AA'] = seq.length
    end

  end # class Report

end # class PSORT2
425
+
426
+ end # class PSORT
427
+
428
+ end # module Bio
429
+
430
+
431
+
432
+
433
+
434
+ # testing code
435
+
436
if __FILE__ == $0

  # Reads PSORT II reports from ARGF (files given on the command line, or
  # standard input), one record per Report::DELIMITER, and dumps the parsed
  # attributes of each report.
  while entry = $<.gets(Bio::PSORT::PSORT2::Report::DELIMITER)

    puts "\n ==> a = Bio::PSORT::PSORT2::Report.parser(entry)"
    # The entry id is passed explicitly as nil so that it is taken from the
    # report itself; calling parser with a single argument raised
    # ArgumentError because entry_id is a required parameter.
    a = Bio::PSORT::PSORT2::Report.parser(entry, nil)

    puts "\n ==> a.entry_id "
    p a.entry_id
    puts "\n ==> a.scl "
    p a.scl
    puts "\n ==> a.pred "
    p a.pred
    puts "\n ==> a.prob "
    p a.prob
    sorted_codes = a.prob.keys.sort
    p sorted_codes.map { |code| code.rjust(4) }.inspect.gsub('"', '')
    p sorted_codes.map { |code| a.prob[code].to_s.rjust(4) }.inspect.gsub('"', '')

    puts "\n ==> a.k "
    p a.k
    puts "\n ==> a.definition"
    p a.definition
    puts "\n ==> a.seq"
    p a.seq

    puts "\n ==> a.features.keys.sort "
    p a.features.keys.sort

    a.features.keys.sort.each do |key|
      puts "\n ==> a.features['#{key}'] "
      puts a.features[key]
    end

  end

end