bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/psort/report.rb - PSORT systems report classes
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 Mitsuteru C. Nakao <n@bioruby.org>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
# $Id: report.rb,v 1.12 2005/11/03 10:50:58 nakao Exp $
|
|
8
|
+
#
|
|
9
|
+
# == Report classes for PSORT systems
|
|
10
|
+
#
|
|
11
|
+
#--
|
|
12
|
+
#
|
|
13
|
+
# This library is free software; you can redistribute it and/or
|
|
14
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
15
|
+
# License as published by the Free Software Foundation; either
|
|
16
|
+
# version 2 of the License, or (at your option) any later version.
|
|
17
|
+
#
|
|
18
|
+
# This library is distributed in the hope that it will be useful,
|
|
19
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
20
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
21
|
+
# Lesser General Public License for more details.
|
|
22
|
+
#
|
|
23
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
24
|
+
# License along with this library; if not, write to the Free Software
|
|
25
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
26
|
+
#
|
|
27
|
+
# ++
|
|
28
|
+
#
|
|
29
|
+
|
|
30
|
+
require 'bio/sequence'
|
|
31
|
+
require 'bio/appl/psort'
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
module Bio
|
|
35
|
+
|
|
36
|
+
class PSORT
|
|
37
|
+
|
|
38
|
+
class PSORT1
|
|
39
|
+
|
|
40
|
+
# = Bio::PSORT::PSORT1::Report
|
|
41
|
+
# Parser class for PSORT1 output report.
|
|
42
|
+
#
|
|
43
|
+
# == Example
|
|
44
|
+
class Report

  # Parses a PSORT1 output report (a String) and returns a
  # Bio::PSORT::PSORT1::Report. Delegates to default_parser.
  def self.parser(output_report)
    default_parser(output_report)
  end

  # Parses a PSORT1 output report (a String) and returns a
  # Bio::PSORT::PSORT1::Report.
  #
  # The "Query Information" and "----- Final Results -----" sections are
  # required; ArgumentError is raised when either is missing.  The
  # "Result Information" section is optional and, when present, is stored
  # in #result_info.  #title and #reasoning are not filled in by this parser.
  def self.default_parser(output_report)
    query_section = output_report[/^Query Information\n\n(.+?)\n\n/m, 1]
    final_section = output_report[/\n\n----- Final Results -----\n\n(.+?)\n\n\n/m, 1]
    unless query_section && final_section
      raise ArgumentError, "invalid format\n[#{output_report}]"
    end

    rpt = new
    rpt.raw = output_report

    result_info = output_report[/^Result Information\n\n(.+?)\n\n\*/m, 1]
    rpt.result_info = result_info if result_info

    # Line 0 carries the ORIGIN, line 2 the ">id" header and lines 3..
    # the sequence.  String#[regexp, 1] is used instead of scan(...).to_s
    # because Array#to_s only concatenates its elements on Ruby 1.8.
    query_info = query_section.split(/\n/)
    rpt.entry_id = query_info[2].to_s[/^>(\S+) */, 1].to_s
    rpt.origin   = query_info[0].to_s[/ORIGIN (\w+)/, 1].to_s
    rpt.sequence = Bio::Sequence::AA.new((query_info[3..-1] || []).join)

    # One hash per line of the final results table.
    rpt.final_result = final_section.split(/\n/).map do |line|
      prediction, detail = line.strip.split(/---/).map { |field| field.strip }
      detail = detail.to_s
      { 'prediction' => prediction,
        'certainty'  => detail[/Certainty= (\d\.\d{3})/, 1].to_s,
        'comment'    => detail.scan(/\((\w+)\)/).flatten.join }
    end

    rpt
  end

  # Entry identifier taken from the ">id" line of the query information.
  attr_accessor :entry_id

  # Value following "ORIGIN" in the query information.
  attr_accessor :origin

  # Title (not set by the parser).
  attr_accessor :title

  # Query sequence (Bio::Sequence::AA when set by the parser).
  attr_accessor :sequence

  # Text of the "Result Information" section.
  attr_accessor :result_info

  # Reasoning steps (not set by the parser).
  attr_accessor :reasoning

  # Array of hashes with the keys 'prediction', 'certainty' and 'comment'.
  attr_accessor :final_result

  # Raw text of the output report.
  attr_accessor :raw

  # Constructs a Bio::PSORT::PSORT1::Report object.
  def initialize(entry_id = '', origin = '', title = '', sequence = '',
                 result_info = '', reasoning = {}, final_result = [])
    @entry_id     = entry_id
    @origin       = origin
    @title        = title
    @sequence     = sequence
    @result_info  = result_info
    @reasoning    = reasoning
    @final_result = final_result
    @raw          = ''
  end

end # class Report
|
|
102
|
+
|
|
103
|
+
end # class PSORT1
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
class PSORT2
|
|
108
|
+
|
|
109
|
+
# Subcellular localization name codes used by PSORT2.
# Maps each three-letter code to its descriptive name.
# Frozen: this is a shared lookup table and must not be modified.
SclNames = {
  'csk' => 'cytoskeletal',
  'cyt' => 'cytoplasmic',
  'nuc' => 'nuclear',
  'mit' => 'mitochondrial',
  'ves' => 'vesicles of secretory system',
  'end' => 'endoplasmic reticulum',
  'gol' => 'Golgi',
  'vac' => 'vacuolar',
  'pla' => 'plasma membrane',
  'pox' => 'peroxisomal',
  'exc' => 'extracellular, including cell wall',
  '---' => 'other'
}.freeze
|
|
124
|
+
|
|
125
|
+
# Feature name codes.
# The trailing comment on each entry is the original note on where the
# value comes from in the PSORT II output.
# Frozen: this is a shared lookup table and must not be modified.
Features = [
  'psg', # PSG: PSG score
  'gvh', # GvH: GvH score
  'alm', # ALOM: $xmax
  'tms', # ALOM: $count
  'top', # MTOP: Charge difference: $mtopscr
  'mit', # MITDISC: Score: $score
  'mip', # Gavel: motif at $isite
  'nuc', # NUCDISC: NLS Score: $score
  'erl', # KDEL: ($seg|none)
  'erm', # ER Membrane Retention Signals: ($cseg|none) $scr
  'pox', # SKL: ($pat|none) $scr
  'px2', # PTS2: (found|none) ($#match < 0) ? 0 : ($#match+1);
  'vac', # VAC: (found|none) ($#match < 0) ? 0 : ($#match+1);
  'rnp', # RNA-binding motif: (found|none) ($#match < 0) ? 0 : ($#match+1);
  'act', # Actinin-type actin-binding motif: (found|none) $hit
  'caa', # Prenylation motif: (2|1|0) CaaX,CXC,CC,nil
  'yqr', # memYQRL: (found|none) $scr
  'tyr', # Tyrosines in the tail: (none|\S+[,])
         # 10 * scalar(@ylist) / ($end - $start + 1);
  'leu', # Dileucine motif in the tail: (none|found) $scr
  'gpi', # >>> Seems to be GPI anchored
  'myr', # NMYR: (none|\w) $scr
  'dna', # checking 63 PROSITE DNA binding motifs: $hit
  'rib', # checking 71 PROSITE ribosomal protein motifs: $hit
  'bac', # checking 33 PROSITE prokaryotic DNA binding motifs: $hit
  'm1a', # $mtype eq '1a'
  'm1b', # $mtype eq '1b'
  'm2',  # $mtype eq '2 '
  'mNt', # $mtype eq 'Nt'
  'm3a', # $mtype eq '3a'
  'm3b', # $mtype eq '3b'
  'm_',  # $mtype eq '__' tms == 0
  'ncn', # NNCN: ($NetOutput[1] > $NetOutput[0]) ? $output : (-$output);
  'lps', # COIL: $count
  'len'  # $leng
].freeze
|
|
163
|
+
|
|
164
|
+
# Feature name codes (long version).
# Maps each short feature code to the long feature name.
# The membrane-topology codes ('m1a' .. 'm_') have no long name.
# Frozen: this is a shared lookup table and must not be modified.
FeaturesLong = {
  'psg' => 'PSG',
  'gvh' => 'GvH',
  'tms' => 'ALOM',
  'alm' => 'ALOM',
  'top' => 'MTOP',
  'mit' => 'MITDISC',
  'mip' => 'Gavel',
  'nuc' => 'NUCDISC',
  'erl' => 'KDEL',
  'erm' => 'ER Membrane Retention Signals',
  'pox' => 'SKL',
  'px2' => 'PTS2',
  'vac' => 'VAC',
  'rnp' => 'RNA-binding motif',
  'act' => 'Actinin-type actin-binding motif',
  'caa' => 'Prenylation motif',
  'yqr' => 'memYQRL',
  'tyr' => 'Tyrosines in the tail',
  'leu' => 'Dileucine motif in the tail',
  'gpi' => '>>> Seems to be GPI anchored',
  'myr' => 'NMYR',
  'dna' => 'checking 63 PROSITE DNA binding motifs',
  'rib' => 'checking 71 PROSITE ribosomal protein motifs',
  # Previously carried a stray leading "o" and trailing ":", unlike the
  # sibling 'dna' and 'rib' entries.
  'bac' => 'checking 33 PROSITE prokaryotic DNA binding motifs',
  'm1a' => '',
  'm1b' => '',
  'm2'  => '',
  'mNt' => '',
  'm3a' => '',
  'm3b' => '',
  'm_'  => '',
  'ncn' => 'NNCN',
  'lps' => 'COIL',
  'len' => 'AA' # length of input sequence
}.freeze
|
|
201
|
+
|
|
202
|
+
# = Bio::PSORT::PSORT2::Report
|
|
203
|
+
# Report parser class for PSORT II (PSORT2).
|
|
204
|
+
# == Example
|
|
205
|
+
class Report
|
|
206
|
+
|
|
207
|
+
# Report boundary string.
|
|
208
|
+
BOUNDARY = '-' * 75
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
# Report delimiter.
|
|
212
|
+
RS = DELIMITER = "\)\n\n#{BOUNDARY}"
|
|
213
|
+
|
|
214
|
+
# entry_id of query sequence.
|
|
215
|
+
attr_accessor :entry_id
|
|
216
|
+
|
|
217
|
+
# Given subcellular localization (three letters code).
|
|
218
|
+
attr_accessor :scl
|
|
219
|
+
|
|
220
|
+
# Definition of query sequence.
|
|
221
|
+
attr_accessor :definition
|
|
222
|
+
|
|
223
|
+
# Sequence of query sequence.
|
|
224
|
+
attr_accessor :seq
|
|
225
|
+
|
|
226
|
+
# k parameter of k-nearest neighbors classifier.
|
|
227
|
+
attr_accessor :k
|
|
228
|
+
|
|
229
|
+
# Feature vector used the kNN prediction.
|
|
230
|
+
attr_accessor :features
|
|
231
|
+
|
|
232
|
+
# Probability vector of kNN prediction.
|
|
233
|
+
attr_accessor :prob
|
|
234
|
+
|
|
235
|
+
# Predicted subcellular localization (three letters code).
|
|
236
|
+
attr_accessor :pred
|
|
237
|
+
|
|
238
|
+
# Raw text of output report.
|
|
239
|
+
attr_accessor :raw
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
# Constructs aBio::PSORT::PSORT2::Report object.
|
|
243
|
+
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
|
|
244
|
+
seq = nil, k = nil, features = {}, prob = {}, pred = nil)
|
|
245
|
+
@entry_id = entry_id
|
|
246
|
+
@scl = scl
|
|
247
|
+
@definition = definition
|
|
248
|
+
@seq = seq
|
|
249
|
+
@features = features
|
|
250
|
+
@prob = prob
|
|
251
|
+
@pred = pred
|
|
252
|
+
@k = k
|
|
253
|
+
@raw = raw
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
|
|
257
|
+
# Parses output report with output format detection automatically.
|
|
258
|
+
def self.parser(str, entry_id)
|
|
259
|
+
case str
|
|
260
|
+
when /^ psg:/ # default report
|
|
261
|
+
self.default_parser(str, entry_id)
|
|
262
|
+
when /^PSG:/ # -v report
|
|
263
|
+
self.v_parser(str, entry_id)
|
|
264
|
+
when /: too short length /
|
|
265
|
+
self.too_short_parser(str, entry_id)
|
|
266
|
+
when /PSORT II server/
|
|
267
|
+
tmp = self.new(ent, entry_id)
|
|
268
|
+
else
|
|
269
|
+
raise ArgumentError, "invalid format\n[#{str}]"
|
|
270
|
+
end
|
|
271
|
+
end
|
|
272
|
+
|
|
273
|
+
# Parser for ``too short length'' report.
|
|
274
|
+
#
|
|
275
|
+
# $id: too short length ($leng), skipped\n";
|
|
276
|
+
def self.too_short_parser(ent, entry_id = nil)
|
|
277
|
+
report = self.new(ent)
|
|
278
|
+
report.entry_id = entry_id
|
|
279
|
+
if ent =~ /^(.+)?: too short length/
|
|
280
|
+
report.entry_id = $1 unless report.entry_id
|
|
281
|
+
report.scl = '---'
|
|
282
|
+
end
|
|
283
|
+
report
|
|
284
|
+
end
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
# Parser for the default report format.
|
|
288
|
+
# ``psort report'' output.
|
|
289
|
+
def self.default_parser(ent, entry_id = nil)
|
|
290
|
+
report = self.new(ent, entry_id)
|
|
291
|
+
ent = ent.split(/\n\n/).map {|e| e.chomp }
|
|
292
|
+
|
|
293
|
+
report.set_header_line(ent[0])
|
|
294
|
+
|
|
295
|
+
# feature matrix
|
|
296
|
+
ent[1].gsub(/\n/,' ').strip.split(/ /).map {|fe|
|
|
297
|
+
pair = fe.split(/: /)
|
|
298
|
+
report.features[pair[0].strip] = pair[1].strip.to_f
|
|
299
|
+
}
|
|
300
|
+
|
|
301
|
+
report.prob = self.set_kNN_prob(ent[2])
|
|
302
|
+
report.set_prediction(ent[3])
|
|
303
|
+
|
|
304
|
+
return report
|
|
305
|
+
end
|
|
306
|
+
|
|
307
|
+
# Returns header information.
|
|
308
|
+
def set_header_line(str)
|
|
309
|
+
str.sub!(/^-+\n/,'')
|
|
310
|
+
tmp = str.split(/\t| /)
|
|
311
|
+
@entry_id = tmp.shift.sub(/^-+/,'').strip unless @entry_id
|
|
312
|
+
|
|
313
|
+
case tmp.join(' ').chomp
|
|
314
|
+
when /\(\d+ aa\) (.+)$/
|
|
315
|
+
@definition = $1
|
|
316
|
+
else
|
|
317
|
+
@definition = tmp.join(' ').chomp
|
|
318
|
+
end
|
|
319
|
+
scl = @definition.split(' ')[0]
|
|
320
|
+
|
|
321
|
+
@scl = scl if SclNames.keys.index(scl)
|
|
322
|
+
end
|
|
323
|
+
|
|
324
|
+
# Returns @prob value.
|
|
325
|
+
def self.set_kNN_prob(str)
|
|
326
|
+
prob = Hash.new
|
|
327
|
+
Bio::PSORT::PSORT2::SclNames.keys.each {|a|
|
|
328
|
+
prob.update( {a => 0.0} )
|
|
329
|
+
}
|
|
330
|
+
str.gsub(/\t/,'').split(/\n/).each {|a|
|
|
331
|
+
val,scl = a.strip.split(/ %: /)
|
|
332
|
+
key = Bio::PSORT::PSORT2::SclNames.index(scl)
|
|
333
|
+
prob[key] = val.to_f
|
|
334
|
+
}
|
|
335
|
+
return prob
|
|
336
|
+
end
|
|
337
|
+
|
|
338
|
+
# Returns @prob and @k values.
|
|
339
|
+
def set_prediction(str)
|
|
340
|
+
case str
|
|
341
|
+
when /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
|
|
342
|
+
@entry_id ||= $1 unless @entry_id
|
|
343
|
+
@pred = $2
|
|
344
|
+
@k = $3
|
|
345
|
+
else
|
|
346
|
+
raise ArgumentError,
|
|
347
|
+
"Invalid format at(#{self.entry_id}):\n[#{str}]\n"
|
|
348
|
+
end
|
|
349
|
+
end
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
# Parser for the verbose output report format.
|
|
353
|
+
# ``psort -v report'' and WWW server output.
|
|
354
|
+
def self.v_parser(ent, entry_id = nil)
|
|
355
|
+
report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)
|
|
356
|
+
|
|
357
|
+
ent = ent.split(/\n\n/).map {|e| e.chomp }
|
|
358
|
+
ent.each_with_index {|e, i|
|
|
359
|
+
unless /^(\w|-|\>|\t)/ =~ e
|
|
360
|
+
j = self.__send__(:search_j, i, ent)
|
|
361
|
+
ent[i - j] += e
|
|
362
|
+
ent[i] = nil
|
|
363
|
+
end
|
|
364
|
+
if /^none/ =~ e # psort output bug
|
|
365
|
+
j = self.__send__(:search_j, i, ent)
|
|
366
|
+
ent[i - j] += e
|
|
367
|
+
ent[i] = nil
|
|
368
|
+
end
|
|
369
|
+
}
|
|
370
|
+
ent.compact!
|
|
371
|
+
|
|
372
|
+
if /^ PSORT II server/ =~ ent[0] # for WWW version
|
|
373
|
+
ent.shift
|
|
374
|
+
delline = ''
|
|
375
|
+
ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
|
|
376
|
+
i = ent.index(delline)
|
|
377
|
+
ent.delete(delline)
|
|
378
|
+
ent.delete_at(i - 1)
|
|
379
|
+
end
|
|
380
|
+
|
|
381
|
+
report.set_header_line(ent.shift)
|
|
382
|
+
report.seq = Bio::Sequence::AA.new(ent.shift)
|
|
383
|
+
|
|
384
|
+
fent, pent = self.divent(ent)
|
|
385
|
+
report.set_features(fent)
|
|
386
|
+
report.prob = self.set_kNN_prob(pent[0].strip)
|
|
387
|
+
report.set_prediction(pent[1].strip)
|
|
388
|
+
|
|
389
|
+
return report
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
#
|
|
394
|
+
def self.search_j(i, ent)
|
|
395
|
+
j = 1
|
|
396
|
+
1.upto(ent.size) {|x|
|
|
397
|
+
if ent[i - x]
|
|
398
|
+
j = x
|
|
399
|
+
break
|
|
400
|
+
end
|
|
401
|
+
}
|
|
402
|
+
return j
|
|
403
|
+
end
|
|
404
|
+
private_class_method :search_j
|
|
405
|
+
|
|
406
|
+
|
|
407
|
+
# Divides entry body
|
|
408
|
+
def self.divent(entry)
|
|
409
|
+
boundary = entry.index(BOUNDARY)
|
|
410
|
+
return entry[0..(boundary - 1)], entry[(boundary + 2)..(entry.length)]
|
|
411
|
+
end
|
|
412
|
+
|
|
413
|
+
# Sets @features values.
|
|
414
|
+
def set_features(features_ary)
|
|
415
|
+
features_ary.each {|fent|
|
|
416
|
+
key = fent.split(/\:( |\n)/)[0].strip
|
|
417
|
+
self.features[key] = fent # unless /^\>/ =~ key
|
|
418
|
+
}
|
|
419
|
+
self.features['AA'] = self.seq.length
|
|
420
|
+
end
|
|
421
|
+
|
|
422
|
+
end # class Report
|
|
423
|
+
|
|
424
|
+
end # class PSORT2
|
|
425
|
+
|
|
426
|
+
end # class PSORT
|
|
427
|
+
|
|
428
|
+
end # module Bio
|
|
429
|
+
|
|
430
|
+
|
|
431
|
+
|
|
432
|
+
|
|
433
|
+
|
|
434
|
+
# testing code
|
|
435
|
+
|
|
436
|
+
# Self-test driver: reads PSORT II report entries from ARGF (the files
# named on the command line, or standard input), one entry per
# Report::DELIMITER, parses each and prints the parsed fields.
if __FILE__ == $0

  while entry = $<.gets(Bio::PSORT::PSORT2::Report::DELIMITER)

    puts "\n ==> a = Bio::PSORT::PSORT2::Report.parser(entry)"
    # No entry_id is known here; nil lets the parser take it from the
    # report.  Passed explicitly because the parser takes two arguments.
    a = Bio::PSORT::PSORT2::Report.parser(entry, nil)

    puts "\n ==> a.entry_id "
    p a.entry_id
    puts "\n ==> a.scl "
    p a.scl
    puts "\n ==> a.pred "
    p a.pred
    puts "\n ==> a.prob "
    p a.prob
    p a.prob.keys.sort.map {|k| k.rjust(4)}.inspect.gsub('"','')
    p a.prob.keys.sort.map {|k| a.prob[k].to_s.rjust(4) }.inspect.gsub('"','')

    puts "\n ==> a.k "
    p a.k
    puts "\n ==> a.definition"
    p a.definition
    puts "\n ==> a.seq"
    p a.seq

    puts "\n ==> a.features.keys.sort "
    p a.features.keys.sort

    a.features.keys.sort.each do |key|
      puts "\n ==> a.features['#{key}'] "
      puts a.features[key]
    end

  end

end
|