bio 0.7.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/sample/any2fasta.rb
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# any2fasta.rb - convert input file into FASTA format using a regex
|
4
|
+
# filter
|
5
|
+
#
|
6
|
+
# Copyright (C) 2006 Pjotr Prins <p@bioruby.org>
|
7
|
+
#
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
11
|
+
# (at your option) any later version.
|
12
|
+
#
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
16
|
+
# GNU General Public License for more details.
|
17
|
+
#
|
18
|
+
# $Id: any2fasta.rb,v 1.1 2006/02/17 14:59:27 pjotr Exp $
|
19
|
+
#
|
20
|
+
|
21
|
+
require 'bio/io/flatfile'
|
22
|
+
|
23
|
+
include Bio
|
24
|
+
|
25
|
+
# Help text printed when the script is started without any arguments.
usage = <<USAGE

Usage: any2fasta.rb [regex] infiles

Examples:

Output all sequences containing GATC or GATT ignoring case:

any2fasta.rb "/GAT[CT]/i" *.seq > reduced.fasta

USAGE

# Nothing to convert: show the help text and leave with a failure status.
if ARGV.empty?
  print(usage)
  exit(1)
end
|
41
|
+
|
42
|
+
# Turns a command-line regex literal such as "/GAT[CT]/i" into a Regexp.
#
# literal:: text of the form /pattern/flags; only the flags i, m and x are
#           understood.
#
# Returns the compiled Regexp, or nil when +literal+ does not have that form.
# Regexp.new raises RegexpError when the pattern itself is malformed.
#
# The argument is parsed rather than passed to eval, so text following the
# closing slash can never be executed as Ruby code.
def regex_from_literal(literal)
  parts = %r{\A/(.*)/([imx]*)\z}m.match(literal.to_s)
  return nil unless parts

  flags = parts[2]
  options = 0
  options |= Regexp::IGNORECASE if flags.include?('i')
  options |= Regexp::MULTILINE  if flags.include?('m')
  options |= Regexp::EXTENDED   if flags.include?('x')
  Regexp.new(parts[1], options)
end

# ---- Valid regular expression - if it is not a file
# The first argument is taken as a filter only when it starts with a slash
# and no file of that name exists; otherwise every argument is an input file.
regex = nil
candidate = ARGV[0]
if candidate.to_s =~ %r{\A/} && !File.exist?(candidate)
  # Compile once up front instead of re-evaluating the text for every entry.
  regex = regex_from_literal(candidate)
  abort "any2fasta.rb: invalid regular expression: #{candidate}" unless regex
  ARGV.shift
end

# Print each entry of each input file as FASTA (70 columns per line),
# skipping entries whose sequence does not match the filter.
ARGV.each do |fn|
  ff = Bio::FlatFile.auto(fn)
  ff.each_entry do |entry|
    next if regex && entry.seq !~ regex
    print entry.seq.to_fasta(entry.definition, 70)
  end
end
|
59
|
+
|
@@ -0,0 +1,64 @@
|
|
1
|
+
hmmpfam - search one or more sequences against HMM database
|
2
|
+
HMMER 2.3.2 (Oct 2003)
|
3
|
+
Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
|
4
|
+
Freely distributed under the GNU General Public License (GPL)
|
5
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
6
|
+
HMM file: /Users/nakao/Sites/iprscan/data/Pfam
|
7
|
+
Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc
|
8
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
9
|
+
|
10
|
+
Query sequence: 104K_THEPA
|
11
|
+
Accession: [none]
|
12
|
+
Description: [none]
|
13
|
+
|
14
|
+
Scores for sequence family classification (score includes all domains):
|
15
|
+
Model Description Score E-value N
|
16
|
+
-------- ----------- ----- ------- ---
|
17
|
+
PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4
|
18
|
+
|
19
|
+
Parsed for domains:
|
20
|
+
Model Domain seq-f seq-t hmm-f hmm-t score E-value
|
21
|
+
-------- ------- ----- ----- ----- ----- ----- -------
|
22
|
+
PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16
|
23
|
+
PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16
|
24
|
+
PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16
|
25
|
+
PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16
|
26
|
+
|
27
|
+
Alignments of top-scoring domains:
|
28
|
+
PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16
|
29
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
30
|
+
t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W
|
31
|
+
104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81
|
32
|
+
|
33
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
34
|
+
e++ + +l++ ++++++++++++++++ +++
|
35
|
+
104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111
|
36
|
+
|
37
|
+
PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16
|
38
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
39
|
+
+L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++
|
40
|
+
104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195
|
41
|
+
|
42
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
43
|
+
++ ++++vt++++++ +++L+l+++ +
|
44
|
+
104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224
|
45
|
+
|
46
|
+
PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16
|
47
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
48
|
+
+Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++
|
49
|
+
104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311
|
50
|
+
|
51
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
52
|
+
es+ + + ++i +++y+++n ++++l++n+++
|
53
|
+
104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343
|
54
|
+
|
55
|
+
PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16
|
56
|
+
*->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv
|
57
|
+
+LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+
|
58
|
+
104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423
|
59
|
+
|
60
|
+
WeseddpefglivtlsfyldsnkfLvlllintak<-*
|
61
|
+
W++e+ ++ l++++++++d++ +Lv+l+i++
|
62
|
+
104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456
|
63
|
+
|
64
|
+
//
|
@@ -0,0 +1,88 @@
|
|
1
|
+
hmmsearch - search a sequence database with a profile HMM
|
2
|
+
HMMER 2.2g (August 2001)
|
3
|
+
Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
|
4
|
+
Freely distributed under the GNU General Public License (GPL)
|
5
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
6
|
+
HMM file: /sw/share/hmmer/demo/7tm_1 [7tm_1]
|
7
|
+
Sequence database: /sw/share/hmmer/demo/P08908.fasta
|
8
|
+
per-sequence score cutoff: [none]
|
9
|
+
per-domain score cutoff: [none]
|
10
|
+
per-sequence Eval cutoff: <= 10
|
11
|
+
per-domain Eval cutoff: [none]
|
12
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
13
|
+
|
14
|
+
Query HMM: 7tm_1
|
15
|
+
Accession: PF00001
|
16
|
+
Description: 7 transmembrane receptor (rhodopsin family)
|
17
|
+
[HMM has been calibrated; E-values are empirical estimates]
|
18
|
+
|
19
|
+
Scores for complete sequences (score includes all domains):
|
20
|
+
Sequence Description Score E-value N
|
21
|
+
-------- ----------- ----- ------- ---
|
22
|
+
sp|P08908|5H1A_HUMAN 5-hydroxytryptamine 1A receptor 377.1 5.5e-130 1
|
23
|
+
|
24
|
+
Parsed for domains:
|
25
|
+
Sequence Domain seq-f seq-t hmm-f hmm-t score E-value
|
26
|
+
-------- ------- ----- ----- ----- ----- ----- -------
|
27
|
+
sp|P08908|5H1A_HUMAN 1/1 53 400 .. 1 275 [] 377.1 5.5e-130
|
28
|
+
|
29
|
+
Alignments of top-scoring domains:
|
30
|
+
sp|P08908|5H1A_HUMAN: domain 1 of 1, from 53 to 400: score 377.1, E = 5.5e-130
|
31
|
+
*->GNlLVilvilrtkklrtptnifilNLAvADLLflltlppwalyylvg
|
32
|
+
GN+ V+++i+++++l++++n++i++LAv+DL+++++++p+a++y v
|
33
|
+
sp|P08908| 53 GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVL 99
|
34
|
+
|
35
|
+
gsedWpfGsalCklvtaldvvnmyaSillLtaISiDRYlAIvhPlryrrr
|
36
|
+
+ W++G++ C+l++aldv+++++Sil+L+aI++DRY+AI++P+ y ++
|
37
|
+
sp|P08908| 100 N--KWTLGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK 147
|
38
|
+
|
39
|
+
rtsprrAkvvillvWvlalllslPpllfswvktveegngtlnvnvtvCli
|
40
|
+
rt prrA+++i+l+W++++l+s+Pp +++w++++ + +C+i
|
41
|
+
sp|P08908| 148 RT-PRRAAALISLTWLIGFLISIPP-MLGWRTPEDRSD------PDACTI 189
|
42
|
+
|
43
|
+
dfpeestasvstwlvsyvllstlvgFllPllvilvcYtrIlrtlrkrark
|
44
|
+
+ +++ y+++st+++F++Pll++lv+Y+rI+r++r r rk
|
45
|
+
sp|P08908| 190 SKDHG-----------YTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK 228
|
46
|
+
|
47
|
+
gas...............................................
|
48
|
+
+ + ++++ +++++ ++ ++++++ ++++++++ + + +++ ++ +
|
49
|
+
sp|P08908| 229 TVKkvektgadtrhgaspapqpkksvngesgsrnwrlgveskaggalcan 278
|
50
|
+
|
51
|
+
..................................................
|
52
|
+
+ ++++++ + + ++ ++++++ + +++ ++++ + + +++++++
|
53
|
+
sp|P08908| 279 gavrqgddgaalevievhrvgnskehlplpseagptpcapasferknern 328
|
54
|
+
|
55
|
+
.....kkrsskerkaaktllvvvvvFvlCWlPyfivllldtlc.lsiims
|
56
|
+
+ ++k+ +erk++ktl++++++F+lCWlP+fiv+l+ ++c++s++m
|
57
|
+
sp|P08908| 329 aeakrKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPFCeSSCHM- 377
|
58
|
+
|
59
|
+
stCelervlptallvtlwLayvNsclNPiIY<-*
|
60
|
+
+ + +++wL+y+Ns lNP+IY
|
61
|
+
sp|P08908| 378 --------PTLLGAIINWLGYSNSLLNPVIY 400
|
62
|
+
|
63
|
+
|
64
|
+
Histogram of all scores:
|
65
|
+
score obs exp (one = represents 1 sequences)
|
66
|
+
----- --- ---
|
67
|
+
377 1 0|=
|
68
|
+
|
69
|
+
|
70
|
+
% Statistical details of theoretical EVD fit:
|
71
|
+
mu = -10.6639
|
72
|
+
lambda = 0.7676
|
73
|
+
chi-sq statistic = 0.0000
|
74
|
+
P(chi-square) = 0
|
75
|
+
|
76
|
+
Total sequences searched: 1
|
77
|
+
|
78
|
+
Whole sequence top hits:
|
79
|
+
tophits_s report:
|
80
|
+
Total hits: 1
|
81
|
+
Satisfying E cutoff: 1
|
82
|
+
Total memory: 16K
|
83
|
+
|
84
|
+
Domain top hits:
|
85
|
+
tophits_s report:
|
86
|
+
Total hits: 1
|
87
|
+
Satisfying E cutoff: 1
|
88
|
+
Total memory: 17K
|
@@ -0,0 +1,30 @@
|
|
1
|
+
H DAYM780301
|
2
|
+
D Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
|
3
|
+
R
|
4
|
+
A Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C.
|
5
|
+
T A model of evolutionary change in proteins
|
6
|
+
J In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (Dayhoff,
|
7
|
+
M.O., ed.), National Biomedical Research Foundation, Washington, D.C.,
|
8
|
+
p.352 (1978)
|
9
|
+
M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
|
10
|
+
2.
|
11
|
+
-2. 6.
|
12
|
+
0. 0. 2.
|
13
|
+
0. -1. 2. 4.
|
14
|
+
-2. -4. -4. -5. 12.
|
15
|
+
0. 1. 1. 2. -5. 4.
|
16
|
+
0. -1. 1. 3. -5. 2. 4.
|
17
|
+
1. -3. 0. 1. -3. -1. 0. 5.
|
18
|
+
-1. 2. 2. 1. -3. 3. 1. -2. 6.
|
19
|
+
-1. -2. -2. -2. -2. -2. -2. -3. -2. 5.
|
20
|
+
-2. -3. -3. -4. -6. -2. -3. -4. -2. 2. 6.
|
21
|
+
-1. 3. 1. 0. -5. 1. 0. -2. 0. -2. -3. 5.
|
22
|
+
-1. 0. -2. -3. -5. -1. -2. -3. -2. 2. 4. 0. 6.
|
23
|
+
-4. -4. -4. -6. -4. -5. -5. -5. -2. 1. 2. -5. 0. 9.
|
24
|
+
1. 0. -1. -1. -3. 0. -1. -1. 0. -2. -3. -1. -2. -5. 6.
|
25
|
+
1. 0. 1. 0. 0. -1. 0. 1. -1. -1. -3. 0. -2. -3. 1. 2.
|
26
|
+
1. -1. 0. 0. -2. -1. 0. 0. -1. 0. -2. 0. -1. -3. 0. 1. 3.
|
27
|
+
-6. 2. -4. -7. -8. -5. -7. -7. -3. -5. -2. -3. -4. 0. -6. -2. -5. 17.
|
28
|
+
-3. -4. -2. -4. 0. -4. -4. -5. 0. -1. -1. -4. -2. 7. -5. -3. -3. 0. 10.
|
29
|
+
0. -2. -2. -2. -2. -2. -2. -1. -2. 4. 2. -2. 2. -1. -1. -1. 0. -6. -2. 4.
|
30
|
+
//
|
@@ -0,0 +1,20 @@
|
|
1
|
+
H PRAM900102
|
2
|
+
D Relative frequency in alpha-helix (Prabhakaran, 1990)
|
3
|
+
R LIT:1614053b PMID:2390062
|
4
|
+
A Prabhakaran, M.
|
5
|
+
T The distribution of physical, chemical and conformational properties in
|
6
|
+
signal and nascent peptides
|
7
|
+
J Biochem. J. 269, 691-696 (1990) Original reference of these three data:
|
8
|
+
Creighton, T.E. In "Protein Structure and Melecular Properties", (Freeman,
|
9
|
+
W.H., ed.), San Francisco P.235 (1983)
|
10
|
+
C LEVM780101 1.000 LEVM780104 0.964 PALJ810101 0.943
|
11
|
+
KANM800101 0.942 ISOY800101 0.929 MAXF760101 0.924
|
12
|
+
ROBB760101 0.916 GEIM800101 0.912 GEIM800104 0.907
|
13
|
+
RACS820108 0.904 PALJ810102 0.902 PALJ810109 0.898
|
14
|
+
NAGK730101 0.894 CRAJ730101 0.887 CHOP780201 0.873
|
15
|
+
TANS770101 0.854 KANM800103 0.850 QIAN880107 0.829
|
16
|
+
QIAN880106 0.827 BURA740101 0.805 NAGK730103 -0.809
|
17
|
+
I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
|
18
|
+
1.29 0.96 0.90 1.04 1.11 1.27 1.44 0.56 1.22 0.97
|
19
|
+
1.30 1.23 1.47 1.07 0.52 0.82 0.82 0.99 0.72 0.91
|
20
|
+
//
|
@@ -0,0 +1,53 @@
|
|
1
|
+
Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
|
2
|
+
precursor (T-lymphocyte differentiation antigen T8/Leu-2).
|
3
|
+
(235 letters)
|
4
|
+
|
5
|
+
>CD8B_HUMAN P10966 T-cell surface glycoprotein CD8 beta chain
|
6
|
+
precursor (Antigen CD8B).
|
7
|
+
Length = 210
|
8
|
+
|
9
|
+
Score = 29.6 bits (65), Expect = 5e-05
|
10
|
+
Identities = 21/90 (23%), Positives = 37/90 (41%), Gaps = 3/90 (3%)
|
11
|
+
|
12
|
+
Query: 39 VELKCQVLLSNPTSGCSWLFQ---PRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLG 95
|
13
|
+
V L C+ +S WL Q P + L+ S E ++ ++ + R
|
14
|
+
Sbjct: 37 VMLSCEAKISLSNMRIYWLRQRQAPSSDSHHEFLALWDSAKGTIHGEEVEQEKIAVFRDA 96
|
15
|
+
|
16
|
+
Query: 96 DTFVLTLSDFRRENEGYYFCSALSNSIMYF 125
|
17
|
+
F+L L+ + E+ G YFC + + + F
|
18
|
+
Sbjct: 97 SRFILNLTSVKPEDSGIYFCMIVGSPELTF 126
|
19
|
+
|
20
|
+
|
21
|
+
Lambda K H
|
22
|
+
0.323 0.137 0.436
|
23
|
+
|
24
|
+
Gapped
|
25
|
+
Lambda K H
|
26
|
+
0.267 0.0410 0.140
|
27
|
+
|
28
|
+
|
29
|
+
Matrix: BLOSUM62
|
30
|
+
Gap Penalties: Existence: 11, Extension: 1
|
31
|
+
Number of Hits to DB: 102
|
32
|
+
Number of Sequences: 0
|
33
|
+
Number of extensions: 5
|
34
|
+
Number of successful extensions: 3
|
35
|
+
Number of sequences better than 10.0: 1
|
36
|
+
Number of HSP's better than 10.0 without gapping: 1
|
37
|
+
Number of HSP's successfully gapped in prelim test: 0
|
38
|
+
Number of HSP's that attempted gapping in prelim test: 0
|
39
|
+
Number of HSP's gapped (non-prelim): 1
|
40
|
+
length of query: 235
|
41
|
+
length of database: 210
|
42
|
+
effective HSP length: 22
|
43
|
+
effective length of query: 213
|
44
|
+
effective length of database: 188
|
45
|
+
effective search space: 40044
|
46
|
+
effective search space used: 40044
|
47
|
+
T: 11
|
48
|
+
A: 40
|
49
|
+
X1: 16 ( 7.5 bits)
|
50
|
+
X2: 38 (14.6 bits)
|
51
|
+
X3: 64 (24.7 bits)
|
52
|
+
S1: 20 (12.2 bits)
|
53
|
+
S2: 20 (12.3 bits)
|
@@ -0,0 +1,37 @@
|
|
1
|
+
Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
|
2
|
+
precursor (T-lymphocyte differentiation antigen T8/Leu-2).
|
3
|
+
(235 letters)
|
4
|
+
|
5
|
+
Lambda K H
|
6
|
+
0.323 0.137 0.436
|
7
|
+
|
8
|
+
Gapped
|
9
|
+
Lambda K H
|
10
|
+
0.267 0.0410 0.140
|
11
|
+
|
12
|
+
|
13
|
+
Matrix: BLOSUM62
|
14
|
+
Gap Penalties: Existence: 11, Extension: 1
|
15
|
+
Number of Hits to DB: 175
|
16
|
+
Number of Sequences: 0
|
17
|
+
Number of extensions: 8
|
18
|
+
Number of successful extensions: 0
|
19
|
+
Number of sequences better than 1.0e-05: 0
|
20
|
+
Number of HSP's better than 0.0 without gapping: 0
|
21
|
+
Number of HSP's successfully gapped in prelim test: 0
|
22
|
+
Number of HSP's that attempted gapping in prelim test: 0
|
23
|
+
Number of HSP's gapped (non-prelim): 0
|
24
|
+
length of query: 235
|
25
|
+
length of database: 393
|
26
|
+
effective HSP length: 27
|
27
|
+
effective length of query: 208
|
28
|
+
effective length of database: 366
|
29
|
+
effective search space: 76128
|
30
|
+
effective search space used: 76128
|
31
|
+
T: 11
|
32
|
+
A: 40
|
33
|
+
X1: 16 ( 7.5 bits)
|
34
|
+
X2: 38 (14.6 bits)
|
35
|
+
X3: 64 (24.7 bits)
|
36
|
+
S1: 41 (22.0 bits)
|
37
|
+
S2: 74 (33.1 bits)
|
File without changes
|
@@ -12,7 +12,7 @@ aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal)
|
|
12
12
|
aspartate kinase (A)
|
13
13
|
(820 letters)
|
14
14
|
|
15
|
-
Database:
|
15
|
+
Database: b0002.faa
|
16
16
|
1 sequences; 820 total letters
|
17
17
|
|
18
18
|
Searching.done
|
@@ -88,7 +88,7 @@ Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
|
|
88
88
|
Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
|
89
89
|
|
90
90
|
|
91
|
-
Database:
|
91
|
+
Database: b0002.faa
|
92
92
|
Posted date: Aug 7, 2005 7:29 AM
|
93
93
|
Number of letters in database: 820
|
94
94
|
Number of sequences in database: 1
|
@@ -4,7 +4,7 @@
|
|
4
4
|
<BlastOutput_program>blastp</BlastOutput_program>
|
5
5
|
<BlastOutput_version>blastp 2.2.10 [Oct-19-2004]</BlastOutput_version>
|
6
6
|
<BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
|
7
|
-
<BlastOutput_db>
|
7
|
+
<BlastOutput_db>b0002.faa</BlastOutput_db>
|
8
8
|
<BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
|
9
9
|
<BlastOutput_query-def>eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)</BlastOutput_query-def>
|
10
10
|
<BlastOutput_query-len>820</BlastOutput_query-len>
|
File without changes
|
@@ -0,0 +1,134 @@
|
|
1
|
+
#
|
2
|
+
# test/unit/bio/appl/bl2seq/test_report.rb - Unit test for
|
3
|
+
# Bio::Blast::Bl2seq::Report
|
4
|
+
#
|
5
|
+
# Copyright:: Copyright (C) 2006
|
6
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
7
|
+
# License:: Ruby's
|
8
|
+
#
|
9
|
+
# $Id: test_report.rb,v 1.2 2006/02/23 22:25:30 nakao Exp $
|
10
|
+
#
|
11
|
+
|
12
|
+
require 'pathname'
|
13
|
+
libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s
|
14
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
15
|
+
|
16
|
+
require 'test/unit'
|
17
|
+
require 'bio/appl/bl2seq/report'
|
18
|
+
|
19
|
+
|
20
|
+
module Bio
|
21
|
+
# Locates the bl2seq fixture files and hands their contents to the tests.
class TestBl2seqReportData
  # Directory five levels above this file, used as the root for fixture lookup.
  bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
  # Directory that holds the bl2seq sample outputs.
  TestDataBl2seq = Pathname.new(File.join(bioruby_root, 'test', 'data', 'bl2seq')).cleanpath.to_s

  # Returns the raw text of the fixture selected by +format+.
  #
  # 'empty'::  contents of cd8a_p53_e-5blastp.bl2seq
  # 'blastp':: contents of cd8a_cd8b_blastp.bl2seq
  #
  # 'blastn', 'blastx', 'tblastn', 'tblastx' and every other value (including
  # the default of 7) have no fixture and yield nil.
  #
  # Raises Errno::ENOENT when the selected fixture file is missing.
  def self.output(format = 7)
    case format
    when 'empty'
      # File.read closes the file itself; File.open(...).read left the
      # handle open until garbage collection.
      File.read(File.join(TestDataBl2seq, 'cd8a_p53_e-5blastp.bl2seq'))
    when 'blastp'
      File.read(File.join(TestDataBl2seq, 'cd8a_cd8b_blastp.bl2seq'))
    when 'blastn'
    when 'blastx'
    when 'tblastn'
    when 'tblastx'
    end
  end
end
|
38
|
+
|
39
|
+
|
40
|
+
# Checks the separator constants of Bio::Blast::Bl2seq::Report.
class TestBl2seqReportConstants < Test::Unit::TestCase
  # Both RS and DELIMITER are expected to be nil.
  def test_rs
    assert_nil(Bio::Blast::Bl2seq::Report::RS)
    assert_nil(Bio::Blast::Bl2seq::Report::DELIMITER)
  end
end
|
47
|
+
|
48
|
+
|
49
|
+
# Report-level checks for Bio::Blast::Bl2seq::Report, run against the
# 'empty' and 'blastp' fixtures.
class TestBl2seqReport < Test::Unit::TestCase

  # Parses both fixtures before every test.
  def setup
    @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
    @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
  end

  # Both fixtures must parse into a report object.
  def test_new
    assert(@empty)
    assert(@blastp)
  end

  # The listed report-level methods must not be callable on a bl2seq report.
  #
  # This test used to share its name with the iteration test further down,
  # so Ruby kept only the later definition and these checks never ran.
  # NOTE(review): presumably the Report class removes these methods (the test
  # name suggests undef) - confirm against the library.
  def test_undefed_methods
    methods = ['format0_parse_header',
               'program',
               'version',
               'version_number',
               'version_date',
               'message',
               'converged?',
               'reference',
               'db']

    [@empty, @blastp].each do |report|
      methods.each do |x|
        # respond_to? accepts a String on every Ruby version, whereas
        # methods.include?(String) is always false once #methods returns
        # Symbols, which made the check vacuous.
        assert_equal(false, report.respond_to?(x), "undefined? : #{x}")
      end
    end
  end

  # TestF0dbstat < Test::Unit::TestCase

  # db_num is expected to be 0 for both fixtures.
  def test_db_num
    assert_equal(0, @empty.db_num)
    assert_equal(0, @blastp.db_num)
  end

  # db_len is expected to be 393 for 'empty' and 210 for 'blastp'.
  def test_db_len
    assert_equal(393, @empty.db_len)
    assert_equal(210, @blastp.db_len)
  end

  # TestIteration < Test::Unit::TestCase

  # The listed methods must not be callable on the first iteration of either
  # report. Given its own name so that it no longer overrides the
  # report-level test above.
  def test_undefed_methods_for_iteration
    methods = ['message',
               'pattern_in_database',
               'pattern',
               'pattern_positions',
               'hits_found_again',
               'hits_newly_found',
               'hits_for_pattern',
               'parse_hitlist',
               'converged?']

    [@empty, @blastp].each do |report|
      iteration = report.iterations.first
      methods.each do |x|
        assert_equal(false, iteration.respond_to?(x), "undefined? : #{x}")
      end
    end
  end
end
|
114
|
+
|
115
|
+
|
116
|
+
# Hit-level checks for Bio::Blast::Bl2seq::Report.
class TestBl2seqReportHit < Test::Unit::TestCase
  # Parses the 'empty' and 'blastp' fixtures and keeps the first hit of each.
  def setup
    empty_text = Bio::TestBl2seqReportData.output('empty')
    @empty = Bio::Blast::Bl2seq::Report.new(empty_text)
    blastp_text = Bio::TestBl2seqReportData.output('blastp')
    @blastp = Bio::Blast::Bl2seq::Report.new(blastp_text)
    @empty_hit = @empty.hits.first
    @blastp_hit = @blastp.hits.first
  end

  # The 'empty' fixture is expected to contain no hits.
  def test_empty_hits
    hit_count = @empty.hits.size
    assert_equal(0, hit_count)
  end

  # The 'blastp' fixture is expected to contain exactly one Hit object.
  def test_hits
    hits = @blastp.hits
    assert_equal(Bio::Blast::Bl2seq::Report::Hit, hits.first.class)
    assert_equal(1, hits.size)
  end
end
|
133
|
+
|
134
|
+
end # module Bio
|