bio 0.7.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +71 -27
- data/bin/br_biofetch.rb +5 -17
- data/bin/br_bioflat.rb +14 -26
- data/bin/br_biogetseq.rb +6 -18
- data/bin/br_pmfetch.rb +6 -16
- data/doc/Changes-0.7.rd +35 -0
- data/doc/KEGG_API.rd +287 -172
- data/doc/KEGG_API.rd.ja +273 -160
- data/doc/Tutorial.rd +18 -9
- data/doc/Tutorial.rd.ja +656 -138
- data/lib/bio.rb +6 -24
- data/lib/bio/alignment.rb +5 -5
- data/lib/bio/appl/blast.rb +132 -98
- data/lib/bio/appl/blast/format0.rb +9 -19
- data/lib/bio/appl/blast/wublast.rb +5 -18
- data/lib/bio/appl/emboss.rb +40 -47
- data/lib/bio/appl/hmmer.rb +116 -82
- data/lib/bio/appl/hmmer/report.rb +509 -364
- data/lib/bio/appl/spidey/report.rb +7 -18
- data/lib/bio/data/na.rb +3 -21
- data/lib/bio/db.rb +3 -21
- data/lib/bio/db/aaindex.rb +147 -52
- data/lib/bio/db/embl/common.rb +27 -6
- data/lib/bio/db/embl/embl.rb +18 -10
- data/lib/bio/db/embl/sptr.rb +87 -67
- data/lib/bio/db/embl/swissprot.rb +32 -3
- data/lib/bio/db/embl/trembl.rb +32 -3
- data/lib/bio/db/embl/uniprot.rb +32 -3
- data/lib/bio/db/fasta.rb +327 -289
- data/lib/bio/db/medline.rb +25 -4
- data/lib/bio/db/nbrf.rb +12 -20
- data/lib/bio/db/pdb.rb +4 -1
- data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
- data/lib/bio/db/pdb/pdb.rb +13 -8
- data/lib/bio/db/rebase.rb +93 -97
- data/lib/bio/feature.rb +2 -31
- data/lib/bio/io/ddbjxml.rb +167 -139
- data/lib/bio/io/fastacmd.rb +89 -56
- data/lib/bio/io/flatfile.rb +994 -278
- data/lib/bio/io/flatfile/index.rb +257 -194
- data/lib/bio/io/flatfile/indexer.rb +37 -29
- data/lib/bio/reference.rb +147 -64
- data/lib/bio/sequence.rb +57 -417
- data/lib/bio/sequence/aa.rb +64 -0
- data/lib/bio/sequence/common.rb +175 -0
- data/lib/bio/sequence/compat.rb +68 -0
- data/lib/bio/sequence/format.rb +134 -0
- data/lib/bio/sequence/generic.rb +24 -0
- data/lib/bio/sequence/na.rb +189 -0
- data/lib/bio/shell.rb +9 -23
- data/lib/bio/shell/core.rb +130 -125
- data/lib/bio/shell/demo.rb +143 -0
- data/lib/bio/shell/{session.rb → interface.rb} +42 -40
- data/lib/bio/shell/object.rb +52 -0
- data/lib/bio/shell/plugin/codon.rb +4 -22
- data/lib/bio/shell/plugin/emboss.rb +23 -0
- data/lib/bio/shell/plugin/entry.rb +34 -25
- data/lib/bio/shell/plugin/flatfile.rb +5 -23
- data/lib/bio/shell/plugin/keggapi.rb +11 -24
- data/lib/bio/shell/plugin/midi.rb +5 -23
- data/lib/bio/shell/plugin/obda.rb +4 -22
- data/lib/bio/shell/plugin/seq.rb +6 -24
- data/lib/bio/shell/rails/Rakefile +10 -0
- data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
- data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
- data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
- data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
- data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
- data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
- data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
- data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
- data/lib/bio/shell/rails/config/boot.rb +19 -0
- data/lib/bio/shell/rails/config/database.yml +85 -0
- data/lib/bio/shell/rails/config/environment.rb +53 -0
- data/lib/bio/shell/rails/config/environments/development.rb +19 -0
- data/lib/bio/shell/rails/config/environments/production.rb +19 -0
- data/lib/bio/shell/rails/config/environments/test.rb +19 -0
- data/lib/bio/shell/rails/config/routes.rb +19 -0
- data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
- data/lib/bio/shell/rails/public/404.html +8 -0
- data/lib/bio/shell/rails/public/500.html +8 -0
- data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
- data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
- data/lib/bio/shell/rails/public/dispatch.rb +10 -0
- data/lib/bio/shell/rails/public/favicon.ico +0 -0
- data/lib/bio/shell/rails/public/images/icon.png +0 -0
- data/lib/bio/shell/rails/public/images/rails.png +0 -0
- data/lib/bio/shell/rails/public/index.html +277 -0
- data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
- data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
- data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
- data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
- data/lib/bio/shell/rails/public/robots.txt +1 -0
- data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
- data/lib/bio/shell/rails/script/about +3 -0
- data/lib/bio/shell/rails/script/breakpointer +3 -0
- data/lib/bio/shell/rails/script/console +3 -0
- data/lib/bio/shell/rails/script/destroy +3 -0
- data/lib/bio/shell/rails/script/generate +3 -0
- data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
- data/lib/bio/shell/rails/script/performance/profiler +3 -0
- data/lib/bio/shell/rails/script/plugin +3 -0
- data/lib/bio/shell/rails/script/process/reaper +3 -0
- data/lib/bio/shell/rails/script/process/spawner +3 -0
- data/lib/bio/shell/rails/script/process/spinner +3 -0
- data/lib/bio/shell/rails/script/runner +3 -0
- data/lib/bio/shell/rails/script/server +42 -0
- data/lib/bio/shell/rails/test/test_helper.rb +28 -0
- data/lib/bio/shell/web.rb +90 -0
- data/lib/bio/util/contingency_table.rb +231 -225
- data/sample/any2fasta.rb +59 -0
- data/test/data/HMMER/hmmpfam.out +64 -0
- data/test/data/HMMER/hmmsearch.out +88 -0
- data/test/data/aaindex/DAYM780301 +30 -0
- data/test/data/aaindex/PRAM900102 +20 -0
- data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
- data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
- data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
- data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
- data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
- data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
- data/test/unit/bio/appl/blast/test_report.rb +15 -12
- data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
- data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
- data/test/unit/bio/appl/test_blast.rb +5 -5
- data/test/unit/bio/data/test_na.rb +9 -18
- data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
- data/test/unit/bio/db/test_aaindex.rb +197 -0
- data/test/unit/bio/io/test_fastacmd.rb +55 -0
- data/test/unit/bio/sequence/test_aa.rb +102 -0
- data/test/unit/bio/sequence/test_common.rb +178 -0
- data/test/unit/bio/sequence/test_compat.rb +82 -0
- data/test/unit/bio/sequence/test_na.rb +242 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
- data/test/unit/bio/test_alignment.rb +15 -7
- data/test/unit/bio/test_reference.rb +198 -0
- data/test/unit/bio/test_sequence.rb +4 -49
- data/test/unit/bio/test_shell.rb +2 -2
- metadata +118 -15
- data/lib/bio/io/brdb.rb +0 -103
- data/lib/bioruby.rb +0 -34
data/sample/any2fasta.rb
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# any2fasta.rb - convert input file into FASTA format using a regex
|
|
4
|
+
# filter
|
|
5
|
+
#
|
|
6
|
+
# Copyright (C) 2006 Pjotr Prins <p@bioruby.org>
|
|
7
|
+
#
|
|
8
|
+
# This program is free software; you can redistribute it and/or modify
|
|
9
|
+
# it under the terms of the GNU General Public License as published by
|
|
10
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
11
|
+
# (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This program is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
16
|
+
# GNU General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# $Id: any2fasta.rb,v 1.1 2006/02/17 14:59:27 pjotr Exp $
|
|
19
|
+
#
|
|
20
|
+
|
|
21
|
+
require 'bio/io/flatfile'
|
|
22
|
+
|
|
23
|
+
include Bio
|
|
24
|
+
|
|
25
|
+
# Help text shown when the script is started without any arguments.
help_text = <<USAGE

Usage: any2fasta.rb [regex] infiles

Examples:

Output all sequences containing GATC or GATT ignoring case:

any2fasta.rb "/GAT[CT]/i" *.seq > reduced.fasta

USAGE

# Nothing to convert: print the help text and leave with exit status 1.
unless ARGV.size > 0
  print(help_text)
  exit(1)
end
|
|
41
|
+
|
|
42
|
+
# Turns a command-line argument written like a Ruby regex literal
# ("/GAT[CT]/i") into a Regexp object WITHOUT evaluating it as Ruby code.
#
# arg:: the first command-line argument (may be nil).
#
# Returns a Regexp when +arg+ has the form /source/flags and does not name
# an existing file; returns nil otherwise, in which case the caller should
# treat +arg+ as an input file name.
#
# Supported flags are i (ignore case), m (multiline) and x (extended);
# o is accepted and ignored.  Any other trailing characters mean the
# argument is not recognised as a regex.
#
# Raises RegexpError when the source between the slashes is not a valid
# regular expression.
def parse_regex_arg(arg)
  # An existing file always wins over the regex interpretation.
  return nil if arg.nil? || File.exist?(arg)

  literal = %r{\A/(.*)/([imxo]*)\z}m.match(arg)
  return nil unless literal

  flags = 0
  flags |= Regexp::IGNORECASE if literal[2].include?('i')
  flags |= Regexp::MULTILINE  if literal[2].include?('m')
  flags |= Regexp::EXTENDED   if literal[2].include?('x')
  Regexp.new(literal[1], flags)
end

# ---- Valid regular expression - if it is not a file
# The filter used to be spliced into eval(), which ran arbitrary Ruby code
# taken from the command line; it is now compiled once, up front.
regex = parse_regex_arg(ARGV[0])
ARGV.shift if regex

# Every remaining argument is an input file; print its entries as FASTA
# (70 columns per line), skipping entries the filter does not match.
ARGV.each do |fn|
  ff = Bio::FlatFile.auto(fn)
  ff.each_entry do |entry|
    next if regex && entry.seq !~ regex
    print entry.seq.to_fasta(entry.definition, 70)
  end
end
|
|
59
|
+
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
hmmpfam - search one or more sequences against HMM database
|
|
2
|
+
HMMER 2.3.2 (Oct 2003)
|
|
3
|
+
Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
|
|
4
|
+
Freely distributed under the GNU General Public License (GPL)
|
|
5
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
6
|
+
HMM file: /Users/nakao/Sites/iprscan/data/Pfam
|
|
7
|
+
Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc
|
|
8
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
9
|
+
|
|
10
|
+
Query sequence: 104K_THEPA
|
|
11
|
+
Accession: [none]
|
|
12
|
+
Description: [none]
|
|
13
|
+
|
|
14
|
+
Scores for sequence family classification (score includes all domains):
|
|
15
|
+
Model Description Score E-value N
|
|
16
|
+
-------- ----------- ----- ------- ---
|
|
17
|
+
PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4
|
|
18
|
+
|
|
19
|
+
Parsed for domains:
|
|
20
|
+
Model Domain seq-f seq-t hmm-f hmm-t score E-value
|
|
21
|
+
-------- ------- ----- ----- ----- ----- ----- -------
|
|
22
|
+
PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16
|
|
23
|
+
PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16
|
|
24
|
+
PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16
|
|
25
|
+
PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16
|
|
26
|
+
|
|
27
|
+
Alignments of top-scoring domains:
|
|
28
|
+
PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16
|
|
29
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
|
30
|
+
t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W
|
|
31
|
+
104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81
|
|
32
|
+
|
|
33
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
|
34
|
+
e++ + +l++ ++++++++++++++++ +++
|
|
35
|
+
104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111
|
|
36
|
+
|
|
37
|
+
PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16
|
|
38
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
|
39
|
+
+L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++
|
|
40
|
+
104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195
|
|
41
|
+
|
|
42
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
|
43
|
+
++ ++++vt++++++ +++L+l+++ +
|
|
44
|
+
104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224
|
|
45
|
+
|
|
46
|
+
PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16
|
|
47
|
+
*->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
|
|
48
|
+
+Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++
|
|
49
|
+
104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311
|
|
50
|
+
|
|
51
|
+
eseddpefglivtlsfyldsnkfLvlllintak<-*
|
|
52
|
+
es+ + + ++i +++y+++n ++++l++n+++
|
|
53
|
+
104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343
|
|
54
|
+
|
|
55
|
+
PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16
|
|
56
|
+
*->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv
|
|
57
|
+
+LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+
|
|
58
|
+
104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423
|
|
59
|
+
|
|
60
|
+
WeseddpefglivtlsfyldsnkfLvlllintak<-*
|
|
61
|
+
W++e+ ++ l++++++++d++ +Lv+l+i++
|
|
62
|
+
104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456
|
|
63
|
+
|
|
64
|
+
//
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
hmmsearch - search a sequence database with a profile HMM
|
|
2
|
+
HMMER 2.2g (August 2001)
|
|
3
|
+
Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
|
|
4
|
+
Freely distributed under the GNU General Public License (GPL)
|
|
5
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
6
|
+
HMM file: /sw/share/hmmer/demo/7tm_1 [7tm_1]
|
|
7
|
+
Sequence database: /sw/share/hmmer/demo/P08908.fasta
|
|
8
|
+
per-sequence score cutoff: [none]
|
|
9
|
+
per-domain score cutoff: [none]
|
|
10
|
+
per-sequence Eval cutoff: <= 10
|
|
11
|
+
per-domain Eval cutoff: [none]
|
|
12
|
+
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
|
|
13
|
+
|
|
14
|
+
Query HMM: 7tm_1
|
|
15
|
+
Accession: PF00001
|
|
16
|
+
Description: 7 transmembrane receptor (rhodopsin family)
|
|
17
|
+
[HMM has been calibrated; E-values are empirical estimates]
|
|
18
|
+
|
|
19
|
+
Scores for complete sequences (score includes all domains):
|
|
20
|
+
Sequence Description Score E-value N
|
|
21
|
+
-------- ----------- ----- ------- ---
|
|
22
|
+
sp|P08908|5H1A_HUMAN 5-hydroxytryptamine 1A receptor 377.1 5.5e-130 1
|
|
23
|
+
|
|
24
|
+
Parsed for domains:
|
|
25
|
+
Sequence Domain seq-f seq-t hmm-f hmm-t score E-value
|
|
26
|
+
-------- ------- ----- ----- ----- ----- ----- -------
|
|
27
|
+
sp|P08908|5H1A_HUMAN 1/1 53 400 .. 1 275 [] 377.1 5.5e-130
|
|
28
|
+
|
|
29
|
+
Alignments of top-scoring domains:
|
|
30
|
+
sp|P08908|5H1A_HUMAN: domain 1 of 1, from 53 to 400: score 377.1, E = 5.5e-130
|
|
31
|
+
*->GNlLVilvilrtkklrtptnifilNLAvADLLflltlppwalyylvg
|
|
32
|
+
GN+ V+++i+++++l++++n++i++LAv+DL+++++++p+a++y v
|
|
33
|
+
sp|P08908| 53 GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVL 99
|
|
34
|
+
|
|
35
|
+
gsedWpfGsalCklvtaldvvnmyaSillLtaISiDRYlAIvhPlryrrr
|
|
36
|
+
+ W++G++ C+l++aldv+++++Sil+L+aI++DRY+AI++P+ y ++
|
|
37
|
+
sp|P08908| 100 N--KWTLGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK 147
|
|
38
|
+
|
|
39
|
+
rtsprrAkvvillvWvlalllslPpllfswvktveegngtlnvnvtvCli
|
|
40
|
+
rt prrA+++i+l+W++++l+s+Pp +++w++++ + +C+i
|
|
41
|
+
sp|P08908| 148 RT-PRRAAALISLTWLIGFLISIPP-MLGWRTPEDRSD------PDACTI 189
|
|
42
|
+
|
|
43
|
+
dfpeestasvstwlvsyvllstlvgFllPllvilvcYtrIlrtlrkrark
|
|
44
|
+
+ +++ y+++st+++F++Pll++lv+Y+rI+r++r r rk
|
|
45
|
+
sp|P08908| 190 SKDHG-----------YTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK 228
|
|
46
|
+
|
|
47
|
+
gas...............................................
|
|
48
|
+
+ + ++++ +++++ ++ ++++++ ++++++++ + + +++ ++ +
|
|
49
|
+
sp|P08908| 229 TVKkvektgadtrhgaspapqpkksvngesgsrnwrlgveskaggalcan 278
|
|
50
|
+
|
|
51
|
+
..................................................
|
|
52
|
+
+ ++++++ + + ++ ++++++ + +++ ++++ + + +++++++
|
|
53
|
+
sp|P08908| 279 gavrqgddgaalevievhrvgnskehlplpseagptpcapasferknern 328
|
|
54
|
+
|
|
55
|
+
.....kkrsskerkaaktllvvvvvFvlCWlPyfivllldtlc.lsiims
|
|
56
|
+
+ ++k+ +erk++ktl++++++F+lCWlP+fiv+l+ ++c++s++m
|
|
57
|
+
sp|P08908| 329 aeakrKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPFCeSSCHM- 377
|
|
58
|
+
|
|
59
|
+
stCelervlptallvtlwLayvNsclNPiIY<-*
|
|
60
|
+
+ + +++wL+y+Ns lNP+IY
|
|
61
|
+
sp|P08908| 378 --------PTLLGAIINWLGYSNSLLNPVIY 400
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
Histogram of all scores:
|
|
65
|
+
score obs exp (one = represents 1 sequences)
|
|
66
|
+
----- --- ---
|
|
67
|
+
377 1 0|=
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
% Statistical details of theoretical EVD fit:
|
|
71
|
+
mu = -10.6639
|
|
72
|
+
lambda = 0.7676
|
|
73
|
+
chi-sq statistic = 0.0000
|
|
74
|
+
P(chi-square) = 0
|
|
75
|
+
|
|
76
|
+
Total sequences searched: 1
|
|
77
|
+
|
|
78
|
+
Whole sequence top hits:
|
|
79
|
+
tophits_s report:
|
|
80
|
+
Total hits: 1
|
|
81
|
+
Satisfying E cutoff: 1
|
|
82
|
+
Total memory: 16K
|
|
83
|
+
|
|
84
|
+
Domain top hits:
|
|
85
|
+
tophits_s report:
|
|
86
|
+
Total hits: 1
|
|
87
|
+
Satisfying E cutoff: 1
|
|
88
|
+
Total memory: 17K
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
H DAYM780301
|
|
2
|
+
D Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
|
|
3
|
+
R
|
|
4
|
+
A Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C.
|
|
5
|
+
T A model of evolutionary change in proteins
|
|
6
|
+
J In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (Dayhoff,
|
|
7
|
+
M.O., ed.), National Biomedical Research Foundation, Washington, D.C.,
|
|
8
|
+
p.352 (1978)
|
|
9
|
+
M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
|
|
10
|
+
2.
|
|
11
|
+
-2. 6.
|
|
12
|
+
0. 0. 2.
|
|
13
|
+
0. -1. 2. 4.
|
|
14
|
+
-2. -4. -4. -5. 12.
|
|
15
|
+
0. 1. 1. 2. -5. 4.
|
|
16
|
+
0. -1. 1. 3. -5. 2. 4.
|
|
17
|
+
1. -3. 0. 1. -3. -1. 0. 5.
|
|
18
|
+
-1. 2. 2. 1. -3. 3. 1. -2. 6.
|
|
19
|
+
-1. -2. -2. -2. -2. -2. -2. -3. -2. 5.
|
|
20
|
+
-2. -3. -3. -4. -6. -2. -3. -4. -2. 2. 6.
|
|
21
|
+
-1. 3. 1. 0. -5. 1. 0. -2. 0. -2. -3. 5.
|
|
22
|
+
-1. 0. -2. -3. -5. -1. -2. -3. -2. 2. 4. 0. 6.
|
|
23
|
+
-4. -4. -4. -6. -4. -5. -5. -5. -2. 1. 2. -5. 0. 9.
|
|
24
|
+
1. 0. -1. -1. -3. 0. -1. -1. 0. -2. -3. -1. -2. -5. 6.
|
|
25
|
+
1. 0. 1. 0. 0. -1. 0. 1. -1. -1. -3. 0. -2. -3. 1. 2.
|
|
26
|
+
1. -1. 0. 0. -2. -1. 0. 0. -1. 0. -2. 0. -1. -3. 0. 1. 3.
|
|
27
|
+
-6. 2. -4. -7. -8. -5. -7. -7. -3. -5. -2. -3. -4. 0. -6. -2. -5. 17.
|
|
28
|
+
-3. -4. -2. -4. 0. -4. -4. -5. 0. -1. -1. -4. -2. 7. -5. -3. -3. 0. 10.
|
|
29
|
+
0. -2. -2. -2. -2. -2. -2. -1. -2. 4. 2. -2. 2. -1. -1. -1. 0. -6. -2. 4.
|
|
30
|
+
//
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
H PRAM900102
|
|
2
|
+
D Relative frequency in alpha-helix (Prabhakaran, 1990)
|
|
3
|
+
R LIT:1614053b PMID:2390062
|
|
4
|
+
A Prabhakaran, M.
|
|
5
|
+
T The distribution of physical, chemical and conformational properties in
|
|
6
|
+
signal and nascent peptides
|
|
7
|
+
J Biochem. J. 269, 691-696 (1990) Original reference of these three data:
|
|
8
|
+
Creighton, T.E. In "Protein Structure and Melecular Properties", (Freeman,
|
|
9
|
+
W.H., ed.), San Francisco P.235 (1983)
|
|
10
|
+
C LEVM780101 1.000 LEVM780104 0.964 PALJ810101 0.943
|
|
11
|
+
KANM800101 0.942 ISOY800101 0.929 MAXF760101 0.924
|
|
12
|
+
ROBB760101 0.916 GEIM800101 0.912 GEIM800104 0.907
|
|
13
|
+
RACS820108 0.904 PALJ810102 0.902 PALJ810109 0.898
|
|
14
|
+
NAGK730101 0.894 CRAJ730101 0.887 CHOP780201 0.873
|
|
15
|
+
TANS770101 0.854 KANM800103 0.850 QIAN880107 0.829
|
|
16
|
+
QIAN880106 0.827 BURA740101 0.805 NAGK730103 -0.809
|
|
17
|
+
I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
|
|
18
|
+
1.29 0.96 0.90 1.04 1.11 1.27 1.44 0.56 1.22 0.97
|
|
19
|
+
1.30 1.23 1.47 1.07 0.52 0.82 0.82 0.99 0.72 0.91
|
|
20
|
+
//
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
|
|
2
|
+
precursor (T-lymphocyte differentiation antigen T8/Leu-2).
|
|
3
|
+
(235 letters)
|
|
4
|
+
|
|
5
|
+
>CD8B_HUMAN P10966 T-cell surface glycoprotein CD8 beta chain
|
|
6
|
+
precursor (Antigen CD8B).
|
|
7
|
+
Length = 210
|
|
8
|
+
|
|
9
|
+
Score = 29.6 bits (65), Expect = 5e-05
|
|
10
|
+
Identities = 21/90 (23%), Positives = 37/90 (41%), Gaps = 3/90 (3%)
|
|
11
|
+
|
|
12
|
+
Query: 39 VELKCQVLLSNPTSGCSWLFQ---PRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLG 95
|
|
13
|
+
V L C+ +S WL Q P + L+ S E ++ ++ + R
|
|
14
|
+
Sbjct: 37 VMLSCEAKISLSNMRIYWLRQRQAPSSDSHHEFLALWDSAKGTIHGEEVEQEKIAVFRDA 96
|
|
15
|
+
|
|
16
|
+
Query: 96 DTFVLTLSDFRRENEGYYFCSALSNSIMYF 125
|
|
17
|
+
F+L L+ + E+ G YFC + + + F
|
|
18
|
+
Sbjct: 97 SRFILNLTSVKPEDSGIYFCMIVGSPELTF 126
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
Lambda K H
|
|
22
|
+
0.323 0.137 0.436
|
|
23
|
+
|
|
24
|
+
Gapped
|
|
25
|
+
Lambda K H
|
|
26
|
+
0.267 0.0410 0.140
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
Matrix: BLOSUM62
|
|
30
|
+
Gap Penalties: Existence: 11, Extension: 1
|
|
31
|
+
Number of Hits to DB: 102
|
|
32
|
+
Number of Sequences: 0
|
|
33
|
+
Number of extensions: 5
|
|
34
|
+
Number of successful extensions: 3
|
|
35
|
+
Number of sequences better than 10.0: 1
|
|
36
|
+
Number of HSP's better than 10.0 without gapping: 1
|
|
37
|
+
Number of HSP's successfully gapped in prelim test: 0
|
|
38
|
+
Number of HSP's that attempted gapping in prelim test: 0
|
|
39
|
+
Number of HSP's gapped (non-prelim): 1
|
|
40
|
+
length of query: 235
|
|
41
|
+
length of database: 210
|
|
42
|
+
effective HSP length: 22
|
|
43
|
+
effective length of query: 213
|
|
44
|
+
effective length of database: 188
|
|
45
|
+
effective search space: 40044
|
|
46
|
+
effective search space used: 40044
|
|
47
|
+
T: 11
|
|
48
|
+
A: 40
|
|
49
|
+
X1: 16 ( 7.5 bits)
|
|
50
|
+
X2: 38 (14.6 bits)
|
|
51
|
+
X3: 64 (24.7 bits)
|
|
52
|
+
S1: 20 (12.2 bits)
|
|
53
|
+
S2: 20 (12.3 bits)
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
|
|
2
|
+
precursor (T-lymphocyte differentiation antigen T8/Leu-2).
|
|
3
|
+
(235 letters)
|
|
4
|
+
|
|
5
|
+
Lambda K H
|
|
6
|
+
0.323 0.137 0.436
|
|
7
|
+
|
|
8
|
+
Gapped
|
|
9
|
+
Lambda K H
|
|
10
|
+
0.267 0.0410 0.140
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
Matrix: BLOSUM62
|
|
14
|
+
Gap Penalties: Existence: 11, Extension: 1
|
|
15
|
+
Number of Hits to DB: 175
|
|
16
|
+
Number of Sequences: 0
|
|
17
|
+
Number of extensions: 8
|
|
18
|
+
Number of successful extensions: 0
|
|
19
|
+
Number of sequences better than 1.0e-05: 0
|
|
20
|
+
Number of HSP's better than 0.0 without gapping: 0
|
|
21
|
+
Number of HSP's successfully gapped in prelim test: 0
|
|
22
|
+
Number of HSP's that attempted gapping in prelim test: 0
|
|
23
|
+
Number of HSP's gapped (non-prelim): 0
|
|
24
|
+
length of query: 235
|
|
25
|
+
length of database: 393
|
|
26
|
+
effective HSP length: 27
|
|
27
|
+
effective length of query: 208
|
|
28
|
+
effective length of database: 366
|
|
29
|
+
effective search space: 76128
|
|
30
|
+
effective search space used: 76128
|
|
31
|
+
T: 11
|
|
32
|
+
A: 40
|
|
33
|
+
X1: 16 ( 7.5 bits)
|
|
34
|
+
X2: 38 (14.6 bits)
|
|
35
|
+
X3: 64 (24.7 bits)
|
|
36
|
+
S1: 41 (22.0 bits)
|
|
37
|
+
S2: 74 (33.1 bits)
|
|
File without changes
|
|
@@ -12,7 +12,7 @@ aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal)
|
|
|
12
12
|
aspartate kinase (A)
|
|
13
13
|
(820 letters)
|
|
14
14
|
|
|
15
|
-
Database:
|
|
15
|
+
Database: b0002.faa
|
|
16
16
|
1 sequences; 820 total letters
|
|
17
17
|
|
|
18
18
|
Searching.done
|
|
@@ -88,7 +88,7 @@ Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
|
|
|
88
88
|
Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
|
|
89
89
|
|
|
90
90
|
|
|
91
|
-
Database:
|
|
91
|
+
Database: b0002.faa
|
|
92
92
|
Posted date: Aug 7, 2005 7:29 AM
|
|
93
93
|
Number of letters in database: 820
|
|
94
94
|
Number of sequences in database: 1
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
<BlastOutput_program>blastp</BlastOutput_program>
|
|
5
5
|
<BlastOutput_version>blastp 2.2.10 [Oct-19-2004]</BlastOutput_version>
|
|
6
6
|
<BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~"Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs", Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
|
|
7
|
-
<BlastOutput_db>
|
|
7
|
+
<BlastOutput_db>b0002.faa</BlastOutput_db>
|
|
8
8
|
<BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
|
|
9
9
|
<BlastOutput_query-def>eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)</BlastOutput_query-def>
|
|
10
10
|
<BlastOutput_query-len>820</BlastOutput_query-len>
|
|
File without changes
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
#
|
|
2
|
+
# test/unit/bio/appl/bl2seq/test_report.rb - Unit test for
|
|
3
|
+
# Bio::Blast::Bl2seq::Report
|
|
4
|
+
#
|
|
5
|
+
# Copyright:: Copyright (C) 2006
|
|
6
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
7
|
+
# License:: Ruby's
|
|
8
|
+
#
|
|
9
|
+
# $Id: test_report.rb,v 1.2 2006/02/23 22:25:30 nakao Exp $
|
|
10
|
+
#
|
|
11
|
+
|
|
12
|
+
require 'pathname'
|
|
13
|
+
libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s
|
|
14
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
|
15
|
+
|
|
16
|
+
require 'test/unit'
|
|
17
|
+
require 'bio/appl/bl2seq/report'
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
module Bio
|
|
21
|
+
# Locates the bl2seq sample reports and hands their raw text to the tests.
class TestBl2seqReportData
  # Five directories above this test file.
  bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
  # Directory the sample reports are read from: <root>/test/data/bl2seq.
  TestDataBl2seq = Pathname.new(File.join(bioruby_root, 'test', 'data', 'bl2seq')).cleanpath.to_s

  # Returns the full text of the sample report selected by +format+:
  #
  # 'empty'::  contents of cd8a_p53_e-5blastp.bl2seq
  # 'blastp':: contents of cd8a_cd8b_blastp.bl2seq
  #
  # 'blastn', 'blastx', 'tblastn' and 'tblastx' are recognised but have no
  # sample file yet; they, and any other value (including the default 7),
  # yield nil.
  def self.output(format = 7)
    case format
    when 'empty'
      # File.read closes the file itself; File.open(...).read left it open.
      File.read(File.join(TestDataBl2seq, 'cd8a_p53_e-5blastp.bl2seq'))
    when 'blastp'
      File.read(File.join(TestDataBl2seq, 'cd8a_cd8b_blastp.bl2seq'))
    when 'blastn'
    when 'blastx'
    when 'tblastn'
    when 'tblastx'
    end
  end
end
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Checks the RS and DELIMITER constants of Bio::Blast::Bl2seq::Report.
class TestBl2seqReportConstants < Test::Unit::TestCase
  # Both constants are expected to be nil.
  def test_rs
    assert_nil(Bio::Blast::Bl2seq::Report::RS)
    assert_nil(Bio::Blast::Bl2seq::Report::DELIMITER)
  end
end
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Report-level checks run against two sample bl2seq outputs: 'empty'
# (cd8a_p53_e-5blastp.bl2seq) and 'blastp' (cd8a_cd8b_blastp.bl2seq).
class TestBl2seqReport < Test::Unit::TestCase

  # Parses both sample reports afresh before every test.
  def setup
    @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
    @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
  end

  # Both reports can be constructed.
  def test_new
    assert(@empty)
    assert(@blastp)
  end

  # The listed names must not appear among the report's methods.
  # include? answers true/false, never nil, so the result is compared
  # with false (assert_nil could not succeed here).
  def test_undefed_methods
    methods = ['format0_parse_header',
               'program',
               'version',
               'version_number',
               'version_date',
               'message',
               'converged?',
               'reference',
               'db']

    methods.each do |x|
      assert_equal(false, @empty.methods.include?(x), "undefined? : #{x}")
    end

    methods.each do |x|
      assert_equal(false, @blastp.methods.include?(x), "undefined? : #{x}")
    end
  end

  # TestF0dbstat < Test::Unit::TestCase

  # db_num is 0 for both sample reports.
  def test_db_num
    assert_equal(0, @empty.db_num)
    assert_equal(0, @blastp.db_num)
  end

  # db_len is 393 for the 'empty' report and 210 for the 'blastp' report.
  def test_db_len
    assert_equal(393, @empty.db_len)
    assert_equal(210, @blastp.db_len)
  end

  # TestIteration < Test::Unit::TestCase

  # Same kind of check for the first iteration object of each report.
  # This test needs its own name: a second method called
  # test_undefed_methods would replace the report-level one above, which
  # would then never run.
  def test_undefed_methods_for_iteration
    methods = ['message',
               'pattern_in_database',
               'pattern',
               'pattern_positions',
               'hits_found_again',
               'hits_newly_found',
               'hits_for_pattern',
               'parse_hitlist',
               'converged?']

    methods.each do |x|
      assert_equal(false, @empty.iterations.first.methods.include?(x), "undefined? : #{x}")
    end

    methods.each do |x|
      assert_equal(false, @blastp.iterations.first.methods.include?(x), "undefined? : #{x}")
    end
  end
end
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
# Hit-level checks for the 'empty' and 'blastp' sample bl2seq reports.
class TestBl2seqReportHit < Test::Unit::TestCase
  # Parses both sample reports and keeps the first hit of each
  # (whatever hits.first yields for the 'empty' report).
  def setup
    @empty, @blastp = ['empty', 'blastp'].map do |kind|
      Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output(kind))
    end
    @empty_hit = @empty.hits.first
    @blastp_hit = @blastp.hits.first
  end

  # The 'empty' report has no hits.
  def test_empty_hits
    hit_count = @empty.hits.size
    assert_equal(0, hit_count)
  end

  # The 'blastp' report has exactly one hit, and it is a Report::Hit.
  def test_hits
    first_hit_class = @blastp.hits.first.class
    assert_equal(Bio::Blast::Bl2seq::Report::Hit, first_hit_class)
    hit_count = @blastp.hits.size
    assert_equal(1, hit_count)
  end
end
|
|
133
|
+
|
|
134
|
+
end # module Bio
|