bio 0.7.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # any2fasta.rb - convert input file into FASTA format using a regex
4
+ # filter
5
+ #
6
+ # Copyright (C) 2006 Pjotr Prins <p@bioruby.org>
7
+ #
8
+ # This program is free software; you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # $Id: any2fasta.rb,v 1.1 2006/02/17 14:59:27 pjotr Exp $
19
+ #
20
+
21
+ require 'bio/io/flatfile'
22
+
23
+ include Bio
24
+
25
+ usage = <<USAGE
26
+
27
+ Usage: any2fasta.rb [regex] infiles
28
+
29
+ Examples:
30
+
31
+ Output all sequences containing GATC or GATT ignoring case:
32
+
33
+ any2fasta.rb "/GAT[CT]/i" *.seq > reduced.fasta
34
+
35
+ USAGE
36
+
37
+ if ARGV.size == 0
38
+ print usage
39
+ exit 1
40
+ end
41
+
42
+ # ---- Valid regular expression - if it is not a file
43
+ regex = ARGV[0]
44
+ if regex=~/^\// and !File.exist?(regex)
45
+ ARGV.shift
46
+ else
47
+ regex = nil
48
+ end
49
+
50
+ ARGV.each do | fn |
51
+ ff = Bio::FlatFile.auto(fn)
52
+ ff.each_entry do |entry|
53
+ if regex != nil
54
+ next if eval("entry.seq !~ #{regex}")
55
+ end
56
+ print entry.seq.to_fasta(entry.definition,70)
57
+ end
58
+ end
59
+
@@ -0,0 +1,64 @@
1
+ hmmpfam - search one or more sequences against HMM database
2
+ HMMER 2.3.2 (Oct 2003)
3
+ Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
4
+ Freely distributed under the GNU General Public License (GPL)
5
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ HMM file: /Users/nakao/Sites/iprscan/data/Pfam
7
+ Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc
8
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9
+
10
+ Query sequence: 104K_THEPA
11
+ Accession: [none]
12
+ Description: [none]
13
+
14
+ Scores for sequence family classification (score includes all domains):
15
+ Model Description Score E-value N
16
+ -------- ----------- ----- ------- ---
17
+ PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4
18
+
19
+ Parsed for domains:
20
+ Model Domain seq-f seq-t hmm-f hmm-t score E-value
21
+ -------- ------- ----- ----- ----- ----- ----- -------
22
+ PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16
23
+ PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16
24
+ PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16
25
+ PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16
26
+
27
+ Alignments of top-scoring domains:
28
+ PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16
29
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
30
+ t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W
31
+ 104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81
32
+
33
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
34
+ e++ + +l++ ++++++++++++++++ +++
35
+ 104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111
36
+
37
+ PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16
38
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
39
+ +L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++
40
+ 104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195
41
+
42
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
43
+ ++ ++++vt++++++ +++L+l+++ +
44
+ 104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224
45
+
46
+ PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16
47
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
48
+ +Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++
49
+ 104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311
50
+
51
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
52
+ es+ + + ++i +++y+++n ++++l++n+++
53
+ 104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343
54
+
55
+ PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16
56
+ *->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv
57
+ +LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+
58
+ 104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423
59
+
60
+ WeseddpefglivtlsfyldsnkfLvlllintak<-*
61
+ W++e+ ++ l++++++++d++ +Lv+l+i++
62
+ 104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456
63
+
64
+ //
@@ -0,0 +1,88 @@
1
+ hmmsearch - search a sequence database with a profile HMM
2
+ HMMER 2.2g (August 2001)
3
+ Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
4
+ Freely distributed under the GNU General Public License (GPL)
5
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ HMM file: /sw/share/hmmer/demo/7tm_1 [7tm_1]
7
+ Sequence database: /sw/share/hmmer/demo/P08908.fasta
8
+ per-sequence score cutoff: [none]
9
+ per-domain score cutoff: [none]
10
+ per-sequence Eval cutoff: <= 10
11
+ per-domain Eval cutoff: [none]
12
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
13
+
14
+ Query HMM: 7tm_1
15
+ Accession: PF00001
16
+ Description: 7 transmembrane receptor (rhodopsin family)
17
+ [HMM has been calibrated; E-values are empirical estimates]
18
+
19
+ Scores for complete sequences (score includes all domains):
20
+ Sequence Description Score E-value N
21
+ -------- ----------- ----- ------- ---
22
+ sp|P08908|5H1A_HUMAN 5-hydroxytryptamine 1A receptor 377.1 5.5e-130 1
23
+
24
+ Parsed for domains:
25
+ Sequence Domain seq-f seq-t hmm-f hmm-t score E-value
26
+ -------- ------- ----- ----- ----- ----- ----- -------
27
+ sp|P08908|5H1A_HUMAN 1/1 53 400 .. 1 275 [] 377.1 5.5e-130
28
+
29
+ Alignments of top-scoring domains:
30
+ sp|P08908|5H1A_HUMAN: domain 1 of 1, from 53 to 400: score 377.1, E = 5.5e-130
31
+ *->GNlLVilvilrtkklrtptnifilNLAvADLLflltlppwalyylvg
32
+ GN+ V+++i+++++l++++n++i++LAv+DL+++++++p+a++y v
33
+ sp|P08908| 53 GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVL 99
34
+
35
+ gsedWpfGsalCklvtaldvvnmyaSillLtaISiDRYlAIvhPlryrrr
36
+ + W++G++ C+l++aldv+++++Sil+L+aI++DRY+AI++P+ y ++
37
+ sp|P08908| 100 N--KWTLGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK 147
38
+
39
+ rtsprrAkvvillvWvlalllslPpllfswvktveegngtlnvnvtvCli
40
+ rt prrA+++i+l+W++++l+s+Pp +++w++++ + +C+i
41
+ sp|P08908| 148 RT-PRRAAALISLTWLIGFLISIPP-MLGWRTPEDRSD------PDACTI 189
42
+
43
+ dfpeestasvstwlvsyvllstlvgFllPllvilvcYtrIlrtlrkrark
44
+ + +++ y+++st+++F++Pll++lv+Y+rI+r++r r rk
45
+ sp|P08908| 190 SKDHG-----------YTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK 228
46
+
47
+ gas...............................................
48
+ + + ++++ +++++ ++ ++++++ ++++++++ + + +++ ++ +
49
+ sp|P08908| 229 TVKkvektgadtrhgaspapqpkksvngesgsrnwrlgveskaggalcan 278
50
+
51
+ ..................................................
52
+ + ++++++ + + ++ ++++++ + +++ ++++ + + +++++++
53
+ sp|P08908| 279 gavrqgddgaalevievhrvgnskehlplpseagptpcapasferknern 328
54
+
55
+ .....kkrsskerkaaktllvvvvvFvlCWlPyfivllldtlc.lsiims
56
+ + ++k+ +erk++ktl++++++F+lCWlP+fiv+l+ ++c++s++m
57
+ sp|P08908| 329 aeakrKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPFCeSSCHM- 377
58
+
59
+ stCelervlptallvtlwLayvNsclNPiIY<-*
60
+ + + +++wL+y+Ns lNP+IY
61
+ sp|P08908| 378 --------PTLLGAIINWLGYSNSLLNPVIY 400
62
+
63
+
64
+ Histogram of all scores:
65
+ score obs exp (one = represents 1 sequences)
66
+ ----- --- ---
67
+ 377 1 0|=
68
+
69
+
70
+ % Statistical details of theoretical EVD fit:
71
+ mu = -10.6639
72
+ lambda = 0.7676
73
+ chi-sq statistic = 0.0000
74
+ P(chi-square) = 0
75
+
76
+ Total sequences searched: 1
77
+
78
+ Whole sequence top hits:
79
+ tophits_s report:
80
+ Total hits: 1
81
+ Satisfying E cutoff: 1
82
+ Total memory: 16K
83
+
84
+ Domain top hits:
85
+ tophits_s report:
86
+ Total hits: 1
87
+ Satisfying E cutoff: 1
88
+ Total memory: 17K
@@ -0,0 +1,30 @@
1
+ H DAYM780301
2
+ D Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
3
+ R
4
+ A Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C.
5
+ T A model of evolutionary change in proteins
6
+ J In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (Dayhoff,
7
+ M.O., ed.), National Biomedical Research Foundation, Washington, D.C.,
8
+ p.352 (1978)
9
+ M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
10
+ 2.
11
+ -2. 6.
12
+ 0. 0. 2.
13
+ 0. -1. 2. 4.
14
+ -2. -4. -4. -5. 12.
15
+ 0. 1. 1. 2. -5. 4.
16
+ 0. -1. 1. 3. -5. 2. 4.
17
+ 1. -3. 0. 1. -3. -1. 0. 5.
18
+ -1. 2. 2. 1. -3. 3. 1. -2. 6.
19
+ -1. -2. -2. -2. -2. -2. -2. -3. -2. 5.
20
+ -2. -3. -3. -4. -6. -2. -3. -4. -2. 2. 6.
21
+ -1. 3. 1. 0. -5. 1. 0. -2. 0. -2. -3. 5.
22
+ -1. 0. -2. -3. -5. -1. -2. -3. -2. 2. 4. 0. 6.
23
+ -4. -4. -4. -6. -4. -5. -5. -5. -2. 1. 2. -5. 0. 9.
24
+ 1. 0. -1. -1. -3. 0. -1. -1. 0. -2. -3. -1. -2. -5. 6.
25
+ 1. 0. 1. 0. 0. -1. 0. 1. -1. -1. -3. 0. -2. -3. 1. 2.
26
+ 1. -1. 0. 0. -2. -1. 0. 0. -1. 0. -2. 0. -1. -3. 0. 1. 3.
27
+ -6. 2. -4. -7. -8. -5. -7. -7. -3. -5. -2. -3. -4. 0. -6. -2. -5. 17.
28
+ -3. -4. -2. -4. 0. -4. -4. -5. 0. -1. -1. -4. -2. 7. -5. -3. -3. 0. 10.
29
+ 0. -2. -2. -2. -2. -2. -2. -1. -2. 4. 2. -2. 2. -1. -1. -1. 0. -6. -2. 4.
30
+ //
@@ -0,0 +1,20 @@
1
+ H PRAM900102
2
+ D Relative frequency in alpha-helix (Prabhakaran, 1990)
3
+ R LIT:1614053b PMID:2390062
4
+ A Prabhakaran, M.
5
+ T The distribution of physical, chemical and conformational properties in
6
+ signal and nascent peptides
7
+ J Biochem. J. 269, 691-696 (1990) Original reference of these three data:
8
+ Creighton, T.E. In "Protein Structure and Melecular Properties", (Freeman,
9
+ W.H., ed.), San Francisco P.235 (1983)
10
+ C LEVM780101 1.000 LEVM780104 0.964 PALJ810101 0.943
11
+ KANM800101 0.942 ISOY800101 0.929 MAXF760101 0.924
12
+ ROBB760101 0.916 GEIM800101 0.912 GEIM800104 0.907
13
+ RACS820108 0.904 PALJ810102 0.902 PALJ810109 0.898
14
+ NAGK730101 0.894 CRAJ730101 0.887 CHOP780201 0.873
15
+ TANS770101 0.854 KANM800103 0.850 QIAN880107 0.829
16
+ QIAN880106 0.827 BURA740101 0.805 NAGK730103 -0.809
17
+ I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
18
+ 1.29 0.96 0.90 1.04 1.11 1.27 1.44 0.56 1.22 0.97
19
+ 1.30 1.23 1.47 1.07 0.52 0.82 0.82 0.99 0.72 0.91
20
+ //
@@ -0,0 +1,53 @@
1
+ Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
2
+ precursor (T-lymphocyte differentiation antigen T8/Leu-2).
3
+ (235 letters)
4
+
5
+ >CD8B_HUMAN P10966 T-cell surface glycoprotein CD8 beta chain
6
+ precursor (Antigen CD8B).
7
+ Length = 210
8
+
9
+ Score = 29.6 bits (65), Expect = 5e-05
10
+ Identities = 21/90 (23%), Positives = 37/90 (41%), Gaps = 3/90 (3%)
11
+
12
+ Query: 39 VELKCQVLLSNPTSGCSWLFQ---PRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLG 95
13
+ V L C+ +S WL Q P + L+ S E ++ ++ + R
14
+ Sbjct: 37 VMLSCEAKISLSNMRIYWLRQRQAPSSDSHHEFLALWDSAKGTIHGEEVEQEKIAVFRDA 96
15
+
16
+ Query: 96 DTFVLTLSDFRRENEGYYFCSALSNSIMYF 125
17
+ F+L L+ + E+ G YFC + + + F
18
+ Sbjct: 97 SRFILNLTSVKPEDSGIYFCMIVGSPELTF 126
19
+
20
+
21
+ Lambda K H
22
+ 0.323 0.137 0.436
23
+
24
+ Gapped
25
+ Lambda K H
26
+ 0.267 0.0410 0.140
27
+
28
+
29
+ Matrix: BLOSUM62
30
+ Gap Penalties: Existence: 11, Extension: 1
31
+ Number of Hits to DB: 102
32
+ Number of Sequences: 0
33
+ Number of extensions: 5
34
+ Number of successful extensions: 3
35
+ Number of sequences better than 10.0: 1
36
+ Number of HSP's better than 10.0 without gapping: 1
37
+ Number of HSP's successfully gapped in prelim test: 0
38
+ Number of HSP's that attempted gapping in prelim test: 0
39
+ Number of HSP's gapped (non-prelim): 1
40
+ length of query: 235
41
+ length of database: 210
42
+ effective HSP length: 22
43
+ effective length of query: 213
44
+ effective length of database: 188
45
+ effective search space: 40044
46
+ effective search space used: 40044
47
+ T: 11
48
+ A: 40
49
+ X1: 16 ( 7.5 bits)
50
+ X2: 38 (14.6 bits)
51
+ X3: 64 (24.7 bits)
52
+ S1: 20 (12.2 bits)
53
+ S2: 20 (12.3 bits)
@@ -0,0 +1,37 @@
1
+ Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
2
+ precursor (T-lymphocyte differentiation antigen T8/Leu-2).
3
+ (235 letters)
4
+
5
+ Lambda K H
6
+ 0.323 0.137 0.436
7
+
8
+ Gapped
9
+ Lambda K H
10
+ 0.267 0.0410 0.140
11
+
12
+
13
+ Matrix: BLOSUM62
14
+ Gap Penalties: Existence: 11, Extension: 1
15
+ Number of Hits to DB: 175
16
+ Number of Sequences: 0
17
+ Number of extensions: 8
18
+ Number of successful extensions: 0
19
+ Number of sequences better than 1.0e-05: 0
20
+ Number of HSP's better than 0.0 without gapping: 0
21
+ Number of HSP's successfully gapped in prelim test: 0
22
+ Number of HSP's that attempted gapping in prelim test: 0
23
+ Number of HSP's gapped (non-prelim): 0
24
+ length of query: 235
25
+ length of database: 393
26
+ effective HSP length: 27
27
+ effective length of query: 208
28
+ effective length of database: 366
29
+ effective search space: 76128
30
+ effective search space used: 76128
31
+ T: 11
32
+ A: 40
33
+ X1: 16 ( 7.5 bits)
34
+ X2: 38 (14.6 bits)
35
+ X3: 64 (24.7 bits)
36
+ S1: 41 (22.0 bits)
37
+ S2: 74 (33.1 bits)
@@ -12,7 +12,7 @@ aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal)
12
12
  aspartate kinase (A)
13
13
  (820 letters)
14
14
 
15
- Database: eco:b0002.faa
15
+ Database: b0002.faa
16
16
  1 sequences; 820 total letters
17
17
 
18
18
  Searching.done
@@ -88,7 +88,7 @@ Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
88
88
  Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
89
89
 
90
90
 
91
- Database: eco:b0002.faa
91
+ Database: b0002.faa
92
92
  Posted date: Aug 7, 2005 7:29 AM
93
93
  Number of letters in database: 820
94
94
  Number of sequences in database: 1
@@ -4,7 +4,7 @@
4
4
  <BlastOutput_program>blastp</BlastOutput_program>
5
5
  <BlastOutput_version>blastp 2.2.10 [Oct-19-2004]</BlastOutput_version>
6
6
  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
7
- <BlastOutput_db>eco:b0002.faa</BlastOutput_db>
7
+ <BlastOutput_db>b0002.faa</BlastOutput_db>
8
8
  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
9
9
  <BlastOutput_query-def>eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)</BlastOutput_query-def>
10
10
  <BlastOutput_query-len>820</BlastOutput_query-len>
@@ -0,0 +1,134 @@
1
+ #
2
+ # test/unit/bio/appl/bl2seq/test_report.rb - Unit test for
3
+ # Bio::Blast::Bl2seq::Report
4
+ #
5
+ # Copyright:: Copyright (C) 2006
6
+ # Mitsuteru C. Nakao <n@bioruby.org>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id: test_report.rb,v 1.2 2006/02/23 22:25:30 nakao Exp $
10
+ #
11
+
12
+ require 'pathname'
13
+ libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s
14
+ $:.unshift(libpath) unless $:.include?(libpath)
15
+
16
+ require 'test/unit'
17
+ require 'bio/appl/bl2seq/report'
18
+
19
+
20
+ module Bio
21
+ class TestBl2seqReportData
22
+ bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
23
+ TestDataBl2seq = Pathname.new(File.join(bioruby_root, 'test', 'data', 'bl2seq')).cleanpath.to_s
24
+
25
+ def self.output(format = 7)
26
+ case format
27
+ when 'empty'
28
+ File.open(File.join(TestDataBl2seq, 'cd8a_p53_e-5blastp.bl2seq')).read
29
+ when 'blastp'
30
+ File.open(File.join(TestDataBl2seq, 'cd8a_cd8b_blastp.bl2seq')).read
31
+ when 'blastn'
32
+ when 'blastx'
33
+ when 'tblastn'
34
+ when 'tblastx'
35
+ end
36
+ end
37
+ end
38
+
39
+
40
+ class TestBl2seqReportConstants < Test::Unit::TestCase
41
+ def test_rs
42
+ rs = nil
43
+ assert_equal(nil, Bio::Blast::Bl2seq::Report::RS)
44
+ assert_equal(nil, Bio::Blast::Bl2seq::Report::DELIMITER)
45
+ end
46
+ end
47
+
48
+
49
+ class TestBl2seqReport < Test::Unit::TestCase
50
+
51
+ def setup
52
+ @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
53
+ @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
54
+ end
55
+
56
+ def test_new
57
+ assert(@empty)
58
+ assert(@blastp)
59
+ end
60
+
61
+ def test_undefed_methods
62
+ methods = ['format0_parse_header',
63
+ 'program',
64
+ 'version',
65
+ 'version_number',
66
+ 'version_date',
67
+ 'message',
68
+ 'converged?',
69
+ 'reference',
70
+ 'db']
71
+
72
+ methods.each do |x|
73
+ assert_nil(@empty.methods.include?(x))
74
+ end
75
+
76
+ methods.each do |x|
77
+ assert_nil(@blastp.methods.include?(x))
78
+ end
79
+ end
80
+
81
+ # TestF0dbstat < Test::Unit::TestCase
82
+
83
+ def test_db_num
84
+ assert_equal(0, @empty.db_num)
85
+ assert_equal(0, @blastp.db_num)
86
+ end
87
+
88
+ def test_db_len
89
+ assert_equal(393, @empty.db_len)
90
+ assert_equal(210, @blastp.db_len)
91
+ end
92
+
93
+ # TestIteration < Test::Unit::TestCase
94
+ def test_undefed_methods
95
+ methods = ['message',
96
+ 'pattern_in_database',
97
+ 'pattern',
98
+ 'pattern_positions',
99
+ 'hits_found_again',
100
+ 'hits_newly_found',
101
+ 'hits_for_pattern',
102
+ 'parse_hitlist',
103
+ 'converged?']
104
+
105
+ methods.each do |x|
106
+ assert_equal(false, @empty.iterations.first.methods.include?(x), "undifined? : #{x}")
107
+ end
108
+
109
+ methods.each do |x|
110
+ assert_equal(false, @blastp.iterations.first.methods.include?(x), "undefined? : #{x}")
111
+ end
112
+ end
113
+ end
114
+
115
+
116
+ class TestBl2seqReportHit < Test::Unit::TestCase
117
+ def setup
118
+ @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
119
+ @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
120
+ @empty_hit = @empty.hits.first
121
+ @blastp_hit = @blastp.hits.first
122
+ end
123
+
124
+ def test_empty_hits
125
+ assert_equal(0, @empty.hits.size)
126
+ end
127
+
128
+ def test_hits
129
+ assert_equal(Bio::Blast::Bl2seq::Report::Hit, @blastp.hits.first.class)
130
+ assert_equal(1, @blastp.hits.size)
131
+ end
132
+ end
133
+
134
+ end # module Bio