bio 0.7.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (142) hide show
  1. data/bin/bioruby +71 -27
  2. data/bin/br_biofetch.rb +5 -17
  3. data/bin/br_bioflat.rb +14 -26
  4. data/bin/br_biogetseq.rb +6 -18
  5. data/bin/br_pmfetch.rb +6 -16
  6. data/doc/Changes-0.7.rd +35 -0
  7. data/doc/KEGG_API.rd +287 -172
  8. data/doc/KEGG_API.rd.ja +273 -160
  9. data/doc/Tutorial.rd +18 -9
  10. data/doc/Tutorial.rd.ja +656 -138
  11. data/lib/bio.rb +6 -24
  12. data/lib/bio/alignment.rb +5 -5
  13. data/lib/bio/appl/blast.rb +132 -98
  14. data/lib/bio/appl/blast/format0.rb +9 -19
  15. data/lib/bio/appl/blast/wublast.rb +5 -18
  16. data/lib/bio/appl/emboss.rb +40 -47
  17. data/lib/bio/appl/hmmer.rb +116 -82
  18. data/lib/bio/appl/hmmer/report.rb +509 -364
  19. data/lib/bio/appl/spidey/report.rb +7 -18
  20. data/lib/bio/data/na.rb +3 -21
  21. data/lib/bio/db.rb +3 -21
  22. data/lib/bio/db/aaindex.rb +147 -52
  23. data/lib/bio/db/embl/common.rb +27 -6
  24. data/lib/bio/db/embl/embl.rb +18 -10
  25. data/lib/bio/db/embl/sptr.rb +87 -67
  26. data/lib/bio/db/embl/swissprot.rb +32 -3
  27. data/lib/bio/db/embl/trembl.rb +32 -3
  28. data/lib/bio/db/embl/uniprot.rb +32 -3
  29. data/lib/bio/db/fasta.rb +327 -289
  30. data/lib/bio/db/medline.rb +25 -4
  31. data/lib/bio/db/nbrf.rb +12 -20
  32. data/lib/bio/db/pdb.rb +4 -1
  33. data/lib/bio/db/pdb/chemicalcomponent.rb +240 -0
  34. data/lib/bio/db/pdb/pdb.rb +13 -8
  35. data/lib/bio/db/rebase.rb +93 -97
  36. data/lib/bio/feature.rb +2 -31
  37. data/lib/bio/io/ddbjxml.rb +167 -139
  38. data/lib/bio/io/fastacmd.rb +89 -56
  39. data/lib/bio/io/flatfile.rb +994 -278
  40. data/lib/bio/io/flatfile/index.rb +257 -194
  41. data/lib/bio/io/flatfile/indexer.rb +37 -29
  42. data/lib/bio/reference.rb +147 -64
  43. data/lib/bio/sequence.rb +57 -417
  44. data/lib/bio/sequence/aa.rb +64 -0
  45. data/lib/bio/sequence/common.rb +175 -0
  46. data/lib/bio/sequence/compat.rb +68 -0
  47. data/lib/bio/sequence/format.rb +134 -0
  48. data/lib/bio/sequence/generic.rb +24 -0
  49. data/lib/bio/sequence/na.rb +189 -0
  50. data/lib/bio/shell.rb +9 -23
  51. data/lib/bio/shell/core.rb +130 -125
  52. data/lib/bio/shell/demo.rb +143 -0
  53. data/lib/bio/shell/{session.rb → interface.rb} +42 -40
  54. data/lib/bio/shell/object.rb +52 -0
  55. data/lib/bio/shell/plugin/codon.rb +4 -22
  56. data/lib/bio/shell/plugin/emboss.rb +23 -0
  57. data/lib/bio/shell/plugin/entry.rb +34 -25
  58. data/lib/bio/shell/plugin/flatfile.rb +5 -23
  59. data/lib/bio/shell/plugin/keggapi.rb +11 -24
  60. data/lib/bio/shell/plugin/midi.rb +5 -23
  61. data/lib/bio/shell/plugin/obda.rb +4 -22
  62. data/lib/bio/shell/plugin/seq.rb +6 -24
  63. data/lib/bio/shell/rails/Rakefile +10 -0
  64. data/lib/bio/shell/rails/app/controllers/application.rb +4 -0
  65. data/lib/bio/shell/rails/app/controllers/shell_controller.rb +94 -0
  66. data/lib/bio/shell/rails/app/helpers/application_helper.rb +3 -0
  67. data/lib/bio/shell/rails/app/models/shell_connection.rb +30 -0
  68. data/lib/bio/shell/rails/app/views/layouts/shell.rhtml +37 -0
  69. data/lib/bio/shell/rails/app/views/shell/history.rhtml +5 -0
  70. data/lib/bio/shell/rails/app/views/shell/index.rhtml +2 -0
  71. data/lib/bio/shell/rails/app/views/shell/show.rhtml +13 -0
  72. data/lib/bio/shell/rails/config/boot.rb +19 -0
  73. data/lib/bio/shell/rails/config/database.yml +85 -0
  74. data/lib/bio/shell/rails/config/environment.rb +53 -0
  75. data/lib/bio/shell/rails/config/environments/development.rb +19 -0
  76. data/lib/bio/shell/rails/config/environments/production.rb +19 -0
  77. data/lib/bio/shell/rails/config/environments/test.rb +19 -0
  78. data/lib/bio/shell/rails/config/routes.rb +19 -0
  79. data/lib/bio/shell/rails/doc/README_FOR_APP +2 -0
  80. data/lib/bio/shell/rails/public/404.html +8 -0
  81. data/lib/bio/shell/rails/public/500.html +8 -0
  82. data/lib/bio/shell/rails/public/dispatch.cgi +10 -0
  83. data/lib/bio/shell/rails/public/dispatch.fcgi +24 -0
  84. data/lib/bio/shell/rails/public/dispatch.rb +10 -0
  85. data/lib/bio/shell/rails/public/favicon.ico +0 -0
  86. data/lib/bio/shell/rails/public/images/icon.png +0 -0
  87. data/lib/bio/shell/rails/public/images/rails.png +0 -0
  88. data/lib/bio/shell/rails/public/index.html +277 -0
  89. data/lib/bio/shell/rails/public/javascripts/controls.js +750 -0
  90. data/lib/bio/shell/rails/public/javascripts/dragdrop.js +584 -0
  91. data/lib/bio/shell/rails/public/javascripts/effects.js +854 -0
  92. data/lib/bio/shell/rails/public/javascripts/prototype.js +1785 -0
  93. data/lib/bio/shell/rails/public/robots.txt +1 -0
  94. data/lib/bio/shell/rails/public/stylesheets/main.css +187 -0
  95. data/lib/bio/shell/rails/script/about +3 -0
  96. data/lib/bio/shell/rails/script/breakpointer +3 -0
  97. data/lib/bio/shell/rails/script/console +3 -0
  98. data/lib/bio/shell/rails/script/destroy +3 -0
  99. data/lib/bio/shell/rails/script/generate +3 -0
  100. data/lib/bio/shell/rails/script/performance/benchmarker +3 -0
  101. data/lib/bio/shell/rails/script/performance/profiler +3 -0
  102. data/lib/bio/shell/rails/script/plugin +3 -0
  103. data/lib/bio/shell/rails/script/process/reaper +3 -0
  104. data/lib/bio/shell/rails/script/process/spawner +3 -0
  105. data/lib/bio/shell/rails/script/process/spinner +3 -0
  106. data/lib/bio/shell/rails/script/runner +3 -0
  107. data/lib/bio/shell/rails/script/server +42 -0
  108. data/lib/bio/shell/rails/test/test_helper.rb +28 -0
  109. data/lib/bio/shell/web.rb +90 -0
  110. data/lib/bio/util/contingency_table.rb +231 -225
  111. data/sample/any2fasta.rb +59 -0
  112. data/test/data/HMMER/hmmpfam.out +64 -0
  113. data/test/data/HMMER/hmmsearch.out +88 -0
  114. data/test/data/aaindex/DAYM780301 +30 -0
  115. data/test/data/aaindex/PRAM900102 +20 -0
  116. data/test/data/bl2seq/cd8a_cd8b_blastp.bl2seq +53 -0
  117. data/test/data/bl2seq/cd8a_p53_e-5blastp.bl2seq +37 -0
  118. data/test/data/blast/{eco:b0002.faa → b0002.faa} +0 -0
  119. data/test/data/blast/{eco:b0002.faa.m0 → b0002.faa.m0} +2 -2
  120. data/test/data/blast/{eco:b0002.faa.m7 → b0002.faa.m7} +1 -1
  121. data/test/data/blast/{eco:b0002.faa.m8 → b0002.faa.m8} +0 -0
  122. data/test/unit/bio/appl/bl2seq/test_report.rb +134 -0
  123. data/test/unit/bio/appl/blast/test_report.rb +15 -12
  124. data/test/unit/bio/appl/blast/test_xmlparser.rb +4 -4
  125. data/test/unit/bio/appl/hmmer/test_report.rb +355 -0
  126. data/test/unit/bio/appl/test_blast.rb +5 -5
  127. data/test/unit/bio/data/test_na.rb +9 -18
  128. data/test/unit/bio/db/pdb/test_pdb.rb +169 -0
  129. data/test/unit/bio/db/test_aaindex.rb +197 -0
  130. data/test/unit/bio/io/test_fastacmd.rb +55 -0
  131. data/test/unit/bio/sequence/test_aa.rb +102 -0
  132. data/test/unit/bio/sequence/test_common.rb +178 -0
  133. data/test/unit/bio/sequence/test_compat.rb +82 -0
  134. data/test/unit/bio/sequence/test_na.rb +242 -0
  135. data/test/unit/bio/shell/plugin/test_seq.rb +29 -19
  136. data/test/unit/bio/test_alignment.rb +15 -7
  137. data/test/unit/bio/test_reference.rb +198 -0
  138. data/test/unit/bio/test_sequence.rb +4 -49
  139. data/test/unit/bio/test_shell.rb +2 -2
  140. metadata +118 -15
  141. data/lib/bio/io/brdb.rb +0 -103
  142. data/lib/bioruby.rb +0 -34
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # any2fasta.rb - convert input file into FASTA format using a regex
4
+ # filter
5
+ #
6
+ # Copyright (C) 2006 Pjotr Prins <p@bioruby.org>
7
+ #
8
+ # This program is free software; you can redistribute it and/or modify
9
+ # it under the terms of the GNU General Public License as published by
10
+ # the Free Software Foundation; either version 2 of the License, or
11
+ # (at your option) any later version.
12
+ #
13
+ # This program is distributed in the hope that it will be useful,
14
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
15
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16
+ # GNU General Public License for more details.
17
+ #
18
+ # $Id: any2fasta.rb,v 1.1 2006/02/17 14:59:27 pjotr Exp $
19
+ #
20
+
21
+ require 'bio/io/flatfile'
22
+
23
+ include Bio
24
+
25
# Help text printed when the script is started without any arguments.
usage = <<USAGE

Usage: any2fasta.rb [regex] infiles

Examples:

  Output all sequences containing GATC or GATT ignoring case:

    any2fasta.rb "/GAT[CT]/i" *.seq > reduced.fasta

USAGE

if ARGV.empty?
  print usage
  exit 1
end

# ---- Optional filter.
# The first argument is taken as a regular expression when it starts with a
# slash and does not name an existing file.  It must look like a Ruby regexp
# literal: /pattern/ optionally followed by the flags i, m, x (o is accepted
# and ignored).
#
# The literal is parsed here and compiled with Regexp.new.  It is deliberately
# NOT interpolated into eval: the argument comes from the command line, and
# eval would execute any Ruby code embedded in it.
regex = nil
candidate = ARGV[0]
if candidate =~ /\A\// && !File.exist?(candidate)
  unless candidate =~ /\A\/(.*)\/([imxo]*)\z/m
    $stderr.puts "any2fasta.rb: not a valid /regex/ argument: #{candidate}"
    exit 1
  end
  pattern = $1
  flags = $2

  options = 0
  options |= Regexp::IGNORECASE if flags.include?('i')
  options |= Regexp::MULTILINE  if flags.include?('m')
  options |= Regexp::EXTENDED   if flags.include?('x')

  begin
    regex = Regexp.new(pattern, options)
  rescue RegexpError => e
    $stderr.puts "any2fasta.rb: invalid regular expression: #{e.message}"
    exit 1
  end
  # The regex has been consumed; everything left in ARGV is an input file.
  ARGV.shift
end

# Convert every entry of every input file to FASTA (70 columns per line),
# skipping entries whose sequence does not match the filter, if one was given.
ARGV.each do |fn|
  ff = Bio::FlatFile.auto(fn)
  ff.each_entry do |entry|
    next if regex && entry.seq !~ regex
    print entry.seq.to_fasta(entry.definition, 70)
  end
end
59
+
@@ -0,0 +1,64 @@
1
+ hmmpfam - search one or more sequences against HMM database
2
+ HMMER 2.3.2 (Oct 2003)
3
+ Copyright (C) 1992-2003 HHMI/Washington University School of Medicine
4
+ Freely distributed under the GNU General Public License (GPL)
5
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ HMM file: /Users/nakao/Sites/iprscan/data/Pfam
7
+ Sequence file: /Users/nakao/Sites/iprscan/tmp/20050517/iprscan-20050517-16244071/chunk_1/iprscan-20050517-16244071.nocrc
8
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9
+
10
+ Query sequence: 104K_THEPA
11
+ Accession: [none]
12
+ Description: [none]
13
+
14
+ Scores for sequence family classification (score includes all domains):
15
+ Model Description Score E-value N
16
+ -------- ----------- ----- ------- ---
17
+ PF04385.4 Domain of unknown function, DUF529 259.3 6.6e-75 4
18
+
19
+ Parsed for domains:
20
+ Model Domain seq-f seq-t hmm-f hmm-t score E-value
21
+ -------- ------- ----- ----- ----- ----- ----- -------
22
+ PF04385.4 1/4 36 111 .. 1 80 [] 65.0 2e-16
23
+ PF04385.4 2/4 149 224 .. 1 80 [] 64.7 2.5e-16
24
+ PF04385.4 3/4 265 343 .. 1 80 [] 64.6 2.7e-16
25
+ PF04385.4 4/4 379 456 .. 1 80 [] 65.0 2e-16
26
+
27
+ Alignments of top-scoring domains:
28
+ PF04385.4: domain 1 of 4, from 36 to 111: score 65.0, E = 2e-16
29
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
30
+ t+D+n++++ f +v+++g+++ + ++ ++v+++++++Gn+v+W
31
+ 104K_THEPA 36 TFDINSNQTG-PAFLTAVEMAGVKYLQVQHGSNVNIHRLVEGNVVIW 81
32
+
33
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
34
+ e++ + +l++ ++++++++++++++++ +++
35
+ 104K_THEPA 82 ENA---STPLYTGAIVTNNDGPYMAYVEVLGDP 111
36
+
37
+ PF04385.4: domain 2 of 4, from 149 to 224: score 64.7, E = 2.5e-16
38
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
39
+ +L++ ++ +++k+ + ++a+ng ++vt++p++G+ +++++++n++++
40
+ 104K_THEPA 149 SLNMAFQLENNKYEVETHAKNGANMVTFIPRNGHICKMVYHKNVRIY 195
41
+
42
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
43
+ ++ ++++vt++++++ +++L+l+++ +
44
+ 104K_THEPA 196 KA----TGNDTVTSVVGFFRGLRLLLINVFSID 224
45
+
46
+ PF04385.4: domain 3 of 4, from 265 to 343: score 64.6, E = 2.7e-16
47
+ *->tLDlndtgstlkqfdykvalngdivvtytpkpGvkftkitdGnevvW
48
+ +Dl+ +++ +++f+ + a+++ ++++++p++G+++tk++dG++v++
49
+ 104K_THEPA 265 PVDLDIKDIDYTMFHLADATYHEPCFKIIPNTGFCITKLFDGDQVLY 311
50
+
51
+ eseddpefglivtlsfyldsnkfLvlllintak<-*
52
+ es+ + + ++i +++y+++n ++++l++n+++
53
+ 104K_THEPA 312 ESFNP-LIHCINEVHIYDRNNGSIICLHLNYSP 343
54
+
55
+ PF04385.4: domain 4 of 4, from 379 to 456: score 65.0, E = 2e-16
56
+ *->tLDlndtgstlkqfdykvalngdivvty.tpkpGvkftkitdGnevv
57
+ +LD+n ++++k+ +++ +n d +t+ tp+p+ + +++dG+ev+
58
+ 104K_THEPA 379 ELDVN--FISDKDLYVAALTNADLNYTMvTPRPHRDVIRVSDGSEVL 423
59
+
60
+ WeseddpefglivtlsfyldsnkfLvlllintak<-*
61
+ W++e+ ++ l++++++++d++ +Lv+l+i++
62
+ 104K_THEPA 424 WYYEGL-DNFLVCAWIYVSDGVASLVHLRIKDRI 456
63
+
64
+ //
@@ -0,0 +1,88 @@
1
+ hmmsearch - search a sequence database with a profile HMM
2
+ HMMER 2.2g (August 2001)
3
+ Copyright (C) 1992-2001 HHMI/Washington University School of Medicine
4
+ Freely distributed under the GNU General Public License (GPL)
5
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
6
+ HMM file: /sw/share/hmmer/demo/7tm_1 [7tm_1]
7
+ Sequence database: /sw/share/hmmer/demo/P08908.fasta
8
+ per-sequence score cutoff: [none]
9
+ per-domain score cutoff: [none]
10
+ per-sequence Eval cutoff: <= 10
11
+ per-domain Eval cutoff: [none]
12
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
13
+
14
+ Query HMM: 7tm_1
15
+ Accession: PF00001
16
+ Description: 7 transmembrane receptor (rhodopsin family)
17
+ [HMM has been calibrated; E-values are empirical estimates]
18
+
19
+ Scores for complete sequences (score includes all domains):
20
+ Sequence Description Score E-value N
21
+ -------- ----------- ----- ------- ---
22
+ sp|P08908|5H1A_HUMAN 5-hydroxytryptamine 1A receptor 377.1 5.5e-130 1
23
+
24
+ Parsed for domains:
25
+ Sequence Domain seq-f seq-t hmm-f hmm-t score E-value
26
+ -------- ------- ----- ----- ----- ----- ----- -------
27
+ sp|P08908|5H1A_HUMAN 1/1 53 400 .. 1 275 [] 377.1 5.5e-130
28
+
29
+ Alignments of top-scoring domains:
30
+ sp|P08908|5H1A_HUMAN: domain 1 of 1, from 53 to 400: score 377.1, E = 5.5e-130
31
+ *->GNlLVilvilrtkklrtptnifilNLAvADLLflltlppwalyylvg
32
+ GN+ V+++i+++++l++++n++i++LAv+DL+++++++p+a++y v
33
+ sp|P08908| 53 GNACVVAAIALERSLQNVANYLIGSLAVTDLMVSVLVLPMAALYQVL 99
34
+
35
+ gsedWpfGsalCklvtaldvvnmyaSillLtaISiDRYlAIvhPlryrrr
36
+ + W++G++ C+l++aldv+++++Sil+L+aI++DRY+AI++P+ y ++
37
+ sp|P08908| 100 N--KWTLGQVTCDLFIALDVLCCTSSILHLCAIALDRYWAITDPIDYVNK 147
38
+
39
+ rtsprrAkvvillvWvlalllslPpllfswvktveegngtlnvnvtvCli
40
+ rt prrA+++i+l+W++++l+s+Pp +++w++++ + +C+i
41
+ sp|P08908| 148 RT-PRRAAALISLTWLIGFLISIPP-MLGWRTPEDRSD------PDACTI 189
42
+
43
+ dfpeestasvstwlvsyvllstlvgFllPllvilvcYtrIlrtlrkrark
44
+ + +++ y+++st+++F++Pll++lv+Y+rI+r++r r rk
45
+ sp|P08908| 190 SKDHG-----------YTIYSTFGAFYIPLLLMLVLYGRIFRAARFRIRK 228
46
+
47
+ gas...............................................
48
+ + + ++++ +++++ ++ ++++++ ++++++++ + + +++ ++ +
49
+ sp|P08908| 229 TVKkvektgadtrhgaspapqpkksvngesgsrnwrlgveskaggalcan 278
50
+
51
+ ..................................................
52
+ + ++++++ + + ++ ++++++ + +++ ++++ + + +++++++
53
+ sp|P08908| 279 gavrqgddgaalevievhrvgnskehlplpseagptpcapasferknern 328
54
+
55
+ .....kkrsskerkaaktllvvvvvFvlCWlPyfivllldtlc.lsiims
56
+ + ++k+ +erk++ktl++++++F+lCWlP+fiv+l+ ++c++s++m
57
+ sp|P08908| 329 aeakrKMALARERKTVKTLGIIMGTFILCWLPFFIVALVLPFCeSSCHM- 377
58
+
59
+ stCelervlptallvtlwLayvNsclNPiIY<-*
60
+ + + +++wL+y+Ns lNP+IY
61
+ sp|P08908| 378 --------PTLLGAIINWLGYSNSLLNPVIY 400
62
+
63
+
64
+ Histogram of all scores:
65
+ score obs exp (one = represents 1 sequences)
66
+ ----- --- ---
67
+ 377 1 0|=
68
+
69
+
70
+ % Statistical details of theoretical EVD fit:
71
+ mu = -10.6639
72
+ lambda = 0.7676
73
+ chi-sq statistic = 0.0000
74
+ P(chi-square) = 0
75
+
76
+ Total sequences searched: 1
77
+
78
+ Whole sequence top hits:
79
+ tophits_s report:
80
+ Total hits: 1
81
+ Satisfying E cutoff: 1
82
+ Total memory: 16K
83
+
84
+ Domain top hits:
85
+ tophits_s report:
86
+ Total hits: 1
87
+ Satisfying E cutoff: 1
88
+ Total memory: 17K
@@ -0,0 +1,30 @@
1
+ H DAYM780301
2
+ D Log odds matrix for 250 PAMs (Dayhoff et al., 1978)
3
+ R
4
+ A Dayhoff, M.O., Schwartz, R.M. and Orcutt, B.C.
5
+ T A model of evolutionary change in proteins
6
+ J In "Atlas of Protein Sequence and Structure", Vol.5, Suppl.3 (Dayhoff,
7
+ M.O., ed.), National Biomedical Research Foundation, Washington, D.C.,
8
+ p.352 (1978)
9
+ M rows = ARNDCQEGHILKMFPSTWYV, cols = ARNDCQEGHILKMFPSTWYV
10
+ 2.
11
+ -2. 6.
12
+ 0. 0. 2.
13
+ 0. -1. 2. 4.
14
+ -2. -4. -4. -5. 12.
15
+ 0. 1. 1. 2. -5. 4.
16
+ 0. -1. 1. 3. -5. 2. 4.
17
+ 1. -3. 0. 1. -3. -1. 0. 5.
18
+ -1. 2. 2. 1. -3. 3. 1. -2. 6.
19
+ -1. -2. -2. -2. -2. -2. -2. -3. -2. 5.
20
+ -2. -3. -3. -4. -6. -2. -3. -4. -2. 2. 6.
21
+ -1. 3. 1. 0. -5. 1. 0. -2. 0. -2. -3. 5.
22
+ -1. 0. -2. -3. -5. -1. -2. -3. -2. 2. 4. 0. 6.
23
+ -4. -4. -4. -6. -4. -5. -5. -5. -2. 1. 2. -5. 0. 9.
24
+ 1. 0. -1. -1. -3. 0. -1. -1. 0. -2. -3. -1. -2. -5. 6.
25
+ 1. 0. 1. 0. 0. -1. 0. 1. -1. -1. -3. 0. -2. -3. 1. 2.
26
+ 1. -1. 0. 0. -2. -1. 0. 0. -1. 0. -2. 0. -1. -3. 0. 1. 3.
27
+ -6. 2. -4. -7. -8. -5. -7. -7. -3. -5. -2. -3. -4. 0. -6. -2. -5. 17.
28
+ -3. -4. -2. -4. 0. -4. -4. -5. 0. -1. -1. -4. -2. 7. -5. -3. -3. 0. 10.
29
+ 0. -2. -2. -2. -2. -2. -2. -1. -2. 4. 2. -2. 2. -1. -1. -1. 0. -6. -2. 4.
30
+ //
@@ -0,0 +1,20 @@
1
+ H PRAM900102
2
+ D Relative frequency in alpha-helix (Prabhakaran, 1990)
3
+ R LIT:1614053b PMID:2390062
4
+ A Prabhakaran, M.
5
+ T The distribution of physical, chemical and conformational properties in
6
+ signal and nascent peptides
7
+ J Biochem. J. 269, 691-696 (1990) Original reference of these three data:
8
+ Creighton, T.E. In "Protein Structure and Melecular Properties", (Freeman,
9
+ W.H., ed.), San Francisco P.235 (1983)
10
+ C LEVM780101 1.000 LEVM780104 0.964 PALJ810101 0.943
11
+ KANM800101 0.942 ISOY800101 0.929 MAXF760101 0.924
12
+ ROBB760101 0.916 GEIM800101 0.912 GEIM800104 0.907
13
+ RACS820108 0.904 PALJ810102 0.902 PALJ810109 0.898
14
+ NAGK730101 0.894 CRAJ730101 0.887 CHOP780201 0.873
15
+ TANS770101 0.854 KANM800103 0.850 QIAN880107 0.829
16
+ QIAN880106 0.827 BURA740101 0.805 NAGK730103 -0.809
17
+ I A/L R/K N/M D/F C/P Q/S E/T G/W H/Y I/V
18
+ 1.29 0.96 0.90 1.04 1.11 1.27 1.44 0.56 1.22 0.97
19
+ 1.30 1.23 1.47 1.07 0.52 0.82 0.82 0.99 0.72 0.91
20
+ //
@@ -0,0 +1,53 @@
1
+ Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
2
+ precursor (T-lymphocyte differentiation antigen T8/Leu-2).
3
+ (235 letters)
4
+
5
+ >CD8B_HUMAN P10966 T-cell surface glycoprotein CD8 beta chain
6
+ precursor (Antigen CD8B).
7
+ Length = 210
8
+
9
+ Score = 29.6 bits (65), Expect = 5e-05
10
+ Identities = 21/90 (23%), Positives = 37/90 (41%), Gaps = 3/90 (3%)
11
+
12
+ Query: 39 VELKCQVLLSNPTSGCSWLFQ---PRGAAASPTFLLYLSQNKPKAAEGLDTQRFSGKRLG 95
13
+ V L C+ +S WL Q P + L+ S E ++ ++ + R
14
+ Sbjct: 37 VMLSCEAKISLSNMRIYWLRQRQAPSSDSHHEFLALWDSAKGTIHGEEVEQEKIAVFRDA 96
15
+
16
+ Query: 96 DTFVLTLSDFRRENEGYYFCSALSNSIMYF 125
17
+ F+L L+ + E+ G YFC + + + F
18
+ Sbjct: 97 SRFILNLTSVKPEDSGIYFCMIVGSPELTF 126
19
+
20
+
21
+ Lambda K H
22
+ 0.323 0.137 0.436
23
+
24
+ Gapped
25
+ Lambda K H
26
+ 0.267 0.0410 0.140
27
+
28
+
29
+ Matrix: BLOSUM62
30
+ Gap Penalties: Existence: 11, Extension: 1
31
+ Number of Hits to DB: 102
32
+ Number of Sequences: 0
33
+ Number of extensions: 5
34
+ Number of successful extensions: 3
35
+ Number of sequences better than 10.0: 1
36
+ Number of HSP's better than 10.0 without gapping: 1
37
+ Number of HSP's successfully gapped in prelim test: 0
38
+ Number of HSP's that attempted gapping in prelim test: 0
39
+ Number of HSP's gapped (non-prelim): 1
40
+ length of query: 235
41
+ length of database: 210
42
+ effective HSP length: 22
43
+ effective length of query: 213
44
+ effective length of database: 188
45
+ effective search space: 40044
46
+ effective search space used: 40044
47
+ T: 11
48
+ A: 40
49
+ X1: 16 ( 7.5 bits)
50
+ X2: 38 (14.6 bits)
51
+ X3: 64 (24.7 bits)
52
+ S1: 20 (12.2 bits)
53
+ S2: 20 (12.3 bits)
@@ -0,0 +1,37 @@
1
+ Query= CD8A_HUMAN P01732 T-cell surface glycoprotein CD8 alpha chain
2
+ precursor (T-lymphocyte differentiation antigen T8/Leu-2).
3
+ (235 letters)
4
+
5
+ Lambda K H
6
+ 0.323 0.137 0.436
7
+
8
+ Gapped
9
+ Lambda K H
10
+ 0.267 0.0410 0.140
11
+
12
+
13
+ Matrix: BLOSUM62
14
+ Gap Penalties: Existence: 11, Extension: 1
15
+ Number of Hits to DB: 175
16
+ Number of Sequences: 0
17
+ Number of extensions: 8
18
+ Number of successful extensions: 0
19
+ Number of sequences better than 1.0e-05: 0
20
+ Number of HSP's better than 0.0 without gapping: 0
21
+ Number of HSP's successfully gapped in prelim test: 0
22
+ Number of HSP's that attempted gapping in prelim test: 0
23
+ Number of HSP's gapped (non-prelim): 0
24
+ length of query: 235
25
+ length of database: 393
26
+ effective HSP length: 27
27
+ effective length of query: 208
28
+ effective length of database: 366
29
+ effective search space: 76128
30
+ effective search space used: 76128
31
+ T: 11
32
+ A: 40
33
+ X1: 16 ( 7.5 bits)
34
+ X2: 38 (14.6 bits)
35
+ X3: 64 (24.7 bits)
36
+ S1: 41 (22.0 bits)
37
+ S2: 74 (33.1 bits)
@@ -12,7 +12,7 @@ aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal)
12
12
  aspartate kinase (A)
13
13
  (820 letters)
14
14
 
15
- Database: eco:b0002.faa
15
+ Database: b0002.faa
16
16
  1 sequences; 820 total letters
17
17
 
18
18
  Searching.done
@@ -88,7 +88,7 @@ Query: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
88
88
  Sbjct: 781 YSHYYQPLPLVLRGYGAGNDVTAAGVFADLLRTLSWKLGV 820
89
89
 
90
90
 
91
- Database: eco:b0002.faa
91
+ Database: b0002.faa
92
92
  Posted date: Aug 7, 2005 7:29 AM
93
93
  Number of letters in database: 820
94
94
  Number of sequences in database: 1
@@ -4,7 +4,7 @@
4
4
  <BlastOutput_program>blastp</BlastOutput_program>
5
5
  <BlastOutput_version>blastp 2.2.10 [Oct-19-2004]</BlastOutput_version>
6
6
  <BlastOutput_reference>~Reference: Altschul, Stephen F., Thomas L. Madden, Alejandro A. Schaffer, ~Jinghui Zhang, Zheng Zhang, Webb Miller, and David J. Lipman (1997), ~&quot;Gapped BLAST and PSI-BLAST: a new generation of protein database search~programs&quot;, Nucleic Acids Res. 25:3389-3402.</BlastOutput_reference>
7
- <BlastOutput_db>eco:b0002.faa</BlastOutput_db>
7
+ <BlastOutput_db>b0002.faa</BlastOutput_db>
8
8
  <BlastOutput_query-ID>lcl|QUERY</BlastOutput_query-ID>
9
9
  <BlastOutput_query-def>eco:b0002 thrA, Hs, thrD, thrA2, thrA1; bifunctional: aspartokinase I (N-terminal); homoserine dehydrogenase I (C-terminal) [EC:2.7.2.4 1.1.1.3]; K00003 homoserine dehydrogenase; K00928 aspartate kinase (A)</BlastOutput_query-def>
10
10
  <BlastOutput_query-len>820</BlastOutput_query-len>
@@ -0,0 +1,134 @@
1
+ #
2
+ # test/unit/bio/appl/bl2seq/test_report.rb - Unit test for
3
+ # Bio::Blast::Bl2seq::Report
4
+ #
5
+ # Copyright:: Copyright (C) 2006
6
+ # Mitsuteru C. Nakao <n@bioruby.org>
7
+ # License:: Ruby's
8
+ #
9
+ # $Id: test_report.rb,v 1.2 2006/02/23 22:25:30 nakao Exp $
10
+ #
11
+
12
+ require 'pathname'
13
+ libpath = Pathname.new(File.join(File.join(File.dirname(__FILE__), ['..'] * 5, 'lib'))).cleanpath.to_s
14
+ $:.unshift(libpath) unless $:.include?(libpath)
15
+
16
+ require 'test/unit'
17
+ require 'bio/appl/bl2seq/report'
18
+
19
+
20
module Bio

  # Provides the raw bl2seq output fixtures used by the test cases below.
  class TestBl2seqReportData
    bioruby_root = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 5)).cleanpath.to_s
    TestDataBl2seq = Pathname.new(File.join(bioruby_root, 'test', 'data', 'bl2seq')).cleanpath.to_s

    # Returns the content of the fixture file selected by +format+
    # ('empty' or 'blastp').  Any other value, including the default,
    # returns nil because no fixture exists for it.
    def self.output(format = 7)
      case format
      when 'empty'
        # File.read closes the file; File.open(...).read left it open.
        File.read(File.join(TestDataBl2seq, 'cd8a_p53_e-5blastp.bl2seq'))
      when 'blastp'
        File.read(File.join(TestDataBl2seq, 'cd8a_cd8b_blastp.bl2seq'))
      when 'blastn', 'blastx', 'tblastn', 'tblastx'
        # No fixture files for these programs yet.
        nil
      end
    end
  end


  # Checks the record-separator constants of Bio::Blast::Bl2seq::Report.
  class TestBl2seqReportConstants < Test::Unit::TestCase
    def test_rs
      assert_nil(Bio::Blast::Bl2seq::Report::RS)
      assert_nil(Bio::Blast::Bl2seq::Report::DELIMITER)
    end
  end


  # Tests Bio::Blast::Bl2seq::Report with a result that has no hit ('empty')
  # and a result that has one hit ('blastp').
  class TestBl2seqReport < Test::Unit::TestCase

    def setup
      @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
      @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
    end

    def test_new
      assert(@empty)
      assert(@blastp)
    end

    # Methods that must not be listed by Report#methods.
    # Array#include? returns false (never nil) for a missing element, so the
    # check uses assert_equal(false, ...) rather than assert_nil.
    def test_undefed_methods
      methods = ['format0_parse_header',
                 'program',
                 'version',
                 'version_number',
                 'version_date',
                 'message',
                 'converged?',
                 'reference',
                 'db']

      [@empty, @blastp].each do |report|
        methods.each do |x|
          assert_equal(false, report.methods.include?(x), "undefined? : #{x}")
        end
      end
    end

    # TestF0dbstat < Test::Unit::TestCase

    def test_db_num
      assert_equal(0, @empty.db_num)
      assert_equal(0, @blastp.db_num)
    end

    def test_db_len
      assert_equal(393, @empty.db_len)
      assert_equal(210, @blastp.db_len)
    end

    # TestIteration < Test::Unit::TestCase

    # Methods that must not be listed by Iteration#methods.
    # This test needs its own name: when it was also called
    # test_undefed_methods it replaced the report-level test above, which
    # then never ran.
    def test_undefed_methods_for_iteration
      methods = ['message',
                 'pattern_in_database',
                 'pattern',
                 'pattern_positions',
                 'hits_found_again',
                 'hits_newly_found',
                 'hits_for_pattern',
                 'parse_hitlist',
                 'converged?']

      [@empty, @blastp].each do |report|
        iteration = report.iterations.first
        methods.each do |x|
          assert_equal(false, iteration.methods.include?(x), "undefined? : #{x}")
        end
      end
    end
  end


  # Tests the hit list of Bio::Blast::Bl2seq::Report.
  class TestBl2seqReportHit < Test::Unit::TestCase
    def setup
      @empty = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('empty'))
      @blastp = Bio::Blast::Bl2seq::Report.new(Bio::TestBl2seqReportData.output('blastp'))
      @empty_hit = @empty.hits.first
      @blastp_hit = @blastp.hits.first
    end

    def test_empty_hits
      assert_equal(0, @empty.hits.size)
      # An empty hit list has no first element.
      assert_nil(@empty_hit)
    end

    def test_hits
      assert_equal(Bio::Blast::Bl2seq::Report::Hit, @blastp_hit.class)
      assert_equal(1, @blastp.hits.size)
    end
  end

end # module Bio