bio 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,193 @@
1
+ RPS-BLAST 2.2.18 [Mar-02-2008]
2
+
3
+ Database: Pfam.v.22.0
4
+ 9318 sequences; 1,769,994 total letters
5
+
6
+ Searching..................................................done
7
+
8
+ Query= TestSequence mixture of globin and rhodopsin (computationally
9
+ randomly concatenated)
10
+ (495 letters)
11
+
12
+
13
+
14
+ Score E
15
+ Sequences producing significant alignments: (bits) Value
16
+
17
+ gnl|CDD|84466 pfam00042, Globin, Globin.. 110 2e-25
18
+ gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodop... 91 2e-19
19
+ gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (D... 37 0.003
20
+
21
+ >gnl|CDD|84466 pfam00042, Globin, Globin..
22
+ Length = 110
23
+
24
+ Score = 110 bits (277), Expect = 2e-25
25
+ Identities = 50/110 (45%), Positives = 69/110 (62%), Gaps = 5/110 (4%)
26
+
27
+ Query: 148 EKQLITGLWGKV--NVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHG 205
28
+ +K L+ WGKV N E GAE LARL YP T+ +F FG+LS+ A+ +P +AHG
29
+ Sbjct: 1 QKALVKASWGKVKGNAPEIGAEILARLFTAYPDTKAYFPKFGDLSTAEALKSSPKFKAHG 60
30
+
31
+ Query: 206 KKVLTSFGDAVKNLDN---IKNTFSQLSELHCDKLHVDPENFRLLGDILI 252
32
+ KKVL + G+AVK+LD+ +K +L H + HVDP NF+L G+ L+
33
+ Sbjct: 61 KKVLAALGEAVKHLDDDGNLKAALKKLGARHAKRGHVDPANFKLFGEALL 110
34
+
35
+
36
+ >gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This
37
+ family contains, amongst other G-protein-coupled
38
+ receptors (GCPRs), members of the opsin family, which
39
+ have been considered to be typical members of the
40
+ rhodopsin superfamily. They share several motifs, mainly
41
+ the seven transmembrane helices, GCPRs of the rhodopsin
42
+ superfamily. All opsins bind a chromophore, such as
43
+ 11-cis-retinal. The function of most opsins other than
44
+ the photoisomerases is split into two steps: light
45
+ absorption and G-protein activation. Photoisomerases, on
46
+ the other hand, are not coupled to G-proteins - they are
47
+ thought to generate and supply the chromophore that is
48
+ used by visual opsins..
49
+ Length = 258
50
+
51
+ Score = 90.8 bits (225), Expect = 2e-19
52
+ Identities = 37/162 (22%), Positives = 76/162 (46%), Gaps = 10/162 (6%)
53
+
54
+ Query: 299 HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV 357
55
+ A + + WV+AL + PPL + EG +C ID+ S+ + ++
56
+ Sbjct: 100 RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLL 156
57
+
58
+ Query: 358 HFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWV 413
59
+ F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+
60
+ Sbjct: 157 GFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWL 216
61
+
62
+ Query: 414 PYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY 453
63
+ PY V + P + I + A + NP+IY
64
+ Sbjct: 217 PYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY 258
65
+
66
+
67
+
68
+ Score = 73.4 bits (180), Expect = 3e-14
69
+ Identities = 32/86 (37%), Positives = 47/86 (54%)
70
+
71
+ Query: 55 NFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEG 114
72
+ N L + V ++ K+LRTP N LLNLAVADL +L LY + G + FG C L G
73
+ Sbjct: 2 NLLVILVILRTKRLRTPTNIFLLNLAVADLLFLLTLPPWALYYLVGGDWPFGDALCKLVG 61
74
+
75
+ Query: 115 FFATLGGEIALWSLVVLAIERYVVVC 140
76
+ + G ++ L ++I+RY+ +
77
+ Sbjct: 62 ALFVVNGYASILLLTAISIDRYLAIV 87
78
+
79
+
80
+ >gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). This
81
+ family represents a conserved region approximately 80
82
+ residues long within a number of proteins of unknown
83
+ function that seem to be specific to C. elegans. Some
84
+ family members contain more than one copy of this
85
+ region..
86
+ Length = 336
87
+
88
+ Score = 37.1 bits (86), Expect = 0.003
89
+ Identities = 32/145 (22%), Positives = 58/145 (40%), Gaps = 7/145 (4%)
90
+
91
+ Query: 336 IDYYTLKPEVNNESFVIYMFV--VHFT-IPMIIIFFCYGQLVFTVKEAAAQQQESATTQK 392
92
+ I+Y E+ S+ I + + + F IP II+ L+F +K+ S+T+
93
+ Sbjct: 192 IEYIIETTELFGSSYEILLLIEGILFKLIPSIILPIATILLIFQLKKNKKVSSRSSTSSS 251
94
+
95
+ Query: 393 AEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVI 452
96
+ + T++V + I+FLI VP + F + + A + N I
97
+ Sbjct: 252 SNDRSTKLVTFVTISFLIATVPLGILYLIKFFVFEYEGLVMIIDKLAIIFTFLSTINGTI 311
98
+
99
+ Query: 453 YIM----MNKQFRNCMLTTICCGKN 473
100
+ + + M+ Q+RN + K
101
+ Sbjct: 312 HFLICYFMSSQYRNTVREMFGRKKK 336
102
+
103
+
104
+ Query= randomseq3
105
+ (1087 letters)
106
+
107
+ ***** No hits found ******
108
+
109
+
110
+ Query= gi|6013469|gb|AAD49229.2|AF159462_1 EHEC factor for adherence
111
+ [Escherichia coli]
112
+ (3223 letters)
113
+
114
+
115
+
116
+ Score E
117
+ Sequences producing significant alignments: (bits) Value
118
+
119
+ gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sug... 84 1e-16
120
+ gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-bindi... 37 0.019
121
+
122
+ >gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sugar-binding region
123
+ containing DXD motif. The DXD motif is a short conserved
124
+ motif found in many families of glycosyltransferases,
125
+ which add a range of different sugars to other sugars,
126
+ phosphates and proteins. DXD-containing
127
+ glycosyltransferases all use nucleoside diphosphate
128
+ sugars as donors and require divalent cations, usually
129
+ manganese. The DXD motif is expected to play a
130
+ carbohydrate binding role in sugar-nucleoside
131
+ diphosphate and manganese dependent
132
+ glycosyltransferases..
133
+ Length = 86
134
+
135
+ Score = 84.2 bits (208), Expect = 1e-16
136
+ Identities = 33/85 (38%), Positives = 40/85 (47%), Gaps = 2/85 (2%)
137
+
138
+ Query: 505 RISIKDVNSLTSLSKSENNHNYQTEMLLRWNYPAA-SDLLRMYILKEHGGIYTDTDMMPA 563
139
+ I L SL N + + EM LRW Y AA SD LR IL ++GGIY DTD++P
140
+ Sbjct: 1 YDVILVTPDLESLFIDTNAYPWFQEMFLRWPYNAAASDFLRYAILYKYGGIYLDTDVIPL 60
141
+
142
+ Query: 564 YSKQVIFKIMMQTN-GDNRFLEDLK 587
143
+ S V+ I R E L
144
+ Sbjct: 61 KSLDVLINIEGSNFLDGERSFERLN 85
145
+
146
+
147
+ >gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-binding domain. Xanthine
148
+ dehydrogenases, that also bind FAD/NAD, have essentially
149
+ no similarity..
150
+ Length = 110
151
+
152
+ Score = 37.2 bits (86), Expect = 0.019
153
+ Identities = 16/82 (19%), Positives = 36/82 (43%), Gaps = 3/82 (3%)
154
+
155
+ Query: 959 IKGFLASNPHTKINILYSNKTEHNIFIKDLFSFAVMENELRDIINNMSKDKTPENWEGRV 1018
156
+ +K L T++ ++Y N+TE ++ +++ + R + + T + W GR
157
+ Sbjct: 16 LKALLEDEDGTEVYLVYGNRTEDDLLLREELEELAKKYPGRLKVVAVVSR-TDDGWYGRK 74
158
+
159
+ Query: 1019 MLQRYLELKMKDHLSLQSSQEA 1040
160
+ + +++HLSL +
161
+ Sbjct: 75 G--YVTDALLEEHLSLIDLDDT 94
162
+
163
+
164
+ Database: Pfam.v.22.0
165
+ Posted date: Nov 8, 2007 6:06 PM
166
+ Number of letters in database: 1,769,994
167
+ Number of sequences in database: 9318
168
+
169
+ Lambda K H
170
+ 0.327 0.139 0.439
171
+
172
+ Gapped
173
+ Lambda K H
174
+ 0.267 0.0632 0.140
175
+
176
+
177
+ Matrix: BLOSUM62
178
+ Gap Penalties: Existence: 11, Extension: 1
179
+ Number of Sequences: 9318
180
+ Number of Hits to DB: 28,279,060
181
+ Number of extensions: 2147710
182
+ Number of successful extensions: 3028
183
+ Number of sequences better than 2.0e-02: 3
184
+ Number of HSP's gapped: 3016
185
+ Number of HSP's successfully gapped: 20
186
+ Length of database: 1,769,994
187
+ Neighboring words threshold: 11
188
+ Window for multiple hits: 40
189
+ X1: 15 ( 7.1 bits)
190
+ X2: 38 (14.6 bits)
191
+ X3: 64 (24.7 bits)
192
+ S1: 40 (21.7 bits)
193
+ S2: 77 (33.6 bits)
File without changes
@@ -0,0 +1,115 @@
1
+ #
2
+ # = test/functional/bio/appl/test_pts1.rb - Unit test for Bio::PTS1 with network connection
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio/appl/pts1'
17
+
18
+
19
+ module Bio
20
+
21
+ class FuncTestPTS1 < Test::Unit::TestCase
22
+
23
+ def setup
24
+ @seq =<<END
25
+ >AB000464
26
+ MRTGGDNAGPSHSHIKRLPTSGLSTWLQGTQTCVLHLPTGTRPPAHHPLLGYSSRRSYRL
27
+ LENPAAGCWARFSFCQGAAWDWDLEGVQWLRALAGGVSTAPSAPPGNLVFLSVSIFLCGS
28
+ LLLETCPAYFSSLDPD*
29
+ END
30
+ @serv = Bio::PTS1.new
31
+ end
32
+
33
+
34
+ def test_function_set
35
+ @serv.function("GENERAL")
36
+ assert_equal("GENERAL", @serv.function)
37
+ end
38
+
39
+ def test_function_show
40
+ assert_equal("METAZOA-specific", @serv.function)
41
+ end
42
+
43
+ def test_function_set_number_1
44
+ @serv.function(1)
45
+ assert_equal("METAZOA-specific", @serv.function)
46
+ end
47
+
48
+ def test_function_set_number_2
49
+ @serv.function(2)
50
+ assert_equal("FUNGI-specific", @serv.function)
51
+ end
52
+
53
+ def test_function_set_number_3
54
+ @serv.function(3)
55
+ assert_equal("GENERAL", @serv.function)
56
+ end
57
+
58
+
59
+ def test_exec
60
+ report = @serv.exec(@seq)
61
+ assert_equal(Bio::PTS1::Report, report.class)
62
+ end
63
+
64
+ def test_exec_with_faa
65
+ report = @serv.exec(Bio::FastaFormat.new(@seq))
66
+ assert_equal(Bio::PTS1::Report, report.class)
67
+ end
68
+
69
+ end
70
+
71
+ class FuncTestPTS1Report < Test::Unit::TestCase
72
+ def setup
73
+ serv = Bio::PTS1.new
74
+ seq = ">hoge\nAVSFLSMRRARL\n"
75
+ @report = serv.exec(seq)
76
+ end
77
+
78
+
79
+ def test_output_size
80
+ assert_equal(1634, @report.output.size)
81
+ end
82
+
83
+ def test_entry_id
84
+ assert_equal("hoge", @report.entry_id)
85
+ end
86
+
87
+ def test_prediction
88
+ assert_equal("Targeted", @report.prediction)
89
+ end
90
+
91
+ def test_cterm
92
+ assert_equal("AVSFLSMRRARL", @report.cterm)
93
+ end
94
+
95
+ def test_score
96
+ assert_equal("7.559", @report.score)
97
+ end
98
+
99
+ def test_fp
100
+ assert_equal("2.5e-04", @report.fp)
101
+ end
102
+
103
+ def test_sppta
104
+ assert_equal("-5.833", @report.sppta)
105
+ end
106
+
107
+ def test_spptna
108
+ assert_equal("-1.698", @report.spptna)
109
+ end
110
+
111
+ def test_profile
112
+ assert_equal("15.091", @report.profile)
113
+ end
114
+ end
115
+ end
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: test_ensembl.rb,v 1.5 2007/11/10 16:57:43 nakao Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
11
  require 'pathname'
@@ -40,6 +40,13 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
40
40
  def test_server
41
41
  assert_equal("http://www.ensembl.org", @serv.server)
42
42
  end
43
+ end
44
+
45
+ class FuncTestEnsemblHumanExportView < Test::Unit::TestCase
46
+ def setup
47
+ @serv = Bio::Ensembl.new('Homo_sapiens',
48
+ 'http://jul2008.archive.ensembl.org')
49
+ end
43
50
 
44
51
  def test_fna_exportview
45
52
  seq = ">4 dna:chromosome chromosome:NCBI36:4:1149206:1149209:1\nGAGA\n"
@@ -72,82 +79,80 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
72
79
  assert_equal(fna10, fna)
73
80
  end
74
81
 
75
- def test_gff_exportview
76
- line = ["chromosome:NCBI36:4:1149206:1149209:1",
77
- "Ensembl",
78
- "Gene",
79
- "-839",
80
- "2747",
81
- ".",
82
- "+",
83
- ".",
84
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n"
85
- line = ["4",
86
- "Ensembl",
87
- "Gene",
88
- "1148366",
89
- "1151952",
90
- ".",
91
- "+",
92
- "1",
93
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
82
+ def test_gff_exportview_for_empty_result
94
83
  gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
84
+ assert_equal('', gff)
85
+ end
86
+
87
+ def test_gff_exportview
88
+ # OR1A1 (Olfactory receptor 1A1)
89
+ lines = [ [ "17",
90
+ "Ensembl",
91
+ "Gene",
92
+ "3065665",
93
+ "3066594",
94
+ ".",
95
+ "+",
96
+ "1",
97
+ "gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
98
+ ],
99
+ [ "17",
100
+ "Vega",
101
+ "Gene",
102
+ "3065665",
103
+ "3066594",
104
+ ".",
105
+ "+",
106
+ "1",
107
+ "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
108
+ ]
109
+ ]
110
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
111
+ gff = @serv.exportview(17, 3065665, 3066594, ['gene'])
95
112
  assert_equal(line, gff)
96
113
  end
97
114
 
98
- def test_gff_exportview_with_named_args
99
- line = ["chromosome:NCBI36:4:1149206:1149209:1",
100
- "Ensembl",
101
- "Gene",
102
- "-839",
103
- "2747",
104
- ".",
105
- "+",
106
- ".",
107
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
108
- line = ["4",
109
- "Ensembl",
110
- "Gene",
111
- "1148366",
112
- "1151952",
113
- ".",
114
- "+",
115
- "1",
116
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
115
+ def test_gff_exportview_with_named_args_for_empty_result
117
116
  gff = @serv.exportview(:seq_region_name => 4,
118
117
  :anchor1 => 1149206,
119
118
  :anchor2 => 1149209,
120
119
  :options => ['gene'])
120
+ assert_equal('', gff)
121
+ end
122
+
123
+ def test_gff_exportview_with_named_args
124
+ # OR1A1 (Olfactory receptor 1A1)
125
+ lines = [ [ "17",
126
+ "Ensembl",
127
+ "Gene",
128
+ "3065665",
129
+ "3066594",
130
+ ".",
131
+ "+",
132
+ "1",
133
+ "gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
134
+ ],
135
+ [ "17",
136
+ "Vega",
137
+ "Gene",
138
+ "3065665",
139
+ "3066594",
140
+ ".",
141
+ "+",
142
+ "1",
143
+ "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
144
+ ]
145
+ ]
146
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
147
+ gff = @serv.exportview(:seq_region_name => 17,
148
+ :anchor1 => 3065665,
149
+ :anchor2 => 3066594,
150
+ :options => ['gene'])
121
151
  assert_equal(line, gff)
122
152
  end
123
153
 
124
- def test_tab_exportview_with_named_args
125
- line = [["seqname",
126
- "source",
127
- "feature",
128
- "start",
129
- "end",
130
- "score",
131
- "strand",
132
- "frame",
133
- "gene_id",
134
- "transcript_id",
135
- "exon_id",
136
- "gene_type"].join("\t"),
137
- ["chromosome:NCBI36:4:1149206:1149209:1",
138
- "Ensembl",
139
- "Gene",
140
- "-839",
141
- "2747",
142
- ".",
143
- "+",
144
- ".",
145
- "ENSG00000206158",
146
- "ENST00000382964",
147
- "ENSE00001494097",
148
- "KNOWN_protein_coding"].join("\t") + "\n"
149
- ].join("\n")
150
- line = [["seqname",
154
+ def test_tab_exportview_with_named_args_for_empty_result
155
+ line = ["seqname",
151
156
  "source",
152
157
  "feature",
153
158
  "start",
@@ -158,20 +163,7 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
158
163
  "gene_id",
159
164
  "transcript_id",
160
165
  "exon_id",
161
- "gene_type"].join("\t"),
162
- ["4",
163
- "Ensembl",
164
- "Gene",
165
- "1148366",
166
- "1151952",
167
- ".",
168
- "+",
169
- "1",
170
- "ENSG00000206158",
171
- "ENST00000382964",
172
- "ENSE00001494097",
173
- "KNOWN_protein_coding"].join("\t") + "\n"
174
- ].join("\n")
166
+ "gene_type"].join("\t") + "\n"
175
167
  gff = @serv.exportview(:seq_region_name => 4,
176
168
  :anchor1 => 1149206,
177
169
  :anchor2 => 1149209,
@@ -180,6 +172,57 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
180
172
  assert_equal(line, gff)
181
173
  end
182
174
 
175
+ def test_tab_exportview_with_named_args
176
+ # OR1A1 (Olfactory receptor 1A1)
177
+ lines = [ [ "seqname",
178
+ "source",
179
+ "feature",
180
+ "start",
181
+ "end",
182
+ "score",
183
+ "strand",
184
+ "frame",
185
+ "gene_id",
186
+ "transcript_id",
187
+ "exon_id",
188
+ "gene_type"
189
+ ],
190
+ [ "17",
191
+ "Ensembl",
192
+ "Gene",
193
+ "3065665",
194
+ "3066594",
195
+ ".",
196
+ "+",
197
+ "1",
198
+ "ENSG00000172146",
199
+ "ENST00000304094",
200
+ "ENSE00001137815",
201
+ "KNOWN_protein_coding"
202
+ ],
203
+ [ "17",
204
+ "Vega",
205
+ "Gene",
206
+ "3065665",
207
+ "3066594",
208
+ ".",
209
+ "+",
210
+ "1",
211
+ "OTTHUMG00000090637",
212
+ "OTTHUMT00000207292",
213
+ "OTTHUME00001080001",
214
+ "KNOWN_protein_coding"
215
+ ]
216
+ ]
217
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
218
+ gff = @serv.exportview(:seq_region_name => 17,
219
+ :anchor1 => 3065665,
220
+ :anchor2 => 3066594,
221
+ :options => ['gene'],
222
+ :format => 'tab')
223
+ assert_equal(line, gff)
224
+ end
225
+
183
226
 
184
227
  end
185
228