bio 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (259) hide show
  1. data/ChangeLog +3421 -0
  2. data/KNOWN_ISSUES.rdoc +88 -0
  3. data/README.rdoc +252 -0
  4. data/README_DEV.rdoc +285 -0
  5. data/Rakefile +143 -0
  6. data/bin/bioruby +0 -0
  7. data/bin/br_biofetch.rb +0 -0
  8. data/bin/br_bioflat.rb +12 -1
  9. data/bin/br_biogetseq.rb +0 -0
  10. data/bin/br_pmfetch.rb +4 -3
  11. data/bioruby.gemspec +477 -0
  12. data/bioruby.gemspec.erb +117 -0
  13. data/doc/Changes-0.7.rd +7 -0
  14. data/doc/Changes-1.3.rdoc +239 -0
  15. data/doc/Tutorial.rd +296 -184
  16. data/doc/Tutorial.rd.html +1031 -0
  17. data/doc/Tutorial.rd.ja +111 -45
  18. data/doc/Tutorial.rd.ja.html +2225 -0
  19. data/doc/bioruby.css +281 -0
  20. data/extconf.rb +2 -0
  21. data/lib/bio.rb +29 -4
  22. data/lib/bio/appl/blast.rb +306 -121
  23. data/lib/bio/appl/blast/ddbj.rb +142 -0
  24. data/lib/bio/appl/blast/format0.rb +35 -25
  25. data/lib/bio/appl/blast/format8.rb +2 -2
  26. data/lib/bio/appl/blast/genomenet.rb +263 -0
  27. data/lib/bio/appl/blast/ncbioptions.rb +220 -0
  28. data/lib/bio/appl/blast/remote.rb +106 -0
  29. data/lib/bio/appl/blast/report.rb +260 -9
  30. data/lib/bio/appl/blast/rexml.rb +12 -5
  31. data/lib/bio/appl/blast/rpsblast.rb +277 -0
  32. data/lib/bio/appl/blast/wublast.rb +133 -12
  33. data/lib/bio/appl/blast/xmlparser.rb +35 -18
  34. data/lib/bio/appl/blat/report.rb +46 -5
  35. data/lib/bio/appl/emboss.rb +62 -13
  36. data/lib/bio/appl/fasta.rb +9 -11
  37. data/lib/bio/appl/genscan/report.rb +3 -3
  38. data/lib/bio/appl/hmmer.rb +1 -1
  39. data/lib/bio/appl/hmmer/report.rb +10 -10
  40. data/lib/bio/appl/paml/baseml.rb +95 -0
  41. data/lib/bio/appl/paml/baseml/report.rb +32 -0
  42. data/lib/bio/appl/paml/codeml.rb +242 -0
  43. data/lib/bio/appl/paml/codeml/rates.rb +67 -0
  44. data/lib/bio/appl/paml/codeml/report.rb +67 -0
  45. data/lib/bio/appl/paml/common.rb +348 -0
  46. data/lib/bio/appl/paml/common_report.rb +38 -0
  47. data/lib/bio/appl/paml/yn00.rb +103 -0
  48. data/lib/bio/appl/paml/yn00/report.rb +32 -0
  49. data/lib/bio/appl/psort.rb +2 -2
  50. data/lib/bio/appl/pts1.rb +5 -5
  51. data/lib/bio/appl/tmhmm/report.rb +10 -1
  52. data/lib/bio/command.rb +297 -41
  53. data/lib/bio/compat/features.rb +157 -0
  54. data/lib/bio/compat/references.rb +128 -0
  55. data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
  56. data/lib/bio/db/biosql/sequence.rb +508 -0
  57. data/lib/bio/db/embl/common.rb +28 -12
  58. data/lib/bio/db/embl/embl.rb +107 -9
  59. data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
  60. data/lib/bio/db/embl/format_embl.rb +190 -0
  61. data/lib/bio/db/embl/sptr.rb +15 -16
  62. data/lib/bio/db/fantom.rb +6 -8
  63. data/lib/bio/db/fasta.rb +10 -507
  64. data/lib/bio/db/fasta/defline.rb +532 -0
  65. data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
  66. data/lib/bio/db/fasta/format_fasta.rb +97 -0
  67. data/lib/bio/db/genbank/common.rb +25 -8
  68. data/lib/bio/db/genbank/format_genbank.rb +187 -0
  69. data/lib/bio/db/genbank/genbank.rb +36 -1
  70. data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
  71. data/lib/bio/db/gff.rb +1791 -119
  72. data/lib/bio/db/kegg/glycan.rb +2 -6
  73. data/lib/bio/db/lasergene.rb +3 -3
  74. data/lib/bio/db/medline.rb +4 -1
  75. data/lib/bio/db/newick.rb +10 -10
  76. data/lib/bio/db/pdb/chain.rb +6 -2
  77. data/lib/bio/db/pdb/pdb.rb +12 -3
  78. data/lib/bio/db/rebase.rb +7 -8
  79. data/lib/bio/db/soft.rb +3 -3
  80. data/lib/bio/feature.rb +1 -88
  81. data/lib/bio/io/biosql/biodatabase.rb +64 -0
  82. data/lib/bio/io/biosql/bioentry.rb +29 -0
  83. data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
  84. data/lib/bio/io/biosql/bioentry_path.rb +12 -0
  85. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
  86. data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
  87. data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
  88. data/lib/bio/io/biosql/biosequence.rb +11 -0
  89. data/lib/bio/io/biosql/comment.rb +7 -0
  90. data/lib/bio/io/biosql/config/database.yml +20 -0
  91. data/lib/bio/io/biosql/dbxref.rb +13 -0
  92. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
  93. data/lib/bio/io/biosql/location.rb +32 -0
  94. data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
  95. data/lib/bio/io/biosql/ontology.rb +10 -0
  96. data/lib/bio/io/biosql/reference.rb +9 -0
  97. data/lib/bio/io/biosql/seqfeature.rb +32 -0
  98. data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
  99. data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
  100. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
  101. data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
  102. data/lib/bio/io/biosql/taxon.rb +12 -0
  103. data/lib/bio/io/biosql/taxon_name.rb +9 -0
  104. data/lib/bio/io/biosql/term.rb +27 -0
  105. data/lib/bio/io/biosql/term_dbxref.rb +11 -0
  106. data/lib/bio/io/biosql/term_path.rb +12 -0
  107. data/lib/bio/io/biosql/term_relationship.rb +13 -0
  108. data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
  109. data/lib/bio/io/biosql/term_synonym.rb +10 -0
  110. data/lib/bio/io/das.rb +7 -7
  111. data/lib/bio/io/ddbjxml.rb +57 -0
  112. data/lib/bio/io/ensembl.rb +2 -2
  113. data/lib/bio/io/fetch.rb +28 -14
  114. data/lib/bio/io/flatfile.rb +17 -853
  115. data/lib/bio/io/flatfile/autodetection.rb +545 -0
  116. data/lib/bio/io/flatfile/buffer.rb +237 -0
  117. data/lib/bio/io/flatfile/index.rb +17 -7
  118. data/lib/bio/io/flatfile/indexer.rb +30 -12
  119. data/lib/bio/io/flatfile/splitter.rb +297 -0
  120. data/lib/bio/io/hinv.rb +442 -0
  121. data/lib/bio/io/keggapi.rb +2 -2
  122. data/lib/bio/io/ncbirest.rb +733 -0
  123. data/lib/bio/io/pubmed.rb +34 -80
  124. data/lib/bio/io/registry.rb +2 -2
  125. data/lib/bio/io/sql.rb +178 -357
  126. data/lib/bio/io/togows.rb +458 -0
  127. data/lib/bio/location.rb +106 -11
  128. data/lib/bio/pathway.rb +120 -14
  129. data/lib/bio/reference.rb +115 -101
  130. data/lib/bio/sequence.rb +164 -183
  131. data/lib/bio/sequence/adapter.rb +108 -0
  132. data/lib/bio/sequence/common.rb +22 -45
  133. data/lib/bio/sequence/compat.rb +2 -2
  134. data/lib/bio/sequence/dblink.rb +54 -0
  135. data/lib/bio/sequence/format.rb +254 -77
  136. data/lib/bio/sequence/format_raw.rb +23 -0
  137. data/lib/bio/shell.rb +3 -1
  138. data/lib/bio/shell/core.rb +2 -2
  139. data/lib/bio/shell/plugin/entry.rb +33 -4
  140. data/lib/bio/shell/plugin/ncbirest.rb +64 -0
  141. data/lib/bio/shell/plugin/togows.rb +40 -0
  142. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
  143. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
  144. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
  145. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
  146. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
  147. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
  148. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
  149. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
  150. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
  151. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
  152. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
  153. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
  154. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
  155. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
  156. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
  157. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
  158. data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
  159. data/lib/bio/tree.rb +4 -2
  160. data/lib/bio/util/color_scheme.rb +2 -2
  161. data/lib/bio/util/contingency_table.rb +2 -2
  162. data/lib/bio/util/restriction_enzyme.rb +2 -2
  163. data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
  164. data/lib/bio/version.rb +25 -0
  165. data/rdoc.zsh +8 -0
  166. data/sample/any2fasta.rb +0 -0
  167. data/sample/biofetch.rb +0 -0
  168. data/sample/dbget +0 -0
  169. data/sample/demo_sequence.rb +158 -0
  170. data/sample/enzymes.rb +0 -0
  171. data/sample/fasta2tab.rb +0 -0
  172. data/sample/fastagrep.rb +72 -0
  173. data/sample/fastasort.rb +54 -0
  174. data/sample/fsplit.rb +0 -0
  175. data/sample/gb2fasta.rb +2 -3
  176. data/sample/gb2tab.rb +0 -0
  177. data/sample/gbtab2mysql.rb +0 -0
  178. data/sample/genes2nuc.rb +0 -0
  179. data/sample/genes2pep.rb +0 -0
  180. data/sample/genes2tab.rb +0 -0
  181. data/sample/genome2rb.rb +0 -0
  182. data/sample/genome2tab.rb +0 -0
  183. data/sample/goslim.rb +0 -0
  184. data/sample/gt2fasta.rb +0 -0
  185. data/sample/na2aa.rb +34 -0
  186. data/sample/pmfetch.rb +0 -0
  187. data/sample/pmsearch.rb +0 -0
  188. data/sample/ssearch2tab.rb +0 -0
  189. data/sample/tfastx2tab.rb +0 -0
  190. data/sample/vs-genes.rb +0 -0
  191. data/setup.rb +1596 -0
  192. data/test/data/blast/blastp-multi.m7 +188 -0
  193. data/test/data/command/echoarg2.bat +1 -0
  194. data/test/data/paml/codeml/control_file.txt +30 -0
  195. data/test/data/paml/codeml/output.txt +78 -0
  196. data/test/data/paml/codeml/rates +217 -0
  197. data/test/data/rpsblast/misc.rpsblast +193 -0
  198. data/test/data/soft/GDS100_partial.soft +0 -0
  199. data/test/data/soft/GSE3457_family_partial.soft +0 -0
  200. data/test/functional/bio/appl/test_pts1.rb +115 -0
  201. data/test/functional/bio/io/test_ensembl.rb +123 -80
  202. data/test/functional/bio/io/test_togows.rb +267 -0
  203. data/test/functional/bio/sequence/test_output_embl.rb +51 -0
  204. data/test/functional/bio/test_command.rb +301 -0
  205. data/test/runner.rb +17 -1
  206. data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
  207. data/test/unit/bio/appl/blast/test_report.rb +753 -35
  208. data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
  209. data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
  210. data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
  211. data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
  212. data/test/unit/bio/appl/test_blast.rb +135 -4
  213. data/test/unit/bio/appl/test_fasta.rb +2 -2
  214. data/test/unit/bio/appl/test_pts1.rb +1 -64
  215. data/test/unit/bio/db/embl/test_common.rb +15 -15
  216. data/test/unit/bio/db/embl/test_embl.rb +4 -4
  217. data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
  218. data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
  219. data/test/unit/bio/db/embl/test_sptr.rb +38 -1
  220. data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
  221. data/test/unit/bio/db/test_gff.rb +1151 -25
  222. data/test/unit/bio/db/test_medline.rb +127 -0
  223. data/test/unit/bio/db/test_nexus.rb +5 -1
  224. data/test/unit/bio/db/test_prosite.rb +4 -4
  225. data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
  226. data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
  227. data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
  228. data/test/unit/bio/io/test_ddbjxml.rb +8 -3
  229. data/test/unit/bio/io/test_fastacmd.rb +5 -5
  230. data/test/unit/bio/io/test_flatfile.rb +357 -106
  231. data/test/unit/bio/io/test_soapwsdl.rb +2 -2
  232. data/test/unit/bio/io/test_togows.rb +161 -0
  233. data/test/unit/bio/sequence/test_common.rb +210 -11
  234. data/test/unit/bio/sequence/test_compat.rb +3 -3
  235. data/test/unit/bio/sequence/test_dblink.rb +58 -0
  236. data/test/unit/bio/sequence/test_na.rb +2 -2
  237. data/test/unit/bio/test_command.rb +111 -50
  238. data/test/unit/bio/test_feature.rb +29 -1
  239. data/test/unit/bio/test_location.rb +566 -6
  240. data/test/unit/bio/test_pathway.rb +91 -65
  241. data/test/unit/bio/test_reference.rb +67 -13
  242. data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
  243. data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
  244. data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
  245. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
  246. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
  247. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
  248. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
  249. data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
  250. data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
  251. data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
  252. data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
  253. data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
  254. data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
  255. data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
  256. data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
  257. data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
  258. metadata +202 -167
  259. data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,193 @@
1
+ RPS-BLAST 2.2.18 [Mar-02-2008]
2
+
3
+ Database: Pfam.v.22.0
4
+ 9318 sequences; 1,769,994 total letters
5
+
6
+ Searching..................................................done
7
+
8
+ Query= TestSequence mixture of globin and rhodopsin (computationally
9
+ randomly concatenated)
10
+ (495 letters)
11
+
12
+
13
+
14
+ Score E
15
+ Sequences producing significant alignments: (bits) Value
16
+
17
+ gnl|CDD|84466 pfam00042, Globin, Globin.. 110 2e-25
18
+ gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodop... 91 2e-19
19
+ gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (D... 37 0.003
20
+
21
+ >gnl|CDD|84466 pfam00042, Globin, Globin..
22
+ Length = 110
23
+
24
+ Score = 110 bits (277), Expect = 2e-25
25
+ Identities = 50/110 (45%), Positives = 69/110 (62%), Gaps = 5/110 (4%)
26
+
27
+ Query: 148 EKQLITGLWGKV--NVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHG 205
28
+ +K L+ WGKV N E GAE LARL YP T+ +F FG+LS+ A+ +P +AHG
29
+ Sbjct: 1 QKALVKASWGKVKGNAPEIGAEILARLFTAYPDTKAYFPKFGDLSTAEALKSSPKFKAHG 60
30
+
31
+ Query: 206 KKVLTSFGDAVKNLDN---IKNTFSQLSELHCDKLHVDPENFRLLGDILI 252
32
+ KKVL + G+AVK+LD+ +K +L H + HVDP NF+L G+ L+
33
+ Sbjct: 61 KKVLAALGEAVKHLDDDGNLKAALKKLGARHAKRGHVDPANFKLFGEALL 110
34
+
35
+
36
+ >gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This
37
+ family contains, amongst other G-protein-coupled
38
+ receptors (GCPRs), members of the opsin family, which
39
+ have been considered to be typical members of the
40
+ rhodopsin superfamily. They share several motifs, mainly
41
+ the seven transmembrane helices, GCPRs of the rhodopsin
42
+ superfamily. All opsins bind a chromophore, such as
43
+ 11-cis-retinal. The function of most opsins other than
44
+ the photoisomerases is split into two steps: light
45
+ absorption and G-protein activation. Photoisomerases, on
46
+ the other hand, are not coupled to G-proteins - they are
47
+ thought to generate and supply the chromophore that is
48
+ used by visual opsins..
49
+ Length = 258
50
+
51
+ Score = 90.8 bits (225), Expect = 2e-19
52
+ Identities = 37/162 (22%), Positives = 76/162 (46%), Gaps = 10/162 (6%)
53
+
54
+ Query: 299 HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV 357
55
+ A + + WV+AL + PPL + EG +C ID+ S+ + ++
56
+ Sbjct: 100 RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLL 156
57
+
58
+ Query: 358 HFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWV 413
59
+ F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+
60
+ Sbjct: 157 GFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWL 216
61
+
62
+ Query: 414 PYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY 453
63
+ PY V + P + I + A + NP+IY
64
+ Sbjct: 217 PYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY 258
65
+
66
+
67
+
68
+ Score = 73.4 bits (180), Expect = 3e-14
69
+ Identities = 32/86 (37%), Positives = 47/86 (54%)
70
+
71
+ Query: 55 NFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEG 114
72
+ N L + V ++ K+LRTP N LLNLAVADL +L LY + G + FG C L G
73
+ Sbjct: 2 NLLVILVILRTKRLRTPTNIFLLNLAVADLLFLLTLPPWALYYLVGGDWPFGDALCKLVG 61
74
+
75
+ Query: 115 FFATLGGEIALWSLVVLAIERYVVVC 140
76
+ + G ++ L ++I+RY+ +
77
+ Sbjct: 62 ALFVVNGYASILLLTAISIDRYLAIV 87
78
+
79
+
80
+ >gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). This
81
+ family represents a conserved region approximately 80
82
+ residues long within a number of proteins of unknown
83
+ function that seem to be specific to C. elegans. Some
84
+ family members contain more than one copy of this
85
+ region..
86
+ Length = 336
87
+
88
+ Score = 37.1 bits (86), Expect = 0.003
89
+ Identities = 32/145 (22%), Positives = 58/145 (40%), Gaps = 7/145 (4%)
90
+
91
+ Query: 336 IDYYTLKPEVNNESFVIYMFV--VHFT-IPMIIIFFCYGQLVFTVKEAAAQQQESATTQK 392
92
+ I+Y E+ S+ I + + + F IP II+ L+F +K+ S+T+
93
+ Sbjct: 192 IEYIIETTELFGSSYEILLLIEGILFKLIPSIILPIATILLIFQLKKNKKVSSRSSTSSS 251
94
+
95
+ Query: 393 AEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVI 452
96
+ + T++V + I+FLI VP + F + + A + N I
97
+ Sbjct: 252 SNDRSTKLVTFVTISFLIATVPLGILYLIKFFVFEYEGLVMIIDKLAIIFTFLSTINGTI 311
98
+
99
+ Query: 453 YIM----MNKQFRNCMLTTICCGKN 473
100
+ + + M+ Q+RN + K
101
+ Sbjct: 312 HFLICYFMSSQYRNTVREMFGRKKK 336
102
+
103
+
104
+ Query= randomseq3
105
+ (1087 letters)
106
+
107
+ ***** No hits found ******
108
+
109
+
110
+ Query= gi|6013469|gb|AAD49229.2|AF159462_1 EHEC factor for adherence
111
+ [Escherichia coli]
112
+ (3223 letters)
113
+
114
+
115
+
116
+ Score E
117
+ Sequences producing significant alignments: (bits) Value
118
+
119
+ gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sug... 84 1e-16
120
+ gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-bindi... 37 0.019
121
+
122
+ >gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sugar-binding region
123
+ containing DXD motif. The DXD motif is a short conserved
124
+ motif found in many families of glycosyltransferases,
125
+ which add a range of different sugars to other sugars,
126
+ phosphates and proteins. DXD-containing
127
+ glycosyltransferases all use nucleoside diphosphate
128
+ sugars as donors and require divalent cations, usually
129
+ manganese. The DXD motif is expected to play a
130
+ carbohydrate binding role in sugar-nucleoside
131
+ diphosphate and manganese dependent
132
+ glycosyltransferases..
133
+ Length = 86
134
+
135
+ Score = 84.2 bits (208), Expect = 1e-16
136
+ Identities = 33/85 (38%), Positives = 40/85 (47%), Gaps = 2/85 (2%)
137
+
138
+ Query: 505 RISIKDVNSLTSLSKSENNHNYQTEMLLRWNYPAA-SDLLRMYILKEHGGIYTDTDMMPA 563
139
+ I L SL N + + EM LRW Y AA SD LR IL ++GGIY DTD++P
140
+ Sbjct: 1 YDVILVTPDLESLFIDTNAYPWFQEMFLRWPYNAAASDFLRYAILYKYGGIYLDTDVIPL 60
141
+
142
+ Query: 564 YSKQVIFKIMMQTN-GDNRFLEDLK 587
143
+ S V+ I R E L
144
+ Sbjct: 61 KSLDVLINIEGSNFLDGERSFERLN 85
145
+
146
+
147
+ >gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-binding domain. Xanthine
148
+ dehydrogenases, that also bind FAD/NAD, have essentially
149
+ no similarity..
150
+ Length = 110
151
+
152
+ Score = 37.2 bits (86), Expect = 0.019
153
+ Identities = 16/82 (19%), Positives = 36/82 (43%), Gaps = 3/82 (3%)
154
+
155
+ Query: 959 IKGFLASNPHTKINILYSNKTEHNIFIKDLFSFAVMENELRDIINNMSKDKTPENWEGRV 1018
156
+ +K L T++ ++Y N+TE ++ +++ + R + + T + W GR
157
+ Sbjct: 16 LKALLEDEDGTEVYLVYGNRTEDDLLLREELEELAKKYPGRLKVVAVVSR-TDDGWYGRK 74
158
+
159
+ Query: 1019 MLQRYLELKMKDHLSLQSSQEA 1040
160
+ + +++HLSL +
161
+ Sbjct: 75 G--YVTDALLEEHLSLIDLDDT 94
162
+
163
+
164
+ Database: Pfam.v.22.0
165
+ Posted date: Nov 8, 2007 6:06 PM
166
+ Number of letters in database: 1,769,994
167
+ Number of sequences in database: 9318
168
+
169
+ Lambda K H
170
+ 0.327 0.139 0.439
171
+
172
+ Gapped
173
+ Lambda K H
174
+ 0.267 0.0632 0.140
175
+
176
+
177
+ Matrix: BLOSUM62
178
+ Gap Penalties: Existence: 11, Extension: 1
179
+ Number of Sequences: 9318
180
+ Number of Hits to DB: 28,279,060
181
+ Number of extensions: 2147710
182
+ Number of successful extensions: 3028
183
+ Number of sequences better than 2.0e-02: 3
184
+ Number of HSP's gapped: 3016
185
+ Number of HSP's successfully gapped: 20
186
+ Length of database: 1,769,994
187
+ Neighboring words threshold: 11
188
+ Window for multiple hits: 40
189
+ X1: 15 ( 7.1 bits)
190
+ X2: 38 (14.6 bits)
191
+ X3: 64 (24.7 bits)
192
+ S1: 40 (21.7 bits)
193
+ S2: 77 (33.6 bits)
File without changes
@@ -0,0 +1,115 @@
1
+ #
2
+ # = test/functional/bio/appl/test_pts1.rb - Unit test for Bio::PTS1 with network connection
3
+ #
4
+ # Copyright:: Copyright (C) 2006
5
+ # Mitsuteru Nakao <n@bioruby.org>
6
+ # License:: The Ruby License
7
+ #
8
+ # $Id:$
9
+ #
10
+
11
+ require 'pathname'
12
+ libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
13
+ $:.unshift(libpath) unless $:.include?(libpath)
14
+
15
+ require 'test/unit'
16
+ require 'bio/appl/pts1'
17
+
18
+
19
+ module Bio
20
+
21
+ class FuncTestPTS1 < Test::Unit::TestCase
22
+
23
+ def setup
24
+ @seq =<<END
25
+ >AB000464
26
+ MRTGGDNAGPSHSHIKRLPTSGLSTWLQGTQTCVLHLPTGTRPPAHHPLLGYSSRRSYRL
27
+ LENPAAGCWARFSFCQGAAWDWDLEGVQWLRALAGGVSTAPSAPPGNLVFLSVSIFLCGS
28
+ LLLETCPAYFSSLDPD*
29
+ END
30
+ @serv = Bio::PTS1.new
31
+ end
32
+
33
+
34
+ def test_function_set
35
+ @serv.function("GENERAL")
36
+ assert_equal("GENERAL", @serv.function)
37
+ end
38
+
39
+ def test_function_show
40
+ assert_equal("METAZOA-specific", @serv.function)
41
+ end
42
+
43
+ def test_function_set_number_1
44
+ @serv.function(1)
45
+ assert_equal("METAZOA-specific", @serv.function)
46
+ end
47
+
48
+ def test_function_set_number_2
49
+ @serv.function(2)
50
+ assert_equal("FUNGI-specific", @serv.function)
51
+ end
52
+
53
+ def test_function_set_number_3
54
+ @serv.function(3)
55
+ assert_equal("GENERAL", @serv.function)
56
+ end
57
+
58
+
59
+ def test_exec
60
+ report = @serv.exec(@seq)
61
+ assert_equal(Bio::PTS1::Report, report.class)
62
+ end
63
+
64
+ def test_exec_with_faa
65
+ report = @serv.exec(Bio::FastaFormat.new(@seq))
66
+ assert_equal(Bio::PTS1::Report, report.class)
67
+ end
68
+
69
+ end
70
+
71
+ class FuncTestPTS1Report < Test::Unit::TestCase
72
+ def setup
73
+ serv = Bio::PTS1.new
74
+ seq = ">hoge\nAVSFLSMRRARL\n"
75
+ @report = serv.exec(seq)
76
+ end
77
+
78
+
79
+ def test_output_size
80
+ assert_equal(1634, @report.output.size)
81
+ end
82
+
83
+ def test_entry_id
84
+ assert_equal("hoge", @report.entry_id)
85
+ end
86
+
87
+ def test_prediction
88
+ assert_equal("Targeted", @report.prediction)
89
+ end
90
+
91
+ def test_cterm
92
+ assert_equal("AVSFLSMRRARL", @report.cterm)
93
+ end
94
+
95
+ def test_score
96
+ assert_equal("7.559", @report.score)
97
+ end
98
+
99
+ def test_fp
100
+ assert_equal("2.5e-04", @report.fp)
101
+ end
102
+
103
+ def test_sppta
104
+ assert_equal("-5.833", @report.sppta)
105
+ end
106
+
107
+ def test_spptna
108
+ assert_equal("-1.698", @report.spptna)
109
+ end
110
+
111
+ def test_profile
112
+ assert_equal("15.091", @report.profile)
113
+ end
114
+ end
115
+ end
@@ -5,7 +5,7 @@
5
5
  # Mitsuteru C. Nakao <n@bioruby.org>
6
6
  # License:: The Ruby License
7
7
  #
8
- # $Id: test_ensembl.rb,v 1.5 2007/11/10 16:57:43 nakao Exp $
8
+ # $Id:$
9
9
  #
10
10
 
11
11
  require 'pathname'
@@ -40,6 +40,13 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
40
40
  def test_server
41
41
  assert_equal("http://www.ensembl.org", @serv.server)
42
42
  end
43
+ end
44
+
45
+ class FuncTestEnsemblHumanExportView < Test::Unit::TestCase
46
+ def setup
47
+ @serv = Bio::Ensembl.new('Homo_sapiens',
48
+ 'http://jul2008.archive.ensembl.org')
49
+ end
43
50
 
44
51
  def test_fna_exportview
45
52
  seq = ">4 dna:chromosome chromosome:NCBI36:4:1149206:1149209:1\nGAGA\n"
@@ -72,82 +79,80 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
72
79
  assert_equal(fna10, fna)
73
80
  end
74
81
 
75
- def test_gff_exportview
76
- line = ["chromosome:NCBI36:4:1149206:1149209:1",
77
- "Ensembl",
78
- "Gene",
79
- "-839",
80
- "2747",
81
- ".",
82
- "+",
83
- ".",
84
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n"
85
- line = ["4",
86
- "Ensembl",
87
- "Gene",
88
- "1148366",
89
- "1151952",
90
- ".",
91
- "+",
92
- "1",
93
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
82
+ def test_gff_exportview_for_empty_result
94
83
  gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
84
+ assert_equal('', gff)
85
+ end
86
+
87
+ def test_gff_exportview
88
+ # OR1A1 (Olfactory receptor 1A1)
89
+ lines = [ [ "17",
90
+ "Ensembl",
91
+ "Gene",
92
+ "3065665",
93
+ "3066594",
94
+ ".",
95
+ "+",
96
+ "1",
97
+ "gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
98
+ ],
99
+ [ "17",
100
+ "Vega",
101
+ "Gene",
102
+ "3065665",
103
+ "3066594",
104
+ ".",
105
+ "+",
106
+ "1",
107
+ "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
108
+ ]
109
+ ]
110
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
111
+ gff = @serv.exportview(17, 3065665, 3066594, ['gene'])
95
112
  assert_equal(line, gff)
96
113
  end
97
114
 
98
- def test_gff_exportview_with_named_args
99
- line = ["chromosome:NCBI36:4:1149206:1149209:1",
100
- "Ensembl",
101
- "Gene",
102
- "-839",
103
- "2747",
104
- ".",
105
- "+",
106
- ".",
107
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
108
- line = ["4",
109
- "Ensembl",
110
- "Gene",
111
- "1148366",
112
- "1151952",
113
- ".",
114
- "+",
115
- "1",
116
- "gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
115
+ def test_gff_exportview_with_named_args_for_empty_result
117
116
  gff = @serv.exportview(:seq_region_name => 4,
118
117
  :anchor1 => 1149206,
119
118
  :anchor2 => 1149209,
120
119
  :options => ['gene'])
120
+ assert_equal('', gff)
121
+ end
122
+
123
+ def test_gff_exportview_with_named_args
124
+ # OR1A1 (Olfactory receptor 1A1)
125
+ lines = [ [ "17",
126
+ "Ensembl",
127
+ "Gene",
128
+ "3065665",
129
+ "3066594",
130
+ ".",
131
+ "+",
132
+ "1",
133
+ "gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
134
+ ],
135
+ [ "17",
136
+ "Vega",
137
+ "Gene",
138
+ "3065665",
139
+ "3066594",
140
+ ".",
141
+ "+",
142
+ "1",
143
+ "gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
144
+ ]
145
+ ]
146
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
147
+ gff = @serv.exportview(:seq_region_name => 17,
148
+ :anchor1 => 3065665,
149
+ :anchor2 => 3066594,
150
+ :options => ['gene'])
121
151
  assert_equal(line, gff)
122
152
  end
123
153
 
124
- def test_tab_exportview_with_named_args
125
- line = [["seqname",
126
- "source",
127
- "feature",
128
- "start",
129
- "end",
130
- "score",
131
- "strand",
132
- "frame",
133
- "gene_id",
134
- "transcript_id",
135
- "exon_id",
136
- "gene_type"].join("\t"),
137
- ["chromosome:NCBI36:4:1149206:1149209:1",
138
- "Ensembl",
139
- "Gene",
140
- "-839",
141
- "2747",
142
- ".",
143
- "+",
144
- ".",
145
- "ENSG00000206158",
146
- "ENST00000382964",
147
- "ENSE00001494097",
148
- "KNOWN_protein_coding"].join("\t") + "\n"
149
- ].join("\n")
150
- line = [["seqname",
154
+ def test_tab_exportview_with_named_args_for_empty_result
155
+ line = ["seqname",
151
156
  "source",
152
157
  "feature",
153
158
  "start",
@@ -158,20 +163,7 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
158
163
  "gene_id",
159
164
  "transcript_id",
160
165
  "exon_id",
161
- "gene_type"].join("\t"),
162
- ["4",
163
- "Ensembl",
164
- "Gene",
165
- "1148366",
166
- "1151952",
167
- ".",
168
- "+",
169
- "1",
170
- "ENSG00000206158",
171
- "ENST00000382964",
172
- "ENSE00001494097",
173
- "KNOWN_protein_coding"].join("\t") + "\n"
174
- ].join("\n")
166
+ "gene_type"].join("\t") + "\n"
175
167
  gff = @serv.exportview(:seq_region_name => 4,
176
168
  :anchor1 => 1149206,
177
169
  :anchor2 => 1149209,
@@ -180,6 +172,57 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
180
172
  assert_equal(line, gff)
181
173
  end
182
174
 
175
+ def test_tab_exportview_with_named_args
176
+ # OR1A1 (Olfactory receptor 1A1)
177
+ lines = [ [ "seqname",
178
+ "source",
179
+ "feature",
180
+ "start",
181
+ "end",
182
+ "score",
183
+ "strand",
184
+ "frame",
185
+ "gene_id",
186
+ "transcript_id",
187
+ "exon_id",
188
+ "gene_type"
189
+ ],
190
+ [ "17",
191
+ "Ensembl",
192
+ "Gene",
193
+ "3065665",
194
+ "3066594",
195
+ ".",
196
+ "+",
197
+ "1",
198
+ "ENSG00000172146",
199
+ "ENST00000304094",
200
+ "ENSE00001137815",
201
+ "KNOWN_protein_coding"
202
+ ],
203
+ [ "17",
204
+ "Vega",
205
+ "Gene",
206
+ "3065665",
207
+ "3066594",
208
+ ".",
209
+ "+",
210
+ "1",
211
+ "OTTHUMG00000090637",
212
+ "OTTHUMT00000207292",
213
+ "OTTHUME00001080001",
214
+ "KNOWN_protein_coding"
215
+ ]
216
+ ]
217
+ line = lines.collect { |x| x.join("\t") + "\n" }.join('')
218
+ gff = @serv.exportview(:seq_region_name => 17,
219
+ :anchor1 => 3065665,
220
+ :anchor2 => 3066594,
221
+ :options => ['gene'],
222
+ :format => 'tab')
223
+ assert_equal(line, gff)
224
+ end
225
+
183
226
 
184
227
  end
185
228