bio 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog +3421 -0
- data/KNOWN_ISSUES.rdoc +88 -0
- data/README.rdoc +252 -0
- data/README_DEV.rdoc +285 -0
- data/Rakefile +143 -0
- data/bin/bioruby +0 -0
- data/bin/br_biofetch.rb +0 -0
- data/bin/br_bioflat.rb +12 -1
- data/bin/br_biogetseq.rb +0 -0
- data/bin/br_pmfetch.rb +4 -3
- data/bioruby.gemspec +477 -0
- data/bioruby.gemspec.erb +117 -0
- data/doc/Changes-0.7.rd +7 -0
- data/doc/Changes-1.3.rdoc +239 -0
- data/doc/Tutorial.rd +296 -184
- data/doc/Tutorial.rd.html +1031 -0
- data/doc/Tutorial.rd.ja +111 -45
- data/doc/Tutorial.rd.ja.html +2225 -0
- data/doc/bioruby.css +281 -0
- data/extconf.rb +2 -0
- data/lib/bio.rb +29 -4
- data/lib/bio/appl/blast.rb +306 -121
- data/lib/bio/appl/blast/ddbj.rb +142 -0
- data/lib/bio/appl/blast/format0.rb +35 -25
- data/lib/bio/appl/blast/format8.rb +2 -2
- data/lib/bio/appl/blast/genomenet.rb +263 -0
- data/lib/bio/appl/blast/ncbioptions.rb +220 -0
- data/lib/bio/appl/blast/remote.rb +106 -0
- data/lib/bio/appl/blast/report.rb +260 -9
- data/lib/bio/appl/blast/rexml.rb +12 -5
- data/lib/bio/appl/blast/rpsblast.rb +277 -0
- data/lib/bio/appl/blast/wublast.rb +133 -12
- data/lib/bio/appl/blast/xmlparser.rb +35 -18
- data/lib/bio/appl/blat/report.rb +46 -5
- data/lib/bio/appl/emboss.rb +62 -13
- data/lib/bio/appl/fasta.rb +9 -11
- data/lib/bio/appl/genscan/report.rb +3 -3
- data/lib/bio/appl/hmmer.rb +1 -1
- data/lib/bio/appl/hmmer/report.rb +10 -10
- data/lib/bio/appl/paml/baseml.rb +95 -0
- data/lib/bio/appl/paml/baseml/report.rb +32 -0
- data/lib/bio/appl/paml/codeml.rb +242 -0
- data/lib/bio/appl/paml/codeml/rates.rb +67 -0
- data/lib/bio/appl/paml/codeml/report.rb +67 -0
- data/lib/bio/appl/paml/common.rb +348 -0
- data/lib/bio/appl/paml/common_report.rb +38 -0
- data/lib/bio/appl/paml/yn00.rb +103 -0
- data/lib/bio/appl/paml/yn00/report.rb +32 -0
- data/lib/bio/appl/psort.rb +2 -2
- data/lib/bio/appl/pts1.rb +5 -5
- data/lib/bio/appl/tmhmm/report.rb +10 -1
- data/lib/bio/command.rb +297 -41
- data/lib/bio/compat/features.rb +157 -0
- data/lib/bio/compat/references.rb +128 -0
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +67 -0
- data/lib/bio/db/biosql/sequence.rb +508 -0
- data/lib/bio/db/embl/common.rb +28 -12
- data/lib/bio/db/embl/embl.rb +107 -9
- data/lib/bio/db/embl/embl_to_biosequence.rb +85 -0
- data/lib/bio/db/embl/format_embl.rb +190 -0
- data/lib/bio/db/embl/sptr.rb +15 -16
- data/lib/bio/db/fantom.rb +6 -8
- data/lib/bio/db/fasta.rb +10 -507
- data/lib/bio/db/fasta/defline.rb +532 -0
- data/lib/bio/db/fasta/fasta_to_biosequence.rb +63 -0
- data/lib/bio/db/fasta/format_fasta.rb +97 -0
- data/lib/bio/db/genbank/common.rb +25 -8
- data/lib/bio/db/genbank/format_genbank.rb +187 -0
- data/lib/bio/db/genbank/genbank.rb +36 -1
- data/lib/bio/db/genbank/genbank_to_biosequence.rb +86 -0
- data/lib/bio/db/gff.rb +1791 -119
- data/lib/bio/db/kegg/glycan.rb +2 -6
- data/lib/bio/db/lasergene.rb +3 -3
- data/lib/bio/db/medline.rb +4 -1
- data/lib/bio/db/newick.rb +10 -10
- data/lib/bio/db/pdb/chain.rb +6 -2
- data/lib/bio/db/pdb/pdb.rb +12 -3
- data/lib/bio/db/rebase.rb +7 -8
- data/lib/bio/db/soft.rb +3 -3
- data/lib/bio/feature.rb +1 -88
- data/lib/bio/io/biosql/biodatabase.rb +64 -0
- data/lib/bio/io/biosql/bioentry.rb +29 -0
- data/lib/bio/io/biosql/bioentry_dbxref.rb +11 -0
- data/lib/bio/io/biosql/bioentry_path.rb +12 -0
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +10 -0
- data/lib/bio/io/biosql/bioentry_reference.rb +10 -0
- data/lib/bio/io/biosql/bioentry_relationship.rb +10 -0
- data/lib/bio/io/biosql/biosequence.rb +11 -0
- data/lib/bio/io/biosql/comment.rb +7 -0
- data/lib/bio/io/biosql/config/database.yml +20 -0
- data/lib/bio/io/biosql/dbxref.rb +13 -0
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +12 -0
- data/lib/bio/io/biosql/location.rb +32 -0
- data/lib/bio/io/biosql/location_qualifier_value.rb +11 -0
- data/lib/bio/io/biosql/ontology.rb +10 -0
- data/lib/bio/io/biosql/reference.rb +9 -0
- data/lib/bio/io/biosql/seqfeature.rb +32 -0
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_path.rb +11 -0
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +20 -0
- data/lib/bio/io/biosql/seqfeature_relationship.rb +11 -0
- data/lib/bio/io/biosql/taxon.rb +12 -0
- data/lib/bio/io/biosql/taxon_name.rb +9 -0
- data/lib/bio/io/biosql/term.rb +27 -0
- data/lib/bio/io/biosql/term_dbxref.rb +11 -0
- data/lib/bio/io/biosql/term_path.rb +12 -0
- data/lib/bio/io/biosql/term_relationship.rb +13 -0
- data/lib/bio/io/biosql/term_relationship_term.rb +11 -0
- data/lib/bio/io/biosql/term_synonym.rb +10 -0
- data/lib/bio/io/das.rb +7 -7
- data/lib/bio/io/ddbjxml.rb +57 -0
- data/lib/bio/io/ensembl.rb +2 -2
- data/lib/bio/io/fetch.rb +28 -14
- data/lib/bio/io/flatfile.rb +17 -853
- data/lib/bio/io/flatfile/autodetection.rb +545 -0
- data/lib/bio/io/flatfile/buffer.rb +237 -0
- data/lib/bio/io/flatfile/index.rb +17 -7
- data/lib/bio/io/flatfile/indexer.rb +30 -12
- data/lib/bio/io/flatfile/splitter.rb +297 -0
- data/lib/bio/io/hinv.rb +442 -0
- data/lib/bio/io/keggapi.rb +2 -2
- data/lib/bio/io/ncbirest.rb +733 -0
- data/lib/bio/io/pubmed.rb +34 -80
- data/lib/bio/io/registry.rb +2 -2
- data/lib/bio/io/sql.rb +178 -357
- data/lib/bio/io/togows.rb +458 -0
- data/lib/bio/location.rb +106 -11
- data/lib/bio/pathway.rb +120 -14
- data/lib/bio/reference.rb +115 -101
- data/lib/bio/sequence.rb +164 -183
- data/lib/bio/sequence/adapter.rb +108 -0
- data/lib/bio/sequence/common.rb +22 -45
- data/lib/bio/sequence/compat.rb +2 -2
- data/lib/bio/sequence/dblink.rb +54 -0
- data/lib/bio/sequence/format.rb +254 -77
- data/lib/bio/sequence/format_raw.rb +23 -0
- data/lib/bio/shell.rb +3 -1
- data/lib/bio/shell/core.rb +2 -2
- data/lib/bio/shell/plugin/entry.rb +33 -4
- data/lib/bio/shell/plugin/ncbirest.rb +64 -0
- data/lib/bio/shell/plugin/togows.rb +40 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/bioruby_generator.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_classes.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_log.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_methods.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_modules.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/_variables.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-bg.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-gem.png +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby-link.gif +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.css +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_controller.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/bioruby_helper.rb +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/commands.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/history.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/index.rhtml +0 -0
- data/lib/bio/shell/rails/vendor/plugins/{generators → bioruby/generators}/bioruby/templates/spinner.gif +0 -0
- data/lib/bio/tree.rb +4 -2
- data/lib/bio/util/color_scheme.rb +2 -2
- data/lib/bio/util/contingency_table.rb +2 -2
- data/lib/bio/util/restriction_enzyme.rb +2 -2
- data/lib/bio/util/restriction_enzyme/single_strand.rb +6 -5
- data/lib/bio/version.rb +25 -0
- data/rdoc.zsh +8 -0
- data/sample/any2fasta.rb +0 -0
- data/sample/biofetch.rb +0 -0
- data/sample/dbget +0 -0
- data/sample/demo_sequence.rb +158 -0
- data/sample/enzymes.rb +0 -0
- data/sample/fasta2tab.rb +0 -0
- data/sample/fastagrep.rb +72 -0
- data/sample/fastasort.rb +54 -0
- data/sample/fsplit.rb +0 -0
- data/sample/gb2fasta.rb +2 -3
- data/sample/gb2tab.rb +0 -0
- data/sample/gbtab2mysql.rb +0 -0
- data/sample/genes2nuc.rb +0 -0
- data/sample/genes2pep.rb +0 -0
- data/sample/genes2tab.rb +0 -0
- data/sample/genome2rb.rb +0 -0
- data/sample/genome2tab.rb +0 -0
- data/sample/goslim.rb +0 -0
- data/sample/gt2fasta.rb +0 -0
- data/sample/na2aa.rb +34 -0
- data/sample/pmfetch.rb +0 -0
- data/sample/pmsearch.rb +0 -0
- data/sample/ssearch2tab.rb +0 -0
- data/sample/tfastx2tab.rb +0 -0
- data/sample/vs-genes.rb +0 -0
- data/setup.rb +1596 -0
- data/test/data/blast/blastp-multi.m7 +188 -0
- data/test/data/command/echoarg2.bat +1 -0
- data/test/data/paml/codeml/control_file.txt +30 -0
- data/test/data/paml/codeml/output.txt +78 -0
- data/test/data/paml/codeml/rates +217 -0
- data/test/data/rpsblast/misc.rpsblast +193 -0
- data/test/data/soft/GDS100_partial.soft +0 -0
- data/test/data/soft/GSE3457_family_partial.soft +0 -0
- data/test/functional/bio/appl/test_pts1.rb +115 -0
- data/test/functional/bio/io/test_ensembl.rb +123 -80
- data/test/functional/bio/io/test_togows.rb +267 -0
- data/test/functional/bio/sequence/test_output_embl.rb +51 -0
- data/test/functional/bio/test_command.rb +301 -0
- data/test/runner.rb +17 -1
- data/test/unit/bio/appl/blast/test_ncbioptions.rb +112 -0
- data/test/unit/bio/appl/blast/test_report.rb +753 -35
- data/test/unit/bio/appl/blast/test_rpsblast.rb +398 -0
- data/test/unit/bio/appl/paml/codeml/test_rates.rb +45 -0
- data/test/unit/bio/appl/paml/codeml/test_report.rb +45 -0
- data/test/unit/bio/appl/paml/test_codeml.rb +174 -0
- data/test/unit/bio/appl/test_blast.rb +135 -4
- data/test/unit/bio/appl/test_fasta.rb +2 -2
- data/test/unit/bio/appl/test_pts1.rb +1 -64
- data/test/unit/bio/db/embl/test_common.rb +15 -15
- data/test/unit/bio/db/embl/test_embl.rb +4 -4
- data/test/unit/bio/db/embl/test_embl_rel89.rb +5 -5
- data/test/unit/bio/db/embl/test_embl_to_bioseq.rb +203 -0
- data/test/unit/bio/db/embl/test_sptr.rb +38 -1
- data/test/unit/bio/db/pdb/test_pdb.rb +2 -2
- data/test/unit/bio/db/test_gff.rb +1151 -25
- data/test/unit/bio/db/test_medline.rb +127 -0
- data/test/unit/bio/db/test_nexus.rb +5 -1
- data/test/unit/bio/db/test_prosite.rb +4 -4
- data/test/unit/bio/io/flatfile/test_autodetection.rb +375 -0
- data/test/unit/bio/io/flatfile/test_buffer.rb +251 -0
- data/test/unit/bio/io/flatfile/test_splitter.rb +369 -0
- data/test/unit/bio/io/test_ddbjxml.rb +8 -3
- data/test/unit/bio/io/test_fastacmd.rb +5 -5
- data/test/unit/bio/io/test_flatfile.rb +357 -106
- data/test/unit/bio/io/test_soapwsdl.rb +2 -2
- data/test/unit/bio/io/test_togows.rb +161 -0
- data/test/unit/bio/sequence/test_common.rb +210 -11
- data/test/unit/bio/sequence/test_compat.rb +3 -3
- data/test/unit/bio/sequence/test_dblink.rb +58 -0
- data/test/unit/bio/sequence/test_na.rb +2 -2
- data/test/unit/bio/test_command.rb +111 -50
- data/test/unit/bio/test_feature.rb +29 -1
- data/test/unit/bio/test_location.rb +566 -6
- data/test/unit/bio/test_pathway.rb +91 -65
- data/test/unit/bio/test_reference.rb +67 -13
- data/test/unit/bio/util/restriction_enzyme/analysis/test_calculated_cuts.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_cut_ranges.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/analysis/test_sequence_range.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_aligned_strands.rb +4 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_location_pair_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/double_stranded/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/single_strand/test_cut_locations_in_enzyme_notation.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_analysis.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_cut_symbol.rb +4 -4
- data/test/unit/bio/util/restriction_enzyme/test_double_stranded.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_single_strand_complement.rb +3 -3
- data/test/unit/bio/util/restriction_enzyme/test_string_formatting.rb +3 -3
- data/test/unit/bio/util/test_restriction_enzyme.rb +3 -3
- metadata +202 -167
- data/test/unit/bio/appl/blast/test_xmlparser.rb +0 -388
@@ -0,0 +1,193 @@
|
|
1
|
+
RPS-BLAST 2.2.18 [Mar-02-2008]
|
2
|
+
|
3
|
+
Database: Pfam.v.22.0
|
4
|
+
9318 sequences; 1,769,994 total letters
|
5
|
+
|
6
|
+
Searching..................................................done
|
7
|
+
|
8
|
+
Query= TestSequence mixture of globin and rhodopsin (computationally
|
9
|
+
randomly concatenated)
|
10
|
+
(495 letters)
|
11
|
+
|
12
|
+
|
13
|
+
|
14
|
+
Score E
|
15
|
+
Sequences producing significant alignments: (bits) Value
|
16
|
+
|
17
|
+
gnl|CDD|84466 pfam00042, Globin, Globin.. 110 2e-25
|
18
|
+
gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodop... 91 2e-19
|
19
|
+
gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (D... 37 0.003
|
20
|
+
|
21
|
+
>gnl|CDD|84466 pfam00042, Globin, Globin..
|
22
|
+
Length = 110
|
23
|
+
|
24
|
+
Score = 110 bits (277), Expect = 2e-25
|
25
|
+
Identities = 50/110 (45%), Positives = 69/110 (62%), Gaps = 5/110 (4%)
|
26
|
+
|
27
|
+
Query: 148 EKQLITGLWGKV--NVAECGAEALARLLIVYPWTQRFFASFGNLSSPTAILGNPMVRAHG 205
|
28
|
+
+K L+ WGKV N E GAE LARL YP T+ +F FG+LS+ A+ +P +AHG
|
29
|
+
Sbjct: 1 QKALVKASWGKVKGNAPEIGAEILARLFTAYPDTKAYFPKFGDLSTAEALKSSPKFKAHG 60
|
30
|
+
|
31
|
+
Query: 206 KKVLTSFGDAVKNLDN---IKNTFSQLSELHCDKLHVDPENFRLLGDILI 252
|
32
|
+
KKVL + G+AVK+LD+ +K +L H + HVDP NF+L G+ L+
|
33
|
+
Sbjct: 61 KKVLAALGEAVKHLDDDGNLKAALKKLGARHAKRGHVDPANFKLFGEALL 110
|
34
|
+
|
35
|
+
|
36
|
+
>gnl|CDD|84429 pfam00001, 7tm_1, 7 transmembrane receptor (rhodopsin family). This
|
37
|
+
family contains, amongst other G-protein-coupled
|
38
|
+
receptors (GCPRs), members of the opsin family, which
|
39
|
+
have been considered to be typical members of the
|
40
|
+
rhodopsin superfamily. They share several motifs, mainly
|
41
|
+
the seven transmembrane helices, GCPRs of the rhodopsin
|
42
|
+
superfamily. All opsins bind a chromophore, such as
|
43
|
+
11-cis-retinal. The function of most opsins other than
|
44
|
+
the photoisomerases is split into two steps: light
|
45
|
+
absorption and G-protein activation. Photoisomerases, on
|
46
|
+
the other hand, are not coupled to G-proteins - they are
|
47
|
+
thought to generate and supply the chromophore that is
|
48
|
+
used by visual opsins..
|
49
|
+
Length = 258
|
50
|
+
|
51
|
+
Score = 90.8 bits (225), Expect = 2e-19
|
52
|
+
Identities = 37/162 (22%), Positives = 76/162 (46%), Gaps = 10/162 (6%)
|
53
|
+
|
54
|
+
Query: 299 HAIMGVAFTWVMALACAAPPLAGWSRY-IPEGLQCSCGIDYYTLKPEVNNESFVIYMFVV 357
|
55
|
+
A + + WV+AL + PPL + EG +C ID+ S+ + ++
|
56
|
+
Sbjct: 100 RAKVLILLVWVLALLLSLPPLLFSWLRTVEEGNVTTCLIDFPEESLLR---SYTLLSTLL 156
|
57
|
+
|
58
|
+
Query: 358 HFTIPMIIIFFCYGQLVFTV----KEAAAQQQESATTQKAEKEVTRMVIIMVIAFLICWV 413
|
59
|
+
F +P+++I CY +++ T+ + A+ + +E++ +M++++V+ F++CW+
|
60
|
+
Sbjct: 157 GFVLPLLVILVCYTRILRTLRRRARSGASIARSLKRRSSSERKAAKMLLVVVVVFVLCWL 216
|
61
|
+
|
62
|
+
Query: 414 PYASVAFY--IFTHQGSNFGPIFMTIPAFFAKSAAIYNPVIY 453
|
63
|
+
PY V + P + I + A + NP+IY
|
64
|
+
Sbjct: 217 PYHIVLLLDSLCLLSIIRVLPTALLITLWLAYVNSCLNPIIY 258
|
65
|
+
|
66
|
+
|
67
|
+
|
68
|
+
Score = 73.4 bits (180), Expect = 3e-14
|
69
|
+
Identities = 32/86 (37%), Positives = 47/86 (54%)
|
70
|
+
|
71
|
+
Query: 55 NFLTLYVTVQHKKLRTPLNYILLNLAVADLFMVLGGFTSTLYTSLHGYFVFGPTGCNLEG 114
|
72
|
+
N L + V ++ K+LRTP N LLNLAVADL +L LY + G + FG C L G
|
73
|
+
Sbjct: 2 NLLVILVILRTKRLRTPTNIFLLNLAVADLLFLLTLPPWALYYLVGGDWPFGDALCKLVG 61
|
74
|
+
|
75
|
+
Query: 115 FFATLGGEIALWSLVVLAIERYVVVC 140
|
76
|
+
+ G ++ L ++I+RY+ +
|
77
|
+
Sbjct: 62 ALFVVNGYASILLLTAISIDRYLAIV 87
|
78
|
+
|
79
|
+
|
80
|
+
>gnl|CDD|87195 pfam06976, DUF1300, Protein of unknown function (DUF1300). This
|
81
|
+
family represents a conserved region approximately 80
|
82
|
+
residues long within a number of proteins of unknown
|
83
|
+
function that seem to be specific to C. elegans. Some
|
84
|
+
family members contain more than one copy of this
|
85
|
+
region..
|
86
|
+
Length = 336
|
87
|
+
|
88
|
+
Score = 37.1 bits (86), Expect = 0.003
|
89
|
+
Identities = 32/145 (22%), Positives = 58/145 (40%), Gaps = 7/145 (4%)
|
90
|
+
|
91
|
+
Query: 336 IDYYTLKPEVNNESFVIYMFV--VHFT-IPMIIIFFCYGQLVFTVKEAAAQQQESATTQK 392
|
92
|
+
I+Y E+ S+ I + + + F IP II+ L+F +K+ S+T+
|
93
|
+
Sbjct: 192 IEYIIETTELFGSSYEILLLIEGILFKLIPSIILPIATILLIFQLKKNKKVSSRSSTSSS 251
|
94
|
+
|
95
|
+
Query: 393 AEKEVTRMVIIMVIAFLICWVPYASVAFYIFTHQGSNFGPIFMTIPAFFAKSAAIYNPVI 452
|
96
|
+
+ T++V + I+FLI VP + F + + A + N I
|
97
|
+
Sbjct: 252 SNDRSTKLVTFVTISFLIATVPLGILYLIKFFVFEYEGLVMIIDKLAIIFTFLSTINGTI 311
|
98
|
+
|
99
|
+
Query: 453 YIM----MNKQFRNCMLTTICCGKN 473
|
100
|
+
+ + M+ Q+RN + K
|
101
|
+
Sbjct: 312 HFLICYFMSSQYRNTVREMFGRKKK 336
|
102
|
+
|
103
|
+
|
104
|
+
Query= randomseq3
|
105
|
+
(1087 letters)
|
106
|
+
|
107
|
+
***** No hits found ******
|
108
|
+
|
109
|
+
|
110
|
+
Query= gi|6013469|gb|AAD49229.2|AF159462_1 EHEC factor for adherence
|
111
|
+
[Escherichia coli]
|
112
|
+
(3223 letters)
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
Score E
|
117
|
+
Sequences producing significant alignments: (bits) Value
|
118
|
+
|
119
|
+
gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sug... 84 1e-16
|
120
|
+
gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-bindi... 37 0.019
|
121
|
+
|
122
|
+
>gnl|CDD|86672 pfam04488, Gly_transf_sug, Glycosyltransferase sugar-binding region
|
123
|
+
containing DXD motif. The DXD motif is a short conserved
|
124
|
+
motif found in many families of glycosyltransferases,
|
125
|
+
which add a range of different sugars to other sugars,
|
126
|
+
phosphates and proteins. DXD-containing
|
127
|
+
glycosyltransferases all use nucleoside diphosphate
|
128
|
+
sugars as donors and require divalent cations, usually
|
129
|
+
manganese. The DXD motif is expected to play a
|
130
|
+
carbohydrate binding role in sugar-nucleoside
|
131
|
+
diphosphate and manganese dependent
|
132
|
+
glycosyltransferases..
|
133
|
+
Length = 86
|
134
|
+
|
135
|
+
Score = 84.2 bits (208), Expect = 1e-16
|
136
|
+
Identities = 33/85 (38%), Positives = 40/85 (47%), Gaps = 2/85 (2%)
|
137
|
+
|
138
|
+
Query: 505 RISIKDVNSLTSLSKSENNHNYQTEMLLRWNYPAA-SDLLRMYILKEHGGIYTDTDMMPA 563
|
139
|
+
I L SL N + + EM LRW Y AA SD LR IL ++GGIY DTD++P
|
140
|
+
Sbjct: 1 YDVILVTPDLESLFIDTNAYPWFQEMFLRWPYNAAASDFLRYAILYKYGGIYLDTDVIPL 60
|
141
|
+
|
142
|
+
Query: 564 YSKQVIFKIMMQTN-GDNRFLEDLK 587
|
143
|
+
S V+ I R E L
|
144
|
+
Sbjct: 61 KSLDVLINIEGSNFLDGERSFERLN 85
|
145
|
+
|
146
|
+
|
147
|
+
>gnl|CDD|84583 pfam00175, NAD_binding_1, Oxidoreductase NAD-binding domain. Xanthine
|
148
|
+
dehydrogenases, that also bind FAD/NAD, have essentially
|
149
|
+
no similarity..
|
150
|
+
Length = 110
|
151
|
+
|
152
|
+
Score = 37.2 bits (86), Expect = 0.019
|
153
|
+
Identities = 16/82 (19%), Positives = 36/82 (43%), Gaps = 3/82 (3%)
|
154
|
+
|
155
|
+
Query: 959 IKGFLASNPHTKINILYSNKTEHNIFIKDLFSFAVMENELRDIINNMSKDKTPENWEGRV 1018
|
156
|
+
+K L T++ ++Y N+TE ++ +++ + R + + T + W GR
|
157
|
+
Sbjct: 16 LKALLEDEDGTEVYLVYGNRTEDDLLLREELEELAKKYPGRLKVVAVVSR-TDDGWYGRK 74
|
158
|
+
|
159
|
+
Query: 1019 MLQRYLELKMKDHLSLQSSQEA 1040
|
160
|
+
+ +++HLSL +
|
161
|
+
Sbjct: 75 G--YVTDALLEEHLSLIDLDDT 94
|
162
|
+
|
163
|
+
|
164
|
+
Database: Pfam.v.22.0
|
165
|
+
Posted date: Nov 8, 2007 6:06 PM
|
166
|
+
Number of letters in database: 1,769,994
|
167
|
+
Number of sequences in database: 9318
|
168
|
+
|
169
|
+
Lambda K H
|
170
|
+
0.327 0.139 0.439
|
171
|
+
|
172
|
+
Gapped
|
173
|
+
Lambda K H
|
174
|
+
0.267 0.0632 0.140
|
175
|
+
|
176
|
+
|
177
|
+
Matrix: BLOSUM62
|
178
|
+
Gap Penalties: Existence: 11, Extension: 1
|
179
|
+
Number of Sequences: 9318
|
180
|
+
Number of Hits to DB: 28,279,060
|
181
|
+
Number of extensions: 2147710
|
182
|
+
Number of successful extensions: 3028
|
183
|
+
Number of sequences better than 2.0e-02: 3
|
184
|
+
Number of HSP's gapped: 3016
|
185
|
+
Number of HSP's successfully gapped: 20
|
186
|
+
Length of database: 1,769,994
|
187
|
+
Neighboring words threshold: 11
|
188
|
+
Window for multiple hits: 40
|
189
|
+
X1: 15 ( 7.1 bits)
|
190
|
+
X2: 38 (14.6 bits)
|
191
|
+
X3: 64 (24.7 bits)
|
192
|
+
S1: 40 (21.7 bits)
|
193
|
+
S2: 77 (33.6 bits)
|
File without changes
|
File without changes
|
@@ -0,0 +1,115 @@
|
|
1
|
+
#
|
2
|
+
# = test/functional/bio/appl/test_pts1.rb - Unit test for Bio::PTS1 with network connection
|
3
|
+
#
|
4
|
+
# Copyright:: Copyright (C) 2006
|
5
|
+
# Mitsuteru Nakao <n@bioruby.org>
|
6
|
+
# License:: The Ruby License
|
7
|
+
#
|
8
|
+
# $Id:$
|
9
|
+
#
|
10
|
+
|
11
|
+
require 'pathname'
|
12
|
+
libpath = Pathname.new(File.join(File.dirname(__FILE__), ['..'] * 4, 'lib')).cleanpath.to_s
|
13
|
+
$:.unshift(libpath) unless $:.include?(libpath)
|
14
|
+
|
15
|
+
require 'test/unit'
|
16
|
+
require 'bio/appl/pts1'
|
17
|
+
|
18
|
+
|
19
|
+
module Bio
|
20
|
+
|
21
|
+
class FuncTestPTS1 < Test::Unit::TestCase
|
22
|
+
|
23
|
+
def setup
|
24
|
+
@seq =<<END
|
25
|
+
>AB000464
|
26
|
+
MRTGGDNAGPSHSHIKRLPTSGLSTWLQGTQTCVLHLPTGTRPPAHHPLLGYSSRRSYRL
|
27
|
+
LENPAAGCWARFSFCQGAAWDWDLEGVQWLRALAGGVSTAPSAPPGNLVFLSVSIFLCGS
|
28
|
+
LLLETCPAYFSSLDPD*
|
29
|
+
END
|
30
|
+
@serv = Bio::PTS1.new
|
31
|
+
end
|
32
|
+
|
33
|
+
|
34
|
+
def test_function_set
|
35
|
+
@serv.function("GENERAL")
|
36
|
+
assert_equal("GENERAL", @serv.function)
|
37
|
+
end
|
38
|
+
|
39
|
+
def test_function_show
|
40
|
+
assert_equal("METAZOA-specific", @serv.function)
|
41
|
+
end
|
42
|
+
|
43
|
+
def test_function_set_number_1
|
44
|
+
@serv.function(1)
|
45
|
+
assert_equal("METAZOA-specific", @serv.function)
|
46
|
+
end
|
47
|
+
|
48
|
+
def test_function_set_number_2
|
49
|
+
@serv.function(2)
|
50
|
+
assert_equal("FUNGI-specific", @serv.function)
|
51
|
+
end
|
52
|
+
|
53
|
+
def test_function_set_number_3
|
54
|
+
@serv.function(3)
|
55
|
+
assert_equal("GENERAL", @serv.function)
|
56
|
+
end
|
57
|
+
|
58
|
+
|
59
|
+
def test_exec
|
60
|
+
report = @serv.exec(@seq)
|
61
|
+
assert_equal(Bio::PTS1::Report, report.class)
|
62
|
+
end
|
63
|
+
|
64
|
+
def test_exec_with_faa
|
65
|
+
report = @serv.exec(Bio::FastaFormat.new(@seq))
|
66
|
+
assert_equal(Bio::PTS1::Report, report.class)
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
class FuncTestPTS1Report < Test::Unit::TestCase
|
72
|
+
def setup
|
73
|
+
serv = Bio::PTS1.new
|
74
|
+
seq = ">hoge\nAVSFLSMRRARL\n"
|
75
|
+
@report = serv.exec(seq)
|
76
|
+
end
|
77
|
+
|
78
|
+
|
79
|
+
def test_output_size
|
80
|
+
assert_equal(1634, @report.output.size)
|
81
|
+
end
|
82
|
+
|
83
|
+
def test_entry_id
|
84
|
+
assert_equal("hoge", @report.entry_id)
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_prediction
|
88
|
+
assert_equal("Targeted", @report.prediction)
|
89
|
+
end
|
90
|
+
|
91
|
+
def test_cterm
|
92
|
+
assert_equal("AVSFLSMRRARL", @report.cterm)
|
93
|
+
end
|
94
|
+
|
95
|
+
def test_score
|
96
|
+
assert_equal("7.559", @report.score)
|
97
|
+
end
|
98
|
+
|
99
|
+
def test_fp
|
100
|
+
assert_equal("2.5e-04", @report.fp)
|
101
|
+
end
|
102
|
+
|
103
|
+
def test_sppta
|
104
|
+
assert_equal("-5.833", @report.sppta)
|
105
|
+
end
|
106
|
+
|
107
|
+
def test_spptna
|
108
|
+
assert_equal("-1.698", @report.spptna)
|
109
|
+
end
|
110
|
+
|
111
|
+
def test_profile
|
112
|
+
assert_equal("15.091", @report.profile)
|
113
|
+
end
|
114
|
+
end
|
115
|
+
end
|
@@ -5,7 +5,7 @@
|
|
5
5
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id
|
8
|
+
# $Id:$
|
9
9
|
#
|
10
10
|
|
11
11
|
require 'pathname'
|
@@ -40,6 +40,13 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
|
|
40
40
|
def test_server
|
41
41
|
assert_equal("http://www.ensembl.org", @serv.server)
|
42
42
|
end
|
43
|
+
end
|
44
|
+
|
45
|
+
class FuncTestEnsemblHumanExportView < Test::Unit::TestCase
|
46
|
+
def setup
|
47
|
+
@serv = Bio::Ensembl.new('Homo_sapiens',
|
48
|
+
'http://jul2008.archive.ensembl.org')
|
49
|
+
end
|
43
50
|
|
44
51
|
def test_fna_exportview
|
45
52
|
seq = ">4 dna:chromosome chromosome:NCBI36:4:1149206:1149209:1\nGAGA\n"
|
@@ -72,82 +79,80 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
|
|
72
79
|
assert_equal(fna10, fna)
|
73
80
|
end
|
74
81
|
|
75
|
-
def
|
76
|
-
line = ["chromosome:NCBI36:4:1149206:1149209:1",
|
77
|
-
"Ensembl",
|
78
|
-
"Gene",
|
79
|
-
"-839",
|
80
|
-
"2747",
|
81
|
-
".",
|
82
|
-
"+",
|
83
|
-
".",
|
84
|
-
"gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding\n"].join("\t") + "\n"
|
85
|
-
line = ["4",
|
86
|
-
"Ensembl",
|
87
|
-
"Gene",
|
88
|
-
"1148366",
|
89
|
-
"1151952",
|
90
|
-
".",
|
91
|
-
"+",
|
92
|
-
"1",
|
93
|
-
"gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
|
82
|
+
def test_gff_exportview_for_empty_result
|
94
83
|
gff = @serv.exportview(4, 1149206, 1149209, ['gene'])
|
84
|
+
assert_equal('', gff)
|
85
|
+
end
|
86
|
+
|
87
|
+
def test_gff_exportview
|
88
|
+
# OR1A1 (Olfactory receptor 1A1)
|
89
|
+
lines = [ [ "17",
|
90
|
+
"Ensembl",
|
91
|
+
"Gene",
|
92
|
+
"3065665",
|
93
|
+
"3066594",
|
94
|
+
".",
|
95
|
+
"+",
|
96
|
+
"1",
|
97
|
+
"gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
|
98
|
+
],
|
99
|
+
[ "17",
|
100
|
+
"Vega",
|
101
|
+
"Gene",
|
102
|
+
"3065665",
|
103
|
+
"3066594",
|
104
|
+
".",
|
105
|
+
"+",
|
106
|
+
"1",
|
107
|
+
"gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
|
108
|
+
]
|
109
|
+
]
|
110
|
+
line = lines.collect { |x| x.join("\t") + "\n" }.join('')
|
111
|
+
gff = @serv.exportview(17, 3065665, 3066594, ['gene'])
|
95
112
|
assert_equal(line, gff)
|
96
113
|
end
|
97
114
|
|
98
|
-
def
|
99
|
-
line = ["chromosome:NCBI36:4:1149206:1149209:1",
|
100
|
-
"Ensembl",
|
101
|
-
"Gene",
|
102
|
-
"-839",
|
103
|
-
"2747",
|
104
|
-
".",
|
105
|
-
"+",
|
106
|
-
".",
|
107
|
-
"gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
|
108
|
-
line = ["4",
|
109
|
-
"Ensembl",
|
110
|
-
"Gene",
|
111
|
-
"1148366",
|
112
|
-
"1151952",
|
113
|
-
".",
|
114
|
-
"+",
|
115
|
-
"1",
|
116
|
-
"gene_id=ENSG00000206158; transcript_id=ENST00000382964; exon_id=ENSE00001494097; gene_type=KNOWN_protein_coding"].join("\t") + "\n"
|
115
|
+
def test_gff_exportview_with_named_args_for_empty_result
|
117
116
|
gff = @serv.exportview(:seq_region_name => 4,
|
118
117
|
:anchor1 => 1149206,
|
119
118
|
:anchor2 => 1149209,
|
120
119
|
:options => ['gene'])
|
120
|
+
assert_equal('', gff)
|
121
|
+
end
|
122
|
+
|
123
|
+
def test_gff_exportview_with_named_args
|
124
|
+
# OR1A1 (Olfactory receptor 1A1)
|
125
|
+
lines = [ [ "17",
|
126
|
+
"Ensembl",
|
127
|
+
"Gene",
|
128
|
+
"3065665",
|
129
|
+
"3066594",
|
130
|
+
".",
|
131
|
+
"+",
|
132
|
+
"1",
|
133
|
+
"gene_id=ENSG00000172146; transcript_id=ENST00000304094; exon_id=ENSE00001137815; gene_type=KNOWN_protein_coding"
|
134
|
+
],
|
135
|
+
[ "17",
|
136
|
+
"Vega",
|
137
|
+
"Gene",
|
138
|
+
"3065665",
|
139
|
+
"3066594",
|
140
|
+
".",
|
141
|
+
"+",
|
142
|
+
"1",
|
143
|
+
"gene_id=OTTHUMG00000090637; transcript_id=OTTHUMT00000207292; exon_id=OTTHUME00001080001; gene_type=KNOWN_protein_coding"
|
144
|
+
]
|
145
|
+
]
|
146
|
+
line = lines.collect { |x| x.join("\t") + "\n" }.join('')
|
147
|
+
gff = @serv.exportview(:seq_region_name => 17,
|
148
|
+
:anchor1 => 3065665,
|
149
|
+
:anchor2 => 3066594,
|
150
|
+
:options => ['gene'])
|
121
151
|
assert_equal(line, gff)
|
122
152
|
end
|
123
153
|
|
124
|
-
def
|
125
|
-
line = [
|
126
|
-
"source",
|
127
|
-
"feature",
|
128
|
-
"start",
|
129
|
-
"end",
|
130
|
-
"score",
|
131
|
-
"strand",
|
132
|
-
"frame",
|
133
|
-
"gene_id",
|
134
|
-
"transcript_id",
|
135
|
-
"exon_id",
|
136
|
-
"gene_type"].join("\t"),
|
137
|
-
["chromosome:NCBI36:4:1149206:1149209:1",
|
138
|
-
"Ensembl",
|
139
|
-
"Gene",
|
140
|
-
"-839",
|
141
|
-
"2747",
|
142
|
-
".",
|
143
|
-
"+",
|
144
|
-
".",
|
145
|
-
"ENSG00000206158",
|
146
|
-
"ENST00000382964",
|
147
|
-
"ENSE00001494097",
|
148
|
-
"KNOWN_protein_coding"].join("\t") + "\n"
|
149
|
-
].join("\n")
|
150
|
-
line = [["seqname",
|
154
|
+
def test_tab_exportview_with_named_args_for_empty_result
|
155
|
+
line = ["seqname",
|
151
156
|
"source",
|
152
157
|
"feature",
|
153
158
|
"start",
|
@@ -158,20 +163,7 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
|
|
158
163
|
"gene_id",
|
159
164
|
"transcript_id",
|
160
165
|
"exon_id",
|
161
|
-
"gene_type"].join("\t")
|
162
|
-
["4",
|
163
|
-
"Ensembl",
|
164
|
-
"Gene",
|
165
|
-
"1148366",
|
166
|
-
"1151952",
|
167
|
-
".",
|
168
|
-
"+",
|
169
|
-
"1",
|
170
|
-
"ENSG00000206158",
|
171
|
-
"ENST00000382964",
|
172
|
-
"ENSE00001494097",
|
173
|
-
"KNOWN_protein_coding"].join("\t") + "\n"
|
174
|
-
].join("\n")
|
166
|
+
"gene_type"].join("\t") + "\n"
|
175
167
|
gff = @serv.exportview(:seq_region_name => 4,
|
176
168
|
:anchor1 => 1149206,
|
177
169
|
:anchor2 => 1149209,
|
@@ -180,6 +172,57 @@ class FuncTestEnsemblHuman < Test::Unit::TestCase
|
|
180
172
|
assert_equal(line, gff)
|
181
173
|
end
|
182
174
|
|
175
|
+
def test_tab_exportview_with_named_args
|
176
|
+
# OR1A1 (Olfactory receptor 1A1)
|
177
|
+
lines = [ [ "seqname",
|
178
|
+
"source",
|
179
|
+
"feature",
|
180
|
+
"start",
|
181
|
+
"end",
|
182
|
+
"score",
|
183
|
+
"strand",
|
184
|
+
"frame",
|
185
|
+
"gene_id",
|
186
|
+
"transcript_id",
|
187
|
+
"exon_id",
|
188
|
+
"gene_type"
|
189
|
+
],
|
190
|
+
[ "17",
|
191
|
+
"Ensembl",
|
192
|
+
"Gene",
|
193
|
+
"3065665",
|
194
|
+
"3066594",
|
195
|
+
".",
|
196
|
+
"+",
|
197
|
+
"1",
|
198
|
+
"ENSG00000172146",
|
199
|
+
"ENST00000304094",
|
200
|
+
"ENSE00001137815",
|
201
|
+
"KNOWN_protein_coding"
|
202
|
+
],
|
203
|
+
[ "17",
|
204
|
+
"Vega",
|
205
|
+
"Gene",
|
206
|
+
"3065665",
|
207
|
+
"3066594",
|
208
|
+
".",
|
209
|
+
"+",
|
210
|
+
"1",
|
211
|
+
"OTTHUMG00000090637",
|
212
|
+
"OTTHUMT00000207292",
|
213
|
+
"OTTHUME00001080001",
|
214
|
+
"KNOWN_protein_coding"
|
215
|
+
]
|
216
|
+
]
|
217
|
+
line = lines.collect { |x| x.join("\t") + "\n" }.join('')
|
218
|
+
gff = @serv.exportview(:seq_region_name => 17,
|
219
|
+
:anchor1 => 3065665,
|
220
|
+
:anchor2 => 3066594,
|
221
|
+
:options => ['gene'],
|
222
|
+
:format => 'tab')
|
223
|
+
assert_equal(line, gff)
|
224
|
+
end
|
225
|
+
|
183
226
|
|
184
227
|
end
|
185
228
|
|