bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
data/sample/tdiary.rb
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
#
|
|
2
|
+
# tDiary : plugin/bio.rb
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2003 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
|
6
|
+
# Itoshi NIKAIDO <itoshi@gsc.riken.go.jp>
|
|
7
|
+
# Takeya KASUKAWA <kasukawa@gsc.riken.go.jp>
|
|
8
|
+
#
|
|
9
|
+
# This library is free software; you can redistribute it and/or
|
|
10
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
11
|
+
# License as published by the Free Software Foundation; either
|
|
12
|
+
# version 2 of the License, or (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# This library is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
17
|
+
# Lesser General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
20
|
+
# License along with this library; if not, write to the Free Software
|
|
21
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
22
|
+
#
|
|
23
|
+
# $Id: tdiary.rb,v 1.3 2003/03/17 04:24:47 k Exp $
|
|
24
|
+
#
|
|
25
|
+
|
|
26
|
+
=begin
|
|
27
|
+
|
|
28
|
+
== What's this?
|
|
29
|
+
|
|
30
|
+
This is a plugin for the ((<tDiary|URL:http://www.tdiary.org/>)) to create
|
|
31
|
+
various links for biological resources from your diary.
|
|
32
|
+
|
|
33
|
+
tDiary is an extensible web diary application written in Ruby.
|
|
34
|
+
|
|
35
|
+
== How to install
|
|
36
|
+
|
|
37
|
+
Just copy this file under the tDiary's plugin directory as bio.rb.
|
|
38
|
+
|
|
39
|
+
== Usage
|
|
40
|
+
|
|
41
|
+
--- pubmed(pmid, comment = nil)
|
|
42
|
+
|
|
43
|
+
Create a link to NCBI Entrez reference database by using PubMed ID.
|
|
44
|
+
See ((<URL:http://www.ncbi.nlm.nih.gov/entrez/query.fcgi>)) for more
|
|
45
|
+
information.
|
|
46
|
+
|
|
47
|
+
* tDiary style
|
|
48
|
+
* <%= pubmed 12345 %>
|
|
49
|
+
* <%= pubmed 12345, 'hogehoge' %>
|
|
50
|
+
* RD style
|
|
51
|
+
* ((% pubmed 12345 %))
|
|
52
|
+
* ((% pubmed 12345, 'hogehoge' %))
|
|
53
|
+
|
|
54
|
+
--- biofetch(db, entry_id)
|
|
55
|
+
|
|
56
|
+
Create a link to the BioFetch database entry retrieval system.
|
|
57
|
+
See ((<URL:http://biofetch.bioruby.org/>)) for more information.
|
|
58
|
+
|
|
59
|
+
* tDiary style
|
|
60
|
+
* <%= biofetch 'genbank', 'AA2CG' %>
|
|
61
|
+
* RD style
|
|
62
|
+
* ((% biofetch 'genbank', 'AA2CG' %))
|
|
63
|
+
|
|
64
|
+
--- amigo(go_id, comment = nil)
|
|
65
|
+
|
|
66
|
+
Create a link to the AmiGO GO term browser by using GO ID.
|
|
67
|
+
See ((<URL:http://www.godatabase.org/cgi-bin/go.cgi>)) for more
|
|
68
|
+
information.
|
|
69
|
+
|
|
70
|
+
* tDiary style
|
|
71
|
+
* <%= amigo '0003673' %>
|
|
72
|
+
* <%= amigo '0003673', 'The root of GO' %>
|
|
73
|
+
* RD style
|
|
74
|
+
* ((% amigo 0003673 %))
|
|
75
|
+
* ((% amigo 0003673, 'The root of GO' %))
|
|
76
|
+
|
|
77
|
+
--- fantom(id, comment = nil)
|
|
78
|
+
|
|
79
|
+
Create a link to FANTOM database by using Clone ID.
|
|
80
|
+
You can use RIKEN clone ID, Rearray ID, Seq ID and Accession Number.
|
|
81
|
+
See ((<URL:http://fantom2.gsc.riken.go.jp/db/>)) for more information.
|
|
82
|
+
|
|
83
|
+
* tDiary style
|
|
84
|
+
* <%= fantom 12345 %>
|
|
85
|
+
* <%= fantom 12345, 'hogehoge' %>
|
|
86
|
+
* RD style
|
|
87
|
+
* ((% fantom 12345 %))
|
|
88
|
+
* ((% fantom 12345, 'hogehoge' %))
|
|
89
|
+
|
|
90
|
+
--- rtps(id, comment = nil)
|
|
91
|
+
|
|
92
|
+
Create a link to FANTOM RTPS database by using Clone ID.
|
|
93
|
+
You can use only RTPS ID.
|
|
94
|
+
See ((<URL:http://fantom2.gsc.riken.go.jp/RTPS/>)) for more information.
|
|
95
|
+
|
|
96
|
+
* tDiary style
|
|
97
|
+
* <%= rtps 12345 %>
|
|
98
|
+
* <%= rtps 12345, 'hogehoge' %>
|
|
99
|
+
* RD style
|
|
100
|
+
* ((% rtps 12345 %))
|
|
101
|
+
* ((% rtps 12345, 'hogehoge' %))
|
|
102
|
+
|
|
103
|
+
== References
|
|
104
|
+
|
|
105
|
+
* Analysis of the mouse transcriptome based on functional annotation of
|
|
106
|
+
60,770 full-length cDNAs, The FANTOM Consortium and the RIKEN Genome
|
|
107
|
+
Exploration Research Group Phase I & II Team, Nature 420:563-573, 2002
|
|
108
|
+
|
|
109
|
+
* Functional annotation of a full-length mouse cDNA collection,
|
|
110
|
+
The RIKEN Genome Exploration Research Group Phase II Team and
|
|
111
|
+
the FANTOM Consortium, Nature 409:685-690, 2001
|
|
112
|
+
|
|
113
|
+
=end
|
|
114
|
+
|
|
115
|
+
# Return an HTML anchor pointing at the NCBI Entrez abstract page for the
# given PubMed ID.  The link text is the stripped +comment+ when one is
# given, otherwise "PMID:<id>".
def pubmed(pmid, comment = nil)
  uid = pmid.to_s.strip
  href = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi" \
         "?cmd=Retrieve&db=PubMed&dopt=Abstract&list_uids=#{uid}"
  label = comment ? comment.to_s.strip : "PMID:#{uid}"
  %Q[<a href="#{href}">#{label}</a>]
end
|
|
125
|
+
|
|
126
|
+
# Return an HTML anchor that retrieves +entry_id+ from database +db+ through
# the BioFetch server in raw style.  The link text is "<db>:<entry_id>".
def biofetch(db, entry_id)
  server = "http://biofetch.bioruby.org/"
  query = "db=#{db};id=#{entry_id};style=raw"
  label = "#{db}:#{entry_id}"
  %Q[<a href="#{server}?#{query}">#{label}</a>]
end
|
|
130
|
+
|
|
131
|
+
# Return an HTML anchor that queries the AmiGO GO term browser for +go_id+
# (leading/trailing whitespace removed).  The link text is +comment+ as
# given, or "AmiGO:<id>" when no comment is supplied.
def amigo(go_id = '0003673', comment = nil)
  term = go_id.to_s.strip
  label = comment || "AmiGO:#{term}"
  href = "http://www.godatabase.org/cgi-bin/go.cgi?query=#{term};view=query;action=query;search_constraint=terms"
  %Q[<a href="#{href}">#{label}</a>]
end
|
|
137
|
+
|
|
138
|
+
# Return an HTML anchor to the FANTOM database entry identified by +id+.
# The link text is the stripped +comment+ when one is given, otherwise
# "FANTOM DB:<id>".
def fantom(id, comment = nil)
  clone = id.to_s.strip
  href = "http://fantom2.gsc.riken.go.jp/db/link/id.cgi?id=#{clone}"
  label = comment ? comment.to_s.strip : "FANTOM DB:#{clone}"
  %Q[<a href="#{href}">#{label}</a>]
end
|
|
148
|
+
|
|
149
|
+
# Return an HTML anchor to the FANTOM RTPS database entry identified by
# +id+.  The link text is the stripped +comment+ when one is given,
# otherwise "FANTOM RTPS DB:<id>".
def rtps(id, comment = nil)
  rtps_id = id.to_s.strip
  href = "http://fantom2.gsc.riken.go.jp/RTPS/link/id.cgi?id=#{rtps_id}"
  label = comment ? comment.to_s.strip : "FANTOM RTPS DB:#{rtps_id}"
  %Q[<a href="#{href}">#{label}</a>]
end
|
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# tfastx2tab.rb - convert TFASTX (-m 6) output into tab delimited data for MySQL
|
|
4
|
+
#
|
|
5
|
+
# Usage:
|
|
6
|
+
#
|
|
7
|
+
# % tfastx2tab.rb TFASTX-output-file[s] > tfastx_results.tab
|
|
8
|
+
# % mysql < tfastx_results.sql (use sample at the end of this file)
|
|
9
|
+
#
|
|
10
|
+
# Format accepted:
|
|
11
|
+
#
|
|
12
|
+
# % tfastx3[3][_t] -Q -H -m 6 query.f target.f ktup > TFASTX-output-file
|
|
13
|
+
#
|
|
14
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
15
|
+
#
|
|
16
|
+
# This program is free software; you can redistribute it and/or modify
|
|
17
|
+
# it under the terms of the GNU General Public License as published by
|
|
18
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
19
|
+
# (at your option) any later version.
|
|
20
|
+
#
|
|
21
|
+
# This program is distributed in the hope that it will be useful,
|
|
22
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
23
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
24
|
+
# GNU General Public License for more details.
|
|
25
|
+
#
|
|
26
|
+
# $Id: tfastx2tab.rb,v 0.1 2001/06/21 08:26:14 katayama Exp $
|
|
27
|
+
#
|
|
28
|
+
|
|
29
|
+
# Read TFASTX (-m 6) reports from the files named on the command line (or
# standard input) and print one tab-delimited row per query-hit pair:
#
#   file, q_len, target, t_len, initn, init1, opt, zscore, bits, evalue,
#   sw, ident, ugident, overlap, lap, frame
while (line = gets)

  # query: remember the query length announced by a "NAME: N aa" line
  q_len = $1 if line =~ /^\S+: (\d+) aa$/

  # each hit starts with a ">>TARGET ... (N aa)" header line
  next unless line =~ /^>>([^>]\S+).*\((\d+) aa\)$/

  target = $1
  t_len  = $2

  # the two lines following the header carry the scores; stop cleanly
  # instead of crashing on nil when the report is truncated
  score_line = gets
  ident_line = gets
  unless score_line && ident_line
    warn "#{$FILENAME}: truncated hit record for #{target}"
    break
  end

  # fields alternate between labels and values; "_" discards the labels
  _, frame, _, initn, _, init1, _, opt, _, zscore, _, bits, _, evalue =
    score_line.split(/\s+/)
  _, _, sw, ident, _, ugident, _, _, overlap, _, _, lap =
    ident_line.split(/\s+/)

  # pick up values, keeping only digits, '.', ':', 'e' and '-'
  # (to_s so that a missing field becomes an empty column)
  values = [
    initn, init1, opt, zscore, bits, evalue,
    sw, ident, ugident, overlap, lap
  ].map { |value| value.to_s.delete('^0-9.:e\-') }

  # query-hit pair, cleaned values, then the frame
  row = [$FILENAME, q_len, target, t_len] + values + [frame]
  print row.join("\t"), "\n"

end
|
|
75
|
+
|
|
76
|
+
=begin MySQL tfastx_results.sql sample
|
|
77
|
+
|
|
78
|
+
CREATE DATABASE IF NOT EXISTS db_name;
|
|
79
|
+
CREATE TABLE IF NOT EXISTS db_name.table_name (
|
|
80
|
+
query varchar(25) not NULL,
|
|
81
|
+
q_len integer unsigned default 0,
|
|
82
|
+
target varchar(25) not NULL,
|
|
83
|
+
t_len integer unsigned default 0,
|
|
84
|
+
initn integer unsigned default 0,
|
|
85
|
+
init1 integer unsigned default 0,
|
|
86
|
+
opt integer unsigned default 0,
|
|
87
|
+
zscore float default 0.0,
|
|
88
|
+
bits float default 0.0,
|
|
89
|
+
evalue float default 0.0,
|
|
90
|
+
sw integer unsigned default 0,
|
|
91
|
+
ident float default 0.0,
|
|
92
|
+
ugident float default 0.0,
|
|
93
|
+
overlap integer unsigned default 0,
|
|
94
|
+
lap_at varchar(25) default NULL,
|
|
95
|
+
frame varchar(5) default NULL
|
|
96
|
+
);
|
|
97
|
+
LOAD DATA LOCAL INFILE 'tfastx_results.tab' INTO TABLE db_name.table_name;
|
|
98
|
+
|
|
99
|
+
=end
|
|
100
|
+
|
data/sample/vs-genes.rb
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
#
|
|
3
|
+
# vs-genes.rb - homology/motif search wrapper
|
|
4
|
+
#
|
|
5
|
+
# FASTA/BLAST/Pfam interface for the multiple query in the FASTA format
|
|
6
|
+
#
|
|
7
|
+
# Copyright (C) 2001 KATAYAMA Toshiaki <k@bioruby.org>
|
|
8
|
+
#
|
|
9
|
+
# This program is free software; you can redistribute it and/or modify
|
|
10
|
+
# it under the terms of the GNU General Public License as published by
|
|
11
|
+
# the Free Software Foundation; either version 2 of the License, or
|
|
12
|
+
# (at your option) any later version.
|
|
13
|
+
#
|
|
14
|
+
# This program is distributed in the hope that it will be useful,
|
|
15
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
16
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
17
|
+
# GNU General Public License for more details.
|
|
18
|
+
#
|
|
19
|
+
# $Id: vs-genes.rb,v 0.1 2001/06/21 08:26:31 katayama Exp $
|
|
20
|
+
#
|
|
21
|
+
|
|
22
|
+
# Print the command line help to standard output, showing the given values
# as the current defaults, then terminate the script with exit status 1.
def usage(cpu, ktup, skip, resultdir, verbose)
  help = <<-END

Usage:

% #{$0} -p PROG -q QUERY -t TARGET [-c #] [-k #] [-s #] [-d DIR] [-v on]

options
-p PROG : (fasta3|ssearch3|tfasta3|fastx3|tfastx3)[3]
or
(blastp|blastn|blastx|tblastn|tblastx)
or
(hmmpfam|hmmpfam_n)
-q QUERY : query nucleotide or peptide sequences in the FASTA format
-t TARGET : target DB (FASTA or BLAST2 formatdb or Pfam format)

optional arguments
-c num : number of CPUs (for the SMP machines, default is #{cpu})
-k num : FASTA ktup value (2 for pep, 6 for nuc, default is #{ktup})
-s num : skip query (for the resume session, default is #{skip})
-d DIR : result output directory (default is "#{resultdir}")
-v on/off : verbose output of processing if on (default is "#{verbose}")

  END

  $stdout.print(help)
  exit(1)
end
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
### initialize
|
|
52
|
+
|
|
53
|
+
# Build the option table from the defaults and the "-x value" pairs found
# in ARGV, print the usage (and exit) unless -p, -q and -t are all given,
# create the result directory when it does not exist yet, and report the
# settings unless verbose mode is 'off'.  Returns the option hash.
def init
  # default values
  arg = {
    'c' => 1,          # num of CPUs
    'k' => 2,          # ktup value for FASTA
    's' => 0,          # skip query
    'd' => "./result", # result directory
    'v' => 'off'       # verbose mode
  }

  # parse options: every "-x value" pair overrides / adds an entry
  ARGV.join(' ').scan(/-(\w) (\S+)/) { |flag, value| arg[flag] = value }

  # program, query and target are mandatory
  usage(arg['c'], arg['k'], arg['s'], arg['d'], arg['v']) unless arg['p'] && arg['q'] && arg['t']

  # create result output directory
  outdir = arg['d']
  Dir.mkdir(outdir, 0755) unless File.directory?(outdir)

  # print status
  unless arg['v'] == 'off'
    status = ["PROG : #{arg['p']}"]
    status << " ktup : #{arg['k']}" if arg['p'] =~ /fast/
    status << "QUERY : #{arg['q']}"
    status << " skip : #{arg['s']}"
    status << "TARGET : #{arg['t']}"
    status << "RESULT : #{arg['d']}"
    puts status
  end

  arg
end
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
### generate command line
|
|
93
|
+
|
|
94
|
+
require 'shellwords'

# Build the shell command that runs the program selected by arg['p'] on the
# temporary query file "<arg['d']>/query.<orf>" against the target DB
# arg['t'], redirecting its output to "<arg['d']>/<orf>".
#
# arg :: option hash as returned by init ('p', 't', 'd', 'c' and 'k' are read)
# orf :: name of the query entry, used to derive the file names
#
# Returns the command line as a String.  Raises ArgumentError when arg['p']
# is not one of the supported programs.
#
# File names, the target and the ktup value are shell-escaped because the
# command is executed through a shell and the ORF name originates from the
# contents of the query file.
def cmd_line(arg, orf)
  # program with default command line options     # query -> target DB
  opt = {
    # FASTA : "-b n" for best n scores, "-d n" for best n alignment
    'fasta3'    => "fasta3 -Q -H -m 6",            # pep -> pep or nuc -> nuc
    'ssearch3'  => "ssearch3 -Q -H -m 6",          # pep -> pep or nuc -> nuc
    'tfasta3'   => "tfasta3 -Q -H -m 6",           # pep -> nuc
    'fastx3'    => "fastx3 -Q -H -m 6",            # nuc -> pep
    'tfastx3'   => "tfastx3 -Q -H -m 6",           # pep -> nuc (with frameshifts)

    'fasta33'   => "fasta33 -Q -H -m 6",           # pep -> pep or nuc -> nuc
    'ssearch33' => "ssearch33 -Q -H -m 6",         # pep -> pep or nuc -> nuc
    'tfasta33'  => "tfasta33 -Q -H -m 6",          # pep -> nuc
    'fastx33'   => "fastx33 -Q -H -m 6",           # nuc -> pep
    'tfastx33'  => "tfastx33 -Q -H -m 6",          # pep -> nuc (with frameshifts)

    # BLAST : outputs XML
    'blastp'    => "blastall -m 7 -p blastp -d",   # pep -> pep
    'blastn'    => "blastall -m 7 -p blastn -d",   # nuc -> nuc
    'blastx'    => "blastall -m 7 -p blastx -d",   # nuc -> pep
    'tblastn'   => "blastall -m 7 -p tblastn -d",  # pep -> nuc
    'tblastx'   => "blastall -m 7 -p tblastx -d",  # nuc -> nuc (by trans)

    # Pfam : "-A n" for best n alignment, "-E n" for E value cutoff etc.
    'hmmpfam'   => "hmmpfam",                      # pep -> Pfam DB
    'hmmpfam_n' => "hmmpfam -n",                   # nuc -> Pfam DB
  }

  prog = opt[arg['p']]
  raise ArgumentError, "unknown program: #{arg['p'].inspect}" unless prog

  # arguments used in the command line (escaped for the shell; ordinary
  # path characters such as '/', '.', ':' and '-' are left untouched)
  cpu    = arg['c'].to_i
  ktup   = Shellwords.escape(arg['k'].to_s)
  target = Shellwords.escape(arg['t'].to_s)
  query  = Shellwords.escape(arg['d'] + "/query." + orf)
  result = Shellwords.escape(arg['d'] + "/" + orf)

  if cpu > 1                                       # use multiple CPUs
    case arg['p']
    when /(fast|ssearch)/
      # threaded binaries carry a "_t" suffix: "fasta3 ..." -> "fasta3_t ..."
      prog = "#{prog} -T #{cpu}".sub(' ', '_t ')
    when /pfam/
      prog = "#{prog} --cpu #{cpu}"
    end
  end

  # generate complete command line to execute
  case arg['p']
  when /fast/
    "#{prog} #{query} #{target} #{ktup} > #{result}"
  when /ssearch/
    "#{prog} #{query} #{target} > #{result}"
  when /blast/
    "#{prog} #{target} -i #{query} > #{result}"
  when /pfam/
    "#{prog} #{target} #{query} > #{result}"
  end
end
|
|
155
|
+
|
|
156
|
+
|
|
157
|
+
### main
|
|
158
|
+
|
|
159
|
+
# Split the multi-FASTA query file into single entries and run the selected
# search program once per entry, writing each result to "<dir>/<ORF name>".
begin
  arg = init
  count = 0
  skip = arg['s'].to_i

  # File.open instead of Kernel#open: the latter would run a subprocess if
  # the user-supplied query path started with "|"
  File.open(arg['q'], "r") do |f|
    # entries are read up to (and including) the "\n>" of the next entry
    while (seq = f.gets("\n>"))
      count += 1

      # skip (-s option)
      next unless count > skip

      # clean up
      seq.sub!(/^>?[ \t]*/, '')   # delete '>' and SPACEs or TABs at the head
      seq.sub!(/>$/, '')          # delete '>' at the tail (separator)

      # no definition (e.g. ">\nSEQ>" or ">\n>") -> useless for the
      # multiple query
      next if seq[/^$/]

      # ORF name: the first word in the definition line
      orf = seq[/^\S+/]

      # KEGG uses ">DB:ENTRY" format in the definition line; keep the name
      # unchanged when nothing follows the colon (e.g. "eco:")
      if orf =~ /:/
        entry = orf.split(/:/)[1]
        orf = entry if entry
      end

      # add time if the same ORF name was already used
      if File.file?("#{arg['d']}/#{orf}")
        orf = "#{orf}.#{Time.now.to_f}"
      end

      query_file = "#{arg['d']}/query.#{orf}"

      begin
        # create temporary file of the query
        File.open(query_file, "w+") do |tmp|
          tmp.print(">#{seq}")
        end

        command = cmd_line(arg, orf)

        # print status
        if arg['v'] != 'off'
          puts "#{count} : #{orf} ..."
          puts "  #{command}"
        end

        # execute, reporting (but not aborting on) a failed run
        unless system(command)
          warn "#{orf}: command failed: #{command}"
        end
      ensure
        # remove temporary file even when something above raised
        File.delete(query_file) if File.exist?(query_file)
      end
    end
  end
end
|
|
212
|
+
|