bio 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/bioruby +107 -0
- data/bin/br_biofetch.rb +59 -0
- data/bin/br_bioflat.rb +294 -0
- data/bin/br_biogetseq.rb +57 -0
- data/bin/br_pmfetch.rb +431 -0
- data/doc/BioRuby.rd.ja +225 -0
- data/doc/Changes-0.7.rd +236 -0
- data/doc/Design.rd.ja +341 -0
- data/doc/KEGG_API.rd +1437 -0
- data/doc/KEGG_API.rd.ja +1399 -0
- data/doc/TODO.rd.ja +138 -0
- data/doc/Tutorial.rd +1138 -0
- data/doc/Tutorial.rd.ja +2110 -0
- data/etc/bioinformatics/seqdatabase.ini +210 -0
- data/lib/bio.rb +256 -0
- data/lib/bio/alignment.rb +1906 -0
- data/lib/bio/appl/bl2seq/report.rb +350 -0
- data/lib/bio/appl/blast.rb +269 -0
- data/lib/bio/appl/blast/format0.rb +1402 -0
- data/lib/bio/appl/blast/format8.rb +95 -0
- data/lib/bio/appl/blast/report.rb +652 -0
- data/lib/bio/appl/blast/rexml.rb +151 -0
- data/lib/bio/appl/blast/wublast.rb +553 -0
- data/lib/bio/appl/blast/xmlparser.rb +222 -0
- data/lib/bio/appl/blat/report.rb +392 -0
- data/lib/bio/appl/clustalw.rb +191 -0
- data/lib/bio/appl/clustalw/report.rb +154 -0
- data/lib/bio/appl/emboss.rb +68 -0
- data/lib/bio/appl/fasta.rb +262 -0
- data/lib/bio/appl/fasta/format10.rb +428 -0
- data/lib/bio/appl/fasta/format6.rb +37 -0
- data/lib/bio/appl/genscan/report.rb +570 -0
- data/lib/bio/appl/hmmer.rb +129 -0
- data/lib/bio/appl/hmmer/report.rb +556 -0
- data/lib/bio/appl/mafft.rb +222 -0
- data/lib/bio/appl/mafft/report.rb +119 -0
- data/lib/bio/appl/psort.rb +555 -0
- data/lib/bio/appl/psort/report.rb +473 -0
- data/lib/bio/appl/sim4.rb +134 -0
- data/lib/bio/appl/sim4/report.rb +501 -0
- data/lib/bio/appl/sosui/report.rb +166 -0
- data/lib/bio/appl/spidey/report.rb +604 -0
- data/lib/bio/appl/targetp/report.rb +283 -0
- data/lib/bio/appl/tmhmm/report.rb +238 -0
- data/lib/bio/command.rb +166 -0
- data/lib/bio/data/aa.rb +354 -0
- data/lib/bio/data/codontable.rb +740 -0
- data/lib/bio/data/na.rb +226 -0
- data/lib/bio/db.rb +340 -0
- data/lib/bio/db/aaindex.rb +280 -0
- data/lib/bio/db/embl/common.rb +332 -0
- data/lib/bio/db/embl/embl.rb +446 -0
- data/lib/bio/db/embl/sptr.rb +954 -0
- data/lib/bio/db/embl/swissprot.rb +32 -0
- data/lib/bio/db/embl/trembl.rb +31 -0
- data/lib/bio/db/embl/uniprot.rb +32 -0
- data/lib/bio/db/fantom.rb +604 -0
- data/lib/bio/db/fasta.rb +869 -0
- data/lib/bio/db/genbank/common.rb +299 -0
- data/lib/bio/db/genbank/ddbj.rb +34 -0
- data/lib/bio/db/genbank/genbank.rb +354 -0
- data/lib/bio/db/genbank/genpept.rb +73 -0
- data/lib/bio/db/genbank/refseq.rb +31 -0
- data/lib/bio/db/gff.rb +106 -0
- data/lib/bio/db/go.rb +497 -0
- data/lib/bio/db/kegg/brite.rb +51 -0
- data/lib/bio/db/kegg/cell.rb +88 -0
- data/lib/bio/db/kegg/compound.rb +130 -0
- data/lib/bio/db/kegg/enzyme.rb +125 -0
- data/lib/bio/db/kegg/expression.rb +173 -0
- data/lib/bio/db/kegg/genes.rb +293 -0
- data/lib/bio/db/kegg/genome.rb +362 -0
- data/lib/bio/db/kegg/glycan.rb +213 -0
- data/lib/bio/db/kegg/keggtab.rb +418 -0
- data/lib/bio/db/kegg/kgml.rb +299 -0
- data/lib/bio/db/kegg/ko.rb +178 -0
- data/lib/bio/db/kegg/reaction.rb +97 -0
- data/lib/bio/db/litdb.rb +131 -0
- data/lib/bio/db/medline.rb +317 -0
- data/lib/bio/db/nbrf.rb +199 -0
- data/lib/bio/db/pdb.rb +38 -0
- data/lib/bio/db/pdb/atom.rb +60 -0
- data/lib/bio/db/pdb/chain.rb +117 -0
- data/lib/bio/db/pdb/model.rb +106 -0
- data/lib/bio/db/pdb/pdb.rb +1682 -0
- data/lib/bio/db/pdb/residue.rb +122 -0
- data/lib/bio/db/pdb/utils.rb +234 -0
- data/lib/bio/db/prosite.rb +616 -0
- data/lib/bio/db/rebase.rb +417 -0
- data/lib/bio/db/transfac.rb +387 -0
- data/lib/bio/feature.rb +201 -0
- data/lib/bio/io/brdb.rb +103 -0
- data/lib/bio/io/das.rb +471 -0
- data/lib/bio/io/dbget.rb +212 -0
- data/lib/bio/io/ddbjxml.rb +614 -0
- data/lib/bio/io/fastacmd.rb +123 -0
- data/lib/bio/io/fetch.rb +114 -0
- data/lib/bio/io/flatfile.rb +496 -0
- data/lib/bio/io/flatfile/bdb.rb +266 -0
- data/lib/bio/io/flatfile/index.rb +1308 -0
- data/lib/bio/io/flatfile/indexer.rb +778 -0
- data/lib/bio/io/higet.rb +92 -0
- data/lib/bio/io/keggapi.rb +863 -0
- data/lib/bio/io/pubmed.rb +189 -0
- data/lib/bio/io/registry.rb +308 -0
- data/lib/bio/io/soapwsdl.rb +114 -0
- data/lib/bio/io/sql.rb +428 -0
- data/lib/bio/location.rb +650 -0
- data/lib/bio/pathway.rb +991 -0
- data/lib/bio/reference.rb +308 -0
- data/lib/bio/sequence.rb +593 -0
- data/lib/bio/shell.rb +51 -0
- data/lib/bio/shell/core.rb +512 -0
- data/lib/bio/shell/plugin/codon.rb +228 -0
- data/lib/bio/shell/plugin/entry.rb +85 -0
- data/lib/bio/shell/plugin/flatfile.rb +119 -0
- data/lib/bio/shell/plugin/keggapi.rb +187 -0
- data/lib/bio/shell/plugin/midi.rb +448 -0
- data/lib/bio/shell/plugin/obda.rb +63 -0
- data/lib/bio/shell/plugin/seq.rb +238 -0
- data/lib/bio/shell/session.rb +214 -0
- data/lib/bio/util/color_scheme.rb +214 -0
- data/lib/bio/util/color_scheme/buried.rb +78 -0
- data/lib/bio/util/color_scheme/helix.rb +78 -0
- data/lib/bio/util/color_scheme/hydropathy.rb +83 -0
- data/lib/bio/util/color_scheme/nucleotide.rb +50 -0
- data/lib/bio/util/color_scheme/strand.rb +78 -0
- data/lib/bio/util/color_scheme/taylor.rb +69 -0
- data/lib/bio/util/color_scheme/turn.rb +78 -0
- data/lib/bio/util/color_scheme/zappo.rb +69 -0
- data/lib/bio/util/contingency_table.rb +337 -0
- data/lib/bio/util/sirna.rb +306 -0
- data/lib/bioruby.rb +34 -0
- data/sample/biofetch.rb +475 -0
- data/sample/color_scheme_na.rb +99 -0
- data/sample/dbget +37 -0
- data/sample/fasta2tab.rb +99 -0
- data/sample/fsplit.rb +51 -0
- data/sample/gb2fasta.rb +31 -0
- data/sample/gb2tab.rb +325 -0
- data/sample/gbtab2mysql.rb +161 -0
- data/sample/genes2nuc.rb +33 -0
- data/sample/genes2pep.rb +33 -0
- data/sample/genes2tab.rb +81 -0
- data/sample/genome2rb.rb +29 -0
- data/sample/genome2tab.rb +76 -0
- data/sample/goslim.rb +311 -0
- data/sample/gt2fasta.rb +47 -0
- data/sample/pmfetch.rb +42 -0
- data/sample/pmsearch.rb +42 -0
- data/sample/psortplot_html.rb +222 -0
- data/sample/ssearch2tab.rb +96 -0
- data/sample/tdiary.rb +158 -0
- data/sample/tfastx2tab.rb +100 -0
- data/sample/vs-genes.rb +212 -0
- data/test/data/SOSUI/sample.report +11 -0
- data/test/data/TMHMM/sample.report +21 -0
- data/test/data/blast/eco:b0002.faa +15 -0
- data/test/data/blast/eco:b0002.faa.m0 +128 -0
- data/test/data/blast/eco:b0002.faa.m7 +65 -0
- data/test/data/blast/eco:b0002.faa.m8 +1 -0
- data/test/data/embl/AB090716.embl +65 -0
- data/test/data/genscan/sample.report +63 -0
- data/test/data/prosite/prosite.dat +2233 -0
- data/test/data/refseq/nm_126355.entret +64 -0
- data/test/data/uniprot/p53_human.uniprot +1456 -0
- data/test/runner.rb +10 -0
- data/test/unit/bio/appl/blast/test_report.rb +427 -0
- data/test/unit/bio/appl/blast/test_xmlparser.rb +400 -0
- data/test/unit/bio/appl/genscan/test_report.rb +195 -0
- data/test/unit/bio/appl/sosui/test_report.rb +94 -0
- data/test/unit/bio/appl/targetp/test_report.rb +159 -0
- data/test/unit/bio/appl/test_blast.rb +159 -0
- data/test/unit/bio/appl/test_fasta.rb +142 -0
- data/test/unit/bio/appl/tmhmm/test_report.rb +139 -0
- data/test/unit/bio/data/test_aa.rb +103 -0
- data/test/unit/bio/data/test_codontable.rb +120 -0
- data/test/unit/bio/data/test_na.rb +89 -0
- data/test/unit/bio/db/embl/test_common.rb +130 -0
- data/test/unit/bio/db/embl/test_embl.rb +227 -0
- data/test/unit/bio/db/embl/test_sptr.rb +268 -0
- data/test/unit/bio/db/embl/test_uniprot.rb +44 -0
- data/test/unit/bio/db/kegg/test_genes.rb +58 -0
- data/test/unit/bio/db/test_fasta.rb +263 -0
- data/test/unit/bio/db/test_gff.rb +140 -0
- data/test/unit/bio/db/test_prosite.rb +1450 -0
- data/test/unit/bio/io/test_ddbjxml.rb +87 -0
- data/test/unit/bio/io/test_soapwsdl.rb +45 -0
- data/test/unit/bio/shell/plugin/test_seq.rb +175 -0
- data/test/unit/bio/test_alignment.rb +1028 -0
- data/test/unit/bio/test_command.rb +71 -0
- data/test/unit/bio/test_db.rb +109 -0
- data/test/unit/bio/test_feature.rb +128 -0
- data/test/unit/bio/test_location.rb +51 -0
- data/test/unit/bio/test_pathway.rb +485 -0
- data/test/unit/bio/test_sequence.rb +386 -0
- data/test/unit/bio/test_shell.rb +31 -0
- data/test/unit/bio/util/test_color_scheme.rb +45 -0
- data/test/unit/bio/util/test_contingency_table.rb +106 -0
- data/test/unit/bio/util/test_sirna.rb +258 -0
- metadata +295 -0
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/clustalw.rb - CLUSTAL W wrapper class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: clustalw.rb,v 1.10 2005/12/18 15:58:40 k Exp $
|
|
24
|
+
#
|
|
25
|
+
# Bio::ClustalW is a CLUSTAL W execution wrapper class.
|
|
26
|
+
# Its object is also called an alignment factory.
|
|
27
|
+
# CLUSTAL W is a very popular software for multiple sequence alignment.
|
|
28
|
+
#
|
|
29
|
+
# == References
|
|
30
|
+
#
|
|
31
|
+
# * Thompson,J.D., Higgins,D.G. and Gibson,T.J..
|
|
32
|
+
# CLUSTAL W: improving the sensitivity of progressive multiple sequence
|
|
33
|
+
# alignment through sequence weighting, position-specific gap penalties
|
|
34
|
+
# and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994.
|
|
35
|
+
# http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673
|
|
36
|
+
# * http://www.ebi.ac.uk/clustalw/
|
|
37
|
+
# * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/
|
|
38
|
+
#
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
require 'tempfile'
|
|
42
|
+
require 'open3'
|
|
43
|
+
|
|
44
|
+
require 'bio/sequence'
|
|
45
|
+
require 'bio/alignment'
|
|
46
|
+
|
|
47
|
+
module Bio
|
|
48
|
+
|
|
49
|
+
# Bio::ClustalW is a CLUSTAL W execution wrapper class.
|
|
50
|
+
# Its object is also called an alignment factory.
|
|
51
|
+
# CLUSTAL W is a very popular software for multiple sequence alignment.
|
|
52
|
+
class ClustalW
|
|
53
|
+
|
|
54
|
+
autoload :Report, 'bio/appl/clustalw/report'
|
|
55
|
+
|
|
56
|
+
# Creates a new CLUSTAL W execution wrapper object (alignment factory).
|
|
57
|
+
def initialize(program = 'clustalw', option = [])
|
|
58
|
+
@program = program
|
|
59
|
+
@option = option
|
|
60
|
+
@command = nil
|
|
61
|
+
@output = nil
|
|
62
|
+
@report = nil
|
|
63
|
+
@log = nil
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
# name of the program (usually 'clustalw' in UNIX)
|
|
67
|
+
attr_accessor :program
|
|
68
|
+
|
|
69
|
+
# options
|
|
70
|
+
attr_accessor :option
|
|
71
|
+
|
|
72
|
+
# Returns last command-line strings executed by this factory.
|
|
73
|
+
# Note that filenames described in the command-line may already
|
|
74
|
+
# be removed because they are temporary files.
|
|
75
|
+
# Returns an array.
|
|
76
|
+
attr_reader :command
|
|
77
|
+
|
|
78
|
+
# Returns last messages of CLUSTAL W execution.
|
|
79
|
+
attr_reader :log
|
|
80
|
+
|
|
81
|
+
# Returns last raw alignment result (String).
|
|
82
|
+
attr_reader :output
|
|
83
|
+
|
|
84
|
+
# Returns last alignment result.
|
|
85
|
+
# Returns a Bio::ClustalW::Report object.
|
|
86
|
+
attr_reader :report
|
|
87
|
+
|
|
88
|
+
# Executes the program(clustalw).
|
|
89
|
+
# If +seqs+ is not nil, perform alignment for seqs.
|
|
90
|
+
# If +seqs+ is nil, simply executes CLUSTAL W.
|
|
91
|
+
def query(seqs)
|
|
92
|
+
if seqs then
|
|
93
|
+
query_align(seqs)
|
|
94
|
+
else
|
|
95
|
+
exec_local(@option)
|
|
96
|
+
end
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
# Performs alignment for +seqs+.
|
|
100
|
+
# +seqs+ should be Bio::Alignment or Array of sequences or nil.
|
|
101
|
+
def query_align(seqs)
|
|
102
|
+
seqtype = nil
|
|
103
|
+
unless seqs.is_a?(Bio::Alignment)
|
|
104
|
+
seqs = Bio::Alignment.new(seqs)
|
|
105
|
+
end
|
|
106
|
+
seqs.each do |s|
|
|
107
|
+
if s.is_a?(Bio::Sequence::AA) then
|
|
108
|
+
seqtype = 'PROTEIN'
|
|
109
|
+
elsif s.is_a?(Bio::Sequence::NA) then
|
|
110
|
+
seqtype = 'DNA'
|
|
111
|
+
end
|
|
112
|
+
break if seqtype
|
|
113
|
+
end
|
|
114
|
+
query_string(seqs.to_fasta(70, :avoid_same_name => true), seqtype)
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
# Performs alignment for +str+.
|
|
118
|
+
# +str+ should be a string that can be recognized by CLUSTAL W.
|
|
119
|
+
# Writes +str+ to a temporary file and hands that file to
# query_by_filename.
#
# +str+:: text to align, in a format CLUSTAL W can read.
# +arg+:: extra arguments, forwarded unchanged to query_by_filename.
#
# Returns whatever query_by_filename returns.  The temporary input file
# is removed before this method returns, whether or not the run succeeds.
def query_string(str, *arg)
  # Created outside the begin/ensure: if creation itself fails, the real
  # error propagates instead of a NoMethodError on nil from the cleanup.
  tf_in = Tempfile.open('align')
  begin
    tf_in.print str
    # Close (and thereby flush) but keep the file, so the external
    # program can read it by path.
    tf_in.close(false)
    query_by_filename(tf_in.path, *arg)
  ensure
    # Close and unlink; safe to call even after the close(false) above.
    tf_in.close(true)
  end
end
|
|
130
|
+
|
|
131
|
+
# Performs alignment of sequences in the file named +path+.
|
|
132
|
+
# Runs the program with "-align" on the file at +path+ and collects
# the results.
#
# +path+::    name of the input file passed as -infile.
# +seqtype+:: optional sequence type; when given it is passed as -type
#             and also handed to Report.new.
#
# Sets @output (contents of the -outfile file), @output_dnd (contents
# of the -newtree file) and @report, and returns @report.
# Both temporary files are removed even when the run raises.
def query_by_filename(path, seqtype = nil)
  require 'bio/appl/clustalw/report'

  tf_out = Tempfile.open('clustalout')
  tf_dnd = nil
  begin
    # The files are only needed as path names for the external program,
    # so close them (without unlinking) right after creation.
    tf_out.close(false)
    tf_dnd = Tempfile.open('clustaldnd')
    tf_dnd.close(false)

    opt = [ "-align",
            "-infile=#{path}",
            "-outfile=#{tf_out.path}",
            "-newtree=#{tf_dnd.path}",
            "-outorder=input"
          ]
    opt << "-type=#{seqtype}" if seqtype
    # User-supplied options go last; @option itself is not modified.
    opt.concat(@option)
    exec_local(opt)

    # Reopen the files to read what the program wrote into them.
    tf_out.open
    @output = tf_out.read
    tf_dnd.open
    @output_dnd = tf_dnd.read
  ensure
    tf_out.close(true)
    tf_dnd.close(true) if tf_dnd
  end
  @report = Report.new(@output, seqtype)
  @report
end
|
|
158
|
+
|
|
159
|
+
# Returns last alignment guide tree (file.dnd).
|
|
160
|
+
attr_reader :output_dnd
|
|
161
|
+
|
|
162
|
+
# Returns last error messages (to stderr) of CLUSTAL W execution.
|
|
163
|
+
attr_reader :errorlog
|
|
164
|
+
|
|
165
|
+
private
|
|
166
|
+
# Executes the program in the local machine.
|
|
167
|
+
# Runs @program with the arguments in +opt+ (an Array) as a child
# process, without going through a shell.
#
# Stores the executed command array in @command, the child's standard
# output in @log and its standard error in @errorlog.  Both logs are
# cleared first, so a failed launch never leaves the previous run's
# messages behind.
#
# Returns @log.
def exec_local(opt)
  @command = [ @program, *opt ]
  #STDERR.print "DEBUG: ", @command.join(" "), "\n"
  @log = nil
  @errorlog = nil

  Open3.popen3(*@command) do |din, dout, derr|
    # Nothing is sent to the child's stdin.
    din.close
    # Drain stderr in a separate thread while stdout is read below, so
    # the child cannot block on a full stderr pipe.
    t = Thread.start do
      @errorlog = derr.read
    end
    @log = dout.read
    t.join
  end
  @log
end
|
|
187
|
+
|
|
188
|
+
end #class ClustalW
|
|
189
|
+
|
|
190
|
+
end #module Bio
|
|
191
|
+
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
#
|
|
2
|
+
# = bio/appl/clustalw/report.rb - CLUSTAL W format data (*.aln) class
|
|
3
|
+
#
|
|
4
|
+
# Copyright:: Copyright (C) 2003 GOTO Naohisa <ngoto@gen-info.osaka-u.ac.jp>
|
|
5
|
+
# License:: LGPL
|
|
6
|
+
#
|
|
7
|
+
#--
|
|
8
|
+
# This library is free software; you can redistribute it and/or
|
|
9
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
10
|
+
# License as published by the Free Software Foundation; either
|
|
11
|
+
# version 2 of the License, or (at your option) any later version.
|
|
12
|
+
#
|
|
13
|
+
# This library is distributed in the hope that it will be useful,
|
|
14
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
15
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
16
|
+
# Lesser General Public License for more details.
|
|
17
|
+
#
|
|
18
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
19
|
+
# License along with this library; if not, write to the Free Software
|
|
20
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
21
|
+
#++
|
|
22
|
+
#
|
|
23
|
+
# $Id: report.rb,v 1.9 2005/12/18 15:58:40 k Exp $
|
|
24
|
+
#
|
|
25
|
+
# Bio::ClustalW::Report is a CLUSTAL W report (*.aln file) parser.
|
|
26
|
+
# CLUSTAL W is a very popular software for multiple sequence alignment.
|
|
27
|
+
#
|
|
28
|
+
# == References
|
|
29
|
+
#
|
|
30
|
+
# * Thompson,J.D., Higgins,D.G. and Gibson,T.J..
|
|
31
|
+
# CLUSTAL W: improving the sensitivity of progressive multiple sequence
|
|
32
|
+
# alignment through sequence weighting, position-specific gap penalties
|
|
33
|
+
# and weight matrix choice. Nucleic Acids Research, 22:4673-4680, 1994.
|
|
34
|
+
# http://nar.oxfordjournals.org/cgi/content/abstract/22/22/4673
|
|
35
|
+
# * http://www.ebi.ac.uk/clustalw/
|
|
36
|
+
# * ftp://ftp.ebi.ac.uk/pub/software/unix/clustalw/
|
|
37
|
+
#
|
|
38
|
+
|
|
39
|
+
require 'bio/sequence'
|
|
40
|
+
require 'bio/db'
|
|
41
|
+
require 'bio/alignment'
|
|
42
|
+
require 'bio/appl/clustalw'
|
|
43
|
+
|
|
44
|
+
module Bio
|
|
45
|
+
class ClustalW
|
|
46
|
+
|
|
47
|
+
# CLUSTAL W result data (*.aln file) parser class.
|
|
48
|
+
class Report < Bio::DB
|
|
49
|
+
|
|
50
|
+
# Delimiter of each entry. Bio::FlatFile uses it.
|
|
51
|
+
# In Bio::ClustalW::Report, it is nil (1 entry per file).
|
|
52
|
+
DELIMITER = nil
|
|
53
|
+
|
|
54
|
+
# Creates new instance.
|
|
55
|
+
# +str+ should be a CLUSTAL format string.
|
|
56
|
+
# +seqclass+ should be one of the following:
|
|
57
|
+
# * Class: Bio::Sequence::AA, Bio::Sequence::NA, ...
|
|
58
|
+
# * String: 'PROTEIN', 'DNA', ...
|
|
59
|
+
def initialize(str, seqclass = nil)
|
|
60
|
+
@raw = str
|
|
61
|
+
@align = nil
|
|
62
|
+
@match_line = nil
|
|
63
|
+
@header = nil
|
|
64
|
+
case seqclass
|
|
65
|
+
when /PROTEIN/i
|
|
66
|
+
@seqclass = Bio::Sequence::AA
|
|
67
|
+
when /[DR]NA/i
|
|
68
|
+
@seqclass = Bio::Sequence::NA
|
|
69
|
+
else
|
|
70
|
+
if seqclass.is_a?(Module) then
|
|
71
|
+
@seqclass = seqclass
|
|
72
|
+
else
|
|
73
|
+
@seqclass = Bio::Sequence
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
# string of whole result
|
|
78
|
+
attr_reader :raw
|
|
79
|
+
|
|
80
|
+
# sequence class (one of Bio::Sequence, Bio::Sequence::NA,
|
|
81
|
+
# Bio::Sequence::AA, ...)
|
|
82
|
+
attr_reader :seqclass
|
|
83
|
+
|
|
84
|
+
# Shows first line of the result data, for example,
|
|
85
|
+
# 'CLUSTAL W (1.82) multiple sequence alignment'.
|
|
86
|
+
# Returns a string.
|
|
87
|
+
def header
|
|
88
|
+
@header or (do_parse or @header)
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
# Shows "match line" of CLUSTAL's alignment result, for example,
|
|
92
|
+
# ':* :* .* * .*::*. ** :* . * . '.
|
|
93
|
+
# Returns a string.
|
|
94
|
+
def match_line
|
|
95
|
+
@match_line or (do_parse or @match_line)
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
# Gets a multiple alignment.
|
|
99
|
+
# Returns a Bio::Alignment object.
|
|
100
|
+
def align
|
|
101
|
+
do_parse() unless @align
|
|
102
|
+
@align
|
|
103
|
+
end
|
|
104
|
+
alias alignment align
|
|
105
|
+
|
|
106
|
+
# Gets a FASTA-format string of the sequences.
|
|
107
|
+
# Returns a string.
|
|
108
|
+
def to_fasta(*arg)
|
|
109
|
+
align.to_fasta(*arg)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Gets an array of the sequences.
|
|
113
|
+
# Returns an array of Bio::FastaFormat objects.
|
|
114
|
+
def to_a
|
|
115
|
+
align.to_fastaformat_array
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
private
|
|
119
|
+
# Parses Clustal W result text.
|
|
120
|
+
def do_parse
|
|
121
|
+
return nil if @align
|
|
122
|
+
a = @raw.split(/\r?\n\r?\n/)
|
|
123
|
+
@header = a.shift.to_s
|
|
124
|
+
xalign = Bio::Alignment.new
|
|
125
|
+
@match_line = ''
|
|
126
|
+
if a.size > 0 then
|
|
127
|
+
a[0].gsub!(/\A(\r?\n)+/, '')
|
|
128
|
+
a.collect! { |x| x.split(/\r?\n/) }
|
|
129
|
+
a.each { |x|
|
|
130
|
+
x.each { |y| y.sub!(/ +\d+\s*$/, '') }} #for -SEQNOS=on option
|
|
131
|
+
@tagsize = ( a[0][0].rindex(/\s/) or -1 ) + 1
|
|
132
|
+
a.each do |x|
|
|
133
|
+
@match_line << x.pop.to_s[@tagsize..-1]
|
|
134
|
+
end
|
|
135
|
+
a[0].each do |y|
|
|
136
|
+
xalign.store(y[0, @tagsize].sub(/\s+\z/, ''), '')
|
|
137
|
+
end
|
|
138
|
+
a.each do |x|
|
|
139
|
+
x.each do |y|
|
|
140
|
+
name = y[0, @tagsize].sub(/\s+\z/, '')
|
|
141
|
+
seq = y[@tagsize..-1]
|
|
142
|
+
xalign[name] << seq
|
|
143
|
+
end
|
|
144
|
+
end
|
|
145
|
+
xalign.collect! { |x| @seqclass.new(x) }
|
|
146
|
+
end
|
|
147
|
+
@align = xalign
|
|
148
|
+
nil
|
|
149
|
+
end
|
|
150
|
+
|
|
151
|
+
end #class Report
|
|
152
|
+
end #class ClustalW
|
|
153
|
+
end #module Bio
|
|
154
|
+
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/appl/emboss.rb - EMBOSS wrapper
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: emboss.rb,v 1.2 2005/09/08 01:22:08 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
module Bio

  # Thin wrapper that runs an EMBOSS command line and captures what the
  # program writes to its standard output.
  class EMBOSS

    # +cmd_line+:: the command line as a single String.  ' -stdout' is
    #              appended to it before it is stored.
    def initialize(cmd_line)
      @cmd_line = cmd_line + ' -stdout'
    end

    # Runs the stored command line and returns everything the program
    # wrote to standard output (also available afterwards via #result).
    #
    # The pipe is always closed before returning.  Errors raised while
    # starting the program (e.g. Errno::ENOENT) propagate unchanged.
    def exec
      # Forget any handle from a previous call, so the ensure below only
      # ever closes the pipe opened by this call.
      @io = nil
      begin
        @io = IO.popen(@cmd_line, "w+")
        # Nothing is written to the child; closing the write end gives it
        # EOF on stdin so a program that reads stdin cannot block forever.
        @io.close_write
        @result = @io.read
        return @result
      ensure
        @io.close if @io
      end
    end

    # io::     the IO object used by the last #exec (closed once #exec
    #          has returned).
    # result:: the String read from the program by the last #exec.
    attr_reader :io, :result

  end

end
|
|
45
|
+
|
|
46
|
+
=begin
|
|
47
|
+
|
|
48
|
+
= Bio::EMBOSS
|
|
49
|
+
|
|
50
|
+
EMBOSS wrapper.
|
|
51
|
+
|
|
52
|
+
#!/usr/bin/env ruby
|
|
53
|
+
require 'bio'
|
|
54
|
+
|
|
55
|
+
emboss = Bio::EMBOSS.new("getorf -sequence ~/xlrhodop -outseq stdout")
|
|
56
|
+
puts emboss.exec
|
|
57
|
+
|
|
58
|
+
--- Bio::EMBOSS.new(command_line)
|
|
59
|
+
|
|
60
|
+
--- Bio::EMBOSS#exec
|
|
61
|
+
--- Bio::EMBOSS#io
|
|
62
|
+
--- Bio::EMBOSS#result
|
|
63
|
+
|
|
64
|
+
=== SEE ALSO
|
|
65
|
+
|
|
66
|
+
* http://www.emboss.org
|
|
67
|
+
|
|
68
|
+
=end
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
#
|
|
2
|
+
# bio/appl/fasta.rb - FASTA wrapper
|
|
3
|
+
#
|
|
4
|
+
# Copyright (C) 2001,2002 KATAYAMA Toshiaki <k@bioruby.org>
|
|
5
|
+
#
|
|
6
|
+
# This library is free software; you can redistribute it and/or
|
|
7
|
+
# modify it under the terms of the GNU Lesser General Public
|
|
8
|
+
# License as published by the Free Software Foundation; either
|
|
9
|
+
# version 2 of the License, or (at your option) any later version.
|
|
10
|
+
#
|
|
11
|
+
# This library is distributed in the hope that it will be useful,
|
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14
|
+
# Lesser General Public License for more details.
|
|
15
|
+
#
|
|
16
|
+
# You should have received a copy of the GNU Lesser General Public
|
|
17
|
+
# License along with this library; if not, write to the Free Software
|
|
18
|
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
|
19
|
+
#
|
|
20
|
+
# $Id: fasta.rb,v 1.20 2005/09/26 13:00:04 k Exp $
|
|
21
|
+
#
|
|
22
|
+
|
|
23
|
+
require 'net/http'
|
|
24
|
+
require 'cgi' unless defined?(CGI)
|
|
25
|
+
require 'bio/command'
|
|
26
|
+
require 'shellwords'
|
|
27
|
+
|
|
28
|
+
module Bio
|
|
29
|
+
|
|
30
|
+
class Fasta
|
|
31
|
+
|
|
32
|
+
autoload :Report, 'bio/appl/fasta/format10'
|
|
33
|
+
#autoload :?????, 'bio/appl/fasta/format6'
|
|
34
|
+
|
|
35
|
+
include Bio::Command::Tools
|
|
36
|
+
|
|
37
|
+
def initialize(program, db, opt = [], server = 'local')
|
|
38
|
+
@format = 10
|
|
39
|
+
|
|
40
|
+
@program = program
|
|
41
|
+
@db = db
|
|
42
|
+
@server = server
|
|
43
|
+
|
|
44
|
+
@ktup = nil
|
|
45
|
+
@matrix = nil
|
|
46
|
+
|
|
47
|
+
@output = ''
|
|
48
|
+
|
|
49
|
+
begin
|
|
50
|
+
a = opt.to_ary
|
|
51
|
+
rescue NameError #NoMethodError
|
|
52
|
+
# backward compatibility
|
|
53
|
+
a = Shellwords.shellwords(opt)
|
|
54
|
+
end
|
|
55
|
+
@options = [ '-Q', '-H', '-m', @format.to_s, *a ] # need -a ?
|
|
56
|
+
end
|
|
57
|
+
attr_accessor :program, :db, :options, :server, :ktup, :matrix
|
|
58
|
+
attr_reader :output
|
|
59
|
+
|
|
60
|
+
def option
|
|
61
|
+
# backward compatibility
|
|
62
|
+
make_command_line(@options)
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
def option=(str)
|
|
66
|
+
# backward compatibility
|
|
67
|
+
@options = Shellwords.shellwords(str)
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def format=(num)
|
|
71
|
+
@format = num.to_i
|
|
72
|
+
if i = @options.index('-m') then
|
|
73
|
+
@options[i+1, 1] = @format.to_s
|
|
74
|
+
else
|
|
75
|
+
@options << '-m' << @format.to_s
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
attr_reader :format
|
|
79
|
+
|
|
80
|
+
def self.parser(parser)
|
|
81
|
+
require "bio/appl/fasta/#{parser}"
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
def self.local(program, db, option = '')
|
|
85
|
+
self.new(program, db, option, 'local')
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
def self.remote(program, db, option = '', server = 'genomenet')
|
|
89
|
+
self.new(program, db, option, server)
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
def query(query)
|
|
93
|
+
return self.send("exec_#{@server}", query.to_s)
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
private
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def parse_result(data)
|
|
101
|
+
case @format
|
|
102
|
+
when 6
|
|
103
|
+
require 'bio/appl/fasta/format6'
|
|
104
|
+
when 10
|
|
105
|
+
require 'bio/appl/fasta/format10'
|
|
106
|
+
end
|
|
107
|
+
Report.new(data)
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
# Runs the search locally.
#
# Builds the command array from @program, @options, the literal '@'
# argument and @db, appends @ktup only when one has been set, feeds
# +query+ to the program through call_command_local, stores the raw
# output in @output and returns the result of parse_result on it.
def exec_local(query)
  cmd = [ @program, *@options ]
  cmd.concat([ '@', @db ])
  # @ktup is nil unless the caller assigned it; a nil must not end up
  # in the argument list.
  cmd.push(@ktup) if @ktup

  @output = call_command_local(cmd, query)
  parse_result(@output)
end
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def exec_genomenet(query)
|
|
125
|
+
host = "fasta.genome.jp"
|
|
126
|
+
#path = "/sit-bin/nph-fasta"
|
|
127
|
+
path = "/sit-bin/fasta" #2005.08.12
|
|
128
|
+
|
|
129
|
+
form = {
|
|
130
|
+
'style' => 'raw',
|
|
131
|
+
'prog' => @program,
|
|
132
|
+
'dbname' => @db,
|
|
133
|
+
'sequence' => CGI.escape(query),
|
|
134
|
+
'other_param' => CGI.escape(make_command_line_unix(@options)),
|
|
135
|
+
'ktup_value' => @ktup,
|
|
136
|
+
'matrix' => @matrix,
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
data = []
|
|
140
|
+
|
|
141
|
+
form.each do |k, v|
|
|
142
|
+
data.push("#{k}=#{v}") if v
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
report = nil
|
|
146
|
+
|
|
147
|
+
begin
|
|
148
|
+
http = Net::HTTP.new(host)
|
|
149
|
+
http.open_timeout = 300
|
|
150
|
+
http.read_timeout = 600
|
|
151
|
+
result, = http.post(path, data.join('&'))
|
|
152
|
+
@output = result.body
|
|
153
|
+
# workaround 2005.08.12
|
|
154
|
+
if /\<A +HREF=\"(http\:\/\/fasta\.genome\.jp(\/tmp\/[^\"]+))\"\>Show all result\<\/A\>/i =~ @output.to_s then
|
|
155
|
+
result, = http.get($2)
|
|
156
|
+
@output = result.body
|
|
157
|
+
txt = @output.to_s.split(/\<pre\>/)[1]
|
|
158
|
+
raise 'cannot understand response' unless txt
|
|
159
|
+
txt.sub!(/\<\/pre\>.*\z/m, '')
|
|
160
|
+
txt.sub!(/.*^((T?FASTA|SSEARCH) (searches|compares))/m, '\1')
|
|
161
|
+
txt.sub!(/^\<form method\=\"POST\" name\=\"clust_check\"\>.*\n/, '')
|
|
162
|
+
txt.gsub!(/\<input[^\>]+value\=\"[^\"]*\"[^\>]*\>/i, '')
|
|
163
|
+
txt.gsub!(/\<(a|form|select|input|option|img)\s+[^\>]+\>/i, '')
|
|
164
|
+
txt.gsub!(/\<\/(a|form|select|input|option|img)\>/i, '')
|
|
165
|
+
@output = txt.gsub(/\<\;/, '<')
|
|
166
|
+
report = parse_result(@output.dup)
|
|
167
|
+
else
|
|
168
|
+
raise 'cannot understand response'
|
|
169
|
+
end
|
|
170
|
+
end
|
|
171
|
+
|
|
172
|
+
return report
|
|
173
|
+
end
|
|
174
|
+
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
if __FILE__ == $0
|
|
181
|
+
begin
|
|
182
|
+
require 'pp'
|
|
183
|
+
alias p pp
|
|
184
|
+
rescue
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
# serv = Bio::Fasta.local('fasta34', 'hoge.nuc')
|
|
188
|
+
# serv = Bio::Fasta.local('fasta34', 'hoge.pep')
|
|
189
|
+
# serv = Bio::Fasta.local('ssearch34', 'hoge.pep')
|
|
190
|
+
serv = Bio::Fasta.remote('fasta', 'genes')
|
|
191
|
+
p serv.query(ARGF.read)
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
|
|
195
|
+
=begin
|
|
196
|
+
|
|
197
|
+
= Bio::Fasta
|
|
198
|
+
|
|
199
|
+
--- Bio::Fasta.new(program, db, opt = [], server = 'local')
|
|
200
|
+
--- Bio::Fasta.local(program, db, option = '')
|
|
201
|
+
--- Bio::Fasta.remote(program, db, option = '', server = 'genomenet')
|
|
202
|
+
|
|
203
|
+
Returns a fasta factory object (Bio::Fasta).
|
|
204
|
+
|
|
205
|
+
For developers: you can add a server named 'hoge' by adding an
|
|
206
|
+
exec_hoge(query) method.
|
|
207
|
+
|
|
208
|
+
--- Bio::Fasta#query(query)
|
|
209
|
+
|
|
210
|
+
Execute fasta search and returns Report object (Bio::Fasta::Report).
|
|
211
|
+
|
|
212
|
+
--- Bio::Fasta#output
|
|
213
|
+
|
|
214
|
+
Returns a String containing the raw (unparsed) fasta execution output.
|
|
215
|
+
|
|
216
|
+
--- Bio::Fasta#program
|
|
217
|
+
--- Bio::Fasta#db
|
|
218
|
+
--- Bio::Fasta#options
|
|
219
|
+
--- Bio::Fasta#server
|
|
220
|
+
--- Bio::Fasta#ktup
|
|
221
|
+
|
|
222
|
+
Accessors for the factory parameters.
|
|
223
|
+
|
|
224
|
+
--- Bio::Fasta#option
|
|
225
|
+
--- Bio::Fasta#option=(str)
|
|
226
|
+
|
|
227
|
+
Get/set options by string.
|
|
228
|
+
|
|
229
|
+
--- Bio::Fasta#format
|
|
230
|
+
--- Bio::Fasta#format=(number)
|
|
231
|
+
|
|
232
|
+
Accessors for the -m option.
|
|
233
|
+
|
|
234
|
+
--- Bio::Fasta.parser(parser)
|
|
235
|
+
|
|
236
|
+
Import Bio::Fasta::Report class by requiring specified parser.
|
|
237
|
+
|
|
238
|
+
This class method will be useful when you already have fasta
|
|
239
|
+
output files and want to use appropriate Report class for parsing.
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
== Available databases for Fasta.remote(@program, @db, option, 'genomenet')
|
|
243
|
+
|
|
244
|
+
# ----------+-------+---------------------------------------------------
|
|
245
|
+
# @program | query | @db (supported in GenomeNet)
|
|
246
|
+
# ----------+-------+---------------------------------------------------
|
|
247
|
+
# fasta | AA | nr-aa, genes, vgenes.pep, swissprot, swissprot-upd,
|
|
248
|
+
# | | pir, prf, pdbstr
|
|
249
|
+
# +-------+---------------------------------------------------
|
|
250
|
+
# | NA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
|
|
251
|
+
# | | htgs, dbsts, embl-nonst, embnonst-upd, epd,
|
|
252
|
+
# | | genes-nt, genome, vgenes.nuc
|
|
253
|
+
# ----------+-------+---------------------------------------------------
|
|
254
|
+
# tfasta | AA | nr-nt, genbank-nonst, gbnonst-upd, dbest, dbgss,
|
|
255
|
+
# | | htgs, dbsts, embl-nonst, embnonst-upd,
|
|
256
|
+
# | | genes-nt, genome, vgenes.nuc
|
|
257
|
+
# ----------+-------+---------------------------------------------------
|
|
258
|
+
|
|
259
|
+
See http://fasta.genome.jp/ideas/ideas.html#fasta for more details.
|
|
260
|
+
|
|
261
|
+
=end
|
|
262
|
+
|