bio 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/doc/Tutorial.rd.html
CHANGED
@@ -9,18 +9,17 @@
|
|
9
9
|
</head>
|
10
10
|
<body>
|
11
11
|
<h1><a name="label-0" id="label-0">BioRuby Tutorial</a></h1><!-- RDLabel: "BioRuby Tutorial" -->
|
12
|
-
<p>Editor: PjotrPrins <p .at. bioruby.org></p>
|
13
12
|
<ul>
|
14
13
|
<li>Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org></li>
|
15
|
-
<li>Copyright (C) 2005-
|
14
|
+
<li>Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others</li>
|
16
15
|
</ul>
|
17
|
-
<p>
|
18
|
-
|
19
|
-
<p>
|
16
|
+
<p>This document was last modified: 2009/03/17
|
17
|
+
Current editor: Pjotr Prins <p .at. bioruby.org></p>
|
18
|
+
<p>The latest version resides in the GIT source code repository: ./doc/<a href="http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd">Tutorial.rd</a>.</p>
|
20
19
|
<h2><a name="label-1" id="label-1">Introduction</a></h2><!-- RDLabel: "Introduction" -->
|
21
20
|
<p>This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
22
21
|
If you want to know more about the programming language Ruby we recommend the
|
23
|
-
|
22
|
+
latest Ruby book <a href="http://www.pragprog.com/titles/ruby">Programming Ruby</a>
|
24
23
|
by Dave Thomas and Andy Hunt - some of it is online
|
25
24
|
<a href="http://www.rubycentral.com/pickaxe/">here</a>.</p>
|
26
25
|
<p>For BioRuby you need to install Ruby and the BioRuby package on your computer</p>
|
@@ -28,7 +27,7 @@ by Dave Thomas and Andy Hunt - some of it is online
|
|
28
27
|
version it has with the</p>
|
29
28
|
<pre>% ruby -v</pre>
|
30
29
|
<p>command. Showing something like:</p>
|
31
|
-
<pre>ruby 1.8.
|
30
|
+
<pre>ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]</pre>
|
32
31
|
<p>If you see no such thing you'll have to install Ruby using your installation
|
33
32
|
manager. For more information see the
|
34
33
|
<a href="http://www.ruby-lang.org/en/">Ruby</a> website.</p>
|
@@ -46,7 +45,8 @@ ruby -I lib bin/bioruby</pre>
|
|
46
45
|
<p>and you should see a prompt</p>
|
47
46
|
<pre>bioruby></pre>
|
48
47
|
<p>Now test the following:</p>
|
49
|
-
<pre>bioruby>
|
48
|
+
<pre>bioruby> require 'bio'
|
49
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
50
50
|
==> "atgcatgcaaaa"
|
51
51
|
|
52
52
|
bioruby> seq.complement
|
@@ -131,29 +131,32 @@ specify positions smaller than or equal to 0 for either one of the "from" or
|
|
131
131
|
way of writing concise and clear code using 'closures'. Each sliding
|
132
132
|
window creates a subsequence which is supplied to the enclosed block
|
133
133
|
through a variable named +s+.</p>
|
134
|
-
<
|
134
|
+
<ul>
|
135
|
+
<li><p>Show average percentage of GC content for 20 bases (stepping the default one base at a time)</p>
|
135
136
|
<pre>bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
136
137
|
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
137
138
|
|
138
139
|
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
139
140
|
bioruby> a
|
140
|
-
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]</pre>
|
141
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]</pre></li>
|
142
|
+
</ul>
|
141
143
|
<p>Since the class of each subsequence is the same as original sequence
|
142
144
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
143
145
|
use all methods on the subsequence. For example,</p>
|
144
|
-
<
|
146
|
+
<ul>
|
147
|
+
<li><p>Shows translation results for 15 bases shifting a codon at a time</p>
|
145
148
|
<pre>bioruby> a = []
|
146
|
-
bioruby> seq.window_search(15, 3)
|
147
|
-
bioruby> a.push s.translate
|
148
|
-
bioruby> end
|
149
|
+
bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
|
149
150
|
bioruby> a
|
150
|
-
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]</pre>
|
151
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]</pre></li>
|
152
|
+
</ul>
|
151
153
|
<p>Finally, the window_search method returns the last leftover
|
152
154
|
subsequence. This allows for example</p>
|
153
|
-
<
|
154
|
-
|
155
|
-
|
156
|
-
the
|
155
|
+
<ul>
|
156
|
+
<li><p>Divide a genome sequence into sections of 10000bp and
|
157
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
158
|
+
start and end of each subsequence overlapped. At the 3' end of the sequence
|
159
|
+
the leftover is also added:</p>
|
157
160
|
<pre>i = 1
|
158
161
|
textwidth=60
|
159
162
|
remainder = seq.window_search(10000, 9000) do |s|
|
@@ -162,24 +165,23 @@ remainder = seq.window_search(10000, 9000) do |s|
|
|
162
165
|
end
|
163
166
|
if remainder
|
164
167
|
puts remainder.to_fasta("segment #{i}", textwidth)
|
165
|
-
end</pre>
|
168
|
+
end</pre></li>
|
169
|
+
</ul>
|
166
170
|
<p>If you don't want the overlapping window, set window size and stepping
|
167
171
|
size to equal values.</p>
|
168
172
|
<p>Other examples</p>
|
169
|
-
<
|
173
|
+
<ul>
|
174
|
+
<li><p>Count the codon usage</p>
|
170
175
|
<pre>bioruby> codon_usage = Hash.new(0)
|
171
|
-
bioruby> seq.window_search(3, 3)
|
172
|
-
bioruby> codon_usage[s] += 1
|
173
|
-
bioruby> end
|
176
|
+
bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
|
174
177
|
bioruby> codon_usage
|
175
|
-
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}</pre>
|
176
|
-
<p>Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)</p>
|
178
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}</pre></li>
|
179
|
+
<li><p>Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)</p>
|
177
180
|
<pre>bioruby> a = []
|
178
|
-
bioruby> seq.window_search(10, 10)
|
179
|
-
bioruby> a.push s.molecular_weight
|
180
|
-
bioruby> end
|
181
|
+
bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
|
181
182
|
bioruby> a
|
182
|
-
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]</pre>
|
183
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]</pre></li>
|
184
|
+
</ul>
|
183
185
|
<p>In most cases, sequences are read from files or retrieved from databases.
|
184
186
|
For example:</p>
|
185
187
|
<pre>require 'bio'
|
@@ -303,12 +305,14 @@ ff.each_entry do |gb|
|
|
303
305
|
puts hash['translation']
|
304
306
|
end
|
305
307
|
end</pre>
|
306
|
-
<
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
308
|
+
<ul>
|
309
|
+
<li>Note: In this example Feature#assoc method makes a Hash from a
|
310
|
+
feature object. It is useful because you can get data from the hash
|
311
|
+
by using qualifiers as keys.
|
312
|
+
(But there is a risk some information is lost when two or more
|
313
|
+
qualifiers are the same. Therefore an Array is returned by
|
314
|
+
Feature#feature)</li>
|
315
|
+
</ul>
|
312
316
|
<p>Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
313
317
|
according to location information used in GenBank, EMBL and DDBJ.</p>
|
314
318
|
<p>When the specified translation table is different from the default
|
@@ -318,15 +322,19 @@ contains selenocysteine, the two amino acid sequences will differ.</p>
|
|
318
322
|
feature style location text but also Bio::Locations object. For more
|
319
323
|
information about location format and Bio::Locations class, see
|
320
324
|
bio/location.rb.</p>
|
321
|
-
<
|
322
|
-
<
|
323
|
-
<
|
325
|
+
<ul>
|
326
|
+
<li><p>Splice according to location string used in a GenBank entry</p>
|
327
|
+
<pre>naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')</pre></li>
|
328
|
+
<li><p>Generate Bio::Locations object and pass the splicing method</p>
|
324
329
|
<pre>locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
325
|
-
naseq.splicing(locs)</pre>
|
330
|
+
naseq.splicing(locs)</pre></li>
|
331
|
+
</ul>
|
326
332
|
<p>You can also use the splicing method for amino acid sequences
|
327
333
|
(Bio::Sequence::AA objects).</p>
|
328
|
-
<
|
329
|
-
<
|
334
|
+
<ul>
|
335
|
+
<li><p>Splicing peptide from a protein (e.g. signal peptide)</p>
|
336
|
+
<pre>aaseq.splicing('21..119')</pre></li>
|
337
|
+
</ul>
|
330
338
|
<h3><a name="label-5" id="label-5">More databases</a></h3><!-- RDLabel: "More databases" -->
|
331
339
|
<p>Databases in BioRuby are essentially accessed like that of GenBank
|
332
340
|
with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in
|
@@ -384,23 +392,23 @@ bioruby> a = Bio::Alignment.new(seqs)
|
|
384
392
|
bioruby> a.consensus
|
385
393
|
==> "a?gc?"
|
386
394
|
# shows IUPAC consensus
|
387
|
-
a.consensus_iupac
|
388
|
-
|
395
|
+
p a.consensus_iupac # ==> "ahgcr"
|
396
|
+
|
389
397
|
# iterates over each seq
|
390
398
|
a.each { |x| p x }
|
391
|
-
# ==>
|
392
|
-
# "atgca"
|
393
|
-
# "aagca"
|
394
|
-
# "acgca"
|
395
|
-
# "acgcg"
|
399
|
+
# ==>
|
400
|
+
# "atgca"
|
401
|
+
# "aagca"
|
402
|
+
# "acgca"
|
403
|
+
# "acgcg"
|
396
404
|
# iterates over each site
|
397
405
|
a.each_site { |x| p x }
|
398
|
-
# ==>
|
399
|
-
# ["a", "a", "a", "a"]
|
400
|
-
# ["t", "a", "c", "c"]
|
401
|
-
# ["g", "g", "g", "g"]
|
402
|
-
# ["c", "c", "c", "c"]
|
403
|
-
# ["a", "a", "a", "g"]
|
406
|
+
# ==>
|
407
|
+
# ["a", "a", "a", "a"]
|
408
|
+
# ["t", "a", "c", "c"]
|
409
|
+
# ["g", "g", "g", "g"]
|
410
|
+
# ["c", "c", "c", "c"]
|
411
|
+
# ["a", "a", "a", "g"]
|
404
412
|
|
405
413
|
# doing alignment by using CLUSTAL W.
|
406
414
|
# clustalw command must be installed.
|
@@ -525,9 +533,9 @@ method of the factory object after the "query" method.</p>
|
|
525
533
|
puts factory.output</pre>
|
526
534
|
<h3><a name="label-10" id="label-10">using FASTA from a remote internet site</a></h3><!-- RDLabel: "using FASTA from a remote internet site" -->
|
527
535
|
<ul>
|
528
|
-
<li>Note: Currently, only GenomeNet (fasta.genome.jp) is
|
536
|
+
<li>Note: Currently, only GenomeNet (fasta.genome.jp) is
|
537
|
+
supported. Check the class documentation for updates.</li>
|
529
538
|
</ul>
|
530
|
-
<p>supported. check the class documentation for updates.</p>
|
531
539
|
<p>For accessing a remote site the Bio::Fasta.remote method is used
|
532
540
|
instead of Bio::Fasta.local. When using a remote method, the
|
533
541
|
databases available may be limited, but, otherwise, you can do the
|
@@ -625,7 +633,7 @@ are extracted from the first Hsp (High-scoring Segment Pair).</p>
|
|
625
633
|
retrieved. For now suffice to state that Bio::Blast::Report has a
|
626
634
|
hierarchical structure mirroring the general BLAST output stream:</p>
|
627
635
|
<ul>
|
628
|
-
<li>In a Bio::Blast::Report object, @
|
636
|
+
<li>In a Bio::Blast::Report object, @iterations is an array of
|
629
637
|
Bio::Blast::Report::Iteration objects.
|
630
638
|
<ul>
|
631
639
|
<li>In a Bio::Blast::Report::Iteration object, @hits is an array of
|
@@ -642,24 +650,38 @@ hierarchical structure mirroring the general BLAST output stream:</p>
|
|
642
650
|
you can directly create Bio::Blast::Report objects without the
|
643
651
|
Bio::Blast factory object. For this purpose use Bio::Blast.reports,
|
644
652
|
which supports the "-m 0" default and "-m 7" XML type output format.</p>
|
645
|
-
<
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
653
|
+
<ul>
|
654
|
+
<li><p>For example: </p>
|
655
|
+
<pre>bioruby> blast_version = nil; result = []
|
656
|
+
bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
|
657
|
+
bioruby> blast_version = report.version
|
658
|
+
bioruby> report.iterations.each do |itr|
|
659
|
+
bioruby> itr.hits.each do |hit|
|
660
|
+
bioruby> result.push hit.target_id
|
661
|
+
bioruby> end
|
662
|
+
bioruby> end
|
663
|
+
bioruby> end
|
664
|
+
bioruby> blast_version
|
665
|
+
==> "blastp 2.2.18 [Mar-02-2008]"
|
666
|
+
bioruby> result
|
667
|
+
==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]</pre></li>
|
668
|
+
<li><p>another example:</p>
|
669
|
+
<pre>require 'bio'
|
670
|
+
Bio::Blast.reports(ARGF) do |report|
|
652
671
|
puts "Hits for " + report.query_def + " against " + report.db
|
653
672
|
report.each do |hit|
|
654
673
|
print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
|
655
674
|
end
|
656
|
-
end</pre>
|
675
|
+
end</pre></li>
|
676
|
+
</ul>
|
657
677
|
<p>Save the script as hits_under_0.001.rb and to process BLAST output
|
658
|
-
files *.xml, you can
|
678
|
+
files *.xml, you can run it with:</p>
|
659
679
|
<pre>% ruby hits_under_0.001.rb *.xml</pre>
|
660
|
-
<p>Sometimes BLAST XML output may be wrong and can not be parsed.
|
661
|
-
|
662
|
-
the
|
680
|
+
<p>Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
|
681
|
+
blast is version 2.2.5 or later. See also blast --help. </p>
|
682
|
+
<p>Bio::Blast loads the full XML file into memory. If this causes a problem
|
683
|
+
you can split the BLAST XML file into smaller chunks using XML-Twig. An
|
684
|
+
example can be found in <a href="http://github.com/pjotrp/biotools/">Biotools</a>.</p>
|
663
685
|
<h3><a name="label-13" id="label-13">Add remote BLAST search sites</a></h3><!-- RDLabel: "Add remote BLAST search sites" -->
|
664
686
|
<pre>Note: this section is an advanced topic</pre>
|
665
687
|
<p>Here a more advanced application for using BLAST sequence homology
|
@@ -678,11 +700,7 @@ Bio::Blast::Report.new(or Bio::Blast::Default::Report.new):</p>
|
|
678
700
|
they may be included.</p>
|
679
701
|
<h2><a name="label-14" id="label-14">Generate a reference list using PubMed (Bio::PubMed)</a></h2><!-- RDLabel: "Generate a reference list using PubMed (Bio::PubMed)" -->
|
680
702
|
<p>The script below is an example which searches PubMed and creates a reference list.</p>
|
681
|
-
<pre
|
682
|
-
|
683
|
-
require 'bio'
|
684
|
-
|
685
|
-
ARGV.each do |id|
|
703
|
+
<pre>ARGV.each do |id|
|
686
704
|
entry = Bio::PubMed.query(id) # searches PubMed and get entry
|
687
705
|
medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
|
688
706
|
reference = medline.reference # converts into Bio::Reference object
|
@@ -818,9 +836,6 @@ BioRuby and other projects' members (2002).</p>
|
|
818
836
|
</ul>
|
819
837
|
<p>Here we give a quick overview. Check out
|
820
838
|
<a href="http://obda.open-bio.org/"><URL:http://obda.open-bio.org/></a> for more extensive details.</p>
|
821
|
-
<p>The specification is stored on CVS repository at cvs.open-bio.org,
|
822
|
-
also available via http from:
|
823
|
-
<a href="http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common"><URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common></a></p>
|
824
839
|
<h2><a name="label-18" id="label-18">BioRegistry</a></h2><!-- RDLabel: "BioRegistry" -->
|
825
840
|
<p>BioRegistry allows for locating retrieval methods and database
|
826
841
|
locations through configuration files. The priorities are</p>
|
@@ -1000,13 +1015,35 @@ rtags -R --vi</pre>
|
|
1000
1015
|
<ul>
|
1001
1016
|
<li><a href="http://www.genome.jp/kegg/soap/"><URL:http://www.genome.jp/kegg/soap/></a></li>
|
1002
1017
|
</ul>
|
1003
|
-
<h2><a name="label-30" id="label-30">
|
1018
|
+
<h2><a name="label-30" id="label-30">Ruby Ensembl API</a></h2><!-- RDLabel: "Ruby Ensembl API" -->
|
1019
|
+
<p>Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
|
1020
|
+
included in the BioRuby archives. To install it, see
|
1021
|
+
<a href="http://wiki.github.com/jandot/ruby-ensembl-api"><URL:http://wiki.github.com/jandot/ruby-ensembl-api></a>
|
1022
|
+
for more information.</p>
|
1023
|
+
<h3><a name="label-31" id="label-31">Gene Ontology (GO) through the Ruby Ensembl API</a></h3><!-- RDLabel: "Gene Ontology (GO) through the Ruby Ensembl API" -->
|
1024
|
+
<p>Gene Ontologies can be fetched through the Ruby Ensembl API package:</p>
|
1025
|
+
<pre>require 'ensembl'
|
1026
|
+
Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
|
1027
|
+
infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
|
1028
|
+
infile.each do |line|
|
1029
|
+
accs = line.split(",") # Split the comma-sep.entries into an array
|
1030
|
+
drosophila_acc = accs.shift # the first entry is the Drosophila acc
|
1031
|
+
mosq_acc = accs.shift # the second entry is your Mosq. acc
|
1032
|
+
gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
|
1033
|
+
print "#{mosq_acc}"
|
1034
|
+
gene.go_terms.each do |go|
|
1035
|
+
print ",#{go}"
|
1036
|
+
end
|
1037
|
+
end</pre>
|
1038
|
+
<p>Prints each mosq. accession/uniq identifier and the GO terms from the Drosophila
|
1039
|
+
homologues.</p>
|
1040
|
+
<h2><a name="label-32" id="label-32">Comparing BioProjects</a></h2><!-- RDLabel: "Comparing BioProjects" -->
|
1004
1041
|
<p>For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see <a href="http://sciruby.codeforpeople.com/sr.cgi/BioProjects"><URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects></a></p>
|
1005
|
-
<h2><a name="label-
|
1042
|
+
<h2><a name="label-33" id="label-33">Using BioRuby with R</a></h2><!-- RDLabel: "Using BioRuby with R" -->
|
1006
1043
|
<p>Using Ruby with R Pjotr wrote a section on SciRuby. See <a href="http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang"><URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang></a></p>
|
1007
|
-
<h2><a name="label-
|
1044
|
+
<h2><a name="label-34" id="label-34">Using BioPerl or BioPython from Ruby</a></h2><!-- RDLabel: "Using BioPerl or BioPython from Ruby" -->
|
1008
1045
|
<p>At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.</p>
|
1009
|
-
<h2><a name="label-
|
1046
|
+
<h2><a name="label-35" id="label-35">Installing required external library</a></h2><!-- RDLabel: "Installing required external library" -->
|
1010
1047
|
<p>At this point for using BioRuby no additional libraries are needed.
|
1011
1048
|
This may change, so keep an eye on the Bioruby website. Also when
|
1012
1049
|
a package is missing BioRuby should show an informative message.</p>
|
@@ -1014,17 +1051,17 @@ a package is missing BioRuby should show an informative message.</p>
|
|
1014
1051
|
painful, as the gem standard for packages evolved late and some still
|
1015
1052
|
force you to copy things by hand. Therefore read the README's
|
1016
1053
|
carefully that come with each package.</p>
|
1017
|
-
<h2><a name="label-
|
1054
|
+
<h2><a name="label-36" id="label-36">Trouble shooting</a></h2><!-- RDLabel: "Trouble shooting" -->
|
1018
1055
|
<ul>
|
1019
1056
|
<li>Error: in `require': no such file to load -- bio (LoadError)</li>
|
1020
1057
|
</ul>
|
1021
1058
|
<p>Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1022
1059
|
it to the interpreter. For example:</p>
|
1023
|
-
<pre>ruby -I
|
1024
|
-
<h2><a name="label-
|
1025
|
-
<p>IMPORTANT NOTICE: This page is maintained in the BioRuby
|
1060
|
+
<pre>ruby -I$BIORUBYPATH/lib yourprogram.rb</pre>
|
1061
|
+
<h2><a name="label-37" id="label-37">Modifying this page</a></h2><!-- RDLabel: "Modifying this page" -->
|
1062
|
+
<p>IMPORTANT NOTICE: This page is maintained in the BioRuby source code
|
1026
1063
|
repository. Please edit the file there otherwise changes may get
|
1027
|
-
lost. See <!-- Reference, RDLabel "BioRuby Developer Information" doesn't exist --><em class="label-not-found">BioRuby Developer Information</em><!-- Reference end --> for
|
1064
|
+
lost. See <!-- Reference, RDLabel "BioRuby Developer Information" doesn't exist --><em class="label-not-found">BioRuby Developer Information</em><!-- Reference end --> for repository and mailing list
|
1028
1065
|
access.</p>
|
1029
1066
|
|
1030
1067
|
</body>
|
data/lib/bio/appl/blast.rb
CHANGED
@@ -257,7 +257,7 @@ module Bio
|
|
257
257
|
end
|
258
258
|
|
259
259
|
# Server to submit the BLASTs to
|
260
|
-
|
260
|
+
attr_reader :server
|
261
261
|
|
262
262
|
# Sets server to submit the BLASTs to.
|
263
263
|
# The exec_xxxx method should be defined in Bio::Blast or
|
@@ -399,7 +399,7 @@ module Bio
|
|
399
399
|
if fmt = ncbiopt.get('-m') then
|
400
400
|
@format = fmt.to_i
|
401
401
|
else
|
402
|
-
Bio::Blast::Report #dummy to load XMLParser or REXML
|
402
|
+
dummy = Bio::Blast::Report #dummy to load XMLParser or REXML
|
403
403
|
if defined?(XMLParser) or defined?(REXML)
|
404
404
|
@format ||= 7
|
405
405
|
else
|
@@ -1218,7 +1218,7 @@ module Bio
|
|
1218
1218
|
method_after_parse_alignment :query_from
|
1219
1219
|
|
1220
1220
|
# end position of the query (including its position)
|
1221
|
-
attr_reader :query_to
|
1221
|
+
attr_reader :query_to if false #dummy
|
1222
1222
|
method_after_parse_alignment :query_to
|
1223
1223
|
|
1224
1224
|
# start position of the hit (the first position is 1)
|
data/lib/bio/appl/fasta.rb
CHANGED
@@ -16,7 +16,7 @@ module Bio
|
|
16
16
|
|
17
17
|
class Fasta
|
18
18
|
|
19
|
-
|
19
|
+
autoload :Report, 'bio/appl/fasta/format10'
|
20
20
|
#autoload :?????, 'bio/appl/fasta/format6'
|
21
21
|
|
22
22
|
# Returns a FASTA factory object (Bio::Fasta).
|
@@ -66,14 +66,13 @@ class Fasta
|
|
66
66
|
end
|
67
67
|
attr_reader :format
|
68
68
|
|
69
|
-
#
|
69
|
+
# OBSOLETE. Does nothing and shows warning messages.
|
70
70
|
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
# want to use appropriate Report class for parsing.
|
71
|
+
# Historically, selecting parser to use ('format6' or 'format10' were
|
72
|
+
# expected, but only 'format10' was available as a working parser).
|
74
73
|
#
|
75
74
|
def self.parser(parser)
|
76
|
-
|
75
|
+
warn 'Bio::Fasta.parser is obsoleted and will soon be removed.'
|
77
76
|
end
|
78
77
|
|
79
78
|
# Returns a FASTA factory object (Bio::Fasta) to run FASTA search on
|
@@ -102,12 +101,6 @@ class Fasta
|
|
102
101
|
|
103
102
|
|
104
103
|
def parse_result(data)
|
105
|
-
case @format
|
106
|
-
when 6
|
107
|
-
require 'bio/appl/fasta/format6'
|
108
|
-
when 10
|
109
|
-
require 'bio/appl/fasta/format10'
|
110
|
-
end
|
111
104
|
Report.new(data)
|
112
105
|
end
|
113
106
|
|
@@ -4,10 +4,11 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id
|
7
|
+
# $Id:$
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/appl/fasta'
|
11
|
+
require 'bio/io/flatfile/splitter'
|
11
12
|
|
12
13
|
module Bio
|
13
14
|
class Fasta
|
@@ -15,14 +16,94 @@ class Fasta
|
|
15
16
|
# Summarized results of the fasta execution results.
|
16
17
|
class Report
|
17
18
|
|
19
|
+
# Splitter for Bio::FlatFile
|
20
|
+
class FastaFormat10Splitter < Bio::FlatFile::Splitter::Template
|
21
|
+
|
22
|
+
# creates a new splitter object
|
23
|
+
def initialize(klass, bstream)
|
24
|
+
super(klass, bstream)
|
25
|
+
@delimiter = '>>>'
|
26
|
+
@real_delimiter = /^\s*\d+\>\>\>\z/
|
27
|
+
end
|
28
|
+
|
29
|
+
# do nothing and returns nil
|
30
|
+
def skip_leader
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
|
34
|
+
# gets an entry
|
35
|
+
def get_entry
|
36
|
+
p0 = stream_pos()
|
37
|
+
pieces = []
|
38
|
+
overrun = nil
|
39
|
+
first = true
|
40
|
+
while e = stream.gets(@delimiter)
|
41
|
+
pieces.push e
|
42
|
+
if @real_delimiter =~ e then
|
43
|
+
if first then
|
44
|
+
first = nil
|
45
|
+
else
|
46
|
+
overrun = $&
|
47
|
+
break
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ent = (pieces.empty? ? nil : pieces.join(''))
|
53
|
+
if ent and overrun then
|
54
|
+
ent[-overrun.length, overrun.length] = ''
|
55
|
+
stream.ungets(overrun)
|
56
|
+
end
|
57
|
+
|
58
|
+
p1 = stream_pos()
|
59
|
+
self.entry_start_pos = p0
|
60
|
+
self.entry = ent
|
61
|
+
self.entry_ended_pos = p1
|
62
|
+
return ent
|
63
|
+
end
|
64
|
+
end #FastaFormat10Splitter
|
65
|
+
|
66
|
+
# Splitter for Bio::FlatFile
|
67
|
+
FLATFILE_SPLITTER = FastaFormat10Splitter
|
68
|
+
|
18
69
|
def initialize(data)
|
70
|
+
# Split outputs containing multiple query sequences' results
|
71
|
+
chunks = data.split(/^(\s*\d+\>\>\>.*)/, 3)
|
72
|
+
if chunks.size >= 3 then
|
73
|
+
if chunks[0].strip.empty? then
|
74
|
+
qdef_line = chunks[1]
|
75
|
+
data = chunks[1..2].join('')
|
76
|
+
overruns = chunks[3..-1]
|
77
|
+
elsif /^\>\>\>/ =~ chunks[0] then
|
78
|
+
qdef_line = nil
|
79
|
+
data = chunks.shift
|
80
|
+
overruns = chunks
|
81
|
+
else
|
82
|
+
qdef_line = chunks[1]
|
83
|
+
data = chunks[0..2].join('')
|
84
|
+
overruns = chunks[3..-1]
|
85
|
+
end
|
86
|
+
@entry_overrun = overruns.join('')
|
87
|
+
if qdef_line and
|
88
|
+
/^ *\d+\>\>\>([^ ]+) .+ \- +(\d+) +(nt|aa)\s*$/ =~ qdef_line then
|
89
|
+
@query_def = $1
|
90
|
+
@query_len = $2.to_i
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
19
94
|
# header lines - brief list of the hits
|
20
|
-
if data.
|
95
|
+
if list_start = data.index("\nThe best scores are") then
|
96
|
+
data = data[(list_start + 1)..-1]
|
21
97
|
data.sub!(/(.*)\n\n>>>/m, '')
|
22
|
-
@list =
|
98
|
+
@list = $1
|
23
99
|
else
|
24
|
-
data.
|
25
|
-
|
100
|
+
if list_start = data.index(/\n!!\s+/) then
|
101
|
+
data = data[list_start..-1]
|
102
|
+
data.sub!(/\n!!\s+/, '')
|
103
|
+
data.sub!(/.*/) { |x| @list = x; '' }
|
104
|
+
else
|
105
|
+
data = data.sub(/.*/) { |x| @list = x; '' }
|
106
|
+
end
|
26
107
|
end
|
27
108
|
|
28
109
|
# body lines - fasta execution result
|
@@ -41,7 +122,16 @@ class Report
|
|
41
122
|
@hits.push(Hit.new(x))
|
42
123
|
end
|
43
124
|
end
|
44
|
-
|
125
|
+
|
126
|
+
# piece of next entry. Bio::FlatFile uses it.
|
127
|
+
attr_reader :entry_overrun
|
128
|
+
|
129
|
+
# Query definition. For older reports, the value may be nil.
|
130
|
+
attr_reader :query_def
|
131
|
+
|
132
|
+
# Query sequence length. For older reports, the value may be nil.
|
133
|
+
attr_reader :query_len
|
134
|
+
|
45
135
|
# Returns the 'The best scores are' lines as a String.
|
46
136
|
attr_reader :list
|
47
137
|
|