bio 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/doc/Tutorial.rd.html
CHANGED
@@ -9,18 +9,17 @@
|
|
9
9
|
</head>
|
10
10
|
<body>
|
11
11
|
<h1><a name="label-0" id="label-0">BioRuby Tutorial</a></h1><!-- RDLabel: "BioRuby Tutorial" -->
|
12
|
-
<p>Editor: PjotrPrins <p .at. bioruby.org></p>
|
13
12
|
<ul>
|
14
13
|
<li>Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org></li>
|
15
|
-
<li>Copyright (C) 2005-
|
14
|
+
<li>Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others</li>
|
16
15
|
</ul>
|
17
|
-
<p>
|
18
|
-
|
19
|
-
<p>
|
16
|
+
<p>This document was last modified: 2009/03/17
|
17
|
+
Current editor: Pjotr Prins <p .at. bioruby.org></p>
|
18
|
+
<p>The latest version resides in the GIT source code repository: ./doc/<a href="http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd">Tutorial.rd</a>.</p>
|
20
19
|
<h2><a name="label-1" id="label-1">Introduction</a></h2><!-- RDLabel: "Introduction" -->
|
21
20
|
<p>This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
22
21
|
If you want to know more about the programming language Ruby we recommend the
|
23
|
-
|
22
|
+
latest Ruby book <a href="http://www.pragprog.com/titles/ruby">Programming Ruby</a>
|
24
23
|
by Dave Thomas and Andy Hunt - some of it is online
|
25
24
|
<a href="http://www.rubycentral.com/pickaxe/">here</a>.</p>
|
26
25
|
<p>For BioRuby you need to install Ruby and the BioRuby package on your computer</p>
|
@@ -28,7 +27,7 @@ by Dave Thomas and Andy Hunt - some of it is online
|
|
28
27
|
version it has with the</p>
|
29
28
|
<pre>% ruby -v</pre>
|
30
29
|
<p>command. Showing something like:</p>
|
31
|
-
<pre>ruby 1.8.
|
30
|
+
<pre>ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]</pre>
|
32
31
|
<p>If you see no such thing you'll have to install Ruby using your installation
|
33
32
|
manager. For more information see the
|
34
33
|
<a href="http://www.ruby-lang.org/en/">Ruby</a> website.</p>
|
@@ -46,7 +45,8 @@ ruby -I lib bin/bioruby</pre>
|
|
46
45
|
<p>and you should see a prompt</p>
|
47
46
|
<pre>bioruby></pre>
|
48
47
|
<p>Now test the following:</p>
|
49
|
-
<pre>bioruby>
|
48
|
+
<pre>bioruby> require 'bio'
|
49
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
50
50
|
==> "atgcatgcaaaa"
|
51
51
|
|
52
52
|
bioruby> seq.complement
|
@@ -131,29 +131,32 @@ specify positions smaller than or equal to 0 for either one of the "from" or
|
|
131
131
|
way of writing concise and clear code using 'closures'. Each sliding
|
132
132
|
window creates a subsequence which is supplied to the enclosed block
|
133
133
|
through a variable named +s+.</p>
|
134
|
-
<
|
134
|
+
<ul>
|
135
|
+
<li><p>Show average percentage of GC content for 20 bases (stepping the default one base at a time)</p>
|
135
136
|
<pre>bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
136
137
|
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
137
138
|
|
138
139
|
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
139
140
|
bioruby> a
|
140
|
-
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]</pre>
|
141
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]</pre></li>
|
142
|
+
</ul>
|
141
143
|
<p>Since the class of each subsequence is the same as original sequence
|
142
144
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
143
145
|
use all methods on the subsequence. For example,</p>
|
144
|
-
<
|
146
|
+
<ul>
|
147
|
+
<li><p>Shows translation results for 15 bases shifting a codon at a time</p>
|
145
148
|
<pre>bioruby> a = []
|
146
|
-
bioruby> seq.window_search(15, 3)
|
147
|
-
bioruby> a.push s.translate
|
148
|
-
bioruby> end
|
149
|
+
bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
|
149
150
|
bioruby> a
|
150
|
-
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]</pre>
|
151
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]</pre></li>
|
152
|
+
</ul>
|
151
153
|
<p>Finally, the window_search method returns the last leftover
|
152
154
|
subsequence. This allows for example</p>
|
153
|
-
<
|
154
|
-
|
155
|
-
|
156
|
-
the
|
155
|
+
<ul>
|
156
|
+
<li><p>Divide a genome sequence into sections of 10000bp and
|
157
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
158
|
+
start and end of each subsequence overlapped. At the 3' end of the sequence
|
159
|
+
the leftover is also added:</p>
|
157
160
|
<pre>i = 1
|
158
161
|
textwidth=60
|
159
162
|
remainder = seq.window_search(10000, 9000) do |s|
|
@@ -162,24 +165,23 @@ remainder = seq.window_search(10000, 9000) do |s|
|
|
162
165
|
end
|
163
166
|
if remainder
|
164
167
|
puts remainder.to_fasta("segment #{i}", textwidth)
|
165
|
-
end</pre>
|
168
|
+
end</pre></li>
|
169
|
+
</ul>
|
166
170
|
<p>If you don't want the overlapping window, set window size and stepping
|
167
171
|
size to equal values.</p>
|
168
172
|
<p>Other examples</p>
|
169
|
-
<
|
173
|
+
<ul>
|
174
|
+
<li><p>Count the codon usage</p>
|
170
175
|
<pre>bioruby> codon_usage = Hash.new(0)
|
171
|
-
bioruby> seq.window_search(3, 3)
|
172
|
-
bioruby> codon_usage[s] += 1
|
173
|
-
bioruby> end
|
176
|
+
bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
|
174
177
|
bioruby> codon_usage
|
175
|
-
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}</pre>
|
176
|
-
<p>Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)</p>
|
178
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}</pre></li>
|
179
|
+
<li><p>Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)</p>
|
177
180
|
<pre>bioruby> a = []
|
178
|
-
bioruby> seq.window_search(10, 10)
|
179
|
-
bioruby> a.push s.molecular_weight
|
180
|
-
bioruby> end
|
181
|
+
bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
|
181
182
|
bioruby> a
|
182
|
-
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]</pre>
|
183
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]</pre></li>
|
184
|
+
</ul>
|
183
185
|
<p>In most cases, sequences are read from files or retrieved from databases.
|
184
186
|
For example:</p>
|
185
187
|
<pre>require 'bio'
|
@@ -303,12 +305,14 @@ ff.each_entry do |gb|
|
|
303
305
|
puts hash['translation']
|
304
306
|
end
|
305
307
|
end</pre>
|
306
|
-
<
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
308
|
+
<ul>
|
309
|
+
<li>Note: In this example Feature#assoc method makes a Hash from a
|
310
|
+
feature object. It is useful because you can get data from the hash
|
311
|
+
by using qualifiers as keys.
|
312
|
+
(But there is a risk some information is lost when two or more
|
313
|
+
qualifiers are the same. Therefore an Array is returned by
|
314
|
+
Feature#feature)</li>
|
315
|
+
</ul>
|
312
316
|
<p>Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
313
317
|
according to location information used in GenBank, EMBL and DDBJ.</p>
|
314
318
|
<p>When the specified translation table is different from the default
|
@@ -318,15 +322,19 @@ contains selenocysteine, the two amino acid sequences will differ.</p>
|
|
318
322
|
feature style location text but also Bio::Locations object. For more
|
319
323
|
information about location format and Bio::Locations class, see
|
320
324
|
bio/location.rb.</p>
|
321
|
-
<
|
322
|
-
<
|
323
|
-
<
|
325
|
+
<ul>
|
326
|
+
<li><p>Splice according to location string used in a GenBank entry</p>
|
327
|
+
<pre>naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')</pre></li>
|
328
|
+
<li><p>Generate Bio::Locations object and pass the splicing method</p>
|
324
329
|
<pre>locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
325
|
-
naseq.splicing(locs)</pre>
|
330
|
+
naseq.splicing(locs)</pre></li>
|
331
|
+
</ul>
|
326
332
|
<p>You can also use the splicing method for amino acid sequences
|
327
333
|
(Bio::Sequence::AA objects).</p>
|
328
|
-
<
|
329
|
-
<
|
334
|
+
<ul>
|
335
|
+
<li><p>Splicing peptide from a protein (e.g. signal peptide)</p>
|
336
|
+
<pre>aaseq.splicing('21..119')</pre></li>
|
337
|
+
</ul>
|
330
338
|
<h3><a name="label-5" id="label-5">More databases</a></h3><!-- RDLabel: "More databases" -->
|
331
339
|
<p>Databases in BioRuby are essentially accessed like that of GenBank
|
332
340
|
with classes like Bio::GenBank, Bio::KEGG::GENES. A full list can be found in
|
@@ -384,23 +392,23 @@ bioruby> a = Bio::Alignment.new(seqs)
|
|
384
392
|
bioruby> a.consensus
|
385
393
|
==> "a?gc?"
|
386
394
|
# shows IUPAC consensus
|
387
|
-
a.consensus_iupac
|
388
|
-
|
395
|
+
p a.consensus_iupac # ==> "ahgcr"
|
396
|
+
|
389
397
|
# iterates over each seq
|
390
398
|
a.each { |x| p x }
|
391
|
-
# ==>
|
392
|
-
# "atgca"
|
393
|
-
# "aagca"
|
394
|
-
# "acgca"
|
395
|
-
# "acgcg"
|
399
|
+
# ==>
|
400
|
+
# "atgca"
|
401
|
+
# "aagca"
|
402
|
+
# "acgca"
|
403
|
+
# "acgcg"
|
396
404
|
# iterates over each site
|
397
405
|
a.each_site { |x| p x }
|
398
|
-
# ==>
|
399
|
-
# ["a", "a", "a", "a"]
|
400
|
-
# ["t", "a", "c", "c"]
|
401
|
-
# ["g", "g", "g", "g"]
|
402
|
-
# ["c", "c", "c", "c"]
|
403
|
-
# ["a", "a", "a", "g"]
|
406
|
+
# ==>
|
407
|
+
# ["a", "a", "a", "a"]
|
408
|
+
# ["t", "a", "c", "c"]
|
409
|
+
# ["g", "g", "g", "g"]
|
410
|
+
# ["c", "c", "c", "c"]
|
411
|
+
# ["a", "a", "a", "g"]
|
404
412
|
|
405
413
|
# doing alignment by using CLUSTAL W.
|
406
414
|
# clustalw command must be installed.
|
@@ -525,9 +533,9 @@ method of the factory object after the "query" method.</p>
|
|
525
533
|
puts factory.output</pre>
|
526
534
|
<h3><a name="label-10" id="label-10">using FASTA from a remote internet site</a></h3><!-- RDLabel: "using FASTA from a remote internet site" -->
|
527
535
|
<ul>
|
528
|
-
<li>Note: Currently, only GenomeNet (fasta.genome.jp) is
|
536
|
+
<li>Note: Currently, only GenomeNet (fasta.genome.jp) is
|
537
|
+
supported. Check the class documentation for updates.</li>
|
529
538
|
</ul>
|
530
|
-
<p>supported. check the class documentation for updates.</p>
|
531
539
|
<p>For accessing a remote site the Bio::Fasta.remote method is used
|
532
540
|
instead of Bio::Fasta.local. When using a remote method, the
|
533
541
|
databases available may be limited, but, otherwise, you can do the
|
@@ -625,7 +633,7 @@ are extracted from the first Hsp (High-scoring Segment Pair).</p>
|
|
625
633
|
retrieved. For now suffice to state that Bio::Blast::Report has a
|
626
634
|
hierarchical structure mirroring the general BLAST output stream:</p>
|
627
635
|
<ul>
|
628
|
-
<li>In a Bio::Blast::Report object, @
|
636
|
+
<li>In a Bio::Blast::Report object, @iterations is an array of
|
629
637
|
Bio::Blast::Report::Iteration objects.
|
630
638
|
<ul>
|
631
639
|
<li>In a Bio::Blast::Report::Iteration object, @hits is an array of
|
@@ -642,24 +650,38 @@ hierarchical structure mirroring the general BLAST output stream:</p>
|
|
642
650
|
you can directly create Bio::Blast::Report objects without the
|
643
651
|
Bio::Blast factory object. For this purpose use Bio::Blast.reports,
|
644
652
|
which supports the "-m 0" default and "-m 7" XML type output format.</p>
|
645
|
-
<
|
646
|
-
|
647
|
-
|
648
|
-
|
649
|
-
|
650
|
-
|
651
|
-
|
653
|
+
<ul>
|
654
|
+
<li><p>For example: </p>
|
655
|
+
<pre>bioruby> blast_version = nil; result = []
|
656
|
+
bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
|
657
|
+
bioruby> blast_version = report.version
|
658
|
+
bioruby> report.iterations.each do |itr|
|
659
|
+
bioruby> itr.hits.each do |hit|
|
660
|
+
bioruby> result.push hit.target_id
|
661
|
+
bioruby> end
|
662
|
+
bioruby> end
|
663
|
+
bioruby> end
|
664
|
+
bioruby> blast_version
|
665
|
+
==> "blastp 2.2.18 [Mar-02-2008]"
|
666
|
+
bioruby> result
|
667
|
+
==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]</pre></li>
|
668
|
+
<li><p>another example:</p>
|
669
|
+
<pre>require 'bio'
|
670
|
+
Bio::Blast.reports(ARGF) do |report|
|
652
671
|
puts "Hits for " + report.query_def + " against " + report.db
|
653
672
|
report.each do |hit|
|
654
673
|
print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
|
655
674
|
end
|
656
|
-
end</pre>
|
675
|
+
end</pre></li>
|
676
|
+
</ul>
|
657
677
|
<p>Save the script as hits_under_0.001.rb and to process BLAST output
|
658
|
-
files *.xml, you can
|
678
|
+
files *.xml, you can run it with:</p>
|
659
679
|
<pre>% ruby hits_under_0.001.rb *.xml</pre>
|
660
|
-
<p>Sometimes BLAST XML output may be wrong and can not be parsed.
|
661
|
-
|
662
|
-
the
|
680
|
+
<p>Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
|
681
|
+
blast is version 2.2.5 or later. See also blast --help. </p>
|
682
|
+
<p>Bio::Blast loads the full XML file into memory. If this causes a problem
|
683
|
+
you can split the BLAST XML file into smaller chunks using XML-Twig. An
|
684
|
+
example can be found in <a href="http://github.com/pjotrp/biotools/">Biotools</a>.</p>
|
663
685
|
<h3><a name="label-13" id="label-13">Add remote BLAST search sites</a></h3><!-- RDLabel: "Add remote BLAST search sites" -->
|
664
686
|
<pre>Note: this section is an advanced topic</pre>
|
665
687
|
<p>Here a more advanced application for using BLAST sequence homology
|
@@ -678,11 +700,7 @@ Bio::Blast::Report.new(or Bio::Blast::Default::Report.new):</p>
|
|
678
700
|
they may be included.</p>
|
679
701
|
<h2><a name="label-14" id="label-14">Generate a reference list using PubMed (Bio::PubMed)</a></h2><!-- RDLabel: "Generate a reference list using PubMed (Bio::PubMed)" -->
|
680
702
|
<p>The script below is an example which searches PubMed and creates a reference list.</p>
|
681
|
-
<pre
|
682
|
-
|
683
|
-
require 'bio'
|
684
|
-
|
685
|
-
ARGV.each do |id|
|
703
|
+
<pre>ARGV.each do |id|
|
686
704
|
entry = Bio::PubMed.query(id) # searches PubMed and get entry
|
687
705
|
medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
|
688
706
|
reference = medline.reference # converts into Bio::Reference object
|
@@ -818,9 +836,6 @@ BioRuby and other projects' members (2002).</p>
|
|
818
836
|
</ul>
|
819
837
|
<p>Here we give a quick overview. Check out
|
820
838
|
<a href="http://obda.open-bio.org/"><URL:http://obda.open-bio.org/></a> for more extensive details.</p>
|
821
|
-
<p>The specification is stored on CVS repository at cvs.open-bio.org,
|
822
|
-
also available via http from:
|
823
|
-
<a href="http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common"><URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common></a></p>
|
824
839
|
<h2><a name="label-18" id="label-18">BioRegistry</a></h2><!-- RDLabel: "BioRegistry" -->
|
825
840
|
<p>BioRegistry allows for locating retrieval methods and database
|
826
841
|
locations through configuration files. The priorities are</p>
|
@@ -1000,13 +1015,35 @@ rtags -R --vi</pre>
|
|
1000
1015
|
<ul>
|
1001
1016
|
<li><a href="http://www.genome.jp/kegg/soap/"><URL:http://www.genome.jp/kegg/soap/></a></li>
|
1002
1017
|
</ul>
|
1003
|
-
<h2><a name="label-30" id="label-30">
|
1018
|
+
<h2><a name="label-30" id="label-30">Ruby Ensembl API</a></h2><!-- RDLabel: "Ruby Ensembl API" -->
|
1019
|
+
<p>Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
|
1020
|
+
included in the BioRuby archives. To install it, see
|
1021
|
+
<a href="http://wiki.github.com/jandot/ruby-ensembl-api"><URL:http://wiki.github.com/jandot/ruby-ensembl-api></a>
|
1022
|
+
for more information.</p>
|
1023
|
+
<h3><a name="label-31" id="label-31">Gene Ontology (GO) through the Ruby Ensembl API</a></h3><!-- RDLabel: "Gene Ontology (GO) through the Ruby Ensembl API" -->
|
1024
|
+
<p>Gene Ontologies can be fetched through the Ruby Ensembl API package:</p>
|
1025
|
+
<pre>require 'ensembl'
|
1026
|
+
Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
|
1027
|
+
infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
|
1028
|
+
infile.each do |line|
|
1029
|
+
accs = line.split(",") # Split the comma-sep.entries into an array
|
1030
|
+
  drosophila_acc = accs.shift             # the first entry is the Drosophila acc
|
1031
|
+
  mosq_acc = accs.shift                   # the second entry is your Mosq. acc
|
1032
|
+
gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
|
1033
|
+
print "#{mosq_acc}"
|
1034
|
+
gene.go_terms.each do |go|
|
1035
|
+
print ",#{go}"
|
1036
|
+
end
|
1037
|
+
end</pre>
|
1038
|
+
<p>Prints each mosq. accession/unique identifier and the GO terms from the Drosophila
|
1039
|
+
homologues.</p>
|
1040
|
+
<h2><a name="label-32" id="label-32">Comparing BioProjects</a></h2><!-- RDLabel: "Comparing BioProjects" -->
|
1004
1041
|
<p>For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see <a href="http://sciruby.codeforpeople.com/sr.cgi/BioProjects"><URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects></a></p>
|
1005
|
-
<h2><a name="label-
|
1042
|
+
<h2><a name="label-33" id="label-33">Using BioRuby with R</a></h2><!-- RDLabel: "Using BioRuby with R" -->
|
1006
1043
|
<p>Using Ruby with R Pjotr wrote a section on SciRuby. See <a href="http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang"><URL:http://sciruby.codeforpeople.com/sr.cgi/RubyWithRlang></a></p>
|
1007
|
-
<h2><a name="label-
|
1044
|
+
<h2><a name="label-34" id="label-34">Using BioPerl or BioPython from Ruby</a></h2><!-- RDLabel: "Using BioPerl or BioPython from Ruby" -->
|
1008
1045
|
<p>At the moment there is no easy way of accessing BioPerl from Ruby. The best way, perhaps, is to create a Perl server that gets accessed through XML/RPC or SOAP.</p>
|
1009
|
-
<h2><a name="label-
|
1046
|
+
<h2><a name="label-35" id="label-35">Installing required external library</a></h2><!-- RDLabel: "Installing required external library" -->
|
1010
1047
|
<p>At this point for using BioRuby no additional libraries are needed.
|
1011
1048
|
This may change, so keep an eye on the Bioruby website. Also when
|
1012
1049
|
a package is missing BioRuby should show an informative message.</p>
|
@@ -1014,17 +1051,17 @@ a package is missing BioRuby should show an informative message.</p>
|
|
1014
1051
|
painful, as the gem standard for packages evolved late and some still
|
1015
1052
|
force you to copy things by hand. Therefore read the README's
|
1016
1053
|
carefully that come with each package.</p>
|
1017
|
-
<h2><a name="label-
|
1054
|
+
<h2><a name="label-36" id="label-36">Trouble shooting</a></h2><!-- RDLabel: "Trouble shooting" -->
|
1018
1055
|
<ul>
|
1019
1056
|
<li>Error: in `require': no such file to load -- bio (LoadError)</li>
|
1020
1057
|
</ul>
|
1021
1058
|
<p>Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1022
1059
|
it to the interpreter. For example:</p>
|
1023
|
-
<pre>ruby -I
|
1024
|
-
<h2><a name="label-
|
1025
|
-
<p>IMPORTANT NOTICE: This page is maintained in the BioRuby
|
1060
|
+
<pre>ruby -I$BIORUBYPATH/lib yourprogram.rb</pre>
|
1061
|
+
<h2><a name="label-37" id="label-37">Modifying this page</a></h2><!-- RDLabel: "Modifying this page" -->
|
1062
|
+
<p>IMPORTANT NOTICE: This page is maintained in the BioRuby source code
|
1026
1063
|
repository. Please edit the file there otherwise changes may get
|
1027
|
-
lost. See <!-- Reference, RDLabel "BioRuby Developer Information" doesn't exist --><em class="label-not-found">BioRuby Developer Information</em><!-- Reference end --> for
|
1064
|
+
lost. See <!-- Reference, RDLabel "BioRuby Developer Information" doesn't exist --><em class="label-not-found">BioRuby Developer Information</em><!-- Reference end --> for repository and mailing list
|
1028
1065
|
access.</p>
|
1029
1066
|
|
1030
1067
|
</body>
|
data/lib/bio/appl/blast.rb
CHANGED
@@ -257,7 +257,7 @@ module Bio
|
|
257
257
|
end
|
258
258
|
|
259
259
|
# Server to submit the BLASTs to
|
260
|
-
|
260
|
+
attr_reader :server
|
261
261
|
|
262
262
|
# Sets server to submit the BLASTs to.
|
263
263
|
# The exec_xxxx method should be defined in Bio::Blast or
|
@@ -399,7 +399,7 @@ module Bio
|
|
399
399
|
if fmt = ncbiopt.get('-m') then
|
400
400
|
@format = fmt.to_i
|
401
401
|
else
|
402
|
-
Bio::Blast::Report #dummy to load XMLParser or REXML
|
402
|
+
dummy = Bio::Blast::Report #dummy to load XMLParser or REXML
|
403
403
|
if defined?(XMLParser) or defined?(REXML)
|
404
404
|
@format ||= 7
|
405
405
|
else
|
@@ -1218,7 +1218,7 @@ module Bio
|
|
1218
1218
|
method_after_parse_alignment :query_from
|
1219
1219
|
|
1220
1220
|
# end position of the query (including its position)
|
1221
|
-
attr_reader :query_to
|
1221
|
+
attr_reader :query_to if false #dummy
|
1222
1222
|
method_after_parse_alignment :query_to
|
1223
1223
|
|
1224
1224
|
# start position of the hit (the first position is 1)
|
data/lib/bio/appl/fasta.rb
CHANGED
@@ -16,7 +16,7 @@ module Bio
|
|
16
16
|
|
17
17
|
class Fasta
|
18
18
|
|
19
|
-
|
19
|
+
autoload :Report, 'bio/appl/fasta/format10'
|
20
20
|
#autoload :?????, 'bio/appl/fasta/format6'
|
21
21
|
|
22
22
|
# Returns a FASTA factory object (Bio::Fasta).
|
@@ -66,14 +66,13 @@ class Fasta
|
|
66
66
|
end
|
67
67
|
attr_reader :format
|
68
68
|
|
69
|
-
#
|
69
|
+
# OBSOLETE. Does nothing and shows warning messages.
|
70
70
|
#
|
71
|
-
#
|
72
|
-
#
|
73
|
-
# want to use appropriate Report class for parsing.
|
71
|
+
# Historically, this selected the parser to use ('format6' or 'format10' were
|
72
|
+
# expected, but only 'format10' was available as a working parser).
|
74
73
|
#
|
75
74
|
def self.parser(parser)
|
76
|
-
|
75
|
+
warn 'Bio::Fasta.parser is obsoleted and will soon be removed.'
|
77
76
|
end
|
78
77
|
|
79
78
|
# Returns a FASTA factory object (Bio::Fasta) to run FASTA search on
|
@@ -102,12 +101,6 @@ class Fasta
|
|
102
101
|
|
103
102
|
|
104
103
|
def parse_result(data)
|
105
|
-
case @format
|
106
|
-
when 6
|
107
|
-
require 'bio/appl/fasta/format6'
|
108
|
-
when 10
|
109
|
-
require 'bio/appl/fasta/format10'
|
110
|
-
end
|
111
104
|
Report.new(data)
|
112
105
|
end
|
113
106
|
|
@@ -4,10 +4,11 @@
|
|
4
4
|
# Copyright:: Copyright (C) 2002 Toshiaki Katayama <k@bioruby.org>
|
5
5
|
# License:: The Ruby License
|
6
6
|
#
|
7
|
-
# $Id
|
7
|
+
# $Id:$
|
8
8
|
#
|
9
9
|
|
10
10
|
require 'bio/appl/fasta'
|
11
|
+
require 'bio/io/flatfile/splitter'
|
11
12
|
|
12
13
|
module Bio
|
13
14
|
class Fasta
|
@@ -15,14 +16,94 @@ class Fasta
|
|
15
16
|
# Summarized results of the fasta execution results.
|
16
17
|
class Report
|
17
18
|
|
19
|
+
# Splitter for Bio::FlatFile
|
20
|
+
class FastaFormat10Splitter < Bio::FlatFile::Splitter::Template
|
21
|
+
|
22
|
+
# creates a new splitter object
|
23
|
+
def initialize(klass, bstream)
|
24
|
+
super(klass, bstream)
|
25
|
+
@delimiter = '>>>'
|
26
|
+
@real_delimiter = /^\s*\d+\>\>\>\z/
|
27
|
+
end
|
28
|
+
|
29
|
+
# do nothing and returns nil
|
30
|
+
def skip_leader
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
|
34
|
+
# gets an entry
|
35
|
+
def get_entry
|
36
|
+
p0 = stream_pos()
|
37
|
+
pieces = []
|
38
|
+
overrun = nil
|
39
|
+
first = true
|
40
|
+
while e = stream.gets(@delimiter)
|
41
|
+
pieces.push e
|
42
|
+
if @real_delimiter =~ e then
|
43
|
+
if first then
|
44
|
+
first = nil
|
45
|
+
else
|
46
|
+
overrun = $&
|
47
|
+
break
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
ent = (pieces.empty? ? nil : pieces.join(''))
|
53
|
+
if ent and overrun then
|
54
|
+
ent[-overrun.length, overrun.length] = ''
|
55
|
+
stream.ungets(overrun)
|
56
|
+
end
|
57
|
+
|
58
|
+
p1 = stream_pos()
|
59
|
+
self.entry_start_pos = p0
|
60
|
+
self.entry = ent
|
61
|
+
self.entry_ended_pos = p1
|
62
|
+
return ent
|
63
|
+
end
|
64
|
+
end #FastaFormat10Splitter
|
65
|
+
|
66
|
+
# Splitter for Bio::FlatFile
|
67
|
+
FLATFILE_SPLITTER = FastaFormat10Splitter
|
68
|
+
|
18
69
|
def initialize(data)
|
70
|
+
# Split outputs containing multiple query sequences' results
|
71
|
+
chunks = data.split(/^(\s*\d+\>\>\>.*)/, 3)
|
72
|
+
if chunks.size >= 3 then
|
73
|
+
if chunks[0].strip.empty? then
|
74
|
+
qdef_line = chunks[1]
|
75
|
+
data = chunks[1..2].join('')
|
76
|
+
overruns = chunks[3..-1]
|
77
|
+
elsif /^\>\>\>/ =~ chunks[0] then
|
78
|
+
qdef_line = nil
|
79
|
+
data = chunks.shift
|
80
|
+
overruns = chunks
|
81
|
+
else
|
82
|
+
qdef_line = chunks[1]
|
83
|
+
data = chunks[0..2].join('')
|
84
|
+
overruns = chunks[3..-1]
|
85
|
+
end
|
86
|
+
@entry_overrun = overruns.join('')
|
87
|
+
if qdef_line and
|
88
|
+
/^ *\d+\>\>\>([^ ]+) .+ \- +(\d+) +(nt|aa)\s*$/ =~ qdef_line then
|
89
|
+
@query_def = $1
|
90
|
+
@query_len = $2.to_i
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
19
94
|
# header lines - brief list of the hits
|
20
|
-
if data.
|
95
|
+
if list_start = data.index("\nThe best scores are") then
|
96
|
+
data = data[(list_start + 1)..-1]
|
21
97
|
data.sub!(/(.*)\n\n>>>/m, '')
|
22
|
-
@list =
|
98
|
+
@list = $1
|
23
99
|
else
|
24
|
-
data.
|
25
|
-
|
100
|
+
if list_start = data.index(/\n!!\s+/) then
|
101
|
+
data = data[list_start..-1]
|
102
|
+
data.sub!(/\n!!\s+/, '')
|
103
|
+
data.sub!(/.*/) { |x| @list = x; '' }
|
104
|
+
else
|
105
|
+
data = data.sub(/.*/) { |x| @list = x; '' }
|
106
|
+
end
|
26
107
|
end
|
27
108
|
|
28
109
|
# body lines - fasta execution result
|
@@ -41,7 +122,16 @@ class Report
|
|
41
122
|
@hits.push(Hit.new(x))
|
42
123
|
end
|
43
124
|
end
|
44
|
-
|
125
|
+
|
126
|
+
# piece of next entry. Bio::FlatFile uses it.
|
127
|
+
attr_reader :entry_overrun
|
128
|
+
|
129
|
+
# Query definition. For older reports, the value may be nil.
|
130
|
+
attr_reader :query_def
|
131
|
+
|
132
|
+
# Query sequence length. For older reports, the value may be nil.
|
133
|
+
attr_reader :query_len
|
134
|
+
|
45
135
|
# Returns the 'The best scores are' lines as a String.
|
46
136
|
attr_reader :list
|
47
137
|
|