bio 1.3.0 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/README.rdoc
CHANGED
@@ -239,10 +239,12 @@ RubyGems 0.9.0 and removed in RubyGems 1.0.1.
|
|
239
239
|
== LICENSE
|
240
240
|
|
241
241
|
BioRuby can be freely distributed under the same terms as Ruby.
|
242
|
+
See the file COPYING (or COPYING.ja written in Japanese).
|
242
243
|
|
243
|
-
|
244
|
+
As written in the file COPYING, see the file LEGAL for files distributed
|
245
|
+
under different license. For example, setup.rb which comes from
|
244
246
|
{RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki
|
245
|
-
(http://i.loveruby.net/en/projects/setup/).
|
247
|
+
(http://i.loveruby.net/en/projects/setup/) is licensed under LGPL 2.1.
|
246
248
|
|
247
249
|
|
248
250
|
== CONTACT
|
data/Rakefile
CHANGED
@@ -61,7 +61,7 @@ task :gemspec => GEM_SPEC_FILE
|
|
61
61
|
desc "Force update gem spec file"
|
62
62
|
task :regemspec do
|
63
63
|
#rm GEM_SPEC_FILE, :force => true
|
64
|
-
Rake::Task[GEM_SPEC_FILE].execute
|
64
|
+
Rake::Task[GEM_SPEC_FILE].execute(nil)
|
65
65
|
end
|
66
66
|
|
67
67
|
desc "Update #{GEM_SPEC_FILE}"
|
@@ -137,7 +137,7 @@ desc "Force update doc/Tutorial*.html"
|
|
137
137
|
task :retutorial2html do
|
138
138
|
# safe_unlink HTMLFILES_TUTORIAL
|
139
139
|
HTMLFILES_TUTORIAL.each do |x|
|
140
|
-
Rake::Task[x].execute
|
140
|
+
Rake::Task[x].execute(nil)
|
141
141
|
end
|
142
142
|
end
|
143
143
|
|
data/bioruby.gemspec
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'bio'
|
6
|
-
s.version = "1.3.0"
|
6
|
+
s.version = "1.3.1"
|
7
7
|
|
8
8
|
s.author = "BioRuby project"
|
9
9
|
s.email = "staff@bioruby.org"
|
@@ -14,8 +14,13 @@ Gem::Specification.new do |s|
|
|
14
14
|
|
15
15
|
s.platform = Gem::Platform::RUBY
|
16
16
|
s.files = [
|
17
|
+
"COPYING",
|
18
|
+
"COPYING.ja",
|
17
19
|
"ChangeLog",
|
20
|
+
"GPL",
|
18
21
|
"KNOWN_ISSUES.rdoc",
|
22
|
+
"LEGAL",
|
23
|
+
"LGPL",
|
19
24
|
"README.rdoc",
|
20
25
|
"README_DEV.rdoc",
|
21
26
|
"Rakefile",
|
@@ -153,35 +158,9 @@ Gem::Specification.new do |s|
|
|
153
158
|
"lib/bio/db/soft.rb",
|
154
159
|
"lib/bio/db/transfac.rb",
|
155
160
|
"lib/bio/feature.rb",
|
156
|
-
"lib/bio/io/biosql/biodatabase.rb",
|
157
|
-
"lib/bio/io/biosql/bioentry.rb",
|
158
|
-
"lib/bio/io/biosql/bioentry_dbxref.rb",
|
159
|
-
"lib/bio/io/biosql/bioentry_path.rb",
|
160
|
-
"lib/bio/io/biosql/bioentry_qualifier_value.rb",
|
161
|
-
"lib/bio/io/biosql/bioentry_reference.rb",
|
162
|
-
"lib/bio/io/biosql/bioentry_relationship.rb",
|
163
|
-
"lib/bio/io/biosql/biosequence.rb",
|
164
|
-
"lib/bio/io/biosql/comment.rb",
|
161
|
+
"lib/bio/io/biosql/ar-biosql.rb",
|
162
|
+
"lib/bio/io/biosql/biosql.rb",
|
165
163
|
"lib/bio/io/biosql/config/database.yml",
|
166
|
-
"lib/bio/io/biosql/dbxref.rb",
|
167
|
-
"lib/bio/io/biosql/dbxref_qualifier_value.rb",
|
168
|
-
"lib/bio/io/biosql/location.rb",
|
169
|
-
"lib/bio/io/biosql/location_qualifier_value.rb",
|
170
|
-
"lib/bio/io/biosql/ontology.rb",
|
171
|
-
"lib/bio/io/biosql/reference.rb",
|
172
|
-
"lib/bio/io/biosql/seqfeature.rb",
|
173
|
-
"lib/bio/io/biosql/seqfeature_dbxref.rb",
|
174
|
-
"lib/bio/io/biosql/seqfeature_path.rb",
|
175
|
-
"lib/bio/io/biosql/seqfeature_qualifier_value.rb",
|
176
|
-
"lib/bio/io/biosql/seqfeature_relationship.rb",
|
177
|
-
"lib/bio/io/biosql/taxon.rb",
|
178
|
-
"lib/bio/io/biosql/taxon_name.rb",
|
179
|
-
"lib/bio/io/biosql/term.rb",
|
180
|
-
"lib/bio/io/biosql/term_dbxref.rb",
|
181
|
-
"lib/bio/io/biosql/term_path.rb",
|
182
|
-
"lib/bio/io/biosql/term_relationship.rb",
|
183
|
-
"lib/bio/io/biosql/term_relationship_term.rb",
|
184
|
-
"lib/bio/io/biosql/term_synonym.rb",
|
185
164
|
"lib/bio/io/das.rb",
|
186
165
|
"lib/bio/io/dbget.rb",
|
187
166
|
"lib/bio/io/ddbjxml.rb",
|
@@ -345,6 +324,7 @@ Gem::Specification.new do |s|
|
|
345
324
|
"test/data/embl/AB090716.embl.rel89",
|
346
325
|
"test/data/fasta/example1.txt",
|
347
326
|
"test/data/fasta/example2.txt",
|
327
|
+
"test/data/gcg/pileup-aa.msf",
|
348
328
|
"test/data/genscan/sample.report",
|
349
329
|
"test/data/iprscan/merged.raw",
|
350
330
|
"test/data/iprscan/merged.txt",
|
@@ -354,11 +334,15 @@ Gem::Specification.new do |s|
|
|
354
334
|
"test/data/prosite/prosite.dat",
|
355
335
|
"test/data/refseq/nm_126355.entret",
|
356
336
|
"test/data/rpsblast/misc.rpsblast",
|
337
|
+
"test/data/sim4/complement-A4.sim4",
|
338
|
+
"test/data/sim4/simple-A4.sim4",
|
339
|
+
"test/data/sim4/simple2-A4.sim4",
|
357
340
|
"test/data/soft/GDS100_partial.soft",
|
358
341
|
"test/data/soft/GSE3457_family_partial.soft",
|
359
342
|
"test/data/uniprot/p53_human.uniprot",
|
360
343
|
"test/functional/bio/appl/test_pts1.rb",
|
361
344
|
"test/functional/bio/io/test_ensembl.rb",
|
345
|
+
"test/functional/bio/io/test_pubmed.rb",
|
362
346
|
"test/functional/bio/io/test_soapwsdl.rb",
|
363
347
|
"test/functional/bio/io/test_togows.rb",
|
364
348
|
"test/functional/bio/sequence/test_output_embl.rb",
|
@@ -368,6 +352,7 @@ Gem::Specification.new do |s|
|
|
368
352
|
"test/unit/bio/appl/blast/test_ncbioptions.rb",
|
369
353
|
"test/unit/bio/appl/blast/test_report.rb",
|
370
354
|
"test/unit/bio/appl/blast/test_rpsblast.rb",
|
355
|
+
"test/unit/bio/appl/gcg/test_msf.rb",
|
371
356
|
"test/unit/bio/appl/genscan/test_report.rb",
|
372
357
|
"test/unit/bio/appl/hmmer/test_report.rb",
|
373
358
|
"test/unit/bio/appl/iprscan/test_report.rb",
|
@@ -375,6 +360,7 @@ Gem::Specification.new do |s|
|
|
375
360
|
"test/unit/bio/appl/paml/codeml/test_rates.rb",
|
376
361
|
"test/unit/bio/appl/paml/codeml/test_report.rb",
|
377
362
|
"test/unit/bio/appl/paml/test_codeml.rb",
|
363
|
+
"test/unit/bio/appl/sim4/test_report.rb",
|
378
364
|
"test/unit/bio/appl/sosui/test_report.rb",
|
379
365
|
"test/unit/bio/appl/targetp/test_report.rb",
|
380
366
|
"test/unit/bio/appl/test_blast.rb",
|
@@ -384,6 +370,8 @@ Gem::Specification.new do |s|
|
|
384
370
|
"test/unit/bio/data/test_aa.rb",
|
385
371
|
"test/unit/bio/data/test_codontable.rb",
|
386
372
|
"test/unit/bio/data/test_na.rb",
|
373
|
+
"test/unit/bio/db/biosql/tc_biosql.rb",
|
374
|
+
"test/unit/bio/db/biosql/ts_suite_biosql.rb",
|
387
375
|
"test/unit/bio/db/embl/test_common.rb",
|
388
376
|
"test/unit/bio/db/embl/test_embl.rb",
|
389
377
|
"test/unit/bio/db/embl/test_embl_rel89.rb",
|
data/doc/Tutorial.rd
CHANGED
@@ -2,45 +2,43 @@
|
|
2
2
|
#
|
3
3
|
# A possible test run could be from rdtool (on Debian package rdtool)
|
4
4
|
#
|
5
|
-
#
|
5
|
+
# rd2 $BIORUBYPATH/doc/Tutorial.rd
|
6
6
|
#
|
7
7
|
# or with style sheet:
|
8
8
|
#
|
9
|
-
#
|
10
|
-
ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
|
9
|
+
# rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css $BIORUBYPATH/doc/Tutorial.rd > ~/bioruby.html
|
11
10
|
#
|
12
11
|
# in Debian:
|
13
12
|
#
|
14
|
-
# rd2 -r rd/rd2html-lib --with-css="
|
13
|
+
# rd2 -r rd/rd2html-lib --with-css="../lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > Tutorial.rd.html
|
15
14
|
#
|
16
15
|
# A common problem is tabs in the text file! TABs are not allowed.
|
17
16
|
#
|
18
17
|
# To add tests run Toshiaki's bioruby shell and paste in the query plus
|
19
18
|
# results.
|
20
19
|
#
|
21
|
-
# To run the embedded Ruby doctests you can
|
20
|
+
# To run the embedded Ruby doctests you can use the rubydoctest tool, part
|
21
|
+
# of the bioruby-support repository at http://github.com/pjotrp/bioruby-support/
|
22
|
+
#
|
22
23
|
|
23
24
|
=begin
|
24
25
|
#doctest Testing bioruby
|
25
26
|
|
26
27
|
= BioRuby Tutorial
|
27
28
|
|
28
|
-
Editor: PjotrPrins <p .at. bioruby.org>
|
29
|
-
|
30
29
|
* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
|
31
|
-
* Copyright (C) 2005-
|
30
|
+
* Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others
|
32
31
|
|
33
|
-
|
32
|
+
This document was last modified: 2009/03/17
|
33
|
+
Current editor: Pjotr Prins <p .at. bioruby.org>
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
in preparation for the ((<BioHackathlon 2008|URL:http://hackathon.dbcls.jp/>))
|
35
|
+
The latest version resides in the GIT source code repository: ./doc/((<Tutorial.rd|URL:http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd>)).
|
38
36
|
|
39
37
|
== Introduction
|
40
38
|
|
41
39
|
This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
42
40
|
If you want to know more about the programming language Ruby we recommend the
|
43
|
-
|
41
|
+
latest Ruby book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
|
44
42
|
by Dave Thomas and Andy Hunt - some of it is online
|
45
43
|
((<here|URL:http://www.rubycentral.com/pickaxe/>)).
|
46
44
|
|
@@ -53,7 +51,7 @@ version it has with the
|
|
53
51
|
|
54
52
|
command. Showing something like:
|
55
53
|
|
56
|
-
ruby 1.8.
|
54
|
+
ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]
|
57
55
|
|
58
56
|
If you see no such thing you'll have to install Ruby using your installation
|
59
57
|
manager. For more information see the
|
@@ -81,6 +79,7 @@ and you should see a prompt
|
|
81
79
|
|
82
80
|
Now test the following:
|
83
81
|
|
82
|
+
bioruby> require 'bio'
|
84
83
|
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
85
84
|
==> "atgcatgcaaaa"
|
86
85
|
|
@@ -182,37 +181,35 @@ way of writing concise and clear code using 'closures'. Each sliding
|
|
182
181
|
window creates a subsequence which is supplied to the enclosed block
|
183
182
|
through a variable named +s+.
|
184
183
|
|
185
|
-
Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
184
|
+
* Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
186
185
|
|
187
|
-
|
188
|
-
|
186
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
187
|
+
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
189
188
|
|
190
|
-
|
191
|
-
|
192
|
-
|
189
|
+
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
190
|
+
bioruby> a
|
191
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
|
193
192
|
|
194
193
|
|
195
194
|
Since the class of each subsequence is the same as original sequence
|
196
195
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
197
196
|
use all methods on the subsequence. For example,
|
198
197
|
|
199
|
-
Shows translation results for 15 bases shifting a codon at a time
|
198
|
+
* Shows translation results for 15 bases shifting a codon at a time
|
200
199
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
bioruby> a
|
206
|
-
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
200
|
+
bioruby> a = []
|
201
|
+
bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
|
202
|
+
bioruby> a
|
203
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
207
204
|
|
208
205
|
|
209
206
|
Finally, the window_search method returns the last leftover
|
210
207
|
subsequence. This allows for example
|
211
208
|
|
212
|
-
Divide a genome sequence into sections of 10000bp and
|
213
|
-
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
214
|
-
start and end of each subsequence overlapped. At the 3' end of the sequence
|
215
|
-
the leftover is also added:
|
209
|
+
* Divide a genome sequence into sections of 10000bp and
|
210
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
211
|
+
start and end of each subsequence overlapped. At the 3' end of the sequence
|
212
|
+
the leftover is also added:
|
216
213
|
|
217
214
|
i = 1
|
218
215
|
textwidth=60
|
@@ -229,24 +226,20 @@ size to equal values.
|
|
229
226
|
|
230
227
|
Other examples
|
231
228
|
|
232
|
-
Count the codon usage
|
229
|
+
* Count the codon usage
|
233
230
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
bioruby> codon_usage
|
239
|
-
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
231
|
+
bioruby> codon_usage = Hash.new(0)
|
232
|
+
bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
|
233
|
+
bioruby> codon_usage
|
234
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
240
235
|
|
241
236
|
|
242
|
-
Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
237
|
+
* Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
243
238
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
bioruby> a
|
249
|
-
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
239
|
+
bioruby> a = []
|
240
|
+
bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
|
241
|
+
bioruby> a
|
242
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
250
243
|
|
251
244
|
In most cases, sequences are read from files or retrieved from databases.
|
252
245
|
For example:
|
@@ -398,12 +391,12 @@ very complicated:
|
|
398
391
|
end
|
399
392
|
end
|
400
393
|
|
401
|
-
Note: In this example Feature#assoc method makes a Hash from a
|
402
|
-
feature object. It is useful because you can get data from the hash
|
403
|
-
by using qualifiers as keys.
|
404
|
-
(But there is a risk some information is lost when two or more
|
405
|
-
qualifiers are the same. Therefore an Array is returned by
|
406
|
-
Feature#feature)
|
394
|
+
* Note: In this example Feature#assoc method makes a Hash from a
|
395
|
+
feature object. It is useful because you can get data from the hash
|
396
|
+
by using qualifiers as keys.
|
397
|
+
(But there is a risk some information is lost when two or more
|
398
|
+
qualifiers are the same. Therefore an Array is returned by
|
399
|
+
Feature#feature)
|
407
400
|
|
408
401
|
Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
409
402
|
according to location information used in GenBank, EMBL and DDBJ.
|
@@ -417,11 +410,11 @@ feature style location text but also Bio::Locations object. For more
|
|
417
410
|
information about location format and Bio::Locations class, see
|
418
411
|
bio/location.rb.
|
419
412
|
|
420
|
-
Splice according to location string used in a GenBank entry
|
413
|
+
* Splice according to location string used in a GenBank entry
|
421
414
|
|
422
415
|
naseq.splicing('join(2035..2050,complement(1775..1818),13..345')
|
423
416
|
|
424
|
-
Generate Bio::Locations object and pass the splicing method
|
417
|
+
* Generate Bio::Locations object and pass the splicing method
|
425
418
|
|
426
419
|
locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
427
420
|
naseq.splicing(locs)
|
@@ -429,7 +422,7 @@ Generate Bio::Locations object and pass the splicing method
|
|
429
422
|
You can also use the splicing method for amino acid sequences
|
430
423
|
(Bio::Sequence::AA objects).
|
431
424
|
|
432
|
-
Splicing peptide from a protein (e.g. signal peptide)
|
425
|
+
* Splicing peptide from a protein (e.g. signal peptide)
|
433
426
|
|
434
427
|
aaseq.splicing('21..119')
|
435
428
|
|
@@ -469,7 +462,7 @@ to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
|
|
469
462
|
possible to write a Unix type grep/sort pipe for sequence information. One
|
470
463
|
example using scripts in the BIORUBY sample folder:
|
471
464
|
|
472
|
-
|
465
|
+
fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
|
473
466
|
|
474
467
|
greps the database for Arabidopsis and Drosophila entries and sorts the output
|
475
468
|
to FASTA.
|
@@ -505,23 +498,23 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
|
505
498
|
bioruby> a.consensus
|
506
499
|
==> "a?gc?"
|
507
500
|
# shows IUPAC consensus
|
508
|
-
a.consensus_iupac
|
509
|
-
|
501
|
+
p a.consensus_iupac # ==> "ahgcr"
|
502
|
+
|
510
503
|
# iterates over each seq
|
511
504
|
a.each { |x| p x }
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
505
|
+
# ==>
|
506
|
+
# "atgca"
|
507
|
+
# "aagca"
|
508
|
+
# "acgca"
|
509
|
+
# "acgcg"
|
517
510
|
# iterates over each site
|
518
511
|
a.each_site { |x| p x }
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
512
|
+
# ==>
|
513
|
+
# ["a", "a", "a", "a"]
|
514
|
+
# ["t", "a", "c", "c"]
|
515
|
+
# ["g", "g", "g", "g"]
|
516
|
+
# ["c", "c", "c", "c"]
|
517
|
+
# ["a", "a", "a", "g"]
|
525
518
|
|
526
519
|
# doing alignment by using CLUSTAL W.
|
527
520
|
# clustalw command must be installed.
|
@@ -671,7 +664,7 @@ method of the factory object after the "query" method.
|
|
671
664
|
=== using FASTA from a remote internet site
|
672
665
|
|
673
666
|
* Note: Currently, only GenomeNet (fasta.genome.jp) is
|
674
|
-
supported. check the class documentation for updates.
|
667
|
+
supported. check the class documentation for updates.
|
675
668
|
|
676
669
|
For accessing a remote site the Bio::Fasta.remote method is used
|
677
670
|
instead of Bio::Fasta.local. When using a remote method, the
|
@@ -774,7 +767,7 @@ Check the documentation for Bio::Blast::Report to see what can be
|
|
774
767
|
retrieved. For now suffice to state that Bio::Blast::Report has a
|
775
768
|
hierarchical structure mirroring the general BLAST output stream:
|
776
769
|
|
777
|
-
* In a Bio::Blast::Report object, @
|
770
|
+
* In a Bio::Blast::Report object, @iterations is an array of
|
778
771
|
Bio::Blast::Report::Iteration objects.
|
779
772
|
* In a Bio::Blast::Report::Iteration object, @hits is an array of
|
780
773
|
Bio::Blast::Report::Hits objects.
|
@@ -790,13 +783,26 @@ you can directly create Bio::Blast::Report objects without the
|
|
790
783
|
Bio::Blast factory object. For this purpose use Bio::Blast.reports,
|
791
784
|
which supports the "-m 0" default and "-m 7" XML type output format.
|
792
785
|
|
793
|
-
|
786
|
+
* For example:
|
794
787
|
|
795
|
-
|
788
|
+
bioruby> blast_version = nil; result = []
|
789
|
+
bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
|
790
|
+
bioruby> blast_version = report.version
|
791
|
+
bioruby> report.iterations.each do |itr|
|
792
|
+
bioruby> itr.hits.each do |hit|
|
793
|
+
bioruby> result.push hit.target_id
|
794
|
+
bioruby> end
|
795
|
+
bioruby> end
|
796
|
+
bioruby> end
|
797
|
+
bioruby> blast_version
|
798
|
+
==> "blastp 2.2.18 [Mar-02-2008]"
|
799
|
+
bioruby> result
|
800
|
+
==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]
|
796
801
|
|
797
|
-
|
798
|
-
|
799
|
-
|
802
|
+
* another example:
|
803
|
+
|
804
|
+
require 'bio'
|
805
|
+
Bio::Blast.reports(ARGF) do |report|
|
800
806
|
puts "Hits for " + report.query_def + " against " + report.db
|
801
807
|
report.each do |hit|
|
802
808
|
print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
|
@@ -804,14 +810,16 @@ which supports the "-m 0" default and "-m 7" XML type output format.
|
|
804
810
|
end
|
805
811
|
|
806
812
|
Save the script as hits_under_0.001.rb and to process BLAST output
|
807
|
-
files *.xml, you can
|
813
|
+
files *.xml, you can run it with:
|
808
814
|
|
809
815
|
% ruby hits_under_0.001.rb *.xml
|
810
816
|
|
811
|
-
Sometimes BLAST XML output may be wrong and can not be parsed.
|
812
|
-
|
813
|
-
the -D and -m options when you encounter problems.
|
817
|
+
Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
|
818
|
+
blast is version 2.2.5 or later. See also blast --help.
|
814
819
|
|
820
|
+
Bio::Blast loads the full XML file into memory. If this causes a problem
|
821
|
+
you can split the BLAST XML file into smaller chunks using XML-Twig. An
|
822
|
+
example can be found in ((<Biotools|URL:http://github.com/pjotrp/biotools/>)).
|
815
823
|
|
816
824
|
=== Add remote BLAST search sites
|
817
825
|
|
@@ -838,10 +846,6 @@ they may be included.
|
|
838
846
|
|
839
847
|
The script below is an example which searches PubMed and creates a reference list.
|
840
848
|
|
841
|
-
#!/usr/bin/env ruby
|
842
|
-
|
843
|
-
require 'bio'
|
844
|
-
|
845
849
|
ARGV.each do |id|
|
846
850
|
entry = Bio::PubMed.query(id) # searches PubMed and get entry
|
847
851
|
medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
|
@@ -1010,10 +1014,6 @@ BioRuby and other projects' members (2002).
|
|
1010
1014
|
Here we give a quick overview. Check out
|
1011
1015
|
((<URL:http://obda.open-bio.org/>)) for more extensive details.
|
1012
1016
|
|
1013
|
-
The specification is stored on CVS repository at cvs.open-bio.org,
|
1014
|
-
also available via http from:
|
1015
|
-
((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
1016
|
-
|
1017
1017
|
== BioRegistry
|
1018
1018
|
|
1019
1019
|
BioRegistry allows for locating retrieval methods and database
|
@@ -1254,6 +1254,34 @@ Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/keg
|
|
1254
1254
|
|
1255
1255
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
1256
1256
|
|
1257
|
+
== Ruby Ensembl API
|
1258
|
+
|
1259
|
+
Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
|
1260
|
+
included in the BioRuby archives. To install it, see
|
1261
|
+
((<URL:http://wiki.github.com/jandot/ruby-ensembl-api>))
|
1262
|
+
for more information.
|
1263
|
+
|
1264
|
+
=== Gene Ontology (GO) through the Ruby Ensembl API
|
1265
|
+
|
1266
|
+
Gene Ontologies can be fetched through the Ruby Ensembl API package:
|
1267
|
+
|
1268
|
+
require 'ensembl'
|
1269
|
+
Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
|
1270
|
+
infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
|
1271
|
+
infile.each do |line|
|
1272
|
+
accs = line.split(",") # Split the comma-sep.entries into an array
|
1273
|
+
drosophila_acc = accs.shift # the first entry is the Drosophila acc
|
1274
|
+
mosq_acc = accs.shift # the second entry is your Mosq. acc
|
1275
|
+
gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
|
1276
|
+
print "#{mosq_acc}"
|
1277
|
+
gene.go_terms.each do |go|
|
1278
|
+
print ",#{go}"
|
1279
|
+
end
|
1280
|
+
end
|
1281
|
+
|
1282
|
+
Prints each mosq. accession/uniq identifier and the GO terms from the Drosophila
|
1283
|
+
homologues.
|
1284
|
+
|
1257
1285
|
== Comparing BioProjects
|
1258
1286
|
|
1259
1287
|
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
@@ -1284,13 +1312,13 @@ carefully that come with each package.
|
|
1284
1312
|
Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1285
1313
|
it to the interpreter. For example:
|
1286
1314
|
|
1287
|
-
ruby -I
|
1315
|
+
ruby -I$BIORUBYPATH/lib yourprogram.rb
|
1288
1316
|
|
1289
1317
|
== Modifying this page
|
1290
1318
|
|
1291
|
-
IMPORTANT NOTICE: This page is maintained in the BioRuby
|
1319
|
+
IMPORTANT NOTICE: This page is maintained in the BioRuby source code
|
1292
1320
|
repository. Please edit the file there otherwise changes may get
|
1293
|
-
lost. See ((<BioRuby Developer Information>)) for
|
1321
|
+
lost. See ((<BioRuby Developer Information>)) for repository and mailing list
|
1294
1322
|
access.
|
1295
1323
|
|
1296
1324
|
=end
|