bio 1.3.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +56 -0
- data/COPYING.ja +51 -0
- data/ChangeLog +540 -0
- data/GPL +340 -0
- data/LEGAL +141 -0
- data/LGPL +504 -0
- data/README.rdoc +4 -2
- data/Rakefile +2 -2
- data/bioruby.gemspec +17 -29
- data/doc/Tutorial.rd +118 -90
- data/doc/Tutorial.rd.html +124 -87
- data/lib/bio/appl/blast.rb +2 -2
- data/lib/bio/appl/blast/format0.rb +1 -1
- data/lib/bio/appl/fasta.rb +5 -12
- data/lib/bio/appl/fasta/format10.rb +96 -6
- data/lib/bio/appl/gcg/msf.rb +11 -14
- data/lib/bio/appl/pts1.rb +0 -4
- data/lib/bio/appl/sim4/report.rb +50 -17
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
- data/lib/bio/db/biosql/sequence.rb +234 -298
- data/lib/bio/db/embl/embl.rb +0 -3
- data/lib/bio/db/genbank/common.rb +3 -1
- data/lib/bio/io/biosql/ar-biosql.rb +257 -0
- data/lib/bio/io/biosql/biosql.rb +39 -0
- data/lib/bio/io/biosql/config/database.yml +5 -4
- data/lib/bio/io/ncbirest.rb +12 -5
- data/lib/bio/io/pubmed.rb +5 -1
- data/lib/bio/io/sql.rb +43 -150
- data/lib/bio/sequence/compat.rb +5 -1
- data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
- data/lib/bio/version.rb +1 -1
- data/test/data/gcg/pileup-aa.msf +67 -0
- data/test/data/sim4/complement-A4.sim4 +43 -0
- data/test/data/sim4/simple-A4.sim4 +25 -0
- data/test/data/sim4/simple2-A4.sim4 +25 -0
- data/test/functional/bio/io/test_pubmed.rb +129 -0
- data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
- data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
- data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
- data/test/unit/bio/appl/sim4/test_report.rb +869 -0
- data/test/unit/bio/appl/test_blast.rb +1 -1
- data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
- data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
- data/test/unit/bio/test_feature.rb +18 -17
- data/test/unit/bio/test_reference.rb +18 -18
- data/test/unit/bio/test_sequence.rb +1 -1
- metadata +18 -30
- data/lib/bio/io/biosql/biodatabase.rb +0 -64
- data/lib/bio/io/biosql/bioentry.rb +0 -29
- data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
- data/lib/bio/io/biosql/bioentry_path.rb +0 -12
- data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
- data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
- data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
- data/lib/bio/io/biosql/biosequence.rb +0 -11
- data/lib/bio/io/biosql/comment.rb +0 -7
- data/lib/bio/io/biosql/dbxref.rb +0 -13
- data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
- data/lib/bio/io/biosql/location.rb +0 -32
- data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
- data/lib/bio/io/biosql/ontology.rb +0 -10
- data/lib/bio/io/biosql/reference.rb +0 -9
- data/lib/bio/io/biosql/seqfeature.rb +0 -32
- data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
- data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
- data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
- data/lib/bio/io/biosql/taxon.rb +0 -12
- data/lib/bio/io/biosql/taxon_name.rb +0 -9
- data/lib/bio/io/biosql/term.rb +0 -27
- data/lib/bio/io/biosql/term_dbxref.rb +0 -11
- data/lib/bio/io/biosql/term_path.rb +0 -12
- data/lib/bio/io/biosql/term_relationship.rb +0 -13
- data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
- data/lib/bio/io/biosql/term_synonym.rb +0 -10
data/README.rdoc
CHANGED
@@ -239,10 +239,12 @@ RubyGems 0.9.0 and removed in RubyGems 1.0.1.
|
|
239
239
|
== LICENSE
|
240
240
|
|
241
241
|
BioRuby can be freely distributed under the same terms as Ruby.
|
242
|
+
See the file COPYING (or COPYING.ja written in Japanese).
|
242
243
|
|
243
|
-
|
244
|
+
As written in the file COPYING, see the file LEGAL for files distributed
|
245
|
+
under different licenses. For example, setup.rb which comes from
|
244
246
|
{RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki
|
245
|
-
(http://i.loveruby.net/en/projects/setup/).
|
247
|
+
(http://i.loveruby.net/en/projects/setup/) is licensed under LGPL 2.1.
|
246
248
|
|
247
249
|
|
248
250
|
== CONTACT
|
data/Rakefile
CHANGED
@@ -61,7 +61,7 @@ task :gemspec => GEM_SPEC_FILE
|
|
61
61
|
desc "Force update gem spec file"
|
62
62
|
task :regemspec do
|
63
63
|
#rm GEM_SPEC_FILE, :force => true
|
64
|
-
Rake::Task[GEM_SPEC_FILE].execute
|
64
|
+
Rake::Task[GEM_SPEC_FILE].execute(nil)
|
65
65
|
end
|
66
66
|
|
67
67
|
desc "Update #{GEM_SPEC_FILE}"
|
@@ -137,7 +137,7 @@ desc "Force update doc/Tutorial*.html"
|
|
137
137
|
task :retutorial2html do
|
138
138
|
# safe_unlink HTMLFILES_TUTORIAL
|
139
139
|
HTMLFILES_TUTORIAL.each do |x|
|
140
|
-
Rake::Task[x].execute
|
140
|
+
Rake::Task[x].execute(nil)
|
141
141
|
end
|
142
142
|
end
|
143
143
|
|
data/bioruby.gemspec
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
#
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = 'bio'
|
6
|
-
s.version = "1.3.
|
6
|
+
s.version = "1.3.1"
|
7
7
|
|
8
8
|
s.author = "BioRuby project"
|
9
9
|
s.email = "staff@bioruby.org"
|
@@ -14,8 +14,13 @@ Gem::Specification.new do |s|
|
|
14
14
|
|
15
15
|
s.platform = Gem::Platform::RUBY
|
16
16
|
s.files = [
|
17
|
+
"COPYING",
|
18
|
+
"COPYING.ja",
|
17
19
|
"ChangeLog",
|
20
|
+
"GPL",
|
18
21
|
"KNOWN_ISSUES.rdoc",
|
22
|
+
"LEGAL",
|
23
|
+
"LGPL",
|
19
24
|
"README.rdoc",
|
20
25
|
"README_DEV.rdoc",
|
21
26
|
"Rakefile",
|
@@ -153,35 +158,9 @@ Gem::Specification.new do |s|
|
|
153
158
|
"lib/bio/db/soft.rb",
|
154
159
|
"lib/bio/db/transfac.rb",
|
155
160
|
"lib/bio/feature.rb",
|
156
|
-
"lib/bio/io/biosql/
|
157
|
-
"lib/bio/io/biosql/
|
158
|
-
"lib/bio/io/biosql/bioentry_dbxref.rb",
|
159
|
-
"lib/bio/io/biosql/bioentry_path.rb",
|
160
|
-
"lib/bio/io/biosql/bioentry_qualifier_value.rb",
|
161
|
-
"lib/bio/io/biosql/bioentry_reference.rb",
|
162
|
-
"lib/bio/io/biosql/bioentry_relationship.rb",
|
163
|
-
"lib/bio/io/biosql/biosequence.rb",
|
164
|
-
"lib/bio/io/biosql/comment.rb",
|
161
|
+
"lib/bio/io/biosql/ar-biosql.rb",
|
162
|
+
"lib/bio/io/biosql/biosql.rb",
|
165
163
|
"lib/bio/io/biosql/config/database.yml",
|
166
|
-
"lib/bio/io/biosql/dbxref.rb",
|
167
|
-
"lib/bio/io/biosql/dbxref_qualifier_value.rb",
|
168
|
-
"lib/bio/io/biosql/location.rb",
|
169
|
-
"lib/bio/io/biosql/location_qualifier_value.rb",
|
170
|
-
"lib/bio/io/biosql/ontology.rb",
|
171
|
-
"lib/bio/io/biosql/reference.rb",
|
172
|
-
"lib/bio/io/biosql/seqfeature.rb",
|
173
|
-
"lib/bio/io/biosql/seqfeature_dbxref.rb",
|
174
|
-
"lib/bio/io/biosql/seqfeature_path.rb",
|
175
|
-
"lib/bio/io/biosql/seqfeature_qualifier_value.rb",
|
176
|
-
"lib/bio/io/biosql/seqfeature_relationship.rb",
|
177
|
-
"lib/bio/io/biosql/taxon.rb",
|
178
|
-
"lib/bio/io/biosql/taxon_name.rb",
|
179
|
-
"lib/bio/io/biosql/term.rb",
|
180
|
-
"lib/bio/io/biosql/term_dbxref.rb",
|
181
|
-
"lib/bio/io/biosql/term_path.rb",
|
182
|
-
"lib/bio/io/biosql/term_relationship.rb",
|
183
|
-
"lib/bio/io/biosql/term_relationship_term.rb",
|
184
|
-
"lib/bio/io/biosql/term_synonym.rb",
|
185
164
|
"lib/bio/io/das.rb",
|
186
165
|
"lib/bio/io/dbget.rb",
|
187
166
|
"lib/bio/io/ddbjxml.rb",
|
@@ -345,6 +324,7 @@ Gem::Specification.new do |s|
|
|
345
324
|
"test/data/embl/AB090716.embl.rel89",
|
346
325
|
"test/data/fasta/example1.txt",
|
347
326
|
"test/data/fasta/example2.txt",
|
327
|
+
"test/data/gcg/pileup-aa.msf",
|
348
328
|
"test/data/genscan/sample.report",
|
349
329
|
"test/data/iprscan/merged.raw",
|
350
330
|
"test/data/iprscan/merged.txt",
|
@@ -354,11 +334,15 @@ Gem::Specification.new do |s|
|
|
354
334
|
"test/data/prosite/prosite.dat",
|
355
335
|
"test/data/refseq/nm_126355.entret",
|
356
336
|
"test/data/rpsblast/misc.rpsblast",
|
337
|
+
"test/data/sim4/complement-A4.sim4",
|
338
|
+
"test/data/sim4/simple-A4.sim4",
|
339
|
+
"test/data/sim4/simple2-A4.sim4",
|
357
340
|
"test/data/soft/GDS100_partial.soft",
|
358
341
|
"test/data/soft/GSE3457_family_partial.soft",
|
359
342
|
"test/data/uniprot/p53_human.uniprot",
|
360
343
|
"test/functional/bio/appl/test_pts1.rb",
|
361
344
|
"test/functional/bio/io/test_ensembl.rb",
|
345
|
+
"test/functional/bio/io/test_pubmed.rb",
|
362
346
|
"test/functional/bio/io/test_soapwsdl.rb",
|
363
347
|
"test/functional/bio/io/test_togows.rb",
|
364
348
|
"test/functional/bio/sequence/test_output_embl.rb",
|
@@ -368,6 +352,7 @@ Gem::Specification.new do |s|
|
|
368
352
|
"test/unit/bio/appl/blast/test_ncbioptions.rb",
|
369
353
|
"test/unit/bio/appl/blast/test_report.rb",
|
370
354
|
"test/unit/bio/appl/blast/test_rpsblast.rb",
|
355
|
+
"test/unit/bio/appl/gcg/test_msf.rb",
|
371
356
|
"test/unit/bio/appl/genscan/test_report.rb",
|
372
357
|
"test/unit/bio/appl/hmmer/test_report.rb",
|
373
358
|
"test/unit/bio/appl/iprscan/test_report.rb",
|
@@ -375,6 +360,7 @@ Gem::Specification.new do |s|
|
|
375
360
|
"test/unit/bio/appl/paml/codeml/test_rates.rb",
|
376
361
|
"test/unit/bio/appl/paml/codeml/test_report.rb",
|
377
362
|
"test/unit/bio/appl/paml/test_codeml.rb",
|
363
|
+
"test/unit/bio/appl/sim4/test_report.rb",
|
378
364
|
"test/unit/bio/appl/sosui/test_report.rb",
|
379
365
|
"test/unit/bio/appl/targetp/test_report.rb",
|
380
366
|
"test/unit/bio/appl/test_blast.rb",
|
@@ -384,6 +370,8 @@ Gem::Specification.new do |s|
|
|
384
370
|
"test/unit/bio/data/test_aa.rb",
|
385
371
|
"test/unit/bio/data/test_codontable.rb",
|
386
372
|
"test/unit/bio/data/test_na.rb",
|
373
|
+
"test/unit/bio/db/biosql/tc_biosql.rb",
|
374
|
+
"test/unit/bio/db/biosql/ts_suite_biosql.rb",
|
387
375
|
"test/unit/bio/db/embl/test_common.rb",
|
388
376
|
"test/unit/bio/db/embl/test_embl.rb",
|
389
377
|
"test/unit/bio/db/embl/test_embl_rel89.rb",
|
data/doc/Tutorial.rd
CHANGED
@@ -2,45 +2,43 @@
|
|
2
2
|
#
|
3
3
|
# A possible test run could be from rdtool (on Debian package rdtool)
|
4
4
|
#
|
5
|
-
#
|
5
|
+
# rd2 $BIORUBYPATH/doc/Tutorial.rd
|
6
6
|
#
|
7
7
|
# or with style sheet:
|
8
8
|
#
|
9
|
-
#
|
10
|
-
ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
|
9
|
+
# rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css $BIORUBYPATH/doc/Tutorial.rd > ~/bioruby.html
|
11
10
|
#
|
12
11
|
# in Debian:
|
13
12
|
#
|
14
|
-
# rd2 -r rd/rd2html-lib --with-css="
|
13
|
+
# rd2 -r rd/rd2html-lib --with-css="../lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > Tutorial.rd.html
|
15
14
|
#
|
16
15
|
# A common problem is tabs in the text file! TABs are not allowed.
|
17
16
|
#
|
18
17
|
# To add tests run Toshiaki's bioruby shell and paste in the query plus
|
19
18
|
# results.
|
20
19
|
#
|
21
|
-
# To run the embedded Ruby doctests you can
|
20
|
+
# To run the embedded Ruby doctests you can use the rubydoctest tool, part
|
21
|
+
# of the bioruby-support repository at http://github.com/pjotrp/bioruby-support/
|
22
|
+
#
|
22
23
|
|
23
24
|
=begin
|
24
25
|
#doctest Testing bioruby
|
25
26
|
|
26
27
|
= BioRuby Tutorial
|
27
28
|
|
28
|
-
Editor: PjotrPrins <p .at. bioruby.org>
|
29
|
-
|
30
29
|
* Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
|
31
|
-
* Copyright (C) 2005-
|
30
|
+
* Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others
|
32
31
|
|
33
|
-
|
32
|
+
This document was last modified: 2009/03/17
|
33
|
+
Current editor: Pjotr Prins <p .at. bioruby.org>
|
34
34
|
|
35
|
-
|
36
|
-
|
37
|
-
in preparation for the ((<BioHackathlon 2008|URL:http://hackathon.dbcls.jp/>))
|
35
|
+
The latest version resides in the GIT source code repository: ./doc/((<Tutorial.rd|URL:http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd>)).
|
38
36
|
|
39
37
|
== Introduction
|
40
38
|
|
41
39
|
This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
|
42
40
|
If you want to know more about the programming language Ruby we recommend the
|
43
|
-
|
41
|
+
latest Ruby book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
|
44
42
|
by Dave Thomas and Andy Hunt - some of it is online
|
45
43
|
((<here|URL:http://www.rubycentral.com/pickaxe/>)).
|
46
44
|
|
@@ -53,7 +51,7 @@ version it has with the
|
|
53
51
|
|
54
52
|
command. Showing something like:
|
55
53
|
|
56
|
-
ruby 1.8.
|
54
|
+
ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]
|
57
55
|
|
58
56
|
If you see no such thing you'll have to install Ruby using your installation
|
59
57
|
manager. For more information see the
|
@@ -81,6 +79,7 @@ and you should see a prompt
|
|
81
79
|
|
82
80
|
Now test the following:
|
83
81
|
|
82
|
+
bioruby> require 'bio'
|
84
83
|
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
|
85
84
|
==> "atgcatgcaaaa"
|
86
85
|
|
@@ -182,37 +181,35 @@ way of writing concise and clear code using 'closures'. Each sliding
|
|
182
181
|
window creates a subsequence which is supplied to the enclosed block
|
183
182
|
through a variable named +s+.
|
184
183
|
|
185
|
-
Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
184
|
+
* Show average percentage of GC content for 20 bases (stepping the default one base at a time)
|
186
185
|
|
187
|
-
|
188
|
-
|
186
|
+
bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
|
187
|
+
==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
|
189
188
|
|
190
|
-
|
191
|
-
|
192
|
-
|
189
|
+
bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
|
190
|
+
bioruby> a
|
191
|
+
==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
|
193
192
|
|
194
193
|
|
195
194
|
Since the class of each subsequence is the same as original sequence
|
196
195
|
(Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
|
197
196
|
use all methods on the subsequence. For example,
|
198
197
|
|
199
|
-
Shows translation results for 15 bases shifting a codon at a time
|
198
|
+
* Shows translation results for 15 bases shifting a codon at a time
|
200
199
|
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
bioruby> a
|
206
|
-
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
200
|
+
bioruby> a = []
|
201
|
+
bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
|
202
|
+
bioruby> a
|
203
|
+
==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
|
207
204
|
|
208
205
|
|
209
206
|
Finally, the window_search method returns the last leftover
|
210
207
|
subsequence. This allows for example
|
211
208
|
|
212
|
-
Divide a genome sequence into sections of 10000bp and
|
213
|
-
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
214
|
-
start and end of each subsequence overlapped. At the 3' end of the sequence
|
215
|
-
the leftover is also added:
|
209
|
+
* Divide a genome sequence into sections of 10000bp and
|
210
|
+
output FASTA formatted sequences (line width 60 chars). The 1000bp at the
|
211
|
+
start and end of each subsequence overlapped. At the 3' end of the sequence
|
212
|
+
the leftover is also added:
|
216
213
|
|
217
214
|
i = 1
|
218
215
|
textwidth=60
|
@@ -229,24 +226,20 @@ size to equal values.
|
|
229
226
|
|
230
227
|
Other examples
|
231
228
|
|
232
|
-
Count the codon usage
|
229
|
+
* Count the codon usage
|
233
230
|
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
bioruby> codon_usage
|
239
|
-
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
231
|
+
bioruby> codon_usage = Hash.new(0)
|
232
|
+
bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
|
233
|
+
bioruby> codon_usage
|
234
|
+
==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
|
240
235
|
|
241
236
|
|
242
|
-
Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
237
|
+
* Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
|
243
238
|
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
bioruby> a
|
249
|
-
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
239
|
+
bioruby> a = []
|
240
|
+
bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
|
241
|
+
bioruby> a
|
242
|
+
==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
|
250
243
|
|
251
244
|
In most cases, sequences are read from files or retrieved from databases.
|
252
245
|
For example:
|
@@ -398,12 +391,12 @@ very complicated:
|
|
398
391
|
end
|
399
392
|
end
|
400
393
|
|
401
|
-
Note: In this example Feature#assoc method makes a Hash from a
|
402
|
-
feature object. It is useful because you can get data from the hash
|
403
|
-
by using qualifiers as keys.
|
404
|
-
(But there is a risk some information is lost when two or more
|
405
|
-
qualifiers are the same. Therefore an Array is returned by
|
406
|
-
Feature#feature)
|
394
|
+
* Note: In this example Feature#assoc method makes a Hash from a
|
395
|
+
feature object. It is useful because you can get data from the hash
|
396
|
+
by using qualifiers as keys.
|
397
|
+
(But there is a risk some information is lost when two or more
|
398
|
+
qualifiers are the same. Therefore an Array is returned by
|
399
|
+
Feature#feature)
|
407
400
|
|
408
401
|
Bio::Sequence#splicing splices subsequence from nucleic acid sequence
|
409
402
|
according to location information used in GenBank, EMBL and DDBJ.
|
@@ -417,11 +410,11 @@ feature style location text but also Bio::Locations object. For more
|
|
417
410
|
information about location format and Bio::Locations class, see
|
418
411
|
bio/location.rb.
|
419
412
|
|
420
|
-
Splice according to location string used in a GenBank entry
|
413
|
+
* Splice according to location string used in a GenBank entry
|
421
414
|
|
422
415
|
naseq.splicing('join(2035..2050,complement(1775..1818),13..345')
|
423
416
|
|
424
|
-
Generate Bio::Locations object and pass the splicing method
|
417
|
+
* Generate Bio::Locations object and pass the splicing method
|
425
418
|
|
426
419
|
locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
|
427
420
|
naseq.splicing(locs)
|
@@ -429,7 +422,7 @@ Generate Bio::Locations object and pass the splicing method
|
|
429
422
|
You can also use the splicing method for amino acid sequences
|
430
423
|
(Bio::Sequence::AA objects).
|
431
424
|
|
432
|
-
Splicing peptide from a protein (e.g. signal peptide)
|
425
|
+
* Splicing peptide from a protein (e.g. signal peptide)
|
433
426
|
|
434
427
|
aaseq.splicing('21..119')
|
435
428
|
|
@@ -469,7 +462,7 @@ to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
|
|
469
462
|
possible to write a Unix type grep/sort pipe for sequence information. One
|
470
463
|
example using scripts in the BIORUBY sample folder:
|
471
464
|
|
472
|
-
|
465
|
+
fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
|
473
466
|
|
474
467
|
greps the database for Arabidopsis and Drosophila entries and sorts the output
|
475
468
|
to FASTA.
|
@@ -505,23 +498,23 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
|
|
505
498
|
bioruby> a.consensus
|
506
499
|
==> "a?gc?"
|
507
500
|
# shows IUPAC consensus
|
508
|
-
a.consensus_iupac
|
509
|
-
|
501
|
+
p a.consensus_iupac # ==> "ahgcr"
|
502
|
+
|
510
503
|
# iterates over each seq
|
511
504
|
a.each { |x| p x }
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
505
|
+
# ==>
|
506
|
+
# "atgca"
|
507
|
+
# "aagca"
|
508
|
+
# "acgca"
|
509
|
+
# "acgcg"
|
517
510
|
# iterates over each site
|
518
511
|
a.each_site { |x| p x }
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
512
|
+
# ==>
|
513
|
+
# ["a", "a", "a", "a"]
|
514
|
+
# ["t", "a", "c", "c"]
|
515
|
+
# ["g", "g", "g", "g"]
|
516
|
+
# ["c", "c", "c", "c"]
|
517
|
+
# ["a", "a", "a", "g"]
|
525
518
|
|
526
519
|
# doing alignment by using CLUSTAL W.
|
527
520
|
# clustalw command must be installed.
|
@@ -671,7 +664,7 @@ method of the factory object after the "query" method.
|
|
671
664
|
=== using FASTA from a remote internet site
|
672
665
|
|
673
666
|
* Note: Currently, only GenomeNet (fasta.genome.jp) is
|
674
|
-
supported. check the class documentation for updates.
|
667
|
+
supported. Check the class documentation for updates.
|
675
668
|
|
676
669
|
For accessing a remote site the Bio::Fasta.remote method is used
|
677
670
|
instead of Bio::Fasta.local. When using a remote method, the
|
@@ -774,7 +767,7 @@ Check the documentation for Bio::Blast::Report to see what can be
|
|
774
767
|
retrieved. For now suffice to state that Bio::Blast::Report has a
|
775
768
|
hierarchical structure mirroring the general BLAST output stream:
|
776
769
|
|
777
|
-
* In a Bio::Blast::Report object, @
|
770
|
+
* In a Bio::Blast::Report object, @iterations is an array of
|
778
771
|
Bio::Blast::Report::Iteration objects.
|
779
772
|
* In a Bio::Blast::Report::Iteration object, @hits is an array of
|
780
773
|
Bio::Blast::Report::Hits objects.
|
@@ -790,13 +783,26 @@ you can directly create Bio::Blast::Report objects without the
|
|
790
783
|
Bio::Blast factory object. For this purpose use Bio::Blast.reports,
|
791
784
|
which supports the "-m 0" default and "-m 7" XML type output format.
|
792
785
|
|
793
|
-
|
786
|
+
* For example:
|
794
787
|
|
795
|
-
|
788
|
+
bioruby> blast_version = nil; result = []
|
789
|
+
bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
|
790
|
+
bioruby> blast_version = report.version
|
791
|
+
bioruby> report.iterations.each do |itr|
|
792
|
+
bioruby> itr.hits.each do |hit|
|
793
|
+
bioruby> result.push hit.target_id
|
794
|
+
bioruby> end
|
795
|
+
bioruby> end
|
796
|
+
bioruby> end
|
797
|
+
bioruby> blast_version
|
798
|
+
==> "blastp 2.2.18 [Mar-02-2008]"
|
799
|
+
bioruby> result
|
800
|
+
==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]
|
796
801
|
|
797
|
-
|
798
|
-
|
799
|
-
|
802
|
+
* another example:
|
803
|
+
|
804
|
+
require 'bio'
|
805
|
+
Bio::Blast.reports(ARGF) do |report|
|
800
806
|
puts "Hits for " + report.query_def + " against " + report.db
|
801
807
|
report.each do |hit|
|
802
808
|
print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
|
@@ -804,14 +810,16 @@ which supports the "-m 0" default and "-m 7" XML type output format.
|
|
804
810
|
end
|
805
811
|
|
806
812
|
Save the script as hits_under_0.001.rb and to process BLAST output
|
807
|
-
files *.xml, you can
|
813
|
+
files *.xml, you can run it with:
|
808
814
|
|
809
815
|
% ruby hits_under_0.001.rb *.xml
|
810
816
|
|
811
|
-
Sometimes BLAST XML output may be wrong and can not be parsed.
|
812
|
-
|
813
|
-
the -D and -m options when you encounter problems.
|
817
|
+
Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
|
818
|
+
blast is version 2.2.5 or later. See also blast --help.
|
814
819
|
|
820
|
+
Bio::Blast loads the full XML file into memory. If this causes a problem
|
821
|
+
you can split the BLAST XML file into smaller chunks using XML-Twig. An
|
822
|
+
example can be found in ((<Biotools|URL:http://github.com/pjotrp/biotools/>)).
|
815
823
|
|
816
824
|
=== Add remote BLAST search sites
|
817
825
|
|
@@ -838,10 +846,6 @@ they may be included.
|
|
838
846
|
|
839
847
|
The script below is an example which searches PubMed and creates a reference list.
|
840
848
|
|
841
|
-
#!/usr/bin/env ruby
|
842
|
-
|
843
|
-
require 'bio'
|
844
|
-
|
845
849
|
ARGV.each do |id|
|
846
850
|
entry = Bio::PubMed.query(id) # searches PubMed and get entry
|
847
851
|
medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
|
@@ -1010,10 +1014,6 @@ BioRuby and other projects' members (2002).
|
|
1010
1014
|
Here we give a quick overview. Check out
|
1011
1015
|
((<URL:http://obda.open-bio.org/>)) for more extensive details.
|
1012
1016
|
|
1013
|
-
The specification is stored on CVS repository at cvs.open-bio.org,
|
1014
|
-
also available via http from:
|
1015
|
-
((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
|
1016
|
-
|
1017
1017
|
== BioRegistry
|
1018
1018
|
|
1019
1019
|
BioRegistry allows for locating retrieval methods and database
|
@@ -1254,6 +1254,34 @@ Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/keg
|
|
1254
1254
|
|
1255
1255
|
* ((<URL:http://www.genome.jp/kegg/soap/>))
|
1256
1256
|
|
1257
|
+
== Ruby Ensembl API
|
1258
|
+
|
1259
|
+
Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
|
1260
|
+
included in the BioRuby archives. To install it, see
|
1261
|
+
((<URL:http://wiki.github.com/jandot/ruby-ensembl-api>))
|
1262
|
+
for more information.
|
1263
|
+
|
1264
|
+
=== Gene Ontology (GO) through the Ruby Ensembl API
|
1265
|
+
|
1266
|
+
Gene Ontologies can be fetched through the Ruby Ensembl API package:
|
1267
|
+
|
1268
|
+
require 'ensembl'
|
1269
|
+
Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
|
1270
|
+
infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
|
1271
|
+
infile.each do |line|
|
1272
|
+
accs = line.split(",") # Split the comma-sep.entries into an array
|
1273
|
+
drosophila_acc = accs.shift # the first entry is the Drosophila acc
|
1274
|
+
mosq_acc = accs.shift # the second entry is your Mosq. acc
|
1275
|
+
gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
|
1276
|
+
print "#{mosq_acc}"
|
1277
|
+
gene.go_terms.each do |go|
|
1278
|
+
print ",#{go}"
|
1279
|
+
end
|
1280
|
+
end
|
1281
|
+
|
1282
|
+
Prints each mosq. accession/unique identifier and the GO terms from the Drosophila
|
1283
|
+
homologues.
|
1284
|
+
|
1257
1285
|
== Comparing BioProjects
|
1258
1286
|
|
1259
1287
|
For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
|
@@ -1284,13 +1312,13 @@ carefully that come with each package.
|
|
1284
1312
|
Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
|
1285
1313
|
it to the interpreter. For example:
|
1286
1314
|
|
1287
|
-
ruby -I
|
1315
|
+
ruby -I$BIORUBYPATH/lib yourprogram.rb
|
1288
1316
|
|
1289
1317
|
== Modifying this page
|
1290
1318
|
|
1291
|
-
IMPORTANT NOTICE: This page is maintained in the BioRuby
|
1319
|
+
IMPORTANT NOTICE: This page is maintained in the BioRuby source code
|
1292
1320
|
repository. Please edit the file there otherwise changes may get
|
1293
|
-
lost. See ((<BioRuby Developer Information>)) for
|
1321
|
+
lost. See ((<BioRuby Developer Information>)) for repository and mailing list
|
1294
1322
|
access.
|
1295
1323
|
|
1296
1324
|
=end
|