bio 1.3.0 → 1.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (75) hide show
  1. data/COPYING +56 -0
  2. data/COPYING.ja +51 -0
  3. data/ChangeLog +540 -0
  4. data/GPL +340 -0
  5. data/LEGAL +141 -0
  6. data/LGPL +504 -0
  7. data/README.rdoc +4 -2
  8. data/Rakefile +2 -2
  9. data/bioruby.gemspec +17 -29
  10. data/doc/Tutorial.rd +118 -90
  11. data/doc/Tutorial.rd.html +124 -87
  12. data/lib/bio/appl/blast.rb +2 -2
  13. data/lib/bio/appl/blast/format0.rb +1 -1
  14. data/lib/bio/appl/fasta.rb +5 -12
  15. data/lib/bio/appl/fasta/format10.rb +96 -6
  16. data/lib/bio/appl/gcg/msf.rb +11 -14
  17. data/lib/bio/appl/pts1.rb +0 -4
  18. data/lib/bio/appl/sim4/report.rb +50 -17
  19. data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
  20. data/lib/bio/db/biosql/sequence.rb +234 -298
  21. data/lib/bio/db/embl/embl.rb +0 -3
  22. data/lib/bio/db/genbank/common.rb +3 -1
  23. data/lib/bio/io/biosql/ar-biosql.rb +257 -0
  24. data/lib/bio/io/biosql/biosql.rb +39 -0
  25. data/lib/bio/io/biosql/config/database.yml +5 -4
  26. data/lib/bio/io/ncbirest.rb +12 -5
  27. data/lib/bio/io/pubmed.rb +5 -1
  28. data/lib/bio/io/sql.rb +43 -150
  29. data/lib/bio/sequence/compat.rb +5 -1
  30. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
  31. data/lib/bio/version.rb +1 -1
  32. data/test/data/gcg/pileup-aa.msf +67 -0
  33. data/test/data/sim4/complement-A4.sim4 +43 -0
  34. data/test/data/sim4/simple-A4.sim4 +25 -0
  35. data/test/data/sim4/simple2-A4.sim4 +25 -0
  36. data/test/functional/bio/io/test_pubmed.rb +129 -0
  37. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
  38. data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
  39. data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
  40. data/test/unit/bio/appl/sim4/test_report.rb +869 -0
  41. data/test/unit/bio/appl/test_blast.rb +1 -1
  42. data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
  43. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
  44. data/test/unit/bio/test_feature.rb +18 -17
  45. data/test/unit/bio/test_reference.rb +18 -18
  46. data/test/unit/bio/test_sequence.rb +1 -1
  47. metadata +18 -30
  48. data/lib/bio/io/biosql/biodatabase.rb +0 -64
  49. data/lib/bio/io/biosql/bioentry.rb +0 -29
  50. data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
  51. data/lib/bio/io/biosql/bioentry_path.rb +0 -12
  52. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
  53. data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
  54. data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
  55. data/lib/bio/io/biosql/biosequence.rb +0 -11
  56. data/lib/bio/io/biosql/comment.rb +0 -7
  57. data/lib/bio/io/biosql/dbxref.rb +0 -13
  58. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
  59. data/lib/bio/io/biosql/location.rb +0 -32
  60. data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
  61. data/lib/bio/io/biosql/ontology.rb +0 -10
  62. data/lib/bio/io/biosql/reference.rb +0 -9
  63. data/lib/bio/io/biosql/seqfeature.rb +0 -32
  64. data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
  65. data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
  66. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
  67. data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
  68. data/lib/bio/io/biosql/taxon.rb +0 -12
  69. data/lib/bio/io/biosql/taxon_name.rb +0 -9
  70. data/lib/bio/io/biosql/term.rb +0 -27
  71. data/lib/bio/io/biosql/term_dbxref.rb +0 -11
  72. data/lib/bio/io/biosql/term_path.rb +0 -12
  73. data/lib/bio/io/biosql/term_relationship.rb +0 -13
  74. data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
  75. data/lib/bio/io/biosql/term_synonym.rb +0 -10
@@ -239,10 +239,12 @@ RubyGems 0.9.0 and removed in RubyGems 1.0.1.
239
239
  == LICENSE
240
240
 
241
241
  BioRuby can be freely distributed under the same terms as Ruby.
242
+ See the file COPYING (or COPYING.ja written in Japanese).
242
243
 
243
- Note that setup.rb included in the BioRuby package comes from
244
+ As written in the file COPYING, see the file LEGAL for files distributed
245
+ under different license. For example, setup.rb which comes from
244
246
  {RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki
245
- (http://i.loveruby.net/en/projects/setup/).
247
+ (http://i.loveruby.net/en/projects/setup/) is licensed under LGPL 2.1.
246
248
 
247
249
 
248
250
  == CONTACT
data/Rakefile CHANGED
@@ -61,7 +61,7 @@ task :gemspec => GEM_SPEC_FILE
61
61
  desc "Force update gem spec file"
62
62
  task :regemspec do
63
63
  #rm GEM_SPEC_FILE, :force => true
64
- Rake::Task[GEM_SPEC_FILE].execute
64
+ Rake::Task[GEM_SPEC_FILE].execute(nil)
65
65
  end
66
66
 
67
67
  desc "Update #{GEM_SPEC_FILE}"
@@ -137,7 +137,7 @@ desc "Force update doc/Tutorial*.html"
137
137
  task :retutorial2html do
138
138
  # safe_unlink HTMLFILES_TUTORIAL
139
139
  HTMLFILES_TUTORIAL.each do |x|
140
- Rake::Task[x].execute
140
+ Rake::Task[x].execute(nil)
141
141
  end
142
142
  end
143
143
 
@@ -3,7 +3,7 @@
3
3
  #
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'bio'
6
- s.version = "1.3.0"
6
+ s.version = "1.3.1"
7
7
 
8
8
  s.author = "BioRuby project"
9
9
  s.email = "staff@bioruby.org"
@@ -14,8 +14,13 @@ Gem::Specification.new do |s|
14
14
 
15
15
  s.platform = Gem::Platform::RUBY
16
16
  s.files = [
17
+ "COPYING",
18
+ "COPYING.ja",
17
19
  "ChangeLog",
20
+ "GPL",
18
21
  "KNOWN_ISSUES.rdoc",
22
+ "LEGAL",
23
+ "LGPL",
19
24
  "README.rdoc",
20
25
  "README_DEV.rdoc",
21
26
  "Rakefile",
@@ -153,35 +158,9 @@ Gem::Specification.new do |s|
153
158
  "lib/bio/db/soft.rb",
154
159
  "lib/bio/db/transfac.rb",
155
160
  "lib/bio/feature.rb",
156
- "lib/bio/io/biosql/biodatabase.rb",
157
- "lib/bio/io/biosql/bioentry.rb",
158
- "lib/bio/io/biosql/bioentry_dbxref.rb",
159
- "lib/bio/io/biosql/bioentry_path.rb",
160
- "lib/bio/io/biosql/bioentry_qualifier_value.rb",
161
- "lib/bio/io/biosql/bioentry_reference.rb",
162
- "lib/bio/io/biosql/bioentry_relationship.rb",
163
- "lib/bio/io/biosql/biosequence.rb",
164
- "lib/bio/io/biosql/comment.rb",
161
+ "lib/bio/io/biosql/ar-biosql.rb",
162
+ "lib/bio/io/biosql/biosql.rb",
165
163
  "lib/bio/io/biosql/config/database.yml",
166
- "lib/bio/io/biosql/dbxref.rb",
167
- "lib/bio/io/biosql/dbxref_qualifier_value.rb",
168
- "lib/bio/io/biosql/location.rb",
169
- "lib/bio/io/biosql/location_qualifier_value.rb",
170
- "lib/bio/io/biosql/ontology.rb",
171
- "lib/bio/io/biosql/reference.rb",
172
- "lib/bio/io/biosql/seqfeature.rb",
173
- "lib/bio/io/biosql/seqfeature_dbxref.rb",
174
- "lib/bio/io/biosql/seqfeature_path.rb",
175
- "lib/bio/io/biosql/seqfeature_qualifier_value.rb",
176
- "lib/bio/io/biosql/seqfeature_relationship.rb",
177
- "lib/bio/io/biosql/taxon.rb",
178
- "lib/bio/io/biosql/taxon_name.rb",
179
- "lib/bio/io/biosql/term.rb",
180
- "lib/bio/io/biosql/term_dbxref.rb",
181
- "lib/bio/io/biosql/term_path.rb",
182
- "lib/bio/io/biosql/term_relationship.rb",
183
- "lib/bio/io/biosql/term_relationship_term.rb",
184
- "lib/bio/io/biosql/term_synonym.rb",
185
164
  "lib/bio/io/das.rb",
186
165
  "lib/bio/io/dbget.rb",
187
166
  "lib/bio/io/ddbjxml.rb",
@@ -345,6 +324,7 @@ Gem::Specification.new do |s|
345
324
  "test/data/embl/AB090716.embl.rel89",
346
325
  "test/data/fasta/example1.txt",
347
326
  "test/data/fasta/example2.txt",
327
+ "test/data/gcg/pileup-aa.msf",
348
328
  "test/data/genscan/sample.report",
349
329
  "test/data/iprscan/merged.raw",
350
330
  "test/data/iprscan/merged.txt",
@@ -354,11 +334,15 @@ Gem::Specification.new do |s|
354
334
  "test/data/prosite/prosite.dat",
355
335
  "test/data/refseq/nm_126355.entret",
356
336
  "test/data/rpsblast/misc.rpsblast",
337
+ "test/data/sim4/complement-A4.sim4",
338
+ "test/data/sim4/simple-A4.sim4",
339
+ "test/data/sim4/simple2-A4.sim4",
357
340
  "test/data/soft/GDS100_partial.soft",
358
341
  "test/data/soft/GSE3457_family_partial.soft",
359
342
  "test/data/uniprot/p53_human.uniprot",
360
343
  "test/functional/bio/appl/test_pts1.rb",
361
344
  "test/functional/bio/io/test_ensembl.rb",
345
+ "test/functional/bio/io/test_pubmed.rb",
362
346
  "test/functional/bio/io/test_soapwsdl.rb",
363
347
  "test/functional/bio/io/test_togows.rb",
364
348
  "test/functional/bio/sequence/test_output_embl.rb",
@@ -368,6 +352,7 @@ Gem::Specification.new do |s|
368
352
  "test/unit/bio/appl/blast/test_ncbioptions.rb",
369
353
  "test/unit/bio/appl/blast/test_report.rb",
370
354
  "test/unit/bio/appl/blast/test_rpsblast.rb",
355
+ "test/unit/bio/appl/gcg/test_msf.rb",
371
356
  "test/unit/bio/appl/genscan/test_report.rb",
372
357
  "test/unit/bio/appl/hmmer/test_report.rb",
373
358
  "test/unit/bio/appl/iprscan/test_report.rb",
@@ -375,6 +360,7 @@ Gem::Specification.new do |s|
375
360
  "test/unit/bio/appl/paml/codeml/test_rates.rb",
376
361
  "test/unit/bio/appl/paml/codeml/test_report.rb",
377
362
  "test/unit/bio/appl/paml/test_codeml.rb",
363
+ "test/unit/bio/appl/sim4/test_report.rb",
378
364
  "test/unit/bio/appl/sosui/test_report.rb",
379
365
  "test/unit/bio/appl/targetp/test_report.rb",
380
366
  "test/unit/bio/appl/test_blast.rb",
@@ -384,6 +370,8 @@ Gem::Specification.new do |s|
384
370
  "test/unit/bio/data/test_aa.rb",
385
371
  "test/unit/bio/data/test_codontable.rb",
386
372
  "test/unit/bio/data/test_na.rb",
373
+ "test/unit/bio/db/biosql/tc_biosql.rb",
374
+ "test/unit/bio/db/biosql/ts_suite_biosql.rb",
387
375
  "test/unit/bio/db/embl/test_common.rb",
388
376
  "test/unit/bio/db/embl/test_embl.rb",
389
377
  "test/unit/bio/db/embl/test_embl_rel89.rb",
@@ -2,45 +2,43 @@
2
2
  #
3
3
  # A possible test run could be from rdtool (on Debian package rdtool)
4
4
  #
5
- # ruby -I lib ./bin/rd2 ~/cvs/opensource/bioruby/doc/Tutorial.rd
5
+ # rd2 $BIORUBYPATH/doc/Tutorial.rd
6
6
  #
7
7
  # or with style sheet:
8
8
  #
9
- # ruby -I lib ./bin/rd2 -r rd/rd2html-lib.rb --with-c
10
- ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
9
+ # rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css $BIORUBYPATH/doc/Tutorial.rd > ~/bioruby.html
11
10
  #
12
11
  # in Debian:
13
12
  #
14
- # rd2 -r rd/rd2html-lib --with-css="/home/wrk/izip/cvs/opensource/bioruby/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > index.html
13
+ # rd2 -r rd/rd2html-lib --with-css="../lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > Tutorial.rd.html
15
14
  #
16
15
  # A common problem is tabs in the text file! TABs are not allowed.
17
16
  #
18
17
  # To add tests run Toshiaki's bioruby shell and paste in the query plus
19
18
  # results.
20
19
  #
21
- # To run the embedded Ruby doctests you can get the doctest.rb from Pjotr.
20
+ # To run the embedded Ruby doctests you can use the rubydoctest tool, part
21
+ # of the bioruby-support repository at http://github.com/pjotrp/bioruby-support/
22
+ #
22
23
 
23
24
  =begin
24
25
  #doctest Testing bioruby
25
26
 
26
27
  = BioRuby Tutorial
27
28
 
28
- Editor: PjotrPrins <p .at. bioruby.org>
29
-
30
29
  * Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
31
- * Copyright (C) 2005-2008 Pjotr Prins, Naohisa Goto and others
30
+ * Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others
32
31
 
33
- The latest version resides in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)). This one was updated:
32
+ This document was last modified: 2009/03/17
33
+ Current editor: Pjotr Prins <p .at. bioruby.org>
34
34
 
35
- $Id: Tutorial.rd,v 1.22 2008/05/19 12:22:05 pjotr Exp $
36
-
37
- in preparation for the ((<BioHackathlon 2008|URL:http://hackathon.dbcls.jp/>))
35
+ The latest version resides in the GIT source code repository: ./doc/((<Tutorial.rd|URL:http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd>)).
38
36
 
39
37
  == Introduction
40
38
 
41
39
  This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
42
40
  If you want to know more about the programming language Ruby we recommend the
43
- excellent book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
41
+ latest Ruby book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
44
42
  by Dave Thomas and Andy Hunt - some of it is online
45
43
  ((<here|URL:http://www.rubycentral.com/pickaxe/>)).
46
44
 
@@ -53,7 +51,7 @@ version it has with the
53
51
 
54
52
  command. Showing something like:
55
53
 
56
- ruby 1.8.5 (2006-08-25) [powerpc-linux]
54
+ ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]
57
55
 
58
56
  If you see no such thing you'll have to install Ruby using your installation
59
57
  manager. For more information see the
@@ -81,6 +79,7 @@ and you should see a prompt
81
79
 
82
80
  Now test the following:
83
81
 
82
+ bioruby> require 'bio'
84
83
  bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
85
84
  ==> "atgcatgcaaaa"
86
85
 
@@ -182,37 +181,35 @@ way of writing concise and clear code using 'closures'. Each sliding
182
181
  window creates a subsequence which is supplied to the enclosed block
183
182
  through a variable named +s+.
184
183
 
185
- Show average percentage of GC content for 20 bases (stepping the default one base at a time)
184
+ * Show average percentage of GC content for 20 bases (stepping the default one base at a time)
186
185
 
187
- bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
188
- ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
186
+ bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
187
+ ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
189
188
 
190
- bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
191
- bioruby> a
192
- ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
189
+ bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
190
+ bioruby> a
191
+ ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
193
192
 
194
193
 
195
194
  Since the class of each subsequence is the same as original sequence
196
195
  (Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
197
196
  use all methods on the subsequence. For example,
198
197
 
199
- Shows translation results for 15 bases shifting a codon at a time
198
+ * Shows translation results for 15 bases shifting a codon at a time
200
199
 
201
- bioruby> a = []
202
- bioruby> seq.window_search(15, 3) do |s|
203
- bioruby> a.push s.translate
204
- bioruby> end
205
- bioruby> a
206
- ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
200
+ bioruby> a = []
201
+ bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
202
+ bioruby> a
203
+ ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
207
204
 
208
205
 
209
206
  Finally, the window_search method returns the last leftover
210
207
  subsequence. This allows for example
211
208
 
212
- Divide a genome sequence into sections of 10000bp and
213
- output FASTA formatted sequences (line width 60 chars). The 1000bp at the
214
- start and end of each subsequence overlapped. At the 3' end of the sequence
215
- the leftover is also added:
209
+ * Divide a genome sequence into sections of 10000bp and
210
+ output FASTA formatted sequences (line width 60 chars). The 1000bp at the
211
+ start and end of each subsequence overlapped. At the 3' end of the sequence
212
+ the leftover is also added:
216
213
 
217
214
  i = 1
218
215
  textwidth=60
@@ -229,24 +226,20 @@ size to equal values.
229
226
 
230
227
  Other examples
231
228
 
232
- Count the codon usage
229
+ * Count the codon usage
233
230
 
234
- bioruby> codon_usage = Hash.new(0)
235
- bioruby> seq.window_search(3, 3) do |s|
236
- bioruby> codon_usage[s] += 1
237
- bioruby> end
238
- bioruby> codon_usage
239
- ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
231
+ bioruby> codon_usage = Hash.new(0)
232
+ bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
233
+ bioruby> codon_usage
234
+ ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
240
235
 
241
236
 
242
- Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
237
+ * Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
243
238
 
244
- bioruby> a = []
245
- bioruby> seq.window_search(10, 10) do |s|
246
- bioruby> a.push s.molecular_weight
247
- bioruby> end
248
- bioruby> a
249
- ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
239
+ bioruby> a = []
240
+ bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
241
+ bioruby> a
242
+ ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
250
243
 
251
244
  In most cases, sequences are read from files or retrieved from databases.
252
245
  For example:
@@ -398,12 +391,12 @@ very complicated:
398
391
  end
399
392
  end
400
393
 
401
- Note: In this example Feature#assoc method makes a Hash from a
402
- feature object. It is useful because you can get data from the hash
403
- by using qualifiers as keys.
404
- (But there is a risk some information is lost when two or more
405
- qualifiers are the same. Therefore an Array is returned by
406
- Feature#feature)
394
+ * Note: In this example Feature#assoc method makes a Hash from a
395
+ feature object. It is useful because you can get data from the hash
396
+ by using qualifiers as keys.
397
+ (But there is a risk some information is lost when two or more
398
+ qualifiers are the same. Therefore an Array is returned by
399
+ Feature#feature)
407
400
 
408
401
  Bio::Sequence#splicing splices subsequence from nucleic acid sequence
409
402
  according to location information used in GenBank, EMBL and DDBJ.
@@ -417,11 +410,11 @@ feature style location text but also Bio::Locations object. For more
417
410
  information about location format and Bio::Locations class, see
418
411
  bio/location.rb.
419
412
 
420
- Splice according to location string used in a GenBank entry
413
+ * Splice according to location string used in a GenBank entry
421
414
 
422
415
  naseq.splicing('join(2035..2050,complement(1775..1818),13..345')
423
416
 
424
- Generate Bio::Locations object and pass the splicing method
417
+ * Generate Bio::Locations object and pass the splicing method
425
418
 
426
419
  locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
427
420
  naseq.splicing(locs)
@@ -429,7 +422,7 @@ Generate Bio::Locations object and pass the splicing method
429
422
  You can also use the splicing method for amino acid sequences
430
423
  (Bio::Sequence::AA objects).
431
424
 
432
- Splicing peptide from a protein (e.g. signal peptide)
425
+ * Splicing peptide from a protein (e.g. signal peptide)
433
426
 
434
427
  aaseq.splicing('21..119')
435
428
 
@@ -469,7 +462,7 @@ to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
469
462
  possible to write a Unix type grep/sort pipe for sequence information. One
470
463
  example using scripts in the BIORUBY sample folder:
471
464
 
472
- fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
465
+ fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
473
466
 
474
467
  greps the database for Arabidopsis and Drosophila entries and sorts the output
475
468
  to FASTA.
@@ -505,23 +498,23 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
505
498
  bioruby> a.consensus
506
499
  ==> "a?gc?"
507
500
  # shows IUPAC consensus
508
- a.consensus_iupac
509
- ==> "ahgcr"
501
+ p a.consensus_iupac # ==> "ahgcr"
502
+
510
503
  # iterates over each seq
511
504
  a.each { |x| p x }
512
- # ==>
513
- # "atgca"
514
- # "aagca"
515
- # "acgca"
516
- # "acgcg"
505
+ # ==>
506
+ # "atgca"
507
+ # "aagca"
508
+ # "acgca"
509
+ # "acgcg"
517
510
  # iterates over each site
518
511
  a.each_site { |x| p x }
519
- # ==>
520
- # ["a", "a", "a", "a"]
521
- # ["t", "a", "c", "c"]
522
- # ["g", "g", "g", "g"]
523
- # ["c", "c", "c", "c"]
524
- # ["a", "a", "a", "g"]
512
+ # ==>
513
+ # ["a", "a", "a", "a"]
514
+ # ["t", "a", "c", "c"]
515
+ # ["g", "g", "g", "g"]
516
+ # ["c", "c", "c", "c"]
517
+ # ["a", "a", "a", "g"]
525
518
 
526
519
  # doing alignment by using CLUSTAL W.
527
520
  # clustalw command must be installed.
@@ -671,7 +664,7 @@ method of the factory object after the "query" method.
671
664
  === using FASTA from a remote internet site
672
665
 
673
666
  * Note: Currently, only GenomeNet (fasta.genome.jp) is
674
- supported. check the class documentation for updates.
667
+ supported. check the class documentation for updates.
675
668
 
676
669
  For accessing a remote site the Bio::Fasta.remote method is used
677
670
  instead of Bio::Fasta.local. When using a remote method, the
@@ -774,7 +767,7 @@ Check the documentation for Bio::Blast::Report to see what can be
774
767
  retrieved. For now suffice to state that Bio::Blast::Report has a
775
768
  hierarchical structure mirroring the general BLAST output stream:
776
769
 
777
- * In a Bio::Blast::Report object, @iteratinos is an array of
770
+ * In a Bio::Blast::Report object, @iterations is an array of
778
771
  Bio::Blast::Report::Iteration objects.
779
772
  * In a Bio::Blast::Report::Iteration object, @hits is an array of
780
773
  Bio::Blast::Report::Hits objects.
@@ -790,13 +783,26 @@ you can directly create Bio::Blast::Report objects without the
790
783
  Bio::Blast factory object. For this purpose use Bio::Blast.reports,
791
784
  which supports the "-m 0" default and "-m 7" XML type output format.
792
785
 
793
- #!/usr/bin/env ruby
786
+ * For example:
794
787
 
795
- require 'bio'
788
+ bioruby> blast_version = nil; result = []
789
+ bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
790
+ bioruby> blast_version = report.version
791
+ bioruby> report.iterations.each do |itr|
792
+ bioruby> itr.hits.each do |hit|
793
+ bioruby> result.push hit.target_id
794
+ bioruby> end
795
+ bioruby> end
796
+ bioruby> end
797
+ bioruby> blast_version
798
+ ==> "blastp 2.2.18 [Mar-02-2008]"
799
+ bioruby> result
800
+ ==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]
796
801
 
797
- # Iterates over each XML result.
798
- # The variable "report" is a Bio::Blast::Report object.
799
- Bio::Blast.reports(ARGF) do |report|
802
+ * another example:
803
+
804
+ require 'bio'
805
+ Bio::Blast.reports(ARGF) do |report|
800
806
  puts "Hits for " + report.query_def + " against " + report.db
801
807
  report.each do |hit|
802
808
  print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
@@ -804,14 +810,16 @@ which supports the "-m 0" default and "-m 7" XML type output format.
804
810
  end
805
811
 
806
812
  Save the script as hits_under_0.001.rb and to process BLAST output
807
- files *.xml, you can
813
+ files *.xml, you can run it with:
808
814
 
809
815
  % ruby hits_under_0.001.rb *.xml
810
816
 
811
- Sometimes BLAST XML output may be wrong and can not be parsed. We
812
- recommended to install BLAST 2.2.5 or later, and try combinations of
813
- the -D and -m options when you encounter problems.
817
+ Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
818
+ blast is version 2.2.5 or later. See also blast --help.
814
819
 
820
+ Bio::Blast loads the full XML file into memory. If this causes a problem
821
+ you can split the BLAST XML file into smaller chunks using XML-Twig. An
822
+ example can be found in ((<Biotools|URL:http://github.com/pjotrp/biotools/>)).
815
823
 
816
824
  === Add remote BLAST search sites
817
825
 
@@ -838,10 +846,6 @@ they may be included.
838
846
 
839
847
  The script below is an example which searches PubMed and creates a reference list.
840
848
 
841
- #!/usr/bin/env ruby
842
-
843
- require 'bio'
844
-
845
849
  ARGV.each do |id|
846
850
  entry = Bio::PubMed.query(id) # searches PubMed and get entry
847
851
  medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
@@ -1010,10 +1014,6 @@ BioRuby and other projects' members (2002).
1010
1014
  Here we give a quick overview. Check out
1011
1015
  ((<URL:http://obda.open-bio.org/>)) for more extensive details.
1012
1016
 
1013
- The specification is stored on CVS repository at cvs.open-bio.org,
1014
- also available via http from:
1015
- ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
1016
-
1017
1017
  == BioRegistry
1018
1018
 
1019
1019
  BioRegistry allows for locating retrieval methods and database
@@ -1254,6 +1254,34 @@ Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/keg
1254
1254
 
1255
1255
  * ((<URL:http://www.genome.jp/kegg/soap/>))
1256
1256
 
1257
+ == Ruby Ensembl API
1258
+
1259
+ Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
1260
+ included in the BioRuby archives. To install it, see
1261
+ ((<URL:http://wiki.github.com/jandot/ruby-ensembl-api>))
1262
+ for more information.
1263
+
1264
+ === Gene Ontology (GO) through the Ruby Ensembl API
1265
+
1266
+ Gene Ontologies can be fetched through the Ruby Ensembl API package:
1267
+
1268
+ require 'ensembl'
1269
+ Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
1270
+ infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
1271
+ infile.each do |line|
1272
+ accs = line.split(",") # Split the comma-sep.entries into an array
1273
+ drosphila_acc = accs.shift # the first entry is the Drosophila acc
1274
+ mosq_acc = accs.shift # the second entry is your Mosq. acc
1275
+ gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
1276
+ print "#{mosq_acc}"
1277
+ gene.go_terms.each do |go|
1278
+ print ",#{go}"
1279
+ end
1280
+ end
1281
+
1282
+ Prints each mosq. accession/unique identifier and the GO terms from the Drosophila
1283
+ homologues.
1284
+
1257
1285
  == Comparing BioProjects
1258
1286
 
1259
1287
  For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
@@ -1284,13 +1312,13 @@ carefully that come with each package.
1284
1312
  Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
1285
1313
  it to the interpreter. For example:
1286
1314
 
1287
- ruby -I~/cvs/bioruby/lib yourprogram.rb
1315
+ ruby -I$BIORUBYPATH/lib yourprogram.rb
1288
1316
 
1289
1317
  == Modifying this page
1290
1318
 
1291
- IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
1319
+ IMPORTANT NOTICE: This page is maintained in the BioRuby source code
1292
1320
  repository. Please edit the file there otherwise changes may get
1293
- lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
1321
+ lost. See ((<BioRuby Developer Information>)) for repository and mailing list
1294
1322
  access.
1295
1323
 
1296
1324
  =end