bio 1.3.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75) hide show
  1. data/COPYING +56 -0
  2. data/COPYING.ja +51 -0
  3. data/ChangeLog +540 -0
  4. data/GPL +340 -0
  5. data/LEGAL +141 -0
  6. data/LGPL +504 -0
  7. data/README.rdoc +4 -2
  8. data/Rakefile +2 -2
  9. data/bioruby.gemspec +17 -29
  10. data/doc/Tutorial.rd +118 -90
  11. data/doc/Tutorial.rd.html +124 -87
  12. data/lib/bio/appl/blast.rb +2 -2
  13. data/lib/bio/appl/blast/format0.rb +1 -1
  14. data/lib/bio/appl/fasta.rb +5 -12
  15. data/lib/bio/appl/fasta/format10.rb +96 -6
  16. data/lib/bio/appl/gcg/msf.rb +11 -14
  17. data/lib/bio/appl/pts1.rb +0 -4
  18. data/lib/bio/appl/sim4/report.rb +50 -17
  19. data/lib/bio/db/biosql/biosql_to_biosequence.rb +10 -0
  20. data/lib/bio/db/biosql/sequence.rb +234 -298
  21. data/lib/bio/db/embl/embl.rb +0 -3
  22. data/lib/bio/db/genbank/common.rb +3 -1
  23. data/lib/bio/io/biosql/ar-biosql.rb +257 -0
  24. data/lib/bio/io/biosql/biosql.rb +39 -0
  25. data/lib/bio/io/biosql/config/database.yml +5 -4
  26. data/lib/bio/io/ncbirest.rb +12 -5
  27. data/lib/bio/io/pubmed.rb +5 -1
  28. data/lib/bio/io/sql.rb +43 -150
  29. data/lib/bio/sequence/compat.rb +5 -1
  30. data/lib/bio/util/restriction_enzyme/range/sequence_range/calculated_cuts.rb +6 -4
  31. data/lib/bio/version.rb +1 -1
  32. data/test/data/gcg/pileup-aa.msf +67 -0
  33. data/test/data/sim4/complement-A4.sim4 +43 -0
  34. data/test/data/sim4/simple-A4.sim4 +25 -0
  35. data/test/data/sim4/simple2-A4.sim4 +25 -0
  36. data/test/functional/bio/io/test_pubmed.rb +129 -0
  37. data/test/unit/bio/appl/bl2seq/test_report.rb +5 -5
  38. data/test/unit/bio/appl/gcg/test_msf.rb +154 -0
  39. data/test/unit/bio/appl/hmmer/test_report.rb +2 -2
  40. data/test/unit/bio/appl/sim4/test_report.rb +869 -0
  41. data/test/unit/bio/appl/test_blast.rb +1 -1
  42. data/test/unit/bio/db/biosql/tc_biosql.rb +110 -0
  43. data/test/unit/bio/db/biosql/ts_suite_biosql.rb +8 -0
  44. data/test/unit/bio/test_feature.rb +18 -17
  45. data/test/unit/bio/test_reference.rb +18 -18
  46. data/test/unit/bio/test_sequence.rb +1 -1
  47. metadata +18 -30
  48. data/lib/bio/io/biosql/biodatabase.rb +0 -64
  49. data/lib/bio/io/biosql/bioentry.rb +0 -29
  50. data/lib/bio/io/biosql/bioentry_dbxref.rb +0 -11
  51. data/lib/bio/io/biosql/bioentry_path.rb +0 -12
  52. data/lib/bio/io/biosql/bioentry_qualifier_value.rb +0 -10
  53. data/lib/bio/io/biosql/bioentry_reference.rb +0 -10
  54. data/lib/bio/io/biosql/bioentry_relationship.rb +0 -10
  55. data/lib/bio/io/biosql/biosequence.rb +0 -11
  56. data/lib/bio/io/biosql/comment.rb +0 -7
  57. data/lib/bio/io/biosql/dbxref.rb +0 -13
  58. data/lib/bio/io/biosql/dbxref_qualifier_value.rb +0 -12
  59. data/lib/bio/io/biosql/location.rb +0 -32
  60. data/lib/bio/io/biosql/location_qualifier_value.rb +0 -11
  61. data/lib/bio/io/biosql/ontology.rb +0 -10
  62. data/lib/bio/io/biosql/reference.rb +0 -9
  63. data/lib/bio/io/biosql/seqfeature.rb +0 -32
  64. data/lib/bio/io/biosql/seqfeature_dbxref.rb +0 -11
  65. data/lib/bio/io/biosql/seqfeature_path.rb +0 -11
  66. data/lib/bio/io/biosql/seqfeature_qualifier_value.rb +0 -20
  67. data/lib/bio/io/biosql/seqfeature_relationship.rb +0 -11
  68. data/lib/bio/io/biosql/taxon.rb +0 -12
  69. data/lib/bio/io/biosql/taxon_name.rb +0 -9
  70. data/lib/bio/io/biosql/term.rb +0 -27
  71. data/lib/bio/io/biosql/term_dbxref.rb +0 -11
  72. data/lib/bio/io/biosql/term_path.rb +0 -12
  73. data/lib/bio/io/biosql/term_relationship.rb +0 -13
  74. data/lib/bio/io/biosql/term_relationship_term.rb +0 -11
  75. data/lib/bio/io/biosql/term_synonym.rb +0 -10
@@ -239,10 +239,12 @@ RubyGems 0.9.0 and removed in RubyGems 1.0.1.
239
239
  == LICENSE
240
240
 
241
241
  BioRuby can be freely distributed under the same terms as Ruby.
242
+ See the file COPYING (or COPYING.ja written in Japanese).
242
243
 
243
- Note that setup.rb included in the BioRuby package comes from
244
+ As written in the file COPYING, see the file LEGAL for files distributed
245
+ under different licenses. For example, setup.rb which comes from
244
246
  {RAA:setup}[http://raa.ruby-lang.org/project/setup/] developed by Minero Aoki
245
- (http://i.loveruby.net/en/projects/setup/).
247
+ (http://i.loveruby.net/en/projects/setup/) is licensed under LGPL 2.1.
246
248
 
247
249
 
248
250
  == CONTACT
data/Rakefile CHANGED
@@ -61,7 +61,7 @@ task :gemspec => GEM_SPEC_FILE
61
61
  desc "Force update gem spec file"
62
62
  task :regemspec do
63
63
  #rm GEM_SPEC_FILE, :force => true
64
- Rake::Task[GEM_SPEC_FILE].execute
64
+ Rake::Task[GEM_SPEC_FILE].execute(nil)
65
65
  end
66
66
 
67
67
  desc "Update #{GEM_SPEC_FILE}"
@@ -137,7 +137,7 @@ desc "Force update doc/Tutorial*.html"
137
137
  task :retutorial2html do
138
138
  # safe_unlink HTMLFILES_TUTORIAL
139
139
  HTMLFILES_TUTORIAL.each do |x|
140
- Rake::Task[x].execute
140
+ Rake::Task[x].execute(nil)
141
141
  end
142
142
  end
143
143
 
@@ -3,7 +3,7 @@
3
3
  #
4
4
  Gem::Specification.new do |s|
5
5
  s.name = 'bio'
6
- s.version = "1.3.0"
6
+ s.version = "1.3.1"
7
7
 
8
8
  s.author = "BioRuby project"
9
9
  s.email = "staff@bioruby.org"
@@ -14,8 +14,13 @@ Gem::Specification.new do |s|
14
14
 
15
15
  s.platform = Gem::Platform::RUBY
16
16
  s.files = [
17
+ "COPYING",
18
+ "COPYING.ja",
17
19
  "ChangeLog",
20
+ "GPL",
18
21
  "KNOWN_ISSUES.rdoc",
22
+ "LEGAL",
23
+ "LGPL",
19
24
  "README.rdoc",
20
25
  "README_DEV.rdoc",
21
26
  "Rakefile",
@@ -153,35 +158,9 @@ Gem::Specification.new do |s|
153
158
  "lib/bio/db/soft.rb",
154
159
  "lib/bio/db/transfac.rb",
155
160
  "lib/bio/feature.rb",
156
- "lib/bio/io/biosql/biodatabase.rb",
157
- "lib/bio/io/biosql/bioentry.rb",
158
- "lib/bio/io/biosql/bioentry_dbxref.rb",
159
- "lib/bio/io/biosql/bioentry_path.rb",
160
- "lib/bio/io/biosql/bioentry_qualifier_value.rb",
161
- "lib/bio/io/biosql/bioentry_reference.rb",
162
- "lib/bio/io/biosql/bioentry_relationship.rb",
163
- "lib/bio/io/biosql/biosequence.rb",
164
- "lib/bio/io/biosql/comment.rb",
161
+ "lib/bio/io/biosql/ar-biosql.rb",
162
+ "lib/bio/io/biosql/biosql.rb",
165
163
  "lib/bio/io/biosql/config/database.yml",
166
- "lib/bio/io/biosql/dbxref.rb",
167
- "lib/bio/io/biosql/dbxref_qualifier_value.rb",
168
- "lib/bio/io/biosql/location.rb",
169
- "lib/bio/io/biosql/location_qualifier_value.rb",
170
- "lib/bio/io/biosql/ontology.rb",
171
- "lib/bio/io/biosql/reference.rb",
172
- "lib/bio/io/biosql/seqfeature.rb",
173
- "lib/bio/io/biosql/seqfeature_dbxref.rb",
174
- "lib/bio/io/biosql/seqfeature_path.rb",
175
- "lib/bio/io/biosql/seqfeature_qualifier_value.rb",
176
- "lib/bio/io/biosql/seqfeature_relationship.rb",
177
- "lib/bio/io/biosql/taxon.rb",
178
- "lib/bio/io/biosql/taxon_name.rb",
179
- "lib/bio/io/biosql/term.rb",
180
- "lib/bio/io/biosql/term_dbxref.rb",
181
- "lib/bio/io/biosql/term_path.rb",
182
- "lib/bio/io/biosql/term_relationship.rb",
183
- "lib/bio/io/biosql/term_relationship_term.rb",
184
- "lib/bio/io/biosql/term_synonym.rb",
185
164
  "lib/bio/io/das.rb",
186
165
  "lib/bio/io/dbget.rb",
187
166
  "lib/bio/io/ddbjxml.rb",
@@ -345,6 +324,7 @@ Gem::Specification.new do |s|
345
324
  "test/data/embl/AB090716.embl.rel89",
346
325
  "test/data/fasta/example1.txt",
347
326
  "test/data/fasta/example2.txt",
327
+ "test/data/gcg/pileup-aa.msf",
348
328
  "test/data/genscan/sample.report",
349
329
  "test/data/iprscan/merged.raw",
350
330
  "test/data/iprscan/merged.txt",
@@ -354,11 +334,15 @@ Gem::Specification.new do |s|
354
334
  "test/data/prosite/prosite.dat",
355
335
  "test/data/refseq/nm_126355.entret",
356
336
  "test/data/rpsblast/misc.rpsblast",
337
+ "test/data/sim4/complement-A4.sim4",
338
+ "test/data/sim4/simple-A4.sim4",
339
+ "test/data/sim4/simple2-A4.sim4",
357
340
  "test/data/soft/GDS100_partial.soft",
358
341
  "test/data/soft/GSE3457_family_partial.soft",
359
342
  "test/data/uniprot/p53_human.uniprot",
360
343
  "test/functional/bio/appl/test_pts1.rb",
361
344
  "test/functional/bio/io/test_ensembl.rb",
345
+ "test/functional/bio/io/test_pubmed.rb",
362
346
  "test/functional/bio/io/test_soapwsdl.rb",
363
347
  "test/functional/bio/io/test_togows.rb",
364
348
  "test/functional/bio/sequence/test_output_embl.rb",
@@ -368,6 +352,7 @@ Gem::Specification.new do |s|
368
352
  "test/unit/bio/appl/blast/test_ncbioptions.rb",
369
353
  "test/unit/bio/appl/blast/test_report.rb",
370
354
  "test/unit/bio/appl/blast/test_rpsblast.rb",
355
+ "test/unit/bio/appl/gcg/test_msf.rb",
371
356
  "test/unit/bio/appl/genscan/test_report.rb",
372
357
  "test/unit/bio/appl/hmmer/test_report.rb",
373
358
  "test/unit/bio/appl/iprscan/test_report.rb",
@@ -375,6 +360,7 @@ Gem::Specification.new do |s|
375
360
  "test/unit/bio/appl/paml/codeml/test_rates.rb",
376
361
  "test/unit/bio/appl/paml/codeml/test_report.rb",
377
362
  "test/unit/bio/appl/paml/test_codeml.rb",
363
+ "test/unit/bio/appl/sim4/test_report.rb",
378
364
  "test/unit/bio/appl/sosui/test_report.rb",
379
365
  "test/unit/bio/appl/targetp/test_report.rb",
380
366
  "test/unit/bio/appl/test_blast.rb",
@@ -384,6 +370,8 @@ Gem::Specification.new do |s|
384
370
  "test/unit/bio/data/test_aa.rb",
385
371
  "test/unit/bio/data/test_codontable.rb",
386
372
  "test/unit/bio/data/test_na.rb",
373
+ "test/unit/bio/db/biosql/tc_biosql.rb",
374
+ "test/unit/bio/db/biosql/ts_suite_biosql.rb",
387
375
  "test/unit/bio/db/embl/test_common.rb",
388
376
  "test/unit/bio/db/embl/test_embl.rb",
389
377
  "test/unit/bio/db/embl/test_embl_rel89.rb",
@@ -2,45 +2,43 @@
2
2
  #
3
3
  # A possible test run could be from rdtool (on Debian package rdtool)
4
4
  #
5
- # ruby -I lib ./bin/rd2 ~/cvs/opensource/bioruby/doc/Tutorial.rd
5
+ # rd2 $BIORUBYPATH/doc/Tutorial.rd
6
6
  #
7
7
  # or with style sheet:
8
8
  #
9
- # ruby -I lib ./bin/rd2 -r rd/rd2html-lib.rb --with-c
10
- ss=bioruby.css ~/cvs/opensource/bioruby/doc/Tutorial.rd > ~/bioruby.html
9
+ # rd2 -r rd/rd2html-lib.rb --with-css=bioruby.css $BIORUBYPATH/doc/Tutorial.rd > ~/bioruby.html
11
10
  #
12
11
  # in Debian:
13
12
  #
14
- # rd2 -r rd/rd2html-lib --with-css="/home/wrk/izip/cvs/opensource/bioruby/lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > index.html
13
+ # rd2 -r rd/rd2html-lib --with-css="../lib/bio/shell/rails/vendor/plugins/bioruby/generators/bioruby/templates/bioruby.css" Tutorial.rd > Tutorial.rd.html
15
14
  #
16
15
  # A common problem is tabs in the text file! TABs are not allowed.
17
16
  #
18
17
  # To add tests run Toshiaki's bioruby shell and paste in the query plus
19
18
  # results.
20
19
  #
21
- # To run the embedded Ruby doctests you can get the doctest.rb from Pjotr.
20
+ # To run the embedded Ruby doctests you can use the rubydoctest tool, part
21
+ # of the bioruby-support repository at http://github.com/pjotrp/bioruby-support/
22
+ #
22
23
 
23
24
  =begin
24
25
  #doctest Testing bioruby
25
26
 
26
27
  = BioRuby Tutorial
27
28
 
28
- Editor: PjotrPrins <p .at. bioruby.org>
29
-
30
29
  * Copyright (C) 2001-2003 KATAYAMA Toshiaki <k .at. bioruby.org>
31
- * Copyright (C) 2005-2008 Pjotr Prins, Naohisa Goto and others
30
+ * Copyright (C) 2005-2009 Pjotr Prins, Naohisa Goto and others
32
31
 
33
- The latest version resides in the CVS repository ./doc/((<Tutorial.rd|URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/*checkout*/bioruby/doc/Tutorial.rd?rev=HEAD&cvsroot=bioruby&content-type=text/plain>)). This one was updated:
32
+ This document was last modified: 2009/03/17
33
+ Current editor: Pjotr Prins <p .at. bioruby.org>
34
34
 
35
- $Id: Tutorial.rd,v 1.22 2008/05/19 12:22:05 pjotr Exp $
36
-
37
- in preparation for the ((<BioHackathlon 2008|URL:http://hackathon.dbcls.jp/>))
35
+ The latest version resides in the GIT source code repository: ./doc/((<Tutorial.rd|URL:http://github.com/pjotrp/bioruby/raw/documentation/doc/Tutorial.rd>)).
38
36
 
39
37
  == Introduction
40
38
 
41
39
  This is a tutorial for using Bioruby. A basic knowledge of Ruby is required.
42
40
  If you want to know more about the programming language Ruby we recommend the
43
- excellent book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
41
+ latest Ruby book ((<Programming Ruby|URL:http://www.pragprog.com/titles/ruby>))
44
42
  by Dave Thomas and Andy Hunt - some of it is online
45
43
  ((<here|URL:http://www.rubycentral.com/pickaxe/>)).
46
44
 
@@ -53,7 +51,7 @@ version it has with the
53
51
 
54
52
  command. Showing something like:
55
53
 
56
- ruby 1.8.5 (2006-08-25) [powerpc-linux]
54
+ ruby 1.8.7 (2008-08-11 patchlevel 72) [i486-linux]
57
55
 
58
56
  If you see no such thing you'll have to install Ruby using your installation
59
57
  manager. For more information see the
@@ -81,6 +79,7 @@ and you should see a prompt
81
79
 
82
80
  Now test the following:
83
81
 
82
+ bioruby> require 'bio'
84
83
  bioruby> seq = Bio::Sequence::NA.new("atgcatgcaaaa")
85
84
  ==> "atgcatgcaaaa"
86
85
 
@@ -182,37 +181,35 @@ way of writing concise and clear code using 'closures'. Each sliding
182
181
  window creates a subsequence which is supplied to the enclosed block
183
182
  through a variable named +s+.
184
183
 
185
- Show average percentage of GC content for 20 bases (stepping the default one base at a time)
184
+ * Show average percentage of GC content for 20 bases (stepping the default one base at a time)
186
185
 
187
- bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
188
- ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
186
+ bioruby> seq = Bio::Sequence::NA.new("atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa")
187
+ ==> "atgcatgcaattaagctaatcccaattagatcatcccgatcatcaaaaaaaaaa"
189
188
 
190
- bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
191
- bioruby> a
192
- ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
189
+ bioruby> a=[]; seq.window_search(20) { |s| a.push s.gc_percent }
190
+ bioruby> a
191
+ ==> [30, 35, 40, 40, 35, 35, 35, 30, 25, 30, 30, 30, 35, 35, 35, 35, 35, 40, 45, 45, 45, 45, 40, 35, 40, 40, 40, 40, 40, 35, 35, 35, 30, 30, 30]
193
192
 
194
193
 
195
194
  Since the class of each subsequence is the same as original sequence
196
195
  (Bio::Sequence::NA or Bio::Sequence::AA or Bio::Sequence), you can
197
196
  use all methods on the subsequence. For example,
198
197
 
199
- Shows translation results for 15 bases shifting a codon at a time
198
+ * Shows translation results for 15 bases shifting a codon at a time
200
199
 
201
- bioruby> a = []
202
- bioruby> seq.window_search(15, 3) do |s|
203
- bioruby> a.push s.translate
204
- bioruby> end
205
- bioruby> a
206
- ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
200
+ bioruby> a = []
201
+ bioruby> seq.window_search(15, 3) { | s | a.push s.translate }
202
+ bioruby> a
203
+ ==> ["MHAIK", "HAIKL", "AIKLI", "IKLIP", "KLIPI", "LIPIR", "IPIRS", "PIRSS", "IRSSR", "RSSRS", "SSRSS", "SRSSK", "RSSKK", "SSKKK"]
207
204
 
208
205
 
209
206
  Finally, the window_search method returns the last leftover
210
207
  subsequence. This allows for example
211
208
 
212
- Divide a genome sequence into sections of 10000bp and
213
- output FASTA formatted sequences (line width 60 chars). The 1000bp at the
214
- start and end of each subsequence overlapped. At the 3' end of the sequence
215
- the leftover is also added:
209
+ * Divide a genome sequence into sections of 10000bp and
210
+ output FASTA formatted sequences (line width 60 chars). The 1000bp at the
211
+ start and end of each subsequence overlapped. At the 3' end of the sequence
212
+ the leftover is also added:
216
213
 
217
214
  i = 1
218
215
  textwidth=60
@@ -229,24 +226,20 @@ size to equal values.
229
226
 
230
227
  Other examples
231
228
 
232
- Count the codon usage
229
+ * Count the codon usage
233
230
 
234
- bioruby> codon_usage = Hash.new(0)
235
- bioruby> seq.window_search(3, 3) do |s|
236
- bioruby> codon_usage[s] += 1
237
- bioruby> end
238
- bioruby> codon_usage
239
- ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
231
+ bioruby> codon_usage = Hash.new(0)
232
+ bioruby> seq.window_search(3, 3) { |s| codon_usage[s] += 1 }
233
+ bioruby> codon_usage
234
+ ==> {"cat"=>1, "aaa"=>3, "cca"=>1, "att"=>2, "aga"=>1, "atc"=>1, "cta"=>1, "gca"=>1, "cga"=>1, "tca"=>3, "aag"=>1, "tcc"=>1, "atg"=>1}
240
235
 
241
236
 
242
- Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
237
+ * Calculate molecular weight for each 10-aa peptide (or 10-nt nucleic acid)
243
238
 
244
- bioruby> a = []
245
- bioruby> seq.window_search(10, 10) do |s|
246
- bioruby> a.push s.molecular_weight
247
- bioruby> end
248
- bioruby> a
249
- ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
239
+ bioruby> a = []
240
+ bioruby> seq.window_search(10, 10) { |s| a.push s.molecular_weight }
241
+ bioruby> a
242
+ ==> [3096.2062, 3086.1962, 3056.1762, 3023.1262, 3073.2262]
250
243
 
251
244
  In most cases, sequences are read from files or retrieved from databases.
252
245
  For example:
@@ -398,12 +391,12 @@ very complicated:
398
391
  end
399
392
  end
400
393
 
401
- Note: In this example Feature#assoc method makes a Hash from a
402
- feature object. It is useful because you can get data from the hash
403
- by using qualifiers as keys.
404
- (But there is a risk some information is lost when two or more
405
- qualifiers are the same. Therefore an Array is returned by
406
- Feature#feature)
394
+ * Note: In this example Feature#assoc method makes a Hash from a
395
+ feature object. It is useful because you can get data from the hash
396
+ by using qualifiers as keys.
397
+ (But there is a risk some information is lost when two or more
398
+ qualifiers are the same. Therefore an Array is returned by
399
+ Feature#feature)
407
400
 
408
401
  Bio::Sequence#splicing splices subsequence from nucleic acid sequence
409
402
  according to location information used in GenBank, EMBL and DDBJ.
@@ -417,11 +410,11 @@ feature style location text but also Bio::Locations object. For more
417
410
  information about location format and Bio::Locations class, see
418
411
  bio/location.rb.
419
412
 
420
- Splice according to location string used in a GenBank entry
413
+ * Splice according to location string used in a GenBank entry
421
414
 
422
415
  naseq.splicing('join(2035..2050,complement(1775..1818),13..345)')
423
416
 
424
- Generate Bio::Locations object and pass the splicing method
417
+ * Generate Bio::Locations object and pass the splicing method
425
418
 
426
419
  locs = Bio::Locations.new('join((8298.8300)..10206,1..855)')
427
420
  naseq.splicing(locs)
@@ -429,7 +422,7 @@ Generate Bio::Locations object and pass the splicing method
429
422
  You can also use the splicing method for amino acid sequences
430
423
  (Bio::Sequence::AA objects).
431
424
 
432
- Splicing peptide from a protein (e.g. signal peptide)
425
+ * Splicing peptide from a protein (e.g. signal peptide)
433
426
 
434
427
  aaseq.splicing('21..119')
435
428
 
@@ -469,7 +462,7 @@ to a FASTA file can be found in sample/any2fasta.rb. With this technique it is
469
462
  possible to write a Unix type grep/sort pipe for sequence information. One
470
463
  example using scripts in the BIORUBY sample folder:
471
464
 
472
- fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
465
+ fastagrep.rb '/At|Dm/' database.seq | fastasort.rb
473
466
 
474
467
  greps the database for Arabidopsis and Drosophila entries and sorts the output
475
468
  to FASTA.
@@ -505,23 +498,23 @@ Array and BioPerl's Bio::SimpleAlign. A very simple example is:
505
498
  bioruby> a.consensus
506
499
  ==> "a?gc?"
507
500
  # shows IUPAC consensus
508
- a.consensus_iupac
509
- ==> "ahgcr"
501
+ p a.consensus_iupac # ==> "ahgcr"
502
+
510
503
  # iterates over each seq
511
504
  a.each { |x| p x }
512
- # ==>
513
- # "atgca"
514
- # "aagca"
515
- # "acgca"
516
- # "acgcg"
505
+ # ==>
506
+ # "atgca"
507
+ # "aagca"
508
+ # "acgca"
509
+ # "acgcg"
517
510
  # iterates over each site
518
511
  a.each_site { |x| p x }
519
- # ==>
520
- # ["a", "a", "a", "a"]
521
- # ["t", "a", "c", "c"]
522
- # ["g", "g", "g", "g"]
523
- # ["c", "c", "c", "c"]
524
- # ["a", "a", "a", "g"]
512
+ # ==>
513
+ # ["a", "a", "a", "a"]
514
+ # ["t", "a", "c", "c"]
515
+ # ["g", "g", "g", "g"]
516
+ # ["c", "c", "c", "c"]
517
+ # ["a", "a", "a", "g"]
525
518
 
526
519
  # doing alignment by using CLUSTAL W.
527
520
  # clustalw command must be installed.
@@ -671,7 +664,7 @@ method of the factory object after the "query" method.
671
664
  === using FASTA from a remote internet site
672
665
 
673
666
  * Note: Currently, only GenomeNet (fasta.genome.jp) is
674
- supported. check the class documentation for updates.
667
+ supported. Check the class documentation for updates.
675
668
 
676
669
  For accessing a remote site the Bio::Fasta.remote method is used
677
670
  instead of Bio::Fasta.local. When using a remote method, the
@@ -774,7 +767,7 @@ Check the documentation for Bio::Blast::Report to see what can be
774
767
  retrieved. For now suffice to state that Bio::Blast::Report has a
775
768
  hierarchical structure mirroring the general BLAST output stream:
776
769
 
777
- * In a Bio::Blast::Report object, @iteratinos is an array of
770
+ * In a Bio::Blast::Report object, @iterations is an array of
778
771
  Bio::Blast::Report::Iteration objects.
779
772
  * In a Bio::Blast::Report::Iteration object, @hits is an array of
780
773
  Bio::Blast::Report::Hits objects.
@@ -790,13 +783,26 @@ you can directly create Bio::Blast::Report objects without the
790
783
  Bio::Blast factory object. For this purpose use Bio::Blast.reports,
791
784
  which supports the "-m 0" default and "-m 7" XML type output format.
792
785
 
793
- #!/usr/bin/env ruby
786
+ * For example:
794
787
 
795
- require 'bio'
788
+ bioruby> blast_version = nil; result = []
789
+ bioruby> Bio::Blast.reports(File.new("../test/data/blast/blastp-multi.m7")) do |report|
790
+ bioruby> blast_version = report.version
791
+ bioruby> report.iterations.each do |itr|
792
+ bioruby> itr.hits.each do |hit|
793
+ bioruby> result.push hit.target_id
794
+ bioruby> end
795
+ bioruby> end
796
+ bioruby> end
797
+ bioruby> blast_version
798
+ ==> "blastp 2.2.18 [Mar-02-2008]"
799
+ bioruby> result
800
+ ==> ["BAB38768", "BAB38768", "BAB38769", "BAB37741"]
796
801
 
797
- # Iterates over each XML result.
798
- # The variable "report" is a Bio::Blast::Report object.
799
- Bio::Blast.reports(ARGF) do |report|
802
+ * another example:
803
+
804
+ require 'bio'
805
+ Bio::Blast.reports(ARGF) do |report|
800
806
  puts "Hits for " + report.query_def + " against " + report.db
801
807
  report.each do |hit|
802
808
  print hit.target_id, "\t", hit.evalue, "\n" if hit.evalue < 0.001
@@ -804,14 +810,16 @@ which supports the "-m 0" default and "-m 7" XML type output format.
804
810
  end
805
811
 
806
812
  Save the script as hits_under_0.001.rb. To process BLAST output
807
- files *.xml, you can
813
+ files *.xml, you can run it with:
808
814
 
809
815
  % ruby hits_under_0.001.rb *.xml
810
816
 
811
- Sometimes BLAST XML output may be wrong and can not be parsed. We
812
- recommended to install BLAST 2.2.5 or later, and try combinations of
813
- the -D and -m options when you encounter problems.
817
+ Sometimes BLAST XML output may be wrong and can not be parsed. Check whether
818
+ blast is version 2.2.5 or later. See also blast --help.
814
819
 
820
+ Bio::Blast loads the full XML file into memory. If this causes a problem
821
+ you can split the BLAST XML file into smaller chunks using XML-Twig. An
822
+ example can be found in ((<Biotools|URL:http://github.com/pjotrp/biotools/>)).
815
823
 
816
824
  === Add remote BLAST search sites
817
825
 
@@ -838,10 +846,6 @@ they may be included.
838
846
 
839
847
  The script below is an example which searches PubMed and creates a reference list.
840
848
 
841
- #!/usr/bin/env ruby
842
-
843
- require 'bio'
844
-
845
849
  ARGV.each do |id|
846
850
  entry = Bio::PubMed.query(id) # searches PubMed and get entry
847
851
  medline = Bio::MEDLINE.new(entry) # creates Bio::MEDLINE object from entry text
@@ -1010,10 +1014,6 @@ BioRuby and other projects' members (2002).
1010
1014
  Here we give a quick overview. Check out
1011
1015
  ((<URL:http://obda.open-bio.org/>)) for more extensive details.
1012
1016
 
1013
- The specification is stored on CVS repository at cvs.open-bio.org,
1014
- also available via http from:
1015
- ((<URL:http://cvs.open-bio.org/cgi-bin/viewcvs/viewcvs.cgi/obda-specs/?cvsroot=obf-common>))
1016
-
1017
1017
  == BioRegistry
1018
1018
 
1019
1019
  BioRegistry allows for locating retrieval methods and database
@@ -1254,6 +1254,34 @@ Please refer to KEGG_API.rd.ja (English version: ((<URL:http://www.genome.jp/keg
1254
1254
 
1255
1255
  * ((<URL:http://www.genome.jp/kegg/soap/>))
1256
1256
 
1257
+ == Ruby Ensembl API
1258
+
1259
+ Ruby Ensembl API is a ruby API to the Ensembl database. It is NOT currently
1260
+ included in the BioRuby archives. To install it, see
1261
+ ((<URL:http://wiki.github.com/jandot/ruby-ensembl-api>))
1262
+ for more information.
1263
+
1264
+ === Gene Ontology (GO) through the Ruby Ensembl API
1265
+
1266
+ Gene Ontologies can be fetched through the Ruby Ensembl API package:
1267
+
1268
+ require 'ensembl'
1269
+ Ensembl::Core::DBConnection.connect('drosophila_melanogaster')
1270
+ infile = IO.readlines(ARGV.shift) # reading your comma-separated accession mapping file (one line per mapping)
1271
+ infile.each do |line|
1272
+ accs = line.split(",") # Split the comma-sep.entries into an array
1273
+ drosophila_acc = accs.shift # the first entry is the Drosophila acc
1274
+ mosq_acc = accs.shift # the second entry is your Mosq. acc
1275
+ gene = Ensembl::Core::Gene.find_by_stable_id(drosophila_acc)
1276
+ print "#{mosq_acc}"
1277
+ gene.go_terms.each do |go|
1278
+ print ",#{go}"
1279
+ end
1280
+ end
1281
+
1282
+ Prints each mosq. accession/uniq identifier and the GO terms from the Drosophila
1283
+ homologues.
1284
+
1257
1285
  == Comparing BioProjects
1258
1286
 
1259
1287
  For a quick functional comparison of BioRuby, BioPerl, BioPython and Bioconductor (R) see ((<URL:http://sciruby.codeforpeople.com/sr.cgi/BioProjects>))
@@ -1284,13 +1312,13 @@ carefully that come with each package.
1284
1312
  Ruby fails to find the BioRuby libraries - add it to the RUBYLIB path, or pass
1285
1313
  it to the interpreter. For example:
1286
1314
 
1287
- ruby -I~/cvs/bioruby/lib yourprogram.rb
1315
+ ruby -I$BIORUBYPATH/lib yourprogram.rb
1288
1316
 
1289
1317
  == Modifying this page
1290
1318
 
1291
- IMPORTANT NOTICE: This page is maintained in the BioRuby CVS
1319
+ IMPORTANT NOTICE: This page is maintained in the BioRuby source code
1292
1320
  repository. Please edit the file there otherwise changes may get
1293
- lost. See ((<BioRuby Developer Information>)) for CVS and mailing list
1321
+ lost. See ((<BioRuby Developer Information>)) for repository and mailing list
1294
1322
  access.
1295
1323
 
1296
1324
  =end