bio 1.4.3.0001 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/map.rb
CHANGED
data/lib/bio/pathway.rb
CHANGED
@@ -591,7 +591,7 @@ class Pathway
|
|
591
591
|
# problem in the graph in which edge weights can be negative.
|
592
592
|
def bellman_ford(root)
|
593
593
|
distance, predecessor = initialize_single_source(root)
|
594
|
-
|
594
|
+
(self.nodes - 1).times do
|
595
595
|
@graph.each_key do |u|
|
596
596
|
@graph[u].each do |v, w|
|
597
597
|
# relaxing procedure of root -> 'u' -> 'v'
|
data/lib/bio/sequence/compat.rb
CHANGED
@@ -32,7 +32,7 @@ class Sequence
|
|
32
32
|
|
33
33
|
module Common
|
34
34
|
|
35
|
-
# <b>Bio::Sequence#to_fasta is
|
35
|
+
# <b>Bio::Sequence#to_fasta is DEPRECATED</b>
|
36
36
|
# Do not use Bio::Sequence#to_fasta ! Use Bio::Sequence#output instead.
|
37
37
|
# Note that Bio::Sequence::NA#to_fasta, Bio::Sequence::AA#to_fasta,
|
38
38
|
# and Bio::Sequence::Generic#to_fasta <b>can still be used</b>,
|
data/lib/bio/sequence/na.rb
CHANGED
@@ -283,6 +283,14 @@ class NA < String
|
|
283
283
|
#
|
284
284
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
285
285
|
# puts s.gc_percent #=> 55
|
286
|
+
#
|
287
|
+
# Note that this method only returns an integer value.
|
288
|
+
# When more digits after the decimal point are needed,
|
289
|
+
# use gc_content and sprintf like below:
|
290
|
+
#
|
291
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
292
|
+
# puts sprintf("%3.2f", s.gc_content * 100) #=> "55.56"
|
293
|
+
#
|
286
294
|
# ---
|
287
295
|
# *Returns*:: Fixnum
|
288
296
|
def gc_percent
|
@@ -297,57 +305,100 @@ class NA < String
|
|
297
305
|
# Calculate the ratio of GC / ATGC bases. U is regarded as T.
|
298
306
|
#
|
299
307
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
308
|
+
# puts s.gc_content #=> (5/9)
|
309
|
+
# puts s.gc_content.to_f #=> 0.5555555555555556
|
310
|
+
#
|
311
|
+
# In older Ruby versions, Float is always returned.
|
312
|
+
#
|
313
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
300
314
|
# puts s.gc_content #=> 0.555555555555556
|
315
|
+
#
|
316
|
+
# Note that "u" is regarded as "t".
|
317
|
+
# If there are no ATGC bases in the sequence, 0.0 is returned.
|
318
|
+
#
|
301
319
|
# ---
|
302
|
-
# *Returns*:: Float
|
320
|
+
# *Returns*:: Rational or Float
|
303
321
|
def gc_content
|
304
322
|
count = self.composition
|
305
323
|
at = count['a'] + count['t'] + count['u']
|
306
324
|
gc = count['g'] + count['c']
|
307
|
-
|
308
|
-
return
|
325
|
+
total = at + gc
|
326
|
+
return 0.0 if total == 0
|
327
|
+
return gc.quo(total)
|
309
328
|
end
|
310
329
|
|
311
330
|
# Calculate the ratio of AT / ATGC bases. U is regarded as T.
|
312
331
|
#
|
313
332
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
333
|
+
# puts s.at_content #=> 4/9
|
334
|
+
# puts s.at_content.to_f #=> 0.444444444444444
|
335
|
+
#
|
336
|
+
# In older Ruby versions, Float is always returned.
|
337
|
+
#
|
338
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
314
339
|
# puts s.at_content #=> 0.444444444444444
|
340
|
+
#
|
341
|
+
# Note that "u" is regarded as "t".
|
342
|
+
# If there are no ATGC bases in the sequence, 0.0 is returned.
|
343
|
+
#
|
315
344
|
# ---
|
316
|
-
# *Returns*:: Float
|
345
|
+
# *Returns*:: Rational or Float
|
317
346
|
def at_content
|
318
347
|
count = self.composition
|
319
348
|
at = count['a'] + count['t'] + count['u']
|
320
349
|
gc = count['g'] + count['c']
|
321
|
-
|
322
|
-
return
|
350
|
+
total = at + gc
|
351
|
+
return 0.0 if total == 0
|
352
|
+
return at.quo(total)
|
323
353
|
end
|
324
354
|
|
325
355
|
# Calculate the ratio of (G - C) / (G + C) bases.
|
326
356
|
#
|
327
357
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
358
|
+
# puts s.gc_skew #=> 3/5
|
359
|
+
# puts s.gc_skew.to_f #=> 0.6
|
360
|
+
#
|
361
|
+
# In older Ruby versions, Float is always returned.
|
362
|
+
#
|
363
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
328
364
|
# puts s.gc_skew #=> 0.6
|
365
|
+
#
|
366
|
+
# If there are no GC bases in the sequence, 0.0 is returned.
|
367
|
+
#
|
329
368
|
# ---
|
330
|
-
# *Returns*:: Float
|
369
|
+
# *Returns*:: Rational or Float
|
331
370
|
def gc_skew
|
332
371
|
count = self.composition
|
333
372
|
g = count['g']
|
334
373
|
c = count['c']
|
335
|
-
|
336
|
-
return
|
374
|
+
gc = g + c
|
375
|
+
return 0.0 if gc == 0
|
376
|
+
return (g - c).quo(gc)
|
337
377
|
end
|
338
378
|
|
339
379
|
# Calculate the ratio of (A - T) / (A + T) bases. U is regarded as T.
|
340
380
|
#
|
341
381
|
# s = Bio::Sequence::NA.new('atgttgttgttc')
|
382
|
+
# puts s.at_skew #=> (-3/4)
|
383
|
+
# puts s.at_skew.to_f #=> -0.75
|
384
|
+
#
|
385
|
+
# In older Ruby versions, Float is always returned.
|
386
|
+
#
|
387
|
+
# s = Bio::Sequence::NA.new('atgttgttgttc')
|
342
388
|
# puts s.at_skew #=> -0.75
|
389
|
+
#
|
390
|
+
# Note that "u" is regarded as "t".
|
391
|
+
# If there are no AT bases in the sequence, 0.0 is returned.
|
392
|
+
#
|
343
393
|
# ---
|
344
|
-
# *Returns*:: Float
|
394
|
+
# *Returns*:: Rational or Float
|
345
395
|
def at_skew
|
346
396
|
count = self.composition
|
347
397
|
a = count['a']
|
348
398
|
t = count['t'] + count['u']
|
349
|
-
|
350
|
-
return
|
399
|
+
at = a + t
|
400
|
+
return 0.0 if at == 0
|
401
|
+
return (a - t).quo(at)
|
351
402
|
end
|
352
403
|
|
353
404
|
# Returns an alphabetically sorted array of any non-standard bases
|
data/lib/bio/shell.rb
CHANGED
@@ -31,8 +31,6 @@ module Bio::Shell
|
|
31
31
|
require 'bio/shell/plugin/flatfile'
|
32
32
|
require 'bio/shell/plugin/obda'
|
33
33
|
require 'bio/shell/plugin/das'
|
34
|
-
require 'bio/shell/plugin/keggapi'
|
35
|
-
require 'bio/shell/plugin/soap'
|
36
34
|
require 'bio/shell/plugin/emboss'
|
37
35
|
require 'bio/shell/plugin/blast'
|
38
36
|
require 'bio/shell/plugin/psort'
|
data/lib/bio/shell/core.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell::Core
|
@@ -195,7 +194,7 @@ module Bio::Shell::Ghost
|
|
195
194
|
|
196
195
|
def find_flat_dir(dbname)
|
197
196
|
dir = File.join(bioflat_dir, dbname.to_s.strip)
|
198
|
-
if File.
|
197
|
+
if File.exist?(dir)
|
199
198
|
return dir
|
200
199
|
else
|
201
200
|
return nil
|
@@ -209,7 +208,7 @@ module Bio::Shell::Ghost
|
|
209
208
|
end
|
210
209
|
|
211
210
|
def load_config_file(file)
|
212
|
-
if File.
|
211
|
+
if File.exist?(file)
|
213
212
|
STDERR.print "Loading config (#{file}) ... "
|
214
213
|
if hash = YAML.load(File.read(file))
|
215
214
|
@config.update(hash)
|
@@ -312,7 +311,7 @@ module Bio::Shell::Ghost
|
|
312
311
|
end
|
313
312
|
|
314
313
|
def load_object_file(file)
|
315
|
-
if File.
|
314
|
+
if File.exist?(file)
|
316
315
|
STDERR.print "Loading object (#{file}) ... "
|
317
316
|
begin
|
318
317
|
bind = Bio::Shell.cache[:binding]
|
@@ -394,7 +393,7 @@ module Bio::Shell::Ghost
|
|
394
393
|
end
|
395
394
|
|
396
395
|
def load_history_file(file)
|
397
|
-
if File.
|
396
|
+
if File.exist?(file)
|
398
397
|
STDERR.print "Loading history (#{file}) ... "
|
399
398
|
File.open(file).each do |line|
|
400
399
|
unless line[/^# /]
|
@@ -459,7 +458,7 @@ module Bio::Shell::Ghost
|
|
459
458
|
|
460
459
|
def save_script
|
461
460
|
if @script_begin and @script_end and @script_begin <= @script_end
|
462
|
-
if File.
|
461
|
+
if File.exist?(script_file)
|
463
462
|
message = "Overwrite script file (#{script_file})? [y/n] "
|
464
463
|
else
|
465
464
|
message = "Save script file (#{script_file})? [y/n] "
|
data/lib/bio/shell/interface.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: interface.rb,v 1.19 2007/11/15 07:08:49 k Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -101,7 +100,7 @@ module Bio::Shell
|
|
101
100
|
STDOUT.reopen(pg)
|
102
101
|
objs.each do |obj|
|
103
102
|
if obj.is_a?(String)
|
104
|
-
if File.
|
103
|
+
if File.exist?(obj)
|
105
104
|
system("#{cmd} #{obj}")
|
106
105
|
else
|
107
106
|
obj.display
|
@@ -128,7 +127,7 @@ module Bio::Shell
|
|
128
127
|
|
129
128
|
def head(arg, num = 10)
|
130
129
|
str = ""
|
131
|
-
if File.
|
130
|
+
if File.exist?(arg)
|
132
131
|
File.open(arg) do |file|
|
133
132
|
num.times do
|
134
133
|
if line = file.gets
|
@@ -154,7 +153,7 @@ module Bio::Shell
|
|
154
153
|
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
|
155
154
|
file = File.join(datadir, file)
|
156
155
|
end
|
157
|
-
if File.
|
156
|
+
if File.exist?(file)
|
158
157
|
message = "Overwrite existing '#{file}' file? [y/n] "
|
159
158
|
if ! Bio::Shell.ask_yes_or_no(message)
|
160
159
|
puts " ... save aborted."
|
data/lib/bio/shell/irb.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: irb.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -48,7 +47,7 @@ module Bio::Shell
|
|
48
47
|
return line
|
49
48
|
end
|
50
49
|
|
51
|
-
if File.
|
50
|
+
if File.exist?("./config/boot.rb")
|
52
51
|
require "./config/boot"
|
53
52
|
require "./config/environment"
|
54
53
|
#require 'commands/console'
|
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -31,7 +30,7 @@ module Bio::Shell
|
|
31
30
|
seq = ""
|
32
31
|
if arg.kind_of?(Bio::Sequence)
|
33
32
|
seq = arg
|
34
|
-
elsif arg.respond_to?(:gets) or File.
|
33
|
+
elsif arg.respond_to?(:gets) or File.exist?(arg)
|
35
34
|
ent = flatauto(arg)
|
36
35
|
elsif arg[/:/]
|
37
36
|
ent = getobj(arg)
|
@@ -65,7 +64,7 @@ module Bio::Shell
|
|
65
64
|
db, entry_id = arg.to_s.strip.split(/\:/, 2)
|
66
65
|
|
67
66
|
# local file
|
68
|
-
if arg.respond_to?(:gets) or File.
|
67
|
+
if arg.respond_to?(:gets) or File.exist?(arg)
|
69
68
|
puts "Retrieving entry from file (#{arg})"
|
70
69
|
entry = flatfile(arg)
|
71
70
|
|
data/lib/bio/shell/plugin/seq.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: seq.rb,v 1.21 2007/04/05 23:35:41 trevor Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -161,20 +160,22 @@ end
|
|
161
160
|
class String
|
162
161
|
|
163
162
|
def step(window_size)
|
164
|
-
|
163
|
+
j = 0
|
165
164
|
0.step(self.length - window_size, window_size) do |i|
|
166
165
|
yield self[i, window_size]
|
166
|
+
j = i
|
167
167
|
end
|
168
|
-
yield self[
|
168
|
+
yield self[j + window_size .. -1] if j + window_size < self.length
|
169
169
|
end
|
170
170
|
|
171
171
|
def skip(window_size, step_size = 1)
|
172
|
-
|
172
|
+
j = 0
|
173
173
|
0.step(self.length - window_size, step_size) do |i|
|
174
174
|
yield [self[i, window_size], i + 1, i + window_size]
|
175
|
+
j = i
|
175
176
|
end
|
176
|
-
from =
|
177
|
-
to = [self.length,
|
177
|
+
from = j + step_size
|
178
|
+
to = [self.length, j + step_size + window_size].min
|
178
179
|
yield [self[from, window_size], from + 1, to] if from + 1 <= to
|
179
180
|
end
|
180
181
|
|
data/lib/bio/shell/setup.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: setup.rb,v 1.8 2007/06/28 11:21:40 k Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'getoptlong'
|
@@ -25,7 +24,7 @@ class Bio::Shell::Setup
|
|
25
24
|
Bio::Shell.configure(savedir)
|
26
25
|
|
27
26
|
# set default to irb mode
|
28
|
-
Bio::Shell.cache[:mode] = @mode || :irb
|
27
|
+
Bio::Shell.cache[:mode] = ((defined? @mode) && @mode) || :irb
|
29
28
|
|
30
29
|
case Bio::Shell.cache[:mode]
|
31
30
|
when :web
|
data/lib/bio/tree.rb
CHANGED
@@ -614,7 +614,7 @@ module Bio
|
|
614
614
|
raise IndexError, 'node2 not found' unless @pathway.graph[node2]
|
615
615
|
return [ node1 ] if node1 == node2
|
616
616
|
return [ node1, node2 ] if @pathway.graph[node1][node2]
|
617
|
-
|
617
|
+
_, path = @pathway.bfs_shortest_path(node1, node2)
|
618
618
|
unless path[0] == node1 and path[-1] == node2 then
|
619
619
|
raise NoPathError, 'node1 and node2 are not connected'
|
620
620
|
end
|
@@ -765,7 +765,7 @@ module Bio
|
|
765
765
|
# The result is unspecified for cyclic trees.
|
766
766
|
def lowest_common_ancestor(node1, node2, root = nil)
|
767
767
|
root ||= @root
|
768
|
-
|
768
|
+
_, route = @pathway.breadth_first_search(root)
|
769
769
|
x = node1; r1 = []
|
770
770
|
begin; r1 << x; end while x = route[x]
|
771
771
|
x = node2; r2 = []
|
@@ -331,8 +331,6 @@ class ContingencyTable
|
|
331
331
|
# *Returns*:: +Float+ chi square value
|
332
332
|
def chi_square
|
333
333
|
total = 0
|
334
|
-
c = @characters
|
335
|
-
max = c.size - 1
|
336
334
|
@characters.each do |i| # Loop through every row in the ContingencyTable
|
337
335
|
@characters.each do |j| # Loop through every column in the ContingencyTable
|
338
336
|
total += chi_square_element(i, j)
|
@@ -196,7 +196,7 @@ class SequenceRange
|
|
196
196
|
c_cut = cc.vc_complement_as_original_class
|
197
197
|
h_cut = cc.hc_between_strands_as_original_class
|
198
198
|
|
199
|
-
if @circular
|
199
|
+
if (defined? @circular) && @circular
|
200
200
|
# NOTE
|
201
201
|
# if it's circular we should start at the beginning of a cut for orientation
|
202
202
|
# scan for it, hack off the first set of hcuts and move them to the back
|
@@ -238,7 +238,7 @@ class SequenceRange
|
|
238
238
|
|
239
239
|
# Bin "-1" is an easy way to indicate the start of a strand just in case
|
240
240
|
# there is a horizontal cut at position 0
|
241
|
-
bins.delete(-1) unless @circular
|
241
|
+
bins.delete(-1) unless ((defined? @circular) && @circular)
|
242
242
|
bins
|
243
243
|
end
|
244
244
|
|
data/lib/bio/util/sirna.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
#
|
2
2
|
# = bio/util/sirna.rb - Class for designing small inhibitory RNAs
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2004
|
4
|
+
# Copyright:: Copyright (C) 2004-2013
|
5
5
|
# Itoshi NIKAIDO <dritoshi@gmail.com>
|
6
|
+
# Yuki NAITO <y-naito@rnai.jp>
|
6
7
|
# License:: The Ruby License
|
7
8
|
#
|
8
9
|
# $Id:$
|
@@ -33,10 +34,10 @@
|
|
33
34
|
#
|
34
35
|
# * Kumiko Ui-Tei et al. Guidelines for the selection of highly effective
|
35
36
|
# siRNA sequences for mammalian and chick RNA interference.
|
36
|
-
#
|
37
|
+
# Nucleic Acids Res. 2004 32: 936-948.
|
37
38
|
#
|
38
39
|
# * Angela Reynolds et al. Rational siRNA design for RNA interference.
|
39
|
-
#
|
40
|
+
# Nat. Biotechnol. 2004 22: 326-330.
|
40
41
|
#
|
41
42
|
|
42
43
|
require 'bio/sequence'
|
@@ -71,24 +72,64 @@ module Bio
|
|
71
72
|
|
72
73
|
# Ui-Tei's rule.
|
73
74
|
def uitei?(target)
|
74
|
-
return false
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
return false
|
83
|
-
|
75
|
+
return false if target.length != 23 # 21 nt target + 2 nt overhang
|
76
|
+
|
77
|
+
seq19 = target[2..20] # 19 nt double-stranded region of siRNA
|
78
|
+
|
79
|
+
# criteria i
|
80
|
+
return false unless seq19[18..18].match(/[AU]/i)
|
81
|
+
|
82
|
+
# criteria ii
|
83
|
+
return false unless seq19[0..0].match(/[GC]/i)
|
84
|
+
|
85
|
+
# criteria iii
|
86
|
+
au_number = seq19[12..18].scan(/[AU]/i).size
|
87
|
+
return false unless au_number >= 4
|
88
|
+
|
89
|
+
# criteria iv
|
90
|
+
return false if seq19.match(/[GC]{10}/i)
|
91
|
+
|
84
92
|
return true
|
85
93
|
end
|
86
94
|
|
87
95
|
# Reynolds' rule.
|
88
96
|
def reynolds?(target)
|
89
|
-
return false if
|
90
|
-
|
91
|
-
|
97
|
+
return false if target.length != 23 # 21 nt target + 2 nt overhang
|
98
|
+
|
99
|
+
seq19 = target[2..20] # 19 nt double-stranded region of siRNA
|
100
|
+
score = 0
|
101
|
+
|
102
|
+
# criteria I
|
103
|
+
gc_number = seq19.scan(/[GC]/i).size
|
104
|
+
score += 1 if (7 <= gc_number and gc_number <= 10)
|
105
|
+
|
106
|
+
# criteria II
|
107
|
+
au_number = seq19[14..18].scan(/[AU]/i).size
|
108
|
+
score += au_number
|
109
|
+
|
110
|
+
# criteria III
|
111
|
+
# Not implemented: Tm
|
112
|
+
|
113
|
+
# criteria IV
|
114
|
+
score += 1 if seq19[18..18].match(/A/i)
|
115
|
+
|
116
|
+
# criteria V
|
117
|
+
score += 1 if seq19[2..2].match(/A/i)
|
118
|
+
|
119
|
+
# criteria VI
|
120
|
+
score += 1 if seq19[9..9].match(/[U]/i)
|
121
|
+
|
122
|
+
# criteria VII
|
123
|
+
score -= 1 if seq19[18..18].match(/[GC]/i)
|
124
|
+
|
125
|
+
# criteria VIII
|
126
|
+
score -= 1 if seq19[12..12].match(/G/i)
|
127
|
+
|
128
|
+
if score >= 6
|
129
|
+
return score
|
130
|
+
else
|
131
|
+
return false
|
132
|
+
end
|
92
133
|
end
|
93
134
|
|
94
135
|
# same as design('uitei').
|
@@ -254,6 +295,25 @@ end # module Bio
|
|
254
295
|
|
255
296
|
= ChangeLog
|
256
297
|
|
298
|
+
2013/04/03 Yuki NAITO <y-naito@rnai.jp>
|
299
|
+
Modified siRNA design rules:
|
300
|
+
|
301
|
+
- Ui-Tei's rule:
|
302
|
+
- Restricted target length to 23 nt (21 nt plus 2 nt overhang)
|
303
|
+
for selecting functional siRNAs.
|
304
|
+
- Avoided contiguous GCs 10 nt or more. (not 9 nt or more)
|
305
|
+
|
306
|
+
- Reynolds' rule:
|
307
|
+
- Restricted target length to 23 nt (21 nt plus 2 nt overhang)
|
308
|
+
for selecting functional siRNAs.
|
309
|
+
- Reynolds' rule does not require all the criteria to be fulfilled
|
310
|
+
simultaneously. Total score of eight criteria is calculated
|
311
|
+
and used for the siRNA efficacy prediction. This change may
|
312
|
+
significantly alter an output.
|
313
|
+
- Returns total score of eight criteria for functional siRNA,
|
314
|
+
instead of returning 'true'.
|
315
|
+
- Returns 'false' for non-functional siRNA, as usual.
|
316
|
+
|
257
317
|
2005/03/21 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
|
258
318
|
Bio::SiRNA#ShRNA_designer method was changed design method.
|
259
319
|
|