bio 1.4.3.0001 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.travis.yml +39 -33
- data/BSDL +22 -0
- data/COPYING +2 -2
- data/COPYING.ja +36 -36
- data/ChangeLog +2404 -1025
- data/KNOWN_ISSUES.rdoc +15 -55
- data/README.rdoc +17 -23
- data/RELEASE_NOTES.rdoc +246 -183
- data/Rakefile +3 -2
- data/bin/br_biofetch.rb +29 -5
- data/bioruby.gemspec +15 -32
- data/bioruby.gemspec.erb +10 -20
- data/doc/ChangeLog-1.4.3 +1478 -0
- data/doc/RELEASE_NOTES-1.4.3.rdoc +204 -0
- data/doc/Tutorial.rd +0 -6
- data/doc/Tutorial.rd.html +7 -12
- data/doc/Tutorial.rd.ja +960 -1064
- data/doc/Tutorial.rd.ja.html +977 -1067
- data/gemfiles/Gemfile.travis-jruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-jruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-rbx +13 -0
- data/gemfiles/Gemfile.travis-ruby1.8 +2 -1
- data/gemfiles/Gemfile.travis-ruby1.9 +2 -4
- data/gemfiles/Gemfile.travis-ruby2.2 +9 -0
- data/lib/bio.rb +10 -43
- data/lib/bio/alignment.rb +8 -14
- data/lib/bio/appl/blast.rb +1 -2
- data/lib/bio/appl/blast/format0.rb +18 -7
- data/lib/bio/appl/blast/remote.rb +0 -9
- data/lib/bio/appl/blast/report.rb +1 -1
- data/lib/bio/appl/clustalw/report.rb +3 -1
- data/lib/bio/appl/genscan/report.rb +1 -2
- data/lib/bio/appl/iprscan/report.rb +1 -2
- data/lib/bio/appl/meme/mast.rb +4 -4
- data/lib/bio/appl/meme/mast/report.rb +1 -1
- data/lib/bio/appl/paml/codeml.rb +2 -2
- data/lib/bio/appl/paml/codeml/report.rb +1 -0
- data/lib/bio/appl/paml/common.rb +1 -1
- data/lib/bio/appl/sosui/report.rb +1 -2
- data/lib/bio/command.rb +62 -2
- data/lib/bio/data/aa.rb +13 -31
- data/lib/bio/data/codontable.rb +1 -2
- data/lib/bio/db/biosql/biosql_to_biosequence.rb +1 -0
- data/lib/bio/db/biosql/sequence.rb +1 -1
- data/lib/bio/db/embl/common.rb +1 -1
- data/lib/bio/db/embl/embl.rb +5 -4
- data/lib/bio/db/embl/format_embl.rb +3 -3
- data/lib/bio/db/embl/sptr.rb +9 -1444
- data/lib/bio/db/embl/swissprot.rb +12 -29
- data/lib/bio/db/embl/trembl.rb +13 -30
- data/lib/bio/db/embl/uniprot.rb +12 -29
- data/lib/bio/db/embl/uniprotkb.rb +1455 -0
- data/lib/bio/db/fasta.rb +17 -0
- data/lib/bio/db/fasta/defline.rb +1 -3
- data/lib/bio/db/fastq.rb +1 -1
- data/lib/bio/db/genbank/ddbj.rb +9 -5
- data/lib/bio/db/genbank/refseq.rb +11 -3
- data/lib/bio/db/gff.rb +3 -4
- data/lib/bio/db/go.rb +5 -6
- data/lib/bio/db/kegg/module.rb +4 -5
- data/lib/bio/db/kegg/pathway.rb +4 -5
- data/lib/bio/db/kegg/reaction.rb +1 -1
- data/lib/bio/db/nexus.rb +3 -2
- data/lib/bio/db/pdb/pdb.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_elements.rb +82 -59
- data/lib/bio/db/phyloxml/phyloxml_parser.rb +2 -2
- data/lib/bio/db/phyloxml/phyloxml_writer.rb +1 -2
- data/lib/bio/db/sanger_chromatogram/chromatogram.rb +1 -2
- data/lib/bio/db/transfac.rb +1 -1
- data/lib/bio/io/das.rb +40 -41
- data/lib/bio/io/fastacmd.rb +0 -16
- data/lib/bio/io/fetch.rb +111 -55
- data/lib/bio/io/flatfile/buffer.rb +4 -5
- data/lib/bio/io/hinv.rb +2 -3
- data/lib/bio/io/ncbirest.rb +43 -6
- data/lib/bio/io/pubmed.rb +76 -81
- data/lib/bio/io/togows.rb +33 -10
- data/lib/bio/map.rb +1 -1
- data/lib/bio/pathway.rb +1 -1
- data/lib/bio/sequence/compat.rb +1 -1
- data/lib/bio/sequence/na.rb +63 -12
- data/lib/bio/shell.rb +0 -2
- data/lib/bio/shell/core.rb +5 -6
- data/lib/bio/shell/interface.rb +3 -4
- data/lib/bio/shell/irb.rb +1 -2
- data/lib/bio/shell/plugin/entry.rb +2 -3
- data/lib/bio/shell/plugin/seq.rb +7 -6
- data/lib/bio/shell/setup.rb +1 -2
- data/lib/bio/tree.rb +2 -2
- data/lib/bio/util/contingency_table.rb +0 -2
- data/lib/bio/util/restriction_enzyme/range/sequence_range.rb +2 -2
- data/lib/bio/util/sirna.rb +76 -16
- data/lib/bio/version.rb +8 -9
- data/sample/benchmark_clustalw_report.rb +47 -0
- data/sample/biofetch.rb +248 -151
- data/setup.rb +6 -7
- data/test/data/clustalw/example1-seqnos.aln +58 -0
- data/test/network/bio/appl/blast/test_remote.rb +1 -15
- data/test/network/bio/appl/test_blast.rb +0 -12
- data/test/network/bio/io/test_pubmed.rb +49 -0
- data/test/network/bio/io/test_togows.rb +0 -1
- data/test/network/bio/test_command.rb +65 -2
- data/test/unit/bio/appl/bl2seq/test_report.rb +0 -1
- data/test/unit/bio/appl/blast/test_report.rb +110 -48
- data/test/unit/bio/appl/clustalw/test_report.rb +67 -51
- data/test/unit/bio/appl/sim4/test_report.rb +46 -17
- data/test/unit/bio/appl/test_blast.rb +2 -2
- data/test/unit/bio/db/embl/test_embl.rb +0 -1
- data/test/unit/bio/db/embl/test_embl_rel89.rb +0 -1
- data/test/unit/bio/db/embl/{test_sptr.rb → test_uniprotkb.rb} +111 -115
- data/test/unit/bio/db/embl/{test_uniprot_new_part.rb → test_uniprotkb_new_part.rb} +11 -11
- data/test/unit/bio/db/genbank/test_genbank.rb +10 -4
- data/test/unit/bio/db/pdb/test_pdb.rb +14 -8
- data/test/unit/bio/db/test_fasta.rb +41 -1
- data/test/unit/bio/db/test_fastq.rb +14 -4
- data/test/unit/bio/db/test_gff.rb +2 -2
- data/test/unit/bio/db/test_phyloxml.rb +30 -30
- data/test/unit/bio/db/test_phyloxml_writer.rb +2 -2
- data/test/unit/bio/io/flatfile/test_autodetection.rb +1 -2
- data/test/unit/bio/io/flatfile/test_buffer.rb +7 -1
- data/test/unit/bio/io/flatfile/test_splitter.rb +1 -1
- data/test/unit/bio/io/test_togows.rb +3 -2
- data/test/unit/bio/sequence/test_dblink.rb +1 -1
- data/test/unit/bio/sequence/test_na.rb +3 -1
- data/test/unit/bio/test_alignment.rb +1 -2
- data/test/unit/bio/test_command.rb +5 -4
- data/test/unit/bio/test_db.rb +4 -2
- data/test/unit/bio/test_pathway.rb +25 -10
- data/test/unit/bio/util/test_sirna.rb +22 -22
- metadata +656 -1430
- data/doc/KEGG_API.rd +0 -1843
- data/doc/KEGG_API.rd.ja +0 -1834
- data/extconf.rb +0 -2
- data/lib/bio/appl/blast/ddbj.rb +0 -131
- data/lib/bio/db/kegg/taxonomy.rb +0 -280
- data/lib/bio/io/dbget.rb +0 -194
- data/lib/bio/io/ddbjrest.rb +0 -344
- data/lib/bio/io/ddbjxml.rb +0 -458
- data/lib/bio/io/ebisoap.rb +0 -158
- data/lib/bio/io/ensembl.rb +0 -229
- data/lib/bio/io/higet.rb +0 -73
- data/lib/bio/io/keggapi.rb +0 -363
- data/lib/bio/io/ncbisoap.rb +0 -156
- data/lib/bio/io/soapwsdl.rb +0 -119
- data/lib/bio/shell/plugin/keggapi.rb +0 -181
- data/lib/bio/shell/plugin/soap.rb +0 -87
- data/sample/dbget +0 -37
- data/sample/demo_ddbjxml.rb +0 -212
- data/sample/demo_kegg_taxonomy.rb +0 -92
- data/sample/demo_keggapi.rb +0 -502
- data/sample/psortplot_html.rb +0 -214
- data/test/network/bio/io/test_ddbjrest.rb +0 -47
- data/test/network/bio/io/test_ensembl.rb +0 -230
- data/test/network/bio/io/test_soapwsdl.rb +0 -53
- data/test/unit/bio/io/test_ddbjxml.rb +0 -81
- data/test/unit/bio/io/test_ensembl.rb +0 -111
- data/test/unit/bio/io/test_soapwsdl.rb +0 -33
data/lib/bio/map.rb
CHANGED
data/lib/bio/pathway.rb
CHANGED
@@ -591,7 +591,7 @@ class Pathway
|
|
591
591
|
# problem in the graph in which edge weights can be negative.
|
592
592
|
def bellman_ford(root)
|
593
593
|
distance, predecessor = initialize_single_source(root)
|
594
|
-
|
594
|
+
(self.nodes - 1).times do
|
595
595
|
@graph.each_key do |u|
|
596
596
|
@graph[u].each do |v, w|
|
597
597
|
# relaxing procedure of root -> 'u' -> 'v'
|
data/lib/bio/sequence/compat.rb
CHANGED
@@ -32,7 +32,7 @@ class Sequence
|
|
32
32
|
|
33
33
|
module Common
|
34
34
|
|
35
|
-
# <b>Bio::Sequence#to_fasta is
|
35
|
+
# <b>Bio::Sequence#to_fasta is DEPRECATED</b>
|
36
36
|
# Do not use Bio::Sequence#to_fasta ! Use Bio::Sequence#output instead.
|
37
37
|
# Note that Bio::Sequence::NA#to_fasta, Bio::Sequence::AA#to_fasata,
|
38
38
|
# and Bio::Sequence::Generic#to_fasta <b>can still be used</b>,
|
data/lib/bio/sequence/na.rb
CHANGED
@@ -283,6 +283,14 @@ class NA < String
|
|
283
283
|
#
|
284
284
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
285
285
|
# puts s.gc_percent #=> 55
|
286
|
+
#
|
287
|
+
# Note that this method only returns an integer value.
|
288
|
+
# When more digits after decimal points are needed,
|
289
|
+
# use gc_content and sprintf like below:
|
290
|
+
#
|
291
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
292
|
+
# puts sprintf("%3.2f", s.gc_content * 100) #=> "55.56"
|
293
|
+
#
|
286
294
|
# ---
|
287
295
|
# *Returns*:: Fixnum
|
288
296
|
def gc_percent
|
@@ -297,57 +305,100 @@ class NA < String
|
|
297
305
|
# Calculate the ratio of GC / ATGC bases. U is regarded as T.
|
298
306
|
#
|
299
307
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
308
|
+
# puts s.gc_content #=> (5/9)
|
309
|
+
# puts s.gc_content.to_f #=> 0.5555555555555556
|
310
|
+
#
|
311
|
+
# In older Ruby versions, Float is always returned.
|
312
|
+
#
|
313
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
300
314
|
# puts s.gc_content #=> 0.555555555555556
|
315
|
+
#
|
316
|
+
# Note that "u" is regarded as "t".
|
317
|
+
# If there are no ATGC bases in the sequence, 0.0 is returned.
|
318
|
+
#
|
301
319
|
# ---
|
302
|
-
# *Returns*:: Float
|
320
|
+
# *Returns*:: Rational or Float
|
303
321
|
def gc_content
|
304
322
|
count = self.composition
|
305
323
|
at = count['a'] + count['t'] + count['u']
|
306
324
|
gc = count['g'] + count['c']
|
307
|
-
|
308
|
-
return
|
325
|
+
total = at + gc
|
326
|
+
return 0.0 if total == 0
|
327
|
+
return gc.quo(total)
|
309
328
|
end
|
310
329
|
|
311
330
|
# Calculate the ratio of AT / ATGC bases. U is regarded as T.
|
312
331
|
#
|
313
332
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
333
|
+
# puts s.at_content #=> 4/9
|
334
|
+
# puts s.at_content.to_f #=> 0.444444444444444
|
335
|
+
#
|
336
|
+
# In older Ruby versions, Float is always returned.
|
337
|
+
#
|
338
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
314
339
|
# puts s.at_content #=> 0.444444444444444
|
340
|
+
#
|
341
|
+
# Note that "u" is regarded as "t".
|
342
|
+
# If there are no ATGC bases in the sequence, 0.0 is returned.
|
343
|
+
#
|
315
344
|
# ---
|
316
|
-
# *Returns*:: Float
|
345
|
+
# *Returns*:: Rational or Float
|
317
346
|
def at_content
|
318
347
|
count = self.composition
|
319
348
|
at = count['a'] + count['t'] + count['u']
|
320
349
|
gc = count['g'] + count['c']
|
321
|
-
|
322
|
-
return
|
350
|
+
total = at + gc
|
351
|
+
return 0.0 if total == 0
|
352
|
+
return at.quo(total)
|
323
353
|
end
|
324
354
|
|
325
355
|
# Calculate the ratio of (G - C) / (G + C) bases.
|
326
356
|
#
|
327
357
|
# s = Bio::Sequence::NA.new('atggcgtga')
|
358
|
+
# puts s.gc_skew #=> 3/5
|
359
|
+
# puts s.gc_skew.to_f #=> 0.6
|
360
|
+
#
|
361
|
+
# In older Ruby versions, Float is always returned.
|
362
|
+
#
|
363
|
+
# s = Bio::Sequence::NA.new('atggcgtga')
|
328
364
|
# puts s.gc_skew #=> 0.6
|
365
|
+
#
|
366
|
+
# If there are no GC bases in the sequence, 0.0 is returned.
|
367
|
+
#
|
329
368
|
# ---
|
330
|
-
# *Returns*:: Float
|
369
|
+
# *Returns*:: Rational or Float
|
331
370
|
def gc_skew
|
332
371
|
count = self.composition
|
333
372
|
g = count['g']
|
334
373
|
c = count['c']
|
335
|
-
|
336
|
-
return
|
374
|
+
gc = g + c
|
375
|
+
return 0.0 if gc == 0
|
376
|
+
return (g - c).quo(gc)
|
337
377
|
end
|
338
378
|
|
339
379
|
# Calculate the ratio of (A - T) / (A + T) bases. U is regarded as T.
|
340
380
|
#
|
341
381
|
# s = Bio::Sequence::NA.new('atgttgttgttc')
|
382
|
+
# puts s.at_skew #=> (-3/4)
|
383
|
+
# puts s.at_skew.to_f #=> -0.75
|
384
|
+
#
|
385
|
+
# In older Ruby versions, Float is always returned.
|
386
|
+
#
|
387
|
+
# s = Bio::Sequence::NA.new('atgttgttgttc')
|
342
388
|
# puts s.at_skew #=> -0.75
|
389
|
+
#
|
390
|
+
# Note that "u" is regarded as "t".
|
391
|
+
# If there are no AT bases in the sequence, 0.0 is returned.
|
392
|
+
#
|
343
393
|
# ---
|
344
|
-
# *Returns*:: Float
|
394
|
+
# *Returns*:: Rational or Float
|
345
395
|
def at_skew
|
346
396
|
count = self.composition
|
347
397
|
a = count['a']
|
348
398
|
t = count['t'] + count['u']
|
349
|
-
|
350
|
-
return
|
399
|
+
at = a + t
|
400
|
+
return 0.0 if at == 0
|
401
|
+
return (a - t).quo(at)
|
351
402
|
end
|
352
403
|
|
353
404
|
# Returns an alphabetically sorted array of any non-standard bases
|
data/lib/bio/shell.rb
CHANGED
@@ -31,8 +31,6 @@ module Bio::Shell
|
|
31
31
|
require 'bio/shell/plugin/flatfile'
|
32
32
|
require 'bio/shell/plugin/obda'
|
33
33
|
require 'bio/shell/plugin/das'
|
34
|
-
require 'bio/shell/plugin/keggapi'
|
35
|
-
require 'bio/shell/plugin/soap'
|
36
34
|
require 'bio/shell/plugin/emboss'
|
37
35
|
require 'bio/shell/plugin/blast'
|
38
36
|
require 'bio/shell/plugin/psort'
|
data/lib/bio/shell/core.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell::Core
|
@@ -195,7 +194,7 @@ module Bio::Shell::Ghost
|
|
195
194
|
|
196
195
|
def find_flat_dir(dbname)
|
197
196
|
dir = File.join(bioflat_dir, dbname.to_s.strip)
|
198
|
-
if File.
|
197
|
+
if File.exist?(dir)
|
199
198
|
return dir
|
200
199
|
else
|
201
200
|
return nil
|
@@ -209,7 +208,7 @@ module Bio::Shell::Ghost
|
|
209
208
|
end
|
210
209
|
|
211
210
|
def load_config_file(file)
|
212
|
-
if File.
|
211
|
+
if File.exist?(file)
|
213
212
|
STDERR.print "Loading config (#{file}) ... "
|
214
213
|
if hash = YAML.load(File.read(file))
|
215
214
|
@config.update(hash)
|
@@ -312,7 +311,7 @@ module Bio::Shell::Ghost
|
|
312
311
|
end
|
313
312
|
|
314
313
|
def load_object_file(file)
|
315
|
-
if File.
|
314
|
+
if File.exist?(file)
|
316
315
|
STDERR.print "Loading object (#{file}) ... "
|
317
316
|
begin
|
318
317
|
bind = Bio::Shell.cache[:binding]
|
@@ -394,7 +393,7 @@ module Bio::Shell::Ghost
|
|
394
393
|
end
|
395
394
|
|
396
395
|
def load_history_file(file)
|
397
|
-
if File.
|
396
|
+
if File.exist?(file)
|
398
397
|
STDERR.print "Loading history (#{file}) ... "
|
399
398
|
File.open(file).each do |line|
|
400
399
|
unless line[/^# /]
|
@@ -459,7 +458,7 @@ module Bio::Shell::Ghost
|
|
459
458
|
|
460
459
|
def save_script
|
461
460
|
if @script_begin and @script_end and @script_begin <= @script_end
|
462
|
-
if File.
|
461
|
+
if File.exist?(script_file)
|
463
462
|
message = "Overwrite script file (#{script_file})? [y/n] "
|
464
463
|
else
|
465
464
|
message = "Save script file (#{script_file})? [y/n] "
|
data/lib/bio/shell/interface.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: interface.rb,v 1.19 2007/11/15 07:08:49 k Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -101,7 +100,7 @@ module Bio::Shell
|
|
101
100
|
STDOUT.reopen(pg)
|
102
101
|
objs.each do |obj|
|
103
102
|
if obj.is_a?(String)
|
104
|
-
if File.
|
103
|
+
if File.exist?(obj)
|
105
104
|
system("#{cmd} #{obj}")
|
106
105
|
else
|
107
106
|
obj.display
|
@@ -128,7 +127,7 @@ module Bio::Shell
|
|
128
127
|
|
129
128
|
def head(arg, num = 10)
|
130
129
|
str = ""
|
131
|
-
if File.
|
130
|
+
if File.exist?(arg)
|
132
131
|
File.open(arg) do |file|
|
133
132
|
num.times do
|
134
133
|
if line = file.gets
|
@@ -154,7 +153,7 @@ module Bio::Shell
|
|
154
153
|
if ! file[/^#{datadir}/] and Bio::Shell.ask_yes_or_no(message)
|
155
154
|
file = File.join(datadir, file)
|
156
155
|
end
|
157
|
-
if File.
|
156
|
+
if File.exist?(file)
|
158
157
|
message = "Overwrite existing '#{file}' file? [y/n] "
|
159
158
|
if ! Bio::Shell.ask_yes_or_no(message)
|
160
159
|
puts " ... save aborted."
|
data/lib/bio/shell/irb.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: irb.rb,v 1.3 2007/04/05 23:35:41 trevor Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -48,7 +47,7 @@ module Bio::Shell
|
|
48
47
|
return line
|
49
48
|
end
|
50
49
|
|
51
|
-
if File.
|
50
|
+
if File.exist?("./config/boot.rb")
|
52
51
|
require "./config/boot"
|
53
52
|
require "./config/environment"
|
54
53
|
#require 'commands/console'
|
data/lib/bio/shell/plugin/entry.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id:$
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -31,7 +30,7 @@ module Bio::Shell
|
|
31
30
|
seq = ""
|
32
31
|
if arg.kind_of?(Bio::Sequence)
|
33
32
|
seq = arg
|
34
|
-
elsif arg.respond_to?(:gets) or File.
|
33
|
+
elsif arg.respond_to?(:gets) or File.exist?(arg)
|
35
34
|
ent = flatauto(arg)
|
36
35
|
elsif arg[/:/]
|
37
36
|
ent = getobj(arg)
|
@@ -65,7 +64,7 @@ module Bio::Shell
|
|
65
64
|
db, entry_id = arg.to_s.strip.split(/\:/, 2)
|
66
65
|
|
67
66
|
# local file
|
68
|
-
if arg.respond_to?(:gets) or File.
|
67
|
+
if arg.respond_to?(:gets) or File.exist?(arg)
|
69
68
|
puts "Retrieving entry from file (#{arg})"
|
70
69
|
entry = flatfile(arg)
|
71
70
|
|
data/lib/bio/shell/plugin/seq.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: seq.rb,v 1.21 2007/04/05 23:35:41 trevor Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
module Bio::Shell
|
@@ -161,20 +160,22 @@ end
|
|
161
160
|
class String
|
162
161
|
|
163
162
|
def step(window_size)
|
164
|
-
|
163
|
+
j = 0
|
165
164
|
0.step(self.length - window_size, window_size) do |i|
|
166
165
|
yield self[i, window_size]
|
166
|
+
j = i
|
167
167
|
end
|
168
|
-
yield self[
|
168
|
+
yield self[j + window_size .. -1] if j + window_size < self.length
|
169
169
|
end
|
170
170
|
|
171
171
|
def skip(window_size, step_size = 1)
|
172
|
-
|
172
|
+
j = 0
|
173
173
|
0.step(self.length - window_size, step_size) do |i|
|
174
174
|
yield [self[i, window_size], i + 1, i + window_size]
|
175
|
+
j = i
|
175
176
|
end
|
176
|
-
from =
|
177
|
-
to = [self.length,
|
177
|
+
from = j + step_size
|
178
|
+
to = [self.length, j + step_size + window_size].min
|
178
179
|
yield [self[from, window_size], from + 1, to] if from + 1 <= to
|
179
180
|
end
|
180
181
|
|
data/lib/bio/shell/setup.rb
CHANGED
@@ -5,7 +5,6 @@
|
|
5
5
|
# Toshiaki Katayama <k@bioruby.org>
|
6
6
|
# License:: The Ruby License
|
7
7
|
#
|
8
|
-
# $Id: setup.rb,v 1.8 2007/06/28 11:21:40 k Exp $
|
9
8
|
#
|
10
9
|
|
11
10
|
require 'getoptlong'
|
@@ -25,7 +24,7 @@ class Bio::Shell::Setup
|
|
25
24
|
Bio::Shell.configure(savedir)
|
26
25
|
|
27
26
|
# set default to irb mode
|
28
|
-
Bio::Shell.cache[:mode] = @mode || :irb
|
27
|
+
Bio::Shell.cache[:mode] = ((defined? @mode) && @mode) || :irb
|
29
28
|
|
30
29
|
case Bio::Shell.cache[:mode]
|
31
30
|
when :web
|
data/lib/bio/tree.rb
CHANGED
@@ -614,7 +614,7 @@ module Bio
|
|
614
614
|
raise IndexError, 'node2 not found' unless @pathway.graph[node2]
|
615
615
|
return [ node1 ] if node1 == node2
|
616
616
|
return [ node1, node2 ] if @pathway.graph[node1][node2]
|
617
|
-
|
617
|
+
_, path = @pathway.bfs_shortest_path(node1, node2)
|
618
618
|
unless path[0] == node1 and path[-1] == node2 then
|
619
619
|
raise NoPathError, 'node1 and node2 are not connected'
|
620
620
|
end
|
@@ -765,7 +765,7 @@ module Bio
|
|
765
765
|
# The result is unspecified for cyclic trees.
|
766
766
|
def lowest_common_ancestor(node1, node2, root = nil)
|
767
767
|
root ||= @root
|
768
|
-
|
768
|
+
_, route = @pathway.breadth_first_search(root)
|
769
769
|
x = node1; r1 = []
|
770
770
|
begin; r1 << x; end while x = route[x]
|
771
771
|
x = node2; r2 = []
|
data/lib/bio/util/contingency_table.rb
CHANGED
@@ -331,8 +331,6 @@ class ContingencyTable
|
|
331
331
|
# *Returns*:: +Float+ chi square value
|
332
332
|
def chi_square
|
333
333
|
total = 0
|
334
|
-
c = @characters
|
335
|
-
max = c.size - 1
|
336
334
|
@characters.each do |i| # Loop through every row in the ContingencyTable
|
337
335
|
@characters.each do |j| # Loop through every column in the ContingencyTable
|
338
336
|
total += chi_square_element(i, j)
|
data/lib/bio/util/restriction_enzyme/range/sequence_range.rb
CHANGED
@@ -196,7 +196,7 @@ class SequenceRange
|
|
196
196
|
c_cut = cc.vc_complement_as_original_class
|
197
197
|
h_cut = cc.hc_between_strands_as_original_class
|
198
198
|
|
199
|
-
if @circular
|
199
|
+
if (defined? @circular) && @circular
|
200
200
|
# NOTE
|
201
201
|
# if it's circular we should start at the beginning of a cut for orientation
|
202
202
|
# scan for it, hack off the first set of hcuts and move them to the back
|
@@ -238,7 +238,7 @@ class SequenceRange
|
|
238
238
|
|
239
239
|
# Bin "-1" is an easy way to indicate the start of a strand just in case
|
240
240
|
# there is a horizontal cut at position 0
|
241
|
-
bins.delete(-1) unless @circular
|
241
|
+
bins.delete(-1) unless ((defined? @circular) && @circular)
|
242
242
|
bins
|
243
243
|
end
|
244
244
|
|
data/lib/bio/util/sirna.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
#
|
2
2
|
# = bio/util/sirna.rb - Class for designing small inhibitory RNAs
|
3
3
|
#
|
4
|
-
# Copyright:: Copyright (C) 2004
|
4
|
+
# Copyright:: Copyright (C) 2004-2013
|
5
5
|
# Itoshi NIKAIDO <dritoshi@gmail.com>
|
6
|
+
# Yuki NAITO <y-naito@rnai.jp>
|
6
7
|
# License:: The Ruby License
|
7
8
|
#
|
8
9
|
# $Id:$
|
@@ -33,10 +34,10 @@
|
|
33
34
|
#
|
34
35
|
# * Kumiko Ui-Tei et al. Guidelines for the selection of highly effective
|
35
36
|
# siRNA sequences for mammalian and chick RNA interference.
|
36
|
-
#
|
37
|
+
# Nucleic Acids Res. 2004 32: 936-948.
|
37
38
|
#
|
38
39
|
# * Angela Reynolds et al. Rational siRNA design for RNA interference.
|
39
|
-
#
|
40
|
+
# Nat. Biotechnol. 2004 22: 326-330.
|
40
41
|
#
|
41
42
|
|
42
43
|
require 'bio/sequence'
|
@@ -71,24 +72,64 @@ module Bio
|
|
71
72
|
|
72
73
|
# Ui-Tei's rule.
|
73
74
|
def uitei?(target)
|
74
|
-
return false
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
return false
|
83
|
-
|
75
|
+
return false if target.length != 23 # 21 nt target + 2 nt overhang
|
76
|
+
|
77
|
+
seq19 = target[2..20] # 19 nt double-stranded region of siRNA
|
78
|
+
|
79
|
+
# criteria i
|
80
|
+
return false unless seq19[18..18].match(/[AU]/i)
|
81
|
+
|
82
|
+
# criteria ii
|
83
|
+
return false unless seq19[0..0].match(/[GC]/i)
|
84
|
+
|
85
|
+
# criteria iii
|
86
|
+
au_number = seq19[12..18].scan(/[AU]/i).size
|
87
|
+
return false unless au_number >= 4
|
88
|
+
|
89
|
+
# criteria iv
|
90
|
+
return false if seq19.match(/[GC]{10}/i)
|
91
|
+
|
84
92
|
return true
|
85
93
|
end
|
86
94
|
|
87
95
|
# Reynolds' rule.
|
88
96
|
def reynolds?(target)
|
89
|
-
return false if
|
90
|
-
|
91
|
-
|
97
|
+
return false if target.length != 23 # 21 nt target + 2 nt overhang
|
98
|
+
|
99
|
+
seq19 = target[2..20] # 19 nt double-stranded region of siRNA
|
100
|
+
score = 0
|
101
|
+
|
102
|
+
# criteria I
|
103
|
+
gc_number = seq19.scan(/[GC]/i).size
|
104
|
+
score += 1 if (7 <= gc_number and gc_number <= 10)
|
105
|
+
|
106
|
+
# criteria II
|
107
|
+
au_number = seq19[14..18].scan(/[AU]/i).size
|
108
|
+
score += au_number
|
109
|
+
|
110
|
+
# criteria III
|
111
|
+
# NotImpremented: Tm
|
112
|
+
|
113
|
+
# criteria IV
|
114
|
+
score += 1 if seq19[18..18].match(/A/i)
|
115
|
+
|
116
|
+
# criteria V
|
117
|
+
score += 1 if seq19[2..2].match(/A/i)
|
118
|
+
|
119
|
+
# criteria VI
|
120
|
+
score += 1 if seq19[9..9].match(/[U]/i)
|
121
|
+
|
122
|
+
# criteria VII
|
123
|
+
score -= 1 if seq19[18..18].match(/[GC]/i)
|
124
|
+
|
125
|
+
# criteria VIII
|
126
|
+
score -= 1 if seq19[12..12].match(/G/i)
|
127
|
+
|
128
|
+
if score >= 6
|
129
|
+
return score
|
130
|
+
else
|
131
|
+
return false
|
132
|
+
end
|
92
133
|
end
|
93
134
|
|
94
135
|
# same as design('uitei').
|
@@ -254,6 +295,25 @@ end # module Bio
|
|
254
295
|
|
255
296
|
= ChangeLog
|
256
297
|
|
298
|
+
2013/04/03 Yuki NAITO <y-naito@rnai.jp>
|
299
|
+
Modified siRNA design rules:
|
300
|
+
|
301
|
+
- Ui-Tei's rule:
|
302
|
+
- Restricted target length to 23 nt (21 nt plus 2 nt overhang)
|
303
|
+
for selecting functional siRNAs.
|
304
|
+
- Avoided contiguous GCs 10 nt or more. (not 9 nt or more)
|
305
|
+
|
306
|
+
- Reynolds' rule:
|
307
|
+
- Restricted target length to 23 nt (21 nt plus 2 nt overhang)
|
308
|
+
for selecting functional siRNAs.
|
309
|
+
- Reynolds' rule does not require to fulfill all the criteria
|
310
|
+
simultaneously. Total score of eight criteria is calculated
|
311
|
+
and used for the siRNA efficacy prediction. This change may
|
312
|
+
significantly alter an output.
|
313
|
+
- Returns total score of eight criteria for functional siRNA,
|
314
|
+
instead of returning 'true'.
|
315
|
+
- Returns 'false' for non-functional siRNA, as usual.
|
316
|
+
|
257
317
|
2005/03/21 Itoshi NIKAIDO <itoshi.nikaido@nifty.com>
|
258
318
|
Bio::SiRNA#ShRNA_designer method was changed design method.
|
259
319
|
|