bio 1.6.0.pre.20181210 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +32 -34
- data/ChangeLog +1462 -8
- data/Gemfile +0 -2
- data/KNOWN_ISSUES.rdoc +4 -10
- data/LEGAL +0 -10
- data/README.rdoc +31 -80
- data/README_DEV.rdoc +5 -5
- data/RELEASE_NOTES.rdoc +171 -18
- data/Rakefile +0 -34
- data/appveyor.yml +15 -7
- data/bioruby.gemspec +13 -4
- data/bioruby.gemspec.erb +0 -1
- data/gemfiles/Gemfile.travis-rbx +0 -2
- data/gemfiles/Gemfile.travis-ruby1.8 +0 -2
- data/gemfiles/Gemfile.travis-ruby1.9 +0 -2
- data/gemfiles/Gemfile.windows +6 -0
- data/lib/bio/appl/blast/report.rb +40 -8
- data/lib/bio/appl/iprscan/report.rb +3 -3
- data/lib/bio/appl/sosui/report.rb +1 -1
- data/lib/bio/db/embl/uniprotkb.rb +1 -1
- data/lib/bio/db/gff.rb +3 -1
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +14 -0
- data/lib/bio/db/kegg/genes.rb +26 -0
- data/lib/bio/db/kegg/pathway.rb +5 -11
- data/lib/bio/sequence/common.rb +112 -0
- data/lib/bio/sequence/format.rb +1 -0
- data/lib/bio/tree.rb +1 -1
- data/lib/bio/version.rb +3 -3
- data/sample/color_scheme_aa.rb +82 -0
- data/sample/color_scheme_na.rb +5 -6
- data/sample/fastq2html.cwl +23 -0
- data/sample/fastq2html.rb +94 -0
- data/sample/fastq2html.testdata.yaml +5 -0
- data/sample/na2aa.cwl +23 -0
- data/sample/na2aa.rb +11 -25
- data/sample/na2aa.testdata.yaml +7 -0
- data/sample/rev_comp.cwl +23 -0
- data/sample/rev_comp.rb +20 -0
- data/sample/rev_comp.testdata.yaml +7 -0
- data/test/network/bio/db/kegg/test_genes_hsa7422.rb +91 -0
- data/test/unit/bio/appl/blast/test_report.rb +4 -4
- data/test/unit/bio/db/test_gff.rb +5 -0
- data/test/unit/bio/sequence/test_ruby3.rb +462 -0
- metadata +17 -8
- data/lib/bio/appl/blast/xmlparser.rb +0 -236
- data/setup.rb +0 -1600
data/gemfiles/Gemfile.travis-rbx
CHANGED
data/lib/bio/appl/blast/report.rb
CHANGED
@@ -43,18 +43,20 @@ class Report
|
|
43
43
|
#--
|
44
44
|
# require lines moved here to avoid circular require
|
45
45
|
#++
|
46
|
-
require 'bio/appl/blast/xmlparser'
|
47
46
|
require 'bio/appl/blast/rexml'
|
48
47
|
require 'bio/appl/blast/format8'
|
49
48
|
|
49
|
+
#--
|
50
|
+
# loading bio-blast-xmlparser plugin if available
|
51
|
+
#++
|
52
|
+
begin
|
53
|
+
require 'bio-blast-xmlparser'
|
54
|
+
rescue LoadError
|
55
|
+
end
|
56
|
+
|
50
57
|
# for Bio::FlatFile support (only for XML data)
|
51
58
|
DELIMITER = RS = "</BlastOutput>\n"
|
52
59
|
|
53
|
-
# Specify to use XMLParser to parse XML (-m 7) output.
|
54
|
-
def self.xmlparser(data)
|
55
|
-
self.new(data, :xmlparser)
|
56
|
-
end
|
57
|
-
|
58
60
|
# Specify to use REXML to parse XML (-m 7) output.
|
59
61
|
def self.rexml(data)
|
60
62
|
self.new(data, :rexml)
|
@@ -67,7 +69,7 @@ class Report
|
|
67
69
|
|
68
70
|
def auto_parse(data)
|
69
71
|
if /<?xml/.match(data[/.*/])
|
70
|
-
if defined?
|
72
|
+
if defined? xmlparser_parse
|
71
73
|
xmlparser_parse(data)
|
72
74
|
@reports = blastxml_split_reports
|
73
75
|
else
|
@@ -87,7 +89,11 @@ class Report
|
|
87
89
|
@parameters = {}
|
88
90
|
case parser
|
89
91
|
when :xmlparser # format 7
|
90
|
-
xmlparser_parse
|
92
|
+
if defined? xmlparser_parse
|
93
|
+
xmlparser_parse(data)
|
94
|
+
else
|
95
|
+
raise NameError, "xmlparser_parse does not defined"
|
96
|
+
end
|
91
97
|
@reports = blastxml_split_reports
|
92
98
|
when :rexml # format 7
|
93
99
|
rexml_parse(data)
|
@@ -383,6 +389,32 @@ class Report
|
|
383
389
|
attr_reader :reports
|
384
390
|
|
385
391
|
private
|
392
|
+
# set parameter of the key as val
|
393
|
+
def xml_set_parameter(key, val)
|
394
|
+
#labels = {
|
395
|
+
# 'matrix' => 'Parameters_matrix',
|
396
|
+
# 'expect' => 'Parameters_expect',
|
397
|
+
# 'include' => 'Parameters_include',
|
398
|
+
# 'sc-match' => 'Parameters_sc-match',
|
399
|
+
# 'sc-mismatch' => 'Parameters_sc-mismatch',
|
400
|
+
# 'gap-open' => 'Parameters_gap-open',
|
401
|
+
# 'gap-extend' => 'Parameters_gap-extend',
|
402
|
+
# 'filter' => 'Parameters_filter',
|
403
|
+
# 'pattern' => 'Parameters_pattern',
|
404
|
+
# 'entrez-query' => 'Parameters_entrez-query',
|
405
|
+
#}
|
406
|
+
k = key.sub(/\AParameters\_/, '')
|
407
|
+
@parameters[k] =
|
408
|
+
case k
|
409
|
+
when 'expect', 'include'
|
410
|
+
val.to_f
|
411
|
+
when /\Agap\-/, /\Asc\-/
|
412
|
+
val.to_i
|
413
|
+
else
|
414
|
+
val
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
386
418
|
# (private method)
|
387
419
|
# In new BLAST XML (blastall >= 2.2.14), results of multiple queries
|
388
420
|
# are stored in <Iteration>. This method splits iterations into
|
data/lib/bio/appl/iprscan/report.rb
CHANGED
@@ -83,7 +83,7 @@ module Bio
|
|
83
83
|
yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != ''
|
84
84
|
end
|
85
85
|
|
86
|
-
# Parser method for a raw formated entry.
|
86
|
+
# Parser method for a raw formated entry. Returns a Bio::Iprscan::Report
|
87
87
|
# object.
|
88
88
|
def self.parse_raw_entry(str)
|
89
89
|
report = self.new
|
@@ -113,7 +113,7 @@ module Bio
|
|
113
113
|
|
114
114
|
|
115
115
|
|
116
|
-
# Parser method for a xml formated entry.
|
116
|
+
# Parser method for a xml formated entry. Returns a Bio::Iprscan::Report
|
117
117
|
# object.
|
118
118
|
# def self.parse_xml(str)
|
119
119
|
# end
|
@@ -196,7 +196,7 @@ module Bio
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
-
# Parser method for a pseudo-txt formated entry.
|
199
|
+
# Parser method for a pseudo-txt formated entry. Returns a Bio::Iprscan::Report
|
200
200
|
# object.
|
201
201
|
#
|
202
202
|
# == Usage
|
data/lib/bio/db/embl/uniprotkb.rb
CHANGED
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
|
|
174
174
|
#
|
175
175
|
# http://www.uniprot.org/docs/sp_news.htm
|
176
176
|
def parse_DE_line_rel14(str)
|
177
|
-
#
|
177
|
+
# Returns if it is not the new format since Rel.14
|
178
178
|
return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
|
179
179
|
ret = []
|
180
180
|
cur = nil
|
data/lib/bio/db/gff.rb
CHANGED
@@ -359,7 +359,9 @@ module Bio
|
|
359
359
|
# Parses a GFF2-formatted line and returns a new
|
360
360
|
# Bio::GFF::GFF2::Record object.
|
361
361
|
def self.parse(str)
|
362
|
-
self.new
|
362
|
+
ret = self.new
|
363
|
+
ret.parse(str)
|
364
|
+
ret
|
363
365
|
end
|
364
366
|
|
365
367
|
# Creates a Bio::GFF::GFF2::Record object.
|
data/lib/bio/db/go.rb
CHANGED
@@ -193,7 +193,7 @@ class GO
|
|
193
193
|
# Delimiter
|
194
194
|
RS = DELIMITER
|
195
195
|
|
196
|
-
#
|
196
|
+
# Returns an Array of parsed gene_association flatfile.
|
197
197
|
# Block is acceptable.
|
198
198
|
def self.parser(str)
|
199
199
|
if block_given?
|
@@ -226,7 +226,7 @@ class GO
|
|
226
226
|
# Returns Db_Reference variable.
|
227
227
|
attr_reader :db_reference # -> []
|
228
228
|
|
229
|
-
#
|
229
|
+
# Returns Evidence code variable.
|
230
230
|
attr_reader :evidence
|
231
231
|
|
232
232
|
# Returns the entry is associated with this value.
|
data/lib/bio/db/kegg/common.rb
CHANGED
@@ -210,6 +210,20 @@ class KEGG
|
|
210
210
|
private :strings_as_hash
|
211
211
|
end #module StringsAsHash
|
212
212
|
|
213
|
+
# This module provides diseases_as_hash method.
|
214
|
+
#
|
215
|
+
# Bio::KEGG::* internal use only.
|
216
|
+
module DiseasesAsHash
|
217
|
+
include StringsAsHash
|
218
|
+
# Returns a Hash of the disease ID and its definition
|
219
|
+
def diseases_as_hash
|
220
|
+
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
221
|
+
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
222
|
+
end
|
223
|
+
@diseases_as_hash
|
224
|
+
end
|
225
|
+
end #module DiseasesAsHash
|
226
|
+
|
213
227
|
end #module Common
|
214
228
|
end #class KEGG
|
215
229
|
end #module Bio
|
data/lib/bio/db/kegg/genes.rb
CHANGED
@@ -107,6 +107,11 @@ class GENES < KEGGDB
|
|
107
107
|
def orthologs_as_hash; super; end if false #dummy for RDoc
|
108
108
|
alias orthologs orthologs_as_hash
|
109
109
|
|
110
|
+
include Common::DiseasesAsHash
|
111
|
+
# Returns a Hash of the disease ID and its definition
|
112
|
+
def diseases_as_hash; super; end if false #dummy for RDoc
|
113
|
+
alias diseases diseases_as_hash
|
114
|
+
|
110
115
|
# Creates a new Bio::KEGG::GENES object.
|
111
116
|
# ---
|
112
117
|
# *Arguments*:
|
@@ -238,6 +243,27 @@ class GENES < KEGGDB
|
|
238
243
|
lines_fetch('PATHWAY')
|
239
244
|
end
|
240
245
|
|
246
|
+
# Networks described in the NETWORK lines.
|
247
|
+
# ---
|
248
|
+
# *Returns*:: Array containing String
|
249
|
+
def networks_as_strings
|
250
|
+
lines_fetch('NETWORK')
|
251
|
+
end
|
252
|
+
|
253
|
+
# Diseases described in the DISEASE lines.
|
254
|
+
# ---
|
255
|
+
# *Returns*:: Array containing String
|
256
|
+
def diseases_as_strings
|
257
|
+
lines_fetch('DISEASE')
|
258
|
+
end
|
259
|
+
|
260
|
+
# Drug targets described in the DRUG_TARGET lines.
|
261
|
+
# ---
|
262
|
+
# *Returns*:: Array containing String
|
263
|
+
def drug_targets_as_strings
|
264
|
+
lines_fetch('DRUG_TARGET')
|
265
|
+
end
|
266
|
+
|
241
267
|
# Returns CLASS field of the entry.
|
242
268
|
def keggclass
|
243
269
|
field_fetch('CLASS')
|
data/lib/bio/db/kegg/pathway.rb
CHANGED
@@ -42,6 +42,11 @@ class PATHWAY < KEGGDB
|
|
42
42
|
def orthologs_as_hash; super; end if false #dummy for RDoc
|
43
43
|
alias orthologs orthologs_as_hash
|
44
44
|
|
45
|
+
include Common::DiseasesAsHash
|
46
|
+
# Returns a Hash of the disease ID and its definition
|
47
|
+
def diseases_as_hash; super; end if false #dummy for RDoc
|
48
|
+
alias diseases diseases_as_hash
|
49
|
+
|
45
50
|
include Common::References
|
46
51
|
# REFERENCE -- Returns contents of the REFERENCE records as an Array of
|
47
52
|
# Bio::Reference objects.
|
@@ -122,17 +127,6 @@ class PATHWAY < KEGGDB
|
|
122
127
|
lines_fetch('DISEASE')
|
123
128
|
end
|
124
129
|
|
125
|
-
# Diseases described in the DISEASE lines.
|
126
|
-
# ---
|
127
|
-
# *Returns*:: Hash of disease ID and its definition
|
128
|
-
def diseases_as_hash
|
129
|
-
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
130
|
-
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
131
|
-
end
|
132
|
-
@diseases_as_hash
|
133
|
-
end
|
134
|
-
alias diseases diseases_as_hash
|
135
|
-
|
136
130
|
# Returns an Array of a database name and entry IDs in DBLINKS field.
|
137
131
|
# ---
|
138
132
|
# *Returns*:: Array containing String
|
data/lib/bio/sequence/common.rb
CHANGED
@@ -303,6 +303,118 @@ module Common
|
|
303
303
|
end
|
304
304
|
alias splicing splice
|
305
305
|
|
306
|
+
#--
|
307
|
+
# Workaround for Ruby 3.0.0 incompatible changes
|
308
|
+
if ::RUBY_VERSION > "3"
|
309
|
+
|
310
|
+
# Acts almost the same as String#split.
|
311
|
+
def split(*arg)
|
312
|
+
if block_given?
|
313
|
+
super
|
314
|
+
else
|
315
|
+
ret = super(*arg)
|
316
|
+
ret.collect! { |x| self.class.new('').replace(x) }
|
317
|
+
ret
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
%w( * ljust rjust center ).each do |w|
|
322
|
+
module_eval %Q{
|
323
|
+
def #{w}(*arg)
|
324
|
+
self.class.new('').replace(super)
|
325
|
+
end
|
326
|
+
}
|
327
|
+
end
|
328
|
+
|
329
|
+
%w( chomp chop
|
330
|
+
delete delete_prefix delete_suffix
|
331
|
+
lstrip rstrip strip
|
332
|
+
reverse
|
333
|
+
squeeze
|
334
|
+
succ next
|
335
|
+
tr tr_s
|
336
|
+
capitalize upcase downcase swapcase
|
337
|
+
).each do |w|
|
338
|
+
module_eval %Q{
|
339
|
+
def #{w}(*arg)
|
340
|
+
s = self.dup
|
341
|
+
s.#{w}!(*arg)
|
342
|
+
s
|
343
|
+
end
|
344
|
+
}
|
345
|
+
end
|
346
|
+
|
347
|
+
%w( sub gsub ).each do |w|
|
348
|
+
module_eval %Q{
|
349
|
+
def #{w}(*arg, &block)
|
350
|
+
s = self.dup
|
351
|
+
s.#{w}!(*arg, &block)
|
352
|
+
s
|
353
|
+
end
|
354
|
+
}
|
355
|
+
end
|
356
|
+
|
357
|
+
#Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
|
358
|
+
#(Title: Is it possible to implement gsub wrapper?)
|
359
|
+
%w( sub! gsub! ).each do |w|
|
360
|
+
module_eval %Q{
|
361
|
+
def #{w}(*arg, &block)
|
362
|
+
if block_given? then
|
363
|
+
super(*arg) do |m|
|
364
|
+
b = Thread.current[:_backref]
|
365
|
+
Thread.current[:_backref] = ::Regexp.last_match
|
366
|
+
block.binding.eval("$~ = Thread.current[:_backref]")
|
367
|
+
Thread.current[:_backref] = b
|
368
|
+
block.call(self.class.new('').replace(m))
|
369
|
+
end
|
370
|
+
else
|
371
|
+
super
|
372
|
+
end
|
373
|
+
end
|
374
|
+
}
|
375
|
+
end
|
376
|
+
|
377
|
+
%w( each_char each_grapheme_cluster each_line ).each do |w|
|
378
|
+
module_eval %Q{
|
379
|
+
def #{w}
|
380
|
+
if block_given?
|
381
|
+
super { |c| yield(self.class.new('').replace(c)) }
|
382
|
+
else
|
383
|
+
enum_for(:#{w})
|
384
|
+
end
|
385
|
+
end
|
386
|
+
}
|
387
|
+
end
|
388
|
+
|
389
|
+
%w( slice [] slice! ).each do |w|
|
390
|
+
module_eval %Q{
|
391
|
+
def #{w}(*arg)
|
392
|
+
r = super
|
393
|
+
r ? self.class.new('').replace(r) : r
|
394
|
+
end
|
395
|
+
}
|
396
|
+
end
|
397
|
+
|
398
|
+
%w( partition rpartition ).each do |w|
|
399
|
+
module_eval %Q{
|
400
|
+
def #{w}(sep)
|
401
|
+
r = super
|
402
|
+
if r.kind_of?(Array)
|
403
|
+
r[1] == sep ?
|
404
|
+
[ self.class.new('').replace(r[0]),
|
405
|
+
r[1],
|
406
|
+
self.class.new('').replace(r[2]) ] :
|
407
|
+
r.collect { |x| self.class.new('').replace(x) }
|
408
|
+
else
|
409
|
+
r
|
410
|
+
end
|
411
|
+
end
|
412
|
+
}
|
413
|
+
end
|
414
|
+
#++
|
415
|
+
|
416
|
+
end # if ::RUBY_VERSION > "3"
|
417
|
+
|
306
418
|
end # Common
|
307
419
|
|
308
420
|
end # Sequence
|
data/lib/bio/sequence/format.rb
CHANGED
data/lib/bio/tree.rb
CHANGED
@@ -605,7 +605,7 @@ module Bio
|
|
605
605
|
end
|
606
606
|
|
607
607
|
# Gets path from node1 to node2.
|
608
|
-
#
|
608
|
+
# Returns an array of nodes, including node1 and node2.
|
609
609
|
# If node1 and/or node2 do not exist, IndexError is raised.
|
610
610
|
# If node1 and node2 are not connected, NoPathError is raised.
|
611
611
|
# The result is unspecified for cyclic trees.
|
data/lib/bio/version.rb
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
module Bio
|
11
11
|
|
12
12
|
# BioRuby version (Array containing Integer)
|
13
|
-
BIORUBY_VERSION = [
|
13
|
+
BIORUBY_VERSION = [2, 0, 3].extend(Comparable).freeze
|
14
14
|
|
15
15
|
# Extra version specifier (String or nil).
|
16
16
|
# Existance of the value indicates development version.
|
@@ -19,8 +19,8 @@ module Bio
|
|
19
19
|
# ".pre :: Pre-release version.
|
20
20
|
#
|
21
21
|
# References: https://guides.rubygems.org/patterns/#prerelease-gems
|
22
|
-
BIORUBY_EXTRA_VERSION =
|
23
|
-
".pre"
|
22
|
+
BIORUBY_EXTRA_VERSION = nil
|
23
|
+
#".pre"
|
24
24
|
|
25
25
|
# Version identifier, including extra version string (String)
|
26
26
|
# Unlike BIORUBY_VERSION, it is not comparable.
|
data/sample/color_scheme_aa.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# color_scheme_aa.rb - A Bio::ColorScheme demo script for Amino Acid sequences.
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
#
|
7
|
+
# % ruby color_scheme_aa.rb > cs-seq-faa.html
|
8
|
+
#
|
9
|
+
# % cat seq.faa
|
10
|
+
# >AA_sequence
|
11
|
+
# MKRISTTITTTITITTGNGAG
|
12
|
+
# % ruby color_scheme_aa.rb seq.faa > colored-seq-faa.html
|
13
|
+
#
|
14
|
+
#
|
15
|
+
# Copyright:: Copyright (C) 2005
|
16
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
17
|
+
# License:: The Ruby License
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'bio'
|
21
|
+
|
22
|
+
|
23
|
+
# returns folded sequence with <br>.
|
24
|
+
def br(i, width = 80)
|
25
|
+
return "<br\n>" if i % width == 0
|
26
|
+
""
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
# returns sequence html doc
|
31
|
+
def display(seq, cs)
|
32
|
+
html = '<p style="font-family: monospace">'
|
33
|
+
postfix = '</span>'
|
34
|
+
i = 0
|
35
|
+
seq.each_char do |c|
|
36
|
+
color = cs[c]
|
37
|
+
prefix = %Q(<span style="background:\##{color};">)
|
38
|
+
html += prefix + c + postfix
|
39
|
+
html += br(i += 1)
|
40
|
+
end
|
41
|
+
html + '</p>'
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
# returns scheme wise html doc
|
46
|
+
def display_scheme(scheme, aaseq)
|
47
|
+
html = ''
|
48
|
+
cs = Bio::ColorScheme.const_get(scheme.intern)
|
49
|
+
[aaseq].each do |seq|
|
50
|
+
html += display(seq, cs)
|
51
|
+
end
|
52
|
+
return ['<div>', "<h3>#{cs}</h3>", html, '</div>']
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
if fna = ARGV.shift
|
58
|
+
aaseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.aaseq }
|
59
|
+
else
|
60
|
+
aaseq = Bio::Sequence::AA.new('ARNDCQEGHILKMFPSTWYV' * 20).randomize
|
61
|
+
end
|
62
|
+
|
63
|
+
title = 'Bio::ColorScheme for amino acid sequences'
|
64
|
+
doc = ['<html>',
|
65
|
+
'<header>', '<title>', title, '</title>', '</header>',
|
66
|
+
'<body>', '<h1>', title, '</h1>']
|
67
|
+
|
68
|
+
doc << ['<div>', '<h2>', 'Simple colors', '</h2>']
|
69
|
+
|
70
|
+
['Zappo', 'Taylor' ].each do |scheme|
|
71
|
+
doc << display_scheme(scheme, aaseq)
|
72
|
+
end
|
73
|
+
doc << ['</div>']
|
74
|
+
|
75
|
+
|
76
|
+
doc << ['<div>', '<h2>', 'Score colors', '</h2>']
|
77
|
+
['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score|
|
78
|
+
doc << display_scheme(score, aaseq)
|
79
|
+
end
|
80
|
+
doc << ['</div>']
|
81
|
+
|
82
|
+
puts doc + ['</body>','</html>']
|
data/sample/color_scheme_na.rb
CHANGED
@@ -17,7 +17,6 @@
|
|
17
17
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
18
18
|
# License:: The Ruby License
|
19
19
|
#
|
20
|
-
# $Id: color_scheme_na.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $
|
21
20
|
#
|
22
21
|
|
23
22
|
require 'bio'
|
@@ -35,10 +34,10 @@ def display(seq, cs)
|
|
35
34
|
html = '<p style="font-family: monospace">'
|
36
35
|
postfix = '</span>'
|
37
36
|
i = 0
|
38
|
-
seq.
|
39
|
-
color = cs[c
|
37
|
+
seq.each_char do |c|
|
38
|
+
color = cs[c]
|
40
39
|
prefix = %Q(<span style="background:\##{color};">)
|
41
|
-
html += prefix + c
|
40
|
+
html += prefix + c + postfix
|
42
41
|
html += br(i += 1)
|
43
42
|
end
|
44
43
|
html + '</p>'
|
@@ -48,7 +47,7 @@ end
|
|
48
47
|
# returns scheme wise html doc
|
49
48
|
def display_scheme(scheme, naseq, aaseq)
|
50
49
|
html = ''
|
51
|
-
cs =
|
50
|
+
cs = Bio::ColorScheme.const_get(scheme.intern)
|
52
51
|
[naseq, aaseq].each do |seq|
|
53
52
|
html += display(seq, cs)
|
54
53
|
end
|
@@ -58,7 +57,7 @@ end
|
|
58
57
|
|
59
58
|
|
60
59
|
if fna = ARGV.shift
|
61
|
-
naseq = Bio::
|
60
|
+
naseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.naseq }
|
62
61
|
aaseq = naseq.translate
|
63
62
|
else
|
64
63
|
naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize
|
data/sample/fastq2html.cwl
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env cwl-runner
|
2
|
+
|
3
|
+
cwlVersion: v1.0
|
4
|
+
class: CommandLineTool
|
5
|
+
baseCommand: [ruby]
|
6
|
+
|
7
|
+
inputs:
|
8
|
+
- id: script
|
9
|
+
type: File
|
10
|
+
default:
|
11
|
+
class: File
|
12
|
+
location: fastq2html.rb
|
13
|
+
inputBinding:
|
14
|
+
position: -1
|
15
|
+
- id: fastq
|
16
|
+
type: File[]
|
17
|
+
inputBinding:
|
18
|
+
position: 1
|
19
|
+
|
20
|
+
outputs:
|
21
|
+
- id: out
|
22
|
+
type: stdout
|
23
|
+
stdout: $(inputs.script.nameroot)-$(inputs.fastq[0].nameroot).html
|
data/sample/fastq2html.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# fastq2html.rb - HTML visualization of FASTQ sequences
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
#
|
7
|
+
# % ruby fastq2html.rb seq00.fastq > seq00.html
|
8
|
+
#
|
9
|
+
#
|
10
|
+
# Copyright:: Copyright (C) 2019 BioRuby Project
|
11
|
+
# Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
|
12
|
+
# License:: The Ruby License
|
13
|
+
#
|
14
|
+
#
|
15
|
+
|
16
|
+
require 'bio'
|
17
|
+
|
18
|
+
# thickness to color
|
19
|
+
def thickness2color(t)
|
20
|
+
c = "%02X" % ((t * 255.0).to_i)
|
21
|
+
c * 3
|
22
|
+
end
|
23
|
+
|
24
|
+
# Creates
|
25
|
+
def create_score2color_hashes
|
26
|
+
h_bg = {}
|
27
|
+
h_char = {}
|
28
|
+
cutoff_low = 0
|
29
|
+
cutoff_high = 50
|
30
|
+
range = cutoff_high - cutoff_low
|
31
|
+
sc_min = -5
|
32
|
+
sc_max = 100
|
33
|
+
(sc_min..sc_max).each do |i|
|
34
|
+
t = if i <= cutoff_low then
|
35
|
+
0.0
|
36
|
+
elsif i >= cutoff_high then
|
37
|
+
1.0
|
38
|
+
else
|
39
|
+
(i - cutoff_low).to_f / range
|
40
|
+
end
|
41
|
+
h_bg[i] = thickness2color(t)
|
42
|
+
h_char[i] = thickness2color((t > 0.3) ? 0.0 : 0.55)
|
43
|
+
end
|
44
|
+
h_bg.default = h_bg[cutoff_low]
|
45
|
+
h_char.default = h_char[cutoff_low]
|
46
|
+
[h_bg, h_char]
|
47
|
+
end
|
48
|
+
|
49
|
+
# Color code from quality score
|
50
|
+
SCORE2COLOR_BG, SCORE2COLOR_CHAR = create_score2color_hashes
|
51
|
+
|
52
|
+
# returns folded sequence with <br>.
|
53
|
+
def br(i, width = 80)
|
54
|
+
return "<br\n>" if i % width == 0
|
55
|
+
""
|
56
|
+
end
|
57
|
+
|
58
|
+
# returns sequence html doc
|
59
|
+
def display(naseq, scores)
|
60
|
+
html = '<p style="font-family: monospace">'
|
61
|
+
postfix = '</span>'
|
62
|
+
i = 0
|
63
|
+
naseq.each_char.with_index do |c, i|
|
64
|
+
sc = scores[i]
|
65
|
+
bgcol = SCORE2COLOR_BG[sc]
|
66
|
+
col = SCORE2COLOR_CHAR[sc]
|
67
|
+
prefix = %Q(<span style="color:\##{col}; background:\##{bgcol};">)
|
68
|
+
html += prefix + c + postfix
|
69
|
+
html += br(i += 1)
|
70
|
+
end
|
71
|
+
html + '</p>'
|
72
|
+
end
|
73
|
+
|
74
|
+
# returns colorized html doc
|
75
|
+
def fastq2html(definition, naseq, scores)
|
76
|
+
html = display(naseq, scores)
|
77
|
+
return ['<div>', "<div>>#{CGI.escapeHTML(definition)}</div>", html, '</div>']
|
78
|
+
end
|
79
|
+
|
80
|
+
title = 'Sequences with quality scores'
|
81
|
+
puts ['<html>',
|
82
|
+
'<header>', '<title>', title, '</title>', '</header>',
|
83
|
+
'<body>', '<h1>', title, '</h1>']
|
84
|
+
|
85
|
+
#main loop
|
86
|
+
ARGV.each do |filename|
|
87
|
+
Bio::FlatFile.open(filename) do |ff|
|
88
|
+
ff.each do |e|
|
89
|
+
puts fastq2html(e.definition, e.naseq, e.quality_scores)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
puts ['</body>','</html>']
|