bio 1.6.0.pre.20181210 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +32 -34
- data/ChangeLog +1462 -8
- data/Gemfile +0 -2
- data/KNOWN_ISSUES.rdoc +4 -10
- data/LEGAL +0 -10
- data/README.rdoc +31 -80
- data/README_DEV.rdoc +5 -5
- data/RELEASE_NOTES.rdoc +171 -18
- data/Rakefile +0 -34
- data/appveyor.yml +15 -7
- data/bioruby.gemspec +13 -4
- data/bioruby.gemspec.erb +0 -1
- data/gemfiles/Gemfile.travis-rbx +0 -2
- data/gemfiles/Gemfile.travis-ruby1.8 +0 -2
- data/gemfiles/Gemfile.travis-ruby1.9 +0 -2
- data/gemfiles/Gemfile.windows +6 -0
- data/lib/bio/appl/blast/report.rb +40 -8
- data/lib/bio/appl/iprscan/report.rb +3 -3
- data/lib/bio/appl/sosui/report.rb +1 -1
- data/lib/bio/db/embl/uniprotkb.rb +1 -1
- data/lib/bio/db/gff.rb +3 -1
- data/lib/bio/db/go.rb +2 -2
- data/lib/bio/db/kegg/common.rb +14 -0
- data/lib/bio/db/kegg/genes.rb +26 -0
- data/lib/bio/db/kegg/pathway.rb +5 -11
- data/lib/bio/sequence/common.rb +112 -0
- data/lib/bio/sequence/format.rb +1 -0
- data/lib/bio/tree.rb +1 -1
- data/lib/bio/version.rb +3 -3
- data/sample/color_scheme_aa.rb +82 -0
- data/sample/color_scheme_na.rb +5 -6
- data/sample/fastq2html.cwl +23 -0
- data/sample/fastq2html.rb +94 -0
- data/sample/fastq2html.testdata.yaml +5 -0
- data/sample/na2aa.cwl +23 -0
- data/sample/na2aa.rb +11 -25
- data/sample/na2aa.testdata.yaml +7 -0
- data/sample/rev_comp.cwl +23 -0
- data/sample/rev_comp.rb +20 -0
- data/sample/rev_comp.testdata.yaml +7 -0
- data/test/network/bio/db/kegg/test_genes_hsa7422.rb +91 -0
- data/test/unit/bio/appl/blast/test_report.rb +4 -4
- data/test/unit/bio/db/test_gff.rb +5 -0
- data/test/unit/bio/sequence/test_ruby3.rb +462 -0
- metadata +17 -8
- data/lib/bio/appl/blast/xmlparser.rb +0 -236
- data/setup.rb +0 -1600
data/gemfiles/Gemfile.travis-rbx
CHANGED
@@ -43,18 +43,20 @@ class Report
|
|
43
43
|
#--
|
44
44
|
# require lines moved here to avoid circular require
|
45
45
|
#++
|
46
|
-
require 'bio/appl/blast/xmlparser'
|
47
46
|
require 'bio/appl/blast/rexml'
|
48
47
|
require 'bio/appl/blast/format8'
|
49
48
|
|
49
|
+
#--
|
50
|
+
# loading bio-blast-xmlparser plugin if available
|
51
|
+
#++
|
52
|
+
begin
|
53
|
+
require 'bio-blast-xmlparser'
|
54
|
+
rescue LoadError
|
55
|
+
end
|
56
|
+
|
50
57
|
# for Bio::FlatFile support (only for XML data)
|
51
58
|
DELIMITER = RS = "</BlastOutput>\n"
|
52
59
|
|
53
|
-
# Specify to use XMLParser to parse XML (-m 7) output.
|
54
|
-
def self.xmlparser(data)
|
55
|
-
self.new(data, :xmlparser)
|
56
|
-
end
|
57
|
-
|
58
60
|
# Specify to use REXML to parse XML (-m 7) output.
|
59
61
|
def self.rexml(data)
|
60
62
|
self.new(data, :rexml)
|
@@ -67,7 +69,7 @@ class Report
|
|
67
69
|
|
68
70
|
def auto_parse(data)
|
69
71
|
if /<?xml/.match(data[/.*/])
|
70
|
-
if defined?
|
72
|
+
if defined? xmlparser_parse
|
71
73
|
xmlparser_parse(data)
|
72
74
|
@reports = blastxml_split_reports
|
73
75
|
else
|
@@ -87,7 +89,11 @@ class Report
|
|
87
89
|
@parameters = {}
|
88
90
|
case parser
|
89
91
|
when :xmlparser # format 7
|
90
|
-
xmlparser_parse
|
92
|
+
if defined? xmlparser_parse
|
93
|
+
xmlparser_parse(data)
|
94
|
+
else
|
95
|
+
raise NameError, "xmlparser_parse does not defined"
|
96
|
+
end
|
91
97
|
@reports = blastxml_split_reports
|
92
98
|
when :rexml # format 7
|
93
99
|
rexml_parse(data)
|
@@ -383,6 +389,32 @@ class Report
|
|
383
389
|
attr_reader :reports
|
384
390
|
|
385
391
|
private
|
392
|
+
# set parameter of the key as val
|
393
|
+
def xml_set_parameter(key, val)
|
394
|
+
#labels = {
|
395
|
+
# 'matrix' => 'Parameters_matrix',
|
396
|
+
# 'expect' => 'Parameters_expect',
|
397
|
+
# 'include' => 'Parameters_include',
|
398
|
+
# 'sc-match' => 'Parameters_sc-match',
|
399
|
+
# 'sc-mismatch' => 'Parameters_sc-mismatch',
|
400
|
+
# 'gap-open' => 'Parameters_gap-open',
|
401
|
+
# 'gap-extend' => 'Parameters_gap-extend',
|
402
|
+
# 'filter' => 'Parameters_filter',
|
403
|
+
# 'pattern' => 'Parameters_pattern',
|
404
|
+
# 'entrez-query' => 'Parameters_entrez-query',
|
405
|
+
#}
|
406
|
+
k = key.sub(/\AParameters\_/, '')
|
407
|
+
@parameters[k] =
|
408
|
+
case k
|
409
|
+
when 'expect', 'include'
|
410
|
+
val.to_f
|
411
|
+
when /\Agap\-/, /\Asc\-/
|
412
|
+
val.to_i
|
413
|
+
else
|
414
|
+
val
|
415
|
+
end
|
416
|
+
end
|
417
|
+
|
386
418
|
# (private method)
|
387
419
|
# In new BLAST XML (blastall >= 2.2.14), results of multiple queries
|
388
420
|
# are stored in <Iteration>. This method splits iterations into
|
@@ -83,7 +83,7 @@ module Bio
|
|
83
83
|
yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != ''
|
84
84
|
end
|
85
85
|
|
86
|
-
# Parser method for a raw formated entry.
|
86
|
+
# Parser method for a raw formated entry. Returns a Bio::Iprscan::Report
|
87
87
|
# object.
|
88
88
|
def self.parse_raw_entry(str)
|
89
89
|
report = self.new
|
@@ -113,7 +113,7 @@ module Bio
|
|
113
113
|
|
114
114
|
|
115
115
|
|
116
|
-
# Parser method for a xml formated entry.
|
116
|
+
# Parser method for a xml formated entry. Returns a Bio::Iprscan::Report
|
117
117
|
# object.
|
118
118
|
# def self.parse_xml(str)
|
119
119
|
# end
|
@@ -196,7 +196,7 @@ module Bio
|
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
199
|
-
# Parser method for a pseudo-txt formated entry.
|
199
|
+
# Parser method for a pseudo-txt formated entry. Returns a Bio::Iprscan::Report
|
200
200
|
# object.
|
201
201
|
#
|
202
202
|
# == Usage
|
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
|
|
174
174
|
#
|
175
175
|
# http://www.uniprot.org/docs/sp_news.htm
|
176
176
|
def parse_DE_line_rel14(str)
|
177
|
-
#
|
177
|
+
# Returns if it is not the new format since Rel.14
|
178
178
|
return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
|
179
179
|
ret = []
|
180
180
|
cur = nil
|
data/lib/bio/db/gff.rb
CHANGED
@@ -359,7 +359,9 @@ module Bio
|
|
359
359
|
# Parses a GFF2-formatted line and returns a new
|
360
360
|
# Bio::GFF::GFF2::Record object.
|
361
361
|
def self.parse(str)
|
362
|
-
self.new
|
362
|
+
ret = self.new
|
363
|
+
ret.parse(str)
|
364
|
+
ret
|
363
365
|
end
|
364
366
|
|
365
367
|
# Creates a Bio::GFF::GFF2::Record object.
|
data/lib/bio/db/go.rb
CHANGED
@@ -193,7 +193,7 @@ class GO
|
|
193
193
|
# Delimiter
|
194
194
|
RS = DELIMITER
|
195
195
|
|
196
|
-
#
|
196
|
+
# Returns an Array of parsed gene_association flatfile.
|
197
197
|
# Block is acceptable.
|
198
198
|
def self.parser(str)
|
199
199
|
if block_given?
|
@@ -226,7 +226,7 @@ class GO
|
|
226
226
|
# Returns Db_Reference variable.
|
227
227
|
attr_reader :db_reference # -> []
|
228
228
|
|
229
|
-
#
|
229
|
+
# Returns Evidence code variable.
|
230
230
|
attr_reader :evidence
|
231
231
|
|
232
232
|
# Returns the entry is associated with this value.
|
data/lib/bio/db/kegg/common.rb
CHANGED
@@ -210,6 +210,20 @@ class KEGG
|
|
210
210
|
private :strings_as_hash
|
211
211
|
end #module StringsAsHash
|
212
212
|
|
213
|
+
# This module provides diseases_as_hash method.
|
214
|
+
#
|
215
|
+
# Bio::KEGG::* internal use only.
|
216
|
+
module DiseasesAsHash
|
217
|
+
include StringsAsHash
|
218
|
+
# Returns a Hash of the disease ID and its definition
|
219
|
+
def diseases_as_hash
|
220
|
+
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
221
|
+
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
222
|
+
end
|
223
|
+
@diseases_as_hash
|
224
|
+
end
|
225
|
+
end #module DiseasesAsHash
|
226
|
+
|
213
227
|
end #module Common
|
214
228
|
end #class KEGG
|
215
229
|
end #module Bio
|
data/lib/bio/db/kegg/genes.rb
CHANGED
@@ -107,6 +107,11 @@ class GENES < KEGGDB
|
|
107
107
|
def orthologs_as_hash; super; end if false #dummy for RDoc
|
108
108
|
alias orthologs orthologs_as_hash
|
109
109
|
|
110
|
+
include Common::DiseasesAsHash
|
111
|
+
# Returns a Hash of the disease ID and its definition
|
112
|
+
def diseases_as_hash; super; end if false #dummy for RDoc
|
113
|
+
alias diseases diseases_as_hash
|
114
|
+
|
110
115
|
# Creates a new Bio::KEGG::GENES object.
|
111
116
|
# ---
|
112
117
|
# *Arguments*:
|
@@ -238,6 +243,27 @@ class GENES < KEGGDB
|
|
238
243
|
lines_fetch('PATHWAY')
|
239
244
|
end
|
240
245
|
|
246
|
+
# Networks described in the NETWORK lines.
|
247
|
+
# ---
|
248
|
+
# *Returns*:: Array containing String
|
249
|
+
def networks_as_strings
|
250
|
+
lines_fetch('NETWORK')
|
251
|
+
end
|
252
|
+
|
253
|
+
# Diseases described in the DISEASE lines.
|
254
|
+
# ---
|
255
|
+
# *Returns*:: Array containing String
|
256
|
+
def diseases_as_strings
|
257
|
+
lines_fetch('DISEASE')
|
258
|
+
end
|
259
|
+
|
260
|
+
# Drug targets described in the DRUG_TARGET lines.
|
261
|
+
# ---
|
262
|
+
# *Returns*:: Array containing String
|
263
|
+
def drug_targets_as_strings
|
264
|
+
lines_fetch('DRUG_TARGET')
|
265
|
+
end
|
266
|
+
|
241
267
|
# Returns CLASS field of the entry.
|
242
268
|
def keggclass
|
243
269
|
field_fetch('CLASS')
|
data/lib/bio/db/kegg/pathway.rb
CHANGED
@@ -42,6 +42,11 @@ class PATHWAY < KEGGDB
|
|
42
42
|
def orthologs_as_hash; super; end if false #dummy for RDoc
|
43
43
|
alias orthologs orthologs_as_hash
|
44
44
|
|
45
|
+
include Common::DiseasesAsHash
|
46
|
+
# Returns a Hash of the disease ID and its definition
|
47
|
+
def diseases_as_hash; super; end if false #dummy for RDoc
|
48
|
+
alias diseases diseases_as_hash
|
49
|
+
|
45
50
|
include Common::References
|
46
51
|
# REFERENCE -- Returns contents of the REFERENCE records as an Array of
|
47
52
|
# Bio::Reference objects.
|
@@ -122,17 +127,6 @@ class PATHWAY < KEGGDB
|
|
122
127
|
lines_fetch('DISEASE')
|
123
128
|
end
|
124
129
|
|
125
|
-
# Diseases described in the DISEASE lines.
|
126
|
-
# ---
|
127
|
-
# *Returns*:: Hash of disease ID and its definition
|
128
|
-
def diseases_as_hash
|
129
|
-
unless (defined? @diseases_as_hash) && @diseases_as_hash
|
130
|
-
@diseases_as_hash = strings_as_hash(diseases_as_strings)
|
131
|
-
end
|
132
|
-
@diseases_as_hash
|
133
|
-
end
|
134
|
-
alias diseases diseases_as_hash
|
135
|
-
|
136
130
|
# Returns an Array of a database name and entry IDs in DBLINKS field.
|
137
131
|
# ---
|
138
132
|
# *Returns*:: Array containing String
|
data/lib/bio/sequence/common.rb
CHANGED
@@ -303,6 +303,118 @@ module Common
|
|
303
303
|
end
|
304
304
|
alias splicing splice
|
305
305
|
|
306
|
+
#--
|
307
|
+
# Workaround for Ruby 3.0.0 incompatible changes
|
308
|
+
if ::RUBY_VERSION > "3"
|
309
|
+
|
310
|
+
# Acts almost the same as String#split.
|
311
|
+
def split(*arg)
|
312
|
+
if block_given?
|
313
|
+
super
|
314
|
+
else
|
315
|
+
ret = super(*arg)
|
316
|
+
ret.collect! { |x| self.class.new('').replace(x) }
|
317
|
+
ret
|
318
|
+
end
|
319
|
+
end
|
320
|
+
|
321
|
+
%w( * ljust rjust center ).each do |w|
|
322
|
+
module_eval %Q{
|
323
|
+
def #{w}(*arg)
|
324
|
+
self.class.new('').replace(super)
|
325
|
+
end
|
326
|
+
}
|
327
|
+
end
|
328
|
+
|
329
|
+
%w( chomp chop
|
330
|
+
delete delete_prefix delete_suffix
|
331
|
+
lstrip rstrip strip
|
332
|
+
reverse
|
333
|
+
squeeze
|
334
|
+
succ next
|
335
|
+
tr tr_s
|
336
|
+
capitalize upcase downcase swapcase
|
337
|
+
).each do |w|
|
338
|
+
module_eval %Q{
|
339
|
+
def #{w}(*arg)
|
340
|
+
s = self.dup
|
341
|
+
s.#{w}!(*arg)
|
342
|
+
s
|
343
|
+
end
|
344
|
+
}
|
345
|
+
end
|
346
|
+
|
347
|
+
%w( sub gsub ).each do |w|
|
348
|
+
module_eval %Q{
|
349
|
+
def #{w}(*arg, &block)
|
350
|
+
s = self.dup
|
351
|
+
s.#{w}!(*arg, &block)
|
352
|
+
s
|
353
|
+
end
|
354
|
+
}
|
355
|
+
end
|
356
|
+
|
357
|
+
#Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
|
358
|
+
#(Title: Is it possible to implement gsub wrapper?)
|
359
|
+
%w( sub! gsub! ).each do |w|
|
360
|
+
module_eval %Q{
|
361
|
+
def #{w}(*arg, &block)
|
362
|
+
if block_given? then
|
363
|
+
super(*arg) do |m|
|
364
|
+
b = Thread.current[:_backref]
|
365
|
+
Thread.current[:_backref] = ::Regexp.last_match
|
366
|
+
block.binding.eval("$~ = Thread.current[:_backref]")
|
367
|
+
Thread.current[:_backref] = b
|
368
|
+
block.call(self.class.new('').replace(m))
|
369
|
+
end
|
370
|
+
else
|
371
|
+
super
|
372
|
+
end
|
373
|
+
end
|
374
|
+
}
|
375
|
+
end
|
376
|
+
|
377
|
+
%w( each_char each_grapheme_cluster each_line ).each do |w|
|
378
|
+
module_eval %Q{
|
379
|
+
def #{w}
|
380
|
+
if block_given?
|
381
|
+
super { |c| yield(self.class.new('').replace(c)) }
|
382
|
+
else
|
383
|
+
enum_for(:#{w})
|
384
|
+
end
|
385
|
+
end
|
386
|
+
}
|
387
|
+
end
|
388
|
+
|
389
|
+
%w( slice [] slice! ).each do |w|
|
390
|
+
module_eval %Q{
|
391
|
+
def #{w}(*arg)
|
392
|
+
r = super
|
393
|
+
r ? self.class.new('').replace(r) : r
|
394
|
+
end
|
395
|
+
}
|
396
|
+
end
|
397
|
+
|
398
|
+
%w( partition rpartition ).each do |w|
|
399
|
+
module_eval %Q{
|
400
|
+
def #{w}(sep)
|
401
|
+
r = super
|
402
|
+
if r.kind_of?(Array)
|
403
|
+
r[1] == sep ?
|
404
|
+
[ self.class.new('').replace(r[0]),
|
405
|
+
r[1],
|
406
|
+
self.class.new('').replace(r[2]) ] :
|
407
|
+
r.collect { |x| self.class.new('').replace(x) }
|
408
|
+
else
|
409
|
+
r
|
410
|
+
end
|
411
|
+
end
|
412
|
+
}
|
413
|
+
end
|
414
|
+
#++
|
415
|
+
|
416
|
+
end # if ::RUBY_VERSION > "3"
|
417
|
+
|
306
418
|
end # Common
|
307
419
|
|
308
420
|
end # Sequence
|
data/lib/bio/sequence/format.rb
CHANGED
data/lib/bio/tree.rb
CHANGED
@@ -605,7 +605,7 @@ module Bio
|
|
605
605
|
end
|
606
606
|
|
607
607
|
# Gets path from node1 to node2.
|
608
|
-
#
|
608
|
+
# Returns an array of nodes, including node1 and node2.
|
609
609
|
# If node1 and/or node2 do not exist, IndexError is raised.
|
610
610
|
# If node1 and node2 are not connected, NoPathError is raised.
|
611
611
|
# The result is unspecified for cyclic trees.
|
data/lib/bio/version.rb
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
module Bio
|
11
11
|
|
12
12
|
# BioRuby version (Array containing Integer)
|
13
|
-
BIORUBY_VERSION = [
|
13
|
+
BIORUBY_VERSION = [2, 0, 3].extend(Comparable).freeze
|
14
14
|
|
15
15
|
# Extra version specifier (String or nil).
|
16
16
|
# Existance of the value indicates development version.
|
@@ -19,8 +19,8 @@ module Bio
|
|
19
19
|
# ".pre :: Pre-release version.
|
20
20
|
#
|
21
21
|
# References: https://guides.rubygems.org/patterns/#prerelease-gems
|
22
|
-
BIORUBY_EXTRA_VERSION =
|
23
|
-
".pre"
|
22
|
+
BIORUBY_EXTRA_VERSION = nil
|
23
|
+
#".pre"
|
24
24
|
|
25
25
|
# Version identifier, including extra version string (String)
|
26
26
|
# Unlike BIORUBY_VERSION, it is not comparable.
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# color_scheme_aa.rb - A Bio::ColorScheme demo script for Amino Acid sequences.
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
#
|
7
|
+
# % ruby color_scheme_aa.rb > cs-seq-faa.html
|
8
|
+
#
|
9
|
+
# % cat seq.faa
|
10
|
+
# >AA_sequence
|
11
|
+
# MKRISTTITTTITITTGNGAG
|
12
|
+
# % ruby color_scheme_aa.rb seq.faa > colored-seq-faa.html
|
13
|
+
#
|
14
|
+
#
|
15
|
+
# Copyright:: Copyright (C) 2005
|
16
|
+
# Mitsuteru C. Nakao <n@bioruby.org>
|
17
|
+
# License:: The Ruby License
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'bio'
|
21
|
+
|
22
|
+
|
23
|
+
# returns folded sequence with <br>.
|
24
|
+
def br(i, width = 80)
|
25
|
+
return "<br\n>" if i % width == 0
|
26
|
+
""
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
# returns sequence html doc
|
31
|
+
def display(seq, cs)
|
32
|
+
html = '<p style="font-family: monospace">'
|
33
|
+
postfix = '</span>'
|
34
|
+
i = 0
|
35
|
+
seq.each_char do |c|
|
36
|
+
color = cs[c]
|
37
|
+
prefix = %Q(<span style="background:\##{color};">)
|
38
|
+
html += prefix + c + postfix
|
39
|
+
html += br(i += 1)
|
40
|
+
end
|
41
|
+
html + '</p>'
|
42
|
+
end
|
43
|
+
|
44
|
+
|
45
|
+
# returns scheme wise html doc
|
46
|
+
def display_scheme(scheme, aaseq)
|
47
|
+
html = ''
|
48
|
+
cs = Bio::ColorScheme.const_get(scheme.intern)
|
49
|
+
[aaseq].each do |seq|
|
50
|
+
html += display(seq, cs)
|
51
|
+
end
|
52
|
+
return ['<div>', "<h3>#{cs}</h3>", html, '</div>']
|
53
|
+
end
|
54
|
+
|
55
|
+
|
56
|
+
|
57
|
+
if fna = ARGV.shift
|
58
|
+
aaseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.aaseq }
|
59
|
+
else
|
60
|
+
aaseq = Bio::Sequence::AA.new('ARNDCQEGHILKMFPSTWYV' * 20).randomize
|
61
|
+
end
|
62
|
+
|
63
|
+
title = 'Bio::ColorScheme for amino acid sequences'
|
64
|
+
doc = ['<html>',
|
65
|
+
'<header>', '<title>', title, '</title>', '</header>',
|
66
|
+
'<body>', '<h1>', title, '</h1>']
|
67
|
+
|
68
|
+
doc << ['<div>', '<h2>', 'Simple colors', '</h2>']
|
69
|
+
|
70
|
+
['Zappo', 'Taylor' ].each do |scheme|
|
71
|
+
doc << display_scheme(scheme, aaseq)
|
72
|
+
end
|
73
|
+
doc << ['</div>']
|
74
|
+
|
75
|
+
|
76
|
+
doc << ['<div>', '<h2>', 'Score colors', '</h2>']
|
77
|
+
['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score|
|
78
|
+
doc << display_scheme(score, aaseq)
|
79
|
+
end
|
80
|
+
doc << ['</div>']
|
81
|
+
|
82
|
+
puts doc + ['</body>','</html>']
|
data/sample/color_scheme_na.rb
CHANGED
@@ -17,7 +17,6 @@
|
|
17
17
|
# Mitsuteru C. Nakao <n@bioruby.org>
|
18
18
|
# License:: The Ruby License
|
19
19
|
#
|
20
|
-
# $Id: color_scheme_na.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $
|
21
20
|
#
|
22
21
|
|
23
22
|
require 'bio'
|
@@ -35,10 +34,10 @@ def display(seq, cs)
|
|
35
34
|
html = '<p style="font-family: monospace">'
|
36
35
|
postfix = '</span>'
|
37
36
|
i = 0
|
38
|
-
seq.
|
39
|
-
color = cs[c
|
37
|
+
seq.each_char do |c|
|
38
|
+
color = cs[c]
|
40
39
|
prefix = %Q(<span style="background:\##{color};">)
|
41
|
-
html += prefix + c
|
40
|
+
html += prefix + c + postfix
|
42
41
|
html += br(i += 1)
|
43
42
|
end
|
44
43
|
html + '</p>'
|
@@ -48,7 +47,7 @@ end
|
|
48
47
|
# returns scheme wise html doc
|
49
48
|
def display_scheme(scheme, naseq, aaseq)
|
50
49
|
html = ''
|
51
|
-
cs =
|
50
|
+
cs = Bio::ColorScheme.const_get(scheme.intern)
|
52
51
|
[naseq, aaseq].each do |seq|
|
53
52
|
html += display(seq, cs)
|
54
53
|
end
|
@@ -58,7 +57,7 @@ end
|
|
58
57
|
|
59
58
|
|
60
59
|
if fna = ARGV.shift
|
61
|
-
naseq = Bio::
|
60
|
+
naseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.naseq }
|
62
61
|
aaseq = naseq.translate
|
63
62
|
else
|
64
63
|
naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize
|
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/usr/bin/env cwl-runner
|
2
|
+
|
3
|
+
cwlVersion: v1.0
|
4
|
+
class: CommandLineTool
|
5
|
+
baseCommand: [ruby]
|
6
|
+
|
7
|
+
inputs:
|
8
|
+
- id: script
|
9
|
+
type: File
|
10
|
+
default:
|
11
|
+
class: File
|
12
|
+
location: fastq2html.rb
|
13
|
+
inputBinding:
|
14
|
+
position: -1
|
15
|
+
- id: fastq
|
16
|
+
type: File[]
|
17
|
+
inputBinding:
|
18
|
+
position: 1
|
19
|
+
|
20
|
+
outputs:
|
21
|
+
- id: out
|
22
|
+
type: stdout
|
23
|
+
stdout: $(inputs.script.nameroot)-$(inputs.fastq[0].nameroot).html
|
@@ -0,0 +1,94 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
#
|
3
|
+
# fastq2html.rb - HTML visualization of FASTQ sequences
|
4
|
+
#
|
5
|
+
# Usage:
|
6
|
+
#
|
7
|
+
# % ruby fastq2html.rb seq00.fastq > seq00.html
|
8
|
+
#
|
9
|
+
#
|
10
|
+
# Copyright:: Copyright (C) 2019 BioRuby Project
|
11
|
+
# Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
|
12
|
+
# License:: The Ruby License
|
13
|
+
#
|
14
|
+
#
|
15
|
+
|
16
|
+
require 'bio'
|
17
|
+
|
18
|
+
# thickness to color
|
19
|
+
def thickness2color(t)
|
20
|
+
c = "%02X" % ((t * 255.0).to_i)
|
21
|
+
c * 3
|
22
|
+
end
|
23
|
+
|
24
|
+
# Creates
|
25
|
+
def create_score2color_hashes
|
26
|
+
h_bg = {}
|
27
|
+
h_char = {}
|
28
|
+
cutoff_low = 0
|
29
|
+
cutoff_high = 50
|
30
|
+
range = cutoff_high - cutoff_low
|
31
|
+
sc_min = -5
|
32
|
+
sc_max = 100
|
33
|
+
(sc_min..sc_max).each do |i|
|
34
|
+
t = if i <= cutoff_low then
|
35
|
+
0.0
|
36
|
+
elsif i >= cutoff_high then
|
37
|
+
1.0
|
38
|
+
else
|
39
|
+
(i - cutoff_low).to_f / range
|
40
|
+
end
|
41
|
+
h_bg[i] = thickness2color(t)
|
42
|
+
h_char[i] = thickness2color((t > 0.3) ? 0.0 : 0.55)
|
43
|
+
end
|
44
|
+
h_bg.default = h_bg[cutoff_low]
|
45
|
+
h_char.default = h_char[cutoff_low]
|
46
|
+
[h_bg, h_char]
|
47
|
+
end
|
48
|
+
|
49
|
+
# Color code from quality score
|
50
|
+
SCORE2COLOR_BG, SCORE2COLOR_CHAR = create_score2color_hashes
|
51
|
+
|
52
|
+
# returns folded sequence with <br>.
|
53
|
+
def br(i, width = 80)
|
54
|
+
return "<br\n>" if i % width == 0
|
55
|
+
""
|
56
|
+
end
|
57
|
+
|
58
|
+
# returns sequence html doc
|
59
|
+
def display(naseq, scores)
|
60
|
+
html = '<p style="font-family: monospace">'
|
61
|
+
postfix = '</span>'
|
62
|
+
i = 0
|
63
|
+
naseq.each_char.with_index do |c, i|
|
64
|
+
sc = scores[i]
|
65
|
+
bgcol = SCORE2COLOR_BG[sc]
|
66
|
+
col = SCORE2COLOR_CHAR[sc]
|
67
|
+
prefix = %Q(<span style="color:\##{col}; background:\##{bgcol};">)
|
68
|
+
html += prefix + c + postfix
|
69
|
+
html += br(i += 1)
|
70
|
+
end
|
71
|
+
html + '</p>'
|
72
|
+
end
|
73
|
+
|
74
|
+
# returns colorized html doc
|
75
|
+
def fastq2html(definition, naseq, scores)
|
76
|
+
html = display(naseq, scores)
|
77
|
+
return ['<div>', "<div>>#{CGI.escapeHTML(definition)}</div>", html, '</div>']
|
78
|
+
end
|
79
|
+
|
80
|
+
title = 'Sequences with quality scores'
|
81
|
+
puts ['<html>',
|
82
|
+
'<header>', '<title>', title, '</title>', '</header>',
|
83
|
+
'<body>', '<h1>', title, '</h1>']
|
84
|
+
|
85
|
+
#main loop
|
86
|
+
ARGV.each do |filename|
|
87
|
+
Bio::FlatFile.open(filename) do |ff|
|
88
|
+
ff.each do |e|
|
89
|
+
puts fastq2html(e.definition, e.naseq, e.quality_scores)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
puts ['</body>','</html>']
|