bio 1.6.0.pre.20181210 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +32 -34
  3. data/ChangeLog +1462 -8
  4. data/Gemfile +0 -2
  5. data/KNOWN_ISSUES.rdoc +4 -10
  6. data/LEGAL +0 -10
  7. data/README.rdoc +31 -80
  8. data/README_DEV.rdoc +5 -5
  9. data/RELEASE_NOTES.rdoc +171 -18
  10. data/Rakefile +0 -34
  11. data/appveyor.yml +15 -7
  12. data/bioruby.gemspec +13 -4
  13. data/bioruby.gemspec.erb +0 -1
  14. data/gemfiles/Gemfile.travis-rbx +0 -2
  15. data/gemfiles/Gemfile.travis-ruby1.8 +0 -2
  16. data/gemfiles/Gemfile.travis-ruby1.9 +0 -2
  17. data/gemfiles/Gemfile.windows +6 -0
  18. data/lib/bio/appl/blast/report.rb +40 -8
  19. data/lib/bio/appl/iprscan/report.rb +3 -3
  20. data/lib/bio/appl/sosui/report.rb +1 -1
  21. data/lib/bio/db/embl/uniprotkb.rb +1 -1
  22. data/lib/bio/db/gff.rb +3 -1
  23. data/lib/bio/db/go.rb +2 -2
  24. data/lib/bio/db/kegg/common.rb +14 -0
  25. data/lib/bio/db/kegg/genes.rb +26 -0
  26. data/lib/bio/db/kegg/pathway.rb +5 -11
  27. data/lib/bio/sequence/common.rb +112 -0
  28. data/lib/bio/sequence/format.rb +1 -0
  29. data/lib/bio/tree.rb +1 -1
  30. data/lib/bio/version.rb +3 -3
  31. data/sample/color_scheme_aa.rb +82 -0
  32. data/sample/color_scheme_na.rb +5 -6
  33. data/sample/fastq2html.cwl +23 -0
  34. data/sample/fastq2html.rb +94 -0
  35. data/sample/fastq2html.testdata.yaml +5 -0
  36. data/sample/na2aa.cwl +23 -0
  37. data/sample/na2aa.rb +11 -25
  38. data/sample/na2aa.testdata.yaml +7 -0
  39. data/sample/rev_comp.cwl +23 -0
  40. data/sample/rev_comp.rb +20 -0
  41. data/sample/rev_comp.testdata.yaml +7 -0
  42. data/test/network/bio/db/kegg/test_genes_hsa7422.rb +91 -0
  43. data/test/unit/bio/appl/blast/test_report.rb +4 -4
  44. data/test/unit/bio/db/test_gff.rb +5 -0
  45. data/test/unit/bio/sequence/test_ruby3.rb +462 -0
  46. metadata +17 -8
  47. data/lib/bio/appl/blast/xmlparser.rb +0 -236
  48. data/setup.rb +0 -1600
@@ -3,8 +3,6 @@ source "https://rubygems.org"
3
3
  gem "rake"
4
4
  gem "rdoc"
5
5
 
6
- gem "xmlparser"
7
-
8
6
  platforms :rbx do
9
7
  gem 'racc'
10
8
  gem 'rubysl', '~> 2.0'
@@ -4,5 +4,3 @@ gem "rake", "~>10.4"
4
4
  # rdoc 4.3.0 requires Ruby >= 1.9.3
5
5
  gem "rdoc", "~>4.2.0"
6
6
 
7
- gem "xmlparser", "~>0.7.2"
8
-
@@ -3,5 +3,3 @@ source "https://rubygems.org"
3
3
  gem "rake"
4
4
  gem "rdoc"
5
5
 
6
- gem "xmlparser"
7
-
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem "rake"
4
+ gem "rdoc"
5
+ gem "test-unit"
6
+
@@ -43,18 +43,20 @@ class Report
43
43
  #--
44
44
  # require lines moved here to avoid circular require
45
45
  #++
46
- require 'bio/appl/blast/xmlparser'
47
46
  require 'bio/appl/blast/rexml'
48
47
  require 'bio/appl/blast/format8'
49
48
 
49
+ #--
50
+ # loading bio-blast-xmlparser plugin if available
51
+ #++
52
+ begin
53
+ require 'bio-blast-xmlparser'
54
+ rescue LoadError
55
+ end
56
+
50
57
  # for Bio::FlatFile support (only for XML data)
51
58
  DELIMITER = RS = "</BlastOutput>\n"
52
59
 
53
- # Specify to use XMLParser to parse XML (-m 7) output.
54
- def self.xmlparser(data)
55
- self.new(data, :xmlparser)
56
- end
57
-
58
60
  # Specify to use REXML to parse XML (-m 7) output.
59
61
  def self.rexml(data)
60
62
  self.new(data, :rexml)
@@ -67,7 +69,7 @@ class Report
67
69
 
68
70
  def auto_parse(data)
69
71
  if /<?xml/.match(data[/.*/])
70
- if defined?(XMLParser)
72
+ if defined? xmlparser_parse
71
73
  xmlparser_parse(data)
72
74
  @reports = blastxml_split_reports
73
75
  else
@@ -87,7 +89,11 @@ class Report
87
89
  @parameters = {}
88
90
  case parser
89
91
  when :xmlparser # format 7
90
- xmlparser_parse(data)
92
+ if defined? xmlparser_parse
93
+ xmlparser_parse(data)
94
+ else
95
+ raise NameError, "xmlparser_parse does not defined"
96
+ end
91
97
  @reports = blastxml_split_reports
92
98
  when :rexml # format 7
93
99
  rexml_parse(data)
@@ -383,6 +389,32 @@ class Report
383
389
  attr_reader :reports
384
390
 
385
391
  private
392
+ # set parameter of the key as val
393
+ def xml_set_parameter(key, val)
394
+ #labels = {
395
+ # 'matrix' => 'Parameters_matrix',
396
+ # 'expect' => 'Parameters_expect',
397
+ # 'include' => 'Parameters_include',
398
+ # 'sc-match' => 'Parameters_sc-match',
399
+ # 'sc-mismatch' => 'Parameters_sc-mismatch',
400
+ # 'gap-open' => 'Parameters_gap-open',
401
+ # 'gap-extend' => 'Parameters_gap-extend',
402
+ # 'filter' => 'Parameters_filter',
403
+ # 'pattern' => 'Parameters_pattern',
404
+ # 'entrez-query' => 'Parameters_entrez-query',
405
+ #}
406
+ k = key.sub(/\AParameters\_/, '')
407
+ @parameters[k] =
408
+ case k
409
+ when 'expect', 'include'
410
+ val.to_f
411
+ when /\Agap\-/, /\Asc\-/
412
+ val.to_i
413
+ else
414
+ val
415
+ end
416
+ end
417
+
386
418
  # (private method)
387
419
  # In new BLAST XML (blastall >= 2.2.14), results of multiple queries
388
420
  # are stored in <Iteration>. This method splits iterations into
@@ -83,7 +83,7 @@ module Bio
83
83
  yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != ''
84
84
  end
85
85
 
86
- # Parser method for a raw formated entry. Retruns a Bio::Iprscan::Report
86
+ # Parser method for a raw formated entry. Returns a Bio::Iprscan::Report
87
87
  # object.
88
88
  def self.parse_raw_entry(str)
89
89
  report = self.new
@@ -113,7 +113,7 @@ module Bio
113
113
 
114
114
 
115
115
 
116
- # Parser method for a xml formated entry. Retruns a Bio::Iprscan::Report
116
+ # Parser method for a xml formated entry. Returns a Bio::Iprscan::Report
117
117
  # object.
118
118
  # def self.parse_xml(str)
119
119
  # end
@@ -196,7 +196,7 @@ module Bio
196
196
  end
197
197
  end
198
198
 
199
- # Parser method for a pseudo-txt formated entry. Retruns a Bio::Iprscan::Report
199
+ # Parser method for a pseudo-txt formated entry. Returns a Bio::Iprscan::Report
200
200
  # object.
201
201
  #
202
202
  # == Usage
@@ -78,7 +78,7 @@ module Bio
78
78
  # Returns the range of the transmembrane helix
79
79
  attr_reader :range
80
80
 
81
- # Retruns ``PRIMARY'' or ``SECONDARY'' of helix.
81
+ # Returns ``PRIMARY'' or ``SECONDARY'' of helix.
82
82
  attr_reader :grade
83
83
 
84
84
  # Returns the sequence of the transmembrane helix.
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
174
174
  #
175
175
  # http://www.uniprot.org/docs/sp_news.htm
176
176
  def parse_DE_line_rel14(str)
177
- # Retruns if it is not the new format since Rel.14
177
+ # Returns if it is not the new format since Rel.14
178
178
  return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
179
179
  ret = []
180
180
  cur = nil
data/lib/bio/db/gff.rb CHANGED
@@ -359,7 +359,9 @@ module Bio
359
359
  # Parses a GFF2-formatted line and returns a new
360
360
  # Bio::GFF::GFF2::Record object.
361
361
  def self.parse(str)
362
- self.new.parse(str)
362
+ ret = self.new
363
+ ret.parse(str)
364
+ ret
363
365
  end
364
366
 
365
367
  # Creates a Bio::GFF::GFF2::Record object.
data/lib/bio/db/go.rb CHANGED
@@ -193,7 +193,7 @@ class GO
193
193
  # Delimiter
194
194
  RS = DELIMITER
195
195
 
196
- # Retruns an Array of parsed gene_association flatfile.
196
+ # Returns an Array of parsed gene_association flatfile.
197
197
  # Block is acceptable.
198
198
  def self.parser(str)
199
199
  if block_given?
@@ -226,7 +226,7 @@ class GO
226
226
  # Returns Db_Reference variable.
227
227
  attr_reader :db_reference # -> []
228
228
 
229
- # Retruns Evidence code variable.
229
+ # Returns Evidence code variable.
230
230
  attr_reader :evidence
231
231
 
232
232
  # Returns the entry is associated with this value.
@@ -210,6 +210,20 @@ class KEGG
210
210
  private :strings_as_hash
211
211
  end #module StringsAsHash
212
212
 
213
+ # This module provides diseases_as_hash method.
214
+ #
215
+ # Bio::KEGG::* internal use only.
216
+ module DiseasesAsHash
217
+ include StringsAsHash
218
+ # Returns a Hash of the disease ID and its definition
219
+ def diseases_as_hash
220
+ unless (defined? @diseases_as_hash) && @diseases_as_hash
221
+ @diseases_as_hash = strings_as_hash(diseases_as_strings)
222
+ end
223
+ @diseases_as_hash
224
+ end
225
+ end #module DiseasesAsHash
226
+
213
227
  end #module Common
214
228
  end #class KEGG
215
229
  end #module Bio
@@ -107,6 +107,11 @@ class GENES < KEGGDB
107
107
  def orthologs_as_hash; super; end if false #dummy for RDoc
108
108
  alias orthologs orthologs_as_hash
109
109
 
110
+ include Common::DiseasesAsHash
111
+ # Returns a Hash of the disease ID and its definition
112
+ def diseases_as_hash; super; end if false #dummy for RDoc
113
+ alias diseases diseases_as_hash
114
+
110
115
  # Creates a new Bio::KEGG::GENES object.
111
116
  # ---
112
117
  # *Arguments*:
@@ -238,6 +243,27 @@ class GENES < KEGGDB
238
243
  lines_fetch('PATHWAY')
239
244
  end
240
245
 
246
+ # Networks described in the NETWORK lines.
247
+ # ---
248
+ # *Returns*:: Array containing String
249
+ def networks_as_strings
250
+ lines_fetch('NETWORK')
251
+ end
252
+
253
+ # Diseases described in the DISEASE lines.
254
+ # ---
255
+ # *Returns*:: Array containing String
256
+ def diseases_as_strings
257
+ lines_fetch('DISEASE')
258
+ end
259
+
260
+ # Drug targets described in the DRUG_TARGET lines.
261
+ # ---
262
+ # *Returns*:: Array containing String
263
+ def drug_targets_as_strings
264
+ lines_fetch('DRUG_TARGET')
265
+ end
266
+
241
267
  # Returns CLASS field of the entry.
242
268
  def keggclass
243
269
  field_fetch('CLASS')
@@ -42,6 +42,11 @@ class PATHWAY < KEGGDB
42
42
  def orthologs_as_hash; super; end if false #dummy for RDoc
43
43
  alias orthologs orthologs_as_hash
44
44
 
45
+ include Common::DiseasesAsHash
46
+ # Returns a Hash of the disease ID and its definition
47
+ def diseases_as_hash; super; end if false #dummy for RDoc
48
+ alias diseases diseases_as_hash
49
+
45
50
  include Common::References
46
51
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
47
52
  # Bio::Reference objects.
@@ -122,17 +127,6 @@ class PATHWAY < KEGGDB
122
127
  lines_fetch('DISEASE')
123
128
  end
124
129
 
125
- # Diseases described in the DISEASE lines.
126
- # ---
127
- # *Returns*:: Hash of disease ID and its definition
128
- def diseases_as_hash
129
- unless (defined? @diseases_as_hash) && @diseases_as_hash
130
- @diseases_as_hash = strings_as_hash(diseases_as_strings)
131
- end
132
- @diseases_as_hash
133
- end
134
- alias diseases diseases_as_hash
135
-
136
130
  # Returns an Array of a database name and entry IDs in DBLINKS field.
137
131
  # ---
138
132
  # *Returns*:: Array containing String
@@ -303,6 +303,118 @@ module Common
303
303
  end
304
304
  alias splicing splice
305
305
 
306
+ #--
307
+ # Workaround for Ruby 3.0.0 incompatible changes (String methods return plain String objects instead of instances of the receiver's subclass)
308
+ if ::RUBY_VERSION > "3"
309
+
310
+ # Acts almost the same as String#split.
311
+ def split(*arg)
312
+ if block_given?
313
+ super
314
+ else
315
+ ret = super(*arg)
316
+ ret.collect! { |x| self.class.new('').replace(x) }
317
+ ret
318
+ end
319
+ end
320
+
321
+ %w( * ljust rjust center ).each do |w|
322
+ module_eval %Q{
323
+ def #{w}(*arg)
324
+ self.class.new('').replace(super)
325
+ end
326
+ }
327
+ end
328
+
329
+ %w( chomp chop
330
+ delete delete_prefix delete_suffix
331
+ lstrip rstrip strip
332
+ reverse
333
+ squeeze
334
+ succ next
335
+ tr tr_s
336
+ capitalize upcase downcase swapcase
337
+ ).each do |w|
338
+ module_eval %Q{
339
+ def #{w}(*arg)
340
+ s = self.dup
341
+ s.#{w}!(*arg)
342
+ s
343
+ end
344
+ }
345
+ end
346
+
347
+ %w( sub gsub ).each do |w|
348
+ module_eval %Q{
349
+ def #{w}(*arg, &block)
350
+ s = self.dup
351
+ s.#{w}!(*arg, &block)
352
+ s
353
+ end
354
+ }
355
+ end
356
+
357
+ #Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
358
+ #(Title: Is it possible to implement gsub wrapper?)
359
+ %w( sub! gsub! ).each do |w|
360
+ module_eval %Q{
361
+ def #{w}(*arg, &block)
362
+ if block_given? then
363
+ super(*arg) do |m|
364
+ b = Thread.current[:_backref]
365
+ Thread.current[:_backref] = ::Regexp.last_match
366
+ block.binding.eval("$~ = Thread.current[:_backref]")
367
+ Thread.current[:_backref] = b
368
+ block.call(self.class.new('').replace(m))
369
+ end
370
+ else
371
+ super
372
+ end
373
+ end
374
+ }
375
+ end
376
+
377
+ %w( each_char each_grapheme_cluster each_line ).each do |w|
378
+ module_eval %Q{
379
+ def #{w}
380
+ if block_given?
381
+ super { |c| yield(self.class.new('').replace(c)) }
382
+ else
383
+ enum_for(:#{w})
384
+ end
385
+ end
386
+ }
387
+ end
388
+
389
+ %w( slice [] slice! ).each do |w|
390
+ module_eval %Q{
391
+ def #{w}(*arg)
392
+ r = super
393
+ r ? self.class.new('').replace(r) : r
394
+ end
395
+ }
396
+ end
397
+
398
+ %w( partition rpartition ).each do |w|
399
+ module_eval %Q{
400
+ def #{w}(sep)
401
+ r = super
402
+ if r.kind_of?(Array)
403
+ r[1] == sep ?
404
+ [ self.class.new('').replace(r[0]),
405
+ r[1],
406
+ self.class.new('').replace(r[2]) ] :
407
+ r.collect { |x| self.class.new('').replace(x) }
408
+ else
409
+ r
410
+ end
411
+ end
412
+ }
413
+ end
414
+ #++
415
+
416
+ end # if ::RUBY_VERSION > "3"
417
+
306
418
  end # Common
307
419
 
308
420
  end # Sequence
@@ -10,6 +10,7 @@
10
10
  #
11
11
 
12
12
  require 'erb'
13
+ require 'date'
13
14
 
14
15
  module Bio
15
16
 
data/lib/bio/tree.rb CHANGED
@@ -605,7 +605,7 @@ module Bio
605
605
  end
606
606
 
607
607
  # Gets path from node1 to node2.
608
- # Retruns an array of nodes, including node1 and node2.
608
+ # Returns an array of nodes, including node1 and node2.
609
609
  # If node1 and/or node2 do not exist, IndexError is raised.
610
610
  # If node1 and node2 are not connected, NoPathError is raised.
611
611
  # The result is unspecified for cyclic trees.
data/lib/bio/version.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [1, 6, 0].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [2, 0, 3].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
16
  # Existence of the value indicates a development version.
@@ -19,8 +19,8 @@ module Bio
19
19
  # ".pre" :: Pre-release version.
20
20
  #
21
21
  # References: https://guides.rubygems.org/patterns/#prerelease-gems
22
- BIORUBY_EXTRA_VERSION = #nil
23
- ".pre"
22
+ BIORUBY_EXTRA_VERSION = nil
23
+ #".pre"
24
24
 
25
25
  # Version identifier, including extra version string (String)
26
26
  # Unlike BIORUBY_VERSION, it is not comparable.
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # color_scheme_aa.rb - A Bio::ColorScheme demo script for Amino Acid sequences.
4
+ #
5
+ # Usage:
6
+ #
7
+ # % ruby color_scheme_aa.rb > cs-seq-faa.html
8
+ #
9
+ # % cat seq.faa
10
+ # >AA_sequence
11
+ # MKRISTTITTTITITTGNGAG
12
+ # % ruby color_scheme_aa.rb seq.faa > colored-seq-faa.html
13
+ #
14
+ #
15
+ # Copyright:: Copyright (C) 2005
16
+ # Mitsuteru C. Nakao <n@bioruby.org>
17
+ # License:: The Ruby License
18
+ #
19
+
20
+ require 'bio'
21
+
22
+
23
+ # returns a <br> tag when i is a multiple of width, otherwise an empty string.
24
+ def br(i, width = 80)
25
+ return "<br\n>" if i % width == 0
26
+ ""
27
+ end
28
+
29
+
30
+ # returns sequence html doc
31
+ def display(seq, cs)
32
+ html = '<p style="font-family: monospace">'
33
+ postfix = '</span>'
34
+ i = 0
35
+ seq.each_char do |c|
36
+ color = cs[c]
37
+ prefix = %Q(<span style="background:\##{color};">)
38
+ html += prefix + c + postfix
39
+ html += br(i += 1)
40
+ end
41
+ html + '</p>'
42
+ end
43
+
44
+
45
+ # returns an array of HTML fragments for the given color scheme
46
+ def display_scheme(scheme, aaseq)
47
+ html = ''
48
+ cs = Bio::ColorScheme.const_get(scheme.intern)
49
+ [aaseq].each do |seq|
50
+ html += display(seq, cs)
51
+ end
52
+ return ['<div>', "<h3>#{cs}</h3>", html, '</div>']
53
+ end
54
+
55
+
56
+
57
+ if fna = ARGV.shift
58
+ aaseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.aaseq }
59
+ else
60
+ aaseq = Bio::Sequence::AA.new('ARNDCQEGHILKMFPSTWYV' * 20).randomize
61
+ end
62
+
63
+ title = 'Bio::ColorScheme for amino acid sequences'
64
+ doc = ['<html>',
65
+ '<header>', '<title>', title, '</title>', '</header>',
66
+ '<body>', '<h1>', title, '</h1>']
67
+
68
+ doc << ['<div>', '<h2>', 'Simple colors', '</h2>']
69
+
70
+ ['Zappo', 'Taylor' ].each do |scheme|
71
+ doc << display_scheme(scheme, aaseq)
72
+ end
73
+ doc << ['</div>']
74
+
75
+
76
+ doc << ['<div>', '<h2>', 'Score colors', '</h2>']
77
+ ['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score|
78
+ doc << display_scheme(score, aaseq)
79
+ end
80
+ doc << ['</div>']
81
+
82
+ puts doc + ['</body>','</html>']
@@ -17,7 +17,6 @@
17
17
  # Mitsuteru C. Nakao <n@bioruby.org>
18
18
  # License:: The Ruby License
19
19
  #
20
- # $Id: color_scheme_na.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $
21
20
  #
22
21
 
23
22
  require 'bio'
@@ -35,10 +34,10 @@ def display(seq, cs)
35
34
  html = '<p style="font-family: monospace">'
36
35
  postfix = '</span>'
37
36
  i = 0
38
- seq.each_byte do |c|
39
- color = cs[c.chr]
37
+ seq.each_char do |c|
38
+ color = cs[c]
40
39
  prefix = %Q(<span style="background:\##{color};">)
41
- html += prefix + c.chr + postfix
40
+ html += prefix + c + postfix
42
41
  html += br(i += 1)
43
42
  end
44
43
  html + '</p>'
@@ -48,7 +47,7 @@ end
48
47
  # returns scheme wise html doc
49
48
  def display_scheme(scheme, naseq, aaseq)
50
49
  html = ''
51
- cs = eval("Bio::ColorScheme::#{scheme}")
50
+ cs = Bio::ColorScheme.const_get(scheme.intern)
52
51
  [naseq, aaseq].each do |seq|
53
52
  html += display(seq, cs)
54
53
  end
@@ -58,7 +57,7 @@ end
58
57
 
59
58
 
60
59
  if fna = ARGV.shift
61
- naseq = Bio::FastaFormat.new(File.open(fna, 'r').read).naseq
60
+ naseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.naseq }
62
61
  aaseq = naseq.translate
63
62
  else
64
63
  naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ruby]
6
+
7
+ inputs:
8
+ - id: script
9
+ type: File
10
+ default:
11
+ class: File
12
+ location: fastq2html.rb
13
+ inputBinding:
14
+ position: -1
15
+ - id: fastq
16
+ type: File[]
17
+ inputBinding:
18
+ position: 1
19
+
20
+ outputs:
21
+ - id: out
22
+ type: stdout
23
+ stdout: $(inputs.script.nameroot)-$(inputs.fastq[0].nameroot).html
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # fastq2html.rb - HTML visualization of FASTQ sequences
4
+ #
5
+ # Usage:
6
+ #
7
+ # % ruby fastq2html.rb seq00.fastq > seq00.html
8
+ #
9
+ #
10
+ # Copyright:: Copyright (C) 2019 BioRuby Project
11
+ # Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
12
+ # License:: The Ruby License
13
+ #
14
+ #
15
+
16
+ require 'bio'
17
+
18
+ # converts thickness t (0.0 to 1.0) into a gray-scale "RRGGBB" hex color string
19
+ def thickness2color(t)
20
+ c = "%02X" % ((t * 255.0).to_i)
21
+ c * 3
22
+ end
23
+
24
+ # Creates two hashes that map a quality score to a background color and a character color
25
+ def create_score2color_hashes
26
+ h_bg = {}
27
+ h_char = {}
28
+ cutoff_low = 0
29
+ cutoff_high = 50
30
+ range = cutoff_high - cutoff_low
31
+ sc_min = -5
32
+ sc_max = 100
33
+ (sc_min..sc_max).each do |i|
34
+ t = if i <= cutoff_low then
35
+ 0.0
36
+ elsif i >= cutoff_high then
37
+ 1.0
38
+ else
39
+ (i - cutoff_low).to_f / range
40
+ end
41
+ h_bg[i] = thickness2color(t)
42
+ h_char[i] = thickness2color((t > 0.3) ? 0.0 : 0.55)
43
+ end
44
+ h_bg.default = h_bg[cutoff_low]
45
+ h_char.default = h_char[cutoff_low]
46
+ [h_bg, h_char]
47
+ end
48
+
49
+ # Color code from quality score
50
+ SCORE2COLOR_BG, SCORE2COLOR_CHAR = create_score2color_hashes
51
+
52
+ # returns a <br> tag when i is a multiple of width, otherwise an empty string.
53
+ def br(i, width = 80)
54
+ return "<br\n>" if i % width == 0
55
+ ""
56
+ end
57
+
58
+ # returns sequence html doc
59
+ def display(naseq, scores)
60
+ html = '<p style="font-family: monospace">'
61
+ postfix = '</span>'
62
+ i = 0
63
+ naseq.each_char.with_index do |c, i|
64
+ sc = scores[i]
65
+ bgcol = SCORE2COLOR_BG[sc]
66
+ col = SCORE2COLOR_CHAR[sc]
67
+ prefix = %Q(<span style="color:\##{col}; background:\##{bgcol};">)
68
+ html += prefix + c + postfix
69
+ html += br(i += 1)
70
+ end
71
+ html + '</p>'
72
+ end
73
+
74
+ # returns colorized html doc
75
+ def fastq2html(definition, naseq, scores)
76
+ html = display(naseq, scores)
77
+ return ['<div>', "<div>&gt;#{CGI.escapeHTML(definition)}</div>", html, '</div>']
78
+ end
79
+
80
+ title = 'Sequences with quality scores'
81
+ puts ['<html>',
82
+ '<header>', '<title>', title, '</title>', '</header>',
83
+ '<body>', '<h1>', title, '</h1>']
84
+
85
+ #main loop
86
+ ARGV.each do |filename|
87
+ Bio::FlatFile.open(filename) do |ff|
88
+ ff.each do |e|
89
+ puts fastq2html(e.definition, e.naseq, e.quality_scores)
90
+ end
91
+ end
92
+ end
93
+
94
+ puts ['</body>','</html>']
@@ -0,0 +1,5 @@
1
+ fastq:
2
+ - class: File
3
+ location: ../test/data/fastq/longreads_as_sanger.fastq
4
+ - class: File
5
+ location: ../test/data/fastq/sanger_full_range_original_sanger.fastq