bio 1.6.0.pre.20181210 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +32 -34
  3. data/ChangeLog +1462 -8
  4. data/Gemfile +0 -2
  5. data/KNOWN_ISSUES.rdoc +4 -10
  6. data/LEGAL +0 -10
  7. data/README.rdoc +31 -80
  8. data/README_DEV.rdoc +5 -5
  9. data/RELEASE_NOTES.rdoc +171 -18
  10. data/Rakefile +0 -34
  11. data/appveyor.yml +15 -7
  12. data/bioruby.gemspec +13 -4
  13. data/bioruby.gemspec.erb +0 -1
  14. data/gemfiles/Gemfile.travis-rbx +0 -2
  15. data/gemfiles/Gemfile.travis-ruby1.8 +0 -2
  16. data/gemfiles/Gemfile.travis-ruby1.9 +0 -2
  17. data/gemfiles/Gemfile.windows +6 -0
  18. data/lib/bio/appl/blast/report.rb +40 -8
  19. data/lib/bio/appl/iprscan/report.rb +3 -3
  20. data/lib/bio/appl/sosui/report.rb +1 -1
  21. data/lib/bio/db/embl/uniprotkb.rb +1 -1
  22. data/lib/bio/db/gff.rb +3 -1
  23. data/lib/bio/db/go.rb +2 -2
  24. data/lib/bio/db/kegg/common.rb +14 -0
  25. data/lib/bio/db/kegg/genes.rb +26 -0
  26. data/lib/bio/db/kegg/pathway.rb +5 -11
  27. data/lib/bio/sequence/common.rb +112 -0
  28. data/lib/bio/sequence/format.rb +1 -0
  29. data/lib/bio/tree.rb +1 -1
  30. data/lib/bio/version.rb +3 -3
  31. data/sample/color_scheme_aa.rb +82 -0
  32. data/sample/color_scheme_na.rb +5 -6
  33. data/sample/fastq2html.cwl +23 -0
  34. data/sample/fastq2html.rb +94 -0
  35. data/sample/fastq2html.testdata.yaml +5 -0
  36. data/sample/na2aa.cwl +23 -0
  37. data/sample/na2aa.rb +11 -25
  38. data/sample/na2aa.testdata.yaml +7 -0
  39. data/sample/rev_comp.cwl +23 -0
  40. data/sample/rev_comp.rb +20 -0
  41. data/sample/rev_comp.testdata.yaml +7 -0
  42. data/test/network/bio/db/kegg/test_genes_hsa7422.rb +91 -0
  43. data/test/unit/bio/appl/blast/test_report.rb +4 -4
  44. data/test/unit/bio/db/test_gff.rb +5 -0
  45. data/test/unit/bio/sequence/test_ruby3.rb +462 -0
  46. metadata +17 -8
  47. data/lib/bio/appl/blast/xmlparser.rb +0 -236
  48. data/setup.rb +0 -1600
@@ -3,8 +3,6 @@ source "https://rubygems.org"
3
3
  gem "rake"
4
4
  gem "rdoc"
5
5
 
6
- gem "xmlparser"
7
-
8
6
  platforms :rbx do
9
7
  gem 'racc'
10
8
  gem 'rubysl', '~> 2.0'
@@ -4,5 +4,3 @@ gem "rake", "~>10.4"
4
4
  # rdoc 4.3.0 requires Ruby >= 1.9.3
5
5
  gem "rdoc", "~>4.2.0"
6
6
 
7
- gem "xmlparser", "~>0.7.2"
8
-
@@ -3,5 +3,3 @@ source "https://rubygems.org"
3
3
  gem "rake"
4
4
  gem "rdoc"
5
5
 
6
- gem "xmlparser"
7
-
@@ -0,0 +1,6 @@
1
+ source "https://rubygems.org"
2
+
3
+ gem "rake"
4
+ gem "rdoc"
5
+ gem "test-unit"
6
+
@@ -43,18 +43,20 @@ class Report
43
43
  #--
44
44
  # require lines moved here to avoid circular require
45
45
  #++
46
- require 'bio/appl/blast/xmlparser'
47
46
  require 'bio/appl/blast/rexml'
48
47
  require 'bio/appl/blast/format8'
49
48
 
49
+ #--
50
+ # loading bio-blast-xmlparser plugin if available
51
+ #++
52
+ begin
53
+ require 'bio-blast-xmlparser'
54
+ rescue LoadError
55
+ end
56
+
50
57
  # for Bio::FlatFile support (only for XML data)
51
58
  DELIMITER = RS = "</BlastOutput>\n"
52
59
 
53
- # Specify to use XMLParser to parse XML (-m 7) output.
54
- def self.xmlparser(data)
55
- self.new(data, :xmlparser)
56
- end
57
-
58
60
  # Specify to use REXML to parse XML (-m 7) output.
59
61
  def self.rexml(data)
60
62
  self.new(data, :rexml)
@@ -67,7 +69,7 @@ class Report
67
69
 
68
70
  def auto_parse(data)
69
71
  if /<?xml/.match(data[/.*/])
70
- if defined?(XMLParser)
72
+ if defined? xmlparser_parse
71
73
  xmlparser_parse(data)
72
74
  @reports = blastxml_split_reports
73
75
  else
@@ -87,7 +89,11 @@ class Report
87
89
  @parameters = {}
88
90
  case parser
89
91
  when :xmlparser # format 7
90
- xmlparser_parse(data)
92
+ if defined? xmlparser_parse
93
+ xmlparser_parse(data)
94
+ else
95
+ raise NameError, "xmlparser_parse does not defined"
96
+ end
91
97
  @reports = blastxml_split_reports
92
98
  when :rexml # format 7
93
99
  rexml_parse(data)
@@ -383,6 +389,32 @@ class Report
383
389
  attr_reader :reports
384
390
 
385
391
  private
392
+ # set parameter of the key as val
393
+ def xml_set_parameter(key, val)
394
+ #labels = {
395
+ # 'matrix' => 'Parameters_matrix',
396
+ # 'expect' => 'Parameters_expect',
397
+ # 'include' => 'Parameters_include',
398
+ # 'sc-match' => 'Parameters_sc-match',
399
+ # 'sc-mismatch' => 'Parameters_sc-mismatch',
400
+ # 'gap-open' => 'Parameters_gap-open',
401
+ # 'gap-extend' => 'Parameters_gap-extend',
402
+ # 'filter' => 'Parameters_filter',
403
+ # 'pattern' => 'Parameters_pattern',
404
+ # 'entrez-query' => 'Parameters_entrez-query',
405
+ #}
406
+ k = key.sub(/\AParameters\_/, '')
407
+ @parameters[k] =
408
+ case k
409
+ when 'expect', 'include'
410
+ val.to_f
411
+ when /\Agap\-/, /\Asc\-/
412
+ val.to_i
413
+ else
414
+ val
415
+ end
416
+ end
417
+
386
418
  # (private method)
387
419
  # In new BLAST XML (blastall >= 2.2.14), results of multiple queries
388
420
  # are stored in <Iteration>. This method splits iterations into
@@ -83,7 +83,7 @@ module Bio
83
83
  yield Bio::Iprscan::Report.parse_raw_entry(entry) if entry != ''
84
84
  end
85
85
 
86
- # Parser method for a raw formated entry. Retruns a Bio::Iprscan::Report
86
+ # Parser method for a raw formatted entry. Returns a Bio::Iprscan::Report
87
87
  # object.
88
88
  def self.parse_raw_entry(str)
89
89
  report = self.new
@@ -113,7 +113,7 @@ module Bio
113
113
 
114
114
 
115
115
 
116
- # Parser method for a xml formated entry. Retruns a Bio::Iprscan::Report
116
+ # Parser method for an xml formatted entry. Returns a Bio::Iprscan::Report
117
117
  # object.
118
118
  # def self.parse_xml(str)
119
119
  # end
@@ -196,7 +196,7 @@ module Bio
196
196
  end
197
197
  end
198
198
 
199
- # Parser method for a pseudo-txt formated entry. Retruns a Bio::Iprscan::Report
199
+ # Parser method for a pseudo-txt formatted entry. Returns a Bio::Iprscan::Report
200
200
  # object.
201
201
  #
202
202
  # == Usage
@@ -78,7 +78,7 @@ module Bio
78
78
  # Returns the range of the transmembrane helix
79
79
  attr_reader :range
80
80
 
81
- # Retruns ``PRIMARY'' or ``SECONDARY'' of helix.
81
+ # Returns ``PRIMARY'' or ``SECONDARY'' of helix.
82
82
  attr_reader :grade
83
83
 
84
84
  # Returns the sequence of the transmembrane helix.
@@ -174,7 +174,7 @@ class UniProtKB < EMBLDB
174
174
  #
175
175
  # http://www.uniprot.org/docs/sp_news.htm
176
176
  def parse_DE_line_rel14(str)
177
- # Retruns if it is not the new format since Rel.14
177
+ # Returns if it is not the new format since Rel.14
178
178
  return nil unless /^DE (RecName|AltName|SubName)\: / =~ str
179
179
  ret = []
180
180
  cur = nil
data/lib/bio/db/gff.rb CHANGED
@@ -359,7 +359,9 @@ module Bio
359
359
  # Parses a GFF2-formatted line and returns a new
360
360
  # Bio::GFF::GFF2::Record object.
361
361
  def self.parse(str)
362
- self.new.parse(str)
362
+ ret = self.new
363
+ ret.parse(str)
364
+ ret
363
365
  end
364
366
 
365
367
  # Creates a Bio::GFF::GFF2::Record object.
data/lib/bio/db/go.rb CHANGED
@@ -193,7 +193,7 @@ class GO
193
193
  # Delimiter
194
194
  RS = DELIMITER
195
195
 
196
- # Retruns an Array of parsed gene_association flatfile.
196
+ # Returns an Array of parsed gene_association flatfile.
197
197
  # Block is acceptable.
198
198
  def self.parser(str)
199
199
  if block_given?
@@ -226,7 +226,7 @@ class GO
226
226
  # Returns Db_Reference variable.
227
227
  attr_reader :db_reference # -> []
228
228
 
229
- # Retruns Evidence code variable.
229
+ # Returns Evidence code variable.
230
230
  attr_reader :evidence
231
231
 
232
232
  # Returns the entry is associated with this value.
@@ -210,6 +210,20 @@ class KEGG
210
210
  private :strings_as_hash
211
211
  end #module StringsAsHash
212
212
 
213
+ # This module provides diseases_as_hash method.
214
+ #
215
+ # Bio::KEGG::* internal use only.
216
+ module DiseasesAsHash
217
+ include StringsAsHash
218
+ # Returns a Hash of the disease ID and its definition
219
+ def diseases_as_hash
220
+ unless (defined? @diseases_as_hash) && @diseases_as_hash
221
+ @diseases_as_hash = strings_as_hash(diseases_as_strings)
222
+ end
223
+ @diseases_as_hash
224
+ end
225
+ end #module DiseasesAsHash
226
+
213
227
  end #module Common
214
228
  end #class KEGG
215
229
  end #module Bio
@@ -107,6 +107,11 @@ class GENES < KEGGDB
107
107
  def orthologs_as_hash; super; end if false #dummy for RDoc
108
108
  alias orthologs orthologs_as_hash
109
109
 
110
+ include Common::DiseasesAsHash
111
+ # Returns a Hash of the disease ID and its definition
112
+ def diseases_as_hash; super; end if false #dummy for RDoc
113
+ alias diseases diseases_as_hash
114
+
110
115
  # Creates a new Bio::KEGG::GENES object.
111
116
  # ---
112
117
  # *Arguments*:
@@ -238,6 +243,27 @@ class GENES < KEGGDB
238
243
  lines_fetch('PATHWAY')
239
244
  end
240
245
 
246
+ # Networks described in the NETWORK lines.
247
+ # ---
248
+ # *Returns*:: Array containing String
249
+ def networks_as_strings
250
+ lines_fetch('NETWORK')
251
+ end
252
+
253
+ # Diseases described in the DISEASE lines.
254
+ # ---
255
+ # *Returns*:: Array containing String
256
+ def diseases_as_strings
257
+ lines_fetch('DISEASE')
258
+ end
259
+
260
+ # Drug targets described in the DRUG_TARGET lines.
261
+ # ---
262
+ # *Returns*:: Array containing String
263
+ def drug_targets_as_strings
264
+ lines_fetch('DRUG_TARGET')
265
+ end
266
+
241
267
  # Returns CLASS field of the entry.
242
268
  def keggclass
243
269
  field_fetch('CLASS')
@@ -42,6 +42,11 @@ class PATHWAY < KEGGDB
42
42
  def orthologs_as_hash; super; end if false #dummy for RDoc
43
43
  alias orthologs orthologs_as_hash
44
44
 
45
+ include Common::DiseasesAsHash
46
+ # Returns a Hash of the disease ID and its definition
47
+ def diseases_as_hash; super; end if false #dummy for RDoc
48
+ alias diseases diseases_as_hash
49
+
45
50
  include Common::References
46
51
  # REFERENCE -- Returns contents of the REFERENCE records as an Array of
47
52
  # Bio::Reference objects.
@@ -122,17 +127,6 @@ class PATHWAY < KEGGDB
122
127
  lines_fetch('DISEASE')
123
128
  end
124
129
 
125
- # Diseases described in the DISEASE lines.
126
- # ---
127
- # *Returns*:: Hash of disease ID and its definition
128
- def diseases_as_hash
129
- unless (defined? @diseases_as_hash) && @diseases_as_hash
130
- @diseases_as_hash = strings_as_hash(diseases_as_strings)
131
- end
132
- @diseases_as_hash
133
- end
134
- alias diseases diseases_as_hash
135
-
136
130
  # Returns an Array of a database name and entry IDs in DBLINKS field.
137
131
  # ---
138
132
  # *Returns*:: Array containing String
@@ -303,6 +303,118 @@ module Common
303
303
  end
304
304
  alias splicing splice
305
305
 
306
+ #--
307
+ # Workaround for Ruby 3.0.0 incompatible changes
308
+ if ::RUBY_VERSION > "3"
309
+
310
+ # Acts almost the same as String#split.
311
+ def split(*arg)
312
+ if block_given?
313
+ super
314
+ else
315
+ ret = super(*arg)
316
+ ret.collect! { |x| self.class.new('').replace(x) }
317
+ ret
318
+ end
319
+ end
320
+
321
+ %w( * ljust rjust center ).each do |w|
322
+ module_eval %Q{
323
+ def #{w}(*arg)
324
+ self.class.new('').replace(super)
325
+ end
326
+ }
327
+ end
328
+
329
+ %w( chomp chop
330
+ delete delete_prefix delete_suffix
331
+ lstrip rstrip strip
332
+ reverse
333
+ squeeze
334
+ succ next
335
+ tr tr_s
336
+ capitalize upcase downcase swapcase
337
+ ).each do |w|
338
+ module_eval %Q{
339
+ def #{w}(*arg)
340
+ s = self.dup
341
+ s.#{w}!(*arg)
342
+ s
343
+ end
344
+ }
345
+ end
346
+
347
+ %w( sub gsub ).each do |w|
348
+ module_eval %Q{
349
+ def #{w}(*arg, &block)
350
+ s = self.dup
351
+ s.#{w}!(*arg, &block)
352
+ s
353
+ end
354
+ }
355
+ end
356
+
357
+ #Reference: https://nacl-ltd.github.io/2018/11/08/gsub-wrapper.html
358
+ #(Title: Is it possible to implement gsub wrapper?)
359
+ %w( sub! gsub! ).each do |w|
360
+ module_eval %Q{
361
+ def #{w}(*arg, &block)
362
+ if block_given? then
363
+ super(*arg) do |m|
364
+ b = Thread.current[:_backref]
365
+ Thread.current[:_backref] = ::Regexp.last_match
366
+ block.binding.eval("$~ = Thread.current[:_backref]")
367
+ Thread.current[:_backref] = b
368
+ block.call(self.class.new('').replace(m))
369
+ end
370
+ else
371
+ super
372
+ end
373
+ end
374
+ }
375
+ end
376
+
377
+ %w( each_char each_grapheme_cluster each_line ).each do |w|
378
+ module_eval %Q{
379
+ def #{w}
380
+ if block_given?
381
+ super { |c| yield(self.class.new('').replace(c)) }
382
+ else
383
+ enum_for(:#{w})
384
+ end
385
+ end
386
+ }
387
+ end
388
+
389
+ %w( slice [] slice! ).each do |w|
390
+ module_eval %Q{
391
+ def #{w}(*arg)
392
+ r = super
393
+ r ? self.class.new('').replace(r) : r
394
+ end
395
+ }
396
+ end
397
+
398
+ %w( partition rpartition ).each do |w|
399
+ module_eval %Q{
400
+ def #{w}(sep)
401
+ r = super
402
+ if r.kind_of?(Array)
403
+ r[1] == sep ?
404
+ [ self.class.new('').replace(r[0]),
405
+ r[1],
406
+ self.class.new('').replace(r[2]) ] :
407
+ r.collect { |x| self.class.new('').replace(x) }
408
+ else
409
+ r
410
+ end
411
+ end
412
+ }
413
+ end
414
+ #++
415
+
416
+ end # if ::RUBY_VERSION > "3"
417
+
306
418
  end # Common
307
419
 
308
420
  end # Sequence
@@ -10,6 +10,7 @@
10
10
  #
11
11
 
12
12
  require 'erb'
13
+ require 'date'
13
14
 
14
15
  module Bio
15
16
 
data/lib/bio/tree.rb CHANGED
@@ -605,7 +605,7 @@ module Bio
605
605
  end
606
606
 
607
607
  # Gets path from node1 to node2.
608
- # Retruns an array of nodes, including node1 and node2.
608
+ # Returns an array of nodes, including node1 and node2.
609
609
  # If node1 and/or node2 do not exist, IndexError is raised.
610
610
  # If node1 and node2 are not connected, NoPathError is raised.
611
611
  # The result is unspecified for cyclic trees.
data/lib/bio/version.rb CHANGED
@@ -10,7 +10,7 @@
10
10
  module Bio
11
11
 
12
12
  # BioRuby version (Array containing Integer)
13
- BIORUBY_VERSION = [1, 6, 0].extend(Comparable).freeze
13
+ BIORUBY_VERSION = [2, 0, 3].extend(Comparable).freeze
14
14
 
15
15
  # Extra version specifier (String or nil).
16
16
  # Existance of the value indicates development version.
@@ -19,8 +19,8 @@ module Bio
19
19
  # ".pre :: Pre-release version.
20
20
  #
21
21
  # References: https://guides.rubygems.org/patterns/#prerelease-gems
22
- BIORUBY_EXTRA_VERSION = #nil
23
- ".pre"
22
+ BIORUBY_EXTRA_VERSION = nil
23
+ #".pre"
24
24
 
25
25
  # Version identifier, including extra version string (String)
26
26
  # Unlike BIORUBY_VERSION, it is not comparable.
@@ -0,0 +1,82 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # color_scheme_aa.rb - A Bio::ColorScheme demo script for Amino Acid sequences.
4
+ #
5
+ # Usage:
6
+ #
7
+ # % ruby color_scheme_aa.rb > cs-seq-faa.html
8
+ #
9
+ # % cat seq.faa
10
+ # >AA_sequence
11
+ # MKRISTTITTTITITTGNGAG
12
+ # % ruby color_scheme_aa.rb seq.faa > colored-seq-faa.html
13
+ #
14
+ #
15
+ # Copyright:: Copyright (C) 2005
16
+ # Mitsuteru C. Nakao <n@bioruby.org>
17
+ # License:: The Ruby License
18
+ #
19
+
20
+ require 'bio'
21
+
22
+
23
+ # returns a <br> line break when i is a multiple of width, otherwise an empty string.
24
+ def br(i, width = 80)
25
+ return "<br\n>" if i % width == 0
26
+ ""
27
+ end
28
+
29
+
30
+ # returns sequence html doc
31
+ def display(seq, cs)
32
+ html = '<p style="font-family: monospace">'
33
+ postfix = '</span>'
34
+ i = 0
35
+ seq.each_char do |c|
36
+ color = cs[c]
37
+ prefix = %Q(<span style="background:\##{color};">)
38
+ html += prefix + c + postfix
39
+ html += br(i += 1)
40
+ end
41
+ html + '</p>'
42
+ end
43
+
44
+
45
+ # returns scheme wise html doc
46
+ def display_scheme(scheme, aaseq)
47
+ html = ''
48
+ cs = Bio::ColorScheme.const_get(scheme.intern)
49
+ [aaseq].each do |seq|
50
+ html += display(seq, cs)
51
+ end
52
+ return ['<div>', "<h3>#{cs}</h3>", html, '</div>']
53
+ end
54
+
55
+
56
+
57
+ if fna = ARGV.shift
58
+ aaseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.aaseq }
59
+ else
60
+ aaseq = Bio::Sequence::AA.new('ARNDCQEGHILKMFPSTWYV' * 20).randomize
61
+ end
62
+
63
+ title = 'Bio::ColorScheme for amino acid sequences'
64
+ doc = ['<html>',
65
+ '<header>', '<title>', title, '</title>', '</header>',
66
+ '<body>', '<h1>', title, '</h1>']
67
+
68
+ doc << ['<div>', '<h2>', 'Simple colors', '</h2>']
69
+
70
+ ['Zappo', 'Taylor' ].each do |scheme|
71
+ doc << display_scheme(scheme, aaseq)
72
+ end
73
+ doc << ['</div>']
74
+
75
+
76
+ doc << ['<div>', '<h2>', 'Score colors', '</h2>']
77
+ ['Buried', 'Helix', 'Hydropathy', 'Strand', 'Turn'].each do |score|
78
+ doc << display_scheme(score, aaseq)
79
+ end
80
+ doc << ['</div>']
81
+
82
+ puts doc + ['</body>','</html>']
@@ -17,7 +17,6 @@
17
17
  # Mitsuteru C. Nakao <n@bioruby.org>
18
18
  # License:: The Ruby License
19
19
  #
20
- # $Id: color_scheme_na.rb,v 1.3 2007/04/05 23:35:42 trevor Exp $
21
20
  #
22
21
 
23
22
  require 'bio'
@@ -35,10 +34,10 @@ def display(seq, cs)
35
34
  html = '<p style="font-family: monospace">'
36
35
  postfix = '</span>'
37
36
  i = 0
38
- seq.each_byte do |c|
39
- color = cs[c.chr]
37
+ seq.each_char do |c|
38
+ color = cs[c]
40
39
  prefix = %Q(<span style="background:\##{color};">)
41
- html += prefix + c.chr + postfix
40
+ html += prefix + c + postfix
42
41
  html += br(i += 1)
43
42
  end
44
43
  html + '</p>'
@@ -48,7 +47,7 @@ end
48
47
  # returns scheme wise html doc
49
48
  def display_scheme(scheme, naseq, aaseq)
50
49
  html = ''
51
- cs = eval("Bio::ColorScheme::#{scheme}")
50
+ cs = Bio::ColorScheme.const_get(scheme.intern)
52
51
  [naseq, aaseq].each do |seq|
53
52
  html += display(seq, cs)
54
53
  end
@@ -58,7 +57,7 @@ end
58
57
 
59
58
 
60
59
  if fna = ARGV.shift
61
- naseq = Bio::FastaFormat.new(File.open(fna, 'r').read).naseq
60
+ naseq = Bio::FlatFile.open(fna) { |ff| ff.next_entry.naseq }
62
61
  aaseq = naseq.translate
63
62
  else
64
63
  naseq = Bio::Sequence::NA.new('acgtu' * 20).randomize
@@ -0,0 +1,23 @@
1
+ #!/usr/bin/env cwl-runner
2
+
3
+ cwlVersion: v1.0
4
+ class: CommandLineTool
5
+ baseCommand: [ruby]
6
+
7
+ inputs:
8
+ - id: script
9
+ type: File
10
+ default:
11
+ class: File
12
+ location: fastq2html.rb
13
+ inputBinding:
14
+ position: -1
15
+ - id: fastq
16
+ type: File[]
17
+ inputBinding:
18
+ position: 1
19
+
20
+ outputs:
21
+ - id: out
22
+ type: stdout
23
+ stdout: $(inputs.script.nameroot)-$(inputs.fastq[0].nameroot).html
@@ -0,0 +1,94 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # fastq2html.rb - HTML visualization of FASTQ sequences
4
+ #
5
+ # Usage:
6
+ #
7
+ # % ruby fastq2html.rb seq00.fastq > seq00.html
8
+ #
9
+ #
10
+ # Copyright:: Copyright (C) 2019 BioRuby Project
11
+ # Copyright (C) 2005 Mitsuteru C. Nakao <n@bioruby.org>
12
+ # License:: The Ruby License
13
+ #
14
+ #
15
+
16
+ require 'bio'
17
+
18
+ # converts a thickness value into a 6-digit hex color code (same byte repeated 3 times)
19
+ def thickness2color(t)
20
+ c = "%02X" % ((t * 255.0).to_i)
21
+ c * 3
22
+ end
23
+
24
+ # Creates the score-to-color lookup hashes; returns [background colors, character colors].
25
+ def create_score2color_hashes
26
+ h_bg = {}
27
+ h_char = {}
28
+ cutoff_low = 0
29
+ cutoff_high = 50
30
+ range = cutoff_high - cutoff_low
31
+ sc_min = -5
32
+ sc_max = 100
33
+ (sc_min..sc_max).each do |i|
34
+ t = if i <= cutoff_low then
35
+ 0.0
36
+ elsif i >= cutoff_high then
37
+ 1.0
38
+ else
39
+ (i - cutoff_low).to_f / range
40
+ end
41
+ h_bg[i] = thickness2color(t)
42
+ h_char[i] = thickness2color((t > 0.3) ? 0.0 : 0.55)
43
+ end
44
+ h_bg.default = h_bg[cutoff_low]
45
+ h_char.default = h_char[cutoff_low]
46
+ [h_bg, h_char]
47
+ end
48
+
49
+ # Color code from quality score
50
+ SCORE2COLOR_BG, SCORE2COLOR_CHAR = create_score2color_hashes
51
+
52
+ # returns a <br> line break when i is a multiple of width, otherwise an empty string.
53
+ def br(i, width = 80)
54
+ return "<br\n>" if i % width == 0
55
+ ""
56
+ end
57
+
58
+ # returns sequence html doc
59
+ def display(naseq, scores)
60
+ html = '<p style="font-family: monospace">'
61
+ postfix = '</span>'
62
+ i = 0
63
+ naseq.each_char.with_index do |c, i|
64
+ sc = scores[i]
65
+ bgcol = SCORE2COLOR_BG[sc]
66
+ col = SCORE2COLOR_CHAR[sc]
67
+ prefix = %Q(<span style="color:\##{col}; background:\##{bgcol};">)
68
+ html += prefix + c + postfix
69
+ html += br(i += 1)
70
+ end
71
+ html + '</p>'
72
+ end
73
+
74
+ # returns colorized html doc
75
+ def fastq2html(definition, naseq, scores)
76
+ html = display(naseq, scores)
77
+ return ['<div>', "<div>&gt;#{CGI.escapeHTML(definition)}</div>", html, '</div>']
78
+ end
79
+
80
+ title = 'Sequences with quality scores'
81
+ puts ['<html>',
82
+ '<header>', '<title>', title, '</title>', '</header>',
83
+ '<body>', '<h1>', title, '</h1>']
84
+
85
+ #main loop
86
+ ARGV.each do |filename|
87
+ Bio::FlatFile.open(filename) do |ff|
88
+ ff.each do |e|
89
+ puts fastq2html(e.definition, e.naseq, e.quality_scores)
90
+ end
91
+ end
92
+ end
93
+
94
+ puts ['</body>','</html>']
@@ -0,0 +1,5 @@
1
+ fastq:
2
+ - class: File
3
+ location: ../test/data/fastq/longreads_as_sanger.fastq
4
+ - class: File
5
+ location: ../test/data/fastq/sanger_full_range_original_sanger.fastq