dna 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4)
  1. data/lib/dna.rb +34 -17
  2. data/spec/dna_spec.rb +15 -3
  3. data/spec/record_spec.rb +13 -13
  4. metadata +2 -2
data/lib/dna.rb CHANGED
@@ -4,11 +4,12 @@
4
4
  class Dna # iterator
5
5
  include Enumerable
6
6
 
7
+ attr_reader :format
8
+
7
9
  def initialize(handle, args={})
8
10
  @handle = handle
9
- @format = args[:format].to_sym
10
-
11
- @iterator =
11
+ @format = detect_format
12
+ @iterator =
12
13
  case @format
13
14
  when :fasta
14
15
  fasta_parser
@@ -21,6 +22,22 @@ class Dna # iterator
21
22
  end
22
23
  end
23
24
 
25
+ def detect_format
26
+ first_line = @handle.first
27
+ @handle.rewind if @handle.class == File
28
+
29
+ # detect qseq by counting number of tabs.
30
+ if first_line.split("\t").length == 11
31
+ return :qseq
32
+ elsif first_line[0].chr == '>'
33
+ return :fasta
34
+ elsif first_line[0].chr == '@'
35
+ return :fastq
36
+ else
37
+ raise Exception, "cannot detect format of input"
38
+ end
39
+ end
40
+
24
41
  def each &block
25
42
  @iterator.each do |r|
26
43
  if block_given?
@@ -38,21 +55,21 @@ class Dna # iterator
38
55
  Enumerator.new do |enum|
39
56
  @handle.each do |line|
40
57
  if line[0].chr == '>'
41
- enum.yield Fasta.new(name: header, sequence: sequence) if sequence
58
+ enum.yield Fasta.new(:name => header, :sequence => sequence) if sequence
42
59
  sequence = ''
43
60
  header = line[1..-1].strip
44
61
  else
45
62
  sequence << line.strip.tr(' ','')
46
63
  end
47
64
  end
48
- enum.yield Fasta.new(name: header, sequence: sequence)
65
+ enum.yield Fasta.new(:name => header, :sequence => sequence)
49
66
  end
50
67
  end
51
68
 
52
69
  def fastq_parser
53
70
  c = (0..3).cycle
54
71
  Enumerator.new do |enum|
55
- params = { name: nil, sequence: nil, quality: nil }
72
+ params = { :name => nil, :sequence => nil, :quality => nil }
56
73
  @handle.each do |line|
57
74
  n = c.next
58
75
  case n
@@ -76,17 +93,17 @@ class Dna # iterator
76
93
  @handle.each do |line|
77
94
  line = line.strip.split("\t")
78
95
  record = QSEQ.new(
79
- machine: line[0],
80
- run: line[1],
81
- lane: line[2],
82
- tile: line[3],
83
- x: line[4],
84
- y: line[5],
85
- index: line[6],
86
- read_no: line[7],
87
- sequence: line[8],
88
- quality: line[9],
89
- filtered: line[10],
96
+ :machine => line[0],
97
+ :run => line[1],
98
+ :lane => line[2],
99
+ :tile => line[3],
100
+ :x => line[4],
101
+ :y => line[5],
102
+ :index => line[6],
103
+ :read_no => line[7],
104
+ :sequence => line[8],
105
+ :quality => line[9],
106
+ :filtered => line[10]
90
107
  )
91
108
  enum.yield record
92
109
  end
data/spec/dna_spec.rb CHANGED
@@ -3,9 +3,21 @@ require 'spec_helper'
3
3
  describe Dna do
4
4
  include_context "parser stuff"
5
5
 
6
- let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
7
- let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
8
- let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
6
+ let(:fasta) { Dna.new @fasta_file }
7
+ let (:fastq) { Dna.new @fastq_file }
8
+ let (:qseq) { Dna.new @qseq_file }
9
+
10
+ it 'can auto-detect fasta format' do
11
+ fasta.format == 'fasta'
12
+ end
13
+
14
+ it 'can auto-detect fastq format' do
15
+ fastq.format == 'fastq'
16
+ end
17
+
18
+ it 'can auto-detect qseq format' do
19
+ qseq.format == 'qseq'
20
+ end
9
21
 
10
22
  it 'can properly parse a crappily formatted fasta file' do
11
23
  records = fasta.to_a
data/spec/record_spec.rb CHANGED
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  describe Fasta do
4
4
 
5
- let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
5
+ let (:fasta) { Fasta.new :name => 'test', :sequence => 'GATC'}
6
6
 
7
7
  it 'can be created' do
8
8
  fasta.should_not be_nil
@@ -26,7 +26,7 @@ describe Fasta do
26
26
  end
27
27
 
28
28
  describe Fastq do
29
- let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
29
+ let (:fastq) { Fastq.new :name => 'test', :sequence => 'GATC', :quality => 'BBBB' }
30
30
 
31
31
  it 'can be created' do
32
32
  fastq.should_not be_nil
@@ -58,17 +58,17 @@ describe QSEQ do
58
58
 
59
59
  let (:properties) {
60
60
  line = string.split("\t")
61
- { machine: line[0],
62
- run: line[1],
63
- lane: line[2],
64
- tile: line[3],
65
- x: line[4],
66
- y: line[5],
67
- index: line[6],
68
- read_no: line[7],
69
- sequence: line[8],
70
- quality: line[9],
71
- filtered: line[10] }
61
+ { :machine => line[0],
62
+ :run => line[1],
63
+ :lane => line[2],
64
+ :tile => line[3],
65
+ :x => line[4],
66
+ :y => line[5],
67
+ :index => line[6],
68
+ :read_no => line[7],
69
+ :sequence => line[8],
70
+ :quality => line[9],
71
+ :filtered => line[10] }
72
72
  }
73
73
 
74
74
  let (:qseq) {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2010-02-07 00:00:00.000000000 Z
12
+ date: 2010-02-10 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: for parsing various types of DNA sequence files
15
15
  email: harekrishna@gmail.com