dna 0.0.4 → 0.0.5

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (4)
  1. data/lib/dna.rb +34 -17
  2. data/spec/dna_spec.rb +15 -3
  3. data/spec/record_spec.rb +13 -13
  4. metadata +2 -2
data/lib/dna.rb CHANGED
@@ -4,11 +4,12 @@
4
4
  class Dna # iterator
5
5
  include Enumerable
6
6
 
7
+ attr_reader :format
8
+
7
9
  def initialize(handle, args={})
8
10
  @handle = handle
9
- @format = args[:format].to_sym
10
-
11
- @iterator =
11
+ @format = detect_format
12
+ @iterator =
12
13
  case @format
13
14
  when :fasta
14
15
  fasta_parser
@@ -21,6 +22,22 @@ class Dna # iterator
21
22
  end
22
23
  end
23
24
 
25
+ def detect_format
26
+ first_line = @handle.first
27
+ @handle.rewind if @handle.class == File
28
+
29
+ # detect qseq by counting number of tabs.
30
+ if first_line.split("\t").length == 11
31
+ return :qseq
32
+ elsif first_line[0].chr == '>'
33
+ return :fasta
34
+ elsif first_line[0].chr == '@'
35
+ return :fastq
36
+ else
37
+ raise Exception, "cannot detect format of input"
38
+ end
39
+ end
40
+
24
41
  def each &block
25
42
  @iterator.each do |r|
26
43
  if block_given?
@@ -38,21 +55,21 @@ class Dna # iterator
38
55
  Enumerator.new do |enum|
39
56
  @handle.each do |line|
40
57
  if line[0].chr == '>'
41
- enum.yield Fasta.new(name: header, sequence: sequence) if sequence
58
+ enum.yield Fasta.new(:name => header, :sequence => sequence) if sequence
42
59
  sequence = ''
43
60
  header = line[1..-1].strip
44
61
  else
45
62
  sequence << line.strip.tr(' ','')
46
63
  end
47
64
  end
48
- enum.yield Fasta.new(name: header, sequence: sequence)
65
+ enum.yield Fasta.new(:name => header, :sequence => sequence)
49
66
  end
50
67
  end
51
68
 
52
69
  def fastq_parser
53
70
  c = (0..3).cycle
54
71
  Enumerator.new do |enum|
55
- params = { name: nil, sequence: nil, quality: nil }
72
+ params = { :name => nil, :sequence => nil, :quality => nil }
56
73
  @handle.each do |line|
57
74
  n = c.next
58
75
  case n
@@ -76,17 +93,17 @@ class Dna # iterator
76
93
  @handle.each do |line|
77
94
  line = line.strip.split("\t")
78
95
  record = QSEQ.new(
79
- machine: line[0],
80
- run: line[1],
81
- lane: line[2],
82
- tile: line[3],
83
- x: line[4],
84
- y: line[5],
85
- index: line[6],
86
- read_no: line[7],
87
- sequence: line[8],
88
- quality: line[9],
89
- filtered: line[10],
96
+ :machine => line[0],
97
+ :run => line[1],
98
+ :lane => line[2],
99
+ :tile => line[3],
100
+ :x => line[4],
101
+ :y => line[5],
102
+ :index => line[6],
103
+ :read_no => line[7],
104
+ :sequence => line[8],
105
+ :quality => line[9],
106
+ :filtered => line[10]
90
107
  )
91
108
  enum.yield record
92
109
  end
data/spec/dna_spec.rb CHANGED
@@ -3,9 +3,21 @@ require 'spec_helper'
3
3
  describe Dna do
4
4
  include_context "parser stuff"
5
5
 
6
- let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
7
- let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
8
- let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
6
+ let(:fasta) { Dna.new @fasta_file }
7
+ let (:fastq) { Dna.new @fastq_file }
8
+ let (:qseq) { Dna.new @qseq_file }
9
+
10
+ it 'can auto-detect fasta format' do
11
+ fasta.format == 'fasta'
12
+ end
13
+
14
+ it 'can auto-detect fastq format' do
15
+ fastq.format == 'fastq'
16
+ end
17
+
18
+ it 'can auto-detect qseq format' do
19
+ qseq.format == 'qseq'
20
+ end
9
21
 
10
22
  it 'can properly parse a crappily formatted fasta file' do
11
23
  records = fasta.to_a
data/spec/record_spec.rb CHANGED
@@ -2,7 +2,7 @@ require 'spec_helper'
2
2
 
3
3
  describe Fasta do
4
4
 
5
- let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
5
+ let (:fasta) { Fasta.new :name => 'test', :sequence => 'GATC'}
6
6
 
7
7
  it 'can be created' do
8
8
  fasta.should_not be_nil
@@ -26,7 +26,7 @@ describe Fasta do
26
26
  end
27
27
 
28
28
  describe Fastq do
29
- let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
29
+ let (:fastq) { Fastq.new :name => 'test', :sequence => 'GATC', :quality => 'BBBB' }
30
30
 
31
31
  it 'can be created' do
32
32
  fastq.should_not be_nil
@@ -58,17 +58,17 @@ describe QSEQ do
58
58
 
59
59
  let (:properties) {
60
60
  line = string.split("\t")
61
- { machine: line[0],
62
- run: line[1],
63
- lane: line[2],
64
- tile: line[3],
65
- x: line[4],
66
- y: line[5],
67
- index: line[6],
68
- read_no: line[7],
69
- sequence: line[8],
70
- quality: line[9],
71
- filtered: line[10] }
61
+ { :machine => line[0],
62
+ :run => line[1],
63
+ :lane => line[2],
64
+ :tile => line[3],
65
+ :x => line[4],
66
+ :y => line[5],
67
+ :index => line[6],
68
+ :read_no => line[7],
69
+ :sequence => line[8],
70
+ :quality => line[9],
71
+ :filtered => line[10] }
72
72
  }
73
73
 
74
74
  let (:qseq) {
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2010-02-07 00:00:00.000000000 Z
12
+ date: 2010-02-10 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: for parsing various types of DNA sequence files
15
15
  email: harekrishna@gmail.com