dna 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dna.rb +34 -17
- data/spec/dna_spec.rb +15 -3
- data/spec/record_spec.rb +13 -13
- metadata +2 -2
data/lib/dna.rb
CHANGED
@@ -4,11 +4,12 @@
|
|
4
4
|
class Dna # iterator
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
attr_reader :format
|
8
|
+
|
7
9
|
def initialize(handle, args={})
|
8
10
|
@handle = handle
|
9
|
-
@format =
|
10
|
-
|
11
|
-
@iterator =
|
11
|
+
@format = detect_format
|
12
|
+
@iterator =
|
12
13
|
case @format
|
13
14
|
when :fasta
|
14
15
|
fasta_parser
|
@@ -21,6 +22,22 @@ class Dna # iterator
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
25
|
+
def detect_format
|
26
|
+
first_line = @handle.first
|
27
|
+
@handle.rewind if @handle.class == File
|
28
|
+
|
29
|
+
# detect qseq by counting number of tabs.
|
30
|
+
if first_line.split("\t").length == 11
|
31
|
+
return :qseq
|
32
|
+
elsif first_line[0].chr == '>'
|
33
|
+
return :fasta
|
34
|
+
elsif first_line[0].chr == '@'
|
35
|
+
return :fastq
|
36
|
+
else
|
37
|
+
raise Exception, "cannot detect format of input"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
24
41
|
def each &block
|
25
42
|
@iterator.each do |r|
|
26
43
|
if block_given?
|
@@ -38,21 +55,21 @@ class Dna # iterator
|
|
38
55
|
Enumerator.new do |enum|
|
39
56
|
@handle.each do |line|
|
40
57
|
if line[0].chr == '>'
|
41
|
-
enum.yield Fasta.new(name
|
58
|
+
enum.yield Fasta.new(:name => header, :sequence => sequence) if sequence
|
42
59
|
sequence = ''
|
43
60
|
header = line[1..-1].strip
|
44
61
|
else
|
45
62
|
sequence << line.strip.tr(' ','')
|
46
63
|
end
|
47
64
|
end
|
48
|
-
enum.yield Fasta.new(name: header, sequence: sequence)
|
65
|
+
enum.yield Fasta.new(:name => header, :sequence => sequence)
|
49
66
|
end
|
50
67
|
end
|
51
68
|
|
52
69
|
def fastq_parser
|
53
70
|
c = (0..3).cycle
|
54
71
|
Enumerator.new do |enum|
|
55
|
-
params = { name: nil, sequence: nil, quality: nil }
|
72
|
+
params = { :name => nil, :sequence => nil, :quality => nil }
|
56
73
|
@handle.each do |line|
|
57
74
|
n = c.next
|
58
75
|
case n
|
@@ -76,17 +93,17 @@ class Dna # iterator
|
|
76
93
|
@handle.each do |line|
|
77
94
|
line = line.strip.split("\t")
|
78
95
|
record = QSEQ.new(
|
79
|
-
machine
|
80
|
-
run
|
81
|
-
lane
|
82
|
-
tile
|
83
|
-
x
|
84
|
-
y
|
85
|
-
index
|
86
|
-
read_no
|
87
|
-
sequence
|
88
|
-
quality
|
89
|
-
filtered
|
96
|
+
:machine => line[0],
|
97
|
+
:run => line[1],
|
98
|
+
:lane => line[2],
|
99
|
+
:tile => line[3],
|
100
|
+
:x => line[4],
|
101
|
+
:y => line[5],
|
102
|
+
:index => line[6],
|
103
|
+
:read_no => line[7],
|
104
|
+
:sequence => line[8],
|
105
|
+
:quality => line[9],
|
106
|
+
:filtered => line[10]
|
90
107
|
)
|
91
108
|
enum.yield record
|
92
109
|
end
|
data/spec/dna_spec.rb
CHANGED
@@ -3,9 +3,21 @@ require 'spec_helper'
|
|
3
3
|
describe Dna do
|
4
4
|
include_context "parser stuff"
|
5
5
|
|
6
|
-
let(:fasta) { Dna.new @fasta_file
|
7
|
-
let (:fastq) { Dna.new @fastq_file
|
8
|
-
let (:qseq) { Dna.new @qseq_file
|
6
|
+
let(:fasta) { Dna.new @fasta_file }
|
7
|
+
let (:fastq) { Dna.new @fastq_file }
|
8
|
+
let (:qseq) { Dna.new @qseq_file }
|
9
|
+
|
10
|
+
it 'can auto-detect fasta format' do
|
11
|
+
fasta.format == 'fasta'
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'can auto-detect fastq format' do
|
15
|
+
fastq.format == 'fastq'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'can auto-detect qseq format' do
|
19
|
+
qseq.format == 'qseq'
|
20
|
+
end
|
9
21
|
|
10
22
|
it 'can properly parse a crappily formatted fasta file' do
|
11
23
|
records = fasta.to_a
|
data/spec/record_spec.rb
CHANGED
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Fasta do
|
4
4
|
|
5
|
-
let (:fasta) { Fasta.new name
|
5
|
+
let (:fasta) { Fasta.new :name => 'test', :sequence => 'GATC'}
|
6
6
|
|
7
7
|
it 'can be created' do
|
8
8
|
fasta.should_not be_nil
|
@@ -26,7 +26,7 @@ describe Fasta do
|
|
26
26
|
end
|
27
27
|
|
28
28
|
describe Fastq do
|
29
|
-
let (:fastq) { Fastq.new name
|
29
|
+
let (:fastq) { Fastq.new :name => 'test', :sequence => 'GATC', :quality => 'BBBB' }
|
30
30
|
|
31
31
|
it 'can be created' do
|
32
32
|
fastq.should_not be_nil
|
@@ -58,17 +58,17 @@ describe QSEQ do
|
|
58
58
|
|
59
59
|
let (:properties) {
|
60
60
|
line = string.split("\t")
|
61
|
-
{ machine
|
62
|
-
run
|
63
|
-
lane
|
64
|
-
tile
|
65
|
-
x
|
66
|
-
y
|
67
|
-
index
|
68
|
-
read_no
|
69
|
-
sequence
|
70
|
-
quality
|
71
|
-
filtered
|
61
|
+
{ :machine => line[0],
|
62
|
+
:run => line[1],
|
63
|
+
:lane => line[2],
|
64
|
+
:tile => line[3],
|
65
|
+
:x => line[4],
|
66
|
+
:y => line[5],
|
67
|
+
:index => line[6],
|
68
|
+
:read_no => line[7],
|
69
|
+
:sequence => line[8],
|
70
|
+
:quality => line[9],
|
71
|
+
:filtered => line[10] }
|
72
72
|
}
|
73
73
|
|
74
74
|
let (:qseq) {
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-10 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: for parsing various types of DNA sequence files
|
15
15
|
email: harekrishna@gmail.com
|