dna 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dna.rb +34 -17
- data/spec/dna_spec.rb +15 -3
- data/spec/record_spec.rb +13 -13
- metadata +2 -2
data/lib/dna.rb
CHANGED
@@ -4,11 +4,12 @@
|
|
4
4
|
class Dna # iterator
|
5
5
|
include Enumerable
|
6
6
|
|
7
|
+
attr_reader :format
|
8
|
+
|
7
9
|
def initialize(handle, args={})
|
8
10
|
@handle = handle
|
9
|
-
@format =
|
10
|
-
|
11
|
-
@iterator =
|
11
|
+
@format = detect_format
|
12
|
+
@iterator =
|
12
13
|
case @format
|
13
14
|
when :fasta
|
14
15
|
fasta_parser
|
@@ -21,6 +22,22 @@ class Dna # iterator
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
25
|
+
def detect_format
|
26
|
+
first_line = @handle.first
|
27
|
+
@handle.rewind if @handle.class == File
|
28
|
+
|
29
|
+
# detect qseq by counting number of tabs.
|
30
|
+
if first_line.split("\t").length == 11
|
31
|
+
return :qseq
|
32
|
+
elsif first_line[0].chr == '>'
|
33
|
+
return :fasta
|
34
|
+
elsif first_line[0].chr == '@'
|
35
|
+
return :fastq
|
36
|
+
else
|
37
|
+
raise Exception, "cannot detect format of input"
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
24
41
|
def each &block
|
25
42
|
@iterator.each do |r|
|
26
43
|
if block_given?
|
@@ -38,21 +55,21 @@ class Dna # iterator
|
|
38
55
|
Enumerator.new do |enum|
|
39
56
|
@handle.each do |line|
|
40
57
|
if line[0].chr == '>'
|
41
|
-
enum.yield Fasta.new(name
|
58
|
+
enum.yield Fasta.new(:name => header, :sequence => sequence) if sequence
|
42
59
|
sequence = ''
|
43
60
|
header = line[1..-1].strip
|
44
61
|
else
|
45
62
|
sequence << line.strip.tr(' ','')
|
46
63
|
end
|
47
64
|
end
|
48
|
-
enum.yield Fasta.new(name
|
65
|
+
enum.yield Fasta.new(:name => header, :sequence => sequence)
|
49
66
|
end
|
50
67
|
end
|
51
68
|
|
52
69
|
def fastq_parser
|
53
70
|
c = (0..3).cycle
|
54
71
|
Enumerator.new do |enum|
|
55
|
-
params = { name
|
72
|
+
params = { :name => nil, :sequence => nil, :quality => nil }
|
56
73
|
@handle.each do |line|
|
57
74
|
n = c.next
|
58
75
|
case n
|
@@ -76,17 +93,17 @@ class Dna # iterator
|
|
76
93
|
@handle.each do |line|
|
77
94
|
line = line.strip.split("\t")
|
78
95
|
record = QSEQ.new(
|
79
|
-
machine
|
80
|
-
run
|
81
|
-
lane
|
82
|
-
tile
|
83
|
-
x
|
84
|
-
y
|
85
|
-
index
|
86
|
-
read_no
|
87
|
-
sequence
|
88
|
-
quality
|
89
|
-
filtered
|
96
|
+
:machine => line[0],
|
97
|
+
:run => line[1],
|
98
|
+
:lane => line[2],
|
99
|
+
:tile => line[3],
|
100
|
+
:x => line[4],
|
101
|
+
:y => line[5],
|
102
|
+
:index => line[6],
|
103
|
+
:read_no => line[7],
|
104
|
+
:sequence => line[8],
|
105
|
+
:quality => line[9],
|
106
|
+
:filtered => line[10]
|
90
107
|
)
|
91
108
|
enum.yield record
|
92
109
|
end
|
data/spec/dna_spec.rb
CHANGED
@@ -3,9 +3,21 @@ require 'spec_helper'
|
|
3
3
|
describe Dna do
|
4
4
|
include_context "parser stuff"
|
5
5
|
|
6
|
-
let(:fasta) { Dna.new @fasta_file
|
7
|
-
let (:fastq) { Dna.new @fastq_file
|
8
|
-
let (:qseq) { Dna.new @qseq_file
|
6
|
+
let(:fasta) { Dna.new @fasta_file }
|
7
|
+
let (:fastq) { Dna.new @fastq_file }
|
8
|
+
let (:qseq) { Dna.new @qseq_file }
|
9
|
+
|
10
|
+
it 'can auto-detect fasta format' do
|
11
|
+
fasta.format == 'fasta'
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'can auto-detect fastq format' do
|
15
|
+
fastq.format == 'fastq'
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'can auto-detect qseq format' do
|
19
|
+
qseq.format == 'qseq'
|
20
|
+
end
|
9
21
|
|
10
22
|
it 'can properly parse a crappily formatted fasta file' do
|
11
23
|
records = fasta.to_a
|
data/spec/record_spec.rb
CHANGED
@@ -2,7 +2,7 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Fasta do
|
4
4
|
|
5
|
-
let (:fasta) { Fasta.new name
|
5
|
+
let (:fasta) { Fasta.new :name => 'test', :sequence => 'GATC'}
|
6
6
|
|
7
7
|
it 'can be created' do
|
8
8
|
fasta.should_not be_nil
|
@@ -26,7 +26,7 @@ describe Fasta do
|
|
26
26
|
end
|
27
27
|
|
28
28
|
describe Fastq do
|
29
|
-
let (:fastq) { Fastq.new name
|
29
|
+
let (:fastq) { Fastq.new :name => 'test', :sequence => 'GATC', :quality => 'BBBB' }
|
30
30
|
|
31
31
|
it 'can be created' do
|
32
32
|
fastq.should_not be_nil
|
@@ -58,17 +58,17 @@ describe QSEQ do
|
|
58
58
|
|
59
59
|
let (:properties) {
|
60
60
|
line = string.split("\t")
|
61
|
-
{ machine
|
62
|
-
run
|
63
|
-
lane
|
64
|
-
tile
|
65
|
-
x
|
66
|
-
y
|
67
|
-
index
|
68
|
-
read_no
|
69
|
-
sequence
|
70
|
-
quality
|
71
|
-
filtered
|
61
|
+
{ :machine => line[0],
|
62
|
+
:run => line[1],
|
63
|
+
:lane => line[2],
|
64
|
+
:tile => line[3],
|
65
|
+
:x => line[4],
|
66
|
+
:y => line[5],
|
67
|
+
:index => line[6],
|
68
|
+
:read_no => line[7],
|
69
|
+
:sequence => line[8],
|
70
|
+
:quality => line[9],
|
71
|
+
:filtered => line[10] }
|
72
72
|
}
|
73
73
|
|
74
74
|
let (:qseq) {
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2010-02-
|
12
|
+
date: 2010-02-10 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: for parsing various types of DNA sequence files
|
15
15
|
email: harekrishna@gmail.com
|