dna 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dna.rb +131 -11
- metadata +1 -1
data/lib/dna.rb
CHANGED
@@ -3,24 +3,102 @@ class Dna # iterator
|
|
3
3
|
|
4
4
|
##
# Build a record iterator over +handle+.
#
# handle:: object whose +each+ yields the input one line at a time
# args::   options hash; +:type+ selects the input format and must be
#          +:fasta+, +:fastq+ or +:qseq+ (Symbol or String)
#
# Raises ArgumentError when +:type+ is missing, and RuntimeError when the
# requested format is not supported.
#
def initialize(handle, args={})
  @handle = handle

  type = args[:type]
  # Fail with a clear message instead of NoMethodError on nil.to_sym.
  raise ArgumentError, 'args[:type] is required (:fasta, :fastq or :qseq)' if type.nil?
  @format = type.to_sym

  @iterator =
    case @format
    when :fasta
      fasta_parser
    when :fastq
      fastq_parser
    when :qseq
      qseq_parser
    else
      # Report the format that was actually requested (@format is the only
      # instance variable holding it).
      raise "#{@format} not supported."
    end
end
|
7
20
|
|
8
|
-
def each
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
yield Dna.new(:name => header, :sequence => sequence) if sequence
|
13
|
-
sequence = ''
|
14
|
-
header = line[1..-1].strip
|
21
|
+
##
# Yield every parsed record to the given block.
#
# Without a block an Enumerator over the records is returned, so the
# usual external-iteration and chaining forms work (the previous
# block-less branch called +yield+ and therefore raised LocalJumpError).
#
def each(&block)
  return to_enum(:each) unless block_given?

  @iterator.each(&block)
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
##
# Build an Enumerator that yields one Fasta record per '>' header found
# in @handle.
#
# Sequence lines are stripped, have embedded spaces removed and are
# concatenated until the next header. Blank lines are ignored. Nothing
# is yielded for input that contains no header.
#
# Raises RuntimeError if sequence data appears before the first header.
#
def fasta_parser
  Enumerator.new do |enum|
    # State is local to one enumeration so that enumerating again never
    # replays the last record of the previous pass.
    header = sequence = nil

    @handle.each do |line|
      if line.start_with?('>')
        # A new header closes the record collected so far, if any.
        enum.yield Fasta.new(name: header, sequence: sequence) if sequence
        header = line[1..-1].strip
        sequence = ''
      else
        residues = line.strip.delete(' ')
        next if residues.empty?
        raise 'sequence data found before the first FASTA header' if sequence.nil?
        sequence << residues
      end
    end

    # Flush the final record; skipped when no header was ever seen.
    enum.yield Fasta.new(name: header, sequence: sequence) if sequence
  end
end
|
48
|
+
|
49
|
+
##
# Build an Enumerator that yields one Fastq record for every four lines
# read from @handle: header, sequence, separator ('+' line) and quality.
#
# The leading '@' is removed from the header because Fastq#to_s adds it
# back when printing. A trailing incomplete group of lines is ignored.
#
def fastq_parser
  Enumerator.new do |enum|
    # These must be declared outside the per-line block: variables first
    # assigned inside it would be reset to nil on every line, so the
    # header and sequence would be lost by the time the quality line
    # arrives.
    header = sequence = nil
    position = 0

    @handle.each do |line|
      case position
      when 0
        header = line.strip.sub(/\A@/, '')
      when 1
        sequence = line.strip
      when 3
        enum.yield Fastq.new(name: header, sequence: sequence, quality: line.strip)
      end
      # position 2 is the '+' separator line and carries no data.
      position = (position + 1) % 4
    end
  end
end
|
68
|
+
|
69
|
+
##
# Build an Enumerator that yields one QSEQ record per non-blank line of
# @handle. Each line is split on tabs and the columns are assigned, in
# order, to machine, run, lane, tile, x, y, index, read_no, sequence,
# quality and filtered. Missing trailing columns are passed as nil.
#
def qseq_parser
  columns = [:machine, :run, :lane, :tile, :x, :y, :index, :read_no,
             :sequence, :quality, :filtered]

  Enumerator.new do |enum|
    @handle.each do |line|
      fields = line.strip.split("\t")
      # A blank line (e.g. a trailing newline at end of file) carries no
      # record; without this guard it would produce a record of all nils.
      next if fields.empty?

      enum.yield QSEQ.new(Hash[columns.zip(fields)])
    end
  end
end
|
21
90
|
end
|
22
91
|
|
23
92
|
class Record # nucleotide record
  ##
  # Number of characters in the record's sequence.
  # Returns 0 when no sequence has been set, instead of raising
  # NoMethodError on nil.
  #
  def length
    @sequence.to_s.length
  end
end
|
97
|
+
|
98
|
+
##
|
99
|
+
# Fasta record
|
100
|
+
#
|
101
|
+
class Fasta < Record
|
24
102
|
attr_accessor :name, :sequence
|
25
103
|
|
26
104
|
def initialize(args={})
|
@@ -31,8 +109,50 @@ class Record # nucleotide record
|
|
31
109
|
def to_s
|
32
110
|
">#{@name}\n#{@sequence}\n"
|
33
111
|
end
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# Fastq record
|
116
|
+
#
|
117
|
+
class Fastq < Record
  attr_accessor :name, :sequence, :format, :quality

  ##
  # args:: hash with the optional keys +:name+, +:sequence+, +:quality+
  #        and +:format+; absent keys leave the attribute nil.
  #
  def initialize(args={})
    @name     = args[:name]
    @sequence = args[:sequence]
    @quality  = args[:quality]
    # :format has a public accessor, so honour it at construction too.
    @format   = args[:format]
  end

  ##
  # Four-line FASTQ text for this record: "@name", sequence, "+name" and
  # quality. There is no trailing newline.
  #
  def to_s
    "@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
  end
end
|
130
|
+
|
131
|
+
##
|
132
|
+
# QSEQ record
|
133
|
+
#
|
134
|
+
class QSEQ < Record
  attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered

  ##
  # args:: hash keyed by the attribute names above; absent keys leave the
  #        corresponding attribute nil.
  #
  def initialize(args={})
    @machine  = args[:machine]
    @run      = args[:run]
    @lane     = args[:lane]
    @tile     = args[:tile]
    @x        = args[:x]
    @y        = args[:y]
    @index    = args[:index]
    @read_no  = args[:read_no]
    @sequence = args[:sequence]
    @quality  = args[:quality]
    @filtered = args[:filtered]
  end

  ##
  # All eleven columns joined with tabs, in file order.
  #
  def to_s
    columns = [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered]
    columns.join("\t")
  end

  ##
  # All eleven columns joined with ':'.
  #
  # NOTE(review): this includes sequence, quality and the filter flag, so
  # it is the same data as #to_s with a different separator. Presumably
  # only the identifying columns (machine .. read_no) were intended;
  # confirm with callers before changing.
  #
  def header
    columns = [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered]
    columns.join(':')
  end
end
|