dna 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dna.rb +131 -11
- metadata +1 -1
data/lib/dna.rb
CHANGED
@@ -3,24 +3,102 @@ class Dna # iterator
|
|
3
3
|
|
4
4
|
def initialize(handle, args={})
|
5
5
|
@handle = handle
|
6
|
+
@format = args[:type].to_sym
|
7
|
+
|
8
|
+
@iterator =
|
9
|
+
case @format
|
10
|
+
when :fasta
|
11
|
+
fasta_parser
|
12
|
+
when :fastq
|
13
|
+
fastq_parser
|
14
|
+
when :qseq
|
15
|
+
qseq_parser
|
16
|
+
else
|
17
|
+
raise "#{@type} not supported."
|
18
|
+
end
|
6
19
|
end
|
7
20
|
|
8
|
-
def each
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
yield Dna.new(:name => header, :sequence => sequence) if sequence
|
13
|
-
sequence = ''
|
14
|
-
header = line[1..-1].strip
|
21
|
+
def each &block
|
22
|
+
@iterator.each do |r|
|
23
|
+
if block_given?
|
24
|
+
block.call r
|
15
25
|
else
|
16
|
-
|
26
|
+
yield r
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
private
|
32
|
+
|
33
|
+
def fasta_parser
|
34
|
+
sequence, header = nil, nil
|
35
|
+
Enumerator.new do |enum|
|
36
|
+
@handle.each do |line|
|
37
|
+
if line[0].chr == '>'
|
38
|
+
enum.yield Fasta.new(name: header, sequence: sequence) if sequence
|
39
|
+
sequence = ''
|
40
|
+
header = line[1..-1].strip
|
41
|
+
else
|
42
|
+
sequence << line.strip.tr(' ','')
|
43
|
+
end
|
44
|
+
end
|
45
|
+
enum.yield Fasta.new(name: header, sequence: sequence)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def fastq_parser
|
50
|
+
c = (0..3).cycle
|
51
|
+
Enumerator.new do |enum|
|
52
|
+
@handle.each do |line|
|
53
|
+
n = c.next
|
54
|
+
case n
|
55
|
+
when 0
|
56
|
+
header = line.strip
|
57
|
+
when 1
|
58
|
+
sequence = line.strip
|
59
|
+
when 2
|
60
|
+
nil
|
61
|
+
when 3
|
62
|
+
quality = line.strip
|
63
|
+
enum.yield Fastq.new(name: header, sequence: sequence, quality: quality)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def qseq_parser
|
70
|
+
Enumerator.new do |enum|
|
71
|
+
@handle.each do |line|
|
72
|
+
line = line.strip.split("\t")
|
73
|
+
record = QSEQ.new(
|
74
|
+
machine: line[0],
|
75
|
+
run: line[1],
|
76
|
+
lane: line[2],
|
77
|
+
tile: line[3],
|
78
|
+
x: line[4],
|
79
|
+
y: line[5],
|
80
|
+
index: line[6],
|
81
|
+
read_no: line[7],
|
82
|
+
sequence: line[8],
|
83
|
+
quality: line[9],
|
84
|
+
filtered: line[10],
|
85
|
+
)
|
86
|
+
enum.yield record
|
17
87
|
end
|
18
88
|
end
|
19
|
-
yield Dna.new(:name => header, :sequence => sequence)
|
20
89
|
end
|
21
90
|
end
|
22
91
|
|
23
92
|
class Record # nucleotide record
|
93
|
+
def length
|
94
|
+
@sequence.length
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
##
|
99
|
+
# Fasta record
|
100
|
+
#
|
101
|
+
class Fasta < Record
|
24
102
|
attr_accessor :name, :sequence
|
25
103
|
|
26
104
|
def initialize(args={})
|
@@ -31,8 +109,50 @@ class Record # nucleotide record
|
|
31
109
|
def to_s
|
32
110
|
">#{@name}\n#{@sequence}\n"
|
33
111
|
end
|
112
|
+
end
|
113
|
+
|
114
|
+
##
|
115
|
+
# Fastq record
|
116
|
+
#
|
117
|
+
class Fastq < Record
|
118
|
+
attr_accessor :name, :sequence, :format, :quality
|
34
119
|
|
35
|
-
def
|
36
|
-
@
|
120
|
+
def initialize(args={})
|
121
|
+
@name = args[:name]
|
122
|
+
@sequence = args[:sequence]
|
123
|
+
@quality = args[:quality]
|
124
|
+
end
|
125
|
+
|
126
|
+
def to_s
|
127
|
+
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
##
|
132
|
+
# QSEQ record
|
133
|
+
#
|
134
|
+
class QSEQ < Record
|
135
|
+
attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
136
|
+
|
137
|
+
def initialize(args={})
|
138
|
+
@machine = args[:machine]
|
139
|
+
@run = args[:run]
|
140
|
+
@lane = args[:lane]
|
141
|
+
@tile = args[:tile]
|
142
|
+
@x = args[:x]
|
143
|
+
@y = args[:y]
|
144
|
+
@index = args[:index]
|
145
|
+
@read_no = args[:read_no]
|
146
|
+
@sequence = args[:sequence]
|
147
|
+
@quality = args[:quality]
|
148
|
+
@filtered = args[:filtered]
|
149
|
+
end
|
150
|
+
|
151
|
+
def to_s
|
152
|
+
[@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join("\t")
|
153
|
+
end
|
154
|
+
|
155
|
+
def header
|
156
|
+
[@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join(':')
|
37
157
|
end
|
38
158
|
end
|