dna 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/dna.rb +34 -35
- data/readme.md +47 -0
- data/spec/dna_spec.rb +37 -0
- data/spec/record_spec.rb +94 -0
- data/spec/spec_helper.rb +17 -0
- metadata +8 -4
data/lib/dna.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
##
|
2
|
+
# Dna
|
3
|
+
#
|
1
4
|
class Dna # iterator
|
2
5
|
include Enumerable
|
3
|
-
|
6
|
+
|
4
7
|
def initialize(handle, args={})
|
5
8
|
@handle = handle
|
6
|
-
@format = args[:
|
7
|
-
|
9
|
+
@format = args[:format].to_sym
|
10
|
+
|
8
11
|
@iterator =
|
9
12
|
case @format
|
10
13
|
when :fasta
|
@@ -17,7 +20,7 @@ class Dna # iterator
|
|
17
20
|
raise "#{@type} not supported."
|
18
21
|
end
|
19
22
|
end
|
20
|
-
|
23
|
+
|
21
24
|
def each &block
|
22
25
|
@iterator.each do |r|
|
23
26
|
if block_given?
|
@@ -27,9 +30,9 @@ class Dna # iterator
|
|
27
30
|
end
|
28
31
|
end
|
29
32
|
end
|
30
|
-
|
33
|
+
|
31
34
|
private
|
32
|
-
|
35
|
+
|
33
36
|
def fasta_parser
|
34
37
|
sequence, header = nil, nil
|
35
38
|
Enumerator.new do |enum|
|
@@ -45,27 +48,29 @@ class Dna # iterator
|
|
45
48
|
enum.yield Fasta.new(name: header, sequence: sequence)
|
46
49
|
end
|
47
50
|
end
|
48
|
-
|
51
|
+
|
49
52
|
def fastq_parser
|
50
53
|
c = (0..3).cycle
|
51
54
|
Enumerator.new do |enum|
|
55
|
+
params = { name: nil, sequence: nil, quality: nil }
|
52
56
|
@handle.each do |line|
|
53
57
|
n = c.next
|
54
58
|
case n
|
55
59
|
when 0
|
56
|
-
|
60
|
+
params[:name] = line.strip
|
57
61
|
when 1
|
58
|
-
sequence = line.strip
|
62
|
+
params[:sequence] = line.strip
|
59
63
|
when 2
|
60
64
|
nil
|
61
65
|
when 3
|
62
|
-
quality = line.strip
|
63
|
-
|
66
|
+
params[:quality] = line.strip
|
67
|
+
record = Fastq.new params
|
68
|
+
enum.yield record
|
64
69
|
end
|
65
70
|
end
|
66
71
|
end
|
67
72
|
end
|
68
|
-
|
73
|
+
|
69
74
|
def qseq_parser
|
70
75
|
Enumerator.new do |enum|
|
71
76
|
@handle.each do |line|
|
@@ -100,14 +105,14 @@ end
|
|
100
105
|
#
|
101
106
|
class Fasta < Record
|
102
107
|
attr_accessor :name, :sequence
|
103
|
-
|
108
|
+
|
104
109
|
def initialize(args={})
|
105
110
|
@name = args[:name]
|
106
111
|
@sequence = args[:sequence]
|
107
112
|
end
|
108
|
-
|
113
|
+
|
109
114
|
def to_s
|
110
|
-
">#{@name}\n#{@sequence}
|
115
|
+
">#{@name}\n#{@sequence}"
|
111
116
|
end
|
112
117
|
end
|
113
118
|
|
@@ -116,13 +121,13 @@ end
|
|
116
121
|
#
|
117
122
|
class Fastq < Record
|
118
123
|
attr_accessor :name, :sequence, :format, :quality
|
119
|
-
|
124
|
+
|
120
125
|
def initialize(args={})
|
121
126
|
@name = args[:name]
|
122
127
|
@sequence = args[:sequence]
|
123
128
|
@quality = args[:quality]
|
124
129
|
end
|
125
|
-
|
130
|
+
|
126
131
|
def to_s
|
127
132
|
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
128
133
|
end
|
@@ -130,29 +135,23 @@ end
|
|
130
135
|
|
131
136
|
##
|
132
137
|
# QSEQ record
|
133
|
-
#
|
138
|
+
#
|
134
139
|
class QSEQ < Record
|
135
|
-
|
136
|
-
|
140
|
+
|
137
141
|
def initialize(args={})
|
138
|
-
|
139
|
-
|
140
|
-
@
|
141
|
-
@
|
142
|
-
|
143
|
-
|
144
|
-
@index = args[:index]
|
145
|
-
@read_no = args[:read_no]
|
146
|
-
@sequence = args[:sequence]
|
147
|
-
@quality = args[:quality]
|
148
|
-
@filtered = args[:filtered]
|
142
|
+
# These are the properties defined by the qseq spec
|
143
|
+
# they must be in the same order that they appear in the tab-separated qseq file
|
144
|
+
@properties = [:machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered]
|
145
|
+
@properties.each do |p|
|
146
|
+
self.class.send(:define_method, p) { args[p] }
|
147
|
+
end
|
149
148
|
end
|
150
|
-
|
149
|
+
|
151
150
|
def to_s
|
152
|
-
|
151
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
153
152
|
end
|
154
|
-
|
153
|
+
|
155
154
|
def header
|
156
|
-
|
155
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
157
156
|
end
|
158
157
|
end
|
data/readme.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# DNA
|
2
|
+
|
3
|
+
A minimalistic sequence file parser.
|
4
|
+
|
5
|
+
Austin G. Davis-Richardson
|
6
|
+
|
7
|
+
Supported Formats:
|
8
|
+
|
9
|
+
- [fasta](http://en.wikipedia.org/wiki/FASTA)
|
10
|
+
- [fastq](http://en.wikipedia.org/wiki/Fastq)
|
11
|
+
- [qseq](http://blog.kokocinski.net/index.php/qseq-files-format?blog=2)
|
12
|
+
|
13
|
+
[Request a format](https://github.com/audy/dna/issues/new)
|
14
|
+
|
15
|
+
## Installation
|
16
|
+
|
17
|
+
With Ruby 1.9.2 or better:
|
18
|
+
|
19
|
+
`gem install dna`
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
|
25
|
+
require 'dna'
|
26
|
+
|
27
|
+
File.open('sequences.fasta') do |handle|
|
28
|
+
records = Dna.new handle, format: 'fasta'
|
29
|
+
|
30
|
+
records.each do |record|
|
31
|
+
puts record.length
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
File.open('sequences.fastq') do |handle|
|
36
|
+
records = Dna.new handle, format: 'fastq'
|
37
|
+
|
38
|
+
records.each do |record|
|
39
|
+
puts record.quality
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
File.open('sequences.qseq') do |handle|
|
44
|
+
records = Dna.new handle, format: 'qseq'
|
45
|
+
puts records.first.inspect
|
46
|
+
end
|
47
|
+
```
|
data/spec/dna_spec.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Dna do
|
4
|
+
include_context "parser stuff"
|
5
|
+
|
6
|
+
let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
|
7
|
+
let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
|
8
|
+
let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
|
9
|
+
|
10
|
+
it 'can parse a crappily formatted fasta file' do
|
11
|
+
fasta.to_a.length.should == 2
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'generates fasta objects from a fasta file' do
|
15
|
+
fasta.first.class.should == Fasta
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'can parse fastq format' do
|
19
|
+
fastq.to_a.length.should == 2
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'generates fastq objects from a fasta file' do
|
23
|
+
fastq.first.class.should == Fastq
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'generates fastq objects from a fasta file' do
|
27
|
+
fastq.first.sequence.should_not be_nil
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'can parse qseq' do
|
31
|
+
qseq.to_a.length.should == 2
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'generates qseq objects from a qseq file' do
|
35
|
+
qseq.first.class.should == QSEQ
|
36
|
+
end
|
37
|
+
end
|
data/spec/record_spec.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Fasta do
|
4
|
+
|
5
|
+
let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
|
6
|
+
|
7
|
+
it 'can be created' do
|
8
|
+
fasta.should_not be_nil
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'has a name' do
|
12
|
+
fasta.name.should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'has a sequence' do
|
16
|
+
fasta.sequence.should_not be_nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'has a length' do
|
20
|
+
fasta.length.should == fasta.sequence.length
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'can be printed as fasta' do
|
24
|
+
fasta.to_s.should == ">#{fasta.name}\n#{fasta.sequence}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe Fastq do
|
29
|
+
let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
|
30
|
+
|
31
|
+
it 'can be created' do
|
32
|
+
fastq.should_not be_nil
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'has a name' do
|
36
|
+
fastq.name.should_not be_nil
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'has a sequence' do
|
40
|
+
fastq.sequence.should_not be_nil
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'has a quality score string' do
|
44
|
+
fastq.quality.should_not be_nil
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'has a length' do
|
48
|
+
fastq.length.should == fastq.sequence.length
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'can be printed as fastq' do
|
52
|
+
fastq.to_s.should == "@#{fastq.name}\n#{fastq.sequence}\n+#{fastq.name}\n#{fastq.quality}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe QSEQ do
|
57
|
+
let (:string) { "HWUSI-EAS1690\t0007\t1\t1\t1139\t20892\t0\t1\tGTGTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTGTAAAGGGCGCGTAGGCGGCCCTGTAAGTCAGTGGTGAAATCTC\tfffffffffffffffeeeeedddddabdd\dddabeeeee^aabdcabddaKdddc`RcY`_c`aT`Ib]Tc^\cZEKOZ_\]\bZVK^UZG]`[^BBBBB\t1" }
|
58
|
+
|
59
|
+
let (:properties) {
|
60
|
+
line = string.split("\t")
|
61
|
+
{ machine: line[0],
|
62
|
+
run: line[1],
|
63
|
+
lane: line[2],
|
64
|
+
tile: line[3],
|
65
|
+
x: line[4],
|
66
|
+
y: line[5],
|
67
|
+
index: line[6],
|
68
|
+
read_no: line[7],
|
69
|
+
sequence: line[8],
|
70
|
+
quality: line[9],
|
71
|
+
filtered: line[10] }
|
72
|
+
}
|
73
|
+
|
74
|
+
let (:qseq) {
|
75
|
+
QSEQ.new(properties)}
|
76
|
+
|
77
|
+
it 'can be created' do
|
78
|
+
qseq.should_not be_nil
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'has a header' do
|
82
|
+
qseq.header.should_not be_nil
|
83
|
+
end
|
84
|
+
|
85
|
+
['machine', 'run', 'lane', 'tile', 'x', 'y', 'index', 'read_no', 'sequence', 'quality', 'filtered'].each do |property|
|
86
|
+
it "has property #{property}" do
|
87
|
+
qseq.send(property).should == properties[property.to_sym]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'can be printed back in its original form' do
|
92
|
+
qseq.to_s.should == string
|
93
|
+
end
|
94
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')
|
2
|
+
|
3
|
+
require 'rspec'
|
4
|
+
require 'dna'
|
5
|
+
|
6
|
+
path = File.dirname(__FILE__)
|
7
|
+
fasta_file = File.readlines(File.join(path, 'data/test.fasta'))
|
8
|
+
fastq_file = File.readlines(File.join(path, 'data/test.fastq'))
|
9
|
+
qseq_file = File.readlines(File.join(path, 'data/test.qseq'))
|
10
|
+
|
11
|
+
shared_context "parser stuff" do
|
12
|
+
before do
|
13
|
+
@fasta_file = fasta_file
|
14
|
+
@fastq_file = fastq_file
|
15
|
+
@qseq_file = qseq_file
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,15 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2010-01
|
12
|
+
date: 2010-02-01 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: for parsing
|
14
|
+
description: for parsing various types of DNA sequence files
|
15
15
|
email: harekrishna@gmail.com
|
16
16
|
executables: []
|
17
17
|
extensions: []
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/dna.rb
|
21
|
+
- spec/dna_spec.rb
|
22
|
+
- spec/record_spec.rb
|
23
|
+
- spec/spec_helper.rb
|
24
|
+
- readme.md
|
21
25
|
homepage: http://github.com/audy/dna
|
22
26
|
licenses: []
|
23
27
|
post_install_message:
|
@@ -41,5 +45,5 @@ rubyforge_project:
|
|
41
45
|
rubygems_version: 1.8.15
|
42
46
|
signing_key:
|
43
47
|
specification_version: 3
|
44
|
-
summary:
|
48
|
+
summary: DNA sequence parser
|
45
49
|
test_files: []
|