dna 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/dna.rb +34 -35
- data/readme.md +47 -0
- data/spec/dna_spec.rb +37 -0
- data/spec/record_spec.rb +94 -0
- data/spec/spec_helper.rb +17 -0
- metadata +8 -4
data/lib/dna.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
|
+
##
|
2
|
+
# Dna
|
3
|
+
#
|
1
4
|
class Dna # iterator
|
2
5
|
include Enumerable
|
3
|
-
|
6
|
+
|
4
7
|
def initialize(handle, args={})
|
5
8
|
@handle = handle
|
6
|
-
@format = args[:
|
7
|
-
|
9
|
+
@format = args[:format].to_sym
|
10
|
+
|
8
11
|
@iterator =
|
9
12
|
case @format
|
10
13
|
when :fasta
|
@@ -17,7 +20,7 @@ class Dna # iterator
|
|
17
20
|
raise "#{@type} not supported."
|
18
21
|
end
|
19
22
|
end
|
20
|
-
|
23
|
+
|
21
24
|
def each &block
|
22
25
|
@iterator.each do |r|
|
23
26
|
if block_given?
|
@@ -27,9 +30,9 @@ class Dna # iterator
|
|
27
30
|
end
|
28
31
|
end
|
29
32
|
end
|
30
|
-
|
33
|
+
|
31
34
|
private
|
32
|
-
|
35
|
+
|
33
36
|
def fasta_parser
|
34
37
|
sequence, header = nil, nil
|
35
38
|
Enumerator.new do |enum|
|
@@ -45,27 +48,29 @@ class Dna # iterator
|
|
45
48
|
enum.yield Fasta.new(name: header, sequence: sequence)
|
46
49
|
end
|
47
50
|
end
|
48
|
-
|
51
|
+
|
49
52
|
def fastq_parser
|
50
53
|
c = (0..3).cycle
|
51
54
|
Enumerator.new do |enum|
|
55
|
+
params = { name: nil, sequence: nil, quality: nil }
|
52
56
|
@handle.each do |line|
|
53
57
|
n = c.next
|
54
58
|
case n
|
55
59
|
when 0
|
56
|
-
|
60
|
+
params[:name] = line.strip
|
57
61
|
when 1
|
58
|
-
sequence = line.strip
|
62
|
+
params[:sequence] = line.strip
|
59
63
|
when 2
|
60
64
|
nil
|
61
65
|
when 3
|
62
|
-
quality = line.strip
|
63
|
-
|
66
|
+
params[:quality] = line.strip
|
67
|
+
record = Fastq.new params
|
68
|
+
enum.yield record
|
64
69
|
end
|
65
70
|
end
|
66
71
|
end
|
67
72
|
end
|
68
|
-
|
73
|
+
|
69
74
|
def qseq_parser
|
70
75
|
Enumerator.new do |enum|
|
71
76
|
@handle.each do |line|
|
@@ -100,14 +105,14 @@ end
|
|
100
105
|
#
|
101
106
|
class Fasta < Record
|
102
107
|
attr_accessor :name, :sequence
|
103
|
-
|
108
|
+
|
104
109
|
def initialize(args={})
|
105
110
|
@name = args[:name]
|
106
111
|
@sequence = args[:sequence]
|
107
112
|
end
|
108
|
-
|
113
|
+
|
109
114
|
def to_s
|
110
|
-
">#{@name}\n#{@sequence}
|
115
|
+
">#{@name}\n#{@sequence}"
|
111
116
|
end
|
112
117
|
end
|
113
118
|
|
@@ -116,13 +121,13 @@ end
|
|
116
121
|
#
|
117
122
|
class Fastq < Record
|
118
123
|
attr_accessor :name, :sequence, :format, :quality
|
119
|
-
|
124
|
+
|
120
125
|
def initialize(args={})
|
121
126
|
@name = args[:name]
|
122
127
|
@sequence = args[:sequence]
|
123
128
|
@quality = args[:quality]
|
124
129
|
end
|
125
|
-
|
130
|
+
|
126
131
|
def to_s
|
127
132
|
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
128
133
|
end
|
@@ -130,29 +135,23 @@ end
|
|
130
135
|
|
131
136
|
##
|
132
137
|
# QSEQ record
|
133
|
-
#
|
138
|
+
#2
|
134
139
|
class QSEQ < Record
|
135
|
-
|
136
|
-
|
140
|
+
|
137
141
|
def initialize(args={})
|
138
|
-
|
139
|
-
|
140
|
-
@
|
141
|
-
@
|
142
|
-
|
143
|
-
|
144
|
-
@index = args[:index]
|
145
|
-
@read_no = args[:read_no]
|
146
|
-
@sequence = args[:sequence]
|
147
|
-
@quality = args[:quality]
|
148
|
-
@filtered = args[:filtered]
|
142
|
+
# These are the properties defined by the qseq spec
|
143
|
+
# they must be in the same order that they appear in the tab-separated qseq file
|
144
|
+
@properties = [:machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered]
|
145
|
+
@properties.each do |p|
|
146
|
+
self.class.send(:define_method, p) { args[p] }
|
147
|
+
end
|
149
148
|
end
|
150
|
-
|
149
|
+
|
151
150
|
def to_s
|
152
|
-
|
151
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
153
152
|
end
|
154
|
-
|
153
|
+
|
155
154
|
def header
|
156
|
-
|
155
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
157
156
|
end
|
158
157
|
end
|
data/readme.md
ADDED
@@ -0,0 +1,47 @@
|
|
1
|
+
# DNA
|
2
|
+
|
3
|
+
A minimalistic sequence file parser.
|
4
|
+
|
5
|
+
Austin G. Davis-Richardson
|
6
|
+
|
7
|
+
Supported Formats:
|
8
|
+
|
9
|
+
- [fasta](http://en.wikipedia.org/wiki/FASTA)
|
10
|
+
- [fastq](http://en.wikipedia.org/wiki/Fastq)
|
11
|
+
- [qseq](http://blog.kokocinski.net/index.php/qseq-files-format?blog=2)
|
12
|
+
|
13
|
+
[Request a format](https://github.com/audy/dna/issues/new)
|
14
|
+
|
15
|
+
## Installation
|
16
|
+
|
17
|
+
With Ruby 1.9.2 or better:
|
18
|
+
|
19
|
+
`gem install dna`
|
20
|
+
|
21
|
+
## Usage
|
22
|
+
|
23
|
+
```ruby
|
24
|
+
|
25
|
+
require 'dna'
|
26
|
+
|
27
|
+
File.open('sequences.fasta') do |handle|
|
28
|
+
records = Dna.new handle, format: 'fasta'
|
29
|
+
|
30
|
+
records.each do |record|
|
31
|
+
puts record.length
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
File.open('sequences.fastq') do |handle|
|
36
|
+
records = Dna.new handle, format: 'fastq'
|
37
|
+
|
38
|
+
records.each do |record|
|
39
|
+
puts record.quality
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
File.open('sequences.qseq') do |handle|
|
44
|
+
records = Dna.new handle, format: 'qseq'
|
45
|
+
puts records.first.inspect
|
46
|
+
end
|
47
|
+
```
|
data/spec/dna_spec.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Dna do
|
4
|
+
include_context "parser stuff"
|
5
|
+
|
6
|
+
let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
|
7
|
+
let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
|
8
|
+
let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
|
9
|
+
|
10
|
+
it 'can parse a crappily formatted fasta file' do
|
11
|
+
fasta.to_a.length.should == 2
|
12
|
+
end
|
13
|
+
|
14
|
+
it 'generates fasta objects from a fasta file' do
|
15
|
+
fasta.first.class.should == Fasta
|
16
|
+
end
|
17
|
+
|
18
|
+
it 'can parse fastq format' do
|
19
|
+
fastq.to_a.length.should == 2
|
20
|
+
end
|
21
|
+
|
22
|
+
it 'generates fastq objects from a fasta file' do
|
23
|
+
fastq.first.class.should == Fastq
|
24
|
+
end
|
25
|
+
|
26
|
+
it 'generates fastq objects from a fasta file' do
|
27
|
+
fastq.first.sequence.should_not be_nil
|
28
|
+
end
|
29
|
+
|
30
|
+
it 'can parse qseq' do
|
31
|
+
qseq.to_a.length.should == 2
|
32
|
+
end
|
33
|
+
|
34
|
+
it 'generates qseq objects from a qseq file' do
|
35
|
+
qseq.first.class.should == QSEQ
|
36
|
+
end
|
37
|
+
end
|
data/spec/record_spec.rb
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Fasta do
|
4
|
+
|
5
|
+
let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
|
6
|
+
|
7
|
+
it 'can be created' do
|
8
|
+
fasta.should_not be_nil
|
9
|
+
end
|
10
|
+
|
11
|
+
it 'has a name' do
|
12
|
+
fasta.name.should_not be_nil
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'has a sequence' do
|
16
|
+
fasta.sequence.should_not be_nil
|
17
|
+
end
|
18
|
+
|
19
|
+
it 'has a length' do
|
20
|
+
fasta.length.should == fasta.sequence.length
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'can be printed as fasta' do
|
24
|
+
fasta.to_s.should == ">#{fasta.name}\n#{fasta.sequence}"
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe Fastq do
|
29
|
+
let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
|
30
|
+
|
31
|
+
it 'can be created' do
|
32
|
+
fastq.should_not be_nil
|
33
|
+
end
|
34
|
+
|
35
|
+
it 'has a name' do
|
36
|
+
fastq.name.should_not be_nil
|
37
|
+
end
|
38
|
+
|
39
|
+
it 'has a sequence' do
|
40
|
+
fastq.sequence.should_not be_nil
|
41
|
+
end
|
42
|
+
|
43
|
+
it 'has a quality score string' do
|
44
|
+
fastq.quality.should_not be_nil
|
45
|
+
end
|
46
|
+
|
47
|
+
it 'has a length' do
|
48
|
+
fastq.length.should == fastq.sequence.length
|
49
|
+
end
|
50
|
+
|
51
|
+
it 'can be printed as fastq' do
|
52
|
+
fastq.to_s.should == "@#{fastq.name}\n#{fastq.sequence}\n+#{fastq.name}\n#{fastq.quality}"
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
describe QSEQ do
|
57
|
+
let (:string) { "HWUSI-EAS1690\t0007\t1\t1\t1139\t20892\t0\t1\tGTGTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTGTAAAGGGCGCGTAGGCGGCCCTGTAAGTCAGTGGTGAAATCTC\tfffffffffffffffeeeeedddddabdd\dddabeeeee^aabdcabddaKdddc`RcY`_c`aT`Ib]Tc^\cZEKOZ_\]\bZVK^UZG]`[^BBBBB\t1" }
|
58
|
+
|
59
|
+
let (:properties) {
|
60
|
+
line = string.split("\t")
|
61
|
+
{ machine: line[0],
|
62
|
+
run: line[1],
|
63
|
+
lane: line[2],
|
64
|
+
tile: line[3],
|
65
|
+
x: line[4],
|
66
|
+
y: line[5],
|
67
|
+
index: line[6],
|
68
|
+
read_no: line[7],
|
69
|
+
sequence: line[8],
|
70
|
+
quality: line[9],
|
71
|
+
filtered: line[10] }
|
72
|
+
}
|
73
|
+
|
74
|
+
let (:qseq) {
|
75
|
+
QSEQ.new(properties)}
|
76
|
+
|
77
|
+
it 'can be created' do
|
78
|
+
qseq.should_not be_nil
|
79
|
+
end
|
80
|
+
|
81
|
+
it 'has a header' do
|
82
|
+
qseq.header.should_not be_nil
|
83
|
+
end
|
84
|
+
|
85
|
+
['machine', 'run', 'lane', 'tile', 'x', 'y', 'index', 'read_no', 'sequence', 'quality', 'filtered'].each do |property|
|
86
|
+
it "has property #{property}" do
|
87
|
+
qseq.send(property).should == properties[property.to_sym]
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
it 'can be printed back in its original form' do
|
92
|
+
qseq.to_s.should == string
|
93
|
+
end
|
94
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
$LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')
|
2
|
+
|
3
|
+
require 'rspec'
|
4
|
+
require 'dna'
|
5
|
+
|
6
|
+
path = File.dirname(__FILE__)
|
7
|
+
fasta_file = File.readlines(File.join(path, 'data/test.fasta'))
|
8
|
+
fastq_file = File.readlines(File.join(path, 'data/test.fastq'))
|
9
|
+
qseq_file = File.readlines(File.join(path, 'data/test.qseq'))
|
10
|
+
|
11
|
+
shared_context "parser stuff" do
|
12
|
+
before do
|
13
|
+
@fasta_file = fasta_file
|
14
|
+
@fastq_file = fastq_file
|
15
|
+
@qseq_file = qseq_file
|
16
|
+
end
|
17
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,15 +9,19 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2010-01
|
12
|
+
date: 2010-02-01 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: for parsing
|
14
|
+
description: for parsing various types of DNA sequence files
|
15
15
|
email: harekrishna@gmail.com
|
16
16
|
executables: []
|
17
17
|
extensions: []
|
18
18
|
extra_rdoc_files: []
|
19
19
|
files:
|
20
20
|
- lib/dna.rb
|
21
|
+
- spec/dna_spec.rb
|
22
|
+
- spec/record_spec.rb
|
23
|
+
- spec/spec_helper.rb
|
24
|
+
- readme.md
|
21
25
|
homepage: http://github.com/audy/dna
|
22
26
|
licenses: []
|
23
27
|
post_install_message:
|
@@ -41,5 +45,5 @@ rubyforge_project:
|
|
41
45
|
rubygems_version: 1.8.15
|
42
46
|
signing_key:
|
43
47
|
specification_version: 3
|
44
|
-
summary:
|
48
|
+
summary: DNA sequence parser
|
45
49
|
test_files: []
|