dna 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/lib/dna.rb CHANGED
@@ -1,10 +1,13 @@
1
+ ##
2
+ # Dna
3
+ #
1
4
  class Dna # iterator
2
5
  include Enumerable
3
-
6
+
4
7
  def initialize(handle, args={})
5
8
  @handle = handle
6
- @format = args[:type].to_sym
7
-
9
+ @format = args[:format].to_sym
10
+
8
11
  @iterator =
9
12
  case @format
10
13
  when :fasta
@@ -17,7 +20,7 @@ class Dna # iterator
17
20
  raise "#{@type} not supported."
18
21
  end
19
22
  end
20
-
23
+
21
24
  def each &block
22
25
  @iterator.each do |r|
23
26
  if block_given?
@@ -27,9 +30,9 @@ class Dna # iterator
27
30
  end
28
31
  end
29
32
  end
30
-
33
+
31
34
  private
32
-
35
+
33
36
  def fasta_parser
34
37
  sequence, header = nil, nil
35
38
  Enumerator.new do |enum|
@@ -45,27 +48,29 @@ class Dna # iterator
45
48
  enum.yield Fasta.new(name: header, sequence: sequence)
46
49
  end
47
50
  end
48
-
51
+
49
52
  def fastq_parser
50
53
  c = (0..3).cycle
51
54
  Enumerator.new do |enum|
55
+ params = { name: nil, sequence: nil, quality: nil }
52
56
  @handle.each do |line|
53
57
  n = c.next
54
58
  case n
55
59
  when 0
56
- header = line.strip
60
+ params[:name] = line.strip
57
61
  when 1
58
- sequence = line.strip
62
+ params[:sequence] = line.strip
59
63
  when 2
60
64
  nil
61
65
  when 3
62
- quality = line.strip
63
- enum.yield Fastq.new(name: header, sequence: sequence, quality: quality)
66
+ params[:quality] = line.strip
67
+ record = Fastq.new params
68
+ enum.yield record
64
69
  end
65
70
  end
66
71
  end
67
72
  end
68
-
73
+
69
74
  def qseq_parser
70
75
  Enumerator.new do |enum|
71
76
  @handle.each do |line|
@@ -100,14 +105,14 @@ end
100
105
  #
101
106
  class Fasta < Record
102
107
  attr_accessor :name, :sequence
103
-
108
+
104
109
  def initialize(args={})
105
110
  @name = args[:name]
106
111
  @sequence = args[:sequence]
107
112
  end
108
-
113
+
109
114
  def to_s
110
- ">#{@name}\n#{@sequence}\n"
115
+ ">#{@name}\n#{@sequence}"
111
116
  end
112
117
  end
113
118
 
@@ -116,13 +121,13 @@ end
116
121
  #
117
122
  class Fastq < Record
118
123
  attr_accessor :name, :sequence, :format, :quality
119
-
124
+
120
125
  def initialize(args={})
121
126
  @name = args[:name]
122
127
  @sequence = args[:sequence]
123
128
  @quality = args[:quality]
124
129
  end
125
-
130
+
126
131
  def to_s
127
132
  "@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
128
133
  end
@@ -130,29 +135,23 @@ end
130
135
 
131
136
  ##
132
137
  # QSEQ record
133
- #
138
+ #2
134
139
  class QSEQ < Record
135
- attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
136
-
140
+
137
141
  def initialize(args={})
138
- @machine = args[:machine]
139
- @run = args[:run]
140
- @lane = args[:lane]
141
- @tile = args[:tile]
142
- @x = args[:x]
143
- @y = args[:y]
144
- @index = args[:index]
145
- @read_no = args[:read_no]
146
- @sequence = args[:sequence]
147
- @quality = args[:quality]
148
- @filtered = args[:filtered]
142
+ # These are the properties defined by the qseq spec
143
+ # they must be in the same order that they appear in the tab-separated qseq file
144
+ @properties = [:machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered]
145
+ @properties.each do |p|
146
+ self.class.send(:define_method, p) { args[p] }
147
+ end
149
148
  end
150
-
149
+
151
150
  def to_s
152
- [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join("\t")
151
+ @properties.collect { |x| self.send(x) }.join("\t")
153
152
  end
154
-
153
+
155
154
  def header
156
- [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join(':')
155
+ @properties.collect { |x| self.send(x) }.join("\t")
157
156
  end
158
157
  end
data/readme.md ADDED
@@ -0,0 +1,47 @@
1
+ # DNA
2
+
3
+ A minimalistic sequence file parser.
4
+
5
+ Austin G. Davis-Richardson
6
+
7
+ Supported Formats:
8
+
9
+ - [fasta](http://en.wikipedia.org/wiki/FASTA)
10
+ - [fastq](http://en.wikipedia.org/wiki/Fastq)
11
+ - [qseq](http://blog.kokocinski.net/index.php/qseq-files-format?blog=2)
12
+
13
+ [Request a format](https://github.com/audy/dna/issues/new)
14
+
15
+ ## Installation
16
+
17
+ With Ruby 1.9.2 or better:
18
+
19
+ `gem install dna`
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+
25
+ require 'dna'
26
+
27
+ File.open('sequences.fasta') do |handle|
28
+ records = Dna.new handle, format: 'fasta'
29
+
30
+ records.each do |record|
31
+ puts record.length
32
+ end
33
+ end
34
+
35
+ File.open('sequences.fastq') do |handle|
36
+ records = Dna.new handle, format: 'fastq'
37
+
38
+ records.each do |record|
39
+ puts record.quality
40
+ end
41
+ end
42
+
43
+ File.open('sequences.qseq') do |handle|
44
+ records = Dna.new handle, format: 'qseq'
45
+ puts records.first.inspect
46
+ end
47
+ ```
data/spec/dna_spec.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ describe Dna do
4
+ include_context "parser stuff"
5
+
6
+ let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
7
+ let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
8
+ let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
9
+
10
+ it 'can parse a crappily formatted fasta file' do
11
+ fasta.to_a.length.should == 2
12
+ end
13
+
14
+ it 'generates fasta objects from a fasta file' do
15
+ fasta.first.class.should == Fasta
16
+ end
17
+
18
+ it 'can parse fastq format' do
19
+ fastq.to_a.length.should == 2
20
+ end
21
+
22
+ it 'generates fastq objects from a fastq file' do
23
+ fastq.first.class.should == Fastq
24
+ end
25
+
26
+ it 'generates fastq objects that have a sequence' do
27
+ fastq.first.sequence.should_not be_nil
28
+ end
29
+
30
+ it 'can parse qseq' do
31
+ qseq.to_a.length.should == 2
32
+ end
33
+
34
+ it 'generates qseq objects from a qseq file' do
35
+ qseq.first.class.should == QSEQ
36
+ end
37
+ end
@@ -0,0 +1,94 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fasta do
4
+
5
+ let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
6
+
7
+ it 'can be created' do
8
+ fasta.should_not be_nil
9
+ end
10
+
11
+ it 'has a name' do
12
+ fasta.name.should_not be_nil
13
+ end
14
+
15
+ it 'has a sequence' do
16
+ fasta.sequence.should_not be_nil
17
+ end
18
+
19
+ it 'has a length' do
20
+ fasta.length.should == fasta.sequence.length
21
+ end
22
+
23
+ it 'can be printed as fasta' do
24
+ fasta.to_s.should == ">#{fasta.name}\n#{fasta.sequence}"
25
+ end
26
+ end
27
+
28
+ describe Fastq do
29
+ let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
30
+
31
+ it 'can be created' do
32
+ fastq.should_not be_nil
33
+ end
34
+
35
+ it 'has a name' do
36
+ fastq.name.should_not be_nil
37
+ end
38
+
39
+ it 'has a sequence' do
40
+ fastq.sequence.should_not be_nil
41
+ end
42
+
43
+ it 'has a quality score string' do
44
+ fastq.quality.should_not be_nil
45
+ end
46
+
47
+ it 'has a length' do
48
+ fastq.length.should == fastq.sequence.length
49
+ end
50
+
51
+ it 'can be printed as fastq' do
52
+ fastq.to_s.should == "@#{fastq.name}\n#{fastq.sequence}\n+#{fastq.name}\n#{fastq.quality}"
53
+ end
54
+ end
55
+
56
+ describe QSEQ do
57
+ let (:string) { "HWUSI-EAS1690\t0007\t1\t1\t1139\t20892\t0\t1\tGTGTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTGTAAAGGGCGCGTAGGCGGCCCTGTAAGTCAGTGGTGAAATCTC\tfffffffffffffffeeeeedddddabdd\dddabeeeee^aabdcabddaKdddc`RcY`_c`aT`Ib]Tc^\cZEKOZ_\]\bZVK^UZG]`[^BBBBB\t1" }
58
+
59
+ let (:properties) {
60
+ line = string.split("\t")
61
+ { machine: line[0],
62
+ run: line[1],
63
+ lane: line[2],
64
+ tile: line[3],
65
+ x: line[4],
66
+ y: line[5],
67
+ index: line[6],
68
+ read_no: line[7],
69
+ sequence: line[8],
70
+ quality: line[9],
71
+ filtered: line[10] }
72
+ }
73
+
74
+ let (:qseq) {
75
+ QSEQ.new(properties)}
76
+
77
+ it 'can be created' do
78
+ qseq.should_not be_nil
79
+ end
80
+
81
+ it 'has a header' do
82
+ qseq.header.should_not be_nil
83
+ end
84
+
85
+ ['machine', 'run', 'lane', 'tile', 'x', 'y', 'index', 'read_no', 'sequence', 'quality', 'filtered'].each do |property|
86
+ it "has property #{property}" do
87
+ qseq.send(property).should == properties[property.to_sym]
88
+ end
89
+ end
90
+
91
+ it 'can be printed back in its original form' do
92
+ qseq.to_s.should == string
93
+ end
94
+ end
@@ -0,0 +1,17 @@
1
+ $LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')
2
+
3
+ require 'rspec'
4
+ require 'dna'
5
+
6
+ path = File.dirname(__FILE__)
7
+ fasta_file = File.readlines(File.join(path, 'data/test.fasta'))
8
+ fastq_file = File.readlines(File.join(path, 'data/test.fastq'))
9
+ qseq_file = File.readlines(File.join(path, 'data/test.qseq'))
10
+
11
+ shared_context "parser stuff" do
12
+ before do
13
+ @fasta_file = fasta_file
14
+ @fastq_file = fastq_file
15
+ @qseq_file = qseq_file
16
+ end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,15 +9,19 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2010-01-27 00:00:00.000000000 Z
12
+ date: 2010-02-01 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: for parsing fasta files
14
+ description: for parsing various types of DNA sequence files
15
15
  email: harekrishna@gmail.com
16
16
  executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/dna.rb
21
+ - spec/dna_spec.rb
22
+ - spec/record_spec.rb
23
+ - spec/spec_helper.rb
24
+ - readme.md
21
25
  homepage: http://github.com/audy/dna
22
26
  licenses: []
23
27
  post_install_message:
@@ -41,5 +45,5 @@ rubyforge_project:
41
45
  rubygems_version: 1.8.15
42
46
  signing_key:
43
47
  specification_version: 3
44
- summary: fasta parser
48
+ summary: DNA sequence parser
45
49
  test_files: []