dna 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/dna.rb CHANGED
@@ -1,10 +1,13 @@
1
+ ##
2
+ # Dna
3
+ #
1
4
  class Dna # iterator
2
5
  include Enumerable
3
-
6
+
4
7
  def initialize(handle, args={})
5
8
  @handle = handle
6
- @format = args[:type].to_sym
7
-
9
+ @format = args[:format].to_sym
10
+
8
11
  @iterator =
9
12
  case @format
10
13
  when :fasta
@@ -17,7 +20,7 @@ class Dna # iterator
17
20
  raise "#{@type} not supported."
18
21
  end
19
22
  end
20
-
23
+
21
24
  def each &block
22
25
  @iterator.each do |r|
23
26
  if block_given?
@@ -27,9 +30,9 @@ class Dna # iterator
27
30
  end
28
31
  end
29
32
  end
30
-
33
+
31
34
  private
32
-
35
+
33
36
  def fasta_parser
34
37
  sequence, header = nil, nil
35
38
  Enumerator.new do |enum|
@@ -45,27 +48,29 @@ class Dna # iterator
45
48
  enum.yield Fasta.new(name: header, sequence: sequence)
46
49
  end
47
50
  end
48
-
51
+
49
52
  def fastq_parser
50
53
  c = (0..3).cycle
51
54
  Enumerator.new do |enum|
55
+ params = { name: nil, sequence: nil, quality: nil }
52
56
  @handle.each do |line|
53
57
  n = c.next
54
58
  case n
55
59
  when 0
56
- header = line.strip
60
+ params[:name] = line.strip
57
61
  when 1
58
- sequence = line.strip
62
+ params[:sequence] = line.strip
59
63
  when 2
60
64
  nil
61
65
  when 3
62
- quality = line.strip
63
- enum.yield Fastq.new(name: header, sequence: sequence, quality: quality)
66
+ params[:quality] = line.strip
67
+ record = Fastq.new params
68
+ enum.yield record
64
69
  end
65
70
  end
66
71
  end
67
72
  end
68
-
73
+
69
74
  def qseq_parser
70
75
  Enumerator.new do |enum|
71
76
  @handle.each do |line|
@@ -100,14 +105,14 @@ end
100
105
  #
101
106
  class Fasta < Record
102
107
  attr_accessor :name, :sequence
103
-
108
+
104
109
  def initialize(args={})
105
110
  @name = args[:name]
106
111
  @sequence = args[:sequence]
107
112
  end
108
-
113
+
109
114
  def to_s
110
- ">#{@name}\n#{@sequence}\n"
115
+ ">#{@name}\n#{@sequence}"
111
116
  end
112
117
  end
113
118
 
@@ -116,13 +121,13 @@ end
116
121
  #
117
122
  class Fastq < Record
118
123
  attr_accessor :name, :sequence, :format, :quality
119
-
124
+
120
125
  def initialize(args={})
121
126
  @name = args[:name]
122
127
  @sequence = args[:sequence]
123
128
  @quality = args[:quality]
124
129
  end
125
-
130
+
126
131
  def to_s
127
132
  "@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
128
133
  end
@@ -130,29 +135,23 @@ end
130
135
 
131
136
  ##
132
137
  # QSEQ record
133
- #
138
+ #2
134
139
  class QSEQ < Record
135
- attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
136
-
140
+
137
141
  def initialize(args={})
138
- @machine = args[:machine]
139
- @run = args[:run]
140
- @lane = args[:lane]
141
- @tile = args[:tile]
142
- @x = args[:x]
143
- @y = args[:y]
144
- @index = args[:index]
145
- @read_no = args[:read_no]
146
- @sequence = args[:sequence]
147
- @quality = args[:quality]
148
- @filtered = args[:filtered]
142
+ # These are the properties defined by the qseq spec
143
+ # they must be in the same order that they appear in the tab-separated qseq file
144
+ @properties = [:machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered]
145
+ @properties.each do |p|
146
+ self.class.send(:define_method, p) { args[p] }
147
+ end
149
148
  end
150
-
149
+
151
150
  def to_s
152
- [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join("\t")
151
+ @properties.collect { |x| self.send(x) }.join("\t")
153
152
  end
154
-
153
+
155
154
  def header
156
- [@machine, @run, @lane, @tile, @x, @y, @index, @read_no, @sequence, @quality, @filtered].join(':')
155
+ @properties.collect { |x| self.send(x) }.join("\t")
157
156
  end
158
157
  end
data/readme.md ADDED
@@ -0,0 +1,47 @@
1
+ # DNA
2
+
3
+ A minimalistic sequence file parser.
4
+
5
+ Austin G. Davis-Richardson
6
+
7
+ Supported Formats:
8
+
9
+ - [fasta](http://en.wikipedia.org/wiki/FASTA)
10
+ - [fastq](http://en.wikipedia.org/wiki/Fastq)
11
+ - [qseq](http://blog.kokocinski.net/index.php/qseq-files-format?blog=2)
12
+
13
+ [Request a format](https://github.com/audy/dna/issues/new)
14
+
15
+ ## Installation
16
+
17
+ With Ruby 1.9.2 or better:
18
+
19
+ `gem install dna`
20
+
21
+ ## Usage
22
+
23
+ ```ruby
24
+
25
+ require 'dna'
26
+
27
+ File.open('sequences.fasta') do |handle|
28
+ records = Dna.new handle, format: 'fasta'
29
+
30
+ records.each do |record|
31
+ puts record.length
32
+ end
33
+ end
34
+
35
+ File.open('sequences.fastq') do |handle|
36
+ records = Dna.new handle, format: 'fastq'
37
+
38
+ records.each do |record|
39
+ puts record.quality
40
+ end
41
+ end
42
+
43
+ File.open('sequences.qseq') do |handle|
44
+ records = Dna.new handle, format: 'qseq'
45
+ puts records.first.inspect
46
+ end
47
+ ```
data/spec/dna_spec.rb ADDED
@@ -0,0 +1,37 @@
1
+ require 'spec_helper'
2
+
3
+ describe Dna do
4
+ include_context "parser stuff"
5
+
6
+ let(:fasta) { Dna.new @fasta_file, format: 'fasta' }
7
+ let (:fastq) { Dna.new @fastq_file, format: 'fastq' }
8
+ let (:qseq) { Dna.new @qseq_file, format: 'qseq' }
9
+
10
+ it 'can parse a crappily formatted fasta file' do
11
+ fasta.to_a.length.should == 2
12
+ end
13
+
14
+ it 'generates fasta objects from a fasta file' do
15
+ fasta.first.class.should == Fasta
16
+ end
17
+
18
+ it 'can parse fastq format' do
19
+ fastq.to_a.length.should == 2
20
+ end
21
+
22
+ it 'generates fastq objects from a fasta file' do
23
+ fastq.first.class.should == Fastq
24
+ end
25
+
26
+ it 'generates fastq objects from a fasta file' do
27
+ fastq.first.sequence.should_not be_nil
28
+ end
29
+
30
+ it 'can parse qseq' do
31
+ qseq.to_a.length.should == 2
32
+ end
33
+
34
+ it 'generates qseq objects from a qseq file' do
35
+ qseq.first.class.should == QSEQ
36
+ end
37
+ end
@@ -0,0 +1,94 @@
1
+ require 'spec_helper'
2
+
3
+ describe Fasta do
4
+
5
+ let (:fasta) { Fasta.new name: 'test', sequence: 'GATC'}
6
+
7
+ it 'can be created' do
8
+ fasta.should_not be_nil
9
+ end
10
+
11
+ it 'has a name' do
12
+ fasta.name.should_not be_nil
13
+ end
14
+
15
+ it 'has a sequence' do
16
+ fasta.sequence.should_not be_nil
17
+ end
18
+
19
+ it 'has a length' do
20
+ fasta.length.should == fasta.sequence.length
21
+ end
22
+
23
+ it 'can be printed as fasta' do
24
+ fasta.to_s.should == ">#{fasta.name}\n#{fasta.sequence}"
25
+ end
26
+ end
27
+
28
+ describe Fastq do
29
+ let (:fastq) { Fastq.new name: 'test', sequence: 'GATC', quality: 'BBBB' }
30
+
31
+ it 'can be created' do
32
+ fastq.should_not be_nil
33
+ end
34
+
35
+ it 'has a name' do
36
+ fastq.name.should_not be_nil
37
+ end
38
+
39
+ it 'has a sequence' do
40
+ fastq.sequence.should_not be_nil
41
+ end
42
+
43
+ it 'has a quality score string' do
44
+ fastq.quality.should_not be_nil
45
+ end
46
+
47
+ it 'has a length' do
48
+ fastq.length.should == fastq.sequence.length
49
+ end
50
+
51
+ it 'can be printed as fastq' do
52
+ fastq.to_s.should == "@#{fastq.name}\n#{fastq.sequence}\n+#{fastq.name}\n#{fastq.quality}"
53
+ end
54
+ end
55
+
56
+ describe QSEQ do
57
+ let (:string) { "HWUSI-EAS1690\t0007\t1\t1\t1139\t20892\t0\t1\tGTGTGCCAGCCGCCGCGGTAATACGTAGGTGGCAAGCGTTGTCCGGATTTATTGGGTGTAAAGGGCGCGTAGGCGGCCCTGTAAGTCAGTGGTGAAATCTC\tfffffffffffffffeeeeedddddabdd\dddabeeeee^aabdcabddaKdddc`RcY`_c`aT`Ib]Tc^\cZEKOZ_\]\bZVK^UZG]`[^BBBBB\t1" }
58
+
59
+ let (:properties) {
60
+ line = string.split("\t")
61
+ { machine: line[0],
62
+ run: line[1],
63
+ lane: line[2],
64
+ tile: line[3],
65
+ x: line[4],
66
+ y: line[5],
67
+ index: line[6],
68
+ read_no: line[7],
69
+ sequence: line[8],
70
+ quality: line[9],
71
+ filtered: line[10] }
72
+ }
73
+
74
+ let (:qseq) {
75
+ QSEQ.new(properties)}
76
+
77
+ it 'can be created' do
78
+ qseq.should_not be_nil
79
+ end
80
+
81
+ it 'has a header' do
82
+ qseq.header.should_not be_nil
83
+ end
84
+
85
+ ['machine', 'run', 'lane', 'tile', 'x', 'y', 'index', 'read_no', 'sequence', 'quality', 'filtered'].each do |property|
86
+ it "has property #{property}" do
87
+ qseq.send(property).should == properties[property.to_sym]
88
+ end
89
+ end
90
+
91
+ it 'can be printed back in its original form' do
92
+ qseq.to_s.should == string
93
+ end
94
+ end
@@ -0,0 +1,17 @@
1
+ $LOAD_PATH << File.join(File.dirname(__FILE__), 'lib')
2
+
3
+ require 'rspec'
4
+ require 'dna'
5
+
6
+ path = File.dirname(__FILE__)
7
+ fasta_file = File.readlines(File.join(path, 'data/test.fasta'))
8
+ fastq_file = File.readlines(File.join(path, 'data/test.fastq'))
9
+ qseq_file = File.readlines(File.join(path, 'data/test.qseq'))
10
+
11
+ shared_context "parser stuff" do
12
+ before do
13
+ @fasta_file = fasta_file
14
+ @fastq_file = fastq_file
15
+ @qseq_file = qseq_file
16
+ end
17
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dna
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,15 +9,19 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2010-01-27 00:00:00.000000000 Z
12
+ date: 2010-02-01 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: for parsing fasta files
14
+ description: for parsing various types of DNA sequence files
15
15
  email: harekrishna@gmail.com
16
16
  executables: []
17
17
  extensions: []
18
18
  extra_rdoc_files: []
19
19
  files:
20
20
  - lib/dna.rb
21
+ - spec/dna_spec.rb
22
+ - spec/record_spec.rb
23
+ - spec/spec_helper.rb
24
+ - readme.md
21
25
  homepage: http://github.com/audy/dna
22
26
  licenses: []
23
27
  post_install_message:
@@ -41,5 +45,5 @@ rubyforge_project:
41
45
  rubygems_version: 1.8.15
42
46
  signing_key:
43
47
  specification_version: 3
44
- summary: fasta parser
48
+ summary: DNA sequence parser
45
49
  test_files: []