dna 0.0.11 → 0.0.12
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -0
- data/Rakefile +48 -0
- data/dna.gemspec +55 -14
- data/lib/dna/dna.rb +58 -0
- data/lib/{parsers → dna/parsers}/fasta.rb +18 -2
- data/lib/{parsers → dna/parsers}/fastq.rb +19 -2
- data/lib/dna/parsers/qseq.rb +59 -0
- data/lib/dna/record.rb +7 -0
- data/lib/dna.rb +1 -130
- metadata +47 -24
- data/lib/parsers/qseq.rb +0 -26
data/Gemfile
CHANGED
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
|
6
|
+
DNA_VERSION = '0.0.12'
|
7
|
+
|
8
|
+
begin
|
9
|
+
Bundler.setup(:default, :development)
|
10
|
+
rescue Bundler::BundlerError => e
|
11
|
+
$stderr.puts e.message
|
12
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
13
|
+
exit e.status_code
|
14
|
+
end
|
15
|
+
require 'rake'
|
16
|
+
|
17
|
+
require 'jeweler'
|
18
|
+
Jeweler::Tasks.new do |gem|
|
19
|
+
gem.name = "dna"
|
20
|
+
gem.homepage = "http://audy.github.com/dna"
|
21
|
+
gem.license = "MIT"
|
22
|
+
gem.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
23
|
+
gem.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
24
|
+
gem.email = "harekrishna@gmail.com"
|
25
|
+
gem.authors = ["Austin G. Davis-Richardson"]
|
26
|
+
gem.version = DNA_VERSION
|
27
|
+
end
|
28
|
+
Jeweler::RubygemsDotOrgTasks.new
|
29
|
+
|
30
|
+
require 'rake/testtask'
|
31
|
+
Rake::TestTask.new(:test) do |test|
|
32
|
+
test.libs << 'lib' << 'test'
|
33
|
+
test.pattern = 'test/**/test_*.rb'
|
34
|
+
test.verbose = true
|
35
|
+
end
|
36
|
+
|
37
|
+
task :default => :test
|
38
|
+
|
39
|
+
require 'rdoc/task'
|
40
|
+
Rake::RDocTask.new do |rdoc|
|
41
|
+
|
42
|
+
version = DNA_VERSION
|
43
|
+
|
44
|
+
rdoc.rdoc_dir = 'rdoc'
|
45
|
+
rdoc.title = "test #{version}"
|
46
|
+
rdoc.rdoc_files.include('readme.md')
|
47
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
48
|
+
end
|
data/dna.gemspec
CHANGED
@@ -1,19 +1,60 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
s.summary = 'DNA sequence parser'
|
7
|
-
s.description = 'for parsing various types of DNA sequence files'
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
8
5
|
|
9
|
-
|
10
|
-
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "dna"
|
8
|
+
s.version = "0.0.12"
|
11
9
|
|
12
|
-
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
+
s.date = "2012-08-09"
|
13
|
+
s.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
14
|
+
s.email = "harekrishna@gmail.com"
|
15
|
+
s.executables = ["dna"]
|
16
|
+
s.files = [
|
17
|
+
".rspec",
|
18
|
+
".rvmrc",
|
19
|
+
"Gemfile",
|
20
|
+
"Rakefile",
|
21
|
+
"bin/dna",
|
22
|
+
"dna.gemspec",
|
23
|
+
"lib/dna.rb",
|
24
|
+
"lib/dna/dna.rb",
|
25
|
+
"lib/dna/parsers/fasta.rb",
|
26
|
+
"lib/dna/parsers/fastq.rb",
|
27
|
+
"lib/dna/parsers/qseq.rb",
|
28
|
+
"lib/dna/record.rb",
|
29
|
+
"readme.md",
|
30
|
+
"spec/data/empty.txt",
|
31
|
+
"spec/data/test.fasta",
|
32
|
+
"spec/data/test.fasta.gz",
|
33
|
+
"spec/data/test.fastq",
|
34
|
+
"spec/data/test.qseq",
|
35
|
+
"spec/dna_spec.rb",
|
36
|
+
"spec/record_spec.rb",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = "http://audy.github.com/dna"
|
40
|
+
s.licenses = ["MIT"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.24"
|
43
|
+
s.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
13
44
|
|
14
|
-
|
15
|
-
|
16
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
17
47
|
|
18
|
-
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<rspec>, [">= 0"])
|
50
|
+
s.add_runtime_dependency(%q<jeweler>, [">= 0"])
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
53
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
54
|
+
end
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
57
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
58
|
+
end
|
19
59
|
end
|
60
|
+
|
data/lib/dna/dna.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# Dna
|
4
|
+
#
|
5
|
+
|
6
|
+
class Dna
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
attr_reader :format
|
10
|
+
|
11
|
+
def initialize(handle)
|
12
|
+
@handle = handle
|
13
|
+
@format = detect_format
|
14
|
+
@iterator =
|
15
|
+
case @format
|
16
|
+
when :fasta
|
17
|
+
FastaParser.new @handle
|
18
|
+
when :fastq
|
19
|
+
FastqParser.new @handle
|
20
|
+
when :qseq
|
21
|
+
QSEQParser.new @handle
|
22
|
+
else
|
23
|
+
raise "#{@format} not supported."
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def detect_format
|
28
|
+
|
29
|
+
# is gzipped?
|
30
|
+
unless @handle.class == Array # for tests mostly...
|
31
|
+
begin
|
32
|
+
@handle = Zlib::GzipReader.new(@handle)
|
33
|
+
rescue
|
34
|
+
@handle.rewind
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
first_line = @handle.first
|
39
|
+
@handle.rewind if @handle.class == File
|
40
|
+
|
41
|
+
return :unknown if first_line == nil
|
42
|
+
|
43
|
+
# detect qseq by counting number of tabs.
|
44
|
+
if first_line.split("\t").length == 11
|
45
|
+
return :qseq
|
46
|
+
elsif first_line[0].chr == '>'
|
47
|
+
return :fasta
|
48
|
+
elsif first_line[0].chr == '@'
|
49
|
+
return :fastq
|
50
|
+
else
|
51
|
+
raise Exception, "cannot detect format of input"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def each &block
|
56
|
+
@iterator.each(&block)
|
57
|
+
end
|
58
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
class FastaParser
|
2
|
-
|
2
|
+
|
3
3
|
def initialize(handle)
|
4
4
|
@handle = handle
|
5
5
|
end
|
@@ -17,4 +17,20 @@ class FastaParser
|
|
17
17
|
end
|
18
18
|
yield Fasta.new(:name => header, :sequence => sequence)
|
19
19
|
end
|
20
|
-
end
|
20
|
+
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# Fasta record
|
24
|
+
#
|
25
|
+
class Fasta < Record
|
26
|
+
attr_accessor :name, :sequence
|
27
|
+
|
28
|
+
def initialize(args={})
|
29
|
+
@name = args[:name]
|
30
|
+
@sequence = args[:sequence]
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_s
|
34
|
+
">#{@name}\n#{@sequence}"
|
35
|
+
end
|
36
|
+
end
|
@@ -3,7 +3,7 @@ class FastqParser
|
|
3
3
|
def initialize(handle)
|
4
4
|
@handle = handle
|
5
5
|
end
|
6
|
-
|
6
|
+
|
7
7
|
def each
|
8
8
|
c = (0..3).cycle
|
9
9
|
params = { :name => nil, :sequence => nil, :quality => nil }
|
@@ -23,4 +23,21 @@ class FastqParser
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
|
-
end
|
26
|
+
end
|
27
|
+
|
28
|
+
##
|
29
|
+
# Fastq record
|
30
|
+
#
|
31
|
+
class Fastq < Record
|
32
|
+
attr_accessor :name, :sequence, :format, :quality
|
33
|
+
|
34
|
+
def initialize(args={})
|
35
|
+
@name = args[:name]
|
36
|
+
@sequence = args[:sequence]
|
37
|
+
@quality = args[:quality]
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_s
|
41
|
+
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class QSEQParser
|
2
|
+
|
3
|
+
def initialize(handle)
|
4
|
+
@handle = handle
|
5
|
+
end
|
6
|
+
|
7
|
+
def each
|
8
|
+
@handle.each do |line|
|
9
|
+
line = line.strip.split("\t")
|
10
|
+
record = QSEQ.new(
|
11
|
+
:machine => line[0],
|
12
|
+
:run => line[1],
|
13
|
+
:lane => line[2],
|
14
|
+
:tile => line[3],
|
15
|
+
:x => line[4],
|
16
|
+
:y => line[5],
|
17
|
+
:index => line[6],
|
18
|
+
:read_no => line[7],
|
19
|
+
:sequence => line[8],
|
20
|
+
:quality => line[9],
|
21
|
+
:filtered => line[10]
|
22
|
+
)
|
23
|
+
yield record
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
##
|
29
|
+
# QSEQ record
|
30
|
+
#
|
31
|
+
class QSEQ < Record
|
32
|
+
|
33
|
+
attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
34
|
+
|
35
|
+
def initialize(args={})
|
36
|
+
# These are the properties defined by the qseq spec
|
37
|
+
# they must be in the same order that they appear in the tab-separated qseq file
|
38
|
+
@properties = :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
39
|
+
@machine = args[:machine]
|
40
|
+
@run = args[:run]
|
41
|
+
@lane = args[:lane]
|
42
|
+
@tile = args[:tile]
|
43
|
+
@x = args[:x]
|
44
|
+
@y = args[:y]
|
45
|
+
@index = args[:index]
|
46
|
+
@read_no = args[:read_no]
|
47
|
+
@sequence = args[:sequence]
|
48
|
+
@quality = args[:quality]
|
49
|
+
@filtered = args[:filtered]
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s
|
53
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
54
|
+
end
|
55
|
+
|
56
|
+
def header
|
57
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
58
|
+
end
|
59
|
+
end
|
data/lib/dna/record.rb
ADDED
data/lib/dna.rb
CHANGED
@@ -1,132 +1,3 @@
|
|
1
1
|
require 'zlib'
|
2
2
|
|
3
|
-
Dir.glob(File.join(File.dirname(__FILE__), '
|
4
|
-
|
5
|
-
##
|
6
|
-
# Dna
|
7
|
-
#
|
8
|
-
class Dna
|
9
|
-
include Enumerable
|
10
|
-
|
11
|
-
attr_reader :format
|
12
|
-
|
13
|
-
def initialize(handle)
|
14
|
-
@handle = handle
|
15
|
-
@format = detect_format
|
16
|
-
@iterator =
|
17
|
-
case @format
|
18
|
-
when :fasta
|
19
|
-
FastaParser.new @handle
|
20
|
-
when :fastq
|
21
|
-
FastqParser.new @handle
|
22
|
-
when :qseq
|
23
|
-
QSEQParser.new @handle
|
24
|
-
else
|
25
|
-
raise "#{@format} not supported."
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def detect_format
|
30
|
-
|
31
|
-
# is gzipped?
|
32
|
-
unless @handle.class == Array # for tests mostly...
|
33
|
-
begin
|
34
|
-
@handle = Zlib::GzipReader.new(@handle)
|
35
|
-
rescue
|
36
|
-
@handle.rewind
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
first_line = @handle.first
|
41
|
-
@handle.rewind if @handle.class == File
|
42
|
-
|
43
|
-
return :unknown if first_line == nil
|
44
|
-
|
45
|
-
# detect qseq by counting number of tabs.
|
46
|
-
if first_line.split("\t").length == 11
|
47
|
-
return :qseq
|
48
|
-
elsif first_line[0].chr == '>'
|
49
|
-
return :fasta
|
50
|
-
elsif first_line[0].chr == '@'
|
51
|
-
return :fastq
|
52
|
-
else
|
53
|
-
raise Exception, "cannot detect format of input"
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def each &block
|
58
|
-
@iterator.each(&block)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class Record # nucleotide record
|
63
|
-
def length
|
64
|
-
@sequence.length
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
##
|
69
|
-
# Fasta record
|
70
|
-
#
|
71
|
-
class Fasta < Record
|
72
|
-
attr_accessor :name, :sequence
|
73
|
-
|
74
|
-
def initialize(args={})
|
75
|
-
@name = args[:name]
|
76
|
-
@sequence = args[:sequence]
|
77
|
-
end
|
78
|
-
|
79
|
-
def to_s
|
80
|
-
">#{@name}\n#{@sequence}"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
##
|
85
|
-
# Fastq record
|
86
|
-
#
|
87
|
-
class Fastq < Record
|
88
|
-
attr_accessor :name, :sequence, :format, :quality
|
89
|
-
|
90
|
-
def initialize(args={})
|
91
|
-
@name = args[:name]
|
92
|
-
@sequence = args[:sequence]
|
93
|
-
@quality = args[:quality]
|
94
|
-
end
|
95
|
-
|
96
|
-
def to_s
|
97
|
-
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
##
|
102
|
-
# QSEQ record
|
103
|
-
#2
|
104
|
-
class QSEQ < Record
|
105
|
-
|
106
|
-
attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
107
|
-
|
108
|
-
def initialize(args={})
|
109
|
-
# These are the properties defined by the qseq spec
|
110
|
-
# they must be in the same order that they appear in the tab-separated qseq file
|
111
|
-
@properties = :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
112
|
-
@machine = args[:machine]
|
113
|
-
@run = args[:run]
|
114
|
-
@lane = args[:lane]
|
115
|
-
@tile = args[:tile]
|
116
|
-
@x = args[:x]
|
117
|
-
@y = args[:y]
|
118
|
-
@index = args[:index]
|
119
|
-
@read_no = args[:read_no]
|
120
|
-
@sequence = args[:sequence]
|
121
|
-
@quality = args[:quality]
|
122
|
-
@filtered = args[:filtered]
|
123
|
-
end
|
124
|
-
|
125
|
-
def to_s
|
126
|
-
@properties.collect { |x| self.send(x) }.join("\t")
|
127
|
-
end
|
128
|
-
|
129
|
-
def header
|
130
|
-
@properties.collect { |x| self.send(x) }.join("\t")
|
131
|
-
end
|
132
|
-
end
|
3
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'dna', '*.rb')).each { |f| require f }
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 12
|
10
|
+
version: 0.0.12
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,10 +15,37 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
19
|
-
dependencies:
|
20
|
-
|
21
|
-
|
18
|
+
date: 2012-08-09 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :runtime
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
version_requirements: *id001
|
32
|
+
name: rspec
|
33
|
+
prerelease: false
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
type: :runtime
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 3
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
version: "0"
|
45
|
+
version_requirements: *id002
|
46
|
+
name: jeweler
|
47
|
+
prerelease: false
|
48
|
+
description: Simple FASTA/FASTQ/QSEQ parser library for Ruby.
|
22
49
|
email: harekrishna@gmail.com
|
23
50
|
executables:
|
24
51
|
- dna
|
@@ -30,12 +57,15 @@ files:
|
|
30
57
|
- .rspec
|
31
58
|
- .rvmrc
|
32
59
|
- Gemfile
|
60
|
+
- Rakefile
|
33
61
|
- bin/dna
|
34
62
|
- dna.gemspec
|
35
63
|
- lib/dna.rb
|
36
|
-
- lib/
|
37
|
-
- lib/parsers/
|
38
|
-
- lib/parsers/
|
64
|
+
- lib/dna/dna.rb
|
65
|
+
- lib/dna/parsers/fasta.rb
|
66
|
+
- lib/dna/parsers/fastq.rb
|
67
|
+
- lib/dna/parsers/qseq.rb
|
68
|
+
- lib/dna/record.rb
|
39
69
|
- readme.md
|
40
70
|
- spec/data/empty.txt
|
41
71
|
- spec/data/test.fasta
|
@@ -45,9 +75,9 @@ files:
|
|
45
75
|
- spec/dna_spec.rb
|
46
76
|
- spec/record_spec.rb
|
47
77
|
- spec/spec_helper.rb
|
48
|
-
homepage: http://github.com/
|
49
|
-
licenses:
|
50
|
-
|
78
|
+
homepage: http://audy.github.com/dna
|
79
|
+
licenses:
|
80
|
+
- MIT
|
51
81
|
post_install_message:
|
52
82
|
rdoc_options: []
|
53
83
|
|
@@ -73,17 +103,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
103
|
version: "0"
|
74
104
|
requirements: []
|
75
105
|
|
76
|
-
rubyforge_project:
|
106
|
+
rubyforge_project:
|
77
107
|
rubygems_version: 1.8.24
|
78
108
|
signing_key:
|
79
109
|
specification_version: 3
|
80
|
-
summary:
|
81
|
-
test_files:
|
82
|
-
|
83
|
-
- spec/data/test.fasta
|
84
|
-
- spec/data/test.fasta.gz
|
85
|
-
- spec/data/test.fastq
|
86
|
-
- spec/data/test.qseq
|
87
|
-
- spec/dna_spec.rb
|
88
|
-
- spec/record_spec.rb
|
89
|
-
- spec/spec_helper.rb
|
110
|
+
summary: Simple FASTA/FASTQ/QSEQ parser library for Ruby
|
111
|
+
test_files: []
|
112
|
+
|
data/lib/parsers/qseq.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
class QSEQParser
|
2
|
-
|
3
|
-
def initialize(handle)
|
4
|
-
@handle = handle
|
5
|
-
end
|
6
|
-
|
7
|
-
def each
|
8
|
-
@handle.each do |line|
|
9
|
-
line = line.strip.split("\t")
|
10
|
-
record = QSEQ.new(
|
11
|
-
:machine => line[0],
|
12
|
-
:run => line[1],
|
13
|
-
:lane => line[2],
|
14
|
-
:tile => line[3],
|
15
|
-
:x => line[4],
|
16
|
-
:y => line[5],
|
17
|
-
:index => line[6],
|
18
|
-
:read_no => line[7],
|
19
|
-
:sequence => line[8],
|
20
|
-
:quality => line[9],
|
21
|
-
:filtered => line[10]
|
22
|
-
)
|
23
|
-
yield record
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|