dna 0.0.11 → 0.0.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -0
- data/Rakefile +48 -0
- data/dna.gemspec +55 -14
- data/lib/dna/dna.rb +58 -0
- data/lib/{parsers → dna/parsers}/fasta.rb +18 -2
- data/lib/{parsers → dna/parsers}/fastq.rb +19 -2
- data/lib/dna/parsers/qseq.rb +59 -0
- data/lib/dna/record.rb +7 -0
- data/lib/dna.rb +1 -130
- metadata +47 -24
- data/lib/parsers/qseq.rb +0 -26
data/Gemfile
CHANGED
data/Rakefile
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'bundler'
|
5
|
+
|
6
|
+
DNA_VERSION = '0.0.12'
|
7
|
+
|
8
|
+
begin
|
9
|
+
Bundler.setup(:default, :development)
|
10
|
+
rescue Bundler::BundlerError => e
|
11
|
+
$stderr.puts e.message
|
12
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
13
|
+
exit e.status_code
|
14
|
+
end
|
15
|
+
require 'rake'
|
16
|
+
|
17
|
+
require 'jeweler'
|
18
|
+
Jeweler::Tasks.new do |gem|
|
19
|
+
gem.name = "dna"
|
20
|
+
gem.homepage = "http://audy.github.com/dna"
|
21
|
+
gem.license = "MIT"
|
22
|
+
gem.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
23
|
+
gem.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
24
|
+
gem.email = "harekrishna@gmail.com"
|
25
|
+
gem.authors = ["Austin G. Davis-Richardson"]
|
26
|
+
gem.version = DNA_VERSION
|
27
|
+
end
|
28
|
+
Jeweler::RubygemsDotOrgTasks.new
|
29
|
+
|
30
|
+
require 'rake/testtask'
|
31
|
+
Rake::TestTask.new(:test) do |test|
|
32
|
+
test.libs << 'lib' << 'test'
|
33
|
+
test.pattern = 'test/**/test_*.rb'
|
34
|
+
test.verbose = true
|
35
|
+
end
|
36
|
+
|
37
|
+
task :default => :test
|
38
|
+
|
39
|
+
require 'rdoc/task'
|
40
|
+
Rake::RDocTask.new do |rdoc|
|
41
|
+
|
42
|
+
version = DNA_VERSION
|
43
|
+
|
44
|
+
rdoc.rdoc_dir = 'rdoc'
|
45
|
+
rdoc.title = "test #{version}"
|
46
|
+
rdoc.rdoc_files.include('readme.md')
|
47
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
48
|
+
end
|
data/dna.gemspec
CHANGED
@@ -1,19 +1,60 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
s.summary = 'DNA sequence parser'
|
7
|
-
s.description = 'for parsing various types of DNA sequence files'
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
8
5
|
|
9
|
-
|
10
|
-
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = "dna"
|
8
|
+
s.version = "0.0.12"
|
11
9
|
|
12
|
-
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Austin G. Davis-Richardson"]
|
12
|
+
s.date = "2012-08-09"
|
13
|
+
s.description = "Simple FASTA/FASTQ/QSEQ parser library for Ruby."
|
14
|
+
s.email = "harekrishna@gmail.com"
|
15
|
+
s.executables = ["dna"]
|
16
|
+
s.files = [
|
17
|
+
".rspec",
|
18
|
+
".rvmrc",
|
19
|
+
"Gemfile",
|
20
|
+
"Rakefile",
|
21
|
+
"bin/dna",
|
22
|
+
"dna.gemspec",
|
23
|
+
"lib/dna.rb",
|
24
|
+
"lib/dna/dna.rb",
|
25
|
+
"lib/dna/parsers/fasta.rb",
|
26
|
+
"lib/dna/parsers/fastq.rb",
|
27
|
+
"lib/dna/parsers/qseq.rb",
|
28
|
+
"lib/dna/record.rb",
|
29
|
+
"readme.md",
|
30
|
+
"spec/data/empty.txt",
|
31
|
+
"spec/data/test.fasta",
|
32
|
+
"spec/data/test.fasta.gz",
|
33
|
+
"spec/data/test.fastq",
|
34
|
+
"spec/data/test.qseq",
|
35
|
+
"spec/dna_spec.rb",
|
36
|
+
"spec/record_spec.rb",
|
37
|
+
"spec/spec_helper.rb"
|
38
|
+
]
|
39
|
+
s.homepage = "http://audy.github.com/dna"
|
40
|
+
s.licenses = ["MIT"]
|
41
|
+
s.require_paths = ["lib"]
|
42
|
+
s.rubygems_version = "1.8.24"
|
43
|
+
s.summary = "Simple FASTA/FASTQ/QSEQ parser library for Ruby"
|
13
44
|
|
14
|
-
|
15
|
-
|
16
|
-
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
45
|
+
if s.respond_to? :specification_version then
|
46
|
+
s.specification_version = 3
|
17
47
|
|
18
|
-
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<rspec>, [">= 0"])
|
50
|
+
s.add_runtime_dependency(%q<jeweler>, [">= 0"])
|
51
|
+
else
|
52
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
53
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
54
|
+
end
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<rspec>, [">= 0"])
|
57
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
58
|
+
end
|
19
59
|
end
|
60
|
+
|
data/lib/dna/dna.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
|
2
|
+
##
|
3
|
+
# Dna
|
4
|
+
#
|
5
|
+
|
6
|
+
class Dna
|
7
|
+
include Enumerable
|
8
|
+
|
9
|
+
attr_reader :format
|
10
|
+
|
11
|
+
def initialize(handle)
|
12
|
+
@handle = handle
|
13
|
+
@format = detect_format
|
14
|
+
@iterator =
|
15
|
+
case @format
|
16
|
+
when :fasta
|
17
|
+
FastaParser.new @handle
|
18
|
+
when :fastq
|
19
|
+
FastqParser.new @handle
|
20
|
+
when :qseq
|
21
|
+
QSEQParser.new @handle
|
22
|
+
else
|
23
|
+
raise "#{@format} not supported."
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def detect_format
|
28
|
+
|
29
|
+
# is gzipped?
|
30
|
+
unless @handle.class == Array # for tests mostly...
|
31
|
+
begin
|
32
|
+
@handle = Zlib::GzipReader.new(@handle)
|
33
|
+
rescue
|
34
|
+
@handle.rewind
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
first_line = @handle.first
|
39
|
+
@handle.rewind if @handle.class == File
|
40
|
+
|
41
|
+
return :unknown if first_line == nil
|
42
|
+
|
43
|
+
# detect qseq by counting number of tabs.
|
44
|
+
if first_line.split("\t").length == 11
|
45
|
+
return :qseq
|
46
|
+
elsif first_line[0].chr == '>'
|
47
|
+
return :fasta
|
48
|
+
elsif first_line[0].chr == '@'
|
49
|
+
return :fastq
|
50
|
+
else
|
51
|
+
raise Exception, "cannot detect format of input"
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
def each &block
|
56
|
+
@iterator.each(&block)
|
57
|
+
end
|
58
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
class FastaParser
|
2
|
-
|
2
|
+
|
3
3
|
def initialize(handle)
|
4
4
|
@handle = handle
|
5
5
|
end
|
@@ -17,4 +17,20 @@ class FastaParser
|
|
17
17
|
end
|
18
18
|
yield Fasta.new(:name => header, :sequence => sequence)
|
19
19
|
end
|
20
|
-
end
|
20
|
+
end
|
21
|
+
|
22
|
+
##
|
23
|
+
# Fasta record
|
24
|
+
#
|
25
|
+
class Fasta < Record
|
26
|
+
attr_accessor :name, :sequence
|
27
|
+
|
28
|
+
def initialize(args={})
|
29
|
+
@name = args[:name]
|
30
|
+
@sequence = args[:sequence]
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_s
|
34
|
+
">#{@name}\n#{@sequence}"
|
35
|
+
end
|
36
|
+
end
|
@@ -3,7 +3,7 @@ class FastqParser
|
|
3
3
|
def initialize(handle)
|
4
4
|
@handle = handle
|
5
5
|
end
|
6
|
-
|
6
|
+
|
7
7
|
def each
|
8
8
|
c = (0..3).cycle
|
9
9
|
params = { :name => nil, :sequence => nil, :quality => nil }
|
@@ -23,4 +23,21 @@ class FastqParser
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
end
|
26
|
-
end
|
26
|
+
end
|
27
|
+
|
28
|
+
##
|
29
|
+
# Fastq record
|
30
|
+
#
|
31
|
+
class Fastq < Record
|
32
|
+
attr_accessor :name, :sequence, :format, :quality
|
33
|
+
|
34
|
+
def initialize(args={})
|
35
|
+
@name = args[:name]
|
36
|
+
@sequence = args[:sequence]
|
37
|
+
@quality = args[:quality]
|
38
|
+
end
|
39
|
+
|
40
|
+
def to_s
|
41
|
+
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class QSEQParser
|
2
|
+
|
3
|
+
def initialize(handle)
|
4
|
+
@handle = handle
|
5
|
+
end
|
6
|
+
|
7
|
+
def each
|
8
|
+
@handle.each do |line|
|
9
|
+
line = line.strip.split("\t")
|
10
|
+
record = QSEQ.new(
|
11
|
+
:machine => line[0],
|
12
|
+
:run => line[1],
|
13
|
+
:lane => line[2],
|
14
|
+
:tile => line[3],
|
15
|
+
:x => line[4],
|
16
|
+
:y => line[5],
|
17
|
+
:index => line[6],
|
18
|
+
:read_no => line[7],
|
19
|
+
:sequence => line[8],
|
20
|
+
:quality => line[9],
|
21
|
+
:filtered => line[10]
|
22
|
+
)
|
23
|
+
yield record
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
##
|
29
|
+
# QSEQ record
|
30
|
+
#
|
31
|
+
class QSEQ < Record
|
32
|
+
|
33
|
+
attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
34
|
+
|
35
|
+
def initialize(args={})
|
36
|
+
# These are the properties defined by the qseq spec
|
37
|
+
# they must be in the same order that they appear in the tab-separated qseq file
|
38
|
+
@properties = :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
39
|
+
@machine = args[:machine]
|
40
|
+
@run = args[:run]
|
41
|
+
@lane = args[:lane]
|
42
|
+
@tile = args[:tile]
|
43
|
+
@x = args[:x]
|
44
|
+
@y = args[:y]
|
45
|
+
@index = args[:index]
|
46
|
+
@read_no = args[:read_no]
|
47
|
+
@sequence = args[:sequence]
|
48
|
+
@quality = args[:quality]
|
49
|
+
@filtered = args[:filtered]
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s
|
53
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
54
|
+
end
|
55
|
+
|
56
|
+
def header
|
57
|
+
@properties.collect { |x| self.send(x) }.join("\t")
|
58
|
+
end
|
59
|
+
end
|
data/lib/dna/record.rb
ADDED
data/lib/dna.rb
CHANGED
@@ -1,132 +1,3 @@
|
|
1
1
|
require 'zlib'
|
2
2
|
|
3
|
-
Dir.glob(File.join(File.dirname(__FILE__), '
|
4
|
-
|
5
|
-
##
|
6
|
-
# Dna
|
7
|
-
#
|
8
|
-
class Dna
|
9
|
-
include Enumerable
|
10
|
-
|
11
|
-
attr_reader :format
|
12
|
-
|
13
|
-
def initialize(handle)
|
14
|
-
@handle = handle
|
15
|
-
@format = detect_format
|
16
|
-
@iterator =
|
17
|
-
case @format
|
18
|
-
when :fasta
|
19
|
-
FastaParser.new @handle
|
20
|
-
when :fastq
|
21
|
-
FastqParser.new @handle
|
22
|
-
when :qseq
|
23
|
-
QSEQParser.new @handle
|
24
|
-
else
|
25
|
-
raise "#{@format} not supported."
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
def detect_format
|
30
|
-
|
31
|
-
# is gzipped?
|
32
|
-
unless @handle.class == Array # for tests mostly...
|
33
|
-
begin
|
34
|
-
@handle = Zlib::GzipReader.new(@handle)
|
35
|
-
rescue
|
36
|
-
@handle.rewind
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
first_line = @handle.first
|
41
|
-
@handle.rewind if @handle.class == File
|
42
|
-
|
43
|
-
return :unknown if first_line == nil
|
44
|
-
|
45
|
-
# detect qseq by counting number of tabs.
|
46
|
-
if first_line.split("\t").length == 11
|
47
|
-
return :qseq
|
48
|
-
elsif first_line[0].chr == '>'
|
49
|
-
return :fasta
|
50
|
-
elsif first_line[0].chr == '@'
|
51
|
-
return :fastq
|
52
|
-
else
|
53
|
-
raise Exception, "cannot detect format of input"
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def each &block
|
58
|
-
@iterator.each(&block)
|
59
|
-
end
|
60
|
-
end
|
61
|
-
|
62
|
-
class Record # nucleotide record
|
63
|
-
def length
|
64
|
-
@sequence.length
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
##
|
69
|
-
# Fasta record
|
70
|
-
#
|
71
|
-
class Fasta < Record
|
72
|
-
attr_accessor :name, :sequence
|
73
|
-
|
74
|
-
def initialize(args={})
|
75
|
-
@name = args[:name]
|
76
|
-
@sequence = args[:sequence]
|
77
|
-
end
|
78
|
-
|
79
|
-
def to_s
|
80
|
-
">#{@name}\n#{@sequence}"
|
81
|
-
end
|
82
|
-
end
|
83
|
-
|
84
|
-
##
|
85
|
-
# Fastq record
|
86
|
-
#
|
87
|
-
class Fastq < Record
|
88
|
-
attr_accessor :name, :sequence, :format, :quality
|
89
|
-
|
90
|
-
def initialize(args={})
|
91
|
-
@name = args[:name]
|
92
|
-
@sequence = args[:sequence]
|
93
|
-
@quality = args[:quality]
|
94
|
-
end
|
95
|
-
|
96
|
-
def to_s
|
97
|
-
"@#{@name}\n#{@sequence}\n+#{@name}\n#{@quality}"
|
98
|
-
end
|
99
|
-
end
|
100
|
-
|
101
|
-
##
|
102
|
-
# QSEQ record
|
103
|
-
#2
|
104
|
-
class QSEQ < Record
|
105
|
-
|
106
|
-
attr_accessor :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
107
|
-
|
108
|
-
def initialize(args={})
|
109
|
-
# These are the properties defined by the qseq spec
|
110
|
-
# they must be in the same order that they appear in the tab-separated qseq file
|
111
|
-
@properties = :machine, :run, :lane, :tile, :x, :y, :index, :read_no, :sequence, :quality, :filtered
|
112
|
-
@machine = args[:machine]
|
113
|
-
@run = args[:run]
|
114
|
-
@lane = args[:lane]
|
115
|
-
@tile = args[:tile]
|
116
|
-
@x = args[:x]
|
117
|
-
@y = args[:y]
|
118
|
-
@index = args[:index]
|
119
|
-
@read_no = args[:read_no]
|
120
|
-
@sequence = args[:sequence]
|
121
|
-
@quality = args[:quality]
|
122
|
-
@filtered = args[:filtered]
|
123
|
-
end
|
124
|
-
|
125
|
-
def to_s
|
126
|
-
@properties.collect { |x| self.send(x) }.join("\t")
|
127
|
-
end
|
128
|
-
|
129
|
-
def header
|
130
|
-
@properties.collect { |x| self.send(x) }.join("\t")
|
131
|
-
end
|
132
|
-
end
|
3
|
+
Dir.glob(File.join(File.dirname(__FILE__), 'dna', '*.rb')).each { |f| require f }
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dna
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 7
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 12
|
10
|
+
version: 0.0.12
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Austin G. Davis-Richardson
|
@@ -15,10 +15,37 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2012-
|
19
|
-
dependencies:
|
20
|
-
|
21
|
-
|
18
|
+
date: 2012-08-09 00:00:00 Z
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
type: :runtime
|
22
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
23
|
+
none: false
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
hash: 3
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
version: "0"
|
31
|
+
version_requirements: *id001
|
32
|
+
name: rspec
|
33
|
+
prerelease: false
|
34
|
+
- !ruby/object:Gem::Dependency
|
35
|
+
type: :runtime
|
36
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
37
|
+
none: false
|
38
|
+
requirements:
|
39
|
+
- - ">="
|
40
|
+
- !ruby/object:Gem::Version
|
41
|
+
hash: 3
|
42
|
+
segments:
|
43
|
+
- 0
|
44
|
+
version: "0"
|
45
|
+
version_requirements: *id002
|
46
|
+
name: jeweler
|
47
|
+
prerelease: false
|
48
|
+
description: Simple FASTA/FASTQ/QSEQ parser library for Ruby.
|
22
49
|
email: harekrishna@gmail.com
|
23
50
|
executables:
|
24
51
|
- dna
|
@@ -30,12 +57,15 @@ files:
|
|
30
57
|
- .rspec
|
31
58
|
- .rvmrc
|
32
59
|
- Gemfile
|
60
|
+
- Rakefile
|
33
61
|
- bin/dna
|
34
62
|
- dna.gemspec
|
35
63
|
- lib/dna.rb
|
36
|
-
- lib/
|
37
|
-
- lib/parsers/
|
38
|
-
- lib/parsers/
|
64
|
+
- lib/dna/dna.rb
|
65
|
+
- lib/dna/parsers/fasta.rb
|
66
|
+
- lib/dna/parsers/fastq.rb
|
67
|
+
- lib/dna/parsers/qseq.rb
|
68
|
+
- lib/dna/record.rb
|
39
69
|
- readme.md
|
40
70
|
- spec/data/empty.txt
|
41
71
|
- spec/data/test.fasta
|
@@ -45,9 +75,9 @@ files:
|
|
45
75
|
- spec/dna_spec.rb
|
46
76
|
- spec/record_spec.rb
|
47
77
|
- spec/spec_helper.rb
|
48
|
-
homepage: http://github.com/
|
49
|
-
licenses:
|
50
|
-
|
78
|
+
homepage: http://audy.github.com/dna
|
79
|
+
licenses:
|
80
|
+
- MIT
|
51
81
|
post_install_message:
|
52
82
|
rdoc_options: []
|
53
83
|
|
@@ -73,17 +103,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
103
|
version: "0"
|
74
104
|
requirements: []
|
75
105
|
|
76
|
-
rubyforge_project:
|
106
|
+
rubyforge_project:
|
77
107
|
rubygems_version: 1.8.24
|
78
108
|
signing_key:
|
79
109
|
specification_version: 3
|
80
|
-
summary:
|
81
|
-
test_files:
|
82
|
-
|
83
|
-
- spec/data/test.fasta
|
84
|
-
- spec/data/test.fasta.gz
|
85
|
-
- spec/data/test.fastq
|
86
|
-
- spec/data/test.qseq
|
87
|
-
- spec/dna_spec.rb
|
88
|
-
- spec/record_spec.rb
|
89
|
-
- spec/spec_helper.rb
|
110
|
+
summary: Simple FASTA/FASTQ/QSEQ parser library for Ruby
|
111
|
+
test_files: []
|
112
|
+
|
data/lib/parsers/qseq.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
class QSEQParser
|
2
|
-
|
3
|
-
def initialize(handle)
|
4
|
-
@handle = handle
|
5
|
-
end
|
6
|
-
|
7
|
-
def each
|
8
|
-
@handle.each do |line|
|
9
|
-
line = line.strip.split("\t")
|
10
|
-
record = QSEQ.new(
|
11
|
-
:machine => line[0],
|
12
|
-
:run => line[1],
|
13
|
-
:lane => line[2],
|
14
|
-
:tile => line[3],
|
15
|
-
:x => line[4],
|
16
|
-
:y => line[5],
|
17
|
-
:index => line[6],
|
18
|
-
:read_no => line[7],
|
19
|
-
:sequence => line[8],
|
20
|
-
:quality => line[9],
|
21
|
-
:filtered => line[10]
|
22
|
-
)
|
23
|
-
yield record
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|