scaffolder 0.2.6 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +15 -0
- data/LICENSE +1 -1
- data/README.rdoc +29 -17
- data/Rakefile +22 -41
- data/VERSION +1 -1
- data/cucumber.yml +2 -0
- data/features/insert.feature +15 -0
- data/features/sequence.feature +20 -0
- data/features/step_definitions/scaffolder_steps.rb +48 -0
- data/features/support/env.rb +30 -0
- data/lib/scaffolder/errors.rb +6 -0
- data/lib/scaffolder/region/insert.rb +51 -0
- data/lib/scaffolder/region/sequence.rb +74 -0
- data/lib/scaffolder/region/unresolved.rb +23 -0
- data/lib/scaffolder/region.rb +139 -1
- data/lib/scaffolder.rb +197 -33
- data/scaffolder.gemspec +61 -43
- data/test/helper.rb +18 -3
- data/test/test_insert.rb +35 -43
- data/test/test_region.rb +143 -4
- data/test/test_scaffolder.rb +78 -47
- data/test/test_sequence.rb +61 -84
- data/test/test_unresolved.rb +23 -0
- data/yard/attribute_handler.rb +12 -0
- metadata +102 -45
- data/.gitignore +0 -22
- data/lib/scaffolder/insert.rb +0 -32
- data/lib/scaffolder/sequence.rb +0 -50
- data/test/data/sequences.fna +0 -4
data/lib/scaffolder.rb
CHANGED
@@ -1,51 +1,215 @@
|
|
1
1
|
require 'delegate'
|
2
2
|
require 'bio'
|
3
3
|
|
4
|
+
# == Quick start
|
5
|
+
#
|
6
|
+
# Given a fasta file containing two sequences.
|
7
|
+
#
|
8
|
+
# >seqA
|
9
|
+
# GCGCGC
|
10
|
+
# >seqB
|
11
|
+
# ATATAT
|
12
|
+
#
|
13
|
+
# A simple genome scaffold containing the two sequences is specified as a YAML
|
14
|
+
# formatted text file shown below. Each dash (-) indicates a region in the
|
15
|
+
# scaffold. In the example below the keyword *sequence* inserts a sequence from
|
16
|
+
# the fasta file, the keyword *source* identifies that seqA should be used.
|
17
|
+
#
|
18
|
+
# ---
|
19
|
+
# - sequence:
|
20
|
+
# source: 'seqA'
|
21
|
+
# - sequence:
|
22
|
+
# source: 'seqB'
|
23
|
+
#
|
24
|
+
# The scaffolder API can then be used as follows to generate a complete
|
25
|
+
# sequence.
|
26
|
+
#
|
27
|
+
# scaffold = Scaffolder.new(YAML.load_file('scaffold.yml'),'sequences.fasta')
|
28
|
+
# sequence = scaffold.inject(String.new) do |build,entry|
|
29
|
+
# build << entry.sequence
|
30
|
+
# end
|
31
|
+
# puts sequence # Prints GCGCGCATATAT
|
32
|
+
#
|
33
|
+
# == The Scaffold File
|
34
|
+
#
|
35
|
+
# The above example is simplified to demonstrate basic usage. The sections
|
36
|
+
# below outline the types of regions that can be used in the scaffold file.
|
37
|
+
#
|
38
|
+
# === Sequence Regions
|
39
|
+
#
|
40
|
+
# Contig sequences in the scaffold are specified using the *sequence* keyword.
|
41
|
+
# The *source* keyword specifies the sequence to use from the fasta file
|
42
|
+
# and should match the first space delimited word in the fasta header.
|
43
|
+
#
|
44
|
+
# ==== Sub-Sequences
|
45
|
+
#
|
46
|
+
# When generating a scaffold only a subset of a sequence may be required.
|
47
|
+
# Inserting sub-sequences into the scaffold is specified using the *start* and
|
48
|
+
# *stop* keywords. All of the sequence before the start coordinate is ignored
|
49
|
+
# and all of the sequence after the stop coordinate is ignored, meaning only the
|
50
|
+
# sequence between the start and stop position inclusively is used in the
|
51
|
+
# scaffold.
|
52
|
+
#
|
53
|
+
# ---
|
54
|
+
# - sequence:
|
55
|
+
# source: 'sequence1'
|
56
|
+
# start: 42
|
57
|
+
# stop: 1764
|
58
|
+
#
|
59
|
+
# ==== Reverse Complementation
|
60
|
+
#
|
61
|
+
# The *reverse* keyword specifies that the selected sequence is reversed
|
62
|
+
# complemented.
|
63
|
+
#
|
64
|
+
# ---
|
65
|
+
# - sequence:
|
66
|
+
# source: 'sequence1'
|
67
|
+
# reverse: true
|
68
|
+
#
|
69
|
+
# === Insert Regions
|
70
|
+
#
|
71
|
+
# Sequence contigs may contain gaps, for example where the sequence could not
|
72
|
+
# be correctly resolved during assembly. Additional sequencing may however
|
73
|
+
# produce sequences that can be used to fill these gaps. These inserts can be
|
74
|
+
# added to a sequence using the *insert* keyword and specifying a YAML array of
|
75
|
+
# the inserts. Multiple inserts can be specified, each separated by a dash (-)
|
76
|
+
# followed by a new line.
|
77
|
+
#
|
78
|
+
# ---
|
79
|
+
# - sequence:
|
80
|
+
# source: 'sequence1'
|
81
|
+
# inserts:
|
82
|
+
# -
|
83
|
+
# source: 'insert1'
|
84
|
+
# open: 3
|
85
|
+
# close: 10
|
86
|
+
#
|
87
|
+
# ==== Insert Position
|
88
|
+
#
|
89
|
+
# The location where an insert is added to a sequence is defined by either the
|
90
|
+
# *open*, *close* keywords, or both. This defines where the host sequence is
|
91
|
+
# 'opened' and 'closed' to add the insert. If only one parameter is used, for
|
92
|
+
# example using *open*, then the close position is determined from the length
|
93
|
+
# of the insert sequence and vice versa.
|
94
|
+
#
|
95
|
+
# ==== Insert Sub-Sequence
|
96
|
+
#
|
97
|
+
# An insert can be subsequenced in the same way as a sequence using the *start*
|
98
|
+
# and *stop* keywords. Similarly the insert sequence can be reverse complemented
|
99
|
+
# using the *reverse* keyword.
|
100
|
+
#
|
101
|
+
# ---
|
102
|
+
# - sequence:
|
103
|
+
# source: 'sequence1'
|
104
|
+
# inserts:
|
105
|
+
# -
|
106
|
+
# source: 'insert1'
|
107
|
+
# open: 3
|
108
|
+
# close: 10
|
109
|
+
# start: 8
|
110
|
+
# stop: 16
|
111
|
+
# reverse: true
|
112
|
+
#
|
113
|
+
#
|
114
|
+
# === Unresolved Regions
|
115
|
+
#
|
116
|
+
# There may be regions in between sequences in the genome which are unknown but
|
117
|
+
# whose approximate length is known. These can be specified in the scaffold file
|
118
|
+
# using the *unresolved* keyword. Unresolved regions are filled with 'N'
|
119
|
+
# nucleotide characters equal to the value specified by the *length* keyword.
|
120
|
+
#
|
121
|
+
# ---
|
122
|
+
# - unresolved:
|
123
|
+
# length: 10
|
124
|
+
#
|
125
|
+
# === Scaffold File Processing Order
|
126
|
+
#
|
127
|
+
# The scaffolder API processes the regions in the YAML scaffold file as follows:
|
128
|
+
#
|
129
|
+
# * Each region in the scaffold is processed in the order specified in the
|
130
|
+
# scaffolder file.
|
131
|
+
# * If the region is a sequence and inserts are specified, the inserts are
|
132
|
+
# sorted by stop position, then processed from last to first. Each insert is
|
133
|
+
# processed as follows:
|
134
|
+
#
|
135
|
+
# * The insert is subsequenced if specified.
|
136
|
+
# * The insert is reverse complemented if specified.
|
137
|
+
# * The insert is added to each host sequence replacing the region of
|
138
|
+
# sequence specified by the open and close co-ordinates.
|
139
|
+
# * The host sequence stop position is extended by the difference in length
|
140
|
+
# that the insert sequence fills. For example if a 5 base pair insert fills
|
141
|
+
# a 4 base region, the host sequence stop position is increased by the
|
142
|
+
# difference: 1.
|
143
|
+
# * The region is subsequenced if specified.
|
144
|
+
# * The region is reverse complemented if specified.
|
145
|
+
#
|
146
|
+
# === WARNING
|
147
|
+
#
|
148
|
+
# Inserts with overlapping *open* and *close* regions in the same sequence will
|
149
|
+
# cause unexpected behaviour and should be avoided.
|
150
|
+
#
|
4
151
|
class Scaffolder < DelegateClass(Array)
|
5
|
-
|
6
|
-
|
7
|
-
autoload :Sequence, 'scaffolder/sequence'
|
152
|
+
require 'scaffolder/errors'
|
153
|
+
require 'scaffolder/region'
|
8
154
|
|
155
|
+
include Scaffolder::Errors
|
156
|
+
|
157
|
+
# Source is a reserved keyword. The 'source' keyword identifies the
|
158
|
+
# corresponding fasta sequence that should be retrieved from the fasta
|
159
|
+
# file.
|
160
|
+
SOURCE = 'source'
|
161
|
+
|
162
|
+
# Raw_sequence is a reserved keyword. The 'raw_sequence' keyword points to
|
163
|
+
# the sequence from the fasta file identified by the 'source' keyword.
|
164
|
+
RAW_SEQUENCE = 'raw_sequence'
|
165
|
+
|
166
|
+
# @param [Array<Hash>] assembly Produced from loading the scaffold file using YAML.load
|
167
|
+
# @param [String] sequence Location of the fasta file corresponding to the
|
168
|
+
# scaffold sequences
|
169
|
+
# @return [Array] Returns an array of scaffold regions
|
170
|
+
# @example
|
171
|
+
# Scaffolder.new(YAML.load_file('scaffold.yml'),'sequences.fasta')
|
9
172
|
def initialize(assembly,sequence)
|
10
|
-
|
173
|
+
sequences = Hash[ *Bio::FlatFile::auto(sequence).collect { |s|
|
11
174
|
[s.definition.split.first,s.seq]
|
12
175
|
}.flatten]
|
13
176
|
|
14
177
|
super(assembly.map do |entry|
|
15
178
|
type, data = entry.keys.first, entry.values.first
|
16
179
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
sequence = Scaffolder::Sequence.new(
|
22
|
-
:name => data['source'],
|
23
|
-
:start => data['start'],
|
24
|
-
:end => data['end'],
|
25
|
-
:reverse => data['reverse'],
|
26
|
-
:sequence => fetch_sequence(data['source'])
|
27
|
-
)
|
28
|
-
if data['inserts']
|
29
|
-
sequence.add_inserts(data['inserts'].map do |insert|
|
30
|
-
Scaffolder::Insert.new(
|
31
|
-
:start => insert['start'],
|
32
|
-
:stop => insert['stop'],
|
33
|
-
:reverse => insert['reverse'],
|
34
|
-
:sequence => fetch_sequence(insert['source'])
|
35
|
-
)
|
36
|
-
end)
|
37
|
-
end
|
38
|
-
sequence
|
39
|
-
else
|
40
|
-
raise ArgumentError.new("Unknown tag: #{type}")
|
41
|
-
end
|
180
|
+
# Source is the only reserved keyword. Fetches sequence from fasta file.
|
181
|
+
data = Scaffolder.update_with_sequence(data,sequences)
|
182
|
+
|
183
|
+
Scaffolder::Region[type].generate(data)
|
42
184
|
end)
|
43
185
|
end
|
44
186
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
187
|
+
# Inserts corresponding fasta data into scaffold data hash. Every hash
|
188
|
+
# that contains the reserved 'source' keyword has the 'raw_sequence' keyword
|
189
|
+
# added for the corresponding fasta sequence from the fasta file.
|
190
|
+
# @param [Hash] data The scaffold hash
|
191
|
+
# @param [Hash] seqs A hash with identifier => sequence key/value pairs from
|
192
|
+
# the fasta sequence data.
|
193
|
+
# @return [Hash] The data hash updated with the 'raw_sequence' sequence
|
194
|
+
# keyword data.
|
195
|
+
# @raise [UnknownSequenceError] if the source keyword is used but
|
196
|
+
# there is no corresponding fasta sequence entry
|
197
|
+
def self.update_with_sequence(data,seqs)
|
198
|
+
if data.instance_of? Array
|
199
|
+
data.each{|a| update_with_sequence(a,seqs) }
|
200
|
+
else
|
201
|
+
if data[SOURCE]
|
202
|
+
sequence = seqs[data[SOURCE]]
|
203
|
+
if sequence.nil?
|
204
|
+
raise UnknownSequenceError.new("Unknown sequence: #{data[SOURCE]}")
|
205
|
+
end
|
206
|
+
data.merge!({RAW_SEQUENCE => sequence})
|
207
|
+
end
|
208
|
+
data.select{|k,v| v.respond_to? :each}.each do |key,hash|
|
209
|
+
update_with_sequence(hash,seqs)
|
210
|
+
end
|
211
|
+
end
|
212
|
+
data
|
49
213
|
end
|
50
214
|
|
51
215
|
end
|
data/scaffolder.gemspec
CHANGED
@@ -1,51 +1,60 @@
|
|
1
1
|
# Generated by jeweler
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{scaffolder}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.4.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Michael Barton"]
|
12
|
-
s.date = %q{
|
13
|
-
s.description = %q{Organise
|
12
|
+
s.date = %q{2011-01-04}
|
13
|
+
s.description = %q{Organise sequence contigs into genome scaffolds using simple human-readable YAML files.}
|
14
14
|
s.email = %q{mail@michaelbarton.me.uk}
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
|
-
|
17
|
+
"README.rdoc"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
".document",
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
21
|
+
"Gemfile",
|
22
|
+
"LICENSE",
|
23
|
+
"README.rdoc",
|
24
|
+
"Rakefile",
|
25
|
+
"VERSION",
|
26
|
+
"cucumber.yml",
|
27
|
+
"features/insert.feature",
|
28
|
+
"features/sequence.feature",
|
29
|
+
"features/step_definitions/scaffolder_steps.rb",
|
30
|
+
"features/support/env.rb",
|
31
|
+
"lib/scaffolder.rb",
|
32
|
+
"lib/scaffolder/errors.rb",
|
33
|
+
"lib/scaffolder/region.rb",
|
34
|
+
"lib/scaffolder/region/insert.rb",
|
35
|
+
"lib/scaffolder/region/sequence.rb",
|
36
|
+
"lib/scaffolder/region/unresolved.rb",
|
37
|
+
"scaffolder.gemspec",
|
38
|
+
"test/helper.rb",
|
39
|
+
"test/test_insert.rb",
|
40
|
+
"test/test_region.rb",
|
41
|
+
"test/test_scaffolder.rb",
|
42
|
+
"test/test_sequence.rb",
|
43
|
+
"test/test_unresolved.rb",
|
44
|
+
"yard/attribute_handler.rb"
|
37
45
|
]
|
38
|
-
s.homepage = %q{http://
|
39
|
-
s.
|
46
|
+
s.homepage = %q{http://www.michaelbarton.me.uk/scaffolder/}
|
47
|
+
s.licenses = ["MIT"]
|
40
48
|
s.require_paths = ["lib"]
|
41
49
|
s.rubygems_version = %q{1.3.7}
|
42
|
-
s.summary = %q{
|
50
|
+
s.summary = %q{Genome scaffolding for human beings.}
|
43
51
|
s.test_files = [
|
44
52
|
"test/helper.rb",
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
53
|
+
"test/test_insert.rb",
|
54
|
+
"test/test_region.rb",
|
55
|
+
"test/test_scaffolder.rb",
|
56
|
+
"test/test_sequence.rb",
|
57
|
+
"test/test_unresolved.rb"
|
49
58
|
]
|
50
59
|
|
51
60
|
if s.respond_to? :specification_version then
|
@@ -53,24 +62,33 @@ Gem::Specification.new do |s|
|
|
53
62
|
s.specification_version = 3
|
54
63
|
|
55
64
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
56
|
-
s.add_runtime_dependency(%q<bio>, ["
|
57
|
-
s.add_development_dependency(%q<
|
58
|
-
s.add_development_dependency(%q<shoulda>, ["
|
59
|
-
s.add_development_dependency(%q<
|
60
|
-
s.add_development_dependency(%q<yard>, ["
|
65
|
+
s.add_runtime_dependency(%q<bio>, ["~> 1.4"])
|
66
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0"])
|
67
|
+
s.add_development_dependency(%q<shoulda>, ["~> 2.11"])
|
68
|
+
s.add_development_dependency(%q<mocha>, ["~> 0.9"])
|
69
|
+
s.add_development_dependency(%q<yard>, ["~> 0.6"])
|
70
|
+
s.add_development_dependency(%q<cucumber>, ["~> 0.9"])
|
71
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.5"])
|
72
|
+
s.add_development_dependency(%q<redgreen>, ["~> 1.2"])
|
61
73
|
else
|
62
|
-
s.add_dependency(%q<bio>, ["
|
63
|
-
s.add_dependency(%q<
|
64
|
-
s.add_dependency(%q<shoulda>, ["
|
65
|
-
s.add_dependency(%q<
|
66
|
-
s.add_dependency(%q<yard>, ["
|
74
|
+
s.add_dependency(%q<bio>, ["~> 1.4"])
|
75
|
+
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
76
|
+
s.add_dependency(%q<shoulda>, ["~> 2.11"])
|
77
|
+
s.add_dependency(%q<mocha>, ["~> 0.9"])
|
78
|
+
s.add_dependency(%q<yard>, ["~> 0.6"])
|
79
|
+
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
80
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
81
|
+
s.add_dependency(%q<redgreen>, ["~> 1.2"])
|
67
82
|
end
|
68
83
|
else
|
69
|
-
s.add_dependency(%q<bio>, ["
|
70
|
-
s.add_dependency(%q<
|
71
|
-
s.add_dependency(%q<shoulda>, ["
|
72
|
-
s.add_dependency(%q<
|
73
|
-
s.add_dependency(%q<yard>, ["
|
84
|
+
s.add_dependency(%q<bio>, ["~> 1.4"])
|
85
|
+
s.add_dependency(%q<bundler>, ["~> 1.0"])
|
86
|
+
s.add_dependency(%q<shoulda>, ["~> 2.11"])
|
87
|
+
s.add_dependency(%q<mocha>, ["~> 0.9"])
|
88
|
+
s.add_dependency(%q<yard>, ["~> 0.6"])
|
89
|
+
s.add_dependency(%q<cucumber>, ["~> 0.9"])
|
90
|
+
s.add_dependency(%q<jeweler>, ["~> 1.5"])
|
91
|
+
s.add_dependency(%q<redgreen>, ["~> 1.2"])
|
74
92
|
end
|
75
93
|
end
|
76
94
|
|
data/test/helper.rb
CHANGED
@@ -2,7 +2,7 @@ require 'rubygems'
|
|
2
2
|
require 'test/unit'
|
3
3
|
require 'shoulda'
|
4
4
|
require 'redgreen'
|
5
|
-
require '
|
5
|
+
require 'mocha'
|
6
6
|
|
7
7
|
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
8
8
|
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
@@ -10,6 +10,21 @@ $LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
10
10
|
require 'scaffolder'
|
11
11
|
|
12
12
|
class Test::Unit::TestCase
|
13
|
-
|
14
|
-
|
13
|
+
class << self
|
14
|
+
|
15
|
+
def should_have_method_attribute(klass)
|
16
|
+
should "have method #attribute" do
|
17
|
+
assert_respond_to( klass, :attribute )
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
def should_have_attribute(klass, *attributes)
|
22
|
+
attributes.each do |attribute|
|
23
|
+
should "have instance attribute #{attribute}" do
|
24
|
+
assert_respond_to( klass.new, attribute )
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
15
30
|
end
|
data/test/test_insert.rb
CHANGED
@@ -1,58 +1,50 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
3
|
class TestInsert < Test::Unit::TestCase
|
4
|
-
context Scaffolder::Insert do
|
5
|
-
|
6
|
-
setup do
|
7
|
-
@options = {
|
8
|
-
:start => 5,
|
9
|
-
:stop => 10,
|
10
|
-
:sequence => "ATGCGGGC"
|
11
|
-
}
|
12
|
-
end
|
4
|
+
context Scaffolder::Region::Insert do
|
13
5
|
|
14
|
-
|
15
|
-
i = Scaffolder::Insert.new @options
|
16
|
-
assert_equal(i.start, @options[:start])
|
17
|
-
assert_equal(i.stop, @options[:stop])
|
18
|
-
assert_equal(i.sequence, @options[:sequence])
|
19
|
-
end
|
6
|
+
context "attributes" do
|
20
7
|
|
21
|
-
|
22
|
-
i = Scaffolder::Insert.new @options.merge(:reverse => true)
|
23
|
-
rev = Bio::Sequence::NA.new(@options[:sequence]).reverse_complement
|
24
|
-
assert_equal(i.sequence, rev.upcase)
|
25
|
-
end
|
8
|
+
should_have_attribute Scaffolder::Region::Insert, :source, :open, :close
|
26
9
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
10
|
+
setup do
|
11
|
+
@length = 15
|
12
|
+
@insert = Scaffolder::Region::Insert.new
|
13
|
+
@insert.raw_sequence('N' * @length)
|
14
|
+
end
|
31
15
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
end
|
16
|
+
should "return open plus sequence length as default close" do
|
17
|
+
@insert.open 5
|
18
|
+
assert_equal(@insert.close,@insert.open + @length - 1)
|
19
|
+
end
|
37
20
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
21
|
+
should "return close minus sequence length as default open" do
|
22
|
+
@insert.close 20
|
23
|
+
assert_equal(@insert.open,@insert.close - @length - 1)
|
24
|
+
end
|
25
|
+
|
26
|
+
should "include the insert position" do
|
27
|
+
@insert.open 5
|
28
|
+
@insert.close 10
|
29
|
+
assert_equal(@insert.position,4..9)
|
30
|
+
end
|
43
31
|
|
44
|
-
|
45
|
-
|
46
|
-
@options.delete(:stop)
|
47
|
-
assert_raise ArgumentError do
|
48
|
-
Scaffolder::Insert.new @options
|
32
|
+
should "throw an error when neither open or close are provided" do
|
33
|
+
assert_raise(Scaffolder::Errors::CoordinateError){ @insert.position }
|
49
34
|
end
|
35
|
+
|
50
36
|
end
|
51
37
|
|
52
|
-
should "be comparable by
|
53
|
-
a = Scaffolder::Insert.new
|
54
|
-
|
55
|
-
|
38
|
+
should "be comparable by close position" do
|
39
|
+
a = Scaffolder::Region::Insert.new
|
40
|
+
a.close 1
|
41
|
+
|
42
|
+
b = a.clone
|
43
|
+
b.close 2
|
44
|
+
|
45
|
+
c = b.clone
|
46
|
+
c.close 3
|
47
|
+
|
56
48
|
assert_equal([c,a,b].sort, [a,b,c])
|
57
49
|
end
|
58
50
|
|
data/test/test_region.rb
CHANGED
@@ -2,10 +2,149 @@ require 'helper'
|
|
2
2
|
|
3
3
|
class TestScaffolder < Test::Unit::TestCase
|
4
4
|
context Scaffolder::Region do
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
|
6
|
+
context "adding instance methods with attribute method" do
|
7
|
+
|
8
|
+
setup do
|
9
|
+
@attr = :some_attribute
|
10
|
+
end
|
11
|
+
|
12
|
+
should "create a single accessor attribute" do
|
13
|
+
Scaffolder::Region.attribute @attr
|
14
|
+
assert(Scaffolder::Region.instance_methods.include? @attr.to_s)
|
15
|
+
end
|
16
|
+
|
17
|
+
should "return nil until attribute value is stored" do
|
18
|
+
Scaffolder::Region.attribute @attr
|
19
|
+
region = Scaffolder::Region.new
|
20
|
+
assert_equal(region.send(@attr),nil)
|
21
|
+
region.send(@attr,5)
|
22
|
+
assert_equal(region.send(@attr),5)
|
23
|
+
end
|
24
|
+
|
25
|
+
should "allow specification of default value" do
|
26
|
+
Scaffolder::Region.attribute @attr, :default => 1
|
27
|
+
region = Scaffolder::Region.new
|
28
|
+
assert_equal(region.send(@attr),1)
|
29
|
+
region.send(@attr,5)
|
30
|
+
assert_equal(region.send(@attr),5)
|
31
|
+
end
|
32
|
+
|
33
|
+
should "allow specification of default value using a block" do
|
34
|
+
Scaffolder::Region.attribute @attr, :default => lambda{|s| s.entry_type }
|
35
|
+
region = Scaffolder::Region.new
|
36
|
+
assert_equal(region.send(@attr),region.entry_type)
|
37
|
+
region.send(@attr,5)
|
38
|
+
assert_equal(region.send(@attr),5)
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
context "passing the yaml hash to the generate method" do
|
44
|
+
|
45
|
+
setup do
|
46
|
+
Scaffolder::Region.attribute(:one)
|
47
|
+
Scaffolder::Region.attribute(:two)
|
48
|
+
@tags = {'one' => 1, 'two' => 2}
|
49
|
+
end
|
50
|
+
|
51
|
+
should "should call each tag in the hash as a method to store the value" do
|
52
|
+
Scaffolder::Region.any_instance.expects(:one).with(1)
|
53
|
+
Scaffolder::Region.any_instance.expects(:two).with(2)
|
54
|
+
Scaffolder::Region.generate(@tags)
|
55
|
+
end
|
56
|
+
|
57
|
+
should "return an instantiated region object" do
|
58
|
+
region = Scaffolder::Region.generate(@tags)
|
59
|
+
assert_equal(region.one,1)
|
60
|
+
assert_equal(region.two,2)
|
61
|
+
end
|
62
|
+
|
63
|
+
should "throw UnknownAttributeError for an unknown attribute" do
|
64
|
+
assert_raise Scaffolder::Errors::UnknownAttributeError do
|
65
|
+
Scaffolder::Region.generate({:three => 3})
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
70
|
+
|
71
|
+
context "attributes" do
|
72
|
+
|
73
|
+
should_have_attribute Scaffolder::Region,
|
74
|
+
:start, :stop, :reverse, :raw_sequence
|
75
|
+
|
76
|
+
should "return the class name as the entry type" do
|
77
|
+
Scaffolder::Region::NewRegion = Class.new(Scaffolder::Region)
|
78
|
+
assert_equal(Scaffolder::Region::NewRegion.new.entry_type,:newregion)
|
79
|
+
end
|
80
|
+
|
81
|
+
should "return 1 as default value for start attribute" do
|
82
|
+
sequence = Scaffolder::Region.new
|
83
|
+
assert_equal(sequence.start,1)
|
84
|
+
end
|
85
|
+
|
86
|
+
should "return #raw_sequence length as default value for stop attribute" do
|
87
|
+
length = 5
|
88
|
+
sequence = Scaffolder::Region.new
|
89
|
+
sequence.raw_sequence 'N' * length
|
90
|
+
assert_equal(sequence.stop,length)
|
91
|
+
end
|
92
|
+
|
9
93
|
end
|
94
|
+
|
95
|
+
context "generating the processed sequence" do
|
96
|
+
|
97
|
+
[:sequence_hook, :raw_sequence].each do |method|
|
98
|
+
|
99
|
+
context "using the #{method} method" do
|
100
|
+
|
101
|
+
setup do
|
102
|
+
# Test class to prevent interference with other tests
|
103
|
+
@s = Class.new(Scaffolder::Region).new
|
104
|
+
@s.class.send(:define_method,method,lambda{'ATGCCAGATAACTGACTAGCATG'})
|
105
|
+
end
|
106
|
+
|
107
|
+
should "return the sequence when no other options are passed" do
|
108
|
+
assert_equal(@s.sequence,'ATGCCAGATAACTGACTAGCATG')
|
109
|
+
end
|
110
|
+
|
111
|
+
should "reverse complement sequence when passed the reverse option" do
|
112
|
+
@s.reverse true
|
113
|
+
assert_equal(@s.sequence, 'CATGCTAGTCAGTTATCTGGCAT')
|
114
|
+
end
|
115
|
+
|
116
|
+
should "create subsequence when passed sequence coordinates" do
|
117
|
+
@s.start 5
|
118
|
+
@s.stop 20
|
119
|
+
assert_equal(@s.sequence,'CAGATAACTGACTAGC')
|
120
|
+
end
|
121
|
+
|
122
|
+
should "raise a CoordinateError when start is less than 1" do
|
123
|
+
@s.start 0
|
124
|
+
assert_raise(Scaffolder::Errors::CoordinateError){ @s.sequence }
|
125
|
+
end
|
126
|
+
|
127
|
+
should "raise a CoordinateError when stop is greater than sequence " do
|
128
|
+
@s.stop 24
|
129
|
+
assert_raise(Scaffolder::Errors::CoordinateError){ @s.sequence }
|
130
|
+
end
|
131
|
+
|
132
|
+
should "raise a CoordinateError when stop is greater than start " do
|
133
|
+
@s.start 6
|
134
|
+
@s.stop 5
|
135
|
+
assert_raise(Scaffolder::Errors::CoordinateError){ @s.sequence }
|
136
|
+
end
|
137
|
+
|
138
|
+
end
|
139
|
+
|
140
|
+
end
|
141
|
+
|
142
|
+
end
|
143
|
+
|
144
|
+
should "instantiate return corresponding region subclass when requested" do
|
145
|
+
Scaffolder::Region::Type = Class.new
|
146
|
+
assert_equal(Scaffolder::Region['type'],Scaffolder::Region::Type)
|
147
|
+
end
|
148
|
+
|
10
149
|
end
|
11
150
|
end
|