bio-maf 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +2 -1
- data/README.md +98 -29
- data/Rakefile +6 -2
- data/bin/maf_tile +59 -35
- data/bio-maf.gemspec +4 -3
- data/features/block-joining.feature +32 -0
- data/features/dir-access.feature +46 -0
- data/features/maf-indexing.feature +23 -0
- data/features/maf-to-fasta.feature +9 -0
- data/features/slice.feature +54 -0
- data/features/step_definitions/dir-access_steps.rb +15 -0
- data/features/step_definitions/file_steps.rb +7 -0
- data/features/step_definitions/gap_removal_steps.rb +4 -0
- data/features/step_definitions/index_steps.rb +3 -3
- data/features/step_definitions/output_steps.rb +9 -1
- data/features/step_definitions/parse_steps.rb +13 -2
- data/features/step_definitions/query_steps.rb +7 -6
- data/features/step_definitions/slice_steps.rb +15 -0
- data/features/step_definitions/{gap-filling_steps.rb → tiling_steps.rb} +0 -0
- data/features/support/aruba.rb +1 -0
- data/features/support/env.rb +3 -1
- data/features/{gap-filling.feature → tiling.feature} +85 -0
- data/lib/bio/maf/index.rb +223 -11
- data/lib/bio/maf/maf.rb +209 -0
- data/lib/bio/maf/parser.rb +190 -111
- data/lib/bio/maf/tiler.rb +33 -6
- data/man/maf_index.1 +1 -1
- data/man/maf_tile.1 +7 -7
- data/man/maf_tile.1.ronn +21 -13
- data/man/maf_to_fasta.1 +1 -1
- data/spec/bio/maf/index_spec.rb +99 -0
- data/spec/bio/maf/maf_spec.rb +184 -0
- data/spec/bio/maf/parser_spec.rb +75 -115
- data/spec/bio/maf/tiler_spec.rb +44 -0
- data/test/data/chr22_ieq2.maf +11 -0
- data/test/data/gap-1.kct +0 -0
- data/test/data/gap-1.maf +9 -0
- data/test/data/gap-filled1.fa +6 -0
- data/test/data/gap-sp1.fa.gz +0 -0
- data/test/data/mm8_chr7_tiny_slice1.maf +9 -0
- data/test/data/mm8_chr7_tiny_slice2.maf +10 -0
- data/test/data/mm8_chr7_tiny_slice3.maf +10 -0
- data/test/data/mm8_chrM_tiny.kct +0 -0
- data/test/data/mm8_chrM_tiny.maf +1000 -0
- metadata +59 -7
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -13,7 +13,6 @@ group :development do
|
|
13
13
|
gem "redcarpet", "~> 2.1.1", :platforms => :mri
|
14
14
|
gem "ronn", "~> 0.7.3", :platforms => :mri
|
15
15
|
gem "sinatra", "~> 1.3.2" # for ronn --server
|
16
|
-
gem "rubygems-tasks", "~> 0.2.3"
|
17
16
|
end
|
18
17
|
|
19
18
|
group :test do
|
@@ -21,4 +20,6 @@ group :test do
|
|
21
20
|
gem "rake", ">= 0.9"
|
22
21
|
gem "cucumber", ">= 0"
|
23
22
|
gem "rspec", "~> 2.10.0"
|
23
|
+
gem "rubygems-tasks", "~> 0.2.3"
|
24
|
+
gem "aruba", "~> 0.4.11"
|
24
25
|
end
|
data/README.md
CHANGED
@@ -92,6 +92,19 @@ Or programmatically:
|
|
92
92
|
|
93
93
|
Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/mm8_chr7_tiny.maf).
|
94
94
|
|
95
|
+
require 'bio-maf'
|
96
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
97
|
+
|
98
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
99
|
+
access.find(q) do |block|
|
100
|
+
ref_seq = block.sequences[0]
|
101
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
102
|
+
end
|
103
|
+
|
104
|
+
# => Matched block at 80082592, 121 bases
|
105
|
+
# => Matched block at 80082713, 54 bases
|
106
|
+
|
107
|
+
Or, equivalently, one can work with a specific MAF file and index directly:
|
95
108
|
|
96
109
|
require 'bio-maf'
|
97
110
|
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
@@ -106,15 +119,27 @@ Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/te
|
|
106
119
|
# => Matched block at 80082592, 121 bases
|
107
120
|
# => Matched block at 80082713, 54 bases
|
108
121
|
|
122
|
+
### Extract alignment blocks truncated to a given interval
|
123
|
+
|
124
|
+
Given a genomic interval of interest, one can also extract only the
|
125
|
+
subsets of blocks that intersect with that interval, using the
|
126
|
+
`#slice` method like so:
|
127
|
+
|
128
|
+
require 'bio-maf'
|
129
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
130
|
+
int = Bio::GenomicInterval.zero_based('mm8.chr7', 80082350, 80082380)
|
131
|
+
blocks = access.slice(int).to_a
|
132
|
+
puts "Got #{blocks.size} blocks, first #{blocks.first.ref_seq.size} base pairs."
|
133
|
+
# => Got 2 blocks, first 18 base pairs.
|
134
|
+
|
109
135
|
### Filter species returned in alignment blocks
|
110
136
|
|
111
137
|
require 'bio-maf'
|
112
|
-
|
113
|
-
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
138
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
114
139
|
|
115
|
-
|
140
|
+
access.sequence_filter = { :only_species => %w(hg18 mm8 rheMac2) }
|
116
141
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
117
|
-
blocks =
|
142
|
+
blocks = access.find(q)
|
118
143
|
block = blocks.first
|
119
144
|
puts "Block has #{block.sequences.size} sequences."
|
120
145
|
|
@@ -129,23 +154,26 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
129
154
|
|
130
155
|
#### Match only blocks with all specified species
|
131
156
|
|
157
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
132
158
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082471, 80082730)]
|
133
|
-
|
134
|
-
n_blocks =
|
159
|
+
access.block_filter = { :with_all_species => %w(panTro2 loxAfr1) }
|
160
|
+
n_blocks = access.find(q).count
|
135
161
|
# => 1
|
136
162
|
|
137
163
|
#### Match only blocks with a certain number of sequences
|
138
164
|
|
165
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
139
166
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082767, 80083008)]
|
140
|
-
|
141
|
-
n_blocks =
|
167
|
+
access.block_filter = { :at_least_n_sequences => 6 }
|
168
|
+
n_blocks = access.find(q).count
|
142
169
|
# => 1
|
143
170
|
|
144
171
|
#### Match only blocks within a text size range
|
145
172
|
|
173
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
146
174
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 0, 80100000)]
|
147
|
-
|
148
|
-
n_blocks =
|
175
|
+
access.block_filter = { :min_size => 72, :max_size => 160 }
|
176
|
+
n_blocks = access.find(q).count
|
149
177
|
# => 3
|
150
178
|
|
151
179
|
### Process each block in a MAF file
|
@@ -155,7 +183,7 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
155
183
|
puts "MAF version: #{p.header.version}"
|
156
184
|
# => MAF version: 1
|
157
185
|
|
158
|
-
p.
|
186
|
+
p.each_block do |block|
|
159
187
|
block.sequences.each do |seq|
|
160
188
|
do_something(seq)
|
161
189
|
end
|
@@ -183,6 +211,12 @@ Refer to [`chr22_ieq.maf`](https://github.com/csw/bioruby-maf/blob/master/test/d
|
|
183
211
|
# @size=1601, @strand=:+, @src_size=50103, @text=nil,
|
184
212
|
# @status="I">
|
185
213
|
|
214
|
+
Such options can also be set on a Bio::MAF::Access object:
|
215
|
+
|
216
|
+
require 'bio-maf'
|
217
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
218
|
+
access.parse_options[:parse_empty] = true
|
219
|
+
|
186
220
|
### Remove gaps from parsed blocks
|
187
221
|
|
188
222
|
After filtering out species with
|
@@ -192,8 +226,42 @@ sequences that were filtered out. Such gaps can be removed by setting
|
|
192
226
|
the `:remove_gaps` parser option:
|
193
227
|
|
194
228
|
require 'bio-maf'
|
195
|
-
|
196
|
-
|
229
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
230
|
+
access.parse_options[:remove_gaps] = true
|
231
|
+
|
232
|
+
### Join blocks together after filtering
|
233
|
+
|
234
|
+
Similarly, filtering out species may remove a species which had caused
|
235
|
+
two adjacent alignment blocks to be split. By enabling the
|
236
|
+
`:join_blocks` parser option, such blocks can be joined together:
|
237
|
+
|
238
|
+
require 'bio-maf'
|
239
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
240
|
+
access.parse_options[:join_blocks] = true
|
241
|
+
|
242
|
+
See the [Cucumber feature][] for more details.
|
243
|
+
|
244
|
+
[Cucumber feature]: https://github.com/csw/bioruby-maf/blob/master/features/block-joining.feature
|
245
|
+
|
246
|
+
### Extract bio-alignment representations of blocks
|
247
|
+
|
248
|
+
When the `:as_bio_alignment` parser option is given, blocks will be
|
249
|
+
returned as [Bio::BioAlignment::Alignment][] objects as used in the
|
250
|
+
[bio-alignment] Biogem. This offers a great deal of built-in
|
251
|
+
functionality for column-wise operations, alignment manipulation, and
|
252
|
+
more.
|
253
|
+
|
254
|
+
[Bio::BioAlignment::Alignment]: http://rdoc.info/gems/bio-alignment/Bio/BioAlignment/Alignment
|
255
|
+
[bio-alignment]: https://github.com/pjotrp/bioruby-alignment
|
256
|
+
|
257
|
+
require 'bio-maf'
|
258
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
259
|
+
access.parse_options[:as_bio_alignment] = true
|
260
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
261
|
+
access.find(q) do |aln|
|
262
|
+
col = aln.columns[3]
|
263
|
+
puts "bases in column 3: #{col}"
|
264
|
+
end
|
197
265
|
|
198
266
|
### Tile blocks together over an interval
|
199
267
|
|
@@ -206,24 +274,25 @@ the
|
|
206
274
|
[`maf_tile(1)`](http://csw.github.com/bioruby-maf/man/maf_tile.1.html)
|
207
275
|
man page.
|
208
276
|
|
209
|
-
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/gap-filling.feature
|
277
|
+
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/tiling.feature
|
210
278
|
|
211
279
|
require 'bio-maf'
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
280
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
281
|
+
interval = Bio::GenomicInterval.zero_based('mm8.chr7',
|
282
|
+
80082334,
|
283
|
+
80082468)
|
284
|
+
access.tile(interval) do |tiler|
|
285
|
+
# reference is optional
|
286
|
+
tiler.reference = 'reference.fa.gz'
|
287
|
+
tiler.species = %w(mm8 rn4 hg18)
|
288
|
+
# species_map is optional
|
289
|
+
tiler.species_map = {
|
290
|
+
'mm8' => 'mouse',
|
291
|
+
'rn4' => 'rat',
|
292
|
+
'hg18' => 'human'
|
293
|
+
}
|
294
|
+
tiler.write_fasta($stdout)
|
295
|
+
end
|
227
296
|
|
228
297
|
### Command line tools
|
229
298
|
|
data/Rakefile
CHANGED
@@ -23,7 +23,10 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
23
23
|
end
|
24
24
|
|
25
25
|
require 'cucumber/rake/task'
|
26
|
-
Cucumber::Rake::Task.new do |
|
26
|
+
Cucumber::Rake::Task.new do |t|
|
27
|
+
opts = "features"
|
28
|
+
opts << ' --tags ~@no_jruby' if RUBY_PLATFORM == 'java'
|
29
|
+
t.cucumber_opts = opts
|
27
30
|
end
|
28
31
|
|
29
32
|
task :test => [ :spec, :cucumber ]
|
@@ -44,7 +47,8 @@ if ronn_avail
|
|
44
47
|
desc "Generate man pages"
|
45
48
|
task :man do
|
46
49
|
file_spec = RONN_FILES.join(' ')
|
47
|
-
sh "ronn --roff --html --style toc --date #{$gemspec.date.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='#{$gemspec.author}' #{file_spec}"
|
50
|
+
#sh "ronn --roff --html --style toc --date #{$gemspec.date.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='#{$gemspec.author}' #{file_spec}"
|
51
|
+
sh "ronn --roff --html --style toc --date #{Time.now.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='BioRuby' #{file_spec}"
|
48
52
|
end
|
49
53
|
|
50
54
|
namespace :man do
|
data/bin/maf_tile
CHANGED
@@ -6,6 +6,24 @@ require 'ostruct'
|
|
6
6
|
require 'bio-maf'
|
7
7
|
require 'bio-genomic-interval'
|
8
8
|
|
9
|
+
def parse_interval(line)
|
10
|
+
src, r_start_s, r_end_s, _ = line.split(nil, 4)
|
11
|
+
r_start = r_start_s.to_i
|
12
|
+
r_end = r_end_s.to_i
|
13
|
+
return Bio::GenomicInterval.zero_based(src, r_start, r_end)
|
14
|
+
end
|
15
|
+
|
16
|
+
def target_for(base, interval, &blk)
|
17
|
+
path = "#{base}_#{interval.zero_start}-#{interval.zero_end}.fa"
|
18
|
+
File.open(path, 'w', &blk)
|
19
|
+
end
|
20
|
+
|
21
|
+
def apply_options(options, tiler)
|
22
|
+
tiler.reference = options.ref if options.ref
|
23
|
+
tiler.species = options.species
|
24
|
+
tiler.species_map = options.species_map
|
25
|
+
end
|
26
|
+
|
9
27
|
options = OpenStruct.new
|
10
28
|
options.p = { :threads => 1 }
|
11
29
|
options.species = []
|
@@ -13,16 +31,20 @@ options.species_map = {}
|
|
13
31
|
options.usage = false
|
14
32
|
|
15
33
|
o_parser = OptionParser.new do |opts|
|
16
|
-
opts.banner = "Usage: maf_tile [options] <maf>
|
34
|
+
opts.banner = "Usage: maf_tile [options] <maf> [index]"
|
17
35
|
opts.separator ""
|
18
36
|
opts.separator "Options:"
|
19
37
|
opts.on("-r", "--reference SEQ", "FASTA reference sequence") do |ref|
|
20
38
|
options.ref = ref
|
21
39
|
end
|
22
|
-
opts.on("-i", "--interval BEGIN:END", "Genomic interval, zero-based") do |int|
|
23
|
-
if int =~ /(\d+):(\d+)/
|
40
|
+
opts.on("-i", "--interval [CHR:]BEGIN:END", "Genomic interval, zero-based") do |int|
|
41
|
+
if int =~ /(.+):(\d+):(\d+)/
|
42
|
+
gi = Bio::GenomicInterval.zero_based($1, ($2.to_i), ($3.to_i))
|
43
|
+
options.genomic_interval = gi
|
44
|
+
elsif int =~ /(\d+):(\d+)/
|
24
45
|
options.interval = ($1.to_i)...($2.to_i)
|
25
46
|
else
|
47
|
+
$stderr.puts "Invalid interval specification #{int}!"
|
26
48
|
options.usage = true
|
27
49
|
end
|
28
50
|
end
|
@@ -51,30 +73,19 @@ maf_p = ARGV.shift
|
|
51
73
|
index_p = ARGV.shift
|
52
74
|
|
53
75
|
unless (! options.usage) \
|
54
|
-
&& maf_p &&
|
55
|
-
&& (options.output_base
|
76
|
+
&& maf_p && (! options.species.empty?) \
|
77
|
+
&& (options.output_base \
|
78
|
+
? options.bed \
|
79
|
+
: options.interval || options.genomic_interval)
|
56
80
|
$stderr.puts o_parser
|
57
81
|
exit 2
|
58
82
|
end
|
59
83
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
tiler.species_map = options.species_map
|
66
|
-
|
67
|
-
def parse_interval(line)
|
68
|
-
src, r_start_s, r_end_s, _ = line.split(nil, 4)
|
69
|
-
r_start = r_start_s.to_i
|
70
|
-
r_end = r_end_s.to_i
|
71
|
-
return Bio::GenomicInterval.zero_based(src, r_start, r_end)
|
72
|
-
end
|
73
|
-
|
74
|
-
def target_for(base, interval)
|
75
|
-
path = "#{base}_#{interval.zero_start}-#{interval.zero_end}.fa"
|
76
|
-
File.open(path, 'w')
|
77
|
-
end
|
84
|
+
access = if File.directory? maf_p
|
85
|
+
Bio::MAF::Access.maf_dir(maf_p, options.p)
|
86
|
+
else
|
87
|
+
Bio::MAF::Access.file(maf_p, index_p, options.p)
|
88
|
+
end
|
78
89
|
|
79
90
|
if options.bed
|
80
91
|
intervals = []
|
@@ -83,21 +94,34 @@ if options.bed
|
|
83
94
|
end
|
84
95
|
intervals.sort_by! { |int| int.zero_start }
|
85
96
|
intervals.each do |int|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
97
|
+
access.tile(int) do |tiler|
|
98
|
+
apply_options(options, tiler)
|
99
|
+
target_for(options.output_base, int) do |target|
|
100
|
+
tiler.write_fasta(target)
|
101
|
+
end
|
102
|
+
end
|
90
103
|
end
|
91
104
|
else
|
92
105
|
# single interval
|
93
|
-
|
94
|
-
|
95
|
-
options.interval.end)
|
96
|
-
if options.output_base
|
97
|
-
target = target_for(options.output_base, tiler.interval)
|
106
|
+
if options.genomic_interval
|
107
|
+
interval = options.genomic_interval
|
98
108
|
else
|
99
|
-
|
109
|
+
if access.indices.size != 1
|
110
|
+
raise "Must explicitly specify sequence in --interval argument with multiple candidate MAF files!"
|
111
|
+
end
|
112
|
+
ref_seq = access.indices.keys.first
|
113
|
+
interval = Bio::GenomicInterval.zero_based(ref_seq,
|
114
|
+
options.interval.begin,
|
115
|
+
options.interval.end)
|
116
|
+
end
|
117
|
+
access.tile(interval) do |tiler|
|
118
|
+
apply_options(options, tiler)
|
119
|
+
if options.output_base
|
120
|
+
target = target_for(options.output_base, tiler.interval)
|
121
|
+
else
|
122
|
+
target = $stdout
|
123
|
+
end
|
124
|
+
tiler.write_fasta(target)
|
125
|
+
target.close
|
100
126
|
end
|
101
|
-
tiler.write_fasta(target)
|
102
|
-
target.close
|
103
127
|
end
|
data/bio-maf.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "bio-maf"
|
5
|
-
s.version = "0.2.0"
|
5
|
+
s.version = "0.3.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Clayton Wheeler"]
|
9
|
-
s.date = "2012-
|
9
|
+
s.date = "2012-07-18"
|
10
10
|
s.description = "Multiple Alignment Format parser for BioRuby."
|
11
11
|
s.email = "cswh@umich.edu"
|
12
12
|
s.executables = ["maf_count", "maf_dump_blocks", "maf_extract_ranges_count", "maf_index", "maf_parse_bench", "maf_to_fasta", "maf_write", "random_ranges"]
|
@@ -32,10 +32,11 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.platform = 'java'
|
33
33
|
end
|
34
34
|
|
35
|
+
s.add_runtime_dependency('bio-alignment', ["~> 0.0.7"])
|
35
36
|
s.add_runtime_dependency('bio-bigbio', [">= 0"])
|
36
37
|
s.add_runtime_dependency('bio-genomic-interval', ["~> 0.1.2"])
|
37
38
|
if RUBY_PLATFORM == 'java'
|
38
|
-
s.add_runtime_dependency('kyotocabinet-java', ["~> 0.
|
39
|
+
s.add_runtime_dependency('kyotocabinet-java', ["~> 0.3.0"])
|
39
40
|
else
|
40
41
|
s.add_runtime_dependency('kyotocabinet-ruby', ["~> 1.27.1"])
|
41
42
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
Feature: Join adjacent alignment blocks
|
2
|
+
After filtering out sequences
|
3
|
+
The sequence that caused two blocks to be separate may be removed
|
4
|
+
So it can be desirable to join such blocks together
|
5
|
+
|
6
|
+
Scenario: Two blocks natively in indexed access
|
7
|
+
Given indexed MAF files in "test/data"
|
8
|
+
When I query for the genomic intervals
|
9
|
+
| chrom | start | end |
|
10
|
+
| mm8.chr7 | 80082334 | 80082471 |
|
11
|
+
Then 2 blocks are obtained
|
12
|
+
And the text size of block 0 is 54
|
13
|
+
And the text size of block 1 is 156
|
14
|
+
|
15
|
+
Scenario: Two blocks joined in indexed access
|
16
|
+
Given indexed MAF files in "test/data"
|
17
|
+
When I enable the :join_blocks parser option
|
18
|
+
And I filter for only the species
|
19
|
+
| mm8 |
|
20
|
+
| rn4 |
|
21
|
+
| oryCun1 |
|
22
|
+
| hg18 |
|
23
|
+
| panTro2 |
|
24
|
+
| rheMac2 |
|
25
|
+
| canFam2 |
|
26
|
+
| loxAfr1 |
|
27
|
+
| echTel1 |
|
28
|
+
And I query for the genomic intervals
|
29
|
+
| chrom | start | end |
|
30
|
+
| mm8.chr7 | 80082334 | 80082471 |
|
31
|
+
Then 1 block is obtained
|
32
|
+
And the text size of block 0 is 210
|
@@ -0,0 +1,46 @@
|
|
1
|
+
Feature: Provide access to multiple MAF files in a directory
|
2
|
+
In order to efficiently work with many MAF files
|
3
|
+
We need to provide a convenient interface to them
|
4
|
+
|
5
|
+
Scenario: Query for several chromosomes at once
|
6
|
+
Given indexed MAF files in "test/data"
|
7
|
+
When I query for the genomic intervals
|
8
|
+
| chrom | start | end |
|
9
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
10
|
+
| mm8.chrM | 1400 | 1590 |
|
11
|
+
Then 5 blocks are obtained
|
12
|
+
|
13
|
+
Scenario: Apply block filters
|
14
|
+
Given indexed MAF files in "test/data"
|
15
|
+
When I filter for blocks with text size at most 200
|
16
|
+
And I query for the genomic intervals
|
17
|
+
| chrom | start | end |
|
18
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
19
|
+
| mm8.chrM | 1400 | 1590 |
|
20
|
+
Then 3 blocks are obtained
|
21
|
+
|
22
|
+
Scenario: Apply sequence filters
|
23
|
+
Given indexed MAF files in "test/data"
|
24
|
+
When I filter for only the species
|
25
|
+
| mm8 |
|
26
|
+
| rn4 |
|
27
|
+
| hg18 |
|
28
|
+
And I query for the genomic intervals
|
29
|
+
| chrom | start | end |
|
30
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
31
|
+
| mm8.chrM | 1400 | 1590 |
|
32
|
+
Then 5 blocks are obtained
|
33
|
+
And block 0 has 3 sequences
|
34
|
+
|
35
|
+
Scenario: Set parse options
|
36
|
+
Given indexed MAF files in "test/data"
|
37
|
+
When I enable the :remove_gaps parser option
|
38
|
+
And I filter for only the species
|
39
|
+
| mm8 |
|
40
|
+
| rn4 |
|
41
|
+
| hg18 |
|
42
|
+
And I query for the genomic intervals
|
43
|
+
| chrom | start | end |
|
44
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
45
|
+
Then 2 blocks are obtained
|
46
|
+
And the text size of block 1 is 121
|