bio-maf 0.2.0-java → 0.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +3 -1
- data/README.md +98 -29
- data/Rakefile +6 -2
- data/bin/maf_tile +59 -35
- data/bio-maf.gemspec +4 -3
- data/features/block-joining.feature +32 -0
- data/features/dir-access.feature +46 -0
- data/features/maf-indexing.feature +23 -0
- data/features/maf-to-fasta.feature +9 -0
- data/features/slice.feature +54 -0
- data/features/step_definitions/dir-access_steps.rb +15 -0
- data/features/step_definitions/file_steps.rb +7 -0
- data/features/step_definitions/gap_removal_steps.rb +4 -0
- data/features/step_definitions/index_steps.rb +3 -3
- data/features/step_definitions/output_steps.rb +9 -1
- data/features/step_definitions/parse_steps.rb +13 -2
- data/features/step_definitions/query_steps.rb +7 -6
- data/features/step_definitions/slice_steps.rb +15 -0
- data/features/step_definitions/{gap-filling_steps.rb → tiling_steps.rb} +0 -0
- data/features/support/aruba.rb +1 -0
- data/features/support/env.rb +3 -1
- data/features/{gap-filling.feature → tiling.feature} +85 -0
- data/lib/bio/maf/index.rb +223 -11
- data/lib/bio/maf/maf.rb +209 -0
- data/lib/bio/maf/parser.rb +190 -111
- data/lib/bio/maf/tiler.rb +33 -6
- data/man/maf_index.1 +1 -1
- data/man/maf_tile.1 +7 -7
- data/man/maf_tile.1.ronn +21 -13
- data/man/maf_to_fasta.1 +1 -1
- data/spec/bio/maf/index_spec.rb +99 -0
- data/spec/bio/maf/maf_spec.rb +184 -0
- data/spec/bio/maf/parser_spec.rb +75 -115
- data/spec/bio/maf/tiler_spec.rb +44 -0
- data/test/data/chr22_ieq2.maf +11 -0
- data/test/data/gap-1.kct +0 -0
- data/test/data/gap-1.maf +9 -0
- data/test/data/gap-filled1.fa +6 -0
- data/test/data/gap-sp1.fa.gz +0 -0
- data/test/data/mm8_chr7_tiny_slice1.maf +9 -0
- data/test/data/mm8_chr7_tiny_slice2.maf +10 -0
- data/test/data/mm8_chr7_tiny_slice3.maf +10 -0
- data/test/data/mm8_chrM_tiny.kct +0 -0
- data/test/data/mm8_chrM_tiny.maf +1000 -0
- metadata +65 -16
data/.gitignore
CHANGED
data/Gemfile
CHANGED
@@ -13,7 +13,7 @@ group :development do
|
|
13
13
|
gem "redcarpet", "~> 2.1.1", :platforms => :mri
|
14
14
|
gem "ronn", "~> 0.7.3", :platforms => :mri
|
15
15
|
gem "sinatra", "~> 1.3.2" # for ronn --server
|
16
|
-
gem "
|
16
|
+
gem "jruby-openssl", ">= 0.7", :platforms => :jruby
|
17
17
|
end
|
18
18
|
|
19
19
|
group :test do
|
@@ -21,4 +21,6 @@ group :test do
|
|
21
21
|
gem "rake", ">= 0.9"
|
22
22
|
gem "cucumber", ">= 0"
|
23
23
|
gem "rspec", "~> 2.10.0"
|
24
|
+
gem "rubygems-tasks", "~> 0.2.3"
|
25
|
+
gem "aruba", "~> 0.4.11"
|
24
26
|
end
|
data/README.md
CHANGED
@@ -92,6 +92,19 @@ Or programmatically:
|
|
92
92
|
|
93
93
|
Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/mm8_chr7_tiny.maf).
|
94
94
|
|
95
|
+
require 'bio-maf'
|
96
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
97
|
+
|
98
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
99
|
+
access.find(q) do |block|
|
100
|
+
ref_seq = block.sequences[0]
|
101
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
102
|
+
end
|
103
|
+
|
104
|
+
# => Matched block at 80082592, 121 bases
|
105
|
+
# => Matched block at 80082713, 54 bases
|
106
|
+
|
107
|
+
Or, equivalently, one can work with a specific MAF file and index directly:
|
95
108
|
|
96
109
|
require 'bio-maf'
|
97
110
|
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
@@ -106,15 +119,27 @@ Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/te
|
|
106
119
|
# => Matched block at 80082592, 121 bases
|
107
120
|
# => Matched block at 80082713, 54 bases
|
108
121
|
|
122
|
+
### Extract alignment blocks truncated to a given interval
|
123
|
+
|
124
|
+
Given a genomic interval of interest, one can also extract only the
|
125
|
+
subsets of blocks that intersect with that interval, using the
|
126
|
+
`#slice` method like so:
|
127
|
+
|
128
|
+
require 'bio-maf'
|
129
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
130
|
+
int = Bio::GenomicInterval.zero_based('mm8.chr7', 80082350, 80082380)
|
131
|
+
blocks = access.slice(int).to_a
|
132
|
+
puts "Got #{blocks.size} blocks, first #{blocks.first.ref_seq.size} base pairs."
|
133
|
+
# => Got 2 blocks, first 18 base pairs.
|
134
|
+
|
109
135
|
### Filter species returned in alignment blocks
|
110
136
|
|
111
137
|
require 'bio-maf'
|
112
|
-
|
113
|
-
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
138
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
114
139
|
|
115
|
-
|
140
|
+
access.sequence_filter = { :only_species => %w(hg18 mm8 rheMac2) }
|
116
141
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
117
|
-
blocks =
|
142
|
+
blocks = access.find(q)
|
118
143
|
block = blocks.first
|
119
144
|
puts "Block has #{block.sequences.size} sequences."
|
120
145
|
|
@@ -129,23 +154,26 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
129
154
|
|
130
155
|
#### Match only blocks with all specified species
|
131
156
|
|
157
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
132
158
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082471, 80082730)]
|
133
|
-
|
134
|
-
n_blocks =
|
159
|
+
access.block_filter = { :with_all_species => %w(panTro2 loxAfr1) }
|
160
|
+
n_blocks = access.find(q).count
|
135
161
|
# => 1
|
136
162
|
|
137
163
|
#### Match only blocks with a certain number of sequences
|
138
164
|
|
165
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
139
166
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082767, 80083008)]
|
140
|
-
|
141
|
-
n_blocks =
|
167
|
+
access.block_filter = { :at_least_n_sequences => 6 }
|
168
|
+
n_blocks = access.find(q).count
|
142
169
|
# => 1
|
143
170
|
|
144
171
|
#### Match only blocks within a text size range
|
145
172
|
|
173
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
146
174
|
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 0, 80100000)]
|
147
|
-
|
148
|
-
n_blocks =
|
175
|
+
access.block_filter = { :min_size => 72, :max_size => 160 }
|
176
|
+
n_blocks = access.find(q).count
|
149
177
|
# => 3
|
150
178
|
|
151
179
|
### Process each block in a MAF file
|
@@ -155,7 +183,7 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
155
183
|
puts "MAF version: #{p.header.version}"
|
156
184
|
# => MAF version: 1
|
157
185
|
|
158
|
-
p.
|
186
|
+
p.each_block do |block|
|
159
187
|
block.sequences.each do |seq|
|
160
188
|
do_something(seq)
|
161
189
|
end
|
@@ -183,6 +211,12 @@ Refer to [`chr22_ieq.maf`](https://github.com/csw/bioruby-maf/blob/master/test/d
|
|
183
211
|
# @size=1601, @strand=:+, @src_size=50103, @text=nil,
|
184
212
|
# @status="I">
|
185
213
|
|
214
|
+
Such options can also be set on a `Bio::MAF::Access` object:
|
215
|
+
|
216
|
+
require 'bio-maf'
|
217
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
218
|
+
access.parse_options[:parse_empty] = true
|
219
|
+
|
186
220
|
### Remove gaps from parsed blocks
|
187
221
|
|
188
222
|
After filtering out species with
|
@@ -192,8 +226,42 @@ sequences that were filtered out. Such gaps can be removed by setting
|
|
192
226
|
the `:remove_gaps` parser option:
|
193
227
|
|
194
228
|
require 'bio-maf'
|
195
|
-
|
196
|
-
|
229
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
230
|
+
access.parse_options[:remove_gaps] = true
|
231
|
+
|
232
|
+
### Join blocks together after filtering
|
233
|
+
|
234
|
+
Similarly, filtering out species may remove a species which had caused
|
235
|
+
two adjacent alignment blocks to be split. By enabling the
|
236
|
+
`:join_blocks` parser option, such blocks can be joined together:
|
237
|
+
|
238
|
+
require 'bio-maf'
|
239
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
240
|
+
access.parse_options[:join_blocks] = true
|
241
|
+
|
242
|
+
See the [Cucumber feature][] for more details.
|
243
|
+
|
244
|
+
[Cucumber feature]: https://github.com/csw/bioruby-maf/blob/master/features/block-joining.feature
|
245
|
+
|
246
|
+
### Extract bio-alignment representations of blocks
|
247
|
+
|
248
|
+
When the `:as_bio_alignment` parser option is given, blocks will be
|
249
|
+
returned as [Bio::BioAlignment::Alignment][] objects as used in the
|
250
|
+
[bio-alignment] Biogem. This offers a great deal of built-in
|
251
|
+
functionality for column-wise operations, alignment manipulation, and
|
252
|
+
more.
|
253
|
+
|
254
|
+
[Bio::BioAlignment::Alignment]: http://rdoc.info/gems/bio-alignment/Bio/BioAlignment/Alignment
|
255
|
+
[bio-alignment]: https://github.com/pjotrp/bioruby-alignment
|
256
|
+
|
257
|
+
require 'bio-maf'
|
258
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
259
|
+
access.parse_options[:as_bio_alignment] = true
|
260
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
261
|
+
access.find(q) do |aln|
|
262
|
+
col = aln.columns[3]
|
263
|
+
puts "bases in column 3: #{col}"
|
264
|
+
end
|
197
265
|
|
198
266
|
### Tile blocks together over an interval
|
199
267
|
|
@@ -206,24 +274,25 @@ the
|
|
206
274
|
[`maf_tile(1)`](http://csw.github.com/bioruby-maf/man/maf_tile.1.html)
|
207
275
|
man page.
|
208
276
|
|
209
|
-
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/
|
277
|
+
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/tiling.feature
|
210
278
|
|
211
279
|
require 'bio-maf'
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
280
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
281
|
+
interval = Bio::GenomicInterval.zero_based('mm8.chr7',
|
282
|
+
80082334,
|
283
|
+
80082468)
|
284
|
+
access.tile(interval) do |tiler|
|
285
|
+
# reference is optional
|
286
|
+
tiler.reference = 'reference.fa.gz'
|
287
|
+
tiler.species = %w(mm8 rn4 hg18)
|
288
|
+
# species_map is optional
|
289
|
+
tiler.species_map = {
|
290
|
+
'mm8' => 'mouse',
|
291
|
+
'rn4' => 'rat',
|
292
|
+
'hg18' => 'human'
|
293
|
+
}
|
294
|
+
tiler.write_fasta($stdout)
|
295
|
+
end
|
227
296
|
|
228
297
|
### Command line tools
|
229
298
|
|
data/Rakefile
CHANGED
@@ -23,7 +23,10 @@ RSpec::Core::RakeTask.new(:spec) do |spec|
|
|
23
23
|
end
|
24
24
|
|
25
25
|
require 'cucumber/rake/task'
|
26
|
-
Cucumber::Rake::Task.new do |
|
26
|
+
Cucumber::Rake::Task.new do |t|
|
27
|
+
opts = "features"
|
28
|
+
opts << ' --tags ~@no_jruby' if RUBY_PLATFORM == 'java'
|
29
|
+
t.cucumber_opts = opts
|
27
30
|
end
|
28
31
|
|
29
32
|
task :test => [ :spec, :cucumber ]
|
@@ -44,7 +47,8 @@ if ronn_avail
|
|
44
47
|
desc "Generate man pages"
|
45
48
|
task :man do
|
46
49
|
file_spec = RONN_FILES.join(' ')
|
47
|
-
sh "ronn --roff --html --style toc --date #{$gemspec.date.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='#{$gemspec.author}' #{file_spec}"
|
50
|
+
#sh "ronn --roff --html --style toc --date #{$gemspec.date.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='#{$gemspec.author}' #{file_spec}"
|
51
|
+
sh "ronn --roff --html --style toc --date #{Time.now.strftime('%Y-%m-%d')} --manual='BioRuby Manual' --organization='BioRuby' #{file_spec}"
|
48
52
|
end
|
49
53
|
|
50
54
|
namespace :man do
|
data/bin/maf_tile
CHANGED
@@ -6,6 +6,24 @@ require 'ostruct'
|
|
6
6
|
require 'bio-maf'
|
7
7
|
require 'bio-genomic-interval'
|
8
8
|
|
9
|
+
def parse_interval(line)
|
10
|
+
src, r_start_s, r_end_s, _ = line.split(nil, 4)
|
11
|
+
r_start = r_start_s.to_i
|
12
|
+
r_end = r_end_s.to_i
|
13
|
+
return Bio::GenomicInterval.zero_based(src, r_start, r_end)
|
14
|
+
end
|
15
|
+
|
16
|
+
def target_for(base, interval, &blk)
|
17
|
+
path = "#{base}_#{interval.zero_start}-#{interval.zero_end}.fa"
|
18
|
+
File.open(path, 'w', &blk)
|
19
|
+
end
|
20
|
+
|
21
|
+
def apply_options(options, tiler)
|
22
|
+
tiler.reference = options.ref if options.ref
|
23
|
+
tiler.species = options.species
|
24
|
+
tiler.species_map = options.species_map
|
25
|
+
end
|
26
|
+
|
9
27
|
options = OpenStruct.new
|
10
28
|
options.p = { :threads => 1 }
|
11
29
|
options.species = []
|
@@ -13,16 +31,20 @@ options.species_map = {}
|
|
13
31
|
options.usage = false
|
14
32
|
|
15
33
|
o_parser = OptionParser.new do |opts|
|
16
|
-
opts.banner = "Usage: maf_tile [options] <maf>
|
34
|
+
opts.banner = "Usage: maf_tile [options] <maf> [index]"
|
17
35
|
opts.separator ""
|
18
36
|
opts.separator "Options:"
|
19
37
|
opts.on("-r", "--reference SEQ", "FASTA reference sequence") do |ref|
|
20
38
|
options.ref = ref
|
21
39
|
end
|
22
|
-
opts.on("-i", "--interval BEGIN:END", "Genomic interval, zero-based") do |int|
|
23
|
-
if int =~ /(\d+):(\d+)/
|
40
|
+
opts.on("-i", "--interval [CHR:]BEGIN:END", "Genomic interval, zero-based") do |int|
|
41
|
+
if int =~ /(.+):(\d+):(\d+)/
|
42
|
+
gi = Bio::GenomicInterval.zero_based($1, ($2.to_i), ($3.to_i))
|
43
|
+
options.genomic_interval = gi
|
44
|
+
elsif int =~ /(\d+):(\d+)/
|
24
45
|
options.interval = ($1.to_i)...($2.to_i)
|
25
46
|
else
|
47
|
+
$stderr.puts "Invalid interval specification #{int}!"
|
26
48
|
options.usage = true
|
27
49
|
end
|
28
50
|
end
|
@@ -51,30 +73,19 @@ maf_p = ARGV.shift
|
|
51
73
|
index_p = ARGV.shift
|
52
74
|
|
53
75
|
unless (! options.usage) \
|
54
|
-
&& maf_p &&
|
55
|
-
&& (options.output_base
|
76
|
+
&& maf_p && (! options.species.empty?) \
|
77
|
+
&& (options.output_base \
|
78
|
+
? options.bed \
|
79
|
+
: options.interval || options.genomic_interval)
|
56
80
|
$stderr.puts o_parser
|
57
81
|
exit 2
|
58
82
|
end
|
59
83
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
tiler.species_map = options.species_map
|
66
|
-
|
67
|
-
def parse_interval(line)
|
68
|
-
src, r_start_s, r_end_s, _ = line.split(nil, 4)
|
69
|
-
r_start = r_start_s.to_i
|
70
|
-
r_end = r_end_s.to_i
|
71
|
-
return Bio::GenomicInterval.zero_based(src, r_start, r_end)
|
72
|
-
end
|
73
|
-
|
74
|
-
def target_for(base, interval)
|
75
|
-
path = "#{base}_#{interval.zero_start}-#{interval.zero_end}.fa"
|
76
|
-
File.open(path, 'w')
|
77
|
-
end
|
84
|
+
access = if File.directory? maf_p
|
85
|
+
Bio::MAF::Access.maf_dir(maf_p, options.p)
|
86
|
+
else
|
87
|
+
Bio::MAF::Access.file(maf_p, index_p, options.p)
|
88
|
+
end
|
78
89
|
|
79
90
|
if options.bed
|
80
91
|
intervals = []
|
@@ -83,21 +94,34 @@ if options.bed
|
|
83
94
|
end
|
84
95
|
intervals.sort_by! { |int| int.zero_start }
|
85
96
|
intervals.each do |int|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
97
|
+
access.tile(int) do |tiler|
|
98
|
+
apply_options(options, tiler)
|
99
|
+
target_for(options.output_base, int) do |target|
|
100
|
+
tiler.write_fasta(target)
|
101
|
+
end
|
102
|
+
end
|
90
103
|
end
|
91
104
|
else
|
92
105
|
# single interval
|
93
|
-
|
94
|
-
|
95
|
-
options.interval.end)
|
96
|
-
if options.output_base
|
97
|
-
target = target_for(options.output_base, tiler.interval)
|
106
|
+
if options.genomic_interval
|
107
|
+
interval = options.genomic_interval
|
98
108
|
else
|
99
|
-
|
109
|
+
if access.indices.size != 1
|
110
|
+
raise "Must explicitly specify sequence in --interval argument with multiple candidate MAF files!"
|
111
|
+
end
|
112
|
+
ref_seq = access.indices.keys.first
|
113
|
+
interval = Bio::GenomicInterval.zero_based(ref_seq,
|
114
|
+
options.interval.begin,
|
115
|
+
options.interval.end)
|
116
|
+
end
|
117
|
+
access.tile(interval) do |tiler|
|
118
|
+
apply_options(options, tiler)
|
119
|
+
if options.output_base
|
120
|
+
target = target_for(options.output_base, tiler.interval)
|
121
|
+
else
|
122
|
+
target = $stdout
|
123
|
+
end
|
124
|
+
tiler.write_fasta(target)
|
125
|
+
target.close
|
100
126
|
end
|
101
|
-
tiler.write_fasta(target)
|
102
|
-
target.close
|
103
127
|
end
|
data/bio-maf.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = "bio-maf"
|
5
|
-
s.version = "0.
|
5
|
+
s.version = "0.3.0"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["Clayton Wheeler"]
|
9
|
-
s.date = "2012-
|
9
|
+
s.date = "2012-07-18"
|
10
10
|
s.description = "Multiple Alignment Format parser for BioRuby."
|
11
11
|
s.email = "cswh@umich.edu"
|
12
12
|
s.executables = ["maf_count", "maf_dump_blocks", "maf_extract_ranges_count", "maf_index", "maf_parse_bench", "maf_to_fasta", "maf_write", "random_ranges"]
|
@@ -32,10 +32,11 @@ Gem::Specification.new do |s|
|
|
32
32
|
s.platform = 'java'
|
33
33
|
end
|
34
34
|
|
35
|
+
s.add_runtime_dependency('bio-alignment', ["~> 0.0.7"])
|
35
36
|
s.add_runtime_dependency('bio-bigbio', [">= 0"])
|
36
37
|
s.add_runtime_dependency('bio-genomic-interval', ["~> 0.1.2"])
|
37
38
|
if RUBY_PLATFORM == 'java'
|
38
|
-
s.add_runtime_dependency('kyotocabinet-java', ["~> 0.
|
39
|
+
s.add_runtime_dependency('kyotocabinet-java', ["~> 0.3.0"])
|
39
40
|
else
|
40
41
|
s.add_runtime_dependency('kyotocabinet-ruby', ["~> 1.27.1"])
|
41
42
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
Feature: Join adjacent alignment blocks
|
2
|
+
After filtering out sequences
|
3
|
+
The sequence that caused two blocks to be separate may be removed
|
4
|
+
So it can be desirable to join such blocks together
|
5
|
+
|
6
|
+
Scenario: Two blocks natively in indexed access
|
7
|
+
Given indexed MAF files in "test/data"
|
8
|
+
When I query for the genomic intervals
|
9
|
+
| chrom | start | end |
|
10
|
+
| mm8.chr7 | 80082334 | 80082471 |
|
11
|
+
Then 2 blocks are obtained
|
12
|
+
And the text size of block 0 is 54
|
13
|
+
And the text size of block 1 is 156
|
14
|
+
|
15
|
+
Scenario: Two blocks joined in indexed access
|
16
|
+
Given indexed MAF files in "test/data"
|
17
|
+
When I enable the :join_blocks parser option
|
18
|
+
And I filter for only the species
|
19
|
+
| mm8 |
|
20
|
+
| rn4 |
|
21
|
+
| oryCun1 |
|
22
|
+
| hg18 |
|
23
|
+
| panTro2 |
|
24
|
+
| rheMac2 |
|
25
|
+
| canFam2 |
|
26
|
+
| loxAfr1 |
|
27
|
+
| echTel1 |
|
28
|
+
And I query for the genomic intervals
|
29
|
+
| chrom | start | end |
|
30
|
+
| mm8.chr7 | 80082334 | 80082471 |
|
31
|
+
Then 1 block is obtained
|
32
|
+
And the text size of block 0 is 210
|
@@ -0,0 +1,46 @@
|
|
1
|
+
Feature: Provide access to multiple MAF files in a directory
|
2
|
+
In order to efficiently work with many MAF files
|
3
|
+
We need to provide a convenient interface to them
|
4
|
+
|
5
|
+
Scenario: Query for several chromosomes at once
|
6
|
+
Given indexed MAF files in "test/data"
|
7
|
+
When I query for the genomic intervals
|
8
|
+
| chrom | start | end |
|
9
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
10
|
+
| mm8.chrM | 1400 | 1590 |
|
11
|
+
Then 5 blocks are obtained
|
12
|
+
|
13
|
+
Scenario: Apply block filters
|
14
|
+
Given indexed MAF files in "test/data"
|
15
|
+
When I filter for blocks with text size at most 200
|
16
|
+
And I query for the genomic intervals
|
17
|
+
| chrom | start | end |
|
18
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
19
|
+
| mm8.chrM | 1400 | 1590 |
|
20
|
+
Then 3 blocks are obtained
|
21
|
+
|
22
|
+
Scenario: Apply sequence filters
|
23
|
+
Given indexed MAF files in "test/data"
|
24
|
+
When I filter for only the species
|
25
|
+
| mm8 |
|
26
|
+
| rn4 |
|
27
|
+
| hg18 |
|
28
|
+
And I query for the genomic intervals
|
29
|
+
| chrom | start | end |
|
30
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
31
|
+
| mm8.chrM | 1400 | 1590 |
|
32
|
+
Then 5 blocks are obtained
|
33
|
+
And block 0 has 3 sequences
|
34
|
+
|
35
|
+
Scenario: Set parse options
|
36
|
+
Given indexed MAF files in "test/data"
|
37
|
+
When I enable the :remove_gaps parser option
|
38
|
+
And I filter for only the species
|
39
|
+
| mm8 |
|
40
|
+
| rn4 |
|
41
|
+
| hg18 |
|
42
|
+
And I query for the genomic intervals
|
43
|
+
| chrom | start | end |
|
44
|
+
| mm8.chr7 | 80082580 | 80082612 |
|
45
|
+
Then 2 blocks are obtained
|
46
|
+
And the text size of block 1 is 121
|