bio-maf 0.3.0-java → 0.3.2-java
Sign up to get free protection for your applications and to get access to all the features.
- data/DEVELOPMENT.md +4 -0
- data/README.md +172 -114
- data/bin/maf_count +0 -1
- data/bin/maf_dump_blocks +0 -1
- data/bin/maf_extract +180 -0
- data/bin/maf_index +15 -8
- data/bin/maf_tile +2 -0
- data/bin/maf_to_fasta +4 -7
- data/bio-maf.gemspec +3 -4
- data/features/maf-indexing.feature +21 -1
- data/features/step_definitions/convert_steps.rb +2 -7
- data/features/step_definitions/index_steps.rb +4 -0
- data/lib/bio-maf.rb +5 -0
- data/lib/bio/maf/index.rb +33 -23
- data/lib/bio/maf/maf.rb +10 -7
- data/lib/bio/maf/parser.rb +37 -15
- data/lib/bio/maf/tiler.rb +60 -8
- data/lib/bio/maf/writer.rb +26 -0
- data/man/maf_extract.1 +268 -0
- data/man/maf_extract.1.ronn +213 -0
- data/man/maf_index.1 +21 -10
- data/man/maf_index.1.ronn +14 -7
- data/man/maf_tile.1 +12 -0
- data/man/maf_tile.1.ronn +9 -0
- data/spec/bio/maf/index_spec.rb +23 -0
- metadata +14 -10
data/DEVELOPMENT.md
CHANGED
data/README.md
CHANGED
@@ -81,43 +81,57 @@ create one with [maf_index(1)][], like so:
|
|
81
81
|
|
82
82
|
|
83
83
|
$ maf_index test/data/mm8_chr7_tiny.maf /tmp/mm8_chr7_tiny.kct
|
84
|
-
|
85
|
-
Or programmatically:
|
86
84
|
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
To index all sequences for searching, not just the reference sequence:
|
86
|
+
|
87
|
+
$ maf_index --all test/data/mm8_chr7_tiny.maf /tmp/mm8_chr7_tiny.kct
|
88
|
+
|
89
|
+
To build an index programmatically:
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
require 'bio-maf'
|
93
|
+
parser = Bio::MAF::Parser.new("test/data/mm8_chr7_tiny.maf")
|
94
|
+
idx = Bio::MAF::KyotoIndex.build(parser, "/tmp/mm8_chr7_tiny.kct", false)
|
95
|
+
```
|
90
96
|
|
91
97
|
### Extract blocks from an indexed MAF file, by genomic interval
|
92
98
|
|
93
99
|
Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/mm8_chr7_tiny.maf).
|
94
100
|
|
95
|
-
|
96
|
-
|
101
|
+
```ruby
|
102
|
+
require 'bio-maf'
|
103
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
97
104
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
105
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
106
|
+
access.find(q) do |block|
|
107
|
+
ref_seq = block.sequences[0]
|
108
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
109
|
+
end
|
103
110
|
|
104
|
-
|
105
|
-
|
111
|
+
# => Matched block at 80082592, 121 bases
|
112
|
+
# => Matched block at 80082713, 54 bases
|
113
|
+
```
|
106
114
|
|
107
115
|
Or, equivalently, one can work with a specific MAF file and index directly:
|
108
116
|
|
109
|
-
|
110
|
-
|
111
|
-
|
117
|
+
```ruby
|
118
|
+
require 'bio-maf'
|
119
|
+
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
120
|
+
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
121
|
+
|
122
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
123
|
+
idx.find(q, parser).each do |block|
|
124
|
+
ref_seq = block.sequences[0]
|
125
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
126
|
+
end
|
112
127
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
117
|
-
end
|
128
|
+
# => Matched block at 80082592, 121 bases
|
129
|
+
# => Matched block at 80082713, 54 bases
|
130
|
+
```
|
118
131
|
|
119
|
-
|
120
|
-
|
132
|
+
This can be done with [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html) as well:
|
133
|
+
|
134
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082592-80082766
|
121
135
|
|
122
136
|
### Extract alignment blocks truncated to a given interval
|
123
137
|
|
@@ -125,25 +139,37 @@ Given a genomic interval of interest, one can also extract only the
|
|
125
139
|
subsets of blocks that intersect with that interval, using the
|
126
140
|
`#slice` method like so:
|
127
141
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
142
|
+
```ruby
|
143
|
+
require 'bio-maf'
|
144
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
145
|
+
int = Bio::GenomicInterval.zero_based('mm8.chr7', 80082350, 80082380)
|
146
|
+
blocks = access.slice(int).to_a
|
147
|
+
puts "Got #{blocks.size} blocks, first #{blocks.first.ref_seq.size} base pairs."
|
148
|
+
# => Got 2 blocks, first 18 base pairs.
|
149
|
+
```
|
150
|
+
|
151
|
+
Or, with [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
152
|
+
|
153
|
+
$ maf_extract -d test/data --mode slice --interval mm8.chr7:80082350-80082380
|
134
154
|
|
135
155
|
### Filter species returned in alignment blocks
|
136
156
|
|
137
|
-
|
138
|
-
|
157
|
+
```ruby
|
158
|
+
require 'bio-maf'
|
159
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
160
|
+
|
161
|
+
access.sequence_filter = { :only_species => %w(hg18 mm8 rheMac2) }
|
162
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
163
|
+
blocks = access.find(q)
|
164
|
+
block = blocks.first
|
165
|
+
puts "Block has #{block.sequences.size} sequences."
|
139
166
|
|
140
|
-
|
141
|
-
|
142
|
-
blocks = access.find(q)
|
143
|
-
block = blocks.first
|
144
|
-
puts "Block has #{block.sequences.size} sequences."
|
167
|
+
# => Block has 3 sequences.
|
168
|
+
```
|
145
169
|
|
146
|
-
|
170
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
171
|
+
|
172
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082592-80082766 --only-species hg18,mm8,rheMac2
|
147
173
|
|
148
174
|
### Extract blocks matching certain conditions
|
149
175
|
|
@@ -154,68 +180,92 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
154
180
|
|
155
181
|
#### Match only blocks with all specified species
|
156
182
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
183
|
+
```ruby
|
184
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
185
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082471, 80082730)]
|
186
|
+
access.block_filter = { :with_all_species => %w(panTro2 loxAfr1) }
|
187
|
+
n_blocks = access.find(q).count
|
188
|
+
# => 1
|
189
|
+
```
|
190
|
+
|
191
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
192
|
+
|
193
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082471-80082730 --with-all-species panTro2,loxAfr1
|
162
194
|
|
163
195
|
#### Match only blocks with a certain number of sequences
|
164
196
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
197
|
+
```ruby
|
198
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
199
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082767, 80083008)]
|
200
|
+
access.block_filter = { :at_least_n_sequences => 6 }
|
201
|
+
n_blocks = access.find(q).count
|
202
|
+
# => 1
|
203
|
+
```
|
204
|
+
|
205
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
206
|
+
|
207
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082767-80083008 --min-sequences 6
|
170
208
|
|
171
209
|
#### Match only blocks within a text size range
|
172
210
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
211
|
+
```ruby
|
212
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
213
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 0, 80100000)]
|
214
|
+
access.block_filter = { :min_size => 72, :max_size => 160 }
|
215
|
+
n_blocks = access.find(q).count
|
216
|
+
# => 3
|
217
|
+
```
|
218
|
+
|
219
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
220
|
+
|
221
|
+
$ maf_extract -d test/data --interval mm8.chr7:0-80100000 --min-text-size 72 --max-text-size 160
|
178
222
|
|
179
223
|
### Process each block in a MAF file
|
180
224
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
225
|
+
```ruby
|
226
|
+
require 'bio-maf'
|
227
|
+
p = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
228
|
+
puts "MAF version: #{p.header.version}"
|
229
|
+
# => MAF version: 1
|
185
230
|
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
231
|
+
p.each_block do |block|
|
232
|
+
block.sequences.each do |seq|
|
233
|
+
do_something(seq)
|
234
|
+
end
|
235
|
+
end
|
236
|
+
```
|
191
237
|
|
192
238
|
### Parse empty ('e') lines
|
193
239
|
|
194
240
|
Refer to [`chr22_ieq.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/chr22_ieq.maf).
|
195
241
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
242
|
+
```ruby
|
243
|
+
require 'bio-maf'
|
244
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
245
|
+
:parse_empty => false)
|
246
|
+
block = p.parse_block
|
247
|
+
block.sequences.size
|
248
|
+
# => 3
|
249
|
+
|
250
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
251
|
+
:parse_empty => true)
|
252
|
+
block = p.parse_block
|
253
|
+
block.sequences.size
|
254
|
+
# => 4
|
255
|
+
block.sequences.find { |s| s.empty? }
|
256
|
+
# => #<Bio::MAF::EmptySequence:0x007fe1f39882d0
|
257
|
+
# @source="turTru1.scaffold_109008", @start=25049,
|
258
|
+
# @size=1601, @strand=:+, @src_size=50103, @text=nil,
|
259
|
+
# @status="I">
|
260
|
+
```
|
213
261
|
|
214
262
|
Such options can also be set on a Bio::MAF::Access object:
|
215
263
|
|
216
|
-
|
217
|
-
|
218
|
-
|
264
|
+
```ruby
|
265
|
+
require 'bio-maf'
|
266
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
267
|
+
access.parse_options[:parse_empty] = true
|
268
|
+
```
|
219
269
|
|
220
270
|
### Remove gaps from parsed blocks
|
221
271
|
|
@@ -225,9 +275,11 @@ gaps may be left where there was an insertion present only in
|
|
225
275
|
sequences that were filtered out. Such gaps can be removed by setting
|
226
276
|
the `:remove_gaps` parser option:
|
227
277
|
|
228
|
-
|
229
|
-
|
230
|
-
|
278
|
+
```ruby
|
279
|
+
require 'bio-maf'
|
280
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
281
|
+
access.parse_options[:remove_gaps] = true
|
282
|
+
```
|
231
283
|
|
232
284
|
### Join blocks after filtering together
|
233
285
|
|
@@ -235,9 +287,11 @@ Similarly, filtering out species may remove a species which had caused
|
|
235
287
|
two adjacent alignment blocks to be split. By enabling the
|
236
288
|
`:join_blocks` parser option, such blocks can be joined together:
|
237
289
|
|
238
|
-
|
239
|
-
|
240
|
-
|
290
|
+
```ruby
|
291
|
+
require 'bio-maf'
|
292
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
293
|
+
access.parse_options[:join_blocks] = true
|
294
|
+
```
|
241
295
|
|
242
296
|
See the [Cucumber feature][] for more details.
|
243
297
|
|
@@ -254,14 +308,16 @@ more.
|
|
254
308
|
[Bio::BioAlignment::Alignment]: http://rdoc.info/gems/bio-alignment/Bio/BioAlignment/Alignment
|
255
309
|
[bio-alignment]: https://github.com/pjotrp/bioruby-alignment
|
256
310
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
311
|
+
```ruby
|
312
|
+
require 'bio-maf'
|
313
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
314
|
+
access.parse_options[:as_bio_alignment] = true
|
315
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
316
|
+
access.find(q) do |aln|
|
317
|
+
col = aln.columns[3]
|
318
|
+
puts "bases in column 3: #{col}"
|
319
|
+
end
|
320
|
+
```
|
265
321
|
|
266
322
|
### Tile blocks together over an interval
|
267
323
|
|
@@ -276,29 +332,32 @@ man page.
|
|
276
332
|
|
277
333
|
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/tiling.feature
|
278
334
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
335
|
+
```ruby
|
336
|
+
require 'bio-maf'
|
337
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
338
|
+
interval = Bio::GenomicInterval.zero_based('mm8.chr7',
|
339
|
+
80082334,
|
340
|
+
80082468)
|
341
|
+
access.tile(interval) do |tiler|
|
342
|
+
# reference is optional
|
343
|
+
tiler.reference = 'reference.fa.gz'
|
344
|
+
tiler.species = %w(mm8 rn4 hg18)
|
345
|
+
# species_map is optional
|
346
|
+
tiler.species_map = {
|
347
|
+
'mm8' => 'mouse',
|
348
|
+
'rn4' => 'rat',
|
349
|
+
'hg18' => 'human'
|
350
|
+
}
|
351
|
+
tiler.write_fasta($stdout)
|
352
|
+
end
|
353
|
+
```
|
296
354
|
|
297
355
|
### Command line tools
|
298
356
|
|
299
357
|
Man pages for command line tools:
|
300
358
|
|
301
359
|
* [`maf_index(1)`](http://csw.github.com/bioruby-maf/man/maf_index.1.html)
|
360
|
+
* [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html)
|
302
361
|
* [`maf_to_fasta(1)`](http://csw.github.com/bioruby-maf/man/maf_to_fasta.1.html)
|
303
362
|
* [`maf_tile(1)`](http://csw.github.com/bioruby-maf/man/maf_tile.1.html)
|
304
363
|
|
@@ -343,4 +402,3 @@ This Biogem is published at [biogems.info](http://biogems.info/index.html#bio-ma
|
|
343
402
|
## Copyright
|
344
403
|
|
345
404
|
Copyright (c) 2012 Clayton Wheeler. See LICENSE.txt for further details.
|
346
|
-
|
data/bin/maf_count
CHANGED
data/bin/maf_dump_blocks
CHANGED
data/bin/maf_extract
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
include Bio::MAF
|
8
|
+
|
9
|
+
options = OpenStruct.new
|
10
|
+
options.mode = :intersect
|
11
|
+
options.format = :maf
|
12
|
+
options.seq_filter = {}
|
13
|
+
options.block_filter = {}
|
14
|
+
options.parse_options = {}
|
15
|
+
|
16
|
+
# Expand a list specification given on the command line.
#
# A spec of the form "@FILE" names a file whose whitespace-separated
# tokens make up the list; any other spec is treated as a
# comma-separated list.
#
# @param spec [String] either "@path/to/file" or "a,b,c"
# @return [Array<String>] the list entries
def handle_list_spec(spec)
  # Test the very first character of the argument. A line-anchored
  # pattern (^) would also fire on an "@" that starts a later line.
  # A lone "@" names no file, so it falls through to the list branch.
  if spec.start_with?('@') && spec.size > 1
    File.read(spec[1..-1]).split
  else
    # Tolerate "a, b" and stray commas: trim each entry, drop blanks.
    spec.split(',').map(&:strip).reject(&:empty?)
  end
end
|
23
|
+
|
24
|
+
# Parse a "SEQ:START-END" interval specification from the command line.
#
# @param int [String] interval spec, e.g. "mm8.chr7:80082592-80082766"
# @return the value of Bio::GenomicInterval.zero_based(SEQ, START, END),
#   with START and END converted to integers
# @raise [RuntimeError] if the spec is not of the form SEQ:START-END
def handle_interval_spec(int)
  # Anchor both ends so that trailing garbage after END (e.g. "chr1:1-20x")
  # is rejected instead of being silently ignored.
  match = /\A(.+):(\d+)-(\d+)\z/.match(int)
  raise "Invalid interval specification: #{int}" unless match

  Bio::GenomicInterval.zero_based(match[1], match[2].to_i, match[3].to_i)
end
|
31
|
+
|
32
|
+
# Command-line option definitions for maf_extract.
#
# Each handler records its value on the `options` OpenStruct (or in one
# of its seq_filter / block_filter / parse_options hashes). The parser
# is kept in the global $op so that `usage` can print the option
# summary.
$op = OptionParser.new do |opts|
  opts.banner = "Usage: maf_extract (-m MAF [-i INDEX] | -d MAFDIR) [options]"
  opts.separator ""
  opts.separator "MAF source options (either --maf or --maf-dir must be given):"
  opts.on("-m", "--maf MAF", "MAF file") do |maf|
    options.maf = maf
  end
  opts.on("-i", "--index INDEX", "MAF index") do |idx|
    options.idx = idx
  end
  opts.on("-d", "--maf-dir DIR", "MAF directory") do |dir|
    options.maf_dir = dir
  end
  opts.separator ""
  opts.separator "Extraction options:"
  opts.on("--mode MODE", [:intersect, :slice],
          "Extraction mode; 'intersect' to match ",
          "blocks intersecting the given region,",
          "or 'slice' to extract subsets covering ",
          "given regions") do |mode|
    options.mode = mode
  end
  opts.on("--bed BED", "Use intervals from the given BED file") do |bed|
    options.bed = bed
  end
  # The placeholder mirrors the syntax handle_interval_spec accepts:
  # a colon after the sequence name, a hyphen between the coordinates.
  opts.on("--interval SEQ:START-END", "Zero-based genomic interval to match") do |int|
    options.interval = handle_interval_spec(int)
  end
  opts.separator ""
  opts.separator "Output options:"
  opts.on("-f", "--format FMT", [:maf, :fasta], "Output format") do |fmt|
    options.format = fmt
  end
  opts.on("-o", "--output OUT", "Write output to file OUT") do |out|
    options.out_path = out
  end
  opts.separator ""
  opts.separator "Filtering options:"
  opts.on("--only-species SPECIES",
          "Filter out all but the species in the",
          "given comma-separated list",
          "(or @FILE to read from a file)") do |spec|
    options.seq_filter[:only_species] = handle_list_spec(spec)
  end
  opts.on("--with-all-species SPECIES",
          "Only match blocks with all the given",
          "species, comma-separated",
          "(or @FILE to read from a file)") do |spec|
    options.block_filter[:with_all_species] = handle_list_spec(spec)
  end
  opts.on("--min-sequences N", Integer,
          "Match only blocks with at least N sequences") do |n|
    options.block_filter[:at_least_n_sequences] = n
  end
  opts.on("--min-text-size N", Integer,
          "Match only blocks with minimum text size N") do |n|
    options.block_filter[:min_size] = n
  end
  opts.on("--max-text-size N", Integer,
          "Match only blocks with maximum text size N") do |n|
    options.block_filter[:max_size] = n
  end
  opts.separator ""
  opts.separator "Block processing options:"
  opts.on("--join-blocks",
          "Join blocks if appropriate after filtering",
          "out sequences") do
    options.parse_options[:join_blocks] = true
  end
  opts.on("--remove-gaps", "Remove gaps after filtering out sequences") do
    options.parse_options[:remove_gaps] = true
  end
  opts.on("--parse-extended", "Parse 'extended' MAF data (i, q lines)") do
    options.parse_options[:parse_extended] = true
  end
  opts.on("--parse-empty", "Parse empty (e) lines of MAF data") do
    options.parse_options[:parse_empty] = true
  end
  opts.separator ""
  opts.separator "Logging options:"
  # Defined outside this script; presumably registers the shared
  # logging flags on this parser -- confirm against lib/bio-maf.rb.
  Bio::MAF::handle_logging_options(opts)
end
|
114
|
+
$op.parse!(ARGV)
|
115
|
+
Bio::Log::CLI.configure('bio-maf')
|
116
|
+
|
117
|
+
# Report a command-line usage error and terminate.
#
# Writes the given message, followed by the option summary held in the
# global parser $op, to standard error and exits with status 2.
#
# @param msg [String] description of what was wrong with the invocation
def usage(msg)
  $stderr.puts(msg, $op)
  exit(2)
end
|
122
|
+
|
123
|
+
# Main script body: open the MAF source, apply filters, and write the
# matching (or sliced) blocks in the requested output format.

# Open either a single MAF file (with an optional index) or a directory
# of indexed MAF files.
if options.maf
  access = Access.file(options.maf, options.idx, options.parse_options)
elsif options.maf_dir
  access = Access.maf_dir(options.maf_dir, options.parse_options)
else
  usage "Must supply --maf or --maf-dir!"
end

begin
  access.sequence_filter = options.seq_filter unless options.seq_filter.empty?
  access.block_filter = options.block_filter unless options.block_filter.empty?

  # Resolve the query intervals before touching the output path, so a
  # usage error cannot create or truncate the --output file.
  if options.bed
    # NOTE(review): read_bed_intervals is not defined in this script;
    # presumably it comes in through `include Bio::MAF` -- confirm.
    intervals = read_bed_intervals(options.bed)
  elsif options.interval
    intervals = [options.interval]
  else
    usage "Must supply --interval or --bed!"
  end

  if options.out_path
    outf = File.open(options.out_path, 'w')
  else
    outf = $stdout
  end

  case options.format
  when :maf
    writer = Writer.new(outf)
  when :fasta
    writer = FASTAWriter.new(outf)
  else
    # Interpolate the selected format; a bare `format` here would call
    # Kernel#format with no arguments and raise ArgumentError instead.
    raise "unsupported output format #{options.format}!"
  end

  # TODO: provide access to original MAF header?
  if options.format == :maf
    writer.write_header(Header.default)
  end

  case options.mode
  when :intersect
    access.find(intervals) do |block|
      writer.write_block(block)
    end
  when :slice
    # TODO: multiple files if intervals.size > 1?
    intervals.each do |interval|
      access.slice(interval) do |block|
        writer.write_block(block)
      end
    end
  else
    raise "Unsupported mode #{options.mode}!"
  end

ensure
  begin
    # Close only a file this script opened; never close $stdout.
    outf.close if outf && options.out_path
  ensure
    access.close
  end
end
|