bio-maf 0.3.0-java → 0.3.2-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/DEVELOPMENT.md +4 -0
- data/README.md +172 -114
- data/bin/maf_count +0 -1
- data/bin/maf_dump_blocks +0 -1
- data/bin/maf_extract +180 -0
- data/bin/maf_index +15 -8
- data/bin/maf_tile +2 -0
- data/bin/maf_to_fasta +4 -7
- data/bio-maf.gemspec +3 -4
- data/features/maf-indexing.feature +21 -1
- data/features/step_definitions/convert_steps.rb +2 -7
- data/features/step_definitions/index_steps.rb +4 -0
- data/lib/bio-maf.rb +5 -0
- data/lib/bio/maf/index.rb +33 -23
- data/lib/bio/maf/maf.rb +10 -7
- data/lib/bio/maf/parser.rb +37 -15
- data/lib/bio/maf/tiler.rb +60 -8
- data/lib/bio/maf/writer.rb +26 -0
- data/man/maf_extract.1 +268 -0
- data/man/maf_extract.1.ronn +213 -0
- data/man/maf_index.1 +21 -10
- data/man/maf_index.1.ronn +14 -7
- data/man/maf_tile.1 +12 -0
- data/man/maf_tile.1.ronn +9 -0
- data/spec/bio/maf/index_spec.rb +23 -0
- metadata +14 -10
data/DEVELOPMENT.md
CHANGED
data/README.md
CHANGED
@@ -81,43 +81,57 @@ create one with [maf_index(1)][], like so:
|
|
81
81
|
|
82
82
|
|
83
83
|
$ maf_index test/data/mm8_chr7_tiny.maf /tmp/mm8_chr7_tiny.kct
|
84
|
-
|
85
|
-
Or programmatically:
|
86
84
|
|
87
|
-
|
88
|
-
|
89
|
-
|
85
|
+
To index all sequences for searching, not just the reference sequence:
|
86
|
+
|
87
|
+
$ maf_index --all test/data/mm8_chr7_tiny.maf /tmp/mm8_chr7_tiny.kct
|
88
|
+
|
89
|
+
To build an index programmatically:
|
90
|
+
|
91
|
+
```ruby
|
92
|
+
require 'bio-maf'
|
93
|
+
parser = Bio::MAF::Parser.new("test/data/mm8_chr7_tiny.maf")
|
94
|
+
idx = Bio::MAF::KyotoIndex.build(parser, "/tmp/mm8_chr7_tiny.kct", false)
|
95
|
+
```
|
90
96
|
|
91
97
|
### Extract blocks from an indexed MAF file, by genomic interval
|
92
98
|
|
93
99
|
Refer to [`mm8_chr7_tiny.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/mm8_chr7_tiny.maf).
|
94
100
|
|
95
|
-
|
96
|
-
|
101
|
+
```ruby
|
102
|
+
require 'bio-maf'
|
103
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
97
104
|
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
105
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
106
|
+
access.find(q) do |block|
|
107
|
+
ref_seq = block.sequences[0]
|
108
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
109
|
+
end
|
103
110
|
|
104
|
-
|
105
|
-
|
111
|
+
# => Matched block at 80082592, 121 bases
|
112
|
+
# => Matched block at 80082713, 54 bases
|
113
|
+
```
|
106
114
|
|
107
115
|
Or, equivalently, one can work with a specific MAF file and index directly:
|
108
116
|
|
109
|
-
|
110
|
-
|
111
|
-
|
117
|
+
```ruby
|
118
|
+
require 'bio-maf'
|
119
|
+
parser = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
120
|
+
idx = Bio::MAF::KyotoIndex.open('test/data/mm8_chr7_tiny.kct')
|
121
|
+
|
122
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
123
|
+
idx.find(q, parser).each do |block|
|
124
|
+
ref_seq = block.sequences[0]
|
125
|
+
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
126
|
+
end
|
112
127
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
puts "Matched block at #{ref_seq.start}, #{ref_seq.size} bases"
|
117
|
-
end
|
128
|
+
# => Matched block at 80082592, 121 bases
|
129
|
+
# => Matched block at 80082713, 54 bases
|
130
|
+
```
|
118
131
|
|
119
|
-
|
120
|
-
|
132
|
+
This can be done with [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html) as well:
|
133
|
+
|
134
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082592-80082766
|
121
135
|
|
122
136
|
### Extract alignment blocks truncated to a given interval
|
123
137
|
|
@@ -125,25 +139,37 @@ Given a genomic interval of interest, one can also extract only the
|
|
125
139
|
subsets of blocks that intersect with that interval, using the
|
126
140
|
`#slice` method like so:
|
127
141
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
142
|
+
```ruby
|
143
|
+
require 'bio-maf'
|
144
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
145
|
+
int = Bio::GenomicInterval.zero_based('mm8.chr7', 80082350, 80082380)
|
146
|
+
blocks = access.slice(int).to_a
|
147
|
+
puts "Got #{blocks.size} blocks, first #{blocks.first.ref_seq.size} base pairs."
|
148
|
+
# => Got 2 blocks, first 18 base pairs.
|
149
|
+
```
|
150
|
+
|
151
|
+
Or, with [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
152
|
+
|
153
|
+
$ maf_extract -d test/data --mode slice --interval mm8.chr7:80082350-80082380
|
134
154
|
|
135
155
|
### Filter species returned in alignment blocks
|
136
156
|
|
137
|
-
|
138
|
-
|
157
|
+
```ruby
|
158
|
+
require 'bio-maf'
|
159
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
160
|
+
|
161
|
+
access.sequence_filter = { :only_species => %w(hg18 mm8 rheMac2) }
|
162
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
163
|
+
blocks = access.find(q)
|
164
|
+
block = blocks.first
|
165
|
+
puts "Block has #{block.sequences.size} sequences."
|
139
166
|
|
140
|
-
|
141
|
-
|
142
|
-
blocks = access.find(q)
|
143
|
-
block = blocks.first
|
144
|
-
puts "Block has #{block.sequences.size} sequences."
|
167
|
+
# => Block has 3 sequences.
|
168
|
+
```
|
145
169
|
|
146
|
-
|
170
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
171
|
+
|
172
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082592-80082766 --only-species hg18,mm8,rheMac2
|
147
173
|
|
148
174
|
### Extract blocks matching certain conditions
|
149
175
|
|
@@ -154,68 +180,92 @@ See also the [Cucumber feature][] and [step definitions][] for this.
|
|
154
180
|
|
155
181
|
#### Match only blocks with all specified species
|
156
182
|
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
183
|
+
```ruby
|
184
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
185
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082471, 80082730)]
|
186
|
+
access.block_filter = { :with_all_species => %w(panTro2 loxAfr1) }
|
187
|
+
n_blocks = access.find(q).count
|
188
|
+
# => 1
|
189
|
+
```
|
190
|
+
|
191
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
192
|
+
|
193
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082471-80082730 --with-all-species panTro2,loxAfr1
|
162
194
|
|
163
195
|
#### Match only blocks with a certain number of sequences
|
164
196
|
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
197
|
+
```ruby
|
198
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
199
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082767, 80083008)]
|
200
|
+
access.block_filter = { :at_least_n_sequences => 6 }
|
201
|
+
n_blocks = access.find(q).count
|
202
|
+
# => 1
|
203
|
+
```
|
204
|
+
|
205
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
206
|
+
|
207
|
+
$ maf_extract -d test/data --interval mm8.chr7:80082767-80083008 --min-sequences 6
|
170
208
|
|
171
209
|
#### Match only blocks within a text size range
|
172
210
|
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
211
|
+
```ruby
|
212
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
213
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 0, 80100000)]
|
214
|
+
access.block_filter = { :min_size => 72, :max_size => 160 }
|
215
|
+
n_blocks = access.find(q).count
|
216
|
+
# => 3
|
217
|
+
```
|
218
|
+
|
219
|
+
With [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html):
|
220
|
+
|
221
|
+
$ maf_extract -d test/data --interval mm8.chr7:0-80100000 --min-text-size 72 --max-text-size 160
|
178
222
|
|
179
223
|
### Process each block in a MAF file
|
180
224
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
225
|
+
```ruby
|
226
|
+
require 'bio-maf'
|
227
|
+
p = Bio::MAF::Parser.new('test/data/mm8_chr7_tiny.maf')
|
228
|
+
puts "MAF version: #{p.header.version}"
|
229
|
+
# => MAF version: 1
|
185
230
|
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
231
|
+
p.each_block do |block|
|
232
|
+
block.sequences.each do |seq|
|
233
|
+
do_something(seq)
|
234
|
+
end
|
235
|
+
end
|
236
|
+
```
|
191
237
|
|
192
238
|
### Parse empty ('e') lines
|
193
239
|
|
194
240
|
Refer to [`chr22_ieq.maf`](https://github.com/csw/bioruby-maf/blob/master/test/data/chr22_ieq.maf).
|
195
241
|
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
242
|
+
```ruby
|
243
|
+
require 'bio-maf'
|
244
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
245
|
+
:parse_empty => false)
|
246
|
+
block = p.parse_block
|
247
|
+
block.sequences.size
|
248
|
+
# => 3
|
249
|
+
|
250
|
+
p = Bio::MAF::Parser.new('test/data/chr22_ieq.maf',
|
251
|
+
:parse_empty => true)
|
252
|
+
block = p.parse_block
|
253
|
+
block.sequences.size
|
254
|
+
# => 4
|
255
|
+
block.sequences.find { |s| s.empty? }
|
256
|
+
# => #<Bio::MAF::EmptySequence:0x007fe1f39882d0
|
257
|
+
# @source="turTru1.scaffold_109008", @start=25049,
|
258
|
+
# @size=1601, @strand=:+, @src_size=50103, @text=nil,
|
259
|
+
# @status="I">
|
260
|
+
```
|
213
261
|
|
214
262
|
Such options can also be set on a Bio::MAF::Access object:
|
215
263
|
|
216
|
-
|
217
|
-
|
218
|
-
|
264
|
+
```ruby
|
265
|
+
require 'bio-maf'
|
266
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
267
|
+
access.parse_options[:parse_empty] = true
|
268
|
+
```
|
219
269
|
|
220
270
|
### Remove gaps from parsed blocks
|
221
271
|
|
@@ -225,9 +275,11 @@ gaps may be left where there was an insertion present only in
|
|
225
275
|
sequences that were filtered out. Such gaps can be removed by setting
|
226
276
|
the `:remove_gaps` parser option:
|
227
277
|
|
228
|
-
|
229
|
-
|
230
|
-
|
278
|
+
```ruby
|
279
|
+
require 'bio-maf'
|
280
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
281
|
+
access.parse_options[:remove_gaps] = true
|
282
|
+
```
|
231
283
|
|
232
284
|
### Join blocks together after filtering
|
233
285
|
|
@@ -235,9 +287,11 @@ Similarly, filtering out species may remove a species which had caused
|
|
235
287
|
two adjacent alignment blocks to be split. By enabling the
|
236
288
|
`:join_blocks` parser option, such blocks can be joined together:
|
237
289
|
|
238
|
-
|
239
|
-
|
240
|
-
|
290
|
+
```ruby
|
291
|
+
require 'bio-maf'
|
292
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
293
|
+
access.parse_options[:join_blocks] = true
|
294
|
+
```
|
241
295
|
|
242
296
|
See the [Cucumber feature][] for more details.
|
243
297
|
|
@@ -254,14 +308,16 @@ more.
|
|
254
308
|
[Bio::BioAlignment::Alignment]: http://rdoc.info/gems/bio-alignment/Bio/BioAlignment/Alignment
|
255
309
|
[bio-alignment]: https://github.com/pjotrp/bioruby-alignment
|
256
310
|
|
257
|
-
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
311
|
+
```ruby
|
312
|
+
require 'bio-maf'
|
313
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
314
|
+
access.parse_options[:as_bio_alignment] = true
|
315
|
+
q = [Bio::GenomicInterval.zero_based('mm8.chr7', 80082592, 80082766)]
|
316
|
+
access.find(q) do |aln|
|
317
|
+
col = aln.columns[3]
|
318
|
+
puts "bases in column 3: #{col}"
|
319
|
+
end
|
320
|
+
```
|
265
321
|
|
266
322
|
### Tile blocks together over an interval
|
267
323
|
|
@@ -276,29 +332,32 @@ man page.
|
|
276
332
|
|
277
333
|
[feature]: https://github.com/csw/bioruby-maf/blob/master/features/tiling.feature
|
278
334
|
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
289
|
-
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
335
|
+
```ruby
|
336
|
+
require 'bio-maf'
|
337
|
+
access = Bio::MAF::Access.maf_dir('test/data')
|
338
|
+
interval = Bio::GenomicInterval.zero_based('mm8.chr7',
|
339
|
+
80082334,
|
340
|
+
80082468)
|
341
|
+
access.tile(interval) do |tiler|
|
342
|
+
# reference is optional
|
343
|
+
tiler.reference = 'reference.fa.gz'
|
344
|
+
tiler.species = %w(mm8 rn4 hg18)
|
345
|
+
# species_map is optional
|
346
|
+
tiler.species_map = {
|
347
|
+
'mm8' => 'mouse',
|
348
|
+
'rn4' => 'rat',
|
349
|
+
'hg18' => 'human'
|
350
|
+
}
|
351
|
+
tiler.write_fasta($stdout)
|
352
|
+
end
|
353
|
+
```
|
296
354
|
|
297
355
|
### Command line tools
|
298
356
|
|
299
357
|
Man pages for command line tools:
|
300
358
|
|
301
359
|
* [`maf_index(1)`](http://csw.github.com/bioruby-maf/man/maf_index.1.html)
|
360
|
+
* [`maf_extract(1)`](http://csw.github.com/bioruby-maf/man/maf_extract.1.html)
|
302
361
|
* [`maf_to_fasta(1)`](http://csw.github.com/bioruby-maf/man/maf_to_fasta.1.html)
|
303
362
|
* [`maf_tile(1)`](http://csw.github.com/bioruby-maf/man/maf_tile.1.html)
|
304
363
|
|
@@ -343,4 +402,3 @@ This Biogem is published at [biogems.info](http://biogems.info/index.html#bio-ma
|
|
343
402
|
## Copyright
|
344
403
|
|
345
404
|
Copyright (c) 2012 Clayton Wheeler. See LICENSE.txt for further details.
|
346
|
-
|
data/bin/maf_count
CHANGED
data/bin/maf_dump_blocks
CHANGED
data/bin/maf_extract
ADDED
@@ -0,0 +1,180 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'bio-maf'
|
4
|
+
require 'optparse'
|
5
|
+
require 'ostruct'
|
6
|
+
|
7
|
+
include Bio::MAF
|
8
|
+
|
9
|
+
options = OpenStruct.new
|
10
|
+
options.mode = :intersect
|
11
|
+
options.format = :maf
|
12
|
+
options.seq_filter = {}
|
13
|
+
options.block_filter = {}
|
14
|
+
options.parse_options = {}
|
15
|
+
|
16
|
+
def handle_list_spec(spec)
|
17
|
+
if spec =~ /^@(.+)/
|
18
|
+
File.read($1).split
|
19
|
+
else
|
20
|
+
spec.split(',')
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def handle_interval_spec(int)
|
25
|
+
if int =~ /(.+):(\d+)-(\d+)/
|
26
|
+
Bio::GenomicInterval.zero_based($1, $2.to_i, $3.to_i)
|
27
|
+
else
|
28
|
+
raise "Invalid interval specification: #{int}"
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
$op = OptionParser.new do |opts|
|
33
|
+
opts.banner = "Usage: maf_extract (-m MAF [-i INDEX] | -d MAFDIR) [options]"
|
34
|
+
opts.separator ""
|
35
|
+
opts.separator "MAF source options (either --maf or --maf-dir must be given):"
|
36
|
+
opts.on("-m", "--maf MAF", "MAF file") do |maf|
|
37
|
+
options.maf = maf
|
38
|
+
end
|
39
|
+
opts.on("-i", "--index INDEX", "MAF index") do |idx|
|
40
|
+
options.idx = idx
|
41
|
+
end
|
42
|
+
opts.on("-d", "--maf-dir DIR", "MAF directory") do |dir|
|
43
|
+
options.maf_dir = dir
|
44
|
+
end
|
45
|
+
opts.separator ""
|
46
|
+
opts.separator "Extraction options:"
|
47
|
+
opts.on("--mode MODE", [:intersect, :slice],
|
48
|
+
"Extraction mode; 'intersect' to match ",
|
49
|
+
"blocks intersecting the given region,",
|
50
|
+
"or 'slice' to extract subsets covering ",
|
51
|
+
"given regions") do |mode|
|
52
|
+
options.mode = mode
|
53
|
+
end
|
54
|
+
opts.on("--bed BED", "Use intervals from the given BED file") do |bed|
|
55
|
+
options.bed = bed
|
56
|
+
end
|
57
|
+
opts.on("--interval SEQ:START-END", "Zero-based genomic interval to match") do |int|
|
58
|
+
options.interval = handle_interval_spec(int)
|
59
|
+
end
|
60
|
+
opts.separator ""
|
61
|
+
opts.separator "Output options:"
|
62
|
+
opts.on("-f", "--format FMT", [:maf, :fasta], "Output format") do |fmt|
|
63
|
+
options.format = fmt
|
64
|
+
end
|
65
|
+
opts.on("-o", "--output OUT", "Write output to file OUT") do |out|
|
66
|
+
options.out_path = out
|
67
|
+
end
|
68
|
+
opts.separator ""
|
69
|
+
opts.separator "Filtering options:"
|
70
|
+
opts.on("--only-species SPECIES",
|
71
|
+
"Filter out all but the species in the",
|
72
|
+
"given comma-separated list",
|
73
|
+
"(or @FILE to read from a file)") do |spec|
|
74
|
+
options.seq_filter[:only_species] = handle_list_spec(spec)
|
75
|
+
end
|
76
|
+
opts.on("--with-all-species SPECIES",
|
77
|
+
"Only match blocks with all the given",
|
78
|
+
"species, comma-separated",
|
79
|
+
"(or @FILE to read from a file)") do |spec|
|
80
|
+
options.block_filter[:with_all_species] = handle_list_spec(spec)
|
81
|
+
end
|
82
|
+
opts.on("--min-sequences N", Integer,
|
83
|
+
"Match only blocks with at least N sequences") do |n|
|
84
|
+
options.block_filter[:at_least_n_sequences] = n
|
85
|
+
end
|
86
|
+
opts.on("--min-text-size N", Integer,
|
87
|
+
"Match only blocks with minimum text size N") do |n|
|
88
|
+
options.block_filter[:min_size] = n
|
89
|
+
end
|
90
|
+
opts.on("--max-text-size N", Integer,
|
91
|
+
"Match only blocks with maximum text size N") do |n|
|
92
|
+
options.block_filter[:max_size] = n
|
93
|
+
end
|
94
|
+
opts.separator ""
|
95
|
+
opts.separator "Block processing options:"
|
96
|
+
opts.on("--join-blocks",
|
97
|
+
"Join blocks if appropriate after filtering",
|
98
|
+
"out sequences") do
|
99
|
+
options.parse_options[:join_blocks] = true
|
100
|
+
end
|
101
|
+
opts.on("--remove-gaps", "Remove gaps after filtering out sequences") do
|
102
|
+
options.parse_options[:remove_gaps] = true
|
103
|
+
end
|
104
|
+
opts.on("--parse-extended", "Parse 'extended' MAF data (i, q lines)") do
|
105
|
+
options.parse_options[:parse_extended] = true
|
106
|
+
end
|
107
|
+
opts.on("--parse-empty", "Parse empty (e) lines of MAF data") do
|
108
|
+
options.parse_options[:parse_empty] = true
|
109
|
+
end
|
110
|
+
opts.separator ""
|
111
|
+
opts.separator "Logging options:"
|
112
|
+
Bio::MAF::handle_logging_options(opts)
|
113
|
+
end
|
114
|
+
$op.parse!(ARGV)
|
115
|
+
Bio::Log::CLI.configure('bio-maf')
|
116
|
+
|
117
|
+
def usage(msg)
|
118
|
+
$stderr.puts msg
|
119
|
+
$stderr.puts $op
|
120
|
+
exit 2
|
121
|
+
end
|
122
|
+
|
123
|
+
if options.maf
|
124
|
+
access = Access.file(options.maf, options.idx, options.parse_options)
|
125
|
+
elsif options.maf_dir
|
126
|
+
access = Access.maf_dir(options.maf_dir, options.parse_options)
|
127
|
+
else
|
128
|
+
usage "Must supply --maf or --maf-dir!"
|
129
|
+
end
|
130
|
+
|
131
|
+
begin
|
132
|
+
access.sequence_filter = options.seq_filter unless options.seq_filter.empty?
|
133
|
+
access.block_filter = options.block_filter unless options.block_filter.empty?
|
134
|
+
if options.out_path
|
135
|
+
outf = File.open(options.out_path, 'w')
|
136
|
+
else
|
137
|
+
outf = $stdout
|
138
|
+
end
|
139
|
+
|
140
|
+
case options.format
|
141
|
+
when :maf
|
142
|
+
writer = Writer.new(outf)
|
143
|
+
when :fasta
|
144
|
+
writer = FASTAWriter.new(outf)
|
145
|
+
else
|
146
|
+
raise "unsupported output format #{options.format}!"
|
147
|
+
end
|
148
|
+
|
149
|
+
if options.bed
|
150
|
+
intervals = read_bed_intervals(options.bed)
|
151
|
+
elsif options.interval
|
152
|
+
intervals = [options.interval]
|
153
|
+
else
|
154
|
+
usage "Must supply --interval or --bed!"
|
155
|
+
end
|
156
|
+
|
157
|
+
# TODO: provide access to original MAF header?
|
158
|
+
if options.format == :maf
|
159
|
+
writer.write_header(Header.default)
|
160
|
+
end
|
161
|
+
|
162
|
+
case options.mode
|
163
|
+
when :intersect
|
164
|
+
access.find(intervals) do |block|
|
165
|
+
writer.write_block(block)
|
166
|
+
end
|
167
|
+
when :slice
|
168
|
+
# TODO: multiple files if intervals.size > 1?
|
169
|
+
intervals.each do |interval|
|
170
|
+
access.slice(interval) do |block|
|
171
|
+
writer.write_block(block)
|
172
|
+
end
|
173
|
+
end
|
174
|
+
else
|
175
|
+
raise "Unsupported mode #{options.mode}!"
|
176
|
+
end
|
177
|
+
|
178
|
+
ensure
|
179
|
+
access.close
|
180
|
+
end
|