bio-maf 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/Gemfile +2 -1
- data/README.md +98 -29
- data/Rakefile +6 -2
- data/bin/maf_tile +59 -35
- data/bio-maf.gemspec +4 -3
- data/features/block-joining.feature +32 -0
- data/features/dir-access.feature +46 -0
- data/features/maf-indexing.feature +23 -0
- data/features/maf-to-fasta.feature +9 -0
- data/features/slice.feature +54 -0
- data/features/step_definitions/dir-access_steps.rb +15 -0
- data/features/step_definitions/file_steps.rb +7 -0
- data/features/step_definitions/gap_removal_steps.rb +4 -0
- data/features/step_definitions/index_steps.rb +3 -3
- data/features/step_definitions/output_steps.rb +9 -1
- data/features/step_definitions/parse_steps.rb +13 -2
- data/features/step_definitions/query_steps.rb +7 -6
- data/features/step_definitions/slice_steps.rb +15 -0
- data/features/step_definitions/{gap-filling_steps.rb → tiling_steps.rb} +0 -0
- data/features/support/aruba.rb +1 -0
- data/features/support/env.rb +3 -1
- data/features/{gap-filling.feature → tiling.feature} +85 -0
- data/lib/bio/maf/index.rb +223 -11
- data/lib/bio/maf/maf.rb +209 -0
- data/lib/bio/maf/parser.rb +190 -111
- data/lib/bio/maf/tiler.rb +33 -6
- data/man/maf_index.1 +1 -1
- data/man/maf_tile.1 +7 -7
- data/man/maf_tile.1.ronn +21 -13
- data/man/maf_to_fasta.1 +1 -1
- data/spec/bio/maf/index_spec.rb +99 -0
- data/spec/bio/maf/maf_spec.rb +184 -0
- data/spec/bio/maf/parser_spec.rb +75 -115
- data/spec/bio/maf/tiler_spec.rb +44 -0
- data/test/data/chr22_ieq2.maf +11 -0
- data/test/data/gap-1.kct +0 -0
- data/test/data/gap-1.maf +9 -0
- data/test/data/gap-filled1.fa +6 -0
- data/test/data/gap-sp1.fa.gz +0 -0
- data/test/data/mm8_chr7_tiny_slice1.maf +9 -0
- data/test/data/mm8_chr7_tiny_slice2.maf +10 -0
- data/test/data/mm8_chr7_tiny_slice3.maf +10 -0
- data/test/data/mm8_chrM_tiny.kct +0 -0
- data/test/data/mm8_chrM_tiny.maf +1000 -0
- metadata +59 -7
data/lib/bio/maf/maf.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
+
require 'bio-alignment'
|
2
|
+
|
1
3
|
module Bio
|
4
|
+
class GenomicInterval
  # Compute the region shared by this interval and another interval on
  # the same chromosome.
  #
  # The result spans from the larger of the two start coordinates to
  # the smaller of the two end coordinates. No check is made that the
  # two intervals actually overlap.
  #
  # @param [GenomicInterval] other interval to intersect with
  # @return [GenomicInterval] the shared region
  # @raise [ArgumentError] if the intervals are on different chromosomes
  def intersection(other)
    unless chrom == other.chrom
      raise ArgumentError,
            "cannot intersect intervals on different chromosomes: #{chrom} and #{other.chrom}"
    end
    GenomicInterval.new(chrom,
                        [chr_start, other.chr_start].max,
                        [chr_end, other.chr_end].min)
  end
end
|
12
|
+
|
2
13
|
module MAF
|
3
14
|
|
4
15
|
# A MAF header, containing the variable-value pairs from the first
|
@@ -12,6 +23,12 @@ module Bio
|
|
12
23
|
# @return [Hash]
|
13
24
|
attr_accessor :alignment_params
|
14
25
|
|
26
|
+
# Build a header whose only variable is version=1 and which has no
# alignment parameters.
# @return [Header]
def Header.default
  default_vars = {:version => 1}
  Header.new(default_vars, nil)
end
|
31
|
+
|
15
32
|
def initialize(vars, params)
|
16
33
|
@vars = vars
|
17
34
|
@alignment_params = params
|
@@ -47,6 +64,7 @@ module Bio
|
|
47
64
|
attr_reader :size
|
48
65
|
|
49
66
|
def initialize(vars, sequences, offset, size, filtered)
|
67
|
+
#raise ArgumentError, "no sequences given for block at offset #{offset}!" unless sequences && sequences.first
|
50
68
|
@vars = vars
|
51
69
|
@sequences = sequences
|
52
70
|
@offset = offset
|
@@ -79,6 +97,11 @@ module Bio
|
|
79
97
|
@filtered
|
80
98
|
end
|
81
99
|
|
100
|
+
# Convert this block to a Bio::BioAlignment::Alignment by converting
# each of its sequences with their own #to_bio_alignment.
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  Bio::BioAlignment::Alignment.new(sequences.map(&:to_bio_alignment))
end
|
104
|
+
|
82
105
|
GAP = /-+/
|
83
106
|
|
84
107
|
# Remove gaps present in all sequences. These would generally
|
@@ -116,6 +139,96 @@ module Bio
|
|
116
139
|
gaps.size
|
117
140
|
end
|
118
141
|
|
142
|
+
# Returns a new Block covering only the region where this block's
# reference sequence overlaps the given interval. If the interval
# matches the reference sequence's interval exactly, the block itself
# is returned.
# @param [Bio::GenomicInterval] interval to slice the block with
# @return [Block] block covering intersection with interval
# @raise [RuntimeError] if the interval does not overlap the block or
#   lies on a different chromosome
def slice(interval)
  ref_interval = ref_seq.interval
  relation = interval.compare(ref_interval)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    _slice(interval.intersection(ref_interval))
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{ref_interval}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end
|
160
|
+
|
161
|
+
# Build a new Block from the text columns of every sequence that
# correspond to the given interval on the reference sequence.
#
# The block variables are copied unchanged (the score is not reset),
# and the original offset, size and filtered flag are carried over.
#
# @param [Bio::GenomicInterval] interval region to extract
# @return [Block]
def _slice(interval)
  text_range = _slice_text_range(interval)
  sliced = sequences.map { |seq| seq.slice(text_range) }
  # TODO: should the filtered param be #modified? instead?
  Block.new(vars.dup, sliced, offset, size, @filtered)
end
|
169
|
+
|
170
|
+
# Translate an interval on the reference sequence into a range of
# text columns of the reference sequence's alignment text.
#
# Walks the reference text once, counting only non-gap characters as
# genomic positions, starting from the reference sequence's start.
#
# @param [Bio::GenomicInterval] interval region to locate; its
#   zero_start and zero_end are used
# @return [Range] end-exclusive range of text offsets
# @raise [RuntimeError] if the start or end position is not reached
def _slice_text_range(interval)
  first_base = interval.zero_start
  end_base = interval.zero_end
  genome_pos = ref_seq.start
  text_start = nil
  text_end = nil
  ref_seq.text.each_char.with_index do |char, col|
    # gap columns do not consume a genomic position
    next if char == '-'
    text_start = col if genome_pos == first_base
    genome_pos += 1
    if text_start && genome_pos == end_base
      text_end = col + 1
      break
    end
  end
  unless text_start && text_end
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  text_start...text_end
end
|
194
|
+
|
195
|
+
# Whether this block can be joined end-to-end with another block.
#
# Both blocks must have the same number of sequences, their reference
# sequences must share a source and be directly adjacent, and every
# sequence of this block (the reference sequence included) must have
# a counterpart from the same source in the other block that it
# reports itself joinable with.
#
# @param [Block] other the block that would follow this one
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size
  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start
  # An earlier version called #next on the enumerator before #find.
  # That only moves the external cursor; #find always iterates from
  # the first element, so every sequence is checked here.
  mismatch = sequences.each_with_index.find do |s1, i|
    s2 = other.seq_from(s1.source, i)
    (! s2) || ! s1.joinable_with?(s2)
  end
  ! mismatch
end
|
212
|
+
|
213
|
+
# Join this block with another one, sequence by sequence.
#
# Each sequence is joined with the sequence from the same source in
# the other block. The new block gets a copy of this block's
# variables with the score set to '0.0', this block's offset, a nil
# size and this block's filtered flag.
#
# @param [Block] other the block to append
# @return [Block] the joined block
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    mine.join(other.seq_from(mine.source, idx))
  end
  merged_vars = vars.dup
  merged_vars[:score] = '0.0'
  Block.new(merged_vars, joined, offset, nil, @filtered)
end
|
222
|
+
|
223
|
+
# Find the sequence from the given source in this block.
#
# The sequence at index pos_guess is tried first; if it is missing
# or comes from another source, all sequences are searched.
#
# @param [String] src source name to look for
# @param [Integer] pos_guess index at which the sequence is expected
# @return [Sequence, nil] the matching sequence, or nil if none
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  # guess is nil when pos_guess is past the end of the sequence list
  return guess if guess && guess.source == src
  sequences.find { |s| s.source == src }
end
|
231
|
+
|
119
232
|
end
|
120
233
|
|
121
234
|
# A sequence within an alignment block.
|
@@ -153,6 +266,29 @@ module Bio
|
|
153
266
|
start + size
|
154
267
|
end
|
155
268
|
|
269
|
+
# The region covered by this sequence, built with
# GenomicInterval.zero_based from its source, start and end.
# @return [GenomicInterval]
def interval
  name = self.source
  GenomicInterval.zero_based(name, self.start, self.end)
end
|
272
|
+
|
273
|
+
# Extract the given range of text columns as a new Sequence.
#
# The new start is this sequence's start advanced by the number of
# non-gap characters before the range; the new size is the number of
# non-gap characters inside it. Quality data, if present, is sliced
# with the same range.
#
# @param [Range] range text offsets to extract
# @return [Sequence] the extracted part
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  leading = text.slice(0...(range.begin))
  bases_before = leading.size - leading.count("-")
  piece = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece
  base_count = piece.size - piece.count("-")
  sliced = Sequence.new(source,
                        start + bases_before,
                        base_count,
                        strand,
                        src_size,
                        piece)
  sliced.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  sliced
end
|
291
|
+
|
156
292
|
# Whether this sequence is empty. Only true for {EmptySequence}
|
157
293
|
# instances from 'e' lines.
|
158
294
|
def empty?
|
@@ -163,6 +299,43 @@ module Bio
|
|
163
299
|
size != text.size
|
164
300
|
end
|
165
301
|
|
302
|
+
# Maps the single-character status codes stored in i_data to
# symbolic names. Frozen so the shared table cannot be modified.
I_STATUS = {
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze
|
310
|
+
|
311
|
+
# Look up the symbolic name for a status character in I_STATUS.
# @param [String] c status character
# @return [Symbol] the symbolic status
# @raise [RuntimeError] if the character is not in I_STATUS
def decode_status_char(c)
  status = I_STATUS[c]
  raise("Unsupported status character #{c}!") unless status
  status
end
|
314
|
+
|
315
|
+
# The raw left status character: element 0 of i_data, or a falsy
# value when there is no i_data.
def left_status_char
  data = i_data
  data && data[0]
end
|
318
|
+
|
319
|
+
# The decoded left status (see #decode_status_char), or a falsy
# value when there is no i_data.
def left_status
  data = i_data
  data && decode_status_char(left_status_char)
end
|
322
|
+
|
323
|
+
# The left count: element 1 of i_data converted with #to_i, or a
# falsy value when there is no i_data.
def left_count
  data = i_data
  data && data[1].to_i
end
|
326
|
+
|
327
|
+
# The raw right status character: element 2 of i_data, or a falsy
# value when there is no i_data.
def right_status_char
  data = i_data
  data && data[2]
end
|
330
|
+
|
331
|
+
# The decoded right status (see #decode_status_char), or a falsy
# value when there is no i_data.
def right_status
  data = i_data
  data && decode_status_char(right_status_char)
end
|
334
|
+
|
335
|
+
# The right count: element 3 of i_data converted with #to_i, or a
# falsy value when there is no i_data.
def right_count
  data = i_data
  data && data[3].to_i
end
|
338
|
+
|
166
339
|
def species
|
167
340
|
parts = source.split('.', 2)
|
168
341
|
parts.size == 2 ? parts[0] : nil
|
@@ -177,11 +350,34 @@ module Bio
|
|
177
350
|
end
|
178
351
|
end
|
179
352
|
|
353
|
+
# Convert this sequence to a Bio::BioAlignment::Sequence, using the
# source as its identifier and the alignment text as its content.
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  seq_id = source
  Bio::BioAlignment::Sequence.new(seq_id, text)
end
|
356
|
+
|
180
357
|
# Write this sequence through the given writer, with a header of
# the form "source:start-end" where end is start + size.
# @param writer object responding to #write(header, text)
def write_fasta(writer)
  stop = start + size
  header = "#{source}:#{start}-#{stop}"
  writer.write(header, text)
end
|
184
361
|
|
362
|
+
# Whether the given sequence can be appended directly to this one:
# it must start where this one ends, be on the same strand, and
# agree with this one on #empty?.
# @param [Sequence] o the sequence that would follow this one
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless self.strand == o.strand
  self.empty? == o.empty?
end
|
367
|
+
|
368
|
+
# Concatenate this sequence with the one that follows it.
#
# The result keeps this sequence's source, start, strand and source
# size; sizes and texts are added together. Quality data is
# concatenated only when both sequences have it.
#
# @param [Sequence] o the sequence to append
# @return [Sequence] the joined sequence
def join(o)
  joined = Sequence.new(source,
                        start,
                        size + o.size,
                        strand,
                        src_size,
                        text + o.text)
  joined.quality = quality + o.quality if quality && o.quality
  joined
end
|
380
|
+
|
185
381
|
# Maps the given zero-based genomic range onto a range of string
|
186
382
|
# offsets, suitable for extracting the text for the given range
|
187
383
|
# from #text.
|
@@ -253,6 +449,19 @@ module Bio
|
|
253
449
|
''
|
254
450
|
end
|
255
451
|
|
452
|
+
# Slicing a sequence without text is a no-op: the receiver itself is
# returned, whatever the arguments.
#
# len is optional so that this method can also be called with a
# single Range argument, which is how Block#_slice invokes #slice on
# each of its sequences.
#
# @param offset start offset, or a Range (ignored)
# @param len length (ignored)
# @return [self]
def slice(offset, len = nil)
  self
end
|
455
|
+
|
456
|
+
# Join with the following sequence, producing a new EmptySequence
# whose size is the sum of both sizes. Source, start, strand, source
# size and status are taken from this sequence.
# @param o the sequence to append; only its size is used
# @return [EmptySequence]
def join(o)
  total = size + o.size
  EmptySequence.new(source, start, total, strand, src_size, @status)
end
|
464
|
+
|
256
465
|
# Always true for this kind of sequence.
# @return [Boolean] true
def empty?
  true
end
|
data/lib/bio/maf/parser.rb
CHANGED
@@ -150,7 +150,7 @@ module Bio
|
|
150
150
|
#
|
151
151
|
# @return [Block] alignment block
|
152
152
|
# @api public
|
153
|
-
def
|
153
|
+
def _parse_block
|
154
154
|
return nil if at_end
|
155
155
|
if s.pos != last_block_pos
|
156
156
|
# in non-trailing block
|
@@ -296,19 +296,11 @@ module Bio
|
|
296
296
|
parse_error "unexpected line: '#{line}'"
|
297
297
|
end
|
298
298
|
end
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
postprocess_block(block)
|
305
|
-
end
|
306
|
-
|
307
|
-
def postprocess_block(block)
|
308
|
-
if block.filtered? && opts[:remove_gaps]
|
309
|
-
block.remove_gaps!
|
310
|
-
end
|
311
|
-
block
|
299
|
+
Block.new(block_vars,
|
300
|
+
seqs,
|
301
|
+
block_offset,
|
302
|
+
s.pos - block_start_pos,
|
303
|
+
filtered)
|
312
304
|
end
|
313
305
|
|
314
306
|
# Parse an 's' line.
|
@@ -419,19 +411,21 @@ module Bio
|
|
419
411
|
# @param [Array] block_offsets Offsets of blocks to parse.
|
420
412
|
# @return [Array<Block>]
|
421
413
|
# Read the region starting at offset and parse one block for each
# expected offset, yielding the blocks in turn. Without a block, an
# enumerator over the same blocks is returned instead.
#
# All chunks up to len are read before parsing, to avoid fragment
# joining.
#
# Raises a parse error (via #parse_error) if a block cannot be parsed
# or its offset differs from the expected one.
def fetch_blocks(offset, len, block_offsets)
  return enum_for(:fetch_blocks, offset, len, block_offsets) unless block_given?
  start_chunk_read_if_needed(offset, len)
  append_chunks_to(len)
  block_offsets.each do |expected_offset|
    block = _parse_block
    unless block
      parse_error("expected a block at offset #{expected_offset} but could not parse one!")
    end
    unless block.offset == expected_offset
      parse_error("got block with offset #{block.offset}, expected #{expected_offset}!")
    end
    yield block
  end
end
|
437
431
|
|
@@ -530,6 +524,10 @@ module Bio
|
|
530
524
|
_parse_header()
|
531
525
|
end
|
532
526
|
|
527
|
+
# Close the underlying handle `f`.
def close
  handle = f
  handle.close
end
|
530
|
+
|
533
531
|
# Create a {ParseContext} for random access, using the given
|
534
532
|
# chunk size.
|
535
533
|
#
|
@@ -574,13 +572,19 @@ module Bio
|
|
574
572
|
# `fetch_list` should be an array of `[offset, length]` tuples.
|
575
573
|
#
|
576
574
|
# @param [Array] fetch_list the fetch list
|
577
|
-
# @
|
578
|
-
|
579
|
-
|
580
|
-
if
|
581
|
-
|
575
|
+
# @yield [block] each block matched, in turn
|
576
|
+
# @return [Enumerable<Block>] each matching {Block}, if no block given
|
577
|
+
def fetch_blocks(fetch_list, &blk)
  return enum_for(:fetch_blocks, fetch_list) unless blk
  merged = merge_fetch_list(fetch_list)
  # The parallel path is only taken under JRuby with more than one
  # thread requested.
  parallel = RUBY_PLATFORM == 'java' && @opts.fetch(:threads, 1) > 1
  fetcher = if parallel
              lambda { |&b2| fetch_blocks_merged_parallel(merged, &b2) }
            else
              lambda { |&b2| fetch_blocks_merged(merged, &b2) }
            end
  # apply the wrapping stages selected by the parser options
  wrap_block_seq(fetcher, &blk)
end
|
586
590
|
|
@@ -588,23 +592,19 @@ module Bio
|
|
588
592
|
#
|
589
593
|
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
|
590
594
|
# @return [Array<Block>] the requested alignment blocks
|
591
|
-
def fetch_blocks_merged(fetch_list)
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
ctx.fetch_blocks(*e).each do |block|
|
598
|
-
y << block
|
599
|
-
#total_size += block.size
|
600
|
-
end
|
601
|
-
end
|
595
|
+
# Fetch and parse the blocks given by the merged fetch list, passing
# each parsed block to the given block.
#
# A single parse context, created with @random_access_chunk_size, is
# used for all entries. Each fetch list entry is splatted into the
# context's #fetch_blocks.
#
# @param [Array] fetch_list merged fetch list
# @yield [block] each parsed block
# @return [Float] elapsed wall-clock time in seconds
def fetch_blocks_merged(fetch_list, &blk)
  start = Time.now
  with_context(@random_access_chunk_size) do |ctx|
    fetch_list.each do |e|
      ctx.fetch_blocks(*e, &blk)
    end
  end
  # TODO: debug log (elapsed time and MB/s)
  Time.now - start
end
|
609
609
|
|
610
610
|
# Fetch and parse the blocks given by the merged fetch list, in
|
@@ -614,40 +614,38 @@ module Bio
|
|
614
614
|
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
|
615
615
|
# @return [Array<Block>] the requested alignment blocks
|
616
616
|
# Fetch and parse the blocks given by the merged fetch list using
# worker threads (JRuby only: relies on java.util.concurrent), and
# yield each parsed block to the caller.
#
# The number of workers comes from the :threads option (default 1).
# Completed results are polled from the CompletionTracker's queue
# with a one-second timeout; on timeout, a worker whose status is nil
# is treated as a failure. Timing statistics are printed to $stderr.
#
# @param [Array] fetch_list merged fetch list
# @yield [block] each parsed block
# @raise [RuntimeError] if a worker thread has failed
def fetch_blocks_merged_parallel(fetch_list)
  # initial value 0 keeps the MB computation valid for an empty list
  total_size = fetch_list.collect { |e| e[1] }.reduce(0, :+)
  start = Time.now
  n_threads = @opts.fetch(:threads, 1)
  # TODO: break entries up into longer runs for more
  # sequential I/O
  jobs = java.util.concurrent.ConcurrentLinkedQueue.new(fetch_list)
  ct = CompletionTracker.new(fetch_list)
  completed = ct.queue
  threads = []
  n_threads.times { threads << make_worker(jobs, ct) }

  n_res = 0
  while n_res < fetch_list.size
    c = completed.poll(1, java.util.concurrent.TimeUnit::SECONDS)
    unless c
      # timed out: Thread#status is nil for a thread that terminated
      # with an exception
      raise "Worker failed!" if threads.find { |t| t.status.nil? }
      next
    end
    c.each do |block|
      yield block
    end
    n_res += 1
  end
  threads.each { |t| t.join }
  elapsed = Time.now - start
  $stderr.printf("Fetched blocks from %d threads in %.1fs.\n",
                 n_threads,
                 elapsed)
  mb = total_size / 1048576.0
  $stderr.printf("%.3f MB processed (%.1f MB/s).\n",
                 mb,
                 mb / elapsed)
end
|
652
650
|
|
653
651
|
# Create a worker thread for parallel parsing.
|
@@ -721,30 +719,119 @@ module Bio
|
|
721
719
|
# Delegates to {#parse_blocks_parallel} if `:threads` is set
|
722
720
|
# under JRuby.
|
723
721
|
#
|
724
|
-
# @return [Enumerator<Block>] enumerator of
|
722
|
+
# @return [Enumerator<Block>] enumerator of {Block}s if no block given.
|
723
|
+
# @yield [block] Passes each {Block} in turn to a block
|
725
724
|
# @api public
|
726
|
-
def
|
727
|
-
if
|
728
|
-
|
725
|
+
def each_block(&blk)
  return enum_for(:each_block) unless block_given?
  # Under JRuby, the mere presence of a :threads option selects the
  # threaded parser.
  use_threads = RUBY_PLATFORM == 'java' && @opts.has_key?(:threads)
  source = use_threads ? method(:parse_blocks_parallel) : method(:each_block_seq)
  wrap_block_seq(source, &blk)
end
|
737
|
+
alias_method :parse_blocks, :each_block
|
738
|
+
|
739
|
+
# Parse blocks one after another until the end of input is reached,
# yielding each block that was actually parsed.
# @yield [block] each parsed block
def each_block_seq
  while !at_end
    parsed = _parse_block
    next unless parsed
    yield parsed
  end
end
|
745
|
+
|
746
|
+
# Parse the next block and pass it through the wrapping stages
# selected by the parser options (see #wrap_block_seq).
#
# When no block can be parsed, nothing is passed to the wrapping
# stages, so that they are never handed nil.
#
# @return the resulting block, or nil if none was produced
def parse_block
  result = nil
  source = lambda do |&blk|
    parsed = _parse_block
    blk.call(parsed) if parsed
  end
  wrap_block_seq(source) { |block| result = block }
  result
end
|
753
|
+
|
754
|
+
# Parser options that each add a wrapping stage around the block
# source, listed outermost first. Frozen so the shared list cannot be
# modified.
WRAP_OPTS = [:as_bio_alignment, :join_blocks, :remove_gaps].freeze
|
755
|
+
|
756
|
+
# Run the block source `fun` through the wrapping stages enabled in
# the parser options, handing the results to blk.
#
# Stages are the members of WRAP_OPTS set in @opts, in WRAP_OPTS
# order, followed by :sequence_filter when a non-empty sequence
# filter is present.
#
# @param fun callable that yields blocks to the block it is given
def wrap_block_seq(fun, &blk)
  stages = WRAP_OPTS.select { |opt| @opts[opt] }
  filter = sequence_filter
  stages << :sequence_filter if filter && !filter.empty?
  _wrap(stages, fun, &blk)
end
|
761
|
+
|
762
|
+
# Apply the first wrapping stage in options around the remaining
# ones; with no stages left, call the block source directly.
#
# options should be [:outer, ..., :inner]. The first entry is removed
# from the array; the rest is passed on to the stage helper.
#
# @param [Array<Symbol>] options remaining stages (modified in place)
# @param fun callable that yields blocks to the block it is given
# @raise [RuntimeError] for an unknown stage
def _wrap(options, fun, &blk)
  stage = options.shift
  return fun.call(&blk) if stage.nil?
  case stage
  when :sequence_filter
    # drop blocks left with at most one sequence
    keep_multi = lambda { |b| b if b.sequences.size > 1 }
    conv_map(options, fun, keep_multi, &blk)
  when :join_blocks
    block_joiner(options, fun, &blk)
  when :as_bio_alignment
    conv_send(options, fun, :to_bio_alignment, &blk)
  when :remove_gaps
    # gaps are only removed from blocks that were filtered
    strip_gaps = lambda { |b| b.remove_gaps! if b.filtered?; b }
    conv_map(options, fun, strip_gaps, &blk)
  else
    raise "unhandled wrapper mode: #{stage}"
  end
end
|
789
|
+
|
790
|
+
# Yield only those blocks from `fun` that are filtered and still
# contain more than one sequence.
# @param fun callable that yields blocks to the block it is given
# @yield [block] each block that passes
def filter_seq_count(fun)
  fun.call do |block|
    next unless block.filtered?
    next unless block.sequences.size > 1
    yield block
  end
end
|
795
|
+
|
796
|
+
# Merge runs of consecutive joinable blocks coming from the inner
# stages and yield the results.
#
# A block is joined onto the pending one only if at least one of the
# two is filtered and the pending block reports it as joinable.
# Otherwise the pending block is yielded and the new block becomes
# pending. The last pending block is yielded at the end.
#
# @param [Array<Symbol>] options remaining inner stages
# @param fun callable that yields blocks to the block it is given
# @yield [block] each joined or unjoined block
def block_joiner(options, fun)
  pending = nil
  _wrap(options, fun) do |cur|
    can_join = pending \
      && (pending.filtered? || cur.filtered?) \
      && pending.joinable_with?(cur)
    if can_join
      pending = pending.join(cur)
    else
      yield pending if pending
      pending = cur
    end
  end
  yield pending if pending
end
|
809
|
+
|
810
|
+
# Pass every block from the inner stages through `fun` and yield the
# result, skipping results that are nil or false.
# @param [Array<Symbol>] options remaining inner stages
# @param search callable that yields blocks to the block it is given
# @param fun callable applied to each block
# @yield [block] each truthy result
def conv_map(options, search, fun)
  _wrap(options, search) do |block|
    converted = fun.call(block)
    next unless converted
    yield converted
  end
end
|
816
|
+
|
817
|
+
# Call the method named `sym` on every block from the inner stages
# and yield the result, skipping results that are nil or false.
# @param [Array<Symbol>] options remaining inner stages
# @param search callable that yields blocks to the block it is given
# @param [Symbol] sym name of the method to call on each block
# @yield [result] each truthy result
def conv_send(options, search, sym)
  _wrap(options, search) do |block|
    result = block.send(sym)
    yield result if result
  end
end
|
737
823
|
|
738
824
|
# Parse alignment blocks with a worker thread.
|
739
825
|
#
|
740
|
-
# @
|
826
|
+
# @yield [block] each {Block} parsed by the worker thread
|
741
827
|
# @api private
|
742
828
|
def parse_blocks_parallel
|
743
829
|
queue = java.util.concurrent.LinkedBlockingQueue.new(128)
|
744
830
|
worker = Thread.new do
|
745
831
|
begin
|
746
832
|
until at_end
|
747
|
-
|
833
|
+
block = _parse_block()
|
834
|
+
queue.put(block) if block
|
748
835
|
end
|
749
836
|
queue.put(:eof)
|
750
837
|
rescue
|
@@ -752,31 +839,23 @@ module Bio
|
|
752
839
|
$stderr.puts $!.backtrace.join("\n")
|
753
840
|
end
|
754
841
|
end
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
n_final_poll += 1 unless worker.alive?
|
768
|
-
end
|
769
|
-
break if n_final_poll > 1
|
770
|
-
end
|
771
|
-
unless saw_eof
|
772
|
-
raise "worker exited unexpectedly!"
|
842
|
+
saw_eof = false
|
843
|
+
n_final_poll = 0
|
844
|
+
while true
|
845
|
+
block = queue.poll(1, java.util.concurrent.TimeUnit::SECONDS)
|
846
|
+
if block == :eof
|
847
|
+
saw_eof = true
|
848
|
+
break
|
849
|
+
elsif block
|
850
|
+
yield block
|
851
|
+
else
|
852
|
+
# timed out
|
853
|
+
n_final_poll += 1 unless worker.alive?
|
773
854
|
end
|
855
|
+
break if n_final_poll > 1
|
774
856
|
end
|
775
|
-
|
776
|
-
|
777
|
-
def each_block
|
778
|
-
until at_end
|
779
|
-
yield parse_block()
|
857
|
+
unless saw_eof
|
858
|
+
raise "worker exited unexpectedly!"
|
780
859
|
end
|
781
860
|
end
|
782
861
|
|