bio-maf 0.2.0-java → 0.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/Gemfile +3 -1
- data/README.md +98 -29
- data/Rakefile +6 -2
- data/bin/maf_tile +59 -35
- data/bio-maf.gemspec +4 -3
- data/features/block-joining.feature +32 -0
- data/features/dir-access.feature +46 -0
- data/features/maf-indexing.feature +23 -0
- data/features/maf-to-fasta.feature +9 -0
- data/features/slice.feature +54 -0
- data/features/step_definitions/dir-access_steps.rb +15 -0
- data/features/step_definitions/file_steps.rb +7 -0
- data/features/step_definitions/gap_removal_steps.rb +4 -0
- data/features/step_definitions/index_steps.rb +3 -3
- data/features/step_definitions/output_steps.rb +9 -1
- data/features/step_definitions/parse_steps.rb +13 -2
- data/features/step_definitions/query_steps.rb +7 -6
- data/features/step_definitions/slice_steps.rb +15 -0
- data/features/step_definitions/{gap-filling_steps.rb → tiling_steps.rb} +0 -0
- data/features/support/aruba.rb +1 -0
- data/features/support/env.rb +3 -1
- data/features/{gap-filling.feature → tiling.feature} +85 -0
- data/lib/bio/maf/index.rb +223 -11
- data/lib/bio/maf/maf.rb +209 -0
- data/lib/bio/maf/parser.rb +190 -111
- data/lib/bio/maf/tiler.rb +33 -6
- data/man/maf_index.1 +1 -1
- data/man/maf_tile.1 +7 -7
- data/man/maf_tile.1.ronn +21 -13
- data/man/maf_to_fasta.1 +1 -1
- data/spec/bio/maf/index_spec.rb +99 -0
- data/spec/bio/maf/maf_spec.rb +184 -0
- data/spec/bio/maf/parser_spec.rb +75 -115
- data/spec/bio/maf/tiler_spec.rb +44 -0
- data/test/data/chr22_ieq2.maf +11 -0
- data/test/data/gap-1.kct +0 -0
- data/test/data/gap-1.maf +9 -0
- data/test/data/gap-filled1.fa +6 -0
- data/test/data/gap-sp1.fa.gz +0 -0
- data/test/data/mm8_chr7_tiny_slice1.maf +9 -0
- data/test/data/mm8_chr7_tiny_slice2.maf +10 -0
- data/test/data/mm8_chr7_tiny_slice3.maf +10 -0
- data/test/data/mm8_chrM_tiny.kct +0 -0
- data/test/data/mm8_chrM_tiny.maf +1000 -0
- metadata +65 -16
data/lib/bio/maf/maf.rb
CHANGED
@@ -1,4 +1,15 @@
|
|
1
|
+
require 'bio-alignment'
|
2
|
+
|
1
3
|
module Bio
|
4
|
+
class GenomicInterval
  # Compute the region shared by this interval and another one on the
  # same chromosome.
  #
  # The result runs from the larger of the two start coordinates to the
  # smaller of the two end coordinates. No check is made here that the
  # two intervals actually overlap.
  #
  # @param [GenomicInterval] other interval to intersect with
  # @return [GenomicInterval] the shared region
  # @raise [ArgumentError] if the intervals are on different chromosomes
  def intersection(other)
    unless chrom == other.chrom
      raise ArgumentError,
            "cannot intersect intervals on different chromosomes: #{chrom} vs. #{other.chrom}"
    end
    shared_start = [chr_start, other.chr_start].max
    shared_end = [chr_end, other.chr_end].min
    GenomicInterval.new(chrom, shared_start, shared_end)
  end
end
|
12
|
+
|
2
13
|
module MAF
|
3
14
|
|
4
15
|
# A MAF header, containing the variable-value pairs from the first
|
@@ -12,6 +23,12 @@ module Bio
|
|
12
23
|
# @return [Hash]
|
13
24
|
attr_accessor :alignment_params
|
14
25
|
|
26
|
+
# Create a default header with version=1.
|
27
|
+
# @return [Header]
|
28
|
+
# Build a header carrying only the default variable set
# (version=1) and no alignment parameters.
# @return [Header]
def Header.default
  default_vars = { :version => 1 }
  Header.new(default_vars, nil)
end
|
31
|
+
|
15
32
|
def initialize(vars, params)
|
16
33
|
@vars = vars
|
17
34
|
@alignment_params = params
|
@@ -47,6 +64,7 @@ module Bio
|
|
47
64
|
attr_reader :size
|
48
65
|
|
49
66
|
def initialize(vars, sequences, offset, size, filtered)
|
67
|
+
#raise ArgumentError, "no sequences given for block at offset #{offset}!" unless sequences && sequences.first
|
50
68
|
@vars = vars
|
51
69
|
@sequences = sequences
|
52
70
|
@offset = offset
|
@@ -79,6 +97,11 @@ module Bio
|
|
79
97
|
@filtered
|
80
98
|
end
|
81
99
|
|
100
|
+
# Convert this block into a bio-alignment Alignment by converting
# each of its sequences in turn.
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  converted = sequences.map do |seq|
    seq.to_bio_alignment
  end
  Bio::BioAlignment::Alignment.new(converted)
end
|
104
|
+
|
82
105
|
GAP = /-+/
|
83
106
|
|
84
107
|
# Remove gaps present in all sequences. These would generally
|
@@ -116,6 +139,96 @@ module Bio
|
|
116
139
|
gaps.size
|
117
140
|
end
|
118
141
|
|
142
|
+
# Returns a new Block covering only the region where it overlaps
|
143
|
+
# the given interval.
|
144
|
+
# @param [Bio::GenomicInterval] interval to slice the block with
|
145
|
+
# @return [Block] block covering intersection with interval
|
146
|
+
# Produce a Block restricted to the part of this block that the
# given interval overlaps on the reference sequence.
#
# When the interval matches the reference sequence's interval exactly,
# the block itself is returned unchanged.
#
# @param [Bio::GenomicInterval] interval to slice the block with
# @return [Block] block covering intersection with interval
# @raise [RuntimeError] if the interval does not overlap the block or
#   lies on a different chromosome
def slice(interval)
  block_span = ref_seq.interval
  relation = interval.compare(block_span)
  case relation
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    _slice(interval.intersection(block_span))
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{block_span}, interval #{interval}"
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  else
    raise "Unhandled comparison result: #{relation}"
  end
end
|
160
|
+
|
161
|
+
# Build the sliced Block for an interval that is already known to
# lie within the reference sequence.
#
# Every sequence is cut to the same text-column range, and the block
# variables are copied so the new block does not share the hash.
# Offset, size and the filtered flag are carried over unchanged.
#
# @param [Bio::GenomicInterval] interval region to keep
# @return [Block]
def _slice(interval)
  columns = _slice_text_range(interval)
  sliced_seqs = sequences.map { |seq| seq.slice(columns) }
  copied_vars = vars.dup
  # TODO: should the filtered param be #modified? instead?
  Block.new(copied_vars, sliced_seqs, offset, size, @filtered)
end
|
169
|
+
|
170
|
+
# Translate a genomic interval on the reference sequence into the
# range of alignment text columns that covers it.
#
# Walks the reference text, counting only non-gap characters as
# genomic positions starting from the reference sequence's start.
#
# @param [Bio::GenomicInterval] interval region to locate
# @return [Range] end-exclusive range of text offsets
# @raise [RuntimeError] if the start or end position is never reached
def _slice_text_range(interval)
  wanted_first = interval.zero_start
  wanted_stop = interval.zero_end
  genome_pos = ref_seq.start
  text_first = nil
  text_stop = nil
  ref_seq.text.each_char.each_with_index do |char, column|
    # gap columns do not advance the genomic position
    next if char == '-'
    text_first = column if genome_pos == wanted_first
    genome_pos += 1
    next unless text_first && genome_pos == wanted_stop
    text_stop = column + 1
    break
  end
  return text_first...text_stop if text_first && text_stop
  raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
end
|
194
|
+
|
195
|
+
# Whether this block can be joined end-to-end with another block.
#
# Both blocks must have the same number of sequences, the same
# reference source, and the other block's reference sequence must
# start exactly where this one ends. In addition every sequence of
# this block (the reference row included) must have a counterpart
# from the same source in the other block that it is itself
# joinable with.
#
# An earlier version advanced an external enumerator with #next
# before calling #find on it; #find restarts internal iteration, so
# that call skipped nothing. It has been dropped without changing
# which sequences are checked.
#
# @param [Block] other candidate block to append
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size
  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start
  sequences.each_with_index.all? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    s2 && s1.joinable_with?(s2)
  end
end
|
212
|
+
|
213
|
+
# Concatenate this block with another, sequence by sequence.
#
# Each sequence is joined with the other block's sequence from the
# same source. The block variables are copied and the score is reset
# to '0.0'; the new block keeps this block's offset and filtered flag
# and has no size.
#
# @param [Block] other block to append
# @return [Block] the joined block
def join(other)
  joined_seqs = sequences.each_with_index.map do |mine, idx|
    mine.join(other.seq_from(mine.source, idx))
  end
  joined_vars = vars.dup
  joined_vars[:score] = '0.0'
  Block.new(joined_vars, joined_seqs, offset, nil, @filtered)
end
|
222
|
+
|
223
|
+
# Find the sequence in this block that comes from the given source.
#
# The sequence at index pos_guess is tried first; if it is absent
# (index out of range) or from a different source, all sequences are
# searched instead.
#
# @param [String] src source name to look for
# @param [Integer] pos_guess index where the sequence is expected
# @return [Sequence, nil] the matching sequence, or nil if none
def seq_from(src, pos_guess)
  sg = sequences[pos_guess]
  # sg is nil when pos_guess is past the end; fall back to searching
  # rather than failing on nil.
  return sg if sg && sg.source == src
  sequences.find { |s| s.source == src }
end
|
231
|
+
|
119
232
|
end
|
120
233
|
|
121
234
|
# A sequence within an alignment block.
|
@@ -153,6 +266,29 @@ module Bio
|
|
153
266
|
start + size
|
154
267
|
end
|
155
268
|
|
269
|
+
# The genomic interval this sequence occupies, built from its source,
# start and end via GenomicInterval.zero_based.
# @return [GenomicInterval]
def interval
  first = self.start
  last = self.end
  GenomicInterval.zero_based(self.source, first, last)
end
|
272
|
+
|
273
|
+
# Extract the part of this sequence covered by a range of alignment
# text columns.
#
# The new start is advanced by the number of non-gap characters before
# the range, and the new size is the number of non-gap characters
# inside it. Quality data, when present, is sliced with the same range.
#
# @param [Range] range text offsets to keep
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from the text
def slice(range)
  leading = text.slice(0...(range.begin))
  skipped_bases = leading.delete("-").size
  piece = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece
  kept_bases = piece.delete("-").size
  sliced = Sequence.new(source, start + skipped_bases, kept_bases,
                        strand, src_size, piece)
  if quality
    sliced.quality = quality.slice(range)
  end
  # TODO: what to do with synteny data?
  sliced
end
|
291
|
+
|
156
292
|
# Whether this sequence is empty. Only true for {EmptySequence}
|
157
293
|
# instances from 'e' lines.
|
158
294
|
def empty?
|
@@ -163,6 +299,43 @@ module Bio
|
|
163
299
|
size != text.size
|
164
300
|
end
|
165
301
|
|
302
|
+
# Maps the status characters found in i_data to symbolic status
# names. Frozen so the shared lookup table cannot be modified by
# accident.
I_STATUS = {
  'C' => :contiguous,
  'I' => :intervening,
  'N' => :first,
  'n' => :first_bridged,
  'M' => :missing_data,
  'T' => :tandem
}.freeze
|
310
|
+
|
311
|
+
# Translate a status character into its symbolic name using I_STATUS.
# @param [String] c status character
# @return [Symbol]
# @raise [RuntimeError] if the character has no entry in I_STATUS
def decode_status_char(c)
  status = I_STATUS[c]
  return status if status
  raise("Unsupported status character #{c}!")
end
|
314
|
+
|
315
|
+
# First element of i_data, or i_data itself when it is not set.
def left_status_char
  fields = i_data
  fields && fields[0]
end
|
318
|
+
|
319
|
+
# Decoded form of the left status character, or i_data itself when
# it is not set.
def left_status
  fields = i_data
  fields && decode_status_char(left_status_char)
end
|
322
|
+
|
323
|
+
# Second element of i_data converted with to_i, or i_data itself
# when it is not set.
def left_count
  fields = i_data
  fields && fields[1].to_i
end
|
326
|
+
|
327
|
+
# Third element of i_data, or i_data itself when it is not set.
def right_status_char
  fields = i_data
  fields && fields[2]
end
|
330
|
+
|
331
|
+
# Decoded form of the right status character, or i_data itself when
# it is not set.
def right_status
  fields = i_data
  fields && decode_status_char(right_status_char)
end
|
334
|
+
|
335
|
+
# Fourth element of i_data converted with to_i, or i_data itself
# when it is not set.
def right_count
  fields = i_data
  fields && fields[3].to_i
end
|
338
|
+
|
166
339
|
def species
|
167
340
|
parts = source.split('.', 2)
|
168
341
|
parts.size == 2 ? parts[0] : nil
|
@@ -177,11 +350,34 @@ module Bio
|
|
177
350
|
end
|
178
351
|
end
|
179
352
|
|
353
|
+
# Convert this sequence into a bio-alignment Sequence built from its
# source name and alignment text.
# @return [Bio::BioAlignment::Sequence]
def to_bio_alignment
  seq_id = source
  Bio::BioAlignment::Sequence.new(seq_id, text)
end
|
356
|
+
|
180
357
|
# Write this sequence through the given writer, using a header of the
# form "source:start-end" where end is start + size.
# @param writer object responding to write(header, text)
def write_fasta(writer)
  stop = start + size
  header = "#{source}:#{start}-#{stop}"
  writer.write(header, text)
end
|
184
361
|
|
362
|
+
# Whether another sequence can be appended directly to this one: it
# must start where this one ends, be on the same strand, and agree
# with this one on whether it is empty.
# @param [Sequence] o candidate sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand
  empty? == o.empty?
end
|
367
|
+
|
368
|
+
# Append another sequence to this one, producing a new Sequence.
#
# The result keeps this sequence's source, start, strand and source
# size; sizes are added and texts concatenated. Quality data is only
# carried over when both sequences have it.
#
# @param [Sequence] o sequence to append
# @return [Sequence]
def join(o)
  merged = Sequence.new(source, start, size + o.size,
                        strand, src_size, text + o.text)
  merged.quality = quality + o.quality if quality && o.quality
  merged
end
|
380
|
+
|
185
381
|
# Maps the given zero-based genomic range onto a range of string
|
186
382
|
# offsets, suitable for extracting the text for the given range
|
187
383
|
# from #text.
|
@@ -253,6 +449,19 @@ module Bio
|
|
253
449
|
''
|
254
450
|
end
|
255
451
|
|
452
|
+
# Slicing an empty sequence has no effect: there is no text to cut,
# so the sequence itself is returned.
#
# The second argument is optional so this can be called with a single
# Range, the way Block#_slice calls slice on each of its sequences, as
# well as with an offset and a length.
#
# @param offset [Range, Integer] ignored
# @param len [Integer, nil] ignored
# @return [EmptySequence] self
def slice(offset, len = nil)
  self
end
|
455
|
+
|
456
|
+
# Append another sequence to this empty sequence, producing a new
# EmptySequence with the sizes added together and this sequence's
# source, start, strand, source size and status.
# @param o sequence to append
# @return [EmptySequence]
def join(o)
  combined_size = size + o.size
  EmptySequence.new(source, start, combined_size,
                    strand, src_size, @status)
end
|
464
|
+
|
256
465
|
# Always true for this class.
# @return [Boolean]
def empty?
  true
end
|
data/lib/bio/maf/parser.rb
CHANGED
@@ -150,7 +150,7 @@ module Bio
|
|
150
150
|
#
|
151
151
|
# @return [Block] alignment block
|
152
152
|
# @api public
|
153
|
-
def
|
153
|
+
def _parse_block
|
154
154
|
return nil if at_end
|
155
155
|
if s.pos != last_block_pos
|
156
156
|
# in non-trailing block
|
@@ -296,19 +296,11 @@ module Bio
|
|
296
296
|
parse_error "unexpected line: '#{line}'"
|
297
297
|
end
|
298
298
|
end
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
postprocess_block(block)
|
305
|
-
end
|
306
|
-
|
307
|
-
def postprocess_block(block)
|
308
|
-
if block.filtered? && opts[:remove_gaps]
|
309
|
-
block.remove_gaps!
|
310
|
-
end
|
311
|
-
block
|
299
|
+
Block.new(block_vars,
|
300
|
+
seqs,
|
301
|
+
block_offset,
|
302
|
+
s.pos - block_start_pos,
|
303
|
+
filtered)
|
312
304
|
end
|
313
305
|
|
314
306
|
# Parse an 's' line.
|
@@ -419,19 +411,21 @@ module Bio
|
|
419
411
|
# @param [Array] block_offsets Offsets of blocks to parse.
|
420
412
|
# @return [Array<Block>]
|
421
413
|
# Read and parse the blocks at the given offsets, yielding each one.
#
# All chunks covering the requested length are appended before
# parsing starts, so that no block has to be stitched together from
# fragments. Every parsed block must appear at the expected offset.
# Without a block, an enumerator over the same call is returned.
#
# @param [Integer] offset offset to start reading at
# @param [Integer] len amount of data to make available
# @param [Array] block_offsets offsets of the blocks to parse
# @yield [block] each parsed block, in order
def fetch_blocks(offset, len, block_offsets)
  return enum_for(:fetch_blocks, offset, len, block_offsets) unless block_given?
  start_chunk_read_if_needed(offset, len)
  # pull in the whole merged run of blocks up front to avoid
  # fragment joining
  append_chunks_to(len)
  block_offsets.each do |expected_offset|
    parsed = _parse_block
    unless parsed
      parse_error("expected a block at offset #{expected_offset} but could not parse one!")
    end
    unless parsed.offset == expected_offset
      parse_error("got block with offset #{parsed.offset}, expected #{expected_offset}!")
    end
    yield parsed
  end
end
|
437
431
|
|
@@ -530,6 +524,10 @@ module Bio
|
|
530
524
|
_parse_header()
|
531
525
|
end
|
532
526
|
|
527
|
+
# Close the underlying object returned by #f.
def close
  f.close
end
|
530
|
+
|
533
531
|
# Create a {ParseContext} for random access, using the given
|
534
532
|
# chunk size.
|
535
533
|
#
|
@@ -574,13 +572,19 @@ module Bio
|
|
574
572
|
# `fetch_list` should be an array of `[offset, length]` tuples.
|
575
573
|
#
|
576
574
|
# @param [Array] fetch_list the fetch list
|
577
|
-
# @
|
578
|
-
|
579
|
-
|
580
|
-
if
|
581
|
-
|
575
|
+
# @yield [block] each block matched, in turn
|
576
|
+
# @return [Enumerable<Block>] each matching {Block}, if no block given
|
577
|
+
# Fetch and parse the blocks named by a fetch list.
#
# The fetch list is merged first. The parallel fetcher is used only
# under JRuby when the :threads option is greater than 1; otherwise
# blocks are fetched sequentially. Either way the result goes
# through #wrap_block_seq before reaching the caller's block.
#
# @param [Array] fetch_list the fetch list
# @yield [block] each block matched, in turn
# @return [Enumerable<Block>] each matching {Block}, if no block given
def fetch_blocks(fetch_list, &blk)
  return enum_for(:fetch_blocks, fetch_list) unless blk
  merged = merge_fetch_list(fetch_list)
  use_threads = RUBY_PLATFORM == 'java' && @opts.fetch(:threads, 1) > 1
  fetcher =
    if use_threads
      lambda { |&sink| fetch_blocks_merged_parallel(merged, &sink) }
    else
      lambda { |&sink| fetch_blocks_merged(merged, &sink) }
    end
  wrap_block_seq(fetcher, &blk)
end
|
586
590
|
|
@@ -588,23 +592,19 @@ module Bio
|
|
588
592
|
#
|
589
593
|
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
|
590
594
|
# @return [Array<Block>] the requested alignment blocks
|
591
|
-
def fetch_blocks_merged(fetch_list)
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
596
|
-
|
597
|
-
ctx.fetch_blocks(*e).each do |block|
|
598
|
-
y << block
|
599
|
-
#total_size += block.size
|
600
|
-
end
|
601
|
-
end
|
595
|
+
# Fetch and parse the blocks given by the merged fetch list,
# sequentially, passing each parsed block to the given block.
#
# A single parse context is opened with the random-access chunk size
# and each fetch list entry is handed to its #fetch_blocks in turn.
#
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
# @yield [block] each requested alignment block
# @return [Float] elapsed wall-clock time in seconds
def fetch_blocks_merged(fetch_list, &blk)
  start = Time.now
  with_context(@random_access_chunk_size) do |ctx|
    fetch_list.each do |e|
      ctx.fetch_blocks(*e, &blk)
    end
  end
  # TODO: debug log of elapsed time and throughput. The total byte
  # count was only needed for that log line, so it is no longer
  # computed here.
  Time.now - start
end
|
609
609
|
|
610
610
|
# Fetch and parse the blocks given by the merged fetch list, in
|
@@ -614,40 +614,38 @@ module Bio
|
|
614
614
|
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
|
615
615
|
# @return [Array<Block>] the requested alignment blocks
|
616
616
|
# Fetch and parse the blocks given by the merged fetch list, using
# several worker threads. Relies on java.util.concurrent classes.
#
# Workers take fetch list entries from a shared job queue; completed
# results are polled from the completion tracker's queue and each
# block in a result is yielded. Timing statistics are printed to
# $stderr once all workers have been joined.
#
# @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
# @yield [block] each requested alignment block
# @raise [RuntimeError] if a poll times out and a worker thread has
#   terminated with an exception
def fetch_blocks_merged_parallel(fetch_list)
  # Seed the sum with 0 so an empty fetch list gives 0 rather than nil,
  # which would break the throughput calculation below.
  total_size = fetch_list.collect { |e| e[1] }.reduce(0, :+)
  start = Time.now
  n_threads = @opts.fetch(:threads, 1)
  # TODO: break entries up into longer runs for more
  # sequential I/O
  jobs = java.util.concurrent.ConcurrentLinkedQueue.new(fetch_list)
  ct = CompletionTracker.new(fetch_list)
  completed = ct.queue
  threads = []
  n_threads.times { threads << make_worker(jobs, ct) }

  n_res = 0
  while n_res < fetch_list.size
    c = completed.poll(1, java.util.concurrent.TimeUnit::SECONDS)
    unless c
      # Thread#status is nil only for a thread that died from an
      # exception; a timed-out poll is otherwise just retried.
      raise "Worker failed!" if threads.find { |t| t.status.nil? }
      next
    end
    c.each do |block|
      yield block
    end
    n_res += 1
  end
  threads.each { |t| t.join }
  elapsed = Time.now - start
  $stderr.printf("Fetched blocks from %d threads in %.1fs.\n",
                 n_threads,
                 elapsed)
  mb = total_size / 1048576.0
  $stderr.printf("%.3f MB processed (%.1f MB/s).\n",
                 mb,
                 mb / elapsed)
end
|
652
650
|
|
653
651
|
# Create a worker thread for parallel parsing.
|
@@ -721,30 +719,119 @@ module Bio
|
|
721
719
|
# Delegates to {#parse_blocks_parallel} if `:threads` is set
|
722
720
|
# under JRuby.
|
723
721
|
#
|
724
|
-
# @return [Enumerator<Block>] enumerator of
|
722
|
+
# @return [Enumerator<Block>] enumerator of {Block}s if no block given.
|
723
|
+
# @yield [block] Passes each {Block} in turn to a block
|
725
724
|
# @api public
|
726
|
-
def
|
727
|
-
if
|
728
|
-
|
725
|
+
# Iterate over the alignment blocks, passing each through
# #wrap_block_seq.
#
# Under JRuby, when the :threads option is present, blocks come from
# {#parse_blocks_parallel}; otherwise from {#each_block_seq}.
#
# @return [Enumerator<Block>] enumerator of {Block}s if no block given.
# @yield [block] Passes each {Block} in turn to a block
# @api public
def each_block(&blk)
  return enum_for(:each_block) unless block_given?
  threaded = RUBY_PLATFORM == 'java' && @opts.has_key?(:threads)
  producer = method(threaded ? :parse_blocks_parallel : :each_block_seq)
  wrap_block_seq(producer, &blk)
end
|
737
|
+
alias_method :parse_blocks, :each_block
|
738
|
+
|
739
|
+
# Parse blocks one after another until the end of input is reached,
# yielding every block that was actually parsed.
# @yield [block] each parsed block
def each_block_seq
  until at_end
    parsed = _parse_block
    next unless parsed
    yield parsed
  end
end
|
745
|
+
|
746
|
+
# Parse a single block and run it through #wrap_block_seq.
#
# Returns whatever the wrapper last yields, or nil when it yields
# nothing.
def parse_block
  result = nil
  single = lambda { |&sink| sink.call(_parse_block) }
  wrap_block_seq(single) { |wrapped| result = wrapped }
  result
end
|
753
|
+
|
754
|
+
# Option keys that enable a wrapper around the block sequence, listed
# outermost first. Frozen so the shared list cannot be modified by
# accident.
WRAP_OPTS = [:as_bio_alignment, :join_blocks, :remove_gaps].freeze
|
755
|
+
|
756
|
+
# Wrap a block-producing callable in the processing stages enabled by
# the parser options, then run it.
#
# Stages are the WRAP_OPTS entries that are set in @opts, followed by
# :sequence_filter when a non-empty sequence filter is present.
#
# @param fun callable that yields blocks to the block passed to it
# @yield [block] each block after all enabled stages
def wrap_block_seq(fun, &blk)
  stages = WRAP_OPTS.select { |name| @opts[name] }
  filter = sequence_filter
  stages.push(:sequence_filter) if filter && !filter.empty?
  _wrap(stages, fun, &blk)
end
|
761
|
+
|
762
|
+
# Apply the listed wrapper stages around a block-producing callable.
#
# options should be [:outer, ..., :inner]. The first entry is removed
# from the array and handled here; the remaining entries are passed
# on to the stage helper. With no entries left, the callable is simply
# invoked with the given block.
#
# @param [Array<Symbol>] options stage names, outermost first
# @param fun callable that yields blocks to the block passed to it
# @raise [RuntimeError] for an unknown stage name
def _wrap(options, fun, &blk)
  stage = options.shift
  case stage
  when nil
    fun.call(&blk)
  when :as_bio_alignment
    conv_send(options, fun, :to_bio_alignment, &blk)
  when :join_blocks
    block_joiner(options, fun, &blk)
  when :remove_gaps
    strip_gaps = lambda { |b| b.remove_gaps! if b.filtered?; b }
    conv_map(options, fun, strip_gaps, &blk)
  when :sequence_filter
    # drop blocks left with a single sequence
    keep_multi = lambda { |b| b if b.sequences.size > 1 }
    conv_map(options, fun, keep_multi, &blk)
  else
    raise "unhandled wrapper mode: #{stage}"
  end
end
|
789
|
+
|
790
|
+
# Run the callable and pass on only those blocks that are filtered
# and still contain more than one sequence.
# @param fun callable that yields blocks to the block passed to it
# @yield [block] each qualifying block
def filter_seq_count(fun)
  fun.call do |candidate|
    next unless candidate.filtered?
    next unless candidate.sequences.size > 1
    yield candidate
  end
end
|
795
|
+
|
796
|
+
# Merge runs of adjacent, joinable blocks into single blocks.
#
# Blocks arrive from the remaining wrapper stages. A block is held
# back until the next one is seen; the two are joined when at least
# one of them is filtered and the held block reports that it is
# joinable with the new one. Otherwise the held block is yielded and
# the new one takes its place. The last held block is yielded at the
# end.
#
# @param [Array<Symbol>] options remaining wrapper stages
# @param fun callable that yields blocks to the block passed to it
# @yield [block] each joined or unjoined block
def block_joiner(options, fun)
  pending = nil
  _wrap(options, fun) do |incoming|
    mergeable = pending &&
                (pending.filtered? || incoming.filtered?) &&
                pending.joinable_with?(incoming)
    if mergeable
      pending = pending.join(incoming)
    else
      yield pending if pending
      pending = incoming
    end
  end
  yield pending if pending
end
|
809
|
+
|
810
|
+
# Wrapper stage that transforms each block with a callable and passes
# on the result, dropping results that are nil or false.
# @param [Array<Symbol>] options remaining wrapper stages
# @param search callable that yields blocks to the block passed to it
# @param fun callable applied to each block
# @yield [value] each truthy converted value
def conv_map(options, search, fun)
  _wrap(options, search) do |block|
    converted = fun.call(block)
    next unless converted
    yield converted
  end
end
|
816
|
+
|
817
|
+
# Wrapper stage that calls the named method on each block and passes
# on the result, dropping results that are nil or false.
# @param [Array<Symbol>] options remaining wrapper stages
# @param search callable that yields blocks to the block passed to it
# @param [Symbol] sym name of the method to send to each block
# @yield [value] each truthy converted value
def conv_send(options, search, sym)
  _wrap(options, search) do |block|
    converted = block.send(sym)
    next unless converted
    yield converted
  end
end
|
737
823
|
|
738
824
|
# Parse alignment blocks with a worker thread.
|
739
825
|
#
|
740
|
-
# @
|
826
|
+
# @block block handler
|
741
827
|
# @api private
|
742
828
|
def parse_blocks_parallel
|
743
829
|
queue = java.util.concurrent.LinkedBlockingQueue.new(128)
|
744
830
|
worker = Thread.new do
|
745
831
|
begin
|
746
832
|
until at_end
|
747
|
-
|
833
|
+
block = _parse_block()
|
834
|
+
queue.put(block) if block
|
748
835
|
end
|
749
836
|
queue.put(:eof)
|
750
837
|
rescue
|
@@ -752,31 +839,23 @@ module Bio
|
|
752
839
|
$stderr.puts $!.backtrace.join("\n")
|
753
840
|
end
|
754
841
|
end
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
n_final_poll += 1 unless worker.alive?
|
768
|
-
end
|
769
|
-
break if n_final_poll > 1
|
770
|
-
end
|
771
|
-
unless saw_eof
|
772
|
-
raise "worker exited unexpectedly!"
|
842
|
+
saw_eof = false
|
843
|
+
n_final_poll = 0
|
844
|
+
while true
|
845
|
+
block = queue.poll(1, java.util.concurrent.TimeUnit::SECONDS)
|
846
|
+
if block == :eof
|
847
|
+
saw_eof = true
|
848
|
+
break
|
849
|
+
elsif block
|
850
|
+
yield block
|
851
|
+
else
|
852
|
+
# timed out
|
853
|
+
n_final_poll += 1 unless worker.alive?
|
773
854
|
end
|
855
|
+
break if n_final_poll > 1
|
774
856
|
end
|
775
|
-
|
776
|
-
|
777
|
-
def each_block
|
778
|
-
until at_end
|
779
|
-
yield parse_block()
|
857
|
+
unless saw_eof
|
858
|
+
raise "worker exited unexpectedly!"
|
780
859
|
end
|
781
860
|
end
|
782
861
|
|