bio-maf 0.1.0-java → 0.2.0-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +53 -0
- data/DEVELOPMENT.md +29 -0
- data/Gemfile +1 -0
- data/README.md +69 -1
- data/Rakefile +4 -3
- data/bin/find_overlaps +21 -0
- data/bin/maf_tile +103 -0
- data/bio-maf.gemspec +43 -0
- data/features/gap-filling.feature +158 -0
- data/features/gap-removal.feature +50 -0
- data/features/step_definitions/gap-filling_steps.rb +32 -0
- data/features/step_definitions/gap_removal_steps.rb +19 -0
- data/features/step_definitions/parse_steps.rb +2 -1
- data/lib/bio/maf.rb +2 -0
- data/lib/bio/maf/index.rb +15 -8
- data/lib/bio/maf/maf.rb +267 -0
- data/lib/bio/maf/parser.rb +115 -175
- data/lib/bio/maf/tiler.rb +167 -0
- data/man/maf_tile.1 +108 -0
- data/man/maf_tile.1.ronn +104 -0
- data/spec/bio/maf/index_spec.rb +1 -0
- data/spec/bio/maf/parser_spec.rb +103 -0
- data/spec/bio/maf/tiler_spec.rb +69 -0
- data/test/data/gap-sp1.fa +6 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- metadata +65 -7
data/lib/bio/maf/parser.rb
CHANGED
@@ -9,142 +9,6 @@ module Bio
|
|
9
9
|
# Error class for MAF parsing failures.
#
# Derives from StandardError (not Exception) so that an ordinary
# `rescue` / `rescue => e` clause catches it; `rescue ParseError` and
# `rescue Exception` keep working as before.
# @api public
class ParseError < StandardError; end
|
11
11
|
|
12
|
-
# A MAF header, containing the variable-value pairs from the first
|
13
|
-
# line of the file as well as the alignment parameters.
|
14
|
-
# @api public
|
15
|
-
class Header
|
16
|
-
# Variable-value pairs from the ##maf line
|
17
|
-
# @return [Hash]
|
18
|
-
attr_accessor :vars
|
19
|
-
# Alignment parameters from the MAF header.
|
20
|
-
# @return [Hash]
|
21
|
-
attr_accessor :alignment_params
|
22
|
-
|
23
|
-
def initialize(vars, params)
|
24
|
-
@vars = vars
|
25
|
-
@alignment_params = params
|
26
|
-
end
|
27
|
-
|
28
|
-
# The required version parameter.
|
29
|
-
# @return [String]
|
30
|
-
def version
|
31
|
-
vars[:version]
|
32
|
-
end
|
33
|
-
|
34
|
-
# The optional scoring parameter, if present.
|
35
|
-
# @return [String]
|
36
|
-
def scoring
|
37
|
-
vars[:scoring]
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
# A MAF alignment block.
|
43
|
-
# @api public
|
44
|
-
class Block
|
45
|
-
# Parameters from the 'a' line starting the alignment block.
|
46
|
-
attr_reader :vars
|
47
|
-
# Sequences, one per 's' or 'e' line.
|
48
|
-
# @return [Array<Sequence>]
|
49
|
-
attr_reader :sequences
|
50
|
-
# Offset of the alignment block within the MAF file, in bytes.
|
51
|
-
# @return [Integer]
|
52
|
-
attr_reader :offset
|
53
|
-
# Size of the alignment block within the MAF file, in bytes.
|
54
|
-
# @return [Integer]
|
55
|
-
attr_reader :size
|
56
|
-
|
57
|
-
def initialize(*args)
|
58
|
-
@vars, @sequences, @offset, @size = args
|
59
|
-
end
|
60
|
-
|
61
|
-
def raw_seq(i)
|
62
|
-
sequences.fetch(i)
|
63
|
-
end
|
64
|
-
|
65
|
-
def each_raw_seq
|
66
|
-
sequences.each { |s| yield s }
|
67
|
-
end
|
68
|
-
|
69
|
-
# Text size of the alignment block. This is the number of text
|
70
|
-
# characters in each line of sequence data, including dashes and
|
71
|
-
# other gaps in the sequence.
|
72
|
-
def text_size
|
73
|
-
sequences.first.text.size
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
# A sequence within an alignment block.
|
79
|
-
# @api public
|
80
|
-
class Sequence
|
81
|
-
# @return [String] Source sequence name.
|
82
|
-
attr_reader :source
|
83
|
-
# @return [Integer] Zero-based start position.
|
84
|
-
attr_reader :start
|
85
|
-
# @return [Integer] Size of aligning region in source sequence.
|
86
|
-
attr_reader :size
|
87
|
-
# :+ or :-, indicating which strand the alignment is to.
|
88
|
-
# @return [Symbol]
|
89
|
-
attr_reader :strand
|
90
|
-
# Size of the entire source sequence, not just the aligning
|
91
|
-
# region.
|
92
|
-
# @return [Integer]
|
93
|
-
attr_reader :src_size
|
94
|
-
# Sequence data for the alignment, including insertions.
|
95
|
-
# @return [String]
|
96
|
-
attr_reader :text
|
97
|
-
# Array of raw synteny information from 'i' line.
|
98
|
-
# @return [Array<String>]
|
99
|
-
attr_accessor :i_data
|
100
|
-
# Quality string from 'q' line.
|
101
|
-
# @return [String]
|
102
|
-
attr_accessor :quality
|
103
|
-
alias_method :source_size, :src_size
|
104
|
-
|
105
|
-
def initialize(*args)
|
106
|
-
@source, @start, @size, @strand, @src_size, @text = args
|
107
|
-
end
|
108
|
-
|
109
|
-
# Whether this sequence is empty. Only true for {EmptySequence}
|
110
|
-
# instances from 'e' lines.
|
111
|
-
def empty?
|
112
|
-
false
|
113
|
-
end
|
114
|
-
|
115
|
-
def write_fasta(writer)
|
116
|
-
writer.write("#{source}:#{start}-#{start + size}",
|
117
|
-
text)
|
118
|
-
end
|
119
|
-
end
|
120
|
-
|
121
|
-
# An empty sequence record from an 'e' line.
|
122
|
-
#
|
123
|
-
# This indicates that "there isn't aligning DNA for a species but
|
124
|
-
# that the current block is bridged by a chain that connects
|
125
|
-
# blocks before and after this block" (MAF spec).
|
126
|
-
# @api public
|
127
|
-
class EmptySequence < Sequence
|
128
|
-
attr_reader :status
|
129
|
-
|
130
|
-
def initialize(*args)
|
131
|
-
super(*args[0..4])
|
132
|
-
@status = args[5]
|
133
|
-
end
|
134
|
-
|
135
|
-
def text
|
136
|
-
''
|
137
|
-
end
|
138
|
-
|
139
|
-
def empty?
|
140
|
-
true
|
141
|
-
end
|
142
|
-
|
143
|
-
def write_fasta(writer)
|
144
|
-
raise "empty sequence output not implemented!"
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
12
|
# Reads MAF files in chunks.
|
149
13
|
# @api private
|
150
14
|
class ChunkReader
|
@@ -399,16 +263,25 @@ module Bio
|
|
399
263
|
payload = s.rest
|
400
264
|
s.pos = s.string.size # jump to EOS
|
401
265
|
end
|
266
|
+
filtered = false
|
402
267
|
lines = payload.split("\n")
|
403
268
|
until lines.empty?
|
404
269
|
line = lines.shift
|
405
270
|
first = line.getbyte(0)
|
406
271
|
if first == S
|
407
272
|
seq = parse_seq_line(line, sequence_filter)
|
408
|
-
|
273
|
+
if seq
|
274
|
+
seqs << seq
|
275
|
+
else
|
276
|
+
filtered = true
|
277
|
+
end
|
409
278
|
elsif first == E && parse_empty
|
410
279
|
e_seq = parse_empty_line(line, sequence_filter)
|
411
|
-
|
280
|
+
if e_seq
|
281
|
+
seqs << e_seq
|
282
|
+
else
|
283
|
+
filtered = true
|
284
|
+
end
|
412
285
|
elsif first == I && parse_extended
|
413
286
|
parts = line.split
|
414
287
|
parse_error("wrong i source #{parts[1]}!") unless seqs.last.source == parts[1]
|
@@ -423,10 +296,19 @@ module Bio
|
|
423
296
|
parse_error "unexpected line: '#{line}'"
|
424
297
|
end
|
425
298
|
end
|
426
|
-
|
427
|
-
|
428
|
-
|
429
|
-
|
299
|
+
block = Block.new(block_vars,
|
300
|
+
seqs,
|
301
|
+
block_offset,
|
302
|
+
s.pos - block_start_pos,
|
303
|
+
filtered)
|
304
|
+
postprocess_block(block)
|
305
|
+
end
|
306
|
+
|
307
|
+
# Final processing step applied to a newly built block.
#
# Calls Block#remove_gaps! only when the block reports that sequences
# were filtered out of it and the :remove_gaps option is set.
#
# @param block the block to post-process
# @return the same block
def postprocess_block(block)
  block.remove_gaps! if block.filtered? && opts[:remove_gaps]
  block
end
|
431
313
|
|
432
314
|
# Parse an 's' line.
|
@@ -503,12 +385,13 @@ module Bio
|
|
503
385
|
# A MAF parsing context, used for random-access parsing.
|
504
386
|
class ParseContext
|
505
387
|
include MAFParsing
|
506
|
-
attr_accessor :f, :s, :cr, :parser
|
388
|
+
attr_accessor :f, :s, :cr, :parser, :opts
|
507
389
|
attr_accessor :chunk_start, :last_block_pos, :at_end
|
508
390
|
|
509
|
-
def initialize(fd, chunk_size, parser
|
391
|
+
def initialize(fd, chunk_size, parser)
|
510
392
|
@f = fd
|
511
393
|
@parser = parser
|
394
|
+
@opts = parser.opts
|
512
395
|
reader = opts[:chunk_reader] || ChunkReader
|
513
396
|
@cr = reader.new(@f, chunk_size)
|
514
397
|
@last_block_pos = -1
|
@@ -580,6 +463,7 @@ module Bio
|
|
580
463
|
#
|
581
464
|
# * `:parse_extended`: whether to parse 'i' and 'q' lines
|
582
465
|
# * `:parse_empty`: whether to parse 'e' lines
|
466
|
+
# * `:remove_gaps`: remove gaps left after filtering sequences
|
583
467
|
# * `:chunk_size`: read MAF file in chunks of this many bytes
|
584
468
|
# * `:random_chunk_size`: as above, but for random access ({#fetch_blocks})
|
585
469
|
# * `:merge_max`: merge up to this many bytes of blocks for
|
@@ -611,9 +495,6 @@ module Bio
|
|
611
495
|
attr_reader :chunk_start
|
612
496
|
# @return [Integer] offset of the last block start in this chunk.
|
613
497
|
attr_reader :last_block_pos
|
614
|
-
# Sequence filter to apply.
|
615
|
-
# @api public
|
616
|
-
attr_accessor :sequence_filter
|
617
498
|
|
618
499
|
# @api private
|
619
500
|
attr_accessor :parse_extended
|
@@ -630,6 +511,9 @@ module Bio
|
|
630
511
|
# @api public
|
631
512
|
def initialize(file_spec, opts={})
|
632
513
|
@opts = opts
|
514
|
+
if RUBY_PLATFORM == 'java'
|
515
|
+
opts[:threads] ||= java.lang.Runtime.runtime.availableProcessors
|
516
|
+
end
|
633
517
|
chunk_size = opts[:chunk_size] || SEQ_CHUNK_SIZE
|
634
518
|
@random_access_chunk_size = opts[:random_chunk_size] || RANDOM_CHUNK_SIZE
|
635
519
|
@merge_max = opts[:merge_max] || MERGE_MAX
|
@@ -654,7 +538,7 @@ module Bio
|
|
654
538
|
# Build a {ParseContext} for this parser on a newly opened file handle.
#
# @param chunk_size chunk size passed through to the ParseContext
# @return [ParseContext]
def context(chunk_size)
  # The file is reopened rather than duplicated: IO#dup calls dup(2)
  # internally, but seems broken on JRuby...
  ParseContext.new(File.open(file_spec), chunk_size, self)
end
|
659
543
|
|
660
544
|
# Execute the given block with a {ParseContext} using the given
|
@@ -671,6 +555,20 @@ module Bio
|
|
671
555
|
end
|
672
556
|
end
|
673
557
|
|
558
|
+
# Sequence filter to apply. An empty Hash is created on first access
# when no filter has been set, so callers may assign keys directly.
# @api public
# @return [Hash]
def sequence_filter
  @sequence_filter = {} unless @sequence_filter
  @sequence_filter
end

# Replace the sequence filter.
# @api public
# @param [Hash] filter the new filter
def sequence_filter=(filter)
  @sequence_filter = filter
end
|
571
|
+
|
674
572
|
# Fetch and parse blocks given by `fetch_list`.
|
675
573
|
#
|
676
574
|
# `fetch_list` should be an array of `[offset, length]` tuples.
|
@@ -723,25 +621,22 @@ module Bio
|
|
723
621
|
# TODO: break entries up into longer runs for more
|
724
622
|
# sequential I/O
|
725
623
|
jobs = java.util.concurrent.ConcurrentLinkedQueue.new(fetch_list)
|
726
|
-
|
624
|
+
ct = CompletionTracker.new(fetch_list)
|
625
|
+
completed = ct.queue
|
727
626
|
threads = []
|
728
|
-
n_threads.times { threads << make_worker(jobs,
|
729
|
-
|
730
|
-
|
731
|
-
while
|
732
|
-
c = completed.poll(
|
733
|
-
|
734
|
-
if threads.find { |t| t.
|
735
|
-
|
736
|
-
else
|
737
|
-
raise "No threads alive, completed #{n_completed}/#{fetch_list.size} jobs!"
|
738
|
-
end
|
627
|
+
n_threads.times { threads << make_worker(jobs, ct) }
|
628
|
+
|
629
|
+
n_res = 0
|
630
|
+
while n_res < fetch_list.size
|
631
|
+
c = completed.poll(1, java.util.concurrent.TimeUnit::SECONDS)
|
632
|
+
unless c
|
633
|
+
raise "Worker failed!" if threads.find { |t| t.status.nil? }
|
634
|
+
next
|
739
635
|
end
|
740
|
-
raise "worker failed: #{c}" if c.is_a? Exception
|
741
636
|
c.each do |block|
|
742
637
|
y << block
|
743
638
|
end
|
744
|
-
|
639
|
+
n_res += 1
|
745
640
|
end
|
746
641
|
threads.each { |t| t.join }
|
747
642
|
elapsed = Time.now - start
|
@@ -758,26 +653,25 @@ module Bio
|
|
758
653
|
# Create a worker thread for parallel parsing.
#
# The thread opens its own parse context and polls `jobs` until it
# returns nil. Each request is expanded into the arguments of
# ParseContext#fetch_blocks; its third element lists the blocks that
# must come back. Every parsed batch is handed to `ct`.
#
# @param jobs queue of fetch requests; must respond to #poll and
#   return nil once drained
# @param ct collector receiving each batch of parsed blocks via #<<
# @return [Thread] the started worker
# @see #fetch_blocks_merged_parallel
def make_worker(jobs, ct)
  Thread.new do
    begin
      with_context(@random_access_chunk_size) do |ctx|
        while (req = jobs.poll)
          n_blocks = req[2].size
          blocks = ctx.fetch_blocks(*req).to_a
          if blocks.size != n_blocks
            # Report the request that came up short. (This message used
            # to interpolate an undefined variable, which replaced the
            # intended error with a NameError.)
            raise "expected #{n_blocks}, got #{blocks.size}: #{req.inspect}"
          end
          ct << blocks
        end
      end
    rescue Exception => e
      # Log every failure from the worker, then re-raise it unchanged
      # so the thread still terminates with the original exception.
      $stderr.puts "Worker failing: #{e.class}: #{e}"
      $stderr.puts e.backtrace.join("\n")
      raise
    end
  end
end
|
@@ -860,14 +754,19 @@ module Bio
|
|
860
754
|
end
|
861
755
|
Enumerator.new do |y|
|
862
756
|
saw_eof = false
|
863
|
-
|
757
|
+
n_final_poll = 0
|
758
|
+
while true
|
864
759
|
block = queue.poll(1, java.util.concurrent.TimeUnit::SECONDS)
|
865
760
|
if block == :eof
|
866
761
|
saw_eof = true
|
867
762
|
break
|
868
763
|
elsif block
|
869
764
|
y << block
|
765
|
+
else
|
766
|
+
# timed out
|
767
|
+
n_final_poll += 1 unless worker.alive?
|
870
768
|
end
|
769
|
+
break if n_final_poll > 1
|
871
770
|
end
|
872
771
|
unless saw_eof
|
873
772
|
raise "worker exited unexpectedly!"
|
@@ -883,6 +782,47 @@ module Bio
|
|
883
782
|
|
884
783
|
end
|
885
784
|
|
785
|
+
# Re-orders batches of parsed blocks delivered by worker threads so
# that they reach the consumer queue in fetch-list order.
#
# A batch is identified by the offset of its first block. Batches that
# arrive ahead of their turn are parked in `delayed` until every
# earlier batch has been queued.
class CompletionTracker
  # queue:   bounded queue the consumer polls for ordered batches
  # offsets: offsets of the batches that have not been queued yet
  # delayed: early batches, keyed by the offset of their first block
  attr_reader :queue, :offsets, :delayed

  # @param fetch_list [Array] fetch requests; element 0 of each entry
  #   is used as the expected offset of that request's first block
  def initialize(fetch_list)
    @offsets = fetch_list.map { |e| e[0] }
    @queue = java.util.concurrent.LinkedBlockingQueue.new(128)
    @delayed = {}
    @sem = Mutex.new
  end

  # @return the offset of the batch that must be queued next, or nil
  #   when nothing is outstanding
  def next_expected
    offsets.first
  end

  # Accept a finished batch from a worker.
  #
  # NOTE(review): queue.put runs while the lock is held and the queue
  # is bounded, so a full queue presumably stalls every worker until
  # the consumer catches up — confirm this backpressure is intended.
  #
  # @param blocks [Array] non-empty list of blocks; the first one must
  #   respond to #offset
  # @return [CompletionTracker] self
  # @raise [ArgumentError] if the batch is empty, since it could not
  #   be matched to an expected offset
  def <<(blocks)
    raise ArgumentError, "cannot track an empty batch of blocks" if blocks.empty?
    @sem.synchronize do
      f_offset = blocks.first.offset
      if f_offset == next_expected
        offsets.shift
        queue.put(blocks)
        drain_delayed
      else
        # out of order
        delayed[f_offset] = blocks
      end
    end
    self
  end

  # Queue every parked batch that has now become the next expected
  # one. Callers must already hold the lock.
  def drain_delayed
    while (batch = delayed.delete(next_expected))
      offsets.shift
      queue.put(batch)
    end
  end
end
|
820
|
+
|
821
|
+
# Exposes parser internals for unit tests: a class that only mixes in
# MAFParsing and adds no behavior of its own.
class DummyParser
  include MAFParsing
end
|
825
|
+
|
886
826
|
end
|
887
827
|
|
888
828
|
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
require 'zlib'
|
2
|
+
|
3
|
+
module Bio::MAF
|
4
|
+
|
5
|
+
# Tiles a given genomic interval.
# Inspired by: lib/bx/align/tools/tile.py in bx-python
#
# Produces one line of text per requested species, covering every
# position of the interval. Where several alignment blocks overlap,
# the block with the highest score wins. Positions not covered by any
# block are filled from the reference sequence (or 'N') for the first
# species and with '*' for all others.
class Tiler

  # Block index; must respond to find(intervals, parser).
  attr_accessor :index
  # Parser handed to the index; its sequence_filter is updated by #tile.
  attr_accessor :parser
  # Optional reference sequence: a String, or an object responding to
  # read_interval(start, end) such as FASTARangeReader.
  attr_accessor :reference
  # GenomicInterval
  attr_accessor :interval
  # Species to emit, in output order; the first entry receives the
  # reference data for uncovered regions.
  attr_accessor :species
  # Optional mapping from species to the name written by #write_fasta.
  attr_accessor :species_map

  def initialize
    @species_map = {}
  end

  # Fetch reference sequence data for a zero-based half-open range.
  #
  # @param range [Range] positions to read
  # @return [String, nil] the reference text, or nil without a reference
  # @raise [RuntimeError] if the reference is of an unsupported kind
  def ref_data(range)
    return nil unless reference
    if reference.respond_to?(:read_interval)
      reference.read_interval(range.begin, range.end)
    elsif reference.is_a?(String)
      reference.slice(range)
    else
      raise "Unhandled reference data source: #{reference}"
    end
  end

  # Tile the interval.
  #
  # Side effect: sets parser.sequence_filter[:only_species] to the
  # configured species list.
  #
  # @return [Array<String>] one text per entry of #species, same order
  def tile
    parser.sequence_filter[:only_species] = @species
    # TODO: remove gaps
    # Ascending score order: later (higher-scoring) blocks overwrite
    # earlier ones in the mask.
    blocks = index.find([interval], parser).sort_by { |b| b.vars[:score] }
    i_start = interval.zero_start
    i_end = interval.zero_end
    mask = Array.new(interval.length, :ref)
    blocks.each do |block|
      ref = block.ref_seq
      slice_start = [i_start, ref.start].max
      slice_end = [i_end, ref.end].min
      # Skip blocks that do not overlap the interval. Without this, a
      # negative range end would make Array#fill count from the end of
      # the mask and overwrite unrelated positions.
      next unless slice_start < slice_end
      mask.fill(block,
                (slice_start - i_start)...(slice_end - i_start))
    end
    ref_region = ref_data(i_start...i_end)
    text = species.map { ''.dup }
    nonref_text = text.drop(1)
    runs(mask) do |range, block|
      if block.equal?(:ref)
        # not covered by an alignment block
        # use the reference sequence if given, otherwise 'N'
        range_size = range.end - range.begin
        text[0] << (ref_region ? ref_region.slice(range) : 'N' * range_size)
        stars = '*' * range_size
        nonref_text.each { |t| t << stars }
      else
        # covered by an alignment block
        g_range = (range.begin + i_start)...(range.end + i_start)
        t_range = block.ref_seq.text_range(g_range)
        species.each_with_index do |sp, i|
          seq = block.sequences.find { |s| s.source == sp || s.species == sp }
          text[i] << if seq
                       # got alignment text
                       seq.text.slice(t_range)
                     else
                       # no alignment for this one here, use '*'
                       '*' * (t_range.end - t_range.begin)
                     end
        end
      end
    end
    text
  end

  # Write the tiled interval as FASTA, one record per species. Species
  # names are translated through #species_map when an entry exists.
  #
  # @param f output object responding to #puts
  def write_fasta(f)
    species.zip(tile) do |sp, text|
      f.puts ">#{species_map[sp] || sp}"
      f.puts text
    end
  end

  # Yield each maximal run of identical (same-object) mask entries.
  # Yields nothing for an empty mask.
  #
  # @param mask [Array] mask entries
  # @yieldparam range [Range] half-open index range of the run
  # @yieldparam obj the object occupying the run
  def runs(mask)
    return if mask.empty?
    cur = nil
    cur_start = nil
    mask.each_with_index do |obj, i|
      next if cur.equal?(obj)
      yield(cur_start...i, cur) if cur
      cur = obj
      cur_start = i
    end
    yield(cur_start...mask.size, cur)
  end

end
|
110
|
+
|
111
|
+
# Reads sub-ranges of the sequence held in a single-record FASTA
# source, streaming it line by line.
#
# Reading forward continues from the current position; a request that
# starts before the current position rewinds the source and scans
# again from the top.
class FASTARangeReader
  # Byte value of '>', which starts a FASTA description line.
  GT = '>'.getbyte(0)

  # f:   the underlying IO-like object
  # pos: zero-based sequence offset of the next unread line
  attr_reader :f, :pos

  # @param fspec an IO-like object (anything responding to #seek), or
  #   a path; paths ending in ".gz" are opened with Zlib::GzipReader.
  #   An IO-like object is read from its current position and must be
  #   positioned at the description line.
  def initialize(fspec)
    @f = if fspec.respond_to?(:seek)
           fspec
         else
           reader_class = fspec =~ /\.gz\z/ ? Zlib::GzipReader : File
           reader_class.open(fspec)
         end
    position_at_start
  end

  # Consume the description line at the current position and reset the
  # sequence offset to zero.
  #
  # @raise [RuntimeError] if the line read does not start with '>'
  def position_at_start
    first = f.readline
    raise "expected FASTA comment" unless first =~ /^>/
    @pos = 0
  end

  # Read the sequence text for a zero-based, half-open interval.
  #
  # @param z_start [Integer] first position to return
  # @param z_end [Integer] position just past the last one to return
  # @return [String] the requested text; shorter than requested if the
  #   sequence ends first, and empty for an empty interval
  # @raise [RuntimeError] if a second description line is encountered
  def read_interval(z_start, z_end)
    data = ''.dup
    region_size = z_end - z_start
    return data unless region_size > 0
    if z_start < pos
      # The start lies in a line that was already consumed, so go back
      # to the top of the source before re-reading the header.
      f.rewind
      position_at_start
    end
    f.each_line do |line_raw|
      if line_raw.getbyte(0) == GT
        raise "unexpected description line: #{line_raw.inspect}"
      end
      line = line_raw.strip
      line_start = pos
      # Always account for the consumed line, including the last one
      # of the region, so that pos stays in step with the source.
      @pos = line_start + line.size
      next if @pos <= z_start
      # Only the first overlapping line starts part-way through.
      offset = [z_start - line_start, 0].max
      data << line.slice(offset, region_size - data.size)
      break if data.size >= region_size
    end
    data
  end
end
|
167
|
+
end
|