pdf-reader 1.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
|
@@ -17,11 +18,12 @@ module PDF
|
|
17
18
|
#
|
18
19
|
class LZW # :nodoc:
|
19
20
|
|
21
|
+
# Wraps an LZW encoded string
|
20
22
|
class BitStream # :nodoc:
|
21
23
|
|
22
24
|
def initialize(data, bits_in_chunk)
|
23
25
|
@data = data
|
24
|
-
@data.force_encoding("BINARY")
|
26
|
+
@data.force_encoding("BINARY")
|
25
27
|
@bits_in_chunk = bits_in_chunk
|
26
28
|
@current_pos = 0
|
27
29
|
@bits_left_in_byte = 8
|
@@ -81,9 +83,10 @@ module PDF
|
|
81
83
|
#
|
82
84
|
def self.decode(data)
|
83
85
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
84
|
-
result =
|
86
|
+
result = "".dup
|
85
87
|
until (code = stream.read) == CODE_EOD
|
86
88
|
if code == CODE_CLEAR_TABLE
|
89
|
+
stream.set_bits_in_chunk(9)
|
87
90
|
string_table = StringTable.new
|
88
91
|
code = stream.read
|
89
92
|
break if code == CODE_EOD
|
@@ -114,11 +117,10 @@ module PDF
|
|
114
117
|
result
|
115
118
|
end
|
116
119
|
|
117
|
-
private
|
118
|
-
|
119
120
|
def self.create_new_string(string_table,some_code, other_code)
|
120
121
|
string_table[some_code] + string_table[other_code][0].chr
|
121
122
|
end
|
123
|
+
private_class_method :create_new_string
|
122
124
|
|
123
125
|
end
|
124
126
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
|
6
|
+
# A null object security handler. Used when a PDF is unencrypted.
|
7
|
+
class NullSecurityHandler
|
8
|
+
|
9
|
+
def self.supports?(encrypt)
|
10
|
+
encrypt.nil?
|
11
|
+
end
|
12
|
+
|
13
|
+
def decrypt(buf, _ref)
|
14
|
+
buf
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -1,10 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'hashery/lru_hash'
|
2
5
|
|
3
6
|
class PDF::Reader
|
4
7
|
|
5
8
|
# A Hash-like object for caching commonly used objects from a PDF file.
|
6
9
|
#
|
7
|
-
# This is an internal class
|
10
|
+
# This is an internal class, no promises about a stable API.
|
8
11
|
#
|
9
12
|
class ObjectCache # nodoc
|
10
13
|
|
@@ -13,53 +16,67 @@ class PDF::Reader
|
|
13
16
|
# avoid lots of repetitive (and expensive) tokenising
|
14
17
|
CACHEABLE_TYPES = [:Catalog, :Page, :Pages]
|
15
18
|
|
16
|
-
|
19
|
+
attr_reader :hits, :misses
|
20
|
+
|
21
|
+
def initialize(lru_size = 1000)
|
17
22
|
@objects = {}
|
23
|
+
@lru_cache = Hashery::LRUHash.new(lru_size.to_i)
|
24
|
+
@hits = 0
|
25
|
+
@misses = 0
|
18
26
|
end
|
19
27
|
|
20
28
|
def [](key)
|
21
|
-
|
29
|
+
update_stats(key)
|
30
|
+
@objects[key] || @lru_cache[key]
|
22
31
|
end
|
23
32
|
|
24
33
|
def []=(key, value)
|
25
|
-
|
34
|
+
if cacheable?(value)
|
35
|
+
@objects[key] = value
|
36
|
+
else
|
37
|
+
@lru_cache[key] = value
|
38
|
+
end
|
26
39
|
end
|
27
40
|
|
28
41
|
def fetch(key, local_default = nil)
|
29
|
-
|
42
|
+
update_stats(key)
|
43
|
+
@objects[key] || @lru_cache.fetch(key, local_default)
|
30
44
|
end
|
31
45
|
|
32
46
|
def each(&block)
|
33
47
|
@objects.each(&block)
|
48
|
+
@lru_cache.each(&block)
|
34
49
|
end
|
35
50
|
alias :each_pair :each
|
36
51
|
|
37
52
|
def each_key(&block)
|
38
53
|
@objects.each_key(&block)
|
54
|
+
@lru_cache.each_key(&block)
|
39
55
|
end
|
40
56
|
|
41
57
|
def each_value(&block)
|
42
58
|
@objects.each_value(&block)
|
59
|
+
@lru_cache.each_value(&block)
|
43
60
|
end
|
44
61
|
|
45
62
|
def size
|
46
|
-
@objects.size
|
63
|
+
@objects.size + @lru_cache.size
|
47
64
|
end
|
48
65
|
alias :length :size
|
49
66
|
|
50
67
|
def empty?
|
51
|
-
@objects.empty?
|
68
|
+
@objects.empty? && @lru_cache.empty?
|
52
69
|
end
|
53
70
|
|
54
|
-
def
|
55
|
-
@objects.
|
71
|
+
def include?(key)
|
72
|
+
@objects.include?(key) || @lru_cache.include?(key)
|
56
73
|
end
|
57
|
-
alias :
|
58
|
-
alias :key? :
|
59
|
-
alias :member? :
|
74
|
+
alias :has_key? :include?
|
75
|
+
alias :key? :include?
|
76
|
+
alias :member? :include?
|
60
77
|
|
61
78
|
def has_value?(value)
|
62
|
-
@objects.has_value?(value)
|
79
|
+
@objects.has_value?(value) || @lru_cache.has_value?(value)
|
63
80
|
end
|
64
81
|
|
65
82
|
def to_s
|
@@ -67,19 +84,26 @@ class PDF::Reader
|
|
67
84
|
end
|
68
85
|
|
69
86
|
def keys
|
70
|
-
@objects.keys
|
87
|
+
@objects.keys + @lru_cache.keys
|
71
88
|
end
|
72
89
|
|
73
90
|
def values
|
74
|
-
@objects.values
|
91
|
+
@objects.values + @lru_cache.values
|
75
92
|
end
|
76
93
|
|
77
94
|
private
|
78
95
|
|
96
|
+
def update_stats(key)
|
97
|
+
if has_key?(key)
|
98
|
+
@hits += 1
|
99
|
+
else
|
100
|
+
@misses += 1
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
79
104
|
def cacheable?(obj)
|
80
105
|
obj.is_a?(Hash) && CACHEABLE_TYPES.include?(obj[:Type])
|
81
106
|
end
|
82
107
|
|
83
|
-
|
84
108
|
end
|
85
109
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
class PDF::Reader
|
4
5
|
# Provides low level access to the objects in a PDF file via a hash-like
|
@@ -41,10 +42,11 @@ class PDF::Reader
|
|
41
42
|
#
|
42
43
|
def initialize(input, opts = {})
|
43
44
|
@io = extract_io_from(input)
|
44
|
-
@pdf_version = read_version
|
45
45
|
@xref = PDF::Reader::XRef.new(@io)
|
46
|
+
@pdf_version = read_version
|
46
47
|
@trailer = @xref.trailer
|
47
|
-
@cache = PDF::Reader::ObjectCache.new
|
48
|
+
@cache = opts[:cache] || PDF::Reader::ObjectCache.new
|
49
|
+
@sec_handler = NullSecurityHandler.new
|
48
50
|
@sec_handler = build_security_handler(opts)
|
49
51
|
end
|
50
52
|
|
@@ -76,16 +78,7 @@ class PDF::Reader
|
|
76
78
|
key = PDF::Reader::Reference.new(key.to_i, 0)
|
77
79
|
end
|
78
80
|
|
79
|
-
|
80
|
-
@cache[key]
|
81
|
-
elsif xref[key].is_a?(Fixnum)
|
82
|
-
buf = new_buffer(xref[key])
|
83
|
-
@cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
84
|
-
elsif xref[key].is_a?(PDF::Reader::Reference)
|
85
|
-
container_key = xref[key]
|
86
|
-
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
87
|
-
@cache[key] = object_streams[container_key][key.id]
|
88
|
-
end
|
81
|
+
@cache[key] ||= fetch_object(key) || fetch_object_stream(key)
|
89
82
|
rescue InvalidObjectError
|
90
83
|
return default
|
91
84
|
end
|
@@ -102,21 +95,7 @@ class PDF::Reader
|
|
102
95
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
96
|
#
|
104
97
|
def deref!(key)
|
105
|
-
|
106
|
-
when Hash
|
107
|
-
{}.tap { |hash|
|
108
|
-
object.each do |k, value|
|
109
|
-
hash[k] = deref!(value)
|
110
|
-
end
|
111
|
-
}
|
112
|
-
when PDF::Reader::Stream
|
113
|
-
object.hash = deref!(object.hash)
|
114
|
-
object
|
115
|
-
when Array
|
116
|
-
object.map { |value| deref!(value) }
|
117
|
-
else
|
118
|
-
object
|
119
|
-
end
|
98
|
+
deref_internal!(key, {})
|
120
99
|
end
|
121
100
|
|
122
101
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
@@ -266,24 +245,95 @@ class PDF::Reader
|
|
266
245
|
|
267
246
|
private
|
268
247
|
|
269
|
-
|
270
|
-
|
248
|
+
# parse a traditional object from the PDF, starting from the byte offset indicated
|
249
|
+
# in the xref table
|
250
|
+
#
|
251
|
+
def fetch_object(key)
|
252
|
+
if xref[key].is_a?(Integer)
|
253
|
+
buf = new_buffer(xref[key])
|
254
|
+
decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# parse an object that's embedded in an object stream in the PDF
|
259
|
+
#
|
260
|
+
def fetch_object_stream(key)
|
261
|
+
if xref[key].is_a?(PDF::Reader::Reference)
|
262
|
+
container_key = xref[key]
|
263
|
+
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
264
|
+
object_streams[container_key][key.id]
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
# Private implementation of deref!, which exists to ensure the `seen` argument
|
269
|
+
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
270
|
+
# doesn't need to be part of the public API.
|
271
|
+
#
|
272
|
+
def deref_internal!(key, seen)
|
273
|
+
seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
|
274
|
+
|
275
|
+
return seen[seen_key] if seen.key?(seen_key)
|
276
|
+
|
277
|
+
case object = deref(key)
|
278
|
+
when Hash
|
279
|
+
seen[seen_key] ||= {}
|
280
|
+
object.each do |k, value|
|
281
|
+
seen[seen_key][k] = deref_internal!(value, seen)
|
282
|
+
end
|
283
|
+
seen[seen_key]
|
284
|
+
when PDF::Reader::Stream
|
285
|
+
seen[seen_key] ||= PDF::Reader::Stream.new({}, object.data)
|
286
|
+
object.hash.each do |k,value|
|
287
|
+
seen[seen_key].hash[k] = deref_internal!(value, seen)
|
288
|
+
end
|
289
|
+
seen[seen_key]
|
290
|
+
when Array
|
291
|
+
seen[seen_key] ||= []
|
292
|
+
object.each do |value|
|
293
|
+
seen[seen_key] << deref_internal!(value, seen)
|
294
|
+
end
|
295
|
+
seen[seen_key]
|
296
|
+
else
|
297
|
+
object
|
298
|
+
end
|
299
|
+
end
|
271
300
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
301
|
+
def build_security_handler(opts = {})
|
302
|
+
encrypt = deref(trailer[:Encrypt])
|
303
|
+
if NullSecurityHandler.supports?(encrypt)
|
304
|
+
NullSecurityHandler.new
|
305
|
+
elsif StandardSecurityHandler.supports?(encrypt)
|
306
|
+
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
307
|
+
StandardSecurityHandler.new(
|
308
|
+
key_length: (encrypt[:Length] || 40).to_i,
|
309
|
+
revision: encrypt[:R],
|
310
|
+
owner_key: encrypt[:O],
|
311
|
+
user_key: encrypt[:U],
|
312
|
+
permissions: encrypt[:P].to_i,
|
313
|
+
encrypted_metadata: encmeta,
|
314
|
+
file_id: (deref(trailer[:ID]) || []).first,
|
315
|
+
password: opts[:password],
|
316
|
+
cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
317
|
+
)
|
318
|
+
elsif StandardSecurityHandlerV5.supports?(encrypt)
|
319
|
+
StandardSecurityHandlerV5.new(
|
320
|
+
O: encrypt[:O],
|
321
|
+
U: encrypt[:U],
|
322
|
+
OE: encrypt[:OE],
|
323
|
+
UE: encrypt[:UE],
|
324
|
+
password: opts[:password]
|
325
|
+
)
|
276
326
|
else
|
277
|
-
|
327
|
+
UnimplementedSecurityHandler.new
|
278
328
|
end
|
279
329
|
end
|
280
330
|
|
281
331
|
def decrypt(ref, obj)
|
282
|
-
return obj unless sec_handler?
|
283
|
-
|
284
332
|
case obj
|
285
333
|
when PDF::Reader::Stream then
|
286
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
287
337
|
obj
|
288
338
|
when Hash then
|
289
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -312,18 +362,22 @@ class PDF::Reader
|
|
312
362
|
# returns a nested array of object references for all pages in this object store.
|
313
363
|
#
|
314
364
|
def get_page_objects(ref)
|
315
|
-
obj =
|
365
|
+
obj = deref(ref)
|
366
|
+
|
367
|
+
unless obj.kind_of?(::Hash)
|
368
|
+
raise MalformedPDFError, "Dereferenced page object must be a dict"
|
369
|
+
end
|
316
370
|
|
317
371
|
if obj[:Type] == :Page
|
318
372
|
ref
|
319
|
-
elsif obj[:
|
373
|
+
elsif obj[:Kids]
|
320
374
|
deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
|
321
375
|
end
|
322
376
|
end
|
323
377
|
|
324
378
|
def read_version
|
325
379
|
@io.seek(0)
|
326
|
-
|
380
|
+
_m, version = *@io.read(10).match(/PDF-(\d.\d)/)
|
327
381
|
@io.seek(0)
|
328
382
|
version.to_f
|
329
383
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
# Small util class for detecting the orientation of a single PDF page. Accounts
|
6
|
+
# for any page rotation that is in place.
|
7
|
+
#
|
8
|
+
# OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
|
9
|
+
# => "portrait"
|
10
|
+
#
|
11
|
+
class OrientationDetector
|
12
|
+
def initialize(attributes)
|
13
|
+
@attributes = attributes
|
14
|
+
end
|
15
|
+
|
16
|
+
def orientation
|
17
|
+
@orientation ||= detect_orientation
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def detect_orientation
|
23
|
+
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
|
+
rotation = @attributes[:Rotate].to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
|
+
if width > height
|
28
|
+
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
|
+
else
|
30
|
+
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
class PDF::Reader
|
4
|
+
# remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
|
5
|
+
# uses slightly offset overlapping characters to achieve a fake 'bold' effect.
|
6
|
+
class OverlappingRunsFilter
|
7
|
+
|
8
|
+
# This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
|
9
|
+
# have identical characters) then one will be discarded
|
10
|
+
OVERLAPPING_THRESHOLD = 0.5
|
11
|
+
|
12
|
+
def self.exclude_redundant_runs(runs)
|
13
|
+
sweep_line_status = Array.new
|
14
|
+
event_point_schedule = Array.new
|
15
|
+
to_exclude = []
|
16
|
+
|
17
|
+
runs.each do |run|
|
18
|
+
event_point_schedule << EventPoint.new(run.x, run)
|
19
|
+
event_point_schedule << EventPoint.new(run.endx, run)
|
20
|
+
end
|
21
|
+
|
22
|
+
event_point_schedule.sort! { |a,b| a.x <=> b.x }
|
23
|
+
|
24
|
+
event_point_schedule.each do |event_point|
|
25
|
+
run = event_point.run
|
26
|
+
|
27
|
+
if event_point.start?
|
28
|
+
if detect_intersection(sweep_line_status, event_point)
|
29
|
+
to_exclude << run
|
30
|
+
end
|
31
|
+
sweep_line_status.push(run)
|
32
|
+
else
|
33
|
+
sweep_line_status.delete(run)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
runs - to_exclude
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.detect_intersection(sweep_line_status, event_point)
|
40
|
+
sweep_line_status.each do |open_text_run|
|
41
|
+
if event_point.x >= open_text_run.x &&
|
42
|
+
event_point.x <= open_text_run.endx &&
|
43
|
+
open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
|
44
|
+
return true
|
45
|
+
end
|
46
|
+
end
|
47
|
+
return false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Utility class used to avoid modifying the underlying TextRun objects while we're
|
52
|
+
# looking for duplicates
|
53
|
+
class EventPoint
|
54
|
+
attr_reader :x, :run
|
55
|
+
|
56
|
+
def initialize x, run
|
57
|
+
@x, @run = x, run
|
58
|
+
end
|
59
|
+
|
60
|
+
def start?
|
61
|
+
@x == @run.x
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|