pdf-reader 1.1.1 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module PDF
|
4
5
|
|
@@ -17,11 +18,12 @@ module PDF
|
|
17
18
|
#
|
18
19
|
class LZW # :nodoc:
|
19
20
|
|
21
|
+
# Wraps an LZW encoded string
|
20
22
|
class BitStream # :nodoc:
|
21
23
|
|
22
24
|
def initialize(data, bits_in_chunk)
|
23
25
|
@data = data
|
24
|
-
@data.force_encoding("BINARY")
|
26
|
+
@data.force_encoding("BINARY")
|
25
27
|
@bits_in_chunk = bits_in_chunk
|
26
28
|
@current_pos = 0
|
27
29
|
@bits_left_in_byte = 8
|
@@ -81,9 +83,10 @@ module PDF
|
|
81
83
|
#
|
82
84
|
def self.decode(data)
|
83
85
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
84
|
-
result =
|
86
|
+
result = "".dup
|
85
87
|
until (code = stream.read) == CODE_EOD
|
86
88
|
if code == CODE_CLEAR_TABLE
|
89
|
+
stream.set_bits_in_chunk(9)
|
87
90
|
string_table = StringTable.new
|
88
91
|
code = stream.read
|
89
92
|
break if code == CODE_EOD
|
@@ -114,11 +117,10 @@ module PDF
|
|
114
117
|
result
|
115
118
|
end
|
116
119
|
|
117
|
-
private
|
118
|
-
|
119
120
|
def self.create_new_string(string_table,some_code, other_code)
|
120
121
|
string_table[some_code] + string_table[other_code][0].chr
|
121
122
|
end
|
123
|
+
private_class_method :create_new_string
|
122
124
|
|
123
125
|
end
|
124
126
|
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
|
6
|
+
# A null object security handler. Used when a PDF is unencrypted.
|
7
|
+
class NullSecurityHandler
|
8
|
+
|
9
|
+
def self.supports?(encrypt)
|
10
|
+
encrypt.nil?
|
11
|
+
end
|
12
|
+
|
13
|
+
def decrypt(buf, _ref)
|
14
|
+
buf
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -1,10 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'hashery/lru_hash'
|
2
5
|
|
3
6
|
class PDF::Reader
|
4
7
|
|
5
8
|
# A Hash-like object for caching commonly used objects from a PDF file.
|
6
9
|
#
|
7
|
-
# This is an internal class
|
10
|
+
# This is an internal class, no promises about a stable API.
|
8
11
|
#
|
9
12
|
class ObjectCache # nodoc
|
10
13
|
|
@@ -13,53 +16,67 @@ class PDF::Reader
|
|
13
16
|
# avoid lots of repetitive (and expensive) tokenising
|
14
17
|
CACHEABLE_TYPES = [:Catalog, :Page, :Pages]
|
15
18
|
|
16
|
-
|
19
|
+
attr_reader :hits, :misses
|
20
|
+
|
21
|
+
def initialize(lru_size = 1000)
|
17
22
|
@objects = {}
|
23
|
+
@lru_cache = Hashery::LRUHash.new(lru_size.to_i)
|
24
|
+
@hits = 0
|
25
|
+
@misses = 0
|
18
26
|
end
|
19
27
|
|
20
28
|
def [](key)
|
21
|
-
|
29
|
+
update_stats(key)
|
30
|
+
@objects[key] || @lru_cache[key]
|
22
31
|
end
|
23
32
|
|
24
33
|
def []=(key, value)
|
25
|
-
|
34
|
+
if cacheable?(value)
|
35
|
+
@objects[key] = value
|
36
|
+
else
|
37
|
+
@lru_cache[key] = value
|
38
|
+
end
|
26
39
|
end
|
27
40
|
|
28
41
|
def fetch(key, local_default = nil)
|
29
|
-
|
42
|
+
update_stats(key)
|
43
|
+
@objects[key] || @lru_cache.fetch(key, local_default)
|
30
44
|
end
|
31
45
|
|
32
46
|
def each(&block)
|
33
47
|
@objects.each(&block)
|
48
|
+
@lru_cache.each(&block)
|
34
49
|
end
|
35
50
|
alias :each_pair :each
|
36
51
|
|
37
52
|
def each_key(&block)
|
38
53
|
@objects.each_key(&block)
|
54
|
+
@lru_cache.each_key(&block)
|
39
55
|
end
|
40
56
|
|
41
57
|
def each_value(&block)
|
42
58
|
@objects.each_value(&block)
|
59
|
+
@lru_cache.each_value(&block)
|
43
60
|
end
|
44
61
|
|
45
62
|
def size
|
46
|
-
@objects.size
|
63
|
+
@objects.size + @lru_cache.size
|
47
64
|
end
|
48
65
|
alias :length :size
|
49
66
|
|
50
67
|
def empty?
|
51
|
-
@objects.empty?
|
68
|
+
@objects.empty? && @lru_cache.empty?
|
52
69
|
end
|
53
70
|
|
54
|
-
def
|
55
|
-
@objects.
|
71
|
+
def include?(key)
|
72
|
+
@objects.include?(key) || @lru_cache.include?(key)
|
56
73
|
end
|
57
|
-
alias :
|
58
|
-
alias :key? :
|
59
|
-
alias :member? :
|
74
|
+
alias :has_key? :include?
|
75
|
+
alias :key? :include?
|
76
|
+
alias :member? :include?
|
60
77
|
|
61
78
|
def has_value?(value)
|
62
|
-
@objects.has_value?(value)
|
79
|
+
@objects.has_value?(value) || @lru_cache.has_value?(value)
|
63
80
|
end
|
64
81
|
|
65
82
|
def to_s
|
@@ -67,19 +84,26 @@ class PDF::Reader
|
|
67
84
|
end
|
68
85
|
|
69
86
|
def keys
|
70
|
-
@objects.keys
|
87
|
+
@objects.keys + @lru_cache.keys
|
71
88
|
end
|
72
89
|
|
73
90
|
def values
|
74
|
-
@objects.values
|
91
|
+
@objects.values + @lru_cache.values
|
75
92
|
end
|
76
93
|
|
77
94
|
private
|
78
95
|
|
96
|
+
def update_stats(key)
|
97
|
+
if has_key?(key)
|
98
|
+
@hits += 1
|
99
|
+
else
|
100
|
+
@misses += 1
|
101
|
+
end
|
102
|
+
end
|
103
|
+
|
79
104
|
def cacheable?(obj)
|
80
105
|
obj.is_a?(Hash) && CACHEABLE_TYPES.include?(obj[:Type])
|
81
106
|
end
|
82
107
|
|
83
|
-
|
84
108
|
end
|
85
109
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
class PDF::Reader
|
4
5
|
# Provides low level access to the objects in a PDF file via a hash-like
|
@@ -41,10 +42,11 @@ class PDF::Reader
|
|
41
42
|
#
|
42
43
|
def initialize(input, opts = {})
|
43
44
|
@io = extract_io_from(input)
|
44
|
-
@pdf_version = read_version
|
45
45
|
@xref = PDF::Reader::XRef.new(@io)
|
46
|
+
@pdf_version = read_version
|
46
47
|
@trailer = @xref.trailer
|
47
|
-
@cache = PDF::Reader::ObjectCache.new
|
48
|
+
@cache = opts[:cache] || PDF::Reader::ObjectCache.new
|
49
|
+
@sec_handler = NullSecurityHandler.new
|
48
50
|
@sec_handler = build_security_handler(opts)
|
49
51
|
end
|
50
52
|
|
@@ -76,16 +78,7 @@ class PDF::Reader
|
|
76
78
|
key = PDF::Reader::Reference.new(key.to_i, 0)
|
77
79
|
end
|
78
80
|
|
79
|
-
|
80
|
-
@cache[key]
|
81
|
-
elsif xref[key].is_a?(Fixnum)
|
82
|
-
buf = new_buffer(xref[key])
|
83
|
-
@cache[key] = decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
84
|
-
elsif xref[key].is_a?(PDF::Reader::Reference)
|
85
|
-
container_key = xref[key]
|
86
|
-
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
87
|
-
@cache[key] = object_streams[container_key][key.id]
|
88
|
-
end
|
81
|
+
@cache[key] ||= fetch_object(key) || fetch_object_stream(key)
|
89
82
|
rescue InvalidObjectError
|
90
83
|
return default
|
91
84
|
end
|
@@ -102,21 +95,7 @@ class PDF::Reader
|
|
102
95
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
103
96
|
#
|
104
97
|
def deref!(key)
|
105
|
-
|
106
|
-
when Hash
|
107
|
-
{}.tap { |hash|
|
108
|
-
object.each do |k, value|
|
109
|
-
hash[k] = deref!(value)
|
110
|
-
end
|
111
|
-
}
|
112
|
-
when PDF::Reader::Stream
|
113
|
-
object.hash = deref!(object.hash)
|
114
|
-
object
|
115
|
-
when Array
|
116
|
-
object.map { |value| deref!(value) }
|
117
|
-
else
|
118
|
-
object
|
119
|
-
end
|
98
|
+
deref_internal!(key, {})
|
120
99
|
end
|
121
100
|
|
122
101
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
@@ -266,24 +245,95 @@ class PDF::Reader
|
|
266
245
|
|
267
246
|
private
|
268
247
|
|
269
|
-
|
270
|
-
|
248
|
+
# parse a traditional object from the PDF, starting from the byte offset indicated
|
249
|
+
# in the xref table
|
250
|
+
#
|
251
|
+
def fetch_object(key)
|
252
|
+
if xref[key].is_a?(Integer)
|
253
|
+
buf = new_buffer(xref[key])
|
254
|
+
decrypt(key, Parser.new(buf, self).object(key.id, key.gen))
|
255
|
+
end
|
256
|
+
end
|
257
|
+
|
258
|
+
# parse a object that's embedded in an object stream in the PDF
|
259
|
+
#
|
260
|
+
def fetch_object_stream(key)
|
261
|
+
if xref[key].is_a?(PDF::Reader::Reference)
|
262
|
+
container_key = xref[key]
|
263
|
+
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
|
264
|
+
object_streams[container_key][key.id]
|
265
|
+
end
|
266
|
+
end
|
267
|
+
|
268
|
+
# Private implementation of deref!, which exists to ensure the `seen` argument
|
269
|
+
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
270
|
+
# doesn't need to be part of the public API.
|
271
|
+
#
|
272
|
+
def deref_internal!(key, seen)
|
273
|
+
seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
|
274
|
+
|
275
|
+
return seen[seen_key] if seen.key?(seen_key)
|
276
|
+
|
277
|
+
case object = deref(key)
|
278
|
+
when Hash
|
279
|
+
seen[seen_key] ||= {}
|
280
|
+
object.each do |k, value|
|
281
|
+
seen[seen_key][k] = deref_internal!(value, seen)
|
282
|
+
end
|
283
|
+
seen[seen_key]
|
284
|
+
when PDF::Reader::Stream
|
285
|
+
seen[seen_key] ||= PDF::Reader::Stream.new({}, object.data)
|
286
|
+
object.hash.each do |k,value|
|
287
|
+
seen[seen_key].hash[k] = deref_internal!(value, seen)
|
288
|
+
end
|
289
|
+
seen[seen_key]
|
290
|
+
when Array
|
291
|
+
seen[seen_key] ||= []
|
292
|
+
object.each do |value|
|
293
|
+
seen[seen_key] << deref_internal!(value, seen)
|
294
|
+
end
|
295
|
+
seen[seen_key]
|
296
|
+
else
|
297
|
+
object
|
298
|
+
end
|
299
|
+
end
|
271
300
|
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
301
|
+
def build_security_handler(opts = {})
|
302
|
+
encrypt = deref(trailer[:Encrypt])
|
303
|
+
if NullSecurityHandler.supports?(encrypt)
|
304
|
+
NullSecurityHandler.new
|
305
|
+
elsif StandardSecurityHandler.supports?(encrypt)
|
306
|
+
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
307
|
+
StandardSecurityHandler.new(
|
308
|
+
key_length: (encrypt[:Length] || 40).to_i,
|
309
|
+
revision: encrypt[:R],
|
310
|
+
owner_key: encrypt[:O],
|
311
|
+
user_key: encrypt[:U],
|
312
|
+
permissions: encrypt[:P].to_i,
|
313
|
+
encrypted_metadata: encmeta,
|
314
|
+
file_id: (deref(trailer[:ID]) || []).first,
|
315
|
+
password: opts[:password],
|
316
|
+
cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
317
|
+
)
|
318
|
+
elsif StandardSecurityHandlerV5.supports?(encrypt)
|
319
|
+
StandardSecurityHandlerV5.new(
|
320
|
+
O: encrypt[:O],
|
321
|
+
U: encrypt[:U],
|
322
|
+
OE: encrypt[:OE],
|
323
|
+
UE: encrypt[:UE],
|
324
|
+
password: opts[:password]
|
325
|
+
)
|
276
326
|
else
|
277
|
-
|
327
|
+
UnimplementedSecurityHandler.new
|
278
328
|
end
|
279
329
|
end
|
280
330
|
|
281
331
|
def decrypt(ref, obj)
|
282
|
-
return obj unless sec_handler?
|
283
|
-
|
284
332
|
case obj
|
285
333
|
when PDF::Reader::Stream then
|
286
|
-
|
334
|
+
# PDF 32000-1:2008 7.5.8.2: "The cross-reference stream shall not be encrypted [...]."
|
335
|
+
# Therefore we shouldn't try to decrypt it.
|
336
|
+
obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
|
287
337
|
obj
|
288
338
|
when Hash then
|
289
339
|
arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
|
@@ -312,18 +362,22 @@ class PDF::Reader
|
|
312
362
|
# returns a nested array of object references for all pages in this object store.
|
313
363
|
#
|
314
364
|
def get_page_objects(ref)
|
315
|
-
obj =
|
365
|
+
obj = deref(ref)
|
366
|
+
|
367
|
+
unless obj.kind_of?(::Hash)
|
368
|
+
raise MalformedPDFError, "Dereferenced page object must be a dict"
|
369
|
+
end
|
316
370
|
|
317
371
|
if obj[:Type] == :Page
|
318
372
|
ref
|
319
|
-
elsif obj[:
|
373
|
+
elsif obj[:Kids]
|
320
374
|
deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
|
321
375
|
end
|
322
376
|
end
|
323
377
|
|
324
378
|
def read_version
|
325
379
|
@io.seek(0)
|
326
|
-
|
380
|
+
_m, version = *@io.read(10).match(/PDF-(\d.\d)/)
|
327
381
|
@io.seek(0)
|
328
382
|
version.to_f
|
329
383
|
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
class PDF::Reader
|
5
|
+
# Small util class for detecting the orientation of a single PDF page. Accounts
|
6
|
+
# for any page rotation that is in place.
|
7
|
+
#
|
8
|
+
# OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
|
9
|
+
# => "portrait"
|
10
|
+
#
|
11
|
+
class OrientationDetector
|
12
|
+
def initialize(attributes)
|
13
|
+
@attributes = attributes
|
14
|
+
end
|
15
|
+
|
16
|
+
def orientation
|
17
|
+
@orientation ||= detect_orientation
|
18
|
+
end
|
19
|
+
|
20
|
+
private
|
21
|
+
|
22
|
+
def detect_orientation
|
23
|
+
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
|
+
rotation = @attributes[:Rotate].to_i
|
25
|
+
width = (urx.to_i - llx.to_i).abs
|
26
|
+
height = (ury.to_i - lly.to_i).abs
|
27
|
+
if width > height
|
28
|
+
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
|
+
else
|
30
|
+
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
class PDF::Reader
|
4
|
+
# remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
|
5
|
+
# uses slightly offset overlapping characters to achieve a fake 'bold' effect.
|
6
|
+
class OverlappingRunsFilter
|
7
|
+
|
8
|
+
# This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
|
9
|
+
# have identical characters) then one will be discarded
|
10
|
+
OVERLAPPING_THRESHOLD = 0.5
|
11
|
+
|
12
|
+
def self.exclude_redundant_runs(runs)
|
13
|
+
sweep_line_status = Array.new
|
14
|
+
event_point_schedule = Array.new
|
15
|
+
to_exclude = []
|
16
|
+
|
17
|
+
runs.each do |run|
|
18
|
+
event_point_schedule << EventPoint.new(run.x, run)
|
19
|
+
event_point_schedule << EventPoint.new(run.endx, run)
|
20
|
+
end
|
21
|
+
|
22
|
+
event_point_schedule.sort! { |a,b| a.x <=> b.x }
|
23
|
+
|
24
|
+
event_point_schedule.each do |event_point|
|
25
|
+
run = event_point.run
|
26
|
+
|
27
|
+
if event_point.start?
|
28
|
+
if detect_intersection(sweep_line_status, event_point)
|
29
|
+
to_exclude << run
|
30
|
+
end
|
31
|
+
sweep_line_status.push(run)
|
32
|
+
else
|
33
|
+
sweep_line_status.delete(run)
|
34
|
+
end
|
35
|
+
end
|
36
|
+
runs - to_exclude
|
37
|
+
end
|
38
|
+
|
39
|
+
def self.detect_intersection(sweep_line_status, event_point)
|
40
|
+
sweep_line_status.each do |open_text_run|
|
41
|
+
if event_point.x >= open_text_run.x &&
|
42
|
+
event_point.x <= open_text_run.endx &&
|
43
|
+
open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
|
44
|
+
return true
|
45
|
+
end
|
46
|
+
end
|
47
|
+
return false
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Utility class used to avoid modifying the underlying TextRun objects while we're
|
52
|
+
# looking for duplicates
|
53
|
+
class EventPoint
|
54
|
+
attr_reader :x, :run
|
55
|
+
|
56
|
+
def initialize x, run
|
57
|
+
@x, @run = x, run
|
58
|
+
end
|
59
|
+
|
60
|
+
def start?
|
61
|
+
@x == @run.x
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|