pdf-reader 2.14.1 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +15 -0
- data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +35 -17
- data/lib/pdf/reader/cid_widths.rb +7 -1
- data/lib/pdf/reader/cmap.rb +14 -3
- data/lib/pdf/reader/encoding.rb +37 -12
- data/lib/pdf/reader/error.rb +6 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +4 -0
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +90 -22
- data/lib/pdf/reader/font_descriptor.rb +76 -23
- data/lib/pdf/reader/form_xobject.rb +11 -0
- data/lib/pdf/reader/glyph_hash.rb +34 -9
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +17 -6
- data/lib/pdf/reader/no_text_filter.rb +1 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +116 -9
- data/lib/pdf/reader/object_stream.rb +19 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
- data/lib/pdf/reader/page.rb +41 -7
- data/lib/pdf/reader/page_layout.rb +25 -8
- data/lib/pdf/reader/page_state.rb +5 -2
- data/lib/pdf/reader/page_text_receiver.rb +6 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +51 -10
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +10 -1
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +9 -0
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +5 -2
- data/lib/pdf/reader/text_run.rb +28 -1
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +33 -2
- data/lib/pdf/reader/type_check.rb +10 -3
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
- data/lib/pdf/reader/width_calculator/composite.rb +5 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
- data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
- data/lib/pdf/reader/xref.rb +28 -7
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +18 -2
- data/rbi/pdf-reader.rbi +1502 -1594
- metadata +17 -11
@@ -32,8 +32,24 @@ class PDF::Reader
|
|
32
32
|
class ObjectHash
|
33
33
|
include Enumerable
|
34
34
|
|
35
|
+
#: type securityHandler = (
|
36
|
+
#| PDF::Reader::NullSecurityHandler |
|
37
|
+
#| PDF::Reader::AesV2SecurityHandler |
|
38
|
+
#| PDF::Reader::Rc4SecurityHandler |
|
39
|
+
#| PDF::Reader::AesV3SecurityHandler |
|
40
|
+
#| PDF::Reader::UnimplementedSecurityHandler
|
41
|
+
#| )
|
42
|
+
|
43
|
+
#: untyped
|
35
44
|
attr_accessor :default
|
36
|
-
|
45
|
+
|
46
|
+
#: Hash[Symbol, untyped]
|
47
|
+
attr_reader :trailer
|
48
|
+
|
49
|
+
#: Float
|
50
|
+
attr_reader :pdf_version
|
51
|
+
|
52
|
+
#: securityHandler
|
37
53
|
attr_reader :sec_handler
|
38
54
|
|
39
55
|
# Creates a new ObjectHash object. Input can be a string with a valid filename
|
@@ -43,21 +59,25 @@ class PDF::Reader
|
|
43
59
|
#
|
44
60
|
# :password - the user password to decrypt the source PDF
|
45
61
|
#
|
62
|
+
#: ((IO | Tempfile | StringIO | String), ?Hash[Symbol, untyped]) -> void
|
46
63
|
def initialize(input, opts = {})
|
47
|
-
@io = extract_io_from(input)
|
48
|
-
@xref = PDF::Reader::XRef.new(@io)
|
49
|
-
@pdf_version = read_version
|
50
|
-
@trailer = @xref.trailer
|
51
|
-
@cache = opts[:cache] || PDF::Reader::ObjectCache.new
|
52
|
-
@sec_handler = NullSecurityHandler.new
|
64
|
+
@io = extract_io_from(input) #: IO | Tempfile | StringIO
|
65
|
+
@xref = PDF::Reader::XRef.new(@io) #: PDF::Reader::XRef[PDF::Reader::Reference]
|
66
|
+
@pdf_version = read_version #: Float
|
67
|
+
@trailer = @xref.trailer #: Hash[Symbol, untyped]
|
68
|
+
@cache = opts[:cache] || PDF::Reader::ObjectCache.new #: PDF::Reader::ObjectCache
|
69
|
+
@sec_handler = NullSecurityHandler.new #: securityHandler
|
53
70
|
@sec_handler = SecurityHandlerFactory.build(
|
54
71
|
deref(trailer[:Encrypt]),
|
55
72
|
deref(trailer[:ID]),
|
56
73
|
opts[:password]
|
57
74
|
)
|
75
|
+
@page_references = nil #: Array[PDF::Reader::Reference | Hash[Symbol, untyped]]?
|
76
|
+
@object_streams = nil #: Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]?
|
58
77
|
end
|
59
78
|
|
60
79
|
# returns the type of object a ref points to
|
80
|
+
#: ((Integer | PDF::Reader::Reference)) -> Symbol?
|
61
81
|
def obj_type(ref)
|
62
82
|
self[ref].class.to_s.to_sym
|
63
83
|
rescue
|
@@ -65,6 +85,7 @@ class PDF::Reader
|
|
65
85
|
end
|
66
86
|
|
67
87
|
# returns true if the supplied references points to an object with a stream
|
88
|
+
#: ((Integer | PDF::Reader::Reference)) -> bool
|
68
89
|
def stream?(ref)
|
69
90
|
self.has_key?(ref) && self[ref].is_a?(PDF::Reader::Stream)
|
70
91
|
end
|
@@ -78,6 +99,7 @@ class PDF::Reader
|
|
78
99
|
# If a PDF::Reader::Reference object is used the exact ID and generation number
|
79
100
|
# can be specified.
|
80
101
|
#
|
102
|
+
#: ((Integer | PDF::Reader::Reference)) -> untyped
|
81
103
|
def [](key)
|
82
104
|
return default if key.to_i <= 0
|
83
105
|
|
@@ -93,6 +115,7 @@ class PDF::Reader
|
|
93
115
|
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
94
116
|
# object in the PDF and return it. Otherwise return key untouched.
|
95
117
|
#
|
118
|
+
#: (untyped) -> untyped
|
96
119
|
def object(key)
|
97
120
|
key.is_a?(PDF::Reader::Reference) ? self[key] : key
|
98
121
|
end
|
@@ -104,6 +127,7 @@ class PDF::Reader
|
|
104
127
|
# Guaranteed to only return an Array or nil. If the dereference results in
|
105
128
|
# any other type then a MalformedPDFError exception will raise. Useful when
|
106
129
|
# expecting an Array and no other type will do.
|
130
|
+
#: (untyped) -> Array[untyped]?
|
107
131
|
def deref_array(key)
|
108
132
|
obj = deref(key)
|
109
133
|
|
@@ -122,6 +146,7 @@ class PDF::Reader
|
|
122
146
|
# expecting an Array and no other type will do.
|
123
147
|
#
|
124
148
|
# Some effort to cast array elements to a number is made for any non-numeric elements.
|
149
|
+
#: (untyped) -> Array[Numeric]?
|
125
150
|
def deref_array_of_numbers(key)
|
126
151
|
arr = deref(key)
|
127
152
|
|
@@ -148,6 +173,7 @@ class PDF::Reader
|
|
148
173
|
# Guaranteed to only return a Hash or nil. If the dereference results in
|
149
174
|
# any other type then a MalformedPDFError exception will raise. Useful when
|
150
175
|
# expecting a Hash and no other type will do.
|
176
|
+
#: (untyped) -> Hash[Symbol, untyped]?
|
151
177
|
def deref_hash(key)
|
152
178
|
obj = deref(key)
|
153
179
|
|
@@ -166,6 +192,7 @@ class PDF::Reader
|
|
166
192
|
# expecting an Array and no other type will do.
|
167
193
|
#
|
168
194
|
# Some effort to cast to a symbol is made when the reference points to a non-symbol.
|
195
|
+
#: (untyped) -> Symbol?
|
169
196
|
def deref_name(key)
|
170
197
|
obj = deref(key)
|
171
198
|
|
@@ -190,6 +217,7 @@ class PDF::Reader
|
|
190
217
|
# expecting an Array and no other type will do.
|
191
218
|
#
|
192
219
|
# Some effort to cast to an int is made when the reference points to a non-integer.
|
220
|
+
#: (untyped) -> Integer?
|
193
221
|
def deref_integer(key)
|
194
222
|
obj = deref(key)
|
195
223
|
|
@@ -214,6 +242,7 @@ class PDF::Reader
|
|
214
242
|
# expecting an Array and no other type will do.
|
215
243
|
#
|
216
244
|
# Some effort to cast to a number is made when the reference points to a non-number.
|
245
|
+
#: (untyped) -> Numeric?
|
217
246
|
def deref_number(key)
|
218
247
|
obj = deref(key)
|
219
248
|
|
@@ -238,6 +267,7 @@ class PDF::Reader
|
|
238
267
|
# Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
|
239
268
|
# any other type then a MalformedPDFError exception will raise. Useful when
|
240
269
|
# expecting a stream and no other type will do.
|
270
|
+
#: (untyped) -> PDF::Reader::Stream?
|
241
271
|
def deref_stream(key)
|
242
272
|
obj = deref(key)
|
243
273
|
|
@@ -258,6 +288,7 @@ class PDF::Reader
|
|
258
288
|
# expecting a string and no other type will do.
|
259
289
|
#
|
260
290
|
# Some effort to cast to a string is made when the reference points to a non-string.
|
291
|
+
#: (untyped) -> String?
|
261
292
|
def deref_string(key)
|
262
293
|
obj = deref(key)
|
263
294
|
|
@@ -280,6 +311,7 @@ class PDF::Reader
|
|
280
311
|
# Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
|
281
312
|
# any other type then a MalformedPDFError exception will raise. Useful when
|
282
313
|
# expecting a Name or Array and no other type will do.
|
314
|
+
#: (untyped) -> (Symbol | Array[untyped] | nil)
|
283
315
|
def deref_name_or_array(key)
|
284
316
|
obj = deref(key)
|
285
317
|
|
@@ -298,6 +330,7 @@ class PDF::Reader
|
|
298
330
|
# Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
|
299
331
|
# any other type then a MalformedPDFError exception will raise. Useful when
|
300
332
|
# expecting a stream or Array and no other type will do.
|
333
|
+
#: (untyped) -> (PDF::Reader::Stream | Array[untyped] | nil)
|
301
334
|
def deref_stream_or_array(key)
|
302
335
|
obj = deref(key)
|
303
336
|
|
@@ -313,10 +346,12 @@ class PDF::Reader
|
|
313
346
|
# Recursively dereferences the object referred to by +key+. If +key+ is not
|
314
347
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
315
348
|
#
|
349
|
+
#: (untyped) -> untyped
|
316
350
|
def deref!(key)
|
317
351
|
deref_internal!(key, {})
|
318
352
|
end
|
319
353
|
|
354
|
+
#: (untyped) -> Array[untyped]?
|
320
355
|
def deref_array!(key)
|
321
356
|
deref!(key).tap { |obj|
|
322
357
|
if !obj.nil? && !obj.is_a?(Array)
|
@@ -325,6 +360,7 @@ class PDF::Reader
|
|
325
360
|
}
|
326
361
|
end
|
327
362
|
|
363
|
+
#: (untyped) -> Hash[Symbol, untyped]?
|
328
364
|
def deref_hash!(key)
|
329
365
|
deref!(key).tap { |obj|
|
330
366
|
if !obj.nil? && !obj.is_a?(Hash)
|
@@ -345,6 +381,7 @@ class PDF::Reader
|
|
345
381
|
# local_default is the object that will be returned if the requested key doesn't
|
346
382
|
# exist.
|
347
383
|
#
|
384
|
+
#: (untyped, ?untyped) -> untyped
|
348
385
|
def fetch(key, local_default = nil)
|
349
386
|
obj = self[key]
|
350
387
|
if obj
|
@@ -358,6 +395,8 @@ class PDF::Reader
|
|
358
395
|
|
359
396
|
# iterate over each key, value. Just like a ruby hash.
|
360
397
|
#
|
398
|
+
# @override(allow_incompatible: true)
|
399
|
+
#: () { (PDF::Reader::Reference, untyped) -> untyped } -> untyped
|
361
400
|
def each(&block)
|
362
401
|
@xref.each do |ref|
|
363
402
|
yield ref, self[ref]
|
@@ -367,6 +406,7 @@ class PDF::Reader
|
|
367
406
|
|
368
407
|
# iterate over each key. Just like a ruby hash.
|
369
408
|
#
|
409
|
+
#: { (PDF::Reader::Reference) -> untyped } -> untyped
|
370
410
|
def each_key(&block)
|
371
411
|
each do |id, obj|
|
372
412
|
yield id
|
@@ -375,6 +415,7 @@ class PDF::Reader
|
|
375
415
|
|
376
416
|
# iterate over each value. Just like a ruby hash.
|
377
417
|
#
|
418
|
+
#: { (untyped) -> untyped } -> untyped
|
378
419
|
def each_value(&block)
|
379
420
|
each do |id, obj|
|
380
421
|
yield obj
|
@@ -383,6 +424,7 @@ class PDF::Reader
|
|
383
424
|
|
384
425
|
# return the number of objects in the file. An object with multiple generations
|
385
426
|
# is counted once.
|
427
|
+
#: () -> Integer
|
386
428
|
def size
|
387
429
|
xref.size
|
388
430
|
end
|
@@ -390,6 +432,7 @@ class PDF::Reader
|
|
390
432
|
|
391
433
|
# return true if there are no objects in this file
|
392
434
|
#
|
435
|
+
#: () -> bool
|
393
436
|
def empty?
|
394
437
|
size == 0 ? true : false
|
395
438
|
end
|
@@ -397,6 +440,7 @@ class PDF::Reader
|
|
397
440
|
# return true if the specified key exists in the file. key
|
398
441
|
# can be an int or a PDF::Reader::Reference
|
399
442
|
#
|
443
|
+
#: (untyped) -> bool
|
400
444
|
def has_key?(check_key)
|
401
445
|
# TODO update from O(n) to O(1)
|
402
446
|
each_key do |key|
|
@@ -414,6 +458,7 @@ class PDF::Reader
|
|
414
458
|
|
415
459
|
# return true if the specified value exists in the file
|
416
460
|
#
|
461
|
+
#: (untyped) -> bool
|
417
462
|
def has_value?(value)
|
418
463
|
# TODO update from O(n) to O(1)
|
419
464
|
each_value do |obj|
|
@@ -423,12 +468,14 @@ class PDF::Reader
|
|
423
468
|
end
|
424
469
|
alias :value? :has_key?
|
425
470
|
|
471
|
+
#: () -> String
|
426
472
|
def to_s
|
427
473
|
"<PDF::Reader::ObjectHash size: #{self.size}>"
|
428
474
|
end
|
429
475
|
|
430
476
|
# return an array of all keys in the file
|
431
477
|
#
|
478
|
+
#: () -> Array[PDF::Reader::Reference]
|
432
479
|
def keys
|
433
480
|
ret = []
|
434
481
|
each_key { |k| ret << k }
|
@@ -437,6 +484,7 @@ class PDF::Reader
|
|
437
484
|
|
438
485
|
# return an array of all values in the file
|
439
486
|
#
|
487
|
+
#: () -> untyped
|
440
488
|
def values
|
441
489
|
ret = []
|
442
490
|
each_value { |v| ret << v }
|
@@ -445,12 +493,14 @@ class PDF::Reader
|
|
445
493
|
|
446
494
|
# return an array of all values from the specified keys
|
447
495
|
#
|
496
|
+
#: (*untyped) -> untyped
|
448
497
|
def values_at(*ids)
|
449
498
|
ids.map { |id| self[id] }
|
450
499
|
end
|
451
500
|
|
452
501
|
# return an array of arrays. Each sub array contains a key/value pair.
|
453
502
|
#
|
503
|
+
#: () -> untyped
|
454
504
|
def to_a
|
455
505
|
ret = []
|
456
506
|
each do |id, obj|
|
@@ -465,6 +515,7 @@ class PDF::Reader
|
|
465
515
|
#
|
466
516
|
# Useful for apps that want to extract data from specific pages.
|
467
517
|
#
|
518
|
+
#: () -> Array[PDF::Reader::Reference | Hash[Symbol, untyped]]
|
468
519
|
def page_references
|
469
520
|
root = fetch(trailer[:Root])
|
470
521
|
@page_references ||= begin
|
@@ -473,10 +524,12 @@ class PDF::Reader
|
|
473
524
|
end
|
474
525
|
end
|
475
526
|
|
527
|
+
#: () -> bool
|
476
528
|
def encrypted?
|
477
529
|
trailer.has_key?(:Encrypt)
|
478
530
|
end
|
479
531
|
|
532
|
+
#: () -> bool
|
480
533
|
def sec_handler?
|
481
534
|
!!sec_handler
|
482
535
|
end
|
@@ -486,6 +539,17 @@ class PDF::Reader
|
|
486
539
|
# parse a traditional object from the PDF, starting from the byte offset indicated
|
487
540
|
# in the xref table
|
488
541
|
#
|
542
|
+
#: (PDF::Reader::Reference) -> (
|
543
|
+
#| PDF::Reader::Reference |
|
544
|
+
#| PDF::Reader::Token |
|
545
|
+
#| PDF::Reader::Stream |
|
546
|
+
#| Numeric |
|
547
|
+
#| String |
|
548
|
+
#| Symbol |
|
549
|
+
#| Array[untyped] |
|
550
|
+
#| Hash[untyped, untyped] |
|
551
|
+
#| nil
|
552
|
+
#| )
|
489
553
|
def fetch_object(key)
|
490
554
|
if xref[key].is_a?(Integer)
|
491
555
|
buf = new_buffer(xref[key])
|
@@ -495,13 +559,25 @@ class PDF::Reader
|
|
495
559
|
|
496
560
|
# parse a object that's embedded in an object stream in the PDF
|
497
561
|
#
|
562
|
+
#: (PDF::Reader::Reference) -> (
|
563
|
+
#| PDF::Reader::Reference |
|
564
|
+
#| PDF::Reader::Token |
|
565
|
+
#| PDF::Reader::Stream |
|
566
|
+
#| Numeric |
|
567
|
+
#| String |
|
568
|
+
#| Symbol |
|
569
|
+
#| Array[untyped] |
|
570
|
+
#| Hash[untyped, untyped] |
|
571
|
+
#| nil
|
572
|
+
#| )
|
498
573
|
def fetch_object_stream(key)
|
499
574
|
if xref[key].is_a?(PDF::Reader::Reference)
|
500
575
|
container_key = xref[key]
|
501
576
|
stream = deref_stream(container_key)
|
502
577
|
raise MalformedPDFError, "Object Stream cannot be nil" if stream.nil?
|
503
|
-
object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
|
504
|
-
|
578
|
+
if objstream = object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
|
579
|
+
objstream[key.id]
|
580
|
+
end
|
505
581
|
end
|
506
582
|
end
|
507
583
|
|
@@ -509,6 +585,17 @@ class PDF::Reader
|
|
509
585
|
# isn't publicly available. It's used to avoid endless loops in the recursion, and
|
510
586
|
# doesn't need to be part of the public API.
|
511
587
|
#
|
588
|
+
#: (untyped, Hash[Integer, untyped]) -> (
|
589
|
+
#| PDF::Reader::Reference |
|
590
|
+
#| PDF::Reader::Token |
|
591
|
+
#| PDF::Reader::Stream |
|
592
|
+
#| Numeric |
|
593
|
+
#| String |
|
594
|
+
#| Symbol |
|
595
|
+
#| Array[untyped] |
|
596
|
+
#| Hash[untyped, untyped] |
|
597
|
+
#| nil
|
598
|
+
#| )
|
512
599
|
def deref_internal!(key, seen)
|
513
600
|
seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
|
514
601
|
|
@@ -538,6 +625,17 @@ class PDF::Reader
|
|
538
625
|
end
|
539
626
|
end
|
540
627
|
|
628
|
+
#: (PDF::Reader::Reference, untyped) -> (
|
629
|
+
#| PDF::Reader::Reference |
|
630
|
+
#| PDF::Reader::Token |
|
631
|
+
#| PDF::Reader::Stream |
|
632
|
+
#| Numeric |
|
633
|
+
#| String |
|
634
|
+
#| Symbol |
|
635
|
+
#| Array[untyped] |
|
636
|
+
#| Hash[untyped, untyped] |
|
637
|
+
#| nil
|
638
|
+
#| )
|
541
639
|
def decrypt(ref, obj)
|
542
640
|
case obj
|
543
641
|
when PDF::Reader::Stream then
|
@@ -559,14 +657,17 @@ class PDF::Reader
|
|
559
657
|
end
|
560
658
|
end
|
561
659
|
|
660
|
+
#: (?Integer) -> PDF::Reader::Buffer
|
562
661
|
def new_buffer(offset = 0)
|
563
662
|
PDF::Reader::Buffer.new(@io, :seek => offset)
|
564
663
|
end
|
565
664
|
|
665
|
+
#: () -> PDF::Reader::XRef[PDF::Reader::Reference]
|
566
666
|
def xref
|
567
667
|
@xref
|
568
668
|
end
|
569
669
|
|
670
|
+
#: () -> Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]
|
570
671
|
def object_streams
|
571
672
|
@object_streams ||= {}
|
572
673
|
end
|
@@ -574,6 +675,9 @@ class PDF::Reader
|
|
574
675
|
# returns an array of object references for all pages in this object store. The ordering of
|
575
676
|
# the Array is significant and matches the page ordering of the document
|
576
677
|
#
|
678
|
+
#: (PDF::Reader::Reference | Hash[Symbol, untyped]) -> (
|
679
|
+
#| Array[PDF::Reader::Reference | Hash[Symbol, untyped] ]
|
680
|
+
#| )
|
577
681
|
def get_page_objects(obj)
|
578
682
|
derefed_obj = deref_hash(obj)
|
579
683
|
|
@@ -591,6 +695,7 @@ class PDF::Reader
|
|
591
695
|
end
|
592
696
|
end
|
593
697
|
|
698
|
+
#: () -> Float
|
594
699
|
def read_version
|
595
700
|
@io.seek(0)
|
596
701
|
_m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
|
@@ -598,6 +703,7 @@ class PDF::Reader
|
|
598
703
|
version.to_f
|
599
704
|
end
|
600
705
|
|
706
|
+
#: (IO | Tempfile | StringIO | String) -> (IO | Tempfile | StringIO)
|
601
707
|
def extract_io_from(input)
|
602
708
|
if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
|
603
709
|
input
|
@@ -608,6 +714,7 @@ class PDF::Reader
|
|
608
714
|
end
|
609
715
|
end
|
610
716
|
|
717
|
+
#: (String) -> (String)
|
611
718
|
def read_as_binary(input)
|
612
719
|
if File.respond_to?(:binread)
|
613
720
|
File.binread(input.to_s)
|
@@ -8,11 +8,24 @@ class PDF::Reader
|
|
8
8
|
# This is done for added compression and is described as an "Object Stream" in the spec.
|
9
9
|
#
|
10
10
|
class ObjectStream # :nodoc:
|
11
|
+
#: (PDF::Reader::Stream) -> void
|
11
12
|
def initialize(stream)
|
12
|
-
@dict = stream.hash
|
13
|
-
@data = stream.unfiltered_data
|
13
|
+
@dict = stream.hash #: Hash[Symbol, untyped]
|
14
|
+
@data = stream.unfiltered_data #: String
|
15
|
+
@offsets = nil #: Hash[Integer, Integer] | nil
|
16
|
+
@buffer = nil #: PDF::Reader::Buffer | nil
|
14
17
|
end
|
15
18
|
|
19
|
+
#: (Integer) -> (
|
20
|
+
#| PDF::Reader::Reference |
|
21
|
+
#| PDF::Reader::Token |
|
22
|
+
#| Numeric |
|
23
|
+
#| String |
|
24
|
+
#| Symbol |
|
25
|
+
#| Array[untyped] |
|
26
|
+
#| Hash[untyped, untyped] |
|
27
|
+
#| nil
|
28
|
+
#| )
|
16
29
|
def [](objid)
|
17
30
|
if offsets[objid].nil?
|
18
31
|
nil
|
@@ -23,12 +36,14 @@ class PDF::Reader
|
|
23
36
|
end
|
24
37
|
end
|
25
38
|
|
39
|
+
#: () -> Integer
|
26
40
|
def size
|
27
41
|
TypeCheck.cast_to_int!(@dict[:N])
|
28
42
|
end
|
29
43
|
|
30
44
|
private
|
31
45
|
|
46
|
+
#: () -> Hash[Integer, Integer]
|
32
47
|
def offsets
|
33
48
|
@offsets ||= {}
|
34
49
|
return @offsets if @offsets.keys.size > 0
|
@@ -39,10 +54,12 @@ class PDF::Reader
|
|
39
54
|
@offsets
|
40
55
|
end
|
41
56
|
|
57
|
+
#: () -> Integer
|
42
58
|
def first
|
43
59
|
TypeCheck.cast_to_int!(@dict[:First])
|
44
60
|
end
|
45
61
|
|
62
|
+
#: () -> PDF::Reader::Buffer
|
46
63
|
def buffer
|
47
64
|
@buffer ||= PDF::Reader::Buffer.new(StringIO.new(@data))
|
48
65
|
end
|
@@ -9,8 +9,9 @@ class PDF::Reader
|
|
9
9
|
|
10
10
|
# This should be between 0 and 1. If TextRun B obscures this much of TextRun A (and they
|
11
11
|
# have identical characters) then one will be discarded
|
12
|
-
OVERLAPPING_THRESHOLD = 0.5
|
12
|
+
OVERLAPPING_THRESHOLD = 0.5 #: Float
|
13
13
|
|
14
|
+
#: (Array[PDF::Reader::TextRun]) -> Array[PDF::Reader::TextRun]
|
14
15
|
def self.exclude_redundant_runs(runs)
|
15
16
|
sweep_line_status = Array.new
|
16
17
|
event_point_schedule = Array.new
|
@@ -38,6 +39,7 @@ class PDF::Reader
|
|
38
39
|
runs - to_exclude
|
39
40
|
end
|
40
41
|
|
42
|
+
#: (Array[PDF::Reader::TextRun], PDF::Reader::EventPoint) -> bool
|
41
43
|
def self.detect_intersection(sweep_line_status, event_point)
|
42
44
|
sweep_line_status.each do |open_text_run|
|
43
45
|
if open_text_run.text == event_point.run.text &&
|
@@ -55,15 +57,19 @@ class PDF::Reader
|
|
55
57
|
# looking for duplicates
|
56
58
|
class EventPoint
|
57
59
|
|
60
|
+
#: Numeric
|
58
61
|
attr_reader :x
|
59
62
|
|
63
|
+
#: PDF::Reader::TextRun
|
60
64
|
attr_reader :run
|
61
65
|
|
66
|
+
#: (Numeric, PDF::Reader::TextRun) -> void
|
62
67
|
def initialize(x, run)
|
63
68
|
@x = x
|
64
69
|
@run = run
|
65
70
|
end
|
66
71
|
|
72
|
+
#: () -> bool
|
67
73
|
def start?
|
68
74
|
@x == @run.x
|
69
75
|
end
|