pdf-reader 2.9.2 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +39 -0
  3. data/README.md +33 -33
  4. data/Rakefile +2 -2
  5. data/lib/pdf/reader/advanced_text_run_filter.rb +152 -0
  6. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  7. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  8. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  9. data/lib/pdf/reader/buffer.rb +39 -22
  10. data/lib/pdf/reader/cid_widths.rb +14 -6
  11. data/lib/pdf/reader/cmap.rb +16 -5
  12. data/lib/pdf/reader/encoding.rb +42 -18
  13. data/lib/pdf/reader/error.rb +6 -4
  14. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  15. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  16. data/lib/pdf/reader/filter/depredict.rb +6 -2
  17. data/lib/pdf/reader/filter/flate.rb +5 -2
  18. data/lib/pdf/reader/filter/lzw.rb +2 -0
  19. data/lib/pdf/reader/filter/null.rb +2 -0
  20. data/lib/pdf/reader/filter/run_length.rb +2 -0
  21. data/lib/pdf/reader/filter.rb +1 -0
  22. data/lib/pdf/reader/font.rb +99 -32
  23. data/lib/pdf/reader/font_descriptor.rb +79 -24
  24. data/lib/pdf/reader/form_xobject.rb +15 -1
  25. data/lib/pdf/reader/glyph_hash.rb +41 -8
  26. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  27. data/lib/pdf/reader/lzw.rb +42 -16
  28. data/lib/pdf/reader/no_text_filter.rb +15 -0
  29. data/lib/pdf/reader/null_security_handler.rb +1 -0
  30. data/lib/pdf/reader/object_cache.rb +7 -2
  31. data/lib/pdf/reader/object_hash.rb +129 -16
  32. data/lib/pdf/reader/object_stream.rb +22 -5
  33. data/lib/pdf/reader/overlapping_runs_filter.rb +8 -2
  34. data/lib/pdf/reader/page.rb +66 -13
  35. data/lib/pdf/reader/page_layout.rb +26 -9
  36. data/lib/pdf/reader/page_state.rb +12 -3
  37. data/lib/pdf/reader/page_text_receiver.rb +16 -2
  38. data/lib/pdf/reader/pages_strategy.rb +1 -1
  39. data/lib/pdf/reader/parser.rb +52 -13
  40. data/lib/pdf/reader/point.rb +9 -2
  41. data/lib/pdf/reader/print_receiver.rb +2 -6
  42. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  43. data/lib/pdf/reader/rectangle.rb +24 -1
  44. data/lib/pdf/reader/reference.rb +13 -3
  45. data/lib/pdf/reader/register_receiver.rb +15 -2
  46. data/lib/pdf/reader/resources.rb +12 -2
  47. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  48. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  49. data/lib/pdf/reader/stream.rb +9 -3
  50. data/lib/pdf/reader/synchronized_cache.rb +6 -3
  51. data/lib/pdf/reader/text_run.rb +33 -3
  52. data/lib/pdf/reader/token.rb +1 -0
  53. data/lib/pdf/reader/transformation_matrix.rb +41 -10
  54. data/lib/pdf/reader/type_check.rb +53 -0
  55. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  56. data/lib/pdf/reader/validating_receiver.rb +29 -0
  57. data/lib/pdf/reader/width_calculator/built_in.rb +13 -5
  58. data/lib/pdf/reader/width_calculator/composite.rb +11 -3
  59. data/lib/pdf/reader/width_calculator/true_type.rb +14 -12
  60. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +8 -5
  61. data/lib/pdf/reader/width_calculator/type_zero.rb +8 -3
  62. data/lib/pdf/reader/xref.rb +31 -10
  63. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  64. data/lib/pdf/reader.rb +24 -12
  65. data/rbi/pdf-reader.rbi +1504 -1480
  66. metadata +34 -17
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -22,25 +22,31 @@ module PDF
22
22
  # Wraps an LZW encoded string
23
23
  class BitStream # :nodoc:
24
24
 
25
+ #: (String, Integer) -> void
25
26
  def initialize(data, bits_in_chunk)
26
27
  @data = data
27
28
  @data.force_encoding("BINARY")
28
- @bits_in_chunk = bits_in_chunk
29
- @current_pos = 0
30
- @bits_left_in_byte = 8
29
+ @current_pos = 0 #: Integer
30
+ @bits_left_in_byte = 8 #: Integer
31
+ @bits_in_chunk = 0 #: Integer
32
+ set_bits_in_chunk(bits_in_chunk)
31
33
  end
32
34
 
35
+ #: (Integer) -> void
33
36
  def set_bits_in_chunk(bits_in_chunk)
37
+ raise MalformedPDFError, "invalid LZW bits" if bits_in_chunk < 9 || bits_in_chunk > 12
38
+
34
39
  @bits_in_chunk = bits_in_chunk
35
40
  end
36
41
 
42
+ #: () -> Integer
37
43
  def read
38
44
  bits_left_in_chunk = @bits_in_chunk
39
45
  chunk = -1
40
46
  while bits_left_in_chunk > 0 and @current_pos < @data.size
41
47
  chunk = 0 if chunk < 0
42
- codepoint = @data[@current_pos, 1].unpack("C*")[0]
43
- current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
48
+ codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
49
+ current_byte = codepoint & (2**@bits_left_in_byte - 1).to_i #clear consumed bits
44
50
  dif = bits_left_in_chunk - @bits_left_in_byte
45
51
  if dif > 0 then current_byte <<= dif
46
52
  elsif dif < 0 then current_byte >>= dif.abs
@@ -57,33 +63,43 @@ module PDF
57
63
  end
58
64
  end
59
65
 
60
- CODE_EOD = 257 #end of data
61
- CODE_CLEAR_TABLE = 256 #clear table
66
+ CODE_EOD = 257 #: Integer #end of data
67
+ CODE_CLEAR_TABLE = 256 #: Integer #clear table
62
68
 
63
69
  # stores the code => string pairs
64
- class StringTable < Hash # :nodoc:
70
+ class StringTable
71
+ #: Integer
65
72
  attr_reader :string_table_pos
66
73
 
74
+ #: () -> void
67
75
  def initialize
68
- super
69
- @string_table_pos = 258 #initial code
76
+ @data = Hash.new #: Hash[Integer, String]
77
+ # The initial code
78
+ @string_table_pos = 258 #: Integer
70
79
  end
71
80
 
72
81
  #if code less than 258 return fixed string
82
+ #: (Integer) -> String?
73
83
  def [](key)
74
- if key > 257 then super else key.chr end
84
+ if key > 257
85
+ @data[key]
86
+ else
87
+ key.chr
88
+ end
75
89
  end
76
90
 
91
+ #: (String) -> void
77
92
  def add(string)
78
- store(@string_table_pos, string)
93
+ @data.store(@string_table_pos, string)
79
94
  @string_table_pos += 1
80
95
  end
81
96
  end
82
97
 
83
98
  # Decompresses a LZW compressed string.
84
99
  #
100
+ #: (String) -> String
85
101
  def self.decode(data)
86
- stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
102
+ stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
87
103
  string_table = StringTable.new
88
104
  result = "".dup
89
105
  until (code = stream.read) == CODE_EOD
@@ -119,8 +135,18 @@ module PDF
119
135
  result
120
136
  end
121
137
 
122
- def self.create_new_string(string_table,some_code, other_code)
123
- string_table[some_code] + string_table[other_code][0].chr
138
+ #: (PDF::Reader::LZW::StringTable, Integer?, Integer?) -> String
139
+ def self.create_new_string(string_table, some_code, other_code)
140
+ raise MalformedPDFError, "invalid LZW data" if some_code.nil? || other_code.nil?
141
+
142
+ item_one = string_table[some_code]
143
+ item_two = string_table[other_code]
144
+
145
+ if item_one && item_two
146
+ item_one + item_two.chr
147
+ else
148
+ raise MalformedPDFError, "invalid LZW data"
149
+ end
124
150
  end
125
151
  private_class_method :create_new_string
126
152
 
@@ -0,0 +1,15 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # There's no point rendering runs that contain no text
7
+ class NoTextFilter
8
+
9
+ #: (Array[PDF::Reader::TextRun]) -> Array[PDF::Reader::TextRun]
10
+ def self.exclude_empty_strings(runs)
11
+ runs.reject { |run| run.text.to_s.size == 0 }
12
+ end
13
+ end
14
+ end
15
+
@@ -7,6 +7,7 @@ class PDF::Reader
7
7
  # A null object security handler. Used when a PDF is unencrypted.
8
8
  class NullSecurityHandler
9
9
 
10
+ #: (String, PDF::Reader::Reference) -> String
10
11
  def decrypt(buf, _ref)
11
12
  buf
12
13
  end
@@ -15,10 +15,15 @@ class PDF::Reader
15
15
  # These object types use little memory and are accessed a heap of times as
16
16
  # part of random page access, so we'll cache the unmarshalled objects and
17
17
  # avoid lots of repetitive (and expensive) tokenising
18
- CACHEABLE_TYPES = [:Catalog, :Page, :Pages]
18
+ CACHEABLE_TYPES = [:Catalog, :Page, :Pages] #: Array[Symbol]
19
19
 
20
- attr_reader :hits, :misses
20
+ #: untyped
21
+ attr_reader :hits
21
22
 
23
+ #: untyped
24
+ attr_reader :misses
25
+
26
+ #: (?untyped) -> void
22
27
  def initialize(lru_size = 1000)
23
28
  @objects = {}
24
29
  @lru_cache = Hashery::LRUHash.new(lru_size.to_i)
@@ -2,6 +2,8 @@
2
2
  # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
+ require 'tempfile'
6
+
5
7
  class PDF::Reader
6
8
  # Provides low level access to the objects in a PDF file via a hash-like
7
9
  # object.
@@ -30,8 +32,24 @@ class PDF::Reader
30
32
  class ObjectHash
31
33
  include Enumerable
32
34
 
35
+ #: type securityHandler = (
36
+ #| PDF::Reader::NullSecurityHandler |
37
+ #| PDF::Reader::AesV2SecurityHandler |
38
+ #| PDF::Reader::Rc4SecurityHandler |
39
+ #| PDF::Reader::AesV3SecurityHandler |
40
+ #| PDF::Reader::UnimplementedSecurityHandler
41
+ #| )
42
+
43
+ #: untyped
33
44
  attr_accessor :default
34
- attr_reader :trailer, :pdf_version
45
+
46
+ #: Hash[Symbol, untyped]
47
+ attr_reader :trailer
48
+
49
+ #: Float
50
+ attr_reader :pdf_version
51
+
52
+ #: securityHandler
35
53
  attr_reader :sec_handler
36
54
 
37
55
  # Creates a new ObjectHash object. Input can be a string with a valid filename
@@ -41,21 +59,25 @@ class PDF::Reader
41
59
  #
42
60
  # :password - the user password to decrypt the source PDF
43
61
  #
62
+ #: ((IO | Tempfile | StringIO | String), ?Hash[Symbol, untyped]) -> void
44
63
  def initialize(input, opts = {})
45
- @io = extract_io_from(input)
46
- @xref = PDF::Reader::XRef.new(@io)
47
- @pdf_version = read_version
48
- @trailer = @xref.trailer
49
- @cache = opts[:cache] || PDF::Reader::ObjectCache.new
50
- @sec_handler = NullSecurityHandler.new
64
+ @io = extract_io_from(input) #: IO | Tempfile | StringIO
65
+ @xref = PDF::Reader::XRef.new(@io) #: PDF::Reader::XRef[PDF::Reader::Reference]
66
+ @pdf_version = read_version #: Float
67
+ @trailer = @xref.trailer #: Hash[Symbol, untyped]
68
+ @cache = opts[:cache] || PDF::Reader::ObjectCache.new #: PDF::Reader::ObjectCache
69
+ @sec_handler = NullSecurityHandler.new #: securityHandler
51
70
  @sec_handler = SecurityHandlerFactory.build(
52
71
  deref(trailer[:Encrypt]),
53
72
  deref(trailer[:ID]),
54
73
  opts[:password]
55
74
  )
75
+ @page_references = nil #: Array[PDF::Reader::Reference | Hash[Symbol, untyped]]?
76
+ @object_streams = nil #: Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]?
56
77
  end
57
78
 
58
79
  # returns the type of object a ref points to
80
+ #: ((Integer | PDF::Reader::Reference)) -> Symbol?
59
81
  def obj_type(ref)
60
82
  self[ref].class.to_s.to_sym
61
83
  rescue
@@ -63,6 +85,7 @@ class PDF::Reader
63
85
  end
64
86
 
65
87
  # returns true if the supplied references points to an object with a stream
88
+ #: ((Integer | PDF::Reader::Reference)) -> bool
66
89
  def stream?(ref)
67
90
  self.has_key?(ref) && self[ref].is_a?(PDF::Reader::Stream)
68
91
  end
@@ -76,6 +99,7 @@ class PDF::Reader
76
99
  # If a PDF::Reader::Reference object is used the exact ID and generation number
77
100
  # can be specified.
78
101
  #
102
+ #: ((Integer | PDF::Reader::Reference)) -> untyped
79
103
  def [](key)
80
104
  return default if key.to_i <= 0
81
105
 
@@ -91,6 +115,7 @@ class PDF::Reader
91
115
  # If key is a PDF::Reader::Reference object, lookup the corresponding
92
116
  # object in the PDF and return it. Otherwise return key untouched.
93
117
  #
118
+ #: (untyped) -> untyped
94
119
  def object(key)
95
120
  key.is_a?(PDF::Reader::Reference) ? self[key] : key
96
121
  end
@@ -102,6 +127,7 @@ class PDF::Reader
102
127
  # Guaranteed to only return an Array or nil. If the dereference results in
103
128
  # any other type then a MalformedPDFError exception will raise. Useful when
104
129
  # expecting an Array and no other type will do.
130
+ #: (untyped) -> Array[untyped]?
105
131
  def deref_array(key)
106
132
  obj = deref(key)
107
133
 
@@ -120,6 +146,7 @@ class PDF::Reader
120
146
  # expecting an Array and no other type will do.
121
147
  #
122
148
  # Some effort to cast array elements to a number is made for any non-numeric elements.
149
+ #: (untyped) -> Array[Numeric]?
123
150
  def deref_array_of_numbers(key)
124
151
  arr = deref(key)
125
152
 
@@ -146,6 +173,7 @@ class PDF::Reader
146
173
  # Guaranteed to only return a Hash or nil. If the dereference results in
147
174
  # any other type then a MalformedPDFError exception will raise. Useful when
148
175
  # expecting a Hash and no other type will do.
176
+ #: (untyped) -> Hash[Symbol, untyped]?
149
177
  def deref_hash(key)
150
178
  obj = deref(key)
151
179
 
@@ -164,6 +192,7 @@ class PDF::Reader
164
192
  # expecting an Array and no other type will do.
165
193
  #
166
194
  # Some effort to cast to a symbol is made when the reference points to a non-symbol.
195
+ #: (untyped) -> Symbol?
167
196
  def deref_name(key)
168
197
  obj = deref(key)
169
198
 
@@ -188,6 +217,7 @@ class PDF::Reader
188
217
  # expecting an Integer and no other type will do.
189
218
  #
190
219
  # Some effort to cast to an int is made when the reference points to a non-integer.
220
+ #: (untyped) -> Integer?
191
221
  def deref_integer(key)
192
222
  obj = deref(key)
193
223
 
@@ -212,6 +242,7 @@ class PDF::Reader
212
242
  # expecting an Array and no other type will do.
213
243
  #
214
244
  # Some effort to cast to a number is made when the reference points to a non-number.
245
+ #: (untyped) -> Numeric?
215
246
  def deref_number(key)
216
247
  obj = deref(key)
217
248
 
@@ -236,6 +267,7 @@ class PDF::Reader
236
267
  # Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
237
268
  # any other type then a MalformedPDFError exception will raise. Useful when
238
269
  # expecting a stream and no other type will do.
270
+ #: (untyped) -> PDF::Reader::Stream?
239
271
  def deref_stream(key)
240
272
  obj = deref(key)
241
273
 
@@ -243,7 +275,7 @@ class PDF::Reader
243
275
 
244
276
  obj.tap { |obj|
245
277
  if !obj.is_a?(PDF::Reader::Stream)
246
- raise MalformedPDFError, "expected object to be an Array or nil"
278
+ raise MalformedPDFError, "expected object to be a Stream or nil"
247
279
  end
248
280
  }
249
281
  end
@@ -256,6 +288,7 @@ class PDF::Reader
256
288
  # expecting a string and no other type will do.
257
289
  #
258
290
  # Some effort to cast to a string is made when the reference points to a non-string.
291
+ #: (untyped) -> String?
259
292
  def deref_string(key)
260
293
  obj = deref(key)
261
294
 
@@ -278,6 +311,7 @@ class PDF::Reader
278
311
  # Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
279
312
  # any other type then a MalformedPDFError exception will raise. Useful when
280
313
  # expecting a Name or Array and no other type will do.
314
+ #: (untyped) -> (Symbol | Array[untyped] | nil)
281
315
  def deref_name_or_array(key)
282
316
  obj = deref(key)
283
317
 
@@ -296,6 +330,7 @@ class PDF::Reader
296
330
  # Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
297
331
  # any other type then a MalformedPDFError exception will raise. Useful when
298
332
  # expecting a stream or Array and no other type will do.
333
+ #: (untyped) -> (PDF::Reader::Stream | Array[untyped] | nil)
299
334
  def deref_stream_or_array(key)
300
335
  obj = deref(key)
301
336
 
@@ -311,10 +346,12 @@ class PDF::Reader
311
346
  # Recursively dereferences the object referred to by +key+. If +key+ is not
312
347
  # a PDF::Reader::Reference, the key is returned unchanged.
313
348
  #
349
+ #: (untyped) -> untyped
314
350
  def deref!(key)
315
351
  deref_internal!(key, {})
316
352
  end
317
353
 
354
+ #: (untyped) -> Array[untyped]?
318
355
  def deref_array!(key)
319
356
  deref!(key).tap { |obj|
320
357
  if !obj.nil? && !obj.is_a?(Array)
@@ -323,6 +360,7 @@ class PDF::Reader
323
360
  }
324
361
  end
325
362
 
363
+ #: (untyped) -> Hash[Symbol, untyped]?
326
364
  def deref_hash!(key)
327
365
  deref!(key).tap { |obj|
328
366
  if !obj.nil? && !obj.is_a?(Hash)
@@ -343,6 +381,7 @@ class PDF::Reader
343
381
  # local_default is the object that will be returned if the requested key doesn't
344
382
  # exist.
345
383
  #
384
+ #: (untyped, ?untyped) -> untyped
346
385
  def fetch(key, local_default = nil)
347
386
  obj = self[key]
348
387
  if obj
@@ -356,6 +395,8 @@ class PDF::Reader
356
395
 
357
396
  # iterate over each key, value. Just like a ruby hash.
358
397
  #
398
+ # @override(allow_incompatible: true)
399
+ #: () { (PDF::Reader::Reference, untyped) -> untyped } -> untyped
359
400
  def each(&block)
360
401
  @xref.each do |ref|
361
402
  yield ref, self[ref]
@@ -365,6 +406,7 @@ class PDF::Reader
365
406
 
366
407
  # iterate over each key. Just like a ruby hash.
367
408
  #
409
+ #: { (PDF::Reader::Reference) -> untyped } -> untyped
368
410
  def each_key(&block)
369
411
  each do |id, obj|
370
412
  yield id
@@ -373,6 +415,7 @@ class PDF::Reader
373
415
 
374
416
  # iterate over each value. Just like a ruby hash.
375
417
  #
418
+ #: { (untyped) -> untyped } -> untyped
376
419
  def each_value(&block)
377
420
  each do |id, obj|
378
421
  yield obj
@@ -381,6 +424,7 @@ class PDF::Reader
381
424
 
382
425
  # return the number of objects in the file. An object with multiple generations
383
426
  # is counted once.
427
+ #: () -> Integer
384
428
  def size
385
429
  xref.size
386
430
  end
@@ -388,6 +432,7 @@ class PDF::Reader
388
432
 
389
433
  # return true if there are no objects in this file
390
434
  #
435
+ #: () -> bool
391
436
  def empty?
392
437
  size == 0 ? true : false
393
438
  end
@@ -395,6 +440,7 @@ class PDF::Reader
395
440
  # return true if the specified key exists in the file. key
396
441
  # can be an int or a PDF::Reader::Reference
397
442
  #
443
+ #: (untyped) -> bool
398
444
  def has_key?(check_key)
399
445
  # TODO update from O(n) to O(1)
400
446
  each_key do |key|
@@ -412,6 +458,7 @@ class PDF::Reader
412
458
 
413
459
  # return true if the specified value exists in the file
414
460
  #
461
+ #: (untyped) -> bool
415
462
  def has_value?(value)
416
463
  # TODO update from O(n) to O(1)
417
464
  each_value do |obj|
@@ -421,12 +468,14 @@ class PDF::Reader
421
468
  end
422
469
  alias :value? :has_key?
423
470
 
471
+ #: () -> String
424
472
  def to_s
425
473
  "<PDF::Reader::ObjectHash size: #{self.size}>"
426
474
  end
427
475
 
428
476
  # return an array of all keys in the file
429
477
  #
478
+ #: () -> Array[PDF::Reader::Reference]
430
479
  def keys
431
480
  ret = []
432
481
  each_key { |k| ret << k }
@@ -435,6 +484,7 @@ class PDF::Reader
435
484
 
436
485
  # return an array of all values in the file
437
486
  #
487
+ #: () -> untyped
438
488
  def values
439
489
  ret = []
440
490
  each_value { |v| ret << v }
@@ -443,12 +493,14 @@ class PDF::Reader
443
493
 
444
494
  # return an array of all values from the specified keys
445
495
  #
496
+ #: (*untyped) -> untyped
446
497
  def values_at(*ids)
447
498
  ids.map { |id| self[id] }
448
499
  end
449
500
 
450
501
  # return an array of arrays. Each sub array contains a key/value pair.
451
502
  #
503
+ #: () -> untyped
452
504
  def to_a
453
505
  ret = []
454
506
  each do |id, obj|
@@ -463,6 +515,7 @@ class PDF::Reader
463
515
  #
464
516
  # Useful for apps that want to extract data from specific pages.
465
517
  #
518
+ #: () -> Array[PDF::Reader::Reference | Hash[Symbol, untyped]]
466
519
  def page_references
467
520
  root = fetch(trailer[:Root])
468
521
  @page_references ||= begin
@@ -471,10 +524,12 @@ class PDF::Reader
471
524
  end
472
525
  end
473
526
 
527
+ #: () -> bool
474
528
  def encrypted?
475
529
  trailer.has_key?(:Encrypt)
476
530
  end
477
531
 
532
+ #: () -> bool
478
533
  def sec_handler?
479
534
  !!sec_handler
480
535
  end
@@ -484,6 +539,17 @@ class PDF::Reader
484
539
  # parse a traditional object from the PDF, starting from the byte offset indicated
485
540
  # in the xref table
486
541
  #
542
+ #: (PDF::Reader::Reference) -> (
543
+ #| PDF::Reader::Reference |
544
+ #| PDF::Reader::Token |
545
+ #| PDF::Reader::Stream |
546
+ #| Numeric |
547
+ #| String |
548
+ #| Symbol |
549
+ #| Array[untyped] |
550
+ #| Hash[untyped, untyped] |
551
+ #| nil
552
+ #| )
487
553
  def fetch_object(key)
488
554
  if xref[key].is_a?(Integer)
489
555
  buf = new_buffer(xref[key])
@@ -493,11 +559,25 @@ class PDF::Reader
493
559
 
494
560
  # parse an object that's embedded in an object stream in the PDF
495
561
  #
562
+ #: (PDF::Reader::Reference) -> (
563
+ #| PDF::Reader::Reference |
564
+ #| PDF::Reader::Token |
565
+ #| PDF::Reader::Stream |
566
+ #| Numeric |
567
+ #| String |
568
+ #| Symbol |
569
+ #| Array[untyped] |
570
+ #| Hash[untyped, untyped] |
571
+ #| nil
572
+ #| )
496
573
  def fetch_object_stream(key)
497
574
  if xref[key].is_a?(PDF::Reader::Reference)
498
575
  container_key = xref[key]
499
- object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
500
- object_streams[container_key][key.id]
576
+ stream = deref_stream(container_key)
577
+ raise MalformedPDFError, "Object Stream cannot be nil" if stream.nil?
578
+ if objstream = object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
579
+ objstream[key.id]
580
+ end
501
581
  end
502
582
  end
503
583
 
@@ -505,6 +585,17 @@ class PDF::Reader
505
585
  # isn't publicly available. It's used to avoid endless loops in the recursion, and
506
586
  # doesn't need to be part of the public API.
507
587
  #
588
+ #: (untyped, Hash[Integer, untyped]) -> (
589
+ #| PDF::Reader::Reference |
590
+ #| PDF::Reader::Token |
591
+ #| PDF::Reader::Stream |
592
+ #| Numeric |
593
+ #| String |
594
+ #| Symbol |
595
+ #| Array[untyped] |
596
+ #| Hash[untyped, untyped] |
597
+ #| nil
598
+ #| )
508
599
  def deref_internal!(key, seen)
509
600
  seen_key = key.is_a?(PDF::Reader::Reference) ? key : key.object_id
510
601
 
@@ -534,6 +625,17 @@ class PDF::Reader
534
625
  end
535
626
  end
536
627
 
628
+ #: (PDF::Reader::Reference, untyped) -> (
629
+ #| PDF::Reader::Reference |
630
+ #| PDF::Reader::Token |
631
+ #| PDF::Reader::Stream |
632
+ #| Numeric |
633
+ #| String |
634
+ #| Symbol |
635
+ #| Array[untyped] |
636
+ #| Hash[untyped, untyped] |
637
+ #| nil
638
+ #| )
537
639
  def decrypt(ref, obj)
538
640
  case obj
539
641
  when PDF::Reader::Stream then
@@ -555,25 +657,33 @@ class PDF::Reader
555
657
  end
556
658
  end
557
659
 
660
+ #: (?Integer) -> PDF::Reader::Buffer
558
661
  def new_buffer(offset = 0)
559
662
  PDF::Reader::Buffer.new(@io, :seek => offset)
560
663
  end
561
664
 
665
+ #: () -> PDF::Reader::XRef[PDF::Reader::Reference]
562
666
  def xref
563
667
  @xref
564
668
  end
565
669
 
670
+ #: () -> Hash[PDF::Reader::Reference, PDF::Reader::ObjectStream]
566
671
  def object_streams
567
- @object_stream ||= {}
672
+ @object_streams ||= {}
568
673
  end
569
674
 
570
675
  # returns an array of object references for all pages in this object store. The ordering of
571
676
  # the Array is significant and matches the page ordering of the document
572
677
  #
678
+ #: (PDF::Reader::Reference | Hash[Symbol, untyped]) -> (
679
+ #| Array[PDF::Reader::Reference | Hash[Symbol, untyped] ]
680
+ #| )
573
681
  def get_page_objects(obj)
574
682
  derefed_obj = deref_hash(obj)
575
683
 
576
- if derefed_obj[:Type] == :Page
684
+ if derefed_obj.nil?
685
+ raise MalformedPDFError, "Expected Page or Pages object, got nil"
686
+ elsif derefed_obj[:Type] == :Page
577
687
  [obj]
578
688
  elsif derefed_obj[:Kids]
579
689
  kids = deref_array(derefed_obj[:Kids]) || []
@@ -585,23 +695,26 @@ class PDF::Reader
585
695
  end
586
696
  end
587
697
 
698
+ #: () -> Float
588
699
  def read_version
589
700
  @io.seek(0)
590
- _m, version = *@io.read(10).match(/PDF-(\d.\d)/)
701
+ _m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
591
702
  @io.seek(0)
592
703
  version.to_f
593
704
  end
594
705
 
706
+ #: (IO | Tempfile | StringIO | String) -> (IO | Tempfile | StringIO)
595
707
  def extract_io_from(input)
596
- if input.respond_to?(:seek) && input.respond_to?(:read)
708
+ if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
597
709
  input
598
710
  elsif File.file?(input.to_s)
599
- StringIO.new read_as_binary(input)
711
+ StringIO.new read_as_binary(input.to_s)
600
712
  else
601
- raise ArgumentError, "input must be an IO-like object or a filename"
713
+ raise ArgumentError, "input must be an IO-like object or a filename (#{input.class})"
602
714
  end
603
715
  end
604
716
 
717
+ #: (String) -> (String)
605
718
  def read_as_binary(input)
606
719
  if File.respond_to?(:binread)
607
720
  File.binread(input.to_s)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -8,11 +8,24 @@ class PDF::Reader
8
8
  # This is done for added compression and is described as an "Object Stream" in the spec.
9
9
  #
10
10
  class ObjectStream # :nodoc:
11
+ #: (PDF::Reader::Stream) -> void
11
12
  def initialize(stream)
12
- @dict = stream.hash
13
- @data = stream.unfiltered_data
13
+ @dict = stream.hash #: Hash[Symbol, untyped]
14
+ @data = stream.unfiltered_data #: String
15
+ @offsets = nil #: Hash[Integer, Integer] | nil
16
+ @buffer = nil #: PDF::Reader::Buffer | nil
14
17
  end
15
18
 
19
+ #: (Integer) -> (
20
+ #| PDF::Reader::Reference |
21
+ #| PDF::Reader::Token |
22
+ #| Numeric |
23
+ #| String |
24
+ #| Symbol |
25
+ #| Array[untyped] |
26
+ #| Hash[untyped, untyped] |
27
+ #| nil
28
+ #| )
16
29
  def [](objid)
17
30
  if offsets[objid].nil?
18
31
  nil
@@ -23,12 +36,14 @@ class PDF::Reader
23
36
  end
24
37
  end
25
38
 
39
+ #: () -> Integer
26
40
  def size
27
- @dict[:N]
41
+ TypeCheck.cast_to_int!(@dict[:N])
28
42
  end
29
43
 
30
44
  private
31
45
 
46
+ #: () -> Hash[Integer, Integer]
32
47
  def offsets
33
48
  @offsets ||= {}
34
49
  return @offsets if @offsets.keys.size > 0
@@ -39,10 +54,12 @@ class PDF::Reader
39
54
  @offsets
40
55
  end
41
56
 
57
+ #: () -> Integer
42
58
  def first
43
- @dict[:First]
59
+ TypeCheck.cast_to_int!(@dict[:First])
44
60
  end
45
61
 
62
+ #: () -> PDF::Reader::Buffer
46
63
  def buffer
47
64
  @buffer ||= PDF::Reader::Buffer.new(StringIO.new(@data))
48
65
  end