pdf-reader 2.5.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -47,7 +48,11 @@ class PDF::Reader
47
48
  @trailer = @xref.trailer
48
49
  @cache = opts[:cache] || PDF::Reader::ObjectCache.new
49
50
  @sec_handler = NullSecurityHandler.new
50
- @sec_handler = build_security_handler(opts)
51
+ @sec_handler = SecurityHandlerFactory.build(
52
+ deref(trailer[:Encrypt]),
53
+ deref(trailer[:ID]),
54
+ opts[:password]
55
+ )
51
56
  end
52
57
 
53
58
  # returns the type of object a ref points to
@@ -91,6 +96,218 @@ class PDF::Reader
91
96
  end
92
97
  alias :deref :object
93
98
 
99
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
100
+ # object in the PDF and return it. Otherwise return key untouched.
101
+ #
102
+ # Guaranteed to only return an Array or nil. If the dereference results in
103
+ # any other type then a MalformedPDFError exception will raise. Useful when
104
+ # expecting an Array and no other type will do.
105
+ def deref_array(key)
106
+ obj = deref(key)
107
+
108
+ return obj if obj.nil?
109
+
110
+ obj.tap { |obj|
111
+ raise MalformedPDFError, "expected object to be an Array or nil" if !obj.is_a?(Array)
112
+ }
113
+ end
114
+
115
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
116
+ # object in the PDF and return it. Otherwise return key untouched.
117
+ #
118
+ # Guaranteed to only return an Array of Numerics or nil. If the dereference results in
119
+ # any other type then a MalformedPDFError exception will raise. Useful when
120
+ # expecting an Array and no other type will do.
121
+ #
122
+ # Some effort to cast array elements to a number is made for any non-numeric elements.
123
+ def deref_array_of_numbers(key)
124
+ arr = deref(key)
125
+
126
+ return arr if arr.nil?
127
+
128
+ raise MalformedPDFError, "expected object to be an Array" unless arr.is_a?(Array)
129
+
130
+ arr.map { |item|
131
+ if item.is_a?(Numeric)
132
+ item
133
+ elsif item.respond_to?(:to_f)
134
+ item.to_f
135
+ elsif item.respond_to?(:to_i)
136
+ item.to_i
137
+ else
138
+ raise MalformedPDFError, "expected object to be a number"
139
+ end
140
+ }
141
+ end
142
+
143
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
144
+ # object in the PDF and return it. Otherwise return key untouched.
145
+ #
146
+ # Guaranteed to only return a Hash or nil. If the dereference results in
147
+ # any other type then a MalformedPDFError exception will raise. Useful when
148
+ # expecting a Hash and no other type will do.
149
+ def deref_hash(key)
150
+ obj = deref(key)
151
+
152
+ return obj if obj.nil?
153
+
154
+ obj.tap { |obj|
155
+ raise MalformedPDFError, "expected object to be a Hash or nil" if !obj.is_a?(Hash)
156
+ }
157
+ end
158
+
159
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
160
+ # object in the PDF and return it. Otherwise return key untouched.
161
+ #
162
+ # Guaranteed to only return a PDF name (Symbol) or nil. If the dereference results in
163
+ # any other type then a MalformedPDFError exception will raise. Useful when
164
+ # expecting a Name and no other type will do.
165
+ #
166
+ # Some effort to cast to a symbol is made when the reference points to a non-symbol.
167
+ def deref_name(key)
168
+ obj = deref(key)
169
+
170
+ return obj if obj.nil?
171
+
172
+ if !obj.is_a?(Symbol)
173
+ if obj.respond_to?(:to_sym)
174
+ obj = obj.to_sym
175
+ else
176
+ raise MalformedPDFError, "expected object to be a Name"
177
+ end
178
+ end
179
+
180
+ obj
181
+ end
182
+
183
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
184
+ # object in the PDF and return it. Otherwise return key untouched.
185
+ #
186
+ # Guaranteed to only return an Integer or nil. If the dereference results in
187
+ # any other type then a MalformedPDFError exception will raise. Useful when
188
+ # expecting an Integer and no other type will do.
189
+ #
190
+ # Some effort to cast to an int is made when the reference points to a non-integer.
191
+ def deref_integer(key)
192
+ obj = deref(key)
193
+
194
+ return obj if obj.nil?
195
+
196
+ if !obj.is_a?(Integer)
197
+ if obj.respond_to?(:to_i)
198
+ obj = obj.to_i
199
+ else
200
+ raise MalformedPDFError, "expected object to be an Integer"
201
+ end
202
+ end
203
+
204
+ obj
205
+ end
206
+
207
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
208
+ # object in the PDF and return it. Otherwise return key untouched.
209
+ #
210
+ # Guaranteed to only return a Numeric or nil. If the dereference results in
211
+ # any other type then a MalformedPDFError exception will raise. Useful when
212
+ # expecting a number and no other type will do.
213
+ #
214
+ # Some effort to cast to a number is made when the reference points to a non-number.
215
+ def deref_number(key)
216
+ obj = deref(key)
217
+
218
+ return obj if obj.nil?
219
+
220
+ if !obj.is_a?(Numeric)
221
+ if obj.respond_to?(:to_f)
222
+ obj = obj.to_f
223
+ elsif obj.respond_to?(:to_i)
224
+ obj.to_i
225
+ else
226
+ raise MalformedPDFError, "expected object to be a number"
227
+ end
228
+ end
229
+
230
+ obj
231
+ end
232
+
233
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
234
+ # object in the PDF and return it. Otherwise return key untouched.
235
+ #
236
+ # Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
237
+ # any other type then a MalformedPDFError exception will raise. Useful when
238
+ # expecting a stream and no other type will do.
239
+ def deref_stream(key)
240
+ obj = deref(key)
241
+
242
+ return obj if obj.nil?
243
+
244
+ obj.tap { |obj|
245
+ if !obj.is_a?(PDF::Reader::Stream)
246
+ raise MalformedPDFError, "expected object to be an Array or nil"
247
+ end
248
+ }
249
+ end
250
+
251
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
252
+ # object in the PDF and return it. Otherwise return key untouched.
253
+ #
254
+ # Guaranteed to only return a String or nil. If the dereference results in
255
+ # any other type then a MalformedPDFError exception will raise. Useful when
256
+ # expecting a string and no other type will do.
257
+ #
258
+ # Some effort to cast to a string is made when the reference points to a non-string.
259
+ def deref_string(key)
260
+ obj = deref(key)
261
+
262
+ return obj if obj.nil?
263
+
264
+ if !obj.is_a?(String)
265
+ if obj.respond_to?(:to_s)
266
+ obj = obj.to_s
267
+ else
268
+ raise MalformedPDFError, "expected object to be a string"
269
+ end
270
+ end
271
+
272
+ obj
273
+ end
274
+
275
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
276
+ # object in the PDF and return it. Otherwise return key untouched.
277
+ #
278
+ # Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
279
+ # any other type then a MalformedPDFError exception will raise. Useful when
280
+ # expecting a Name or Array and no other type will do.
281
+ def deref_name_or_array(key)
282
+ obj = deref(key)
283
+
284
+ return obj if obj.nil?
285
+
286
+ obj.tap { |obj|
287
+ if !obj.is_a?(Symbol) && !obj.is_a?(Array)
288
+ raise MalformedPDFError, "expected object to be an Array or Name"
289
+ end
290
+ }
291
+ end
292
+
293
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
294
+ # object in the PDF and return it. Otherwise return key untouched.
295
+ #
296
+ # Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
297
+ # any other type then a MalformedPDFError exception will raise. Useful when
298
+ # expecting a stream or Array and no other type will do.
299
+ def deref_stream_or_array(key)
300
+ obj = deref(key)
301
+
302
+ return obj if obj.nil?
303
+
304
+ obj.tap { |obj|
305
+ if !obj.is_a?(PDF::Reader::Stream) && !obj.is_a?(Array)
306
+ raise MalformedPDFError, "expected object to be an Array or Stream"
307
+ end
308
+ }
309
+ end
310
+
94
311
  # Recursively dereferences the object referred to by +key+. If +key+ is not
95
312
  # a PDF::Reader::Reference, the key is returned unchanged.
96
313
  #
@@ -98,6 +315,22 @@ class PDF::Reader
98
315
  deref_internal!(key, {})
99
316
  end
100
317
 
318
+ def deref_array!(key)
319
+ deref!(key).tap { |obj|
320
+ if !obj.nil? && !obj.is_a?(Array)
321
+ raise MalformedPDFError, "expected object (#{obj.inspect}) to be an Array or nil"
322
+ end
323
+ }
324
+ end
325
+
326
+ def deref_hash!(key)
327
+ deref!(key).tap { |obj|
328
+ if !obj.nil? && !obj.is_a?(Hash)
329
+ raise MalformedPDFError, "expected object (#{obj.inspect}) to be a Hash or nil"
330
+ end
331
+ }
332
+ end
333
+
101
334
  # Access an object from the PDF. key can be an int or a PDF::Reader::Reference
102
335
  # object.
103
336
  #
@@ -232,7 +465,10 @@ class PDF::Reader
232
465
  #
233
466
  def page_references
234
467
  root = fetch(trailer[:Root])
235
- @page_references ||= get_page_objects(root[:Pages]).flatten
468
+ @page_references ||= begin
469
+ pages_root = deref_hash(root[:Pages]) || {}
470
+ get_page_objects(pages_root)
471
+ end
236
472
  end
237
473
 
238
474
  def encrypted?
@@ -298,36 +534,6 @@ class PDF::Reader
298
534
  end
299
535
  end
300
536
 
301
- def build_security_handler(opts = {})
302
- encrypt = deref(trailer[:Encrypt])
303
- if NullSecurityHandler.supports?(encrypt)
304
- NullSecurityHandler.new
305
- elsif StandardSecurityHandler.supports?(encrypt)
306
- encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
307
- StandardSecurityHandler.new(
308
- key_length: (encrypt[:Length] || 40).to_i,
309
- revision: encrypt[:R],
310
- owner_key: encrypt[:O],
311
- user_key: encrypt[:U],
312
- permissions: encrypt[:P].to_i,
313
- encrypted_metadata: encmeta,
314
- file_id: (deref(trailer[:ID]) || []).first,
315
- password: opts[:password],
316
- cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
317
- )
318
- elsif StandardSecurityHandlerV5.supports?(encrypt)
319
- StandardSecurityHandlerV5.new(
320
- O: encrypt[:O],
321
- U: encrypt[:U],
322
- OE: encrypt[:OE],
323
- UE: encrypt[:UE],
324
- password: opts[:password]
325
- )
326
- else
327
- UnimplementedSecurityHandler.new
328
- end
329
- end
330
-
331
537
  def decrypt(ref, obj)
332
538
  case obj
333
539
  when PDF::Reader::Stream then
@@ -336,8 +542,10 @@ class PDF::Reader
336
542
  obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
337
543
  obj
338
544
  when Hash then
339
- arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
340
- Hash[*arr]
545
+ arr = obj.map { |key,val| [key, decrypt(ref, val)] }
546
+ arr.each_with_object({}) { |(k,v), accum|
547
+ accum[k] = v
548
+ }
341
549
  when Array then
342
550
  obj.collect { |item| decrypt(ref, item) }
343
551
  when String
@@ -359,19 +567,19 @@ class PDF::Reader
359
567
  @object_stream ||= {}
360
568
  end
361
569
 
362
- # returns a nested array of object references for all pages in this object store.
570
+ # returns an array of object references for all pages in this object store. The ordering of
571
+ # the Array is significant and matches the page ordering of the document
363
572
  #
364
- def get_page_objects(ref)
365
- obj = deref(ref)
366
-
367
- unless obj.kind_of?(::Hash)
368
- raise MalformedPDFError, "Dereferenced page object must be a dict"
369
- end
370
-
573
+ def get_page_objects(obj)
371
574
  if obj[:Type] == :Page
372
- ref
575
+ [obj]
373
576
  elsif obj[:Kids]
374
- deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
577
+ kids = deref_array(obj[:Kids]) || []
578
+ kids.map { |kid|
579
+ get_page_objects(deref_hash(kid) || {})
580
+ }.flatten
581
+ else
582
+ raise MalformedPDFError, "Expected Page or Pages object"
375
583
  end
376
584
  end
377
585
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -1,4 +1,6 @@
1
+ # typed: true
1
2
  # coding: utf-8
3
+ # frozen_string_literal: true
2
4
 
3
5
  class PDF::Reader
4
6
  # remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
@@ -38,7 +40,8 @@ class PDF::Reader
38
40
 
39
41
  def self.detect_intersection(sweep_line_status, event_point)
40
42
  sweep_line_status.each do |open_text_run|
41
- if event_point.x >= open_text_run.x &&
43
+ if open_text_run.text == event_point.run.text &&
44
+ event_point.x >= open_text_run.x &&
42
45
  event_point.x <= open_text_run.endx &&
43
46
  open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
44
47
  return true
@@ -51,10 +54,14 @@ class PDF::Reader
51
54
  # Utility class used to avoid modifying the underlying TextRun objects while we're
52
55
  # looking for duplicates
53
56
  class EventPoint
54
- attr_reader :x, :run
55
57
 
56
- def initialize x, run
57
- @x, @run = x, run
58
+ attr_reader :x
59
+
60
+ attr_reader :run
61
+
62
+ def initialize(x, run)
63
+ @x = x
64
+ @run = run
58
65
  end
59
66
 
60
67
  def start?
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -13,7 +14,7 @@ module PDF
13
14
  # objects accessor to help walk the page dictionary in any useful way.
14
15
  #
15
16
  class Page
16
- include ResourceMethods
17
+ extend Forwardable
17
18
 
18
19
  # lowlevel hash-like access to all objects in the underlying PDF
19
20
  attr_reader :objects
@@ -26,6 +27,15 @@ module PDF
26
27
  # operations
27
28
  attr_reader :cache
28
29
 
30
+ def_delegators :resources, :color_spaces
31
+ def_delegators :resources, :fonts
32
+ def_delegators :resources, :graphic_states
33
+ def_delegators :resources, :patterns
34
+ def_delegators :resources, :procedure_sets
35
+ def_delegators :resources, :properties
36
+ def_delegators :resources, :shadings
37
+ def_delegators :resources, :xobjects
38
+
29
39
  # creates a new page wrapper.
30
40
  #
31
41
  # * objects - an ObjectHash instance that wraps a PDF file
@@ -33,7 +43,7 @@ module PDF
33
43
  #
34
44
  def initialize(objects, pagenum, options = {})
35
45
  @objects, @pagenum = objects, pagenum
36
- @page_object = objects.deref(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
37
47
  @cache = options[:cache] || {}
38
48
 
39
49
  unless @page_object.is_a?(::Hash)
@@ -59,7 +69,7 @@ module PDF
59
69
  def attributes
60
70
  @attributes ||= {}.tap { |hash|
61
71
  page_with_ancestors.reverse.each do |obj|
62
- hash.merge!(@objects.deref(obj))
72
+ hash.merge!(@objects.deref_hash(obj) || {})
63
73
  end
64
74
  }
65
75
  # This shouldn't be necessary, but some non compliant PDFs leave MediaBox
@@ -68,22 +78,56 @@ module PDF
68
78
  @attributes
69
79
  end
70
80
 
81
+ def height
82
+ rect = Rectangle.new(*attributes[:MediaBox])
83
+ rect.apply_rotation(rotate) if rotate > 0
84
+ rect.height
85
+ end
86
+
87
+ def width
88
+ rect = Rectangle.new(*attributes[:MediaBox])
89
+ rect.apply_rotation(rotate) if rotate > 0
90
+ rect.width
91
+ end
92
+
93
+ def origin
94
+ rect = Rectangle.new(*attributes[:MediaBox])
95
+ rect.apply_rotation(rotate) if rotate > 0
96
+
97
+ rect.bottom_left
98
+ end
99
+
71
100
  # Convenience method to identify the page's orientation.
72
101
  #
73
102
  def orientation
74
- OrientationDetector.new(attributes).orientation
103
+ if height > width
104
+ "portrait"
105
+ else
106
+ "landscape"
107
+ end
75
108
  end
76
109
 
77
110
  # returns the plain text content of this page encoded as UTF-8. Any
78
111
  # characters that can't be translated will be returned as a ▯
79
112
  #
80
- def text
113
+ def text(opts = {})
81
114
  receiver = PageTextReceiver.new
82
115
  walk(receiver)
83
- receiver.content
116
+ runs = receiver.runs(opts)
117
+
118
+ # rectangles[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
119
+ mediabox = rectangles[:MediaBox] || Rectangle.new(0, 0, 0, 0)
120
+
121
+ PageLayout.new(runs, mediabox).to_s
84
122
  end
85
123
  alias :to_s :text
86
124
 
125
+ def runs(opts = {})
126
+ receiver = PageTextReceiver.new
127
+ walk(receiver)
128
+ receiver.runs(opts)
129
+ end
130
+
87
131
  # processes the raw content stream for this page in sequential order and
88
132
  # passes callbacks to the receiver objects.
89
133
  #
@@ -108,6 +152,9 @@ module PDF
108
152
  # the program in the correct order and calls out to your implementation.
109
153
  #
110
154
  def walk(*receivers)
155
+ receivers = receivers.map { |receiver|
156
+ ValidatingReceiver.new(receiver)
157
+ }
111
158
  callback(receivers, :page=, [self])
112
159
  content_stream(receivers, raw_content)
113
160
  end
@@ -116,10 +163,10 @@ module PDF
116
163
  # see here unless you're a PDF nerd like me.
117
164
  #
118
165
  def raw_content
119
- contents = objects.deref(@page_object[:Contents])
166
+ contents = objects.deref_stream_or_array(@page_object[:Contents])
120
167
  [contents].flatten.compact.map { |obj|
121
- objects.deref(obj)
122
- }.map { |obj|
168
+ objects.deref_stream(obj)
169
+ }.compact.map { |obj|
123
170
  obj.unfiltered_data
124
171
  }.join(" ")
125
172
  end
@@ -139,30 +186,62 @@ module PDF
139
186
  # returns the "boxes" that define the page object.
140
187
  # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
141
188
  #
189
+ # DEPRECATED. Recommend using Page#rectangles instead
190
+ #
142
191
  def boxes
143
- mediabox = attributes[:MediaBox]
144
- cropbox = attributes[:Cropbox] || mediabox
192
+ # In ruby 2.4+ we could use Hash#transform_values
193
+ Hash[rectangles.map{ |k,rect| [k,rect.to_a] } ]
194
+ end
195
+
196
+ # returns the "boxes" that define the page object.
197
+ # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
198
+ #
199
+ def rectangles
200
+ # attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
201
+ mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
202
+ cropbox = objects.deref_array_of_numbers(attributes[:CropBox]) || mediabox
203
+ bleedbox = objects.deref_array_of_numbers(attributes[:BleedBox]) || cropbox
204
+ trimbox = objects.deref_array_of_numbers(attributes[:TrimBox]) || cropbox
205
+ artbox = objects.deref_array_of_numbers(attributes[:ArtBox]) || cropbox
206
+
207
+ begin
208
+ mediarect = Rectangle.from_array(mediabox)
209
+ croprect = Rectangle.from_array(cropbox)
210
+ bleedrect = Rectangle.from_array(bleedbox)
211
+ trimrect = Rectangle.from_array(trimbox)
212
+ artrect = Rectangle.from_array(artbox)
213
+ rescue ArgumentError => e
214
+ raise MalformedPDFError, e.message
215
+ end
216
+
217
+ if rotate > 0
218
+ mediarect.apply_rotation(rotate)
219
+ croprect.apply_rotation(rotate)
220
+ bleedrect.apply_rotation(rotate)
221
+ trimrect.apply_rotation(rotate)
222
+ artrect.apply_rotation(rotate)
223
+ end
145
224
 
146
225
  {
147
- MediaBox: objects.deref!(mediabox),
148
- CropBox: objects.deref!(cropbox),
149
- BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
150
- TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
151
- ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
226
+ MediaBox: mediarect,
227
+ CropBox: croprect,
228
+ BleedBox: bleedrect,
229
+ TrimBox: trimrect,
230
+ ArtBox: artrect,
152
231
  }
153
232
  end
154
233
 
155
234
  private
156
235
 
157
236
  def root
158
- root ||= objects.deref(@objects.trailer[:Root])
237
+ @root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
159
238
  end
160
239
 
161
240
  # Returns the resources that accompany this page. Includes
162
241
  # resources inherited from parents.
163
242
  #
164
243
  def resources
165
- @resources ||= @objects.deref(attributes[:Resources]) || {}
244
+ @resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
166
245
  end
167
246
 
168
247
  def content_stream(receivers, instructions)
@@ -198,7 +277,8 @@ module PDF
198
277
  if origin.nil?
199
278
  []
200
279
  else
201
- obj = objects.deref(origin)
280
+ obj = objects.deref_hash(origin)
281
+ PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
202
282
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
203
283
  end
204
284
  end