pdf-reader 2.5.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -47,7 +48,11 @@ class PDF::Reader
47
48
  @trailer = @xref.trailer
48
49
  @cache = opts[:cache] || PDF::Reader::ObjectCache.new
49
50
  @sec_handler = NullSecurityHandler.new
50
- @sec_handler = build_security_handler(opts)
51
+ @sec_handler = SecurityHandlerFactory.build(
52
+ deref(trailer[:Encrypt]),
53
+ deref(trailer[:ID]),
54
+ opts[:password]
55
+ )
51
56
  end
52
57
 
53
58
  # returns the type of object a ref points to
@@ -91,6 +96,218 @@ class PDF::Reader
91
96
  end
92
97
  alias :deref :object
93
98
 
99
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
100
+ # object in the PDF and return it. Otherwise return key untouched.
101
+ #
102
+ # Guaranteed to only return an Array or nil. If the dereference results in
103
+ # any other type then a MalformedPDFError exception will raise. Useful when
104
+ # expecting an Array and no other type will do.
105
+ def deref_array(key)
106
+ obj = deref(key)
107
+
108
+ return obj if obj.nil?
109
+
110
+ obj.tap { |obj|
111
+ raise MalformedPDFError, "expected object to be an Array or nil" if !obj.is_a?(Array)
112
+ }
113
+ end
114
+
115
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
116
+ # object in the PDF and return it. Otherwise return key untouched.
117
+ #
118
+ # Guaranteed to only return an Array of Numerics or nil. If the dereference results in
119
+ # any other type then a MalformedPDFError exception will raise. Useful when
120
+ # expecting an Array and no other type will do.
121
+ #
122
+ # Some effort to cast array elements to a number is made for any non-numeric elements.
123
+ def deref_array_of_numbers(key)
124
+ arr = deref(key)
125
+
126
+ return arr if arr.nil?
127
+
128
+ raise MalformedPDFError, "expected object to be an Array" unless arr.is_a?(Array)
129
+
130
+ arr.map { |item|
131
+ if item.is_a?(Numeric)
132
+ item
133
+ elsif item.respond_to?(:to_f)
134
+ item.to_f
135
+ elsif item.respond_to?(:to_i)
136
+ item.to_i
137
+ else
138
+ raise MalformedPDFError, "expected object to be a number"
139
+ end
140
+ }
141
+ end
142
+
143
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
144
+ # object in the PDF and return it. Otherwise return key untouched.
145
+ #
146
+ # Guaranteed to only return a Hash or nil. If the dereference results in
147
+ # any other type then a MalformedPDFError exception will raise. Useful when
148
+ # expecting a Hash and no other type will do.
149
+ def deref_hash(key)
150
+ obj = deref(key)
151
+
152
+ return obj if obj.nil?
153
+
154
+ obj.tap { |obj|
155
+ raise MalformedPDFError, "expected object to be a Hash or nil" if !obj.is_a?(Hash)
156
+ }
157
+ end
158
+
159
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
160
+ # object in the PDF and return it. Otherwise return key untouched.
161
+ #
162
+ # Guaranteed to only return a PDF name (Symbol) or nil. If the dereference results in
163
+ # any other type then a MalformedPDFError exception will raise. Useful when
164
+ # expecting a Name and no other type will do.
165
+ #
166
+ # Some effort to cast to a symbol is made when the reference points to a non-symbol.
167
+ def deref_name(key)
168
+ obj = deref(key)
169
+
170
+ return obj if obj.nil?
171
+
172
+ if !obj.is_a?(Symbol)
173
+ if obj.respond_to?(:to_sym)
174
+ obj = obj.to_sym
175
+ else
176
+ raise MalformedPDFError, "expected object to be a Name"
177
+ end
178
+ end
179
+
180
+ obj
181
+ end
182
+
183
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
184
+ # object in the PDF and return it. Otherwise return key untouched.
185
+ #
186
+ # Guaranteed to only return an Integer or nil. If the dereference results in
187
+ # any other type then a MalformedPDFError exception will raise. Useful when
188
+ # expecting an Integer and no other type will do.
189
+ #
190
+ # Some effort to cast to an int is made when the reference points to a non-integer.
191
+ def deref_integer(key)
192
+ obj = deref(key)
193
+
194
+ return obj if obj.nil?
195
+
196
+ if !obj.is_a?(Integer)
197
+ if obj.respond_to?(:to_i)
198
+ obj = obj.to_i
199
+ else
200
+ raise MalformedPDFError, "expected object to be an Integer"
201
+ end
202
+ end
203
+
204
+ obj
205
+ end
206
+
207
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
208
+ # object in the PDF and return it. Otherwise return key untouched.
209
+ #
210
+ # Guaranteed to only return a Numeric or nil. If the dereference results in
211
+ # any other type then a MalformedPDFError exception will raise. Useful when
212
+ # expecting a Numeric and no other type will do.
213
+ #
214
+ # Some effort to cast to a number is made when the reference points to a non-number.
215
+ def deref_number(key)
216
+ obj = deref(key)
217
+
218
+ return obj if obj.nil?
219
+
220
+ if !obj.is_a?(Numeric)
221
+ if obj.respond_to?(:to_f)
222
+ obj = obj.to_f
223
+ elsif obj.respond_to?(:to_i)
224
+ obj.to_i
225
+ else
226
+ raise MalformedPDFError, "expected object to be a number"
227
+ end
228
+ end
229
+
230
+ obj
231
+ end
232
+
233
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
234
+ # object in the PDF and return it. Otherwise return key untouched.
235
+ #
236
+ # Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
237
+ # any other type then a MalformedPDFError exception will raise. Useful when
238
+ # expecting a stream and no other type will do.
239
+ def deref_stream(key)
240
+ obj = deref(key)
241
+
242
+ return obj if obj.nil?
243
+
244
+ obj.tap { |obj|
245
+ if !obj.is_a?(PDF::Reader::Stream)
246
+ raise MalformedPDFError, "expected object to be an Array or nil"
247
+ end
248
+ }
249
+ end
250
+
251
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
252
+ # object in the PDF and return it. Otherwise return key untouched.
253
+ #
254
+ # Guaranteed to only return a String or nil. If the dereference results in
255
+ # any other type then a MalformedPDFError exception will raise. Useful when
256
+ # expecting a string and no other type will do.
257
+ #
258
+ # Some effort to cast to a string is made when the reference points to a non-string.
259
+ def deref_string(key)
260
+ obj = deref(key)
261
+
262
+ return obj if obj.nil?
263
+
264
+ if !obj.is_a?(String)
265
+ if obj.respond_to?(:to_s)
266
+ obj = obj.to_s
267
+ else
268
+ raise MalformedPDFError, "expected object to be a string"
269
+ end
270
+ end
271
+
272
+ obj
273
+ end
274
+
275
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
276
+ # object in the PDF and return it. Otherwise return key untouched.
277
+ #
278
+ # Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
279
+ # any other type then a MalformedPDFError exception will raise. Useful when
280
+ # expecting a Name or Array and no other type will do.
281
+ def deref_name_or_array(key)
282
+ obj = deref(key)
283
+
284
+ return obj if obj.nil?
285
+
286
+ obj.tap { |obj|
287
+ if !obj.is_a?(Symbol) && !obj.is_a?(Array)
288
+ raise MalformedPDFError, "expected object to be an Array or Name"
289
+ end
290
+ }
291
+ end
292
+
293
+ # If key is a PDF::Reader::Reference object, lookup the corresponding
294
+ # object in the PDF and return it. Otherwise return key untouched.
295
+ #
296
+ # Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
297
+ # any other type then a MalformedPDFError exception will raise. Useful when
298
+ # expecting a stream or Array and no other type will do.
299
+ def deref_stream_or_array(key)
300
+ obj = deref(key)
301
+
302
+ return obj if obj.nil?
303
+
304
+ obj.tap { |obj|
305
+ if !obj.is_a?(PDF::Reader::Stream) && !obj.is_a?(Array)
306
+ raise MalformedPDFError, "expected object to be an Array or Stream"
307
+ end
308
+ }
309
+ end
310
+
94
311
  # Recursively dereferences the object referred to by +key+. If +key+ is not
95
312
  # a PDF::Reader::Reference, the key is returned unchanged.
96
313
  #
@@ -98,6 +315,22 @@ class PDF::Reader
98
315
  deref_internal!(key, {})
99
316
  end
100
317
 
318
+ def deref_array!(key)
319
+ deref!(key).tap { |obj|
320
+ if !obj.nil? && !obj.is_a?(Array)
321
+ raise MalformedPDFError, "expected object (#{obj.inspect}) to be an Array or nil"
322
+ end
323
+ }
324
+ end
325
+
326
+ def deref_hash!(key)
327
+ deref!(key).tap { |obj|
328
+ if !obj.nil? && !obj.is_a?(Hash)
329
+ raise MalformedPDFError, "expected object (#{obj.inspect}) to be a Hash or nil"
330
+ end
331
+ }
332
+ end
333
+
101
334
  # Access an object from the PDF. key can be an int or a PDF::Reader::Reference
102
335
  # object.
103
336
  #
@@ -232,7 +465,10 @@ class PDF::Reader
232
465
  #
233
466
  def page_references
234
467
  root = fetch(trailer[:Root])
235
- @page_references ||= get_page_objects(root[:Pages]).flatten
468
+ @page_references ||= begin
469
+ pages_root = deref_hash(root[:Pages]) || {}
470
+ get_page_objects(pages_root)
471
+ end
236
472
  end
237
473
 
238
474
  def encrypted?
@@ -298,36 +534,6 @@ class PDF::Reader
298
534
  end
299
535
  end
300
536
 
301
- def build_security_handler(opts = {})
302
- encrypt = deref(trailer[:Encrypt])
303
- if NullSecurityHandler.supports?(encrypt)
304
- NullSecurityHandler.new
305
- elsif StandardSecurityHandler.supports?(encrypt)
306
- encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
307
- StandardSecurityHandler.new(
308
- key_length: (encrypt[:Length] || 40).to_i,
309
- revision: encrypt[:R],
310
- owner_key: encrypt[:O],
311
- user_key: encrypt[:U],
312
- permissions: encrypt[:P].to_i,
313
- encrypted_metadata: encmeta,
314
- file_id: (deref(trailer[:ID]) || []).first,
315
- password: opts[:password],
316
- cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
317
- )
318
- elsif StandardSecurityHandlerV5.supports?(encrypt)
319
- StandardSecurityHandlerV5.new(
320
- O: encrypt[:O],
321
- U: encrypt[:U],
322
- OE: encrypt[:OE],
323
- UE: encrypt[:UE],
324
- password: opts[:password]
325
- )
326
- else
327
- UnimplementedSecurityHandler.new
328
- end
329
- end
330
-
331
537
  def decrypt(ref, obj)
332
538
  case obj
333
539
  when PDF::Reader::Stream then
@@ -336,8 +542,10 @@ class PDF::Reader
336
542
  obj.data = sec_handler.decrypt(obj.data, ref) unless obj.hash[:Type] == :XRef
337
543
  obj
338
544
  when Hash then
339
- arr = obj.map { |key,val| [key, decrypt(ref, val)] }.flatten(1)
340
- Hash[*arr]
545
+ arr = obj.map { |key,val| [key, decrypt(ref, val)] }
546
+ arr.each_with_object({}) { |(k,v), accum|
547
+ accum[k] = v
548
+ }
341
549
  when Array then
342
550
  obj.collect { |item| decrypt(ref, item) }
343
551
  when String
@@ -359,19 +567,19 @@ class PDF::Reader
359
567
  @object_stream ||= {}
360
568
  end
361
569
 
362
- # returns a nested array of object references for all pages in this object store.
570
+ # returns an array of object references for all pages in this object store. The ordering of
571
+ # the Array is significant and matches the page ordering of the document
363
572
  #
364
- def get_page_objects(ref)
365
- obj = deref(ref)
366
-
367
- unless obj.kind_of?(::Hash)
368
- raise MalformedPDFError, "Dereferenced page object must be a dict"
369
- end
370
-
573
+ def get_page_objects(obj)
371
574
  if obj[:Type] == :Page
372
- ref
575
+ [obj]
373
576
  elsif obj[:Kids]
374
- deref(obj[:Kids]).map { |kid| get_page_objects(kid) }
577
+ kids = deref_array(obj[:Kids]) || []
578
+ kids.map { |kid|
579
+ get_page_objects(deref_hash(kid) || {})
580
+ }.flatten
581
+ else
582
+ raise MalformedPDFError, "Expected Page or Pages object"
375
583
  end
376
584
  end
377
585
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -1,4 +1,6 @@
1
+ # typed: true
1
2
  # coding: utf-8
3
+ # frozen_string_literal: true
2
4
 
3
5
  class PDF::Reader
4
6
  # remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
@@ -38,7 +40,8 @@ class PDF::Reader
38
40
 
39
41
  def self.detect_intersection(sweep_line_status, event_point)
40
42
  sweep_line_status.each do |open_text_run|
41
- if event_point.x >= open_text_run.x &&
43
+ if open_text_run.text == event_point.run.text &&
44
+ event_point.x >= open_text_run.x &&
42
45
  event_point.x <= open_text_run.endx &&
43
46
  open_text_run.intersection_area_percent(event_point.run) >= OVERLAPPING_THRESHOLD
44
47
  return true
@@ -51,10 +54,14 @@ class PDF::Reader
51
54
  # Utility class used to avoid modifying the underlying TextRun objects while we're
52
55
  # looking for duplicates
53
56
  class EventPoint
54
- attr_reader :x, :run
55
57
 
56
- def initialize x, run
57
- @x, @run = x, run
58
+ attr_reader :x
59
+
60
+ attr_reader :run
61
+
62
+ def initialize(x, run)
63
+ @x = x
64
+ @run = run
58
65
  end
59
66
 
60
67
  def start?
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -13,7 +14,7 @@ module PDF
13
14
  # objects accessor to help walk the page dictionary in any useful way.
14
15
  #
15
16
  class Page
16
- include ResourceMethods
17
+ extend Forwardable
17
18
 
18
19
  # lowlevel hash-like access to all objects in the underlying PDF
19
20
  attr_reader :objects
@@ -26,6 +27,15 @@ module PDF
26
27
  # operations
27
28
  attr_reader :cache
28
29
 
30
+ def_delegators :resources, :color_spaces
31
+ def_delegators :resources, :fonts
32
+ def_delegators :resources, :graphic_states
33
+ def_delegators :resources, :patterns
34
+ def_delegators :resources, :procedure_sets
35
+ def_delegators :resources, :properties
36
+ def_delegators :resources, :shadings
37
+ def_delegators :resources, :xobjects
38
+
29
39
  # creates a new page wrapper.
30
40
  #
31
41
  # * objects - an ObjectHash instance that wraps a PDF file
@@ -33,7 +43,7 @@ module PDF
33
43
  #
34
44
  def initialize(objects, pagenum, options = {})
35
45
  @objects, @pagenum = objects, pagenum
36
- @page_object = objects.deref(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
37
47
  @cache = options[:cache] || {}
38
48
 
39
49
  unless @page_object.is_a?(::Hash)
@@ -59,7 +69,7 @@ module PDF
59
69
  def attributes
60
70
  @attributes ||= {}.tap { |hash|
61
71
  page_with_ancestors.reverse.each do |obj|
62
- hash.merge!(@objects.deref(obj))
72
+ hash.merge!(@objects.deref_hash(obj) || {})
63
73
  end
64
74
  }
65
75
  # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
@@ -68,22 +78,56 @@ module PDF
68
78
  @attributes
69
79
  end
70
80
 
81
+ def height
82
+ rect = Rectangle.new(*attributes[:MediaBox])
83
+ rect.apply_rotation(rotate) if rotate > 0
84
+ rect.height
85
+ end
86
+
87
+ def width
88
+ rect = Rectangle.new(*attributes[:MediaBox])
89
+ rect.apply_rotation(rotate) if rotate > 0
90
+ rect.width
91
+ end
92
+
93
+ def origin
94
+ rect = Rectangle.new(*attributes[:MediaBox])
95
+ rect.apply_rotation(rotate) if rotate > 0
96
+
97
+ rect.bottom_left
98
+ end
99
+
71
100
  # Convenience method to identify the page's orientation.
72
101
  #
73
102
  def orientation
74
- OrientationDetector.new(attributes).orientation
103
+ if height > width
104
+ "portrait"
105
+ else
106
+ "landscape"
107
+ end
75
108
  end
76
109
 
77
110
  # returns the plain text content of this page encoded as UTF-8. Any
78
111
  # characters that can't be translated will be returned as a ▯
79
112
  #
80
- def text
113
+ def text(opts = {})
81
114
  receiver = PageTextReceiver.new
82
115
  walk(receiver)
83
- receiver.content
116
+ runs = receiver.runs(opts)
117
+
118
+ # rectangles[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
119
+ mediabox = rectangles[:MediaBox] || Rectangle.new(0, 0, 0, 0)
120
+
121
+ PageLayout.new(runs, mediabox).to_s
84
122
  end
85
123
  alias :to_s :text
86
124
 
125
+ def runs(opts = {})
126
+ receiver = PageTextReceiver.new
127
+ walk(receiver)
128
+ receiver.runs(opts)
129
+ end
130
+
87
131
  # processes the raw content stream for this page in sequential order and
88
132
  # passes callbacks to the receiver objects.
89
133
  #
@@ -108,6 +152,9 @@ module PDF
108
152
  # the program in the correct order and calls out to your implementation.
109
153
  #
110
154
  def walk(*receivers)
155
+ receivers = receivers.map { |receiver|
156
+ ValidatingReceiver.new(receiver)
157
+ }
111
158
  callback(receivers, :page=, [self])
112
159
  content_stream(receivers, raw_content)
113
160
  end
@@ -116,10 +163,10 @@ module PDF
116
163
  # see here unless you're a PDF nerd like me.
117
164
  #
118
165
  def raw_content
119
- contents = objects.deref(@page_object[:Contents])
166
+ contents = objects.deref_stream_or_array(@page_object[:Contents])
120
167
  [contents].flatten.compact.map { |obj|
121
- objects.deref(obj)
122
- }.map { |obj|
168
+ objects.deref_stream(obj)
169
+ }.compact.map { |obj|
123
170
  obj.unfiltered_data
124
171
  }.join(" ")
125
172
  end
@@ -139,30 +186,62 @@ module PDF
139
186
  # returns the "boxes" that define the page object.
140
187
  # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
141
188
  #
189
+ # DEPRECATED. Recommend using Page#rectangles instead
190
+ #
142
191
  def boxes
143
- mediabox = attributes[:MediaBox]
144
- cropbox = attributes[:Cropbox] || mediabox
192
+ # In ruby 2.4+ we could use Hash#transform_values
193
+ Hash[rectangles.map{ |k,rect| [k,rect.to_a] } ]
194
+ end
195
+
196
+ # returns the "boxes" that define the page object.
197
+ # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
198
+ #
199
+ def rectangles
200
+ # attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
201
+ mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
202
+ cropbox = objects.deref_array_of_numbers(attributes[:CropBox]) || mediabox
203
+ bleedbox = objects.deref_array_of_numbers(attributes[:BleedBox]) || cropbox
204
+ trimbox = objects.deref_array_of_numbers(attributes[:TrimBox]) || cropbox
205
+ artbox = objects.deref_array_of_numbers(attributes[:ArtBox]) || cropbox
206
+
207
+ begin
208
+ mediarect = Rectangle.from_array(mediabox)
209
+ croprect = Rectangle.from_array(cropbox)
210
+ bleedrect = Rectangle.from_array(bleedbox)
211
+ trimrect = Rectangle.from_array(trimbox)
212
+ artrect = Rectangle.from_array(artbox)
213
+ rescue ArgumentError => e
214
+ raise MalformedPDFError, e.message
215
+ end
216
+
217
+ if rotate > 0
218
+ mediarect.apply_rotation(rotate)
219
+ croprect.apply_rotation(rotate)
220
+ bleedrect.apply_rotation(rotate)
221
+ trimrect.apply_rotation(rotate)
222
+ artrect.apply_rotation(rotate)
223
+ end
145
224
 
146
225
  {
147
- MediaBox: objects.deref!(mediabox),
148
- CropBox: objects.deref!(cropbox),
149
- BleedBox: objects.deref!(attributes[:BleedBox] || cropbox),
150
- TrimBox: objects.deref!(attributes[:TrimBox] || cropbox),
151
- ArtBox: objects.deref!(attributes[:ArtBox] || cropbox)
226
+ MediaBox: mediarect,
227
+ CropBox: croprect,
228
+ BleedBox: bleedrect,
229
+ TrimBox: trimrect,
230
+ ArtBox: artrect,
152
231
  }
153
232
  end
154
233
 
155
234
  private
156
235
 
157
236
  def root
158
- root ||= objects.deref(@objects.trailer[:Root])
237
+ @root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
159
238
  end
160
239
 
161
240
  # Returns the resources that accompany this page. Includes
162
241
  # resources inherited from parents.
163
242
  #
164
243
  def resources
165
- @resources ||= @objects.deref(attributes[:Resources]) || {}
244
+ @resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
166
245
  end
167
246
 
168
247
  def content_stream(receivers, instructions)
@@ -198,7 +277,8 @@ module PDF
198
277
  if origin.nil?
199
278
  []
200
279
  else
201
- obj = objects.deref(origin)
280
+ obj = objects.deref_hash(origin)
281
+ PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
202
282
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
203
283
  end
204
284
  end