pdf-reader 2.8.0 → 2.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +9 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/buffer.rb +36 -34
- data/lib/pdf/reader/cmap.rb +64 -51
- data/lib/pdf/reader/error.rb +8 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
- data/lib/pdf/reader/filter/depredict.rb +1 -1
- data/lib/pdf/reader/filter/flate.rb +3 -3
- data/lib/pdf/reader/filter/lzw.rb +1 -1
- data/lib/pdf/reader/filter/null.rb +1 -2
- data/lib/pdf/reader/filter/run_length.rb +1 -1
- data/lib/pdf/reader/filter.rb +10 -11
- data/lib/pdf/reader/font.rb +29 -17
- data/lib/pdf/reader/font_descriptor.rb +18 -17
- data/lib/pdf/reader/form_xobject.rb +14 -5
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/null_security_handler.rb +0 -4
- data/lib/pdf/reader/object_hash.rb +247 -42
- data/lib/pdf/reader/page.rb +38 -20
- data/lib/pdf/reader/page_state.rb +1 -1
- data/lib/pdf/reader/page_text_receiver.rb +4 -1
- data/lib/pdf/reader/parser.rb +20 -8
- data/lib/pdf/reader/point.rb +1 -1
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +2 -2
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
- data/lib/pdf/reader/stream.rb +2 -2
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
- data/lib/pdf/reader/xref.rb +20 -3
- data/lib/pdf/reader.rb +17 -9
- data/rbi/pdf-reader.rbi +388 -173
- metadata +15 -9
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -15,22 +15,23 @@ class PDF::Reader
|
|
15
15
|
:x_height, :font_flags
|
16
16
|
|
17
17
|
def initialize(ohash, fd_hash)
|
18
|
-
|
19
|
-
@
|
20
|
-
@
|
21
|
-
@
|
22
|
-
@
|
23
|
-
@
|
24
|
-
@
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
33
|
-
@
|
18
|
+
# TODO change these to typed derefs
|
19
|
+
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
|
20
|
+
@descent = ohash.deref_number(fd_hash[:Descent]) || 0
|
21
|
+
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
|
22
|
+
@font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
|
23
|
+
@avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
|
24
|
+
@cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
|
25
|
+
@font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
|
26
|
+
@italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
|
27
|
+
@font_name = ohash.deref_name(fd_hash[:FontName]).to_s
|
28
|
+
@leading = ohash.deref_number(fd_hash[:Leading]) || 0
|
29
|
+
@max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
|
30
|
+
@stem_v = ohash.deref_number(fd_hash[:StemV])
|
31
|
+
@x_height = ohash.deref_number(fd_hash[:XHeight])
|
32
|
+
@font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
|
33
|
+
@font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
|
34
|
+
@font_family = ohash.deref_string(fd_hash[:FontFamily])
|
34
35
|
|
35
36
|
# A FontDescriptor may have an embedded font program in FontFile
|
36
37
|
# (Type 1 Font Program), FontFile2 (TrueType font program), or
|
@@ -40,7 +41,7 @@ class PDF::Reader
|
|
40
41
|
# 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
|
41
42
|
# 3) OpenType: OpenType Font Program
|
42
43
|
# see Section 9.9, PDF 32000-1:2008, pp 288-292
|
43
|
-
@font_program_stream = ohash.
|
44
|
+
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
|
44
45
|
#TODO handle FontFile and FontFile3
|
45
46
|
|
46
47
|
@is_ttf = true if @font_program_stream
|
@@ -15,15 +15,24 @@ module PDF
|
|
15
15
|
# This behaves and looks much like a limited PDF::Reader::Page class.
|
16
16
|
#
|
17
17
|
class FormXObject
|
18
|
-
|
18
|
+
extend Forwardable
|
19
19
|
|
20
20
|
attr_reader :xobject
|
21
21
|
|
22
|
+
def_delegators :resources, :color_spaces
|
23
|
+
def_delegators :resources, :fonts
|
24
|
+
def_delegators :resources, :graphic_states
|
25
|
+
def_delegators :resources, :patterns
|
26
|
+
def_delegators :resources, :procedure_sets
|
27
|
+
def_delegators :resources, :properties
|
28
|
+
def_delegators :resources, :shadings
|
29
|
+
def_delegators :resources, :xobjects
|
30
|
+
|
22
31
|
def initialize(page, xobject, options = {})
|
23
32
|
@page = page
|
24
33
|
@objects = page.objects
|
25
34
|
@cache = options[:cache] || {}
|
26
|
-
@xobject = @objects.
|
35
|
+
@xobject = @objects.deref_stream(xobject)
|
27
36
|
end
|
28
37
|
|
29
38
|
# return a hash of fonts used on this form.
|
@@ -34,9 +43,9 @@ module PDF
|
|
34
43
|
# to most available metrics for each font.
|
35
44
|
#
|
36
45
|
def font_objects
|
37
|
-
raw_fonts = @objects.
|
46
|
+
raw_fonts = @objects.deref_hash(fonts)
|
38
47
|
::Hash[raw_fonts.map { |label, font|
|
39
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
48
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
|
40
49
|
}]
|
41
50
|
end
|
42
51
|
|
@@ -61,7 +70,7 @@ module PDF
|
|
61
70
|
# Returns the resources that accompany this form.
|
62
71
|
#
|
63
72
|
def resources
|
64
|
-
@resources ||= @objects.
|
73
|
+
@resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
|
65
74
|
end
|
66
75
|
|
67
76
|
def callback(receivers, name, params=[])
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Processes the Encrypt dict from an encrypted PDF and a user provided
|
11
|
+
# password and returns a key that can decrypt the file.
|
12
|
+
#
|
13
|
+
# This can generate a decryption key compatible with the following standard encryption algorithms:
|
14
|
+
#
|
15
|
+
# * Version 5 (AESV3)
|
16
|
+
#
|
17
|
+
class KeyBuilderV5

  # Builds a key generator from the relevant Encrypt dict entries.
  #
  # opts may contain the following String values (each defaults to ""):
  #
  #   :owner_key            - the O entry: hash(32B) + validation salt(8B) + key salt(8B)
  #   :user_key             - the U entry: hash(32B) + validation salt(8B) + key salt(8B)
  #   :owner_encryption_key - the OE entry: decryption key, encrypted w/ owner password
  #   :user_encryption_key  - the UE entry: decryption key, encrypted w/ user password
  #
  # All values are stored as binary strings so they can be safely concatenated
  # with, and compared to, raw digest output.
  def initialize(opts = {})
    @key_length = 256

    # hash(32B) + validation salt(8B) + key salt(8B)
    @owner_key = (opts[:owner_key] || "").b

    # hash(32B) + validation salt(8B) + key salt(8B)
    @user_key = (opts[:user_key] || "").b

    # decryption key, encrypted w/ owner password
    @owner_encryption_key = (opts[:owner_encryption_key] || "").b

    # decryption key, encrypted w/ user password
    @user_encryption_key = (opts[:user_encryption_key] || "").b
  end

  # Takes a string containing a user provided password.
  #
  # If the password matches the file, then a string containing a key suitable for
  # decrypting the file will be returned. If the password doesn't match the file,
  # an exception (PDF::Reader::EncryptedPDFError) will be raised.
  #
  def key(pass)
    pass = pass.byteslice(0...127).to_s   # UTF-8 encoded password. first 127 bytes

    # Hash the raw bytes: mixing a non-ASCII UTF-8 password with binary salts
    # and digests would otherwise raise Encoding::CompatibilityError.
    raw_pass = pass.b

    encrypt_key = auth_owner_pass(raw_pass) ||
      auth_user_pass(raw_pass) ||
      auth_owner_pass_r6(raw_pass) ||
      auth_user_pass_r6(raw_pass)

    # NOTE(review): this message echoes the supplied password, which may end up
    # in logs. Kept byte-for-byte because callers may match on it.
    raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
    encrypt_key
  end

  private

  # Algorithm 3.2a - Computing an encryption key
  #
  # Defined in PDF 1.7 Extension Level 3
  #
  # if the string is a valid owner password, this will return the decryption key,
  # otherwise nil. Salts are coerced with to_s so a missing or short key simply
  # fails to authenticate instead of raising a TypeError.
  #
  def auth_owner_pass(password)
    validation_salt = @owner_key[32..39].to_s
    key_salt        = @owner_key[40..-1].to_s

    if Digest::SHA256.digest(password + validation_salt + @user_key) == @owner_key[0..31].to_s
      unwrap_file_key(Digest::SHA256.digest(password + key_salt + @user_key), @owner_encryption_key)
    end
  end

  # As auth_owner_pass, but checks the password against the user key.
  # Returns the decryption key, or nil if the password doesn't match.
  def auth_user_pass(password)
    validation_salt = @user_key[32..39].to_s
    key_salt        = @user_key[40..-1].to_s

    if Digest::SHA256.digest(password + validation_salt) == @user_key[0..31].to_s
      unwrap_file_key(Digest::SHA256.digest(password + key_salt), @user_encryption_key)
    end
  end

  # Revision 6 owner password check, using the r6_digest hash.
  # Returns the decryption key, or nil if the password doesn't match.
  def auth_owner_pass_r6(password)
    user_key = @user_key[0, 48].to_s

    if r6_digest(password, @owner_key[32..39].to_s, user_key) == @owner_key[0..31].to_s
      unwrap_file_key(r6_digest(password, @owner_key[40, 8].to_s, user_key), @owner_encryption_key)
    end
  end

  # Revision 6 user password check, using the r6_digest hash.
  # Returns the decryption key, or nil if the password doesn't match.
  def auth_user_pass_r6(password)
    if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31].to_s
      unwrap_file_key(r6_digest(password, @user_key[40, 8].to_s), @user_encryption_key)
    end
  end

  # Decrypts one of the wrapped file keys (OE or UE) with AES-256-CBC, using an
  # all-zero IV and no padding. cipher_key must be 32 bytes.
  def unwrap_file_key(cipher_key, wrapped_key)
    cipher = OpenSSL::Cipher.new('AES-256-CBC')
    cipher.decrypt
    cipher.key = cipher_key
    cipher.iv = "\x00" * 16
    cipher.padding = 0
    cipher.update(wrapped_key) + cipher.final
  end

  # PDF 2.0 spec, 7.6.4.3.4
  # Algorithm 2.B: Computing a hash (revision 6 and later)
  #
  # Returns the first 32 bytes of the final round's hash.
  def r6_digest(password, salt, user_key = '')
    k = Digest::SHA256.digest(password + salt + user_key)
    e = ''

    i = 0
    # At least 64 rounds, then continue until the last byte of e is <= (round - 32)
    while i < 64 || e.getbyte(-1).to_i > i - 32
      k1 = (password + k + user_key) * 64

      aes = OpenSSL::Cipher.new("aes-128-cbc").encrypt
      aes.key = k[0, 16].to_s
      aes.iv = k[16, 16].to_s
      aes.padding = 0
      e = String.new(aes.update(k1))
      # the first 16 bytes of e, mod 3, select the hash for the next round
      k = case unpack_128bit_bigendian_int(e) % 3
          when 0 then Digest::SHA256.digest(e)
          when 1 then Digest::SHA384.digest(e)
          when 2 then Digest::SHA512.digest(e)
          end
      i = i + 1
    end

    k[0, 32].to_s
  end

  # Interprets the first 16 bytes of str as an unsigned big-endian integer.
  def unpack_128bit_bigendian_int(str)
    ints = str[0,16].to_s.unpack("N*")
    (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
  end

end
|
137
|
+
end
|
138
|
+
|
@@ -48,7 +48,11 @@ class PDF::Reader
|
|
48
48
|
@trailer = @xref.trailer
|
49
49
|
@cache = opts[:cache] || PDF::Reader::ObjectCache.new
|
50
50
|
@sec_handler = NullSecurityHandler.new
|
51
|
-
@sec_handler =
|
51
|
+
@sec_handler = SecurityHandlerFactory.build(
|
52
|
+
deref(trailer[:Encrypt]),
|
53
|
+
deref(trailer[:ID]),
|
54
|
+
opts[:password]
|
55
|
+
)
|
52
56
|
end
|
53
57
|
|
54
58
|
# returns the type of object a ref points to
|
@@ -92,6 +96,218 @@ class PDF::Reader
|
|
92
96
|
end
|
93
97
|
alias :deref :object
|
94
98
|
|
99
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
100
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
101
|
+
#
|
102
|
+
# Guaranteed to only return an Array or nil. If the dereference results in
|
103
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
104
|
+
# expecting an Array and no other type will do.
|
105
|
+
def deref_array(key)
|
106
|
+
obj = deref(key)
|
107
|
+
|
108
|
+
return obj if obj.nil?
|
109
|
+
|
110
|
+
obj.tap { |obj|
|
111
|
+
raise MalformedPDFError, "expected object to be an Array or nil" if !obj.is_a?(Array)
|
112
|
+
}
|
113
|
+
end
|
114
|
+
|
115
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
116
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
117
|
+
#
|
118
|
+
# Guaranteed to only return an Array of Numerics or nil. If the dereference results in
|
119
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
120
|
+
# expecting an Array and no other type will do.
|
121
|
+
#
|
122
|
+
# Some effort to cast array elements to a number is made for any non-numeric elements.
|
123
|
+
def deref_array_of_numbers(key)
|
124
|
+
arr = deref(key)
|
125
|
+
|
126
|
+
return arr if arr.nil?
|
127
|
+
|
128
|
+
raise MalformedPDFError, "expected object to be an Array" unless arr.is_a?(Array)
|
129
|
+
|
130
|
+
arr.map { |item|
|
131
|
+
if item.is_a?(Numeric)
|
132
|
+
item
|
133
|
+
elsif item.respond_to?(:to_f)
|
134
|
+
item.to_f
|
135
|
+
elsif item.respond_to?(:to_i)
|
136
|
+
item.to_i
|
137
|
+
else
|
138
|
+
raise MalformedPDFError, "expected object to be a number"
|
139
|
+
end
|
140
|
+
}
|
141
|
+
end
|
142
|
+
|
143
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
144
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
145
|
+
#
|
146
|
+
# Guaranteed to only return a Hash or nil. If the dereference results in
|
147
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
148
|
+
# expecting a Hash and no other type will do.
|
149
|
+
def deref_hash(key)
|
150
|
+
obj = deref(key)
|
151
|
+
|
152
|
+
return obj if obj.nil?
|
153
|
+
|
154
|
+
obj.tap { |obj|
|
155
|
+
raise MalformedPDFError, "expected object to be a Hash or nil" if !obj.is_a?(Hash)
|
156
|
+
}
|
157
|
+
end
|
158
|
+
|
159
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
160
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
161
|
+
#
|
162
|
+
# Guaranteed to only return a PDF name (Symbol) or nil. If the dereference results in
|
163
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
164
|
+
# expecting a Name and no other type will do.
|
165
|
+
#
|
166
|
+
# Some effort to cast to a symbol is made when the reference points to a non-symbol.
|
167
|
+
def deref_name(key)
|
168
|
+
obj = deref(key)
|
169
|
+
|
170
|
+
return obj if obj.nil?
|
171
|
+
|
172
|
+
if !obj.is_a?(Symbol)
|
173
|
+
if obj.respond_to?(:to_sym)
|
174
|
+
obj = obj.to_sym
|
175
|
+
else
|
176
|
+
raise MalformedPDFError, "expected object to be a Name"
|
177
|
+
end
|
178
|
+
end
|
179
|
+
|
180
|
+
obj
|
181
|
+
end
|
182
|
+
|
183
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
184
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
185
|
+
#
|
186
|
+
# Guaranteed to only return an Integer or nil. If the dereference results in
|
187
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
188
|
+
# expecting an Integer and no other type will do.
|
189
|
+
#
|
190
|
+
# Some effort to cast to an int is made when the reference points to a non-integer.
|
191
|
+
def deref_integer(key)
|
192
|
+
obj = deref(key)
|
193
|
+
|
194
|
+
return obj if obj.nil?
|
195
|
+
|
196
|
+
if !obj.is_a?(Integer)
|
197
|
+
if obj.respond_to?(:to_i)
|
198
|
+
obj = obj.to_i
|
199
|
+
else
|
200
|
+
raise MalformedPDFError, "expected object to be an Integer"
|
201
|
+
end
|
202
|
+
end
|
203
|
+
|
204
|
+
obj
|
205
|
+
end
|
206
|
+
|
207
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
208
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
209
|
+
#
|
210
|
+
# Guaranteed to only return a Numeric or nil. If the dereference results in
|
211
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
212
|
+
# expecting a number and no other type will do.
|
213
|
+
#
|
214
|
+
# Some effort to cast to a number is made when the reference points to a non-number.
|
215
|
+
def deref_number(key)
  obj = deref(key)

  # nil and anything already Numeric pass straight through
  return obj if obj.nil?
  return obj if obj.is_a?(Numeric)

  # Best-effort cast for non-numeric objects. Each branch returns the
  # converted value (the to_i result was previously discarded).
  if obj.respond_to?(:to_f)
    obj.to_f
  elsif obj.respond_to?(:to_i)
    obj.to_i
  else
    raise MalformedPDFError, "expected object to be a number"
  end
end
|
232
|
+
|
233
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
234
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
235
|
+
#
|
236
|
+
# Guaranteed to only return a PDF::Reader::Stream or nil. If the dereference results in
|
237
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
238
|
+
# expecting a stream and no other type will do.
|
239
|
+
def deref_stream(key)
  obj = deref(key)

  return obj if obj.nil?

  # Anything other than a stream is a malformed file; say so accurately
  # (the message previously claimed an Array was expected).
  unless obj.is_a?(PDF::Reader::Stream)
    raise MalformedPDFError, "expected object to be a Stream or nil"
  end

  obj
end
|
250
|
+
|
251
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
252
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
253
|
+
#
|
254
|
+
# Guaranteed to only return a String or nil. If the dereference results in
|
255
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
256
|
+
# expecting a string and no other type will do.
|
257
|
+
#
|
258
|
+
# Some effort to cast to a string is made when the reference points to a non-string.
|
259
|
+
def deref_string(key)
|
260
|
+
obj = deref(key)
|
261
|
+
|
262
|
+
return obj if obj.nil?
|
263
|
+
|
264
|
+
if !obj.is_a?(String)
|
265
|
+
if obj.respond_to?(:to_s)
|
266
|
+
obj = obj.to_s
|
267
|
+
else
|
268
|
+
raise MalformedPDFError, "expected object to be a string"
|
269
|
+
end
|
270
|
+
end
|
271
|
+
|
272
|
+
obj
|
273
|
+
end
|
274
|
+
|
275
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
276
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
277
|
+
#
|
278
|
+
# Guaranteed to only return a PDF Name (symbol), Array or nil. If the dereference results in
|
279
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
280
|
+
# expecting a Name or Array and no other type will do.
|
281
|
+
def deref_name_or_array(key)
|
282
|
+
obj = deref(key)
|
283
|
+
|
284
|
+
return obj if obj.nil?
|
285
|
+
|
286
|
+
obj.tap { |obj|
|
287
|
+
if !obj.is_a?(Symbol) && !obj.is_a?(Array)
|
288
|
+
raise MalformedPDFError, "expected object to be an Array or Name"
|
289
|
+
end
|
290
|
+
}
|
291
|
+
end
|
292
|
+
|
293
|
+
# If key is a PDF::Reader::Reference object, lookup the corresponding
|
294
|
+
# object in the PDF and return it. Otherwise return key untouched.
|
295
|
+
#
|
296
|
+
# Guaranteed to only return a PDF::Reader::Stream, Array or nil. If the dereference results in
|
297
|
+
# any other type then a MalformedPDFError exception will raise. Useful when
|
298
|
+
# expecting a stream or Array and no other type will do.
|
299
|
+
def deref_stream_or_array(key)
|
300
|
+
obj = deref(key)
|
301
|
+
|
302
|
+
return obj if obj.nil?
|
303
|
+
|
304
|
+
obj.tap { |obj|
|
305
|
+
if !obj.is_a?(PDF::Reader::Stream) && !obj.is_a?(Array)
|
306
|
+
raise MalformedPDFError, "expected object to be an Array or Stream"
|
307
|
+
end
|
308
|
+
}
|
309
|
+
end
|
310
|
+
|
95
311
|
# Recursively dereferences the object referred to by +key+. If +key+ is not
|
96
312
|
# a PDF::Reader::Reference, the key is returned unchanged.
|
97
313
|
#
|
@@ -99,6 +315,22 @@ class PDF::Reader
|
|
99
315
|
deref_internal!(key, {})
|
100
316
|
end
|
101
317
|
|
318
|
+
def deref_array!(key)
|
319
|
+
deref!(key).tap { |obj|
|
320
|
+
if !obj.nil? && !obj.is_a?(Array)
|
321
|
+
raise MalformedPDFError, "expected object (#{obj.inspect}) to be an Array or nil"
|
322
|
+
end
|
323
|
+
}
|
324
|
+
end
|
325
|
+
|
326
|
+
def deref_hash!(key)
|
327
|
+
deref!(key).tap { |obj|
|
328
|
+
if !obj.nil? && !obj.is_a?(Hash)
|
329
|
+
raise MalformedPDFError, "expected object (#{obj.inspect}) to be a Hash or nil"
|
330
|
+
end
|
331
|
+
}
|
332
|
+
end
|
333
|
+
|
102
334
|
# Access an object from the PDF. key can be an int or a PDF::Reader::Reference
|
103
335
|
# object.
|
104
336
|
#
|
@@ -233,7 +465,10 @@ class PDF::Reader
|
|
233
465
|
#
|
234
466
|
def page_references
|
235
467
|
root = fetch(trailer[:Root])
|
236
|
-
@page_references ||=
|
468
|
+
@page_references ||= begin
|
469
|
+
pages_root = deref_hash(root[:Pages]) || {}
|
470
|
+
get_page_objects(pages_root)
|
471
|
+
end
|
237
472
|
end
|
238
473
|
|
239
474
|
def encrypted?
|
@@ -299,36 +534,6 @@ class PDF::Reader
|
|
299
534
|
end
|
300
535
|
end
|
301
536
|
|
302
|
-
def build_security_handler(opts = {})
|
303
|
-
encrypt = deref(trailer[:Encrypt])
|
304
|
-
if NullSecurityHandler.supports?(encrypt)
|
305
|
-
NullSecurityHandler.new
|
306
|
-
elsif StandardSecurityHandler.supports?(encrypt)
|
307
|
-
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
308
|
-
StandardSecurityHandler.new(
|
309
|
-
key_length: (encrypt[:Length] || 40).to_i,
|
310
|
-
revision: encrypt[:R],
|
311
|
-
owner_key: encrypt[:O],
|
312
|
-
user_key: encrypt[:U],
|
313
|
-
permissions: encrypt[:P].to_i,
|
314
|
-
encrypted_metadata: encmeta,
|
315
|
-
file_id: (deref(trailer[:ID]) || []).first,
|
316
|
-
password: opts[:password],
|
317
|
-
cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
318
|
-
)
|
319
|
-
elsif StandardSecurityHandlerV5.supports?(encrypt)
|
320
|
-
StandardSecurityHandlerV5.new(
|
321
|
-
O: encrypt[:O],
|
322
|
-
U: encrypt[:U],
|
323
|
-
OE: encrypt[:OE],
|
324
|
-
UE: encrypt[:UE],
|
325
|
-
password: opts[:password]
|
326
|
-
)
|
327
|
-
else
|
328
|
-
UnimplementedSecurityHandler.new
|
329
|
-
end
|
330
|
-
end
|
331
|
-
|
332
537
|
def decrypt(ref, obj)
|
333
538
|
case obj
|
334
539
|
when PDF::Reader::Stream then
|
@@ -362,19 +567,19 @@ class PDF::Reader
|
|
362
567
|
@object_stream ||= {}
|
363
568
|
end
|
364
569
|
|
365
|
-
# returns
|
570
|
+
# returns an array of object references for all pages in this object store. The ordering of
|
571
|
+
# the Array is significant and matches the page ordering of the document
|
366
572
|
#
|
367
|
-
def get_page_objects(
|
368
|
-
obj = deref(ref)
|
369
|
-
|
370
|
-
unless obj.kind_of?(::Hash)
|
371
|
-
raise MalformedPDFError, "Dereferenced page object must be a dict"
|
372
|
-
end
|
373
|
-
|
573
|
+
def get_page_objects(obj)
|
374
574
|
if obj[:Type] == :Page
|
375
|
-
|
575
|
+
[obj]
|
376
576
|
elsif obj[:Kids]
|
377
|
-
|
577
|
+
kids = deref_array(obj[:Kids]) || []
|
578
|
+
kids.map { |kid|
|
579
|
+
get_page_objects(deref_hash(kid) || {})
|
580
|
+
}.flatten
|
581
|
+
else
|
582
|
+
raise MalformedPDFError, "Expected Page or Pages object"
|
378
583
|
end
|
379
584
|
end
|
380
585
|
|