pdf-reader 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/buffer.rb +36 -34
- data/lib/pdf/reader/cmap.rb +64 -51
- data/lib/pdf/reader/error.rb +8 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
- data/lib/pdf/reader/filter/depredict.rb +1 -1
- data/lib/pdf/reader/filter/flate.rb +3 -3
- data/lib/pdf/reader/filter/lzw.rb +1 -1
- data/lib/pdf/reader/filter/null.rb +1 -2
- data/lib/pdf/reader/filter/run_length.rb +1 -1
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +29 -17
- data/lib/pdf/reader/font_descriptor.rb +18 -17
- data/lib/pdf/reader/form_xobject.rb +14 -5
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/null_security_handler.rb +0 -4
- data/lib/pdf/reader/object_hash.rb +247 -42
- data/lib/pdf/reader/page.rb +38 -20
- data/lib/pdf/reader/page_state.rb +1 -1
- data/lib/pdf/reader/page_text_receiver.rb +4 -1
- data/lib/pdf/reader/parser.rb +9 -6
- data/lib/pdf/reader/point.rb +1 -1
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +2 -2
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
- data/lib/pdf/reader/stream.rb +2 -2
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
- data/lib/pdf/reader/xref.rb +20 -3
- data/lib/pdf/reader.rb +17 -9
- data/rbi/pdf-reader.rbi +388 -173
- metadata +15 -9
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
data/lib/pdf/reader/page.rb
CHANGED
@@ -14,7 +14,7 @@ module PDF
|
|
14
14
|
# objects accessor to help walk the page dictionary in any useful way.
|
15
15
|
#
|
16
16
|
class Page
|
17
|
-
|
17
|
+
extend Forwardable
|
18
18
|
|
19
19
|
# lowlevel hash-like access to all objects in the underlying PDF
|
20
20
|
attr_reader :objects
|
@@ -27,6 +27,15 @@ module PDF
|
|
27
27
|
# operations
|
28
28
|
attr_reader :cache
|
29
29
|
|
30
|
+
def_delegators :resources, :color_spaces
|
31
|
+
def_delegators :resources, :fonts
|
32
|
+
def_delegators :resources, :graphic_states
|
33
|
+
def_delegators :resources, :patterns
|
34
|
+
def_delegators :resources, :procedure_sets
|
35
|
+
def_delegators :resources, :properties
|
36
|
+
def_delegators :resources, :shadings
|
37
|
+
def_delegators :resources, :xobjects
|
38
|
+
|
30
39
|
# creates a new page wrapper.
|
31
40
|
#
|
32
41
|
# * objects - an ObjectHash instance that wraps a PDF file
|
@@ -34,7 +43,7 @@ module PDF
|
|
34
43
|
#
|
35
44
|
def initialize(objects, pagenum, options = {})
|
36
45
|
@objects, @pagenum = objects, pagenum
|
37
|
-
@page_object = objects.
|
46
|
+
@page_object = objects.deref_hash(objects.page_references[pagenum - 1])
|
38
47
|
@cache = options[:cache] || {}
|
39
48
|
|
40
49
|
unless @page_object.is_a?(::Hash)
|
@@ -60,7 +69,7 @@ module PDF
|
|
60
69
|
def attributes
|
61
70
|
@attributes ||= {}.tap { |hash|
|
62
71
|
page_with_ancestors.reverse.each do |obj|
|
63
|
-
hash.merge!(@objects.
|
72
|
+
hash.merge!(@objects.deref_hash(obj) || {})
|
64
73
|
end
|
65
74
|
}
|
66
75
|
# This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
|
@@ -143,6 +152,9 @@ module PDF
|
|
143
152
|
# the program in the correct order and calls out to your implementation.
|
144
153
|
#
|
145
154
|
def walk(*receivers)
|
155
|
+
receivers = receivers.map { |receiver|
|
156
|
+
ValidatingReceiver.new(receiver)
|
157
|
+
}
|
146
158
|
callback(receivers, :page=, [self])
|
147
159
|
content_stream(receivers, raw_content)
|
148
160
|
end
|
@@ -151,10 +163,10 @@ module PDF
|
|
151
163
|
# see here unless you're a PDF nerd like me.
|
152
164
|
#
|
153
165
|
def raw_content
|
154
|
-
contents = objects.
|
166
|
+
contents = objects.deref_stream_or_array(@page_object[:Contents])
|
155
167
|
[contents].flatten.compact.map { |obj|
|
156
|
-
objects.
|
157
|
-
}.map { |obj|
|
168
|
+
objects.deref_stream(obj)
|
169
|
+
}.compact.map { |obj|
|
158
170
|
obj.unfiltered_data
|
159
171
|
}.join(" ")
|
160
172
|
end
|
@@ -185,17 +197,22 @@ module PDF
|
|
185
197
|
# values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
|
186
198
|
#
|
187
199
|
def rectangles
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
200
|
+
# attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
|
201
|
+
mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
|
202
|
+
cropbox = objects.deref_array_of_numbers(attributes[:CropBox]) || mediabox
|
203
|
+
bleedbox = objects.deref_array_of_numbers(attributes[:BleedBox]) || cropbox
|
204
|
+
trimbox = objects.deref_array_of_numbers(attributes[:TrimBox]) || cropbox
|
205
|
+
artbox = objects.deref_array_of_numbers(attributes[:ArtBox]) || cropbox
|
206
|
+
|
207
|
+
begin
|
208
|
+
mediarect = Rectangle.from_array(mediabox)
|
209
|
+
croprect = Rectangle.from_array(cropbox)
|
210
|
+
bleedrect = Rectangle.from_array(bleedbox)
|
211
|
+
trimrect = Rectangle.from_array(trimbox)
|
212
|
+
artrect = Rectangle.from_array(artbox)
|
213
|
+
rescue ArgumentError => e
|
214
|
+
raise MalformedPDFError, e.message
|
215
|
+
end
|
199
216
|
|
200
217
|
if rotate > 0
|
201
218
|
mediarect.apply_rotation(rotate)
|
@@ -217,14 +234,14 @@ module PDF
|
|
217
234
|
private
|
218
235
|
|
219
236
|
def root
|
220
|
-
|
237
|
+
@root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
|
221
238
|
end
|
222
239
|
|
223
240
|
# Returns the resources that accompany this page. Includes
|
224
241
|
# resources inherited from parents.
|
225
242
|
#
|
226
243
|
def resources
|
227
|
-
@resources ||= @objects.
|
244
|
+
@resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
|
228
245
|
end
|
229
246
|
|
230
247
|
def content_stream(receivers, instructions)
|
@@ -260,7 +277,8 @@ module PDF
|
|
260
277
|
if origin.nil?
|
261
278
|
[]
|
262
279
|
else
|
263
|
-
obj = objects.
|
280
|
+
obj = objects.deref_hash(origin)
|
281
|
+
PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
|
264
282
|
[ select_inheritable(obj) ] + ancestors(obj[:Parent])
|
265
283
|
end
|
266
284
|
end
|
@@ -384,7 +384,7 @@ class PDF::Reader
|
|
384
384
|
#
|
385
385
|
def build_fonts(raw_fonts)
|
386
386
|
wrapped_fonts = raw_fonts.map { |label, font|
|
387
|
-
[label, PDF::Reader::Font.new(@objects, @objects.
|
387
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
|
388
388
|
}
|
389
389
|
|
390
390
|
::Hash[wrapped_fonts]
|
@@ -87,8 +87,10 @@ module PDF
|
|
87
87
|
params.each do |arg|
|
88
88
|
if arg.is_a?(String)
|
89
89
|
internal_show_text(arg)
|
90
|
-
|
90
|
+
elsif arg.is_a?(Numeric)
|
91
91
|
@state.process_glyph_displacement(0, arg, false)
|
92
|
+
else
|
93
|
+
# skip it
|
92
94
|
end
|
93
95
|
end
|
94
96
|
end
|
@@ -119,6 +121,7 @@ module PDF
|
|
119
121
|
private
|
120
122
|
|
121
123
|
def internal_show_text(string)
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
|
122
125
|
if @state.current_font.nil?
|
123
126
|
raise PDF::Reader::MalformedPDFError, "current font is invalid"
|
124
127
|
end
|
data/lib/pdf/reader/parser.rb
CHANGED
@@ -80,8 +80,8 @@ class PDF::Reader
|
|
80
80
|
token
|
81
81
|
elsif operators.has_key? token
|
82
82
|
Token.new(token)
|
83
|
-
elsif token.
|
84
|
-
token
|
83
|
+
elsif token.frozen?
|
84
|
+
token
|
85
85
|
elsif token =~ /\d*\.\d/
|
86
86
|
token.to_f
|
87
87
|
else
|
@@ -103,7 +103,7 @@ class PDF::Reader
|
|
103
103
|
obj = parse_token
|
104
104
|
post_obj = parse_token
|
105
105
|
|
106
|
-
if post_obj == "stream"
|
106
|
+
if obj.is_a?(Hash) && post_obj == "stream"
|
107
107
|
stream(obj)
|
108
108
|
else
|
109
109
|
obj
|
@@ -121,7 +121,7 @@ class PDF::Reader
|
|
121
121
|
key = parse_token
|
122
122
|
break if key.kind_of?(Token) and key == ">>"
|
123
123
|
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
124
|
-
|
124
|
+
PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
|
125
125
|
|
126
126
|
value = parse_token
|
127
127
|
value.kind_of?(Token) and Error.str_assert_not(value, ">>")
|
@@ -209,13 +209,16 @@ class PDF::Reader
|
|
209
209
|
def stream(dict)
|
210
210
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
211
211
|
if @objects
|
212
|
-
length = @objects.
|
212
|
+
length = @objects.deref_integer(dict[:Length])
|
213
213
|
if dict[:Filter]
|
214
|
-
dict[:Filter] = @objects.
|
214
|
+
dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
|
215
215
|
end
|
216
216
|
else
|
217
217
|
length = dict[:Length] || 0
|
218
218
|
end
|
219
|
+
|
220
|
+
PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
|
221
|
+
|
219
222
|
data = @buffer.read(length, :skip_eol => true)
|
220
223
|
|
221
224
|
Error.str_assert(parse_token, "endstream")
|
data/lib/pdf/reader/point.rb
CHANGED
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
|
11
|
+
# a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
|
12
|
+
#
|
13
|
+
class Rc4SecurityHandler
|
14
|
+
|
15
|
+
def initialize(key)
|
16
|
+
@encrypt_key = key
|
17
|
+
end
|
18
|
+
|
19
|
+
##7.6.2 General Encryption Algorithm
|
20
|
+
#
|
21
|
+
# Algorithm 1: Encryption of data using the RC4 algorithm
|
22
|
+
#
|
23
|
+
# version <=3 or (version == 4 and CFM == V2)
|
24
|
+
#
|
25
|
+
# buf - a string to decrypt
|
26
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
|
+
#
|
28
|
+
def decrypt( buf, ref )
|
29
|
+
objKey = @encrypt_key.dup
|
30
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
31
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
32
|
+
length = objKey.length < 16 ? objKey.length : 16
|
33
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
34
|
+
rc4.decrypt(buf)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
data/lib/pdf/reader/rectangle.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
module PDF
|
@@ -85,7 +85,7 @@ module PDF
|
|
85
85
|
new_x2 = bottom_left.x
|
86
86
|
new_y2 = bottom_left.y + width
|
87
87
|
end
|
88
|
-
set_corners(new_x1, new_y1, new_x2, new_y2)
|
88
|
+
set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
|
89
89
|
end
|
90
90
|
|
91
91
|
private
|
@@ -1,16 +1,18 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: true
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
|
-
# Setting this file to "typed: true" is difficult because it's a mixin that assumes some things
|
6
|
-
# are aavailable from the class, like @objects and resources. Sorbet doesn't know about them.
|
7
|
-
|
8
5
|
module PDF
|
9
6
|
class Reader
|
10
7
|
|
11
8
|
# common resource lookup methods shared by Page and FormXobjects (a standalone class rather than a mixin)
|
12
9
|
#
|
13
|
-
|
10
|
+
class Resources
|
11
|
+
|
12
|
+
def initialize(objects, resources)
|
13
|
+
@objects = objects
|
14
|
+
@resources = resources
|
15
|
+
end
|
14
16
|
|
15
17
|
# Returns a Hash of color spaces that are available to this page
|
16
18
|
#
|
@@ -19,7 +21,7 @@ module PDF
|
|
19
21
|
# of calling it over and over.
|
20
22
|
#
|
21
23
|
def color_spaces
|
22
|
-
@objects.
|
24
|
+
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
23
25
|
end
|
24
26
|
|
25
27
|
# Returns a Hash of fonts that are available to this page
|
@@ -29,7 +31,7 @@ module PDF
|
|
29
31
|
# of calling it over and over.
|
30
32
|
#
|
31
33
|
def fonts
|
32
|
-
@objects.
|
34
|
+
@objects.deref_hash!(@resources[:Font]) || {}
|
33
35
|
end
|
34
36
|
|
35
37
|
# Returns a Hash of external graphic states that are available to this
|
@@ -40,7 +42,7 @@ module PDF
|
|
40
42
|
# of calling it over and over.
|
41
43
|
#
|
42
44
|
def graphic_states
|
43
|
-
@objects.
|
45
|
+
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
44
46
|
end
|
45
47
|
|
46
48
|
# Returns a Hash of patterns that are available to this page
|
@@ -50,7 +52,7 @@ module PDF
|
|
50
52
|
# of calling it over and over.
|
51
53
|
#
|
52
54
|
def patterns
|
53
|
-
@objects.
|
55
|
+
@objects.deref_hash!(@resources[:Pattern]) || {}
|
54
56
|
end
|
55
57
|
|
56
58
|
# Returns an Array of procedure sets that are available to this page
|
@@ -60,7 +62,7 @@ module PDF
|
|
60
62
|
# of calling it over and over.
|
61
63
|
#
|
62
64
|
def procedure_sets
|
63
|
-
@objects.
|
65
|
+
@objects.deref_array!(@resources[:ProcSet]) || []
|
64
66
|
end
|
65
67
|
|
66
68
|
# Returns a Hash of properties sets that are available to this page
|
@@ -70,7 +72,7 @@ module PDF
|
|
70
72
|
# of calling it over and over.
|
71
73
|
#
|
72
74
|
def properties
|
73
|
-
@objects.
|
75
|
+
@objects.deref_hash!(@resources[:Properties]) || {}
|
74
76
|
end
|
75
77
|
|
76
78
|
# Returns a Hash of shadings that are available to this page
|
@@ -80,7 +82,7 @@ module PDF
|
|
80
82
|
# of calling it over and over.
|
81
83
|
#
|
82
84
|
def shadings
|
83
|
-
@objects.
|
85
|
+
@objects.deref_hash!(@resources[:Shading]) || {}
|
84
86
|
end
|
85
87
|
|
86
88
|
# Returns a Hash of XObjects that are available to this page
|
@@ -90,7 +92,7 @@ module PDF
|
|
90
92
|
# of calling it over and over.
|
91
93
|
#
|
92
94
|
def xobjects
|
93
|
-
@objects.
|
95
|
+
@objects.deref_hash!(@resources[:XObject]) || {}
|
94
96
|
end
|
95
97
|
|
96
98
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
# Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
|
7
|
+
# able to decrypt the file.
|
8
|
+
class SecurityHandlerFactory
|
9
|
+
|
10
|
+
def self.build(encrypt, doc_id, password)
|
11
|
+
doc_id ||= []
|
12
|
+
password ||= ""
|
13
|
+
|
14
|
+
if encrypt.nil?
|
15
|
+
NullSecurityHandler.new
|
16
|
+
elsif standard?(encrypt)
|
17
|
+
build_standard_handler(encrypt, doc_id, password)
|
18
|
+
elsif standard_v5?(encrypt)
|
19
|
+
build_v5_handler(encrypt, doc_id, password)
|
20
|
+
else
|
21
|
+
UnimplementedSecurityHandler.new
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.build_standard_handler(encrypt, doc_id, password)
|
26
|
+
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
27
|
+
key_builder = StandardKeyBuilder.new(
|
28
|
+
key_length: (encrypt[:Length] || 40).to_i,
|
29
|
+
revision: encrypt[:R],
|
30
|
+
owner_key: encrypt[:O],
|
31
|
+
user_key: encrypt[:U],
|
32
|
+
permissions: encrypt[:P].to_i,
|
33
|
+
encrypted_metadata: encmeta,
|
34
|
+
file_id: doc_id.first,
|
35
|
+
)
|
36
|
+
cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
37
|
+
if cfm == :AESV2
|
38
|
+
AesV2SecurityHandler.new(key_builder.key(password))
|
39
|
+
else
|
40
|
+
Rc4SecurityHandler.new(key_builder.key(password))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.build_v5_handler(encrypt, doc_id, password)
|
45
|
+
key_builder = KeyBuilderV5.new(
|
46
|
+
owner_key: encrypt[:O],
|
47
|
+
user_key: encrypt[:U],
|
48
|
+
owner_encryption_key: encrypt[:OE],
|
49
|
+
user_encryption_key: encrypt[:UE],
|
50
|
+
)
|
51
|
+
AesV3SecurityHandler.new(key_builder.key(password))
|
52
|
+
end
|
53
|
+
|
54
|
+
# This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
|
55
|
+
def self.standard?(encrypt)
|
56
|
+
return false if encrypt.nil?
|
57
|
+
|
58
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
59
|
+
version = encrypt.fetch(:V, 0)
|
60
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
61
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
62
|
+
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
63
|
+
end
|
64
|
+
|
65
|
+
# This handler supports both
|
66
|
+
# - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
|
67
|
+
# - AES-256 encryption defined in PDF 2.0 ('revision 6')
|
68
|
+
def self.standard_v5?(encrypt)
|
69
|
+
return false if encrypt.nil?
|
70
|
+
|
71
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
72
|
+
version = encrypt.fetch(:V, 0)
|
73
|
+
revision = encrypt.fetch(:R, 0)
|
74
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
75
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
76
|
+
((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -1,39 +1,19 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed: true
|
3
|
-
# frozen_string_literal: true
|
4
2
|
|
5
|
-
################################################################################
|
6
|
-
#
|
7
|
-
# Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
|
8
|
-
#
|
9
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
10
|
-
# a copy of this software and associated documentation files (the
|
11
|
-
# "Software"), to deal in the Software without restriction, including
|
12
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
13
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
14
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
15
|
-
# the following conditions:
|
16
|
-
#
|
17
|
-
# The above copyright notice and this permission notice shall be
|
18
|
-
# included in all copies or substantial portions of the Software.
|
19
|
-
#
|
20
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
21
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
22
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
23
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
24
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
25
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
26
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
27
|
-
#
|
28
|
-
################################################################################
|
29
3
|
require 'digest/md5'
|
30
|
-
require 'openssl'
|
31
4
|
require 'rc4'
|
32
5
|
|
33
6
|
class PDF::Reader
|
34
7
|
|
35
|
-
#
|
36
|
-
|
8
|
+
# Processes the Encrypt dict from an encrypted PDF and a user provided
|
9
|
+
# password and returns a key that can decrypt the file.
|
10
|
+
#
|
11
|
+
# This can generate a key compatible with the following standard encryption algorithms:
|
12
|
+
#
|
13
|
+
# * Version 1-3, all variants
|
14
|
+
# * Version 4, V2 (RC4) and AESV2
|
15
|
+
#
|
16
|
+
class StandardKeyBuilder
|
37
17
|
|
38
18
|
## 7.6.3.3 Encryption Key Algorithm (pp61)
|
39
19
|
#
|
@@ -45,9 +25,6 @@ class PDF::Reader
|
|
45
25
|
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
|
46
26
|
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
|
47
27
|
|
48
|
-
attr_reader :key_length, :revision, :encrypt_key
|
49
|
-
attr_reader :owner_key, :user_key, :permissions, :file_id, :password
|
50
|
-
|
51
28
|
def initialize(opts = {})
|
52
29
|
@key_length = opts[:key_length].to_i/8
|
53
30
|
@revision = opts[:revision].to_i
|
@@ -56,72 +33,30 @@ class PDF::Reader
|
|
56
33
|
@permissions = opts[:permissions].to_i
|
57
34
|
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
58
35
|
@file_id = opts[:file_id] || ""
|
59
|
-
@encrypt_key = build_standard_key(opts[:password] || "")
|
60
|
-
@cfm = opts[:cfm]
|
61
36
|
|
62
37
|
if @key_length != 5 && @key_length != 16
|
63
|
-
msg = "
|
38
|
+
msg = "StandardKeyBuilder only supports 40 and 128 bit\
|
64
39
|
encryption (#{@key_length * 8}bit)"
|
65
|
-
raise
|
40
|
+
raise UnsupportedFeatureError, msg
|
66
41
|
end
|
67
42
|
end
|
68
43
|
|
69
|
-
#
|
70
|
-
def self.supports?(encrypt)
|
71
|
-
return false if encrypt.nil?
|
72
|
-
|
73
|
-
filter = encrypt.fetch(:Filter, :Standard)
|
74
|
-
version = encrypt.fetch(:V, 0)
|
75
|
-
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
76
|
-
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
77
|
-
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
78
|
-
end
|
79
|
-
|
80
|
-
##7.6.2 General Encryption Algorithm
|
81
|
-
#
|
82
|
-
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
83
|
-
#
|
84
|
-
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
44
|
+
# Takes a string containing a user provided password.
|
85
45
|
#
|
86
|
-
#
|
87
|
-
#
|
46
|
+
# If the password matches the file, then a string containing a key suitable for
|
47
|
+
# decrypting the file will be returned. If the password doesn't match the file,
|
48
|
+
# an exception will be raised.
|
88
49
|
#
|
89
|
-
def
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
else
|
94
|
-
decrypt_rc4(buf, ref)
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
private
|
50
|
+
def key(pass)
|
51
|
+
pass ||= ""
|
52
|
+
encrypt_key = auth_owner_pass(pass)
|
53
|
+
encrypt_key ||= auth_user_pass(pass)
|
99
54
|
|
100
|
-
|
101
|
-
|
102
|
-
def decrypt_rc4( buf, ref )
|
103
|
-
objKey = @encrypt_key.dup
|
104
|
-
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
105
|
-
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
106
|
-
length = objKey.length < 16 ? objKey.length : 16
|
107
|
-
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
108
|
-
rc4.decrypt(buf)
|
55
|
+
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
56
|
+
encrypt_key
|
109
57
|
end
|
110
58
|
|
111
|
-
|
112
|
-
# when (version == 4 and CFM == AESV2)
|
113
|
-
def decrypt_aes128( buf, ref )
|
114
|
-
objKey = @encrypt_key.dup
|
115
|
-
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
116
|
-
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
117
|
-
objKey << 'sAlT' # Algorithm 1, b)
|
118
|
-
length = objKey.length < 16 ? objKey.length : 16
|
119
|
-
cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
|
120
|
-
cipher.decrypt
|
121
|
-
cipher.key = Digest::MD5.digest(objKey)[0,length]
|
122
|
-
cipher.iv = buf[0..15]
|
123
|
-
cipher.update(buf[16..-1]) + cipher.final
|
124
|
-
end
|
59
|
+
private
|
125
60
|
|
126
61
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
127
62
|
# pp61 of spec
|
@@ -153,7 +88,7 @@ class PDF::Reader
|
|
153
88
|
md5 = Digest::MD5.digest(pad_pass(pass))
|
154
89
|
if @revision > 2 then
|
155
90
|
50.times { md5 = Digest::MD5.digest(md5) }
|
156
|
-
keyBegins = md5[0, key_length]
|
91
|
+
keyBegins = md5[0, @key_length]
|
157
92
|
#first iteration decrypt owner_key
|
158
93
|
out = @owner_key
|
159
94
|
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
@@ -218,12 +153,5 @@ class PDF::Reader
|
|
218
153
|
end
|
219
154
|
end
|
220
155
|
|
221
|
-
def build_standard_key(pass)
|
222
|
-
encrypt_key = auth_owner_pass(pass)
|
223
|
-
encrypt_key ||= auth_user_pass(pass)
|
224
|
-
|
225
|
-
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
226
|
-
encrypt_key
|
227
|
-
end
|
228
156
|
end
|
229
157
|
end
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -62,7 +62,7 @@ class PDF::Reader
|
|
62
62
|
end
|
63
63
|
|
64
64
|
Array(hash[:Filter]).each_with_index do |filter, index|
|
65
|
-
@udata = Filter.with(filter, options[index]).filter(@udata)
|
65
|
+
@udata = Filter.with(filter, options[index] || {}).filter(@udata)
|
66
66
|
end
|
67
67
|
end
|
68
68
|
@udata
|
@@ -0,0 +1,52 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Cast untrusted input (usually parsed out of a PDF file) to a known type
|
9
|
+
#
|
10
|
+
class TypeCheck
|
11
|
+
|
12
|
+
def self.cast_to_numeric!(obj)
|
13
|
+
if obj.is_a?(Numeric)
|
14
|
+
obj
|
15
|
+
elsif obj.nil?
|
16
|
+
0
|
17
|
+
elsif obj.respond_to?(:to_f)
|
18
|
+
obj.to_f
|
19
|
+
elsif obj.respond_to?(:to_i)
|
20
|
+
obj.to_i
|
21
|
+
else
|
22
|
+
raise MalformedPDFError, "Unable to cast to numeric"
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
def self.cast_to_string!(string)
|
27
|
+
if string.is_a?(String)
|
28
|
+
string
|
29
|
+
elsif string.nil?
|
30
|
+
""
|
31
|
+
elsif string.respond_to?(:to_s)
|
32
|
+
string.to_s
|
33
|
+
else
|
34
|
+
raise MalformedPDFError, "Unable to cast to string"
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
def self.cast_to_symbol(obj)
|
39
|
+
if obj.is_a?(Symbol)
|
40
|
+
obj
|
41
|
+
elsif obj.nil?
|
42
|
+
nil
|
43
|
+
elsif obj.respond_to?(:to_sym)
|
44
|
+
obj.to_sym
|
45
|
+
else
|
46
|
+
raise MalformedPDFError, "Unable to cast to symbol"
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|