pdf-reader 2.8.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +5 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +1 -1
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +9 -6
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -14,7 +14,7 @@ module PDF
14
14
  # objects accessor to help walk the page dictionary in any useful way.
15
15
  #
16
16
  class Page
17
- include ResourceMethods
17
+ extend Forwardable
18
18
 
19
19
  # lowlevel hash-like access to all objects in the underlying PDF
20
20
  attr_reader :objects
@@ -27,6 +27,15 @@ module PDF
27
27
  # operations
28
28
  attr_reader :cache
29
29
 
30
+ def_delegators :resources, :color_spaces
31
+ def_delegators :resources, :fonts
32
+ def_delegators :resources, :graphic_states
33
+ def_delegators :resources, :patterns
34
+ def_delegators :resources, :procedure_sets
35
+ def_delegators :resources, :properties
36
+ def_delegators :resources, :shadings
37
+ def_delegators :resources, :xobjects
38
+
30
39
  # creates a new page wrapper.
31
40
  #
32
41
  # * objects - an ObjectHash instance that wraps a PDF file
@@ -34,7 +43,7 @@ module PDF
34
43
  #
35
44
  def initialize(objects, pagenum, options = {})
36
45
  @objects, @pagenum = objects, pagenum
37
- @page_object = objects.deref(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
38
47
  @cache = options[:cache] || {}
39
48
 
40
49
  unless @page_object.is_a?(::Hash)
@@ -60,7 +69,7 @@ module PDF
60
69
  def attributes
61
70
  @attributes ||= {}.tap { |hash|
62
71
  page_with_ancestors.reverse.each do |obj|
63
- hash.merge!(@objects.deref(obj))
72
+ hash.merge!(@objects.deref_hash(obj) || {})
64
73
  end
65
74
  }
66
75
  # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
@@ -143,6 +152,9 @@ module PDF
143
152
  # the program in the correct order and calls out to your implementation.
144
153
  #
145
154
  def walk(*receivers)
155
+ receivers = receivers.map { |receiver|
156
+ ValidatingReceiver.new(receiver)
157
+ }
146
158
  callback(receivers, :page=, [self])
147
159
  content_stream(receivers, raw_content)
148
160
  end
@@ -151,10 +163,10 @@ module PDF
151
163
  # see here unless you're a PDF nerd like me.
152
164
  #
153
165
  def raw_content
154
- contents = objects.deref(@page_object[:Contents])
166
+ contents = objects.deref_stream_or_array(@page_object[:Contents])
155
167
  [contents].flatten.compact.map { |obj|
156
- objects.deref(obj)
157
- }.map { |obj|
168
+ objects.deref_stream(obj)
169
+ }.compact.map { |obj|
158
170
  obj.unfiltered_data
159
171
  }.join(" ")
160
172
  end
@@ -185,17 +197,22 @@ module PDF
185
197
  # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
186
198
  #
187
199
  def rectangles
188
- mediabox = objects.deref!(attributes[:MediaBox])
189
- cropbox = objects.deref!(attributes[:Cropbox]) || mediabox
190
- bleedbox = objects.deref!(attributes[:BleedBox]) || cropbox
191
- trimbox = objects.deref!(attributes[:TrimBox]) || cropbox
192
- artbox = objects.deref!(attributes[:ArtBox]) || cropbox
193
-
194
- mediarect = Rectangle.new(*mediabox)
195
- croprect = Rectangle.new(*cropbox)
196
- bleedrect = Rectangle.new(*bleedbox)
197
- trimrect = Rectangle.new(*trimbox)
198
- artrect = Rectangle.new(*artbox)
200
+ # attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
201
+ mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
202
+ cropbox = objects.deref_array_of_numbers(attributes[:CropBox]) || mediabox
203
+ bleedbox = objects.deref_array_of_numbers(attributes[:BleedBox]) || cropbox
204
+ trimbox = objects.deref_array_of_numbers(attributes[:TrimBox]) || cropbox
205
+ artbox = objects.deref_array_of_numbers(attributes[:ArtBox]) || cropbox
206
+
207
+ begin
208
+ mediarect = Rectangle.from_array(mediabox)
209
+ croprect = Rectangle.from_array(cropbox)
210
+ bleedrect = Rectangle.from_array(bleedbox)
211
+ trimrect = Rectangle.from_array(trimbox)
212
+ artrect = Rectangle.from_array(artbox)
213
+ rescue ArgumentError => e
214
+ raise MalformedPDFError, e.message
215
+ end
199
216
 
200
217
  if rotate > 0
201
218
  mediarect.apply_rotation(rotate)
@@ -217,14 +234,14 @@ module PDF
217
234
  private
218
235
 
219
236
  def root
220
- @root ||= objects.deref(@objects.trailer[:Root])
237
+ @root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
221
238
  end
222
239
 
223
240
  # Returns the resources that accompany this page. Includes
224
241
  # resources inherited from parents.
225
242
  #
226
243
  def resources
227
- @resources ||= @objects.deref(attributes[:Resources]) || {}
244
+ @resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
228
245
  end
229
246
 
230
247
  def content_stream(receivers, instructions)
@@ -260,7 +277,8 @@ module PDF
260
277
  if origin.nil?
261
278
  []
262
279
  else
263
- obj = objects.deref(origin)
280
+ obj = objects.deref_hash(origin)
281
+ PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
264
282
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
265
283
  end
266
284
  end
@@ -384,7 +384,7 @@ class PDF::Reader
384
384
  #
385
385
  def build_fonts(raw_fonts)
386
386
  wrapped_fonts = raw_fonts.map { |label, font|
387
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
387
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
388
388
  }
389
389
 
390
390
  ::Hash[wrapped_fonts]
@@ -87,8 +87,10 @@ module PDF
87
87
  params.each do |arg|
88
88
  if arg.is_a?(String)
89
89
  internal_show_text(arg)
90
- else
90
+ elsif arg.is_a?(Numeric)
91
91
  @state.process_glyph_displacement(0, arg, false)
92
+ else
93
+ # skip it
92
94
  end
93
95
  end
94
96
  end
@@ -119,6 +121,7 @@ module PDF
119
121
  private
120
122
 
121
123
  def internal_show_text(string)
124
+ PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
122
125
  if @state.current_font.nil?
123
126
  raise PDF::Reader::MalformedPDFError, "current font is invalid"
124
127
  end
@@ -80,8 +80,8 @@ class PDF::Reader
80
80
  token
81
81
  elsif operators.has_key? token
82
82
  Token.new(token)
83
- elsif token.respond_to?(:to_token)
84
- token.to_token
83
+ elsif token.frozen?
84
+ token
85
85
  elsif token =~ /\d*\.\d/
86
86
  token.to_f
87
87
  else
@@ -103,7 +103,7 @@ class PDF::Reader
103
103
  obj = parse_token
104
104
  post_obj = parse_token
105
105
 
106
- if post_obj == "stream"
106
+ if obj.is_a?(Hash) && post_obj == "stream"
107
107
  stream(obj)
108
108
  else
109
109
  obj
@@ -121,7 +121,7 @@ class PDF::Reader
121
121
  key = parse_token
122
122
  break if key.kind_of?(Token) and key == ">>"
123
123
  raise MalformedPDFError, "unterminated dict" if @buffer.empty?
124
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
124
+ PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
125
125
 
126
126
  value = parse_token
127
127
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -209,13 +209,16 @@ class PDF::Reader
209
209
  def stream(dict)
210
210
  raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
211
211
  if @objects
212
- length = @objects.deref(dict[:Length])
212
+ length = @objects.deref_integer(dict[:Length])
213
213
  if dict[:Filter]
214
- dict[:Filter] = @objects.deref(dict[:Filter])
214
+ dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
215
215
  end
216
216
  else
217
217
  length = dict[:Length] || 0
218
218
  end
219
+
220
+ PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
221
+
219
222
  data = @buffer.read(length, :skip_eol => true)
220
223
 
221
224
  Error.str_assert(parse_token, "endstream")
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
11
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
12
+ #
13
+ class Rc4SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ end
18
+
19
+ ##7.6.2 General Encryption Algorithm
20
+ #
21
+ # Algorithm 1: Encryption of data using the RC4 algorithm
22
+ #
23
+ # version <=3 or (version == 4 and CFM == V2)
24
+ #
25
+ # buf - a string to decrypt
26
+ # ref - a PDF::Reader::Reference for the object to decrypt
27
+ #
28
+ def decrypt( buf, ref )
29
+ objKey = @encrypt_key.dup
30
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
31
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
34
+ rc4.decrypt(buf)
35
+ end
36
+
37
+ end
38
+ end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -85,7 +85,7 @@ module PDF
85
85
  new_x2 = bottom_left.x
86
86
  new_y2 = bottom_left.y + width
87
87
  end
88
- set_corners(new_x1, new_y1, new_x2, new_y2)
88
+ set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
89
89
  end
90
90
 
91
91
  private
@@ -1,16 +1,18 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
- # Setting this file to "typed: true" is difficult because it's a mixin that assumes some things
6
- # are aavailable from the class, like @objects and resources. Sorbet doesn't know about them.
7
-
8
5
  module PDF
9
6
  class Reader
10
7
 
11
8
  # common resource lookup methods shared by Page and FormXobjects
12
9
  #
13
- module ResourceMethods
10
+ class Resources
11
+
12
+ def initialize(objects, resources)
13
+ @objects = objects
14
+ @resources = resources
15
+ end
14
16
 
15
17
  # Returns a Hash of color spaces that are available to this page
16
18
  #
@@ -19,7 +21,7 @@ module PDF
19
21
  # of calling it over and over.
20
22
  #
21
23
  def color_spaces
22
- @objects.deref!(resources[:ColorSpace]) || {}
24
+ @objects.deref_hash!(@resources[:ColorSpace]) || {}
23
25
  end
24
26
 
25
27
  # Returns a Hash of fonts that are available to this page
@@ -29,7 +31,7 @@ module PDF
29
31
  # of calling it over and over.
30
32
  #
31
33
  def fonts
32
- @objects.deref!(resources[:Font]) || {}
34
+ @objects.deref_hash!(@resources[:Font]) || {}
33
35
  end
34
36
 
35
37
  # Returns a Hash of external graphic states that are available to this
@@ -40,7 +42,7 @@ module PDF
40
42
  # of calling it over and over.
41
43
  #
42
44
  def graphic_states
43
- @objects.deref!(resources[:ExtGState]) || {}
45
+ @objects.deref_hash!(@resources[:ExtGState]) || {}
44
46
  end
45
47
 
46
48
  # Returns a Hash of patterns that are available to this page
@@ -50,7 +52,7 @@ module PDF
50
52
  # of calling it over and over.
51
53
  #
52
54
  def patterns
53
- @objects.deref!(resources[:Pattern]) || {}
55
+ @objects.deref_hash!(@resources[:Pattern]) || {}
54
56
  end
55
57
 
56
58
  # Returns an Array of procedure sets that are available to this page
@@ -60,7 +62,7 @@ module PDF
60
62
  # of calling it over and over.
61
63
  #
62
64
  def procedure_sets
63
- @objects.deref!(resources[:ProcSet]) || []
65
+ @objects.deref_array!(@resources[:ProcSet]) || []
64
66
  end
65
67
 
66
68
  # Returns a Hash of properties sets that are available to this page
@@ -70,7 +72,7 @@ module PDF
70
72
  # of calling it over and over.
71
73
  #
72
74
  def properties
73
- @objects.deref!(resources[:Properties]) || {}
75
+ @objects.deref_hash!(@resources[:Properties]) || {}
74
76
  end
75
77
 
76
78
  # Returns a Hash of shadings that are available to this page
@@ -80,7 +82,7 @@ module PDF
80
82
  # of calling it over and over.
81
83
  #
82
84
  def shadings
83
- @objects.deref!(resources[:Shading]) || {}
85
+ @objects.deref_hash!(@resources[:Shading]) || {}
84
86
  end
85
87
 
86
88
  # Returns a Hash of XObjects that are available to this page
@@ -90,7 +92,7 @@ module PDF
90
92
  # of calling it over and over.
91
93
  #
92
94
  def xobjects
93
- @objects.deref!(resources[:XObject]) || {}
95
+ @objects.deref_hash!(@resources[:XObject]) || {}
94
96
  end
95
97
 
96
98
  end
@@ -0,0 +1,79 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
7
+ # able to decrypt the file.
8
+ class SecurityHandlerFactory
9
+
10
+ def self.build(encrypt, doc_id, password)
11
+ doc_id ||= []
12
+ password ||= ""
13
+
14
+ if encrypt.nil?
15
+ NullSecurityHandler.new
16
+ elsif standard?(encrypt)
17
+ build_standard_handler(encrypt, doc_id, password)
18
+ elsif standard_v5?(encrypt)
19
+ build_v5_handler(encrypt, doc_id, password)
20
+ else
21
+ UnimplementedSecurityHandler.new
22
+ end
23
+ end
24
+
25
+ def self.build_standard_handler(encrypt, doc_id, password)
26
+ encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
27
+ key_builder = StandardKeyBuilder.new(
28
+ key_length: (encrypt[:Length] || 40).to_i,
29
+ revision: encrypt[:R],
30
+ owner_key: encrypt[:O],
31
+ user_key: encrypt[:U],
32
+ permissions: encrypt[:P].to_i,
33
+ encrypted_metadata: encmeta,
34
+ file_id: doc_id.first,
35
+ )
36
+ cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
37
+ if cfm == :AESV2
38
+ AesV2SecurityHandler.new(key_builder.key(password))
39
+ else
40
+ Rc4SecurityHandler.new(key_builder.key(password))
41
+ end
42
+ end
43
+
44
+ def self.build_v5_handler(encrypt, doc_id, password)
45
+ key_builder = KeyBuilderV5.new(
46
+ owner_key: encrypt[:O],
47
+ user_key: encrypt[:U],
48
+ owner_encryption_key: encrypt[:OE],
49
+ user_encryption_key: encrypt[:UE],
50
+ )
51
+ AesV3SecurityHandler.new(key_builder.key(password))
52
+ end
53
+
54
+ # This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
55
+ def self.standard?(encrypt)
56
+ return false if encrypt.nil?
57
+
58
+ filter = encrypt.fetch(:Filter, :Standard)
59
+ version = encrypt.fetch(:V, 0)
60
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
61
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
62
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
63
+ end
64
+
65
+ # This handler supports both
66
+ # - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
67
+ # - AES-256 encryption defined in PDF 2.0 ('revision 6')
68
+ def self.standard_v5?(encrypt)
69
+ return false if encrypt.nil?
70
+
71
+ filter = encrypt.fetch(:Filter, :Standard)
72
+ version = encrypt.fetch(:V, 0)
73
+ revision = encrypt.fetch(:R, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ ((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
77
+ end
78
+ end
79
+ end
@@ -1,39 +1,19 @@
1
1
  # coding: utf-8
2
- # typed: true
3
- # frozen_string_literal: true
4
2
 
5
- ################################################################################
6
- #
7
- # Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
- #
28
- ################################################################################
29
3
  require 'digest/md5'
30
- require 'openssl'
31
4
  require 'rc4'
32
5
 
33
6
  class PDF::Reader
34
7
 
35
- # class creates interface to encrypt dictionary for use in Decrypt
36
- class StandardSecurityHandler
8
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
9
+ # password and returns a key that can decrypt the file.
10
+ #
11
+ # This can generate a key compatible with the following standard encryption algorithms:
12
+ #
13
+ # * Version 1-3, all variants
14
+ # * Version 4, V2 (RC4) and AESV2
15
+ #
16
+ class StandardKeyBuilder
37
17
 
38
18
  ## 7.6.3.3 Encryption Key Algorithm (pp61)
39
19
  #
@@ -45,9 +25,6 @@ class PDF::Reader
45
25
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
46
26
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
47
27
 
48
- attr_reader :key_length, :revision, :encrypt_key
49
- attr_reader :owner_key, :user_key, :permissions, :file_id, :password
50
-
51
28
  def initialize(opts = {})
52
29
  @key_length = opts[:key_length].to_i/8
53
30
  @revision = opts[:revision].to_i
@@ -56,72 +33,30 @@ class PDF::Reader
56
33
  @permissions = opts[:permissions].to_i
57
34
  @encryptMeta = opts.fetch(:encrypted_metadata, true)
58
35
  @file_id = opts[:file_id] || ""
59
- @encrypt_key = build_standard_key(opts[:password] || "")
60
- @cfm = opts[:cfm]
61
36
 
62
37
  if @key_length != 5 && @key_length != 16
63
- msg = "StandardSecurityHandler only supports 40 and 128 bit\
38
+ msg = "StandardKeyBuilder only supports 40 and 128 bit\
64
39
  encryption (#{@key_length * 8}bit)"
65
- raise ArgumentError, msg
40
+ raise UnsupportedFeatureError, msg
66
41
  end
67
42
  end
68
43
 
69
- # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
70
- def self.supports?(encrypt)
71
- return false if encrypt.nil?
72
-
73
- filter = encrypt.fetch(:Filter, :Standard)
74
- version = encrypt.fetch(:V, 0)
75
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
76
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
77
- (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
78
- end
79
-
80
- ##7.6.2 General Encryption Algorithm
81
- #
82
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
83
- #
84
- # used to decrypt RC4/AES encrypted PDF streams (buf)
44
+ # Takes a string containing a user provided password.
85
45
  #
86
- # buf - a string to decrypt
87
- # ref - a PDF::Reader::Reference for the object to decrypt
46
+ # If the password matches the file, then a string containing a key suitable for
47
+ # decrypting the file will be returned. If the password doesn't match the file,
48
+ # an exception will be raised.
88
49
  #
89
- def decrypt( buf, ref )
90
- case @cfm
91
- when :AESV2
92
- decrypt_aes128(buf, ref)
93
- else
94
- decrypt_rc4(buf, ref)
95
- end
96
- end
97
-
98
- private
50
+ def key(pass)
51
+ pass ||= ""
52
+ encrypt_key = auth_owner_pass(pass)
53
+ encrypt_key ||= auth_user_pass(pass)
99
54
 
100
- # decrypt with RC4 algorithm
101
- # version <=3 or (version == 4 and CFM == V2)
102
- def decrypt_rc4( buf, ref )
103
- objKey = @encrypt_key.dup
104
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
105
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
106
- length = objKey.length < 16 ? objKey.length : 16
107
- rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
108
- rc4.decrypt(buf)
55
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
56
+ encrypt_key
109
57
  end
110
58
 
111
- # decrypt with AES-128-CBC algorithm
112
- # when (version == 4 and CFM == AESV2)
113
- def decrypt_aes128( buf, ref )
114
- objKey = @encrypt_key.dup
115
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
116
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
117
- objKey << 'sAlT' # Algorithm 1, b)
118
- length = objKey.length < 16 ? objKey.length : 16
119
- cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
120
- cipher.decrypt
121
- cipher.key = Digest::MD5.digest(objKey)[0,length]
122
- cipher.iv = buf[0..15]
123
- cipher.update(buf[16..-1]) + cipher.final
124
- end
59
+ private
125
60
 
126
61
  # Pads supplied password to 32bytes using PassPadBytes as specified on
127
62
  # pp61 of spec
@@ -153,7 +88,7 @@ class PDF::Reader
153
88
  md5 = Digest::MD5.digest(pad_pass(pass))
154
89
  if @revision > 2 then
155
90
  50.times { md5 = Digest::MD5.digest(md5) }
156
- keyBegins = md5[0, key_length]
91
+ keyBegins = md5[0, @key_length]
157
92
  #first iteration decrypt owner_key
158
93
  out = @owner_key
159
94
  #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
@@ -218,12 +153,5 @@ class PDF::Reader
218
153
  end
219
154
  end
220
155
 
221
- def build_standard_key(pass)
222
- encrypt_key = auth_owner_pass(pass)
223
- encrypt_key ||= auth_user_pass(pass)
224
-
225
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
226
- encrypt_key
227
- end
228
156
  end
229
157
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -62,7 +62,7 @@ class PDF::Reader
62
62
  end
63
63
 
64
64
  Array(hash[:Filter]).each_with_index do |filter, index|
65
- @udata = Filter.with(filter, options[index]).filter(@udata)
65
+ @udata = Filter.with(filter, options[index] || {}).filter(@udata)
66
66
  end
67
67
  end
68
68
  @udata
@@ -0,0 +1,52 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Cast untrusted input (usually parsed out of a PDF file) to a known type
9
+ #
10
+ class TypeCheck
11
+
12
+ def self.cast_to_numeric!(obj)
13
+ if obj.is_a?(Numeric)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_f)
18
+ obj.to_f
19
+ elsif obj.respond_to?(:to_i)
20
+ obj.to_i
21
+ else
22
+ raise MalformedPDFError, "Unable to cast to numeric"
23
+ end
24
+ end
25
+
26
+ def self.cast_to_string!(string)
27
+ if string.is_a?(String)
28
+ string
29
+ elsif string.nil?
30
+ ""
31
+ elsif string.respond_to?(:to_s)
32
+ string.to_s
33
+ else
34
+ raise MalformedPDFError, "Unable to cast to string"
35
+ end
36
+ end
37
+
38
+ def self.cast_to_symbol(obj)
39
+ if obj.is_a?(Symbol)
40
+ obj
41
+ elsif obj.nil?
42
+ nil
43
+ elsif obj.respond_to?(:to_sym)
44
+ obj.to_sym
45
+ else
46
+ raise MalformedPDFError, "Unable to cast to symbol"
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+