pdf-reader 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +5 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +1 -1
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +9 -6
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -14,7 +14,7 @@ module PDF
14
14
  # objects accessor to help walk the page dictionary in any useful way.
15
15
  #
16
16
  class Page
17
- include ResourceMethods
17
+ extend Forwardable
18
18
 
19
19
  # lowlevel hash-like access to all objects in the underlying PDF
20
20
  attr_reader :objects
@@ -27,6 +27,15 @@ module PDF
27
27
  # operations
28
28
  attr_reader :cache
29
29
 
30
+ def_delegators :resources, :color_spaces
31
+ def_delegators :resources, :fonts
32
+ def_delegators :resources, :graphic_states
33
+ def_delegators :resources, :patterns
34
+ def_delegators :resources, :procedure_sets
35
+ def_delegators :resources, :properties
36
+ def_delegators :resources, :shadings
37
+ def_delegators :resources, :xobjects
38
+
30
39
  # creates a new page wrapper.
31
40
  #
32
41
  # * objects - an ObjectHash instance that wraps a PDF file
@@ -34,7 +43,7 @@ module PDF
34
43
  #
35
44
  def initialize(objects, pagenum, options = {})
36
45
  @objects, @pagenum = objects, pagenum
37
- @page_object = objects.deref(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
38
47
  @cache = options[:cache] || {}
39
48
 
40
49
  unless @page_object.is_a?(::Hash)
@@ -60,7 +69,7 @@ module PDF
60
69
  def attributes
61
70
  @attributes ||= {}.tap { |hash|
62
71
  page_with_ancestors.reverse.each do |obj|
63
- hash.merge!(@objects.deref(obj))
72
+ hash.merge!(@objects.deref_hash(obj) || {})
64
73
  end
65
74
  }
66
75
  # This shouldn't be necessary, but some non-compliant PDFs leave MediaBox
@@ -143,6 +152,9 @@ module PDF
143
152
  # the program in the correct order and calls out to your implementation.
144
153
  #
145
154
  def walk(*receivers)
155
+ receivers = receivers.map { |receiver|
156
+ ValidatingReceiver.new(receiver)
157
+ }
146
158
  callback(receivers, :page=, [self])
147
159
  content_stream(receivers, raw_content)
148
160
  end
@@ -151,10 +163,10 @@ module PDF
151
163
  # see here unless you're a PDF nerd like me.
152
164
  #
153
165
  def raw_content
154
- contents = objects.deref(@page_object[:Contents])
166
+ contents = objects.deref_stream_or_array(@page_object[:Contents])
155
167
  [contents].flatten.compact.map { |obj|
156
- objects.deref(obj)
157
- }.map { |obj|
168
+ objects.deref_stream(obj)
169
+ }.compact.map { |obj|
158
170
  obj.unfiltered_data
159
171
  }.join(" ")
160
172
  end
@@ -185,17 +197,22 @@ module PDF
185
197
  # values are defaulted according to section 7.7.3.3 of the PDF Spec 1.7
186
198
  #
187
199
  def rectangles
188
- mediabox = objects.deref!(attributes[:MediaBox])
189
- cropbox = objects.deref!(attributes[:Cropbox]) || mediabox
190
- bleedbox = objects.deref!(attributes[:BleedBox]) || cropbox
191
- trimbox = objects.deref!(attributes[:TrimBox]) || cropbox
192
- artbox = objects.deref!(attributes[:ArtBox]) || cropbox
193
-
194
- mediarect = Rectangle.new(*mediabox)
195
- croprect = Rectangle.new(*cropbox)
196
- bleedrect = Rectangle.new(*bleedbox)
197
- trimrect = Rectangle.new(*trimbox)
198
- artrect = Rectangle.new(*artbox)
200
+ # attributes[:MediaBox] can never be nil, but I have no easy way to tell sorbet that atm
201
+ mediabox = objects.deref_array_of_numbers(attributes[:MediaBox]) || []
202
+ cropbox = objects.deref_array_of_numbers(attributes[:CropBox]) || mediabox
203
+ bleedbox = objects.deref_array_of_numbers(attributes[:BleedBox]) || cropbox
204
+ trimbox = objects.deref_array_of_numbers(attributes[:TrimBox]) || cropbox
205
+ artbox = objects.deref_array_of_numbers(attributes[:ArtBox]) || cropbox
206
+
207
+ begin
208
+ mediarect = Rectangle.from_array(mediabox)
209
+ croprect = Rectangle.from_array(cropbox)
210
+ bleedrect = Rectangle.from_array(bleedbox)
211
+ trimrect = Rectangle.from_array(trimbox)
212
+ artrect = Rectangle.from_array(artbox)
213
+ rescue ArgumentError => e
214
+ raise MalformedPDFError, e.message
215
+ end
199
216
 
200
217
  if rotate > 0
201
218
  mediarect.apply_rotation(rotate)
@@ -217,14 +234,14 @@ module PDF
217
234
  private
218
235
 
219
236
  def root
220
- @root ||= objects.deref(@objects.trailer[:Root])
237
+ @root ||= objects.deref_hash(@objects.trailer[:Root]) || {}
221
238
  end
222
239
 
223
240
  # Returns the resources that accompany this page. Includes
224
241
  # resources inherited from parents.
225
242
  #
226
243
  def resources
227
- @resources ||= @objects.deref(attributes[:Resources]) || {}
244
+ @resources ||= Resources.new(@objects, @objects.deref_hash(attributes[:Resources]) || {})
228
245
  end
229
246
 
230
247
  def content_stream(receivers, instructions)
@@ -260,7 +277,8 @@ module PDF
260
277
  if origin.nil?
261
278
  []
262
279
  else
263
- obj = objects.deref(origin)
280
+ obj = objects.deref_hash(origin)
281
+ PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
264
282
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
265
283
  end
266
284
  end
@@ -384,7 +384,7 @@ class PDF::Reader
384
384
  #
385
385
  def build_fonts(raw_fonts)
386
386
  wrapped_fonts = raw_fonts.map { |label, font|
387
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
387
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
388
388
  }
389
389
 
390
390
  ::Hash[wrapped_fonts]
@@ -87,8 +87,10 @@ module PDF
87
87
  params.each do |arg|
88
88
  if arg.is_a?(String)
89
89
  internal_show_text(arg)
90
- else
90
+ elsif arg.is_a?(Numeric)
91
91
  @state.process_glyph_displacement(0, arg, false)
92
+ else
93
+ # skip it
92
94
  end
93
95
  end
94
96
  end
@@ -119,6 +121,7 @@ module PDF
119
121
  private
120
122
 
121
123
  def internal_show_text(string)
124
+ PDF::Reader::Error.validate_type_as_malformed(string, "string", String)
122
125
  if @state.current_font.nil?
123
126
  raise PDF::Reader::MalformedPDFError, "current font is invalid"
124
127
  end
@@ -80,8 +80,8 @@ class PDF::Reader
80
80
  token
81
81
  elsif operators.has_key? token
82
82
  Token.new(token)
83
- elsif token.respond_to?(:to_token)
84
- token.to_token
83
+ elsif token.frozen?
84
+ token
85
85
  elsif token =~ /\d*\.\d/
86
86
  token.to_f
87
87
  else
@@ -103,7 +103,7 @@ class PDF::Reader
103
103
  obj = parse_token
104
104
  post_obj = parse_token
105
105
 
106
- if post_obj == "stream"
106
+ if obj.is_a?(Hash) && post_obj == "stream"
107
107
  stream(obj)
108
108
  else
109
109
  obj
@@ -121,7 +121,7 @@ class PDF::Reader
121
121
  key = parse_token
122
122
  break if key.kind_of?(Token) and key == ">>"
123
123
  raise MalformedPDFError, "unterminated dict" if @buffer.empty?
124
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
124
+ PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
125
125
 
126
126
  value = parse_token
127
127
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -209,13 +209,16 @@ class PDF::Reader
209
209
  def stream(dict)
210
210
  raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
211
211
  if @objects
212
- length = @objects.deref(dict[:Length])
212
+ length = @objects.deref_integer(dict[:Length])
213
213
  if dict[:Filter]
214
- dict[:Filter] = @objects.deref(dict[:Filter])
214
+ dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
215
215
  end
216
216
  else
217
217
  length = dict[:Length] || 0
218
218
  end
219
+
220
+ PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
221
+
219
222
  data = @buffer.read(length, :skip_eol => true)
220
223
 
221
224
  Error.str_assert(parse_token, "endstream")
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
11
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
12
+ #
13
+ class Rc4SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ end
18
+
19
+ ##7.6.2 General Encryption Algorithm
20
+ #
21
+ # Algorithm 1: Encryption of data using the RC4 algorithm
22
+ #
23
+ # version <=3 or (version == 4 and CFM == V2)
24
+ #
25
+ # buf - a string to decrypt
26
+ # ref - a PDF::Reader::Reference for the object to decrypt
27
+ #
28
+ def decrypt( buf, ref )
29
+ objKey = @encrypt_key.dup
30
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
31
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
34
+ rc4.decrypt(buf)
35
+ end
36
+
37
+ end
38
+ end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -85,7 +85,7 @@ module PDF
85
85
  new_x2 = bottom_left.x
86
86
  new_y2 = bottom_left.y + width
87
87
  end
88
- set_corners(new_x1, new_y1, new_x2, new_y2)
88
+ set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
89
89
  end
90
90
 
91
91
  private
@@ -1,16 +1,18 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
- # Setting this file to "typed: true" is difficult because it's a mixin that assumes some things
6
- # are aavailable from the class, like @objects and resources. Sorbet doesn't know about them.
7
-
8
5
  module PDF
9
6
  class Reader
10
7
 
11
8
  # wraps a resources hash and provides the common accessor methods used by Page and FormXObject
12
9
  #
13
- module ResourceMethods
10
+ class Resources
11
+
12
+ def initialize(objects, resources)
13
+ @objects = objects
14
+ @resources = resources
15
+ end
14
16
 
15
17
  # Returns a Hash of color spaces that are available to this page
16
18
  #
@@ -19,7 +21,7 @@ module PDF
19
21
  # of calling it over and over.
20
22
  #
21
23
  def color_spaces
22
- @objects.deref!(resources[:ColorSpace]) || {}
24
+ @objects.deref_hash!(@resources[:ColorSpace]) || {}
23
25
  end
24
26
 
25
27
  # Returns a Hash of fonts that are available to this page
@@ -29,7 +31,7 @@ module PDF
29
31
  # of calling it over and over.
30
32
  #
31
33
  def fonts
32
- @objects.deref!(resources[:Font]) || {}
34
+ @objects.deref_hash!(@resources[:Font]) || {}
33
35
  end
34
36
 
35
37
  # Returns a Hash of external graphic states that are available to this
@@ -40,7 +42,7 @@ module PDF
40
42
  # of calling it over and over.
41
43
  #
42
44
  def graphic_states
43
- @objects.deref!(resources[:ExtGState]) || {}
45
+ @objects.deref_hash!(@resources[:ExtGState]) || {}
44
46
  end
45
47
 
46
48
  # Returns a Hash of patterns that are available to this page
@@ -50,7 +52,7 @@ module PDF
50
52
  # of calling it over and over.
51
53
  #
52
54
  def patterns
53
- @objects.deref!(resources[:Pattern]) || {}
55
+ @objects.deref_hash!(@resources[:Pattern]) || {}
54
56
  end
55
57
 
56
58
  # Returns an Array of procedure sets that are available to this page
@@ -60,7 +62,7 @@ module PDF
60
62
  # of calling it over and over.
61
63
  #
62
64
  def procedure_sets
63
- @objects.deref!(resources[:ProcSet]) || []
65
+ @objects.deref_array!(@resources[:ProcSet]) || []
64
66
  end
65
67
 
66
68
  # Returns a Hash of properties sets that are available to this page
@@ -70,7 +72,7 @@ module PDF
70
72
  # of calling it over and over.
71
73
  #
72
74
  def properties
73
- @objects.deref!(resources[:Properties]) || {}
75
+ @objects.deref_hash!(@resources[:Properties]) || {}
74
76
  end
75
77
 
76
78
  # Returns a Hash of shadings that are available to this page
@@ -80,7 +82,7 @@ module PDF
80
82
  # of calling it over and over.
81
83
  #
82
84
  def shadings
83
- @objects.deref!(resources[:Shading]) || {}
85
+ @objects.deref_hash!(@resources[:Shading]) || {}
84
86
  end
85
87
 
86
88
  # Returns a Hash of XObjects that are available to this page
@@ -90,7 +92,7 @@ module PDF
90
92
  # of calling it over and over.
91
93
  #
92
94
  def xobjects
93
- @objects.deref!(resources[:XObject]) || {}
95
+ @objects.deref_hash!(@resources[:XObject]) || {}
94
96
  end
95
97
 
96
98
  end
@@ -0,0 +1,79 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
7
+ # able to decrypt the file.
8
+ class SecurityHandlerFactory
9
+
10
+ def self.build(encrypt, doc_id, password)
11
+ doc_id ||= []
12
+ password ||= ""
13
+
14
+ if encrypt.nil?
15
+ NullSecurityHandler.new
16
+ elsif standard?(encrypt)
17
+ build_standard_handler(encrypt, doc_id, password)
18
+ elsif standard_v5?(encrypt)
19
+ build_v5_handler(encrypt, doc_id, password)
20
+ else
21
+ UnimplementedSecurityHandler.new
22
+ end
23
+ end
24
+
25
+ def self.build_standard_handler(encrypt, doc_id, password)
26
+ encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
27
+ key_builder = StandardKeyBuilder.new(
28
+ key_length: (encrypt[:Length] || 40).to_i,
29
+ revision: encrypt[:R],
30
+ owner_key: encrypt[:O],
31
+ user_key: encrypt[:U],
32
+ permissions: encrypt[:P].to_i,
33
+ encrypted_metadata: encmeta,
34
+ file_id: doc_id.first,
35
+ )
36
+ cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
37
+ if cfm == :AESV2
38
+ AesV2SecurityHandler.new(key_builder.key(password))
39
+ else
40
+ Rc4SecurityHandler.new(key_builder.key(password))
41
+ end
42
+ end
43
+
44
+ def self.build_v5_handler(encrypt, doc_id, password)
45
+ key_builder = KeyBuilderV5.new(
46
+ owner_key: encrypt[:O],
47
+ user_key: encrypt[:U],
48
+ owner_encryption_key: encrypt[:OE],
49
+ user_encryption_key: encrypt[:UE],
50
+ )
51
+ AesV3SecurityHandler.new(key_builder.key(password))
52
+ end
53
+
54
+ # This handler supports all encryption defined up to the PDF 1.5 spec (revision 4)
55
+ def self.standard?(encrypt)
56
+ return false if encrypt.nil?
57
+
58
+ filter = encrypt.fetch(:Filter, :Standard)
59
+ version = encrypt.fetch(:V, 0)
60
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
61
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
62
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
63
+ end
64
+
65
+ # This handler supports both
66
+ # - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
67
+ # - AES-256 encryption defined in PDF 2.0 ('revision 6')
68
+ def self.standard_v5?(encrypt)
69
+ return false if encrypt.nil?
70
+
71
+ filter = encrypt.fetch(:Filter, :Standard)
72
+ version = encrypt.fetch(:V, 0)
73
+ revision = encrypt.fetch(:R, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ ((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
77
+ end
78
+ end
79
+ end
@@ -1,39 +1,19 @@
1
1
  # coding: utf-8
2
- # typed: true
3
- # frozen_string_literal: true
4
2
 
5
- ################################################################################
6
- #
7
- # Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
8
- #
9
- # Permission is hereby granted, free of charge, to any person obtaining
10
- # a copy of this software and associated documentation files (the
11
- # "Software"), to deal in the Software without restriction, including
12
- # without limitation the rights to use, copy, modify, merge, publish,
13
- # distribute, sublicense, and/or sell copies of the Software, and to
14
- # permit persons to whom the Software is furnished to do so, subject to
15
- # the following conditions:
16
- #
17
- # The above copyright notice and this permission notice shall be
18
- # included in all copies or substantial portions of the Software.
19
- #
20
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
21
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
23
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
24
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
25
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
26
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27
- #
28
- ################################################################################
29
3
  require 'digest/md5'
30
- require 'openssl'
31
4
  require 'rc4'
32
5
 
33
6
  class PDF::Reader
34
7
 
35
- # class creates interface to encrypt dictionary for use in Decrypt
36
- class StandardSecurityHandler
8
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
9
+ # password and returns a key that can decrypt the file.
10
+ #
11
+ # This can generate a key compatible with the following standard encryption algorithms:
12
+ #
13
+ # * Version 1-3, all variants
14
+ # * Version 4, V2 (RC4) and AESV2
15
+ #
16
+ class StandardKeyBuilder
37
17
 
38
18
  ## 7.6.3.3 Encryption Key Algorithm (pp61)
39
19
  #
@@ -45,9 +25,6 @@ class PDF::Reader
45
25
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
46
26
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
47
27
 
48
- attr_reader :key_length, :revision, :encrypt_key
49
- attr_reader :owner_key, :user_key, :permissions, :file_id, :password
50
-
51
28
  def initialize(opts = {})
52
29
  @key_length = opts[:key_length].to_i/8
53
30
  @revision = opts[:revision].to_i
@@ -56,72 +33,30 @@ class PDF::Reader
56
33
  @permissions = opts[:permissions].to_i
57
34
  @encryptMeta = opts.fetch(:encrypted_metadata, true)
58
35
  @file_id = opts[:file_id] || ""
59
- @encrypt_key = build_standard_key(opts[:password] || "")
60
- @cfm = opts[:cfm]
61
36
 
62
37
  if @key_length != 5 && @key_length != 16
63
- msg = "StandardSecurityHandler only supports 40 and 128 bit\
38
+ msg = "StandardKeyBuilder only supports 40 and 128 bit\
64
39
  encryption (#{@key_length * 8}bit)"
65
- raise ArgumentError, msg
40
+ raise UnsupportedFeatureError, msg
66
41
  end
67
42
  end
68
43
 
69
- # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
70
- def self.supports?(encrypt)
71
- return false if encrypt.nil?
72
-
73
- filter = encrypt.fetch(:Filter, :Standard)
74
- version = encrypt.fetch(:V, 0)
75
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
76
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
77
- (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
78
- end
79
-
80
- ##7.6.2 General Encryption Algorithm
81
- #
82
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
83
- #
84
- # used to decrypt RC4/AES encrypted PDF streams (buf)
44
+ # Takes a string containing a user provided password.
85
45
  #
86
- # buf - a string to decrypt
87
- # ref - a PDF::Reader::Reference for the object to decrypt
46
+ # If the password matches the file, then a string containing a key suitable for
47
+ # decrypting the file will be returned. If the password doesn't match the file,
48
+ # an exception will be raised.
88
49
  #
89
- def decrypt( buf, ref )
90
- case @cfm
91
- when :AESV2
92
- decrypt_aes128(buf, ref)
93
- else
94
- decrypt_rc4(buf, ref)
95
- end
96
- end
97
-
98
- private
50
+ def key(pass)
51
+ pass ||= ""
52
+ encrypt_key = auth_owner_pass(pass)
53
+ encrypt_key ||= auth_user_pass(pass)
99
54
 
100
- # decrypt with RC4 algorithm
101
- # version <=3 or (version == 4 and CFM == V2)
102
- def decrypt_rc4( buf, ref )
103
- objKey = @encrypt_key.dup
104
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
105
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
106
- length = objKey.length < 16 ? objKey.length : 16
107
- rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
108
- rc4.decrypt(buf)
55
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
56
+ encrypt_key
109
57
  end
110
58
 
111
- # decrypt with AES-128-CBC algorithm
112
- # when (version == 4 and CFM == AESV2)
113
- def decrypt_aes128( buf, ref )
114
- objKey = @encrypt_key.dup
115
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
116
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
117
- objKey << 'sAlT' # Algorithm 1, b)
118
- length = objKey.length < 16 ? objKey.length : 16
119
- cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
120
- cipher.decrypt
121
- cipher.key = Digest::MD5.digest(objKey)[0,length]
122
- cipher.iv = buf[0..15]
123
- cipher.update(buf[16..-1]) + cipher.final
124
- end
59
+ private
125
60
 
126
61
  # Pads supplied password to 32bytes using PassPadBytes as specified on
127
62
  # pp61 of spec
@@ -153,7 +88,7 @@ class PDF::Reader
153
88
  md5 = Digest::MD5.digest(pad_pass(pass))
154
89
  if @revision > 2 then
155
90
  50.times { md5 = Digest::MD5.digest(md5) }
156
- keyBegins = md5[0, key_length]
91
+ keyBegins = md5[0, @key_length]
157
92
  #first iteration decrypt owner_key
158
93
  out = @owner_key
159
94
  #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
@@ -218,12 +153,5 @@ class PDF::Reader
218
153
  end
219
154
  end
220
155
 
221
- def build_standard_key(pass)
222
- encrypt_key = auth_owner_pass(pass)
223
- encrypt_key ||= auth_user_pass(pass)
224
-
225
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
226
- encrypt_key
227
- end
228
156
  end
229
157
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -62,7 +62,7 @@ class PDF::Reader
62
62
  end
63
63
 
64
64
  Array(hash[:Filter]).each_with_index do |filter, index|
65
- @udata = Filter.with(filter, options[index]).filter(@udata)
65
+ @udata = Filter.with(filter, options[index] || {}).filter(@udata)
66
66
  end
67
67
  end
68
68
  @udata
@@ -0,0 +1,52 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # Cast untrusted input (usually parsed out of a PDF file) to a known type
9
+ #
10
+ class TypeCheck
11
+
12
+ def self.cast_to_numeric!(obj)
13
+ if obj.is_a?(Numeric)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_f)
18
+ obj.to_f
19
+ elsif obj.respond_to?(:to_i)
20
+ obj.to_i
21
+ else
22
+ raise MalformedPDFError, "Unable to cast to numeric"
23
+ end
24
+ end
25
+
26
+ def self.cast_to_string!(string)
27
+ if string.is_a?(String)
28
+ string
29
+ elsif string.nil?
30
+ ""
31
+ elsif string.respond_to?(:to_s)
32
+ string.to_s
33
+ else
34
+ raise MalformedPDFError, "Unable to cast to string"
35
+ end
36
+ end
37
+
38
+ def self.cast_to_symbol(obj)
39
+ if obj.is_a?(Symbol)
40
+ obj
41
+ elsif obj.nil?
42
+ nil
43
+ elsif obj.respond_to?(:to_sym)
44
+ obj.to_sym
45
+ else
46
+ raise MalformedPDFError, "Unable to cast to symbol"
47
+ end
48
+ end
49
+ end
50
+ end
51
+ end
52
+