pdf-reader 1.4.1 → 2.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG +53 -3
  3. data/{README.rdoc → README.md} +40 -23
  4. data/Rakefile +2 -2
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -1
  8. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  11. data/lib/pdf/reader/afm/Courier.afm +342 -342
  12. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  14. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  15. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  16. data/lib/pdf/reader/afm/MustRead.html +19 -0
  17. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  18. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  19. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  20. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  21. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  22. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  23. data/lib/pdf/reader/buffer.rb +14 -12
  24. data/lib/pdf/reader/cid_widths.rb +2 -0
  25. data/lib/pdf/reader/cmap.rb +48 -36
  26. data/lib/pdf/reader/encoding.rb +16 -18
  27. data/lib/pdf/reader/error.rb +5 -0
  28. data/lib/pdf/reader/filter/ascii85.rb +1 -0
  29. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  30. data/lib/pdf/reader/filter/depredict.rb +1 -0
  31. data/lib/pdf/reader/filter/flate.rb +29 -16
  32. data/lib/pdf/reader/filter/lzw.rb +2 -0
  33. data/lib/pdf/reader/filter/null.rb +2 -0
  34. data/lib/pdf/reader/filter/run_length.rb +4 -6
  35. data/lib/pdf/reader/filter.rb +2 -0
  36. data/lib/pdf/reader/font.rb +12 -13
  37. data/lib/pdf/reader/font_descriptor.rb +1 -0
  38. data/lib/pdf/reader/form_xobject.rb +1 -0
  39. data/lib/pdf/reader/glyph_hash.rb +7 -2
  40. data/lib/pdf/reader/lzw.rb +4 -4
  41. data/lib/pdf/reader/null_security_handler.rb +17 -0
  42. data/lib/pdf/reader/object_cache.rb +1 -0
  43. data/lib/pdf/reader/object_hash.rb +91 -37
  44. data/lib/pdf/reader/object_stream.rb +1 -0
  45. data/lib/pdf/reader/orientation_detector.rb +5 -4
  46. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  47. data/lib/pdf/reader/page.rb +30 -1
  48. data/lib/pdf/reader/page_layout.rb +19 -24
  49. data/lib/pdf/reader/page_state.rb +8 -5
  50. data/lib/pdf/reader/page_text_receiver.rb +23 -1
  51. data/lib/pdf/reader/pages_strategy.rb +2 -304
  52. data/lib/pdf/reader/parser.rb +10 -7
  53. data/lib/pdf/reader/print_receiver.rb +1 -0
  54. data/lib/pdf/reader/reference.rb +1 -0
  55. data/lib/pdf/reader/register_receiver.rb +1 -0
  56. data/lib/pdf/reader/resource_methods.rb +1 -0
  57. data/lib/pdf/reader/standard_security_handler.rb +80 -42
  58. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  59. data/lib/pdf/reader/stream.rb +1 -0
  60. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  61. data/lib/pdf/reader/text_run.rb +28 -9
  62. data/lib/pdf/reader/token.rb +1 -0
  63. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  64. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  65. data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
  66. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  67. data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
  68. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  69. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  70. data/lib/pdf/reader/width_calculator.rb +1 -0
  71. data/lib/pdf/reader/xref.rb +11 -5
  72. data/lib/pdf/reader.rb +30 -119
  73. data/lib/pdf-reader.rb +1 -0
  74. metadata +35 -61
  75. data/bin/pdf_list_callbacks +0 -17
  76. data/lib/pdf/hash.rb +0 -19
  77. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  78. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  79. data/lib/pdf/reader/text_receiver.rb +0 -265
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -27,42 +28,8 @@
27
28
 
28
29
  class PDF::Reader
29
30
  ################################################################################
30
- # Walks the pages of the PDF file and calls the appropriate callback methods when
31
- # something of interest is found.
32
- #
33
- # The callback methods should exist on the receiver object passed into the constructor.
34
- # Whenever some content is found that will trigger a callback, the receiver is checked
35
- # to see if the callback is defined.
36
- #
37
- # If it is defined it will be called. If not, processing will continue.
38
- #
39
- # = Available Callbacks
40
- # The following callbacks are available and should be methods defined on your receiver class. Only
41
- # implement the ones you need - the rest will be ignored.
42
- #
43
- # Some callbacks will include parameters which will be passed in as an array. For callbacks
44
- # that supply no paramters, or where you don't need them, the *params argument can be left off.
45
- # Some example callback method definitions are:
46
- #
47
- # def begin_document
48
- # def end_page
49
- # def show_text(string, *params)
50
- # def fill_stroke(*params)
51
- #
52
- # You should be able to infer the basic command the callback is reporting based on the name. For
53
- # further experimentation, define the callback with just a *params parameter, then print out the
54
- # contents of the array using something like:
55
- #
56
- # puts params.inspect
57
- #
58
31
  # == Text Callbacks
59
32
  #
60
- # All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
61
- # PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be
62
- # careful when doing a comparison on strings returned from PDF::Reader (when doing unit tests for
63
- # example). The string may not be byte-by-byte identical with the string that was originally
64
- # written to the PDF.
65
- #
66
33
  # - end_text_object
67
34
  # - move_to_start_of_next_line
68
35
  # - set_character_spacing
@@ -80,14 +47,6 @@ class PDF::Reader
80
47
  # - move_to_next_line_and_show_text
81
48
  # - set_spacing_next_line_show_text
82
49
  #
83
- # If the :raw_text option was passed to the PDF::Reader class the following callbacks
84
- # may also appear:
85
- #
86
- # - show_text_raw
87
- # - show_text_with_positioning_raw
88
- # - move_to_next_line_and_show_text_raw
89
- # - set_spacing_next_line_show_text_raw
90
- #
91
50
  # == Graphics Callbacks
92
51
  # - close_fill_stroke
93
52
  # - fill_stroke
@@ -145,42 +104,7 @@ class PDF::Reader
145
104
  # - set_clipping_path_with_even_odd
146
105
  # - append_curved_segment_final_point_replicated
147
106
  #
148
- # == Misc Callbacks
149
- # - begin_compatibility_section
150
- # - end_compatibility_section,
151
- # - begin_document
152
- # - end_document
153
- # - begin_page_container
154
- # - end_page_container
155
- # - begin_page
156
- # - end_page
157
- # - metadata
158
- # - xml_metadata
159
- # - page_count
160
- # - begin_form_xobject
161
- # - end_form_xobject
162
- #
163
- # == Resource Callbacks
164
- #
165
- # Each page can contain (or inherit) a range of resources required for the page,
166
- # including things like fonts and images. The following callbacks may appear
167
- # after begin_page if the relevant resources exist on a page:
168
- #
169
- # - resource_procset
170
- # - resource_xobject
171
- # - resource_extgstate
172
- # - resource_colorspace
173
- # - resource_pattern
174
- # - resource_font
175
- #
176
- # In most cases, these callbacks associate a name with each resource, allowing it
177
- # to be referred to by name in the page content. For example, an XObject can hold an image.
178
- # If it gets mapped to the name "IM1", then it can be placed on the page using
179
- # invoke_xobject "IM1".
180
- #
181
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
182
- # eventually be removed
183
- class PagesStrategy< AbstractStrategy # :nodoc:
107
+ class PagesStrategy # :nodoc:
184
108
  OPERATORS = {
185
109
  'b' => :close_fill_stroke,
186
110
  'B' => :fill_stroke,
@@ -256,232 +180,6 @@ class PDF::Reader
256
180
  '\'' => :move_to_next_line_and_show_text,
257
181
  '"' => :set_spacing_next_line_show_text,
258
182
  }
259
- def self.to_sym
260
- :pages
261
- end
262
- ################################################################################
263
- # Begin processing the document
264
- def process
265
- return false unless options[:pages]
266
-
267
- callback(:begin_document, [root])
268
- walk_pages(@ohash.object(root[:Pages]))
269
- callback(:end_document)
270
- end
271
- private
272
- ################################################################################
273
- def params_to_utf8(params, font)
274
- if params.is_a?(String)
275
- font.to_utf8(params)
276
- elsif params.is_a?(Array)
277
- params.map { |i| params_to_utf8(i, font)}
278
- else
279
- params
280
- end
281
- end
282
- ################################################################################
283
- # Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
284
- # its content
285
- def walk_pages(page)
286
-
287
- # extract page content
288
- if page[:Type] == :Pages
289
- callback(:begin_page_container, [page])
290
- res = @ohash.object(page[:Resources])
291
- resources.push res if res
292
- @ohash.object(page[:Kids]).each {|child| walk_pages(@ohash.object(child))}
293
- resources.pop if res
294
- callback(:end_page_container)
295
- elsif page[:Type] == :Page
296
- callback(:begin_page, [page])
297
- res = @ohash.object(page[:Resources])
298
- resources.push res if res
299
- walk_resources(current_resources)
300
-
301
- if @ohash.object(page[:Contents]).kind_of?(Array)
302
- contents = @ohash.object(page[:Contents])
303
- else
304
- contents = [page[:Contents]]
305
- end
306
-
307
- fonts = font_hash_from_resources(current_resources)
308
-
309
- if page.has_key?(:Contents) and page[:Contents]
310
- direct_contents = contents.map { |content| @ohash.object(content) }
311
- content_stream(direct_contents, fonts)
312
- end
313
-
314
- resources.pop if res
315
- callback(:end_page)
316
- end
317
- end
318
- ################################################################################
319
- # Retreive the XObject for the supplied label and if it's a Form, walk it
320
- # like a regular page content stream.
321
- #
322
- def walk_xobject_form(label)
323
- xobjects = @ohash.object(current_resources[:XObject]) || {}
324
- xobject = @ohash.object(xobjects[label])
325
-
326
- if xobject && xobject.hash[:Subtype] == :Form
327
- callback(:begin_form_xobject)
328
- xobj_resources = @ohash.object(xobject.hash[:Resources])
329
- if xobj_resources
330
- resources.push xobj_resources
331
- walk_resources(xobj_resources)
332
- end
333
- fonts = font_hash_from_resources(xobj_resources)
334
- content_stream(xobject, fonts)
335
- callback(:end_form_xobject)
336
- resources.pop if xobj_resources
337
- end
338
- end
339
-
340
- ################################################################################
341
- # Return a merged hash of all resources that are current. Pages, page and xobject
342
- #
343
- def current_resources
344
- hash = {}
345
- resources.each do |res|
346
- hash.merge!(res)
347
- end
348
- hash
349
- end
350
- ################################################################################
351
- # Reads a PDF content stream and calls all the appropriate callback methods for the operators
352
- # it contains
353
- #
354
- def content_stream(instructions, fonts = {})
355
- instructions = [instructions] unless instructions.kind_of?(Array)
356
- instructions = instructions.map { |ins|
357
- ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
358
- }.join
359
- buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
360
- parser = Parser.new(buffer, @ohash)
361
- current_font = nil
362
- params = []
363
-
364
- while (token = parser.parse_token(OPERATORS))
365
- if token.kind_of?(Token) and OPERATORS.has_key?(token)
366
- if OPERATORS[token] == :set_text_font_and_size
367
- current_font = params.first
368
- if fonts[current_font].nil?
369
- raise MalformedPDFError, "Unknown font #{current_font}"
370
- end
371
- end
372
-
373
- # handle special cases in response to certain operators
374
- if OPERATORS[token].to_s.include?("show_text")
375
- # convert any text to utf-8, but output the raw string if the user wants it
376
- if options[:raw_text]
377
- callback("#{OPERATORS[token]}_raw".to_sym, params)
378
- end
379
- params = params_to_utf8(params, fonts[current_font])
380
- elsif token == "ID"
381
- # inline image data, first convert the current params into a more familiar hash
382
- map = {}
383
- params.each_slice(2) do |key, value|
384
- map[key] = value
385
- end
386
- params = [map, buffer.token]
387
- end
388
-
389
- callback(OPERATORS[token], params)
390
-
391
- if OPERATORS[token] == :invoke_xobject
392
- xobject_label = params.first
393
- params.clear
394
- walk_xobject_form(xobject_label)
395
- else
396
- params.clear
397
- end
398
- else
399
- params << token
400
- end
401
- end
402
- rescue EOFError
403
- raise MalformedPDFError, "End Of File while processing a content stream"
404
- end
405
- ################################################################################
406
- def walk_resources(resources)
407
- return unless resources.respond_to?(:[])
408
-
409
- resources = resolve_references(resources)
410
-
411
- # extract any procset information
412
- if resources[:ProcSet]
413
- callback(:resource_procset, resources[:ProcSet])
414
- end
415
-
416
- # extract any xobject information
417
- if resources[:XObject]
418
- @ohash.object(resources[:XObject]).each do |name, val|
419
- callback(:resource_xobject, [name, @ohash.object(val)])
420
- end
421
- end
422
-
423
- # extract any extgstate information
424
- if resources[:ExtGState]
425
- @ohash.object(resources[:ExtGState]).each do |name, val|
426
- callback(:resource_extgstate, [name, @ohash.object(val)])
427
- end
428
- end
429
-
430
- # extract any colorspace information
431
- if resources[:ColorSpace]
432
- @ohash.object(resources[:ColorSpace]).each do |name, val|
433
- callback(:resource_colorspace, [name, @ohash.object(val)])
434
- end
435
- end
436
-
437
- # extract any pattern information
438
- if resources[:Pattern]
439
- @ohash.object(resources[:Pattern]).each do |name, val|
440
- callback(:resource_pattern, [name, @ohash.object(val)])
441
- end
442
- end
443
-
444
- # extract any font information
445
- if resources[:Font]
446
- fonts = font_hash_from_resources(resources)
447
- fonts.each do |label, font|
448
- callback(:resource_font, [label, font])
449
- end
450
- end
451
- end
452
- ################################################################################
453
- # Convert any PDF::Reader::Resource objects into a real object
454
- def resolve_references(obj)
455
- case obj
456
- when PDF::Reader::Stream then
457
- obj.hash = resolve_references(obj.hash)
458
- obj
459
- when PDF::Reader::Reference then
460
- resolve_references(@ohash.object(obj))
461
- when Hash then
462
- arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
463
- Hash[*arr]
464
- when Array then
465
- obj.collect { |item| resolve_references(item) }
466
- else
467
- obj
468
- end
469
- end
470
- ################################################################################
471
- ################################################################################
472
- def font_hash_from_resources(resources)
473
- return {} unless resources.respond_to?(:[])
474
-
475
- fonts = {}
476
- resources = @ohash.object(resources[:Font]) || {}
477
- resources.each do |label, desc|
478
- fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
479
- end
480
- fonts
481
- end
482
- def resources
483
- @resources ||= []
484
- end
485
183
  end
486
184
  ################################################################################
487
185
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -118,6 +119,7 @@ class PDF::Reader
118
119
  loop do
119
120
  key = parse_token
120
121
  break if key.kind_of?(Token) and key == ">>"
122
+ raise MalformedPDFError, "unterminated dict" if @buffer.empty?
121
123
  raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
122
124
 
123
125
  value = parse_token
@@ -131,8 +133,7 @@ class PDF::Reader
131
133
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
132
134
  def pdf_name
133
135
  tok = @buffer.token
134
- tok = " " if tok == "" && RUBY_VERSION < "1.9"
135
- tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
136
+ tok = tok.dup.gsub(/#([A-Fa-f0-9]{2})/) do |match|
136
137
  match[1, 2].hex.chr
137
138
  end
138
139
  tok.to_sym
@@ -145,6 +146,7 @@ class PDF::Reader
145
146
  loop do
146
147
  item = parse_token
147
148
  break if item.kind_of?(Token) and item == "]"
149
+ raise MalformedPDFError, "unterminated array" if @buffer.empty?
148
150
  a << item
149
151
  end
150
152
 
@@ -153,29 +155,30 @@ class PDF::Reader
153
155
  ################################################################################
154
156
  # Reads a PDF hex string from the buffer and converts it to a Ruby String
155
157
  def hex_string
156
- str = ""
158
+ str = "".dup
157
159
 
158
160
  loop do
159
161
  token = @buffer.token
160
162
  break if token == ">"
163
+ raise MalformedPDFError, "unterminated hex string" if @buffer.empty?
161
164
  str << token
162
165
  end
163
166
 
164
167
  # add a missing digit if required, as required by the spec
165
168
  str << "0" unless str.size % 2 == 0
166
- str.scan(/../).map {|i| i.hex.chr}.join
169
+ str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
167
170
  end
168
171
  ################################################################################
169
172
  # Reads a PDF String from the buffer and converts it to a Ruby String
170
173
  def string
171
174
  str = @buffer.token
172
- return "" if str == ")"
175
+ return "".dup.force_encoding("binary") if str == ")"
173
176
  Error.assert_equal(parse_token, ")")
174
177
 
175
178
  str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
176
- MAPPING[match] || ""
179
+ MAPPING[match] || "".dup
177
180
  end
178
- str
181
+ str.force_encoding("binary")
179
182
  end
180
183
 
181
184
  MAPPING = {
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # A simple receiver that prints all operaters and parameters in the content
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # Copyright (C) 2010 James Healy (jimmy@deefa.com)
4
5
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module PDF
4
5
  class Reader
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -25,6 +26,7 @@
25
26
  #
26
27
  ################################################################################
27
28
  require 'digest/md5'
29
+ require 'openssl'
28
30
  require 'rc4'
29
31
 
30
32
  class PDF::Reader
@@ -42,51 +44,83 @@ class PDF::Reader
42
44
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
43
45
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
44
46
 
45
- attr_reader :filter, :subFilter, :version, :key_length,
46
- :crypt_filter, :stream_filter, :string_filter, :embedded_file_filter,
47
- :encrypt_key
48
- attr_reader :revision, :owner_key, :user_key, :permissions, :file_id, :password
49
-
50
- def initialize( enc, file_id, password )
51
- @filter = enc[:Filter]
52
- @subFilter = enc[:SubFilter]
53
- @version = enc[:V].to_i
54
- @key_length = enc[:Length].to_i/8
55
- @crypt_filter = enc[:CF]
56
- @stream_filter = enc[:StmF]
57
- @string_filter = enc[:StrF]
58
- @revision = enc[:R].to_i
59
- @owner_key = enc[:O]
60
- @user_key = enc[:U]
61
- @permissions = enc[:P].to_i
62
- @embedded_file_filter = enc[:EFF]
63
-
64
- @encryptMeta = enc.has_key?(:EncryptMetadata)? enc[:EncryptMetadata].to_s == "true" : true;
65
-
66
- @file_id = (file_id || []).first || ""
67
-
68
- @encrypt_key = build_standard_key(password)
47
+ attr_reader :key_length, :revision, :encrypt_key
48
+ attr_reader :owner_key, :user_key, :permissions, :file_id, :password
49
+
50
+ def initialize(opts = {})
51
+ @key_length = opts[:key_length].to_i/8
52
+ @revision = opts[:revision].to_i
53
+ @owner_key = opts[:owner_key]
54
+ @user_key = opts[:user_key]
55
+ @permissions = opts[:permissions].to_i
56
+ @encryptMeta = opts.fetch(:encrypted_metadata, true)
57
+ @file_id = opts[:file_id] || ""
58
+ @encrypt_key = build_standard_key(opts[:password] || "")
59
+ @cfm = opts[:cfm]
60
+
61
+ if @key_length != 5 && @key_length != 16
62
+ msg = "StandardSecurityHandler only supports 40 and 128 bit\
63
+ encryption (#{@key_length * 8}bit)"
64
+ raise ArgumentError, msg
65
+ end
66
+ end
67
+
68
+ # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
69
+ def self.supports?(encrypt)
70
+ return false if encrypt.nil?
71
+
72
+ filter = encrypt.fetch(:Filter, :Standard)
73
+ version = encrypt.fetch(:V, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
69
77
  end
70
78
 
71
79
  ##7.6.2 General Encryption Algorithm
72
80
  #
73
81
  # Algorithm 1: Encryption of data using the RC4 or AES algorithms
74
82
  #
75
- # used to decrypt RC4 encrypted PDF streams (buf)
83
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
76
84
  #
77
85
  # buf - a string to decrypt
78
86
  # ref - a PDF::Reader::Reference for the object to decrypt
79
87
  #
80
88
  def decrypt( buf, ref )
89
+ case @cfm
90
+ when :AESV2
91
+ decrypt_aes128(buf, ref)
92
+ else
93
+ decrypt_rc4(buf, ref)
94
+ end
95
+ end
96
+
97
+ private
98
+
99
+ # decrypt with RC4 algorithm
100
+ # version <=3 or (version == 4 and CFM == V2)
101
+ def decrypt_rc4( buf, ref )
81
102
  objKey = @encrypt_key.dup
82
103
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
83
104
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
84
105
  length = objKey.length < 16 ? objKey.length : 16
85
- rc4 = RC4.new( Digest::MD5.digest(objKey)[(0...length)] )
106
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
86
107
  rc4.decrypt(buf)
87
108
  end
88
109
 
89
- private
110
+ # decrypt with AES-128-CBC algorithm
111
+ # when (version == 4 and CFM == AESV2)
112
+ def decrypt_aes128( buf, ref )
113
+ objKey = @encrypt_key.dup
114
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
115
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
116
+ objKey << 'sAlT' # Algorithm 1, b)
117
+ length = objKey.length < 16 ? objKey.length : 16
118
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
119
+ cipher.decrypt
120
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
121
+ cipher.iv = buf[0..15]
122
+ cipher.update(buf[16..-1]) + cipher.final
123
+ end
90
124
 
91
125
  # Pads supplied password to 32bytes using PassPadBytes as specified on
92
126
  # pp61 of spec
@@ -94,7 +128,7 @@ class PDF::Reader
94
128
  if p.nil? || p.empty?
95
129
  PassPadBytes.pack('C*')
96
130
  else
97
- p[(0...32)] + PassPadBytes[0...(32-p.length)].pack('C*')
131
+ p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
98
132
  end
99
133
  end
100
134
 
@@ -118,13 +152,13 @@ class PDF::Reader
118
152
  md5 = Digest::MD5.digest(pad_pass(pass))
119
153
  if @revision > 2 then
120
154
  50.times { md5 = Digest::MD5.digest(md5) }
121
- keyBegins = md5[(0...@key_length)]
122
- #first itteration decrypt owner_key
155
+ keyBegins = md5[0, key_length]
156
+ #first iteration decrypt owner_key
123
157
  out = @owner_key
124
- #RC4 keyed with (keyBegins XOR with itteration #) to decrypt previous out
158
+ #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
125
159
  19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
126
160
  else
127
- out = RC4.new( md5[(0...5)] ).decrypt( @owner_key )
161
+ out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
128
162
  end
129
163
  # c) check output as user password
130
164
  auth_user_pass( out )
@@ -142,12 +176,12 @@ class PDF::Reader
142
176
  #
143
177
  def auth_user_pass(pass)
144
178
  keyBegins = make_file_key(pass)
145
- if @revision > 2
179
+ if @revision >= 3
146
180
  #initialize out for first iteration
147
181
  out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
148
182
  #zero doesn't matter -> so from 0-19
149
- 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).decrypt(out) }
150
- pass = @user_key[(0...16)] == out
183
+ 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
184
+ pass = @user_key[0, 16] == out
151
185
  else
152
186
  pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
153
187
  end
@@ -163,20 +197,24 @@ class PDF::Reader
163
197
  (0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
164
198
  # e) add the file ID
165
199
  @buf << @file_id
166
- # f) if revision > 4 then if encryptMetadata add 4 bytes of 0x00 else add 4 bytes of 0xFF
167
- if @revision > 4
168
- @buf << [ @encryptMetadata ? 0x00 : 0xFF ].pack('C')*4
200
+ # f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
201
+ if @revision >= 4 && !@encryptMeta
202
+ @buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
169
203
  end
170
204
  # b) init MD5 digest + g) finish the hash
171
205
  md5 = Digest::MD5.digest(@buf)
172
206
  # h) spin hash 50 times
173
- if @revision > 2
207
+ if @revision >= 3
174
208
  50.times {
175
- md5 = Digest::MD5.digest(md5[(0...@key_length)])
209
+ md5 = Digest::MD5.digest(md5[0, @key_length])
176
210
  }
177
211
  end
178
- # i) n = key_length revision > 3, n = 5 revision == 2
179
- md5[(0...((@revision < 3) ? 5 : @key_length))]
212
+ # i) n = key_length revision >= 3, n = 5 revision == 2
213
+ if @revision < 3
214
+ md5[0, 5]
215
+ else
216
+ md5[0, @key_length]
217
+ end
180
218
  end
181
219
 
182
220
  def build_standard_key(pass)