pdf-reader 1.4.1 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +5 -5
  2. data/CHANGELOG +53 -3
  3. data/{README.rdoc → README.md} +40 -23
  4. data/Rakefile +2 -2
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_object +4 -1
  7. data/bin/pdf_text +1 -1
  8. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  9. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  10. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  11. data/lib/pdf/reader/afm/Courier.afm +342 -342
  12. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  13. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  14. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  15. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  16. data/lib/pdf/reader/afm/MustRead.html +19 -0
  17. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  18. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  19. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  20. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  21. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  22. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  23. data/lib/pdf/reader/buffer.rb +14 -12
  24. data/lib/pdf/reader/cid_widths.rb +2 -0
  25. data/lib/pdf/reader/cmap.rb +48 -36
  26. data/lib/pdf/reader/encoding.rb +16 -18
  27. data/lib/pdf/reader/error.rb +5 -0
  28. data/lib/pdf/reader/filter/ascii85.rb +1 -0
  29. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  30. data/lib/pdf/reader/filter/depredict.rb +1 -0
  31. data/lib/pdf/reader/filter/flate.rb +29 -16
  32. data/lib/pdf/reader/filter/lzw.rb +2 -0
  33. data/lib/pdf/reader/filter/null.rb +2 -0
  34. data/lib/pdf/reader/filter/run_length.rb +4 -6
  35. data/lib/pdf/reader/filter.rb +2 -0
  36. data/lib/pdf/reader/font.rb +12 -13
  37. data/lib/pdf/reader/font_descriptor.rb +1 -0
  38. data/lib/pdf/reader/form_xobject.rb +1 -0
  39. data/lib/pdf/reader/glyph_hash.rb +7 -2
  40. data/lib/pdf/reader/lzw.rb +4 -4
  41. data/lib/pdf/reader/null_security_handler.rb +17 -0
  42. data/lib/pdf/reader/object_cache.rb +1 -0
  43. data/lib/pdf/reader/object_hash.rb +91 -37
  44. data/lib/pdf/reader/object_stream.rb +1 -0
  45. data/lib/pdf/reader/orientation_detector.rb +5 -4
  46. data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
  47. data/lib/pdf/reader/page.rb +30 -1
  48. data/lib/pdf/reader/page_layout.rb +19 -24
  49. data/lib/pdf/reader/page_state.rb +8 -5
  50. data/lib/pdf/reader/page_text_receiver.rb +23 -1
  51. data/lib/pdf/reader/pages_strategy.rb +2 -304
  52. data/lib/pdf/reader/parser.rb +10 -7
  53. data/lib/pdf/reader/print_receiver.rb +1 -0
  54. data/lib/pdf/reader/reference.rb +1 -0
  55. data/lib/pdf/reader/register_receiver.rb +1 -0
  56. data/lib/pdf/reader/resource_methods.rb +1 -0
  57. data/lib/pdf/reader/standard_security_handler.rb +80 -42
  58. data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
  59. data/lib/pdf/reader/stream.rb +1 -0
  60. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  61. data/lib/pdf/reader/text_run.rb +28 -9
  62. data/lib/pdf/reader/token.rb +1 -0
  63. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  64. data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
  65. data/lib/pdf/reader/width_calculator/built_in.rb +25 -16
  66. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  67. data/lib/pdf/reader/width_calculator/true_type.rb +2 -2
  68. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  69. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  70. data/lib/pdf/reader/width_calculator.rb +1 -0
  71. data/lib/pdf/reader/xref.rb +11 -5
  72. data/lib/pdf/reader.rb +30 -119
  73. data/lib/pdf-reader.rb +1 -0
  74. metadata +35 -61
  75. data/bin/pdf_list_callbacks +0 -17
  76. data/lib/pdf/hash.rb +0 -19
  77. data/lib/pdf/reader/abstract_strategy.rb +0 -81
  78. data/lib/pdf/reader/metadata_strategy.rb +0 -56
  79. data/lib/pdf/reader/text_receiver.rb +0 -265
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -27,42 +28,8 @@
27
28
 
28
29
  class PDF::Reader
29
30
  ################################################################################
30
- # Walks the pages of the PDF file and calls the appropriate callback methods when
31
- # something of interest is found.
32
- #
33
- # The callback methods should exist on the receiver object passed into the constructor.
34
- # Whenever some content is found that will trigger a callback, the receiver is checked
35
- # to see if the callback is defined.
36
- #
37
- # If it is defined it will be called. If not, processing will continue.
38
- #
39
- # = Available Callbacks
40
- # The following callbacks are available and should be methods defined on your receiver class. Only
41
- # implement the ones you need - the rest will be ignored.
42
- #
43
- # Some callbacks will include parameters which will be passed in as an array. For callbacks
44
- # that supply no paramters, or where you don't need them, the *params argument can be left off.
45
- # Some example callback method definitions are:
46
- #
47
- # def begin_document
48
- # def end_page
49
- # def show_text(string, *params)
50
- # def fill_stroke(*params)
51
- #
52
- # You should be able to infer the basic command the callback is reporting based on the name. For
53
- # further experimentation, define the callback with just a *params parameter, then print out the
54
- # contents of the array using something like:
55
- #
56
- # puts params.inspect
57
- #
58
31
  # == Text Callbacks
59
32
  #
60
- # All text passed into these callbacks will be encoded as UTF-8. Depending on where (and when) the
61
- # PDF was generated, there's a good chance the text is NOT stored as UTF-8 internally so be
62
- # careful when doing a comparison on strings returned from PDF::Reader (when doing unit tests for
63
- # example). The string may not be byte-by-byte identical with the string that was originally
64
- # written to the PDF.
65
- #
66
33
  # - end_text_object
67
34
  # - move_to_start_of_next_line
68
35
  # - set_character_spacing
@@ -80,14 +47,6 @@ class PDF::Reader
80
47
  # - move_to_next_line_and_show_text
81
48
  # - set_spacing_next_line_show_text
82
49
  #
83
- # If the :raw_text option was passed to the PDF::Reader class the following callbacks
84
- # may also appear:
85
- #
86
- # - show_text_raw
87
- # - show_text_with_positioning_raw
88
- # - move_to_next_line_and_show_text_raw
89
- # - set_spacing_next_line_show_text_raw
90
- #
91
50
  # == Graphics Callbacks
92
51
  # - close_fill_stroke
93
52
  # - fill_stroke
@@ -145,42 +104,7 @@ class PDF::Reader
145
104
  # - set_clipping_path_with_even_odd
146
105
  # - append_curved_segment_final_point_replicated
147
106
  #
148
- # == Misc Callbacks
149
- # - begin_compatibility_section
150
- # - end_compatibility_section,
151
- # - begin_document
152
- # - end_document
153
- # - begin_page_container
154
- # - end_page_container
155
- # - begin_page
156
- # - end_page
157
- # - metadata
158
- # - xml_metadata
159
- # - page_count
160
- # - begin_form_xobject
161
- # - end_form_xobject
162
- #
163
- # == Resource Callbacks
164
- #
165
- # Each page can contain (or inherit) a range of resources required for the page,
166
- # including things like fonts and images. The following callbacks may appear
167
- # after begin_page if the relevant resources exist on a page:
168
- #
169
- # - resource_procset
170
- # - resource_xobject
171
- # - resource_extgstate
172
- # - resource_colorspace
173
- # - resource_pattern
174
- # - resource_font
175
- #
176
- # In most cases, these callbacks associate a name with each resource, allowing it
177
- # to be referred to by name in the page content. For example, an XObject can hold an image.
178
- # If it gets mapped to the name "IM1", then it can be placed on the page using
179
- # invoke_xobject "IM1".
180
- #
181
- # DEPRECATED: this class was deprecated in version 0.11.0 and will
182
- # eventually be removed
183
- class PagesStrategy< AbstractStrategy # :nodoc:
107
+ class PagesStrategy # :nodoc:
184
108
  OPERATORS = {
185
109
  'b' => :close_fill_stroke,
186
110
  'B' => :fill_stroke,
@@ -256,232 +180,6 @@ class PDF::Reader
256
180
  '\'' => :move_to_next_line_and_show_text,
257
181
  '"' => :set_spacing_next_line_show_text,
258
182
  }
259
- def self.to_sym
260
- :pages
261
- end
262
- ################################################################################
263
- # Begin processing the document
264
- def process
265
- return false unless options[:pages]
266
-
267
- callback(:begin_document, [root])
268
- walk_pages(@ohash.object(root[:Pages]))
269
- callback(:end_document)
270
- end
271
- private
272
- ################################################################################
273
- def params_to_utf8(params, font)
274
- if params.is_a?(String)
275
- font.to_utf8(params)
276
- elsif params.is_a?(Array)
277
- params.map { |i| params_to_utf8(i, font)}
278
- else
279
- params
280
- end
281
- end
282
- ################################################################################
283
- # Walk over all pages in the PDF file, calling the appropriate callbacks for each page and all
284
- # its content
285
- def walk_pages(page)
286
-
287
- # extract page content
288
- if page[:Type] == :Pages
289
- callback(:begin_page_container, [page])
290
- res = @ohash.object(page[:Resources])
291
- resources.push res if res
292
- @ohash.object(page[:Kids]).each {|child| walk_pages(@ohash.object(child))}
293
- resources.pop if res
294
- callback(:end_page_container)
295
- elsif page[:Type] == :Page
296
- callback(:begin_page, [page])
297
- res = @ohash.object(page[:Resources])
298
- resources.push res if res
299
- walk_resources(current_resources)
300
-
301
- if @ohash.object(page[:Contents]).kind_of?(Array)
302
- contents = @ohash.object(page[:Contents])
303
- else
304
- contents = [page[:Contents]]
305
- end
306
-
307
- fonts = font_hash_from_resources(current_resources)
308
-
309
- if page.has_key?(:Contents) and page[:Contents]
310
- direct_contents = contents.map { |content| @ohash.object(content) }
311
- content_stream(direct_contents, fonts)
312
- end
313
-
314
- resources.pop if res
315
- callback(:end_page)
316
- end
317
- end
318
- ################################################################################
319
- # Retreive the XObject for the supplied label and if it's a Form, walk it
320
- # like a regular page content stream.
321
- #
322
- def walk_xobject_form(label)
323
- xobjects = @ohash.object(current_resources[:XObject]) || {}
324
- xobject = @ohash.object(xobjects[label])
325
-
326
- if xobject && xobject.hash[:Subtype] == :Form
327
- callback(:begin_form_xobject)
328
- xobj_resources = @ohash.object(xobject.hash[:Resources])
329
- if xobj_resources
330
- resources.push xobj_resources
331
- walk_resources(xobj_resources)
332
- end
333
- fonts = font_hash_from_resources(xobj_resources)
334
- content_stream(xobject, fonts)
335
- callback(:end_form_xobject)
336
- resources.pop if xobj_resources
337
- end
338
- end
339
-
340
- ################################################################################
341
- # Return a merged hash of all resources that are current. Pages, page and xobject
342
- #
343
- def current_resources
344
- hash = {}
345
- resources.each do |res|
346
- hash.merge!(res)
347
- end
348
- hash
349
- end
350
- ################################################################################
351
- # Reads a PDF content stream and calls all the appropriate callback methods for the operators
352
- # it contains
353
- #
354
- def content_stream(instructions, fonts = {})
355
- instructions = [instructions] unless instructions.kind_of?(Array)
356
- instructions = instructions.map { |ins|
357
- ins.is_a?(PDF::Reader::Stream) ? ins.unfiltered_data : ins.to_s
358
- }.join
359
- buffer = Buffer.new(StringIO.new(instructions), :content_stream => true)
360
- parser = Parser.new(buffer, @ohash)
361
- current_font = nil
362
- params = []
363
-
364
- while (token = parser.parse_token(OPERATORS))
365
- if token.kind_of?(Token) and OPERATORS.has_key?(token)
366
- if OPERATORS[token] == :set_text_font_and_size
367
- current_font = params.first
368
- if fonts[current_font].nil?
369
- raise MalformedPDFError, "Unknown font #{current_font}"
370
- end
371
- end
372
-
373
- # handle special cases in response to certain operators
374
- if OPERATORS[token].to_s.include?("show_text")
375
- # convert any text to utf-8, but output the raw string if the user wants it
376
- if options[:raw_text]
377
- callback("#{OPERATORS[token]}_raw".to_sym, params)
378
- end
379
- params = params_to_utf8(params, fonts[current_font])
380
- elsif token == "ID"
381
- # inline image data, first convert the current params into a more familiar hash
382
- map = {}
383
- params.each_slice(2) do |key, value|
384
- map[key] = value
385
- end
386
- params = [map, buffer.token]
387
- end
388
-
389
- callback(OPERATORS[token], params)
390
-
391
- if OPERATORS[token] == :invoke_xobject
392
- xobject_label = params.first
393
- params.clear
394
- walk_xobject_form(xobject_label)
395
- else
396
- params.clear
397
- end
398
- else
399
- params << token
400
- end
401
- end
402
- rescue EOFError
403
- raise MalformedPDFError, "End Of File while processing a content stream"
404
- end
405
- ################################################################################
406
- def walk_resources(resources)
407
- return unless resources.respond_to?(:[])
408
-
409
- resources = resolve_references(resources)
410
-
411
- # extract any procset information
412
- if resources[:ProcSet]
413
- callback(:resource_procset, resources[:ProcSet])
414
- end
415
-
416
- # extract any xobject information
417
- if resources[:XObject]
418
- @ohash.object(resources[:XObject]).each do |name, val|
419
- callback(:resource_xobject, [name, @ohash.object(val)])
420
- end
421
- end
422
-
423
- # extract any extgstate information
424
- if resources[:ExtGState]
425
- @ohash.object(resources[:ExtGState]).each do |name, val|
426
- callback(:resource_extgstate, [name, @ohash.object(val)])
427
- end
428
- end
429
-
430
- # extract any colorspace information
431
- if resources[:ColorSpace]
432
- @ohash.object(resources[:ColorSpace]).each do |name, val|
433
- callback(:resource_colorspace, [name, @ohash.object(val)])
434
- end
435
- end
436
-
437
- # extract any pattern information
438
- if resources[:Pattern]
439
- @ohash.object(resources[:Pattern]).each do |name, val|
440
- callback(:resource_pattern, [name, @ohash.object(val)])
441
- end
442
- end
443
-
444
- # extract any font information
445
- if resources[:Font]
446
- fonts = font_hash_from_resources(resources)
447
- fonts.each do |label, font|
448
- callback(:resource_font, [label, font])
449
- end
450
- end
451
- end
452
- ################################################################################
453
- # Convert any PDF::Reader::Resource objects into a real object
454
- def resolve_references(obj)
455
- case obj
456
- when PDF::Reader::Stream then
457
- obj.hash = resolve_references(obj.hash)
458
- obj
459
- when PDF::Reader::Reference then
460
- resolve_references(@ohash.object(obj))
461
- when Hash then
462
- arr = obj.map { |key,val| [key, resolve_references(val)] }.flatten(1)
463
- Hash[*arr]
464
- when Array then
465
- obj.collect { |item| resolve_references(item) }
466
- else
467
- obj
468
- end
469
- end
470
- ################################################################################
471
- ################################################################################
472
- def font_hash_from_resources(resources)
473
- return {} unless resources.respond_to?(:[])
474
-
475
- fonts = {}
476
- resources = @ohash.object(resources[:Font]) || {}
477
- resources.each do |label, desc|
478
- fonts[label] = PDF::Reader::Font.new(@ohash, @ohash.object(desc))
479
- end
480
- fonts
481
- end
482
- def resources
483
- @resources ||= []
484
- end
485
183
  end
486
184
  ################################################################################
487
185
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -118,6 +119,7 @@ class PDF::Reader
118
119
  loop do
119
120
  key = parse_token
120
121
  break if key.kind_of?(Token) and key == ">>"
122
+ raise MalformedPDFError, "unterminated dict" if @buffer.empty?
121
123
  raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
122
124
 
123
125
  value = parse_token
@@ -131,8 +133,7 @@ class PDF::Reader
131
133
  # reads a PDF name from the buffer and converts it to a Ruby Symbol
132
134
  def pdf_name
133
135
  tok = @buffer.token
134
- tok = " " if tok == "" && RUBY_VERSION < "1.9"
135
- tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
136
+ tok = tok.dup.gsub(/#([A-Fa-f0-9]{2})/) do |match|
136
137
  match[1, 2].hex.chr
137
138
  end
138
139
  tok.to_sym
@@ -145,6 +146,7 @@ class PDF::Reader
145
146
  loop do
146
147
  item = parse_token
147
148
  break if item.kind_of?(Token) and item == "]"
149
+ raise MalformedPDFError, "unterminated array" if @buffer.empty?
148
150
  a << item
149
151
  end
150
152
 
@@ -153,29 +155,30 @@ class PDF::Reader
153
155
  ################################################################################
154
156
  # Reads a PDF hex string from the buffer and converts it to a Ruby String
155
157
  def hex_string
156
- str = ""
158
+ str = "".dup
157
159
 
158
160
  loop do
159
161
  token = @buffer.token
160
162
  break if token == ">"
163
+ raise MalformedPDFError, "unterminated hex string" if @buffer.empty?
161
164
  str << token
162
165
  end
163
166
 
164
167
  # add a missing digit if required, as required by the spec
165
168
  str << "0" unless str.size % 2 == 0
166
- str.scan(/../).map {|i| i.hex.chr}.join
169
+ str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
167
170
  end
168
171
  ################################################################################
169
172
  # Reads a PDF String from the buffer and converts it to a Ruby String
170
173
  def string
171
174
  str = @buffer.token
172
- return "" if str == ")"
175
+ return "".dup.force_encoding("binary") if str == ")"
173
176
  Error.assert_equal(parse_token, ")")
174
177
 
175
178
  str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
176
- MAPPING[match] || ""
179
+ MAPPING[match] || "".dup
177
180
  end
178
- str
181
+ str.force_encoding("binary")
179
182
  end
180
183
 
181
184
  MAPPING = {
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  class PDF::Reader
4
5
  # A simple receiver that prints all operaters and parameters in the content
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  # Copyright (C) 2010 James Healy (jimmy@deefa.com)
4
5
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  module PDF
4
5
  class Reader
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # frozen_string_literal: true
2
3
 
3
4
  ################################################################################
4
5
  #
@@ -25,6 +26,7 @@
25
26
  #
26
27
  ################################################################################
27
28
  require 'digest/md5'
29
+ require 'openssl'
28
30
  require 'rc4'
29
31
 
30
32
  class PDF::Reader
@@ -42,51 +44,83 @@ class PDF::Reader
42
44
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
43
45
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
44
46
 
45
- attr_reader :filter, :subFilter, :version, :key_length,
46
- :crypt_filter, :stream_filter, :string_filter, :embedded_file_filter,
47
- :encrypt_key
48
- attr_reader :revision, :owner_key, :user_key, :permissions, :file_id, :password
49
-
50
- def initialize( enc, file_id, password )
51
- @filter = enc[:Filter]
52
- @subFilter = enc[:SubFilter]
53
- @version = enc[:V].to_i
54
- @key_length = enc[:Length].to_i/8
55
- @crypt_filter = enc[:CF]
56
- @stream_filter = enc[:StmF]
57
- @string_filter = enc[:StrF]
58
- @revision = enc[:R].to_i
59
- @owner_key = enc[:O]
60
- @user_key = enc[:U]
61
- @permissions = enc[:P].to_i
62
- @embedded_file_filter = enc[:EFF]
63
-
64
- @encryptMeta = enc.has_key?(:EncryptMetadata)? enc[:EncryptMetadata].to_s == "true" : true;
65
-
66
- @file_id = (file_id || []).first || ""
67
-
68
- @encrypt_key = build_standard_key(password)
47
+ attr_reader :key_length, :revision, :encrypt_key
48
+ attr_reader :owner_key, :user_key, :permissions, :file_id, :password
49
+
50
+ def initialize(opts = {})
51
+ @key_length = opts[:key_length].to_i/8
52
+ @revision = opts[:revision].to_i
53
+ @owner_key = opts[:owner_key]
54
+ @user_key = opts[:user_key]
55
+ @permissions = opts[:permissions].to_i
56
+ @encryptMeta = opts.fetch(:encrypted_metadata, true)
57
+ @file_id = opts[:file_id] || ""
58
+ @encrypt_key = build_standard_key(opts[:password] || "")
59
+ @cfm = opts[:cfm]
60
+
61
+ if @key_length != 5 && @key_length != 16
62
+ msg = "StandardSecurityHandler only supports 40 and 128 bit\
63
+ encryption (#{@key_length * 8}bit)"
64
+ raise ArgumentError, msg
65
+ end
66
+ end
67
+
68
+ # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
69
+ def self.supports?(encrypt)
70
+ return false if encrypt.nil?
71
+
72
+ filter = encrypt.fetch(:Filter, :Standard)
73
+ version = encrypt.fetch(:V, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
69
77
  end
70
78
 
71
79
  ##7.6.2 General Encryption Algorithm
72
80
  #
73
81
  # Algorithm 1: Encryption of data using the RC4 or AES algorithms
74
82
  #
75
- # used to decrypt RC4 encrypted PDF streams (buf)
83
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
76
84
  #
77
85
  # buf - a string to decrypt
78
86
  # ref - a PDF::Reader::Reference for the object to decrypt
79
87
  #
80
88
  def decrypt( buf, ref )
89
+ case @cfm
90
+ when :AESV2
91
+ decrypt_aes128(buf, ref)
92
+ else
93
+ decrypt_rc4(buf, ref)
94
+ end
95
+ end
96
+
97
+ private
98
+
99
+ # decrypt with RC4 algorithm
100
+ # version <=3 or (version == 4 and CFM == V2)
101
+ def decrypt_rc4( buf, ref )
81
102
  objKey = @encrypt_key.dup
82
103
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
83
104
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
84
105
  length = objKey.length < 16 ? objKey.length : 16
85
- rc4 = RC4.new( Digest::MD5.digest(objKey)[(0...length)] )
106
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
86
107
  rc4.decrypt(buf)
87
108
  end
88
109
 
89
- private
110
+ # decrypt with AES-128-CBC algorithm
111
+ # when (version == 4 and CFM == AESV2)
112
+ def decrypt_aes128( buf, ref )
113
+ objKey = @encrypt_key.dup
114
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
115
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
116
+ objKey << 'sAlT' # Algorithm 1, b)
117
+ length = objKey.length < 16 ? objKey.length : 16
118
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
119
+ cipher.decrypt
120
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
121
+ cipher.iv = buf[0..15]
122
+ cipher.update(buf[16..-1]) + cipher.final
123
+ end
90
124
 
91
125
  # Pads supplied password to 32bytes using PassPadBytes as specified on
92
126
  # pp61 of spec
@@ -94,7 +128,7 @@ class PDF::Reader
94
128
  if p.nil? || p.empty?
95
129
  PassPadBytes.pack('C*')
96
130
  else
97
- p[(0...32)] + PassPadBytes[0...(32-p.length)].pack('C*')
131
+ p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
98
132
  end
99
133
  end
100
134
 
@@ -118,13 +152,13 @@ class PDF::Reader
118
152
  md5 = Digest::MD5.digest(pad_pass(pass))
119
153
  if @revision > 2 then
120
154
  50.times { md5 = Digest::MD5.digest(md5) }
121
- keyBegins = md5[(0...@key_length)]
122
- #first itteration decrypt owner_key
155
+ keyBegins = md5[0, key_length]
156
+ #first iteration decrypt owner_key
123
157
  out = @owner_key
124
- #RC4 keyed with (keyBegins XOR with itteration #) to decrypt previous out
158
+ #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
125
159
  19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
126
160
  else
127
- out = RC4.new( md5[(0...5)] ).decrypt( @owner_key )
161
+ out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
128
162
  end
129
163
  # c) check output as user password
130
164
  auth_user_pass( out )
@@ -142,12 +176,12 @@ class PDF::Reader
142
176
  #
143
177
  def auth_user_pass(pass)
144
178
  keyBegins = make_file_key(pass)
145
- if @revision > 2
179
+ if @revision >= 3
146
180
  #initialize out for first iteration
147
181
  out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
148
182
  #zero doesn't matter -> so from 0-19
149
- 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).decrypt(out) }
150
- pass = @user_key[(0...16)] == out
183
+ 20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
184
+ pass = @user_key[0, 16] == out
151
185
  else
152
186
  pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
153
187
  end
@@ -163,20 +197,24 @@ class PDF::Reader
163
197
  (0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
164
198
  # e) add the file ID
165
199
  @buf << @file_id
166
- # f) if revision > 4 then if encryptMetadata add 4 bytes of 0x00 else add 4 bytes of 0xFF
167
- if @revision > 4
168
- @buf << [ @encryptMetadata ? 0x00 : 0xFF ].pack('C')*4
200
+ # f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
201
+ if @revision >= 4 && !@encryptMeta
202
+ @buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
169
203
  end
170
204
  # b) init MD5 digest + g) finish the hash
171
205
  md5 = Digest::MD5.digest(@buf)
172
206
  # h) spin hash 50 times
173
- if @revision > 2
207
+ if @revision >= 3
174
208
  50.times {
175
- md5 = Digest::MD5.digest(md5[(0...@key_length)])
209
+ md5 = Digest::MD5.digest(md5[0, @key_length])
176
210
  }
177
211
  end
178
- # i) n = key_length revision > 3, n = 5 revision == 2
179
- md5[(0...((@revision < 3) ? 5 : @key_length))]
212
+ # i) n = key_length revision >= 3, n = 5 revision == 2
213
+ if @revision < 3
214
+ md5[0, 5]
215
+ else
216
+ md5[0, @key_length]
217
+ end
180
218
  end
181
219
 
182
220
  def build_standard_key(pass)