pdf-reader 2.2.0 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -79,8 +80,8 @@ class PDF::Reader
79
80
  token
80
81
  elsif operators.has_key? token
81
82
  Token.new(token)
82
- elsif token.respond_to?(:to_token)
83
- token.to_token
83
+ elsif token.frozen?
84
+ token
84
85
  elsif token =~ /\d*\.\d/
85
86
  token.to_f
86
87
  else
@@ -95,14 +96,20 @@ class PDF::Reader
95
96
  # id - the object ID to return
96
97
  # gen - the object revision number to return
97
98
  def object(id, gen)
98
- Error.assert_equal(parse_token, id)
99
+ idCheck = parse_token
100
+
101
+ # Sometimes the xref table is corrupt and points to an offset slightly too early in the file.
102
+ # check the next token, maybe we can find the start of the object we're looking for
103
+ if idCheck != id
104
+ Error.assert_equal(parse_token, id)
105
+ end
99
106
  Error.assert_equal(parse_token, gen)
100
107
  Error.str_assert(parse_token, "obj")
101
108
 
102
109
  obj = parse_token
103
110
  post_obj = parse_token
104
111
 
105
- if post_obj == "stream"
112
+ if obj.is_a?(Hash) && post_obj == "stream"
106
113
  stream(obj)
107
114
  else
108
115
  obj
@@ -120,7 +127,7 @@ class PDF::Reader
120
127
  key = parse_token
121
128
  break if key.kind_of?(Token) and key == ">>"
122
129
  raise MalformedPDFError, "unterminated dict" if @buffer.empty?
123
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
130
+ PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
124
131
 
125
132
  value = parse_token
126
133
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -166,7 +173,9 @@ class PDF::Reader
166
173
 
167
174
  # add a missing digit if required, as required by the spec
168
175
  str << "0" unless str.size % 2 == 0
169
- str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
176
+ str.chars.each_slice(2).map { |nibbles|
177
+ nibbles.join("").hex.chr
178
+ }.join.force_encoding("binary")
170
179
  end
171
180
  ################################################################################
172
181
  # Reads a PDF String from the buffer and converts it to a Ruby String
@@ -175,15 +184,18 @@ class PDF::Reader
175
184
  return "".dup.force_encoding("binary") if str == ")"
176
185
  Error.assert_equal(parse_token, ")")
177
186
 
178
- str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
179
- MAPPING[match] || "".dup
187
+ str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
188
+ if $2.nil? # not octal digits
189
+ MAPPING[match] || "".dup
190
+ else # must be octal digits
191
+ ($2.oct & 0xff).chr # ignore high level overflow
192
+ end
180
193
  end
181
194
  str.force_encoding("binary")
182
195
  end
183
196
 
184
197
  MAPPING = {
185
198
  "\r" => "\n",
186
- "\n\r" => "\n",
187
199
  "\r\n" => "\n",
188
200
  "\\n" => "\n",
189
201
  "\\r" => "\r",
@@ -194,24 +206,32 @@ class PDF::Reader
194
206
  "\\)" => ")",
195
207
  "\\\\" => "\\",
196
208
  "\\\n" => "",
209
+ "\\\r" => "",
210
+ "\\\r\n" => "",
197
211
  }
198
- 0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
199
- 0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
200
- 0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
201
212
 
202
213
  ################################################################################
203
214
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
204
215
  def stream(dict)
205
216
  raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
206
217
  if @objects
207
- length = @objects.deref(dict[:Length])
218
+ length = @objects.deref_integer(dict[:Length])
219
+ if dict[:Filter]
220
+ dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
221
+ end
208
222
  else
209
223
  length = dict[:Length] || 0
210
224
  end
225
+
226
+ PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
227
+
211
228
  data = @buffer.read(length, :skip_eol => true)
212
229
 
213
230
  Error.str_assert(parse_token, "endstream")
214
- Error.str_assert(parse_token, "endobj")
231
+
232
+ # We used to assert that the stream had the correct closing token, but it doesn't *really*
233
+ # matter if it's missing, and other readers seem to handle its absence just fine
234
+ # Error.str_assert(parse_token, "endobj")
215
235
 
216
236
  PDF::Reader::Stream.new(dict, data)
217
237
  end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs are all about positioning content on a page, so there's lots of need to
9
+ # work with a set of X,Y coordinates.
10
+ #
11
+ class Point
12
+
13
+ attr_reader :x, :y
14
+
15
+ def initialize(x, y)
16
+ @x, @y = x, y
17
+ end
18
+
19
+ def ==(other)
20
+ other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
11
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
12
+ #
13
+ class Rc4SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ end
18
+
19
+ ##7.6.2 General Encryption Algorithm
20
+ #
21
+ # Algorithm 1: Encryption of data using the RC4 algorithm
22
+ #
23
+ # version <=3 or (version == 4 and CFM == V2)
24
+ #
25
+ # buf - a string to decrypt
26
+ # ref - a PDF::Reader::Reference for the object to decrypt
27
+ #
28
+ def decrypt( buf, ref )
29
+ objKey = @encrypt_key.dup
30
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
31
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
34
+ rc4.decrypt(buf)
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,113 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs represent rectangles all over the place. They're 4 element arrays, like this:
9
+ #
10
+ # [A, B, C, D]
11
+ #
12
+ # Four element arrays are yucky to work with though, so here's a class that's better.
13
+ # Initialize it with the 4 elements, and get utility functions (width, height, etc)
14
+ # for free.
15
+ #
16
+ # By convention the first two elements are x1, y1, the co-ords for the bottom left corner
17
+ # of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
18
+ # corner of the rectangle. It's valid for the alternative corners to be used though, so
19
+ # we don't assume which is which.
20
+ #
21
+ class Rectangle
22
+
23
+ attr_reader :bottom_left, :bottom_right, :top_left, :top_right
24
+
25
+ def initialize(x1, y1, x2, y2)
26
+ set_corners(x1, y1, x2, y2)
27
+ end
28
+
29
+ def self.from_array(arr)
30
+ if arr.size != 4
31
+ raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
32
+ end
33
+
34
+ PDF::Reader::Rectangle.new(
35
+ arr[0].to_f,
36
+ arr[1].to_f,
37
+ arr[2].to_f,
38
+ arr[3].to_f,
39
+ )
40
+ end
41
+
42
+ def ==(other)
43
+ to_a == other.to_a
44
+ end
45
+
46
+ def height
47
+ top_right.y - bottom_right.y
48
+ end
49
+
50
+ def width
51
+ bottom_right.x - bottom_left.x
52
+ end
53
+
54
+ def contains?(point)
55
+ point.x >= bottom_left.x && point.x <= top_right.x &&
56
+ point.y >= bottom_left.y && point.y <= top_right.y
57
+ end
58
+
59
+ # A pdf-style 4-number array
60
+ def to_a
61
+ [
62
+ bottom_left.x,
63
+ bottom_left.y,
64
+ top_right.x,
65
+ top_right.y,
66
+ ]
67
+ end
68
+
69
+ def apply_rotation(degrees)
70
+ return if degrees != 90 && degrees != 180 && degrees != 270
71
+
72
+ if degrees == 90
73
+ new_x1 = bottom_left.x
74
+ new_y1 = bottom_left.y - width
75
+ new_x2 = bottom_left.x + height
76
+ new_y2 = bottom_left.y
77
+ elsif degrees == 180
78
+ new_x1 = bottom_left.x - width
79
+ new_y1 = bottom_left.y - height
80
+ new_x2 = bottom_left.x
81
+ new_y2 = bottom_left.y
82
+ elsif degrees == 270
83
+ new_x1 = bottom_left.x - height
84
+ new_y1 = bottom_left.y
85
+ new_x2 = bottom_left.x
86
+ new_y2 = bottom_left.y + width
87
+ end
88
+ set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
89
+ end
90
+
91
+ private
92
+
93
+ def set_corners(x1, y1, x2, y2)
94
+ @bottom_left = PDF::Reader::Point.new(
95
+ [x1, x2].min,
96
+ [y1, y2].min,
97
+ )
98
+ @bottom_right = PDF::Reader::Point.new(
99
+ [x1, x2].max,
100
+ [y1, y2].min,
101
+ )
102
+ @top_left = PDF::Reader::Point.new(
103
+ [x1, x2].min,
104
+ [y1, y2].max,
105
+ )
106
+ @top_right = PDF::Reader::Point.new(
107
+ [x1, x2].max,
108
+ [y1, y2].max,
109
+ )
110
+ end
111
+ end
112
+ end
113
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -30,7 +31,8 @@ class PDF::Reader
30
31
  ################################################################################
31
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
32
33
  class Reference
33
- attr_reader :id, :gen
34
+ attr_reader :id
35
+ attr_reader :gen
34
36
  ################################################################################
35
37
  # Create a new Reference to an object with the specified id and revision number
36
38
  def initialize(id, gen)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # Copyright (C) 2010 James Healy (jimmy@deefa.com)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -6,7 +7,13 @@ module PDF
6
7
 
7
8
  # mixin for common methods in Page and FormXobjects
8
9
  #
9
- module ResourceMethods
10
+ class Resources
11
+
12
+ def initialize(objects, resources)
13
+ @objects = objects
14
+ @resources = resources
15
+ end
16
+
10
17
  # Returns a Hash of color spaces that are available to this page
11
18
  #
12
19
  # NOTE: this method de-serialise objects from the underlying PDF
@@ -14,7 +21,7 @@ module PDF
14
21
  # of calling it over and over.
15
22
  #
16
23
  def color_spaces
17
- @objects.deref!(resources[:ColorSpace]) || {}
24
+ @objects.deref_hash!(@resources[:ColorSpace]) || {}
18
25
  end
19
26
 
20
27
  # Returns a Hash of fonts that are available to this page
@@ -24,7 +31,7 @@ module PDF
24
31
  # of calling it over and over.
25
32
  #
26
33
  def fonts
27
- @objects.deref!(resources[:Font]) || {}
34
+ @objects.deref_hash!(@resources[:Font]) || {}
28
35
  end
29
36
 
30
37
  # Returns a Hash of external graphic states that are available to this
@@ -35,7 +42,7 @@ module PDF
35
42
  # of calling it over and over.
36
43
  #
37
44
  def graphic_states
38
- @objects.deref!(resources[:ExtGState]) || {}
45
+ @objects.deref_hash!(@resources[:ExtGState]) || {}
39
46
  end
40
47
 
41
48
  # Returns a Hash of patterns that are available to this page
@@ -45,7 +52,7 @@ module PDF
45
52
  # of calling it over and over.
46
53
  #
47
54
  def patterns
48
- @objects.deref!(resources[:Pattern]) || {}
55
+ @objects.deref_hash!(@resources[:Pattern]) || {}
49
56
  end
50
57
 
51
58
  # Returns an Array of procedure sets that are available to this page
@@ -55,7 +62,7 @@ module PDF
55
62
  # of calling it over and over.
56
63
  #
57
64
  def procedure_sets
58
- @objects.deref!(resources[:ProcSet]) || []
65
+ @objects.deref_array!(@resources[:ProcSet]) || []
59
66
  end
60
67
 
61
68
  # Returns a Hash of properties sets that are available to this page
@@ -65,7 +72,7 @@ module PDF
65
72
  # of calling it over and over.
66
73
  #
67
74
  def properties
68
- @objects.deref!(resources[:Properties]) || {}
75
+ @objects.deref_hash!(@resources[:Properties]) || {}
69
76
  end
70
77
 
71
78
  # Returns a Hash of shadings that are available to this page
@@ -75,7 +82,7 @@ module PDF
75
82
  # of calling it over and over.
76
83
  #
77
84
  def shadings
78
- @objects.deref!(resources[:Shading]) || {}
85
+ @objects.deref_hash!(@resources[:Shading]) || {}
79
86
  end
80
87
 
81
88
  # Returns a Hash of XObjects that are available to this page
@@ -85,7 +92,8 @@ module PDF
85
92
  # of calling it over and over.
86
93
  #
87
94
  def xobjects
88
- @objects.deref!(resources[:XObject]) || {}
95
+ dict = @objects.deref_hash!(@resources[:XObject]) || {}
96
+ TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
89
97
  end
90
98
 
91
99
  end
@@ -0,0 +1,79 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
7
+ # able to decrypt the file.
8
+ class SecurityHandlerFactory
9
+
10
+ def self.build(encrypt, doc_id, password)
11
+ doc_id ||= []
12
+ password ||= ""
13
+
14
+ if encrypt.nil?
15
+ NullSecurityHandler.new
16
+ elsif standard?(encrypt)
17
+ build_standard_handler(encrypt, doc_id, password)
18
+ elsif standard_v5?(encrypt)
19
+ build_v5_handler(encrypt, doc_id, password)
20
+ else
21
+ UnimplementedSecurityHandler.new
22
+ end
23
+ end
24
+
25
+ def self.build_standard_handler(encrypt, doc_id, password)
26
+ encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
27
+ key_builder = StandardKeyBuilder.new(
28
+ key_length: (encrypt[:Length] || 40).to_i,
29
+ revision: encrypt[:R],
30
+ owner_key: encrypt[:O],
31
+ user_key: encrypt[:U],
32
+ permissions: encrypt[:P].to_i,
33
+ encrypted_metadata: encmeta,
34
+ file_id: doc_id.first,
35
+ )
36
+ cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
37
+ if cfm == :AESV2
38
+ AesV2SecurityHandler.new(key_builder.key(password))
39
+ else
40
+ Rc4SecurityHandler.new(key_builder.key(password))
41
+ end
42
+ end
43
+
44
+ def self.build_v5_handler(encrypt, doc_id, password)
45
+ key_builder = KeyBuilderV5.new(
46
+ owner_key: encrypt[:O],
47
+ user_key: encrypt[:U],
48
+ owner_encryption_key: encrypt[:OE],
49
+ user_encryption_key: encrypt[:UE],
50
+ )
51
+ AesV3SecurityHandler.new(key_builder.key(password))
52
+ end
53
+
54
+ # This handler supports all encryption that follows up to PDF 1.5 spec (revision 4)
55
+ def self.standard?(encrypt)
56
+ return false if encrypt.nil?
57
+
58
+ filter = encrypt.fetch(:Filter, :Standard)
59
+ version = encrypt.fetch(:V, 0)
60
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
61
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
62
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
63
+ end
64
+
65
+ # This handler supports both
66
+ # - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
67
+ # - AES-256 encryption defined in PDF 2.0 ('revision 6')
68
+ def self.standard_v5?(encrypt)
69
+ return false if encrypt.nil?
70
+
71
+ filter = encrypt.fetch(:Filter, :Standard)
72
+ version = encrypt.fetch(:V, 0)
73
+ revision = encrypt.fetch(:R, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ ((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
77
+ end
78
+ end
79
+ end
@@ -1,38 +1,19 @@
1
1
  # coding: utf-8
2
- # frozen_string_literal: true
3
2
 
4
- ################################################################################
5
- #
6
- # Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
7
- #
8
- # Permission is hereby granted, free of charge, to any person obtaining
9
- # a copy of this software and associated documentation files (the
10
- # "Software"), to deal in the Software without restriction, including
11
- # without limitation the rights to use, copy, modify, merge, publish,
12
- # distribute, sublicense, and/or sell copies of the Software, and to
13
- # permit persons to whom the Software is furnished to do so, subject to
14
- # the following conditions:
15
- #
16
- # The above copyright notice and this permission notice shall be
17
- # included in all copies or substantial portions of the Software.
18
- #
19
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
- #
27
- ################################################################################
28
3
  require 'digest/md5'
29
- require 'openssl'
30
4
  require 'rc4'
31
5
 
32
6
  class PDF::Reader
33
7
 
34
- # class creates interface to encrypt dictionary for use in Decrypt
35
- class StandardSecurityHandler
8
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
9
+ # password and returns a key that can decrypt the file.
10
+ #
11
+ # This can generate a key compatible with the following standard encryption algorithms:
12
+ #
13
+ # * Version 1-3, all variants
14
+ # * Version 4, V2 (RC4) and AESV2
15
+ #
16
+ class StandardKeyBuilder
36
17
 
37
18
  ## 7.6.3.3 Encryption Key Algorithm (pp61)
38
19
  #
@@ -44,9 +25,6 @@ class PDF::Reader
44
25
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
45
26
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
46
27
 
47
- attr_reader :key_length, :revision, :encrypt_key
48
- attr_reader :owner_key, :user_key, :permissions, :file_id, :password
49
-
50
28
  def initialize(opts = {})
51
29
  @key_length = opts[:key_length].to_i/8
52
30
  @revision = opts[:revision].to_i
@@ -55,72 +33,30 @@ class PDF::Reader
55
33
  @permissions = opts[:permissions].to_i
56
34
  @encryptMeta = opts.fetch(:encrypted_metadata, true)
57
35
  @file_id = opts[:file_id] || ""
58
- @encrypt_key = build_standard_key(opts[:password] || "")
59
- @cfm = opts[:cfm]
60
36
 
61
37
  if @key_length != 5 && @key_length != 16
62
- msg = "StandardSecurityHandler only supports 40 and 128 bit\
38
+ msg = "StandardKeyBuilder only supports 40 and 128 bit\
63
39
  encryption (#{@key_length * 8}bit)"
64
- raise ArgumentError, msg
40
+ raise UnsupportedFeatureError, msg
65
41
  end
66
42
  end
67
43
 
68
- # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
69
- def self.supports?(encrypt)
70
- return false if encrypt.nil?
71
-
72
- filter = encrypt.fetch(:Filter, :Standard)
73
- version = encrypt.fetch(:V, 0)
74
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
- (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
77
- end
78
-
79
- ##7.6.2 General Encryption Algorithm
80
- #
81
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
82
- #
83
- # used to decrypt RC4/AES encrypted PDF streams (buf)
44
+ # Takes a string containing a user provided password.
84
45
  #
85
- # buf - a string to decrypt
86
- # ref - a PDF::Reader::Reference for the object to decrypt
46
+ # If the password matches the file, then a string containing a key suitable for
47
+ # decrypting the file will be returned. If the password doesn't match the file,
48
+ # and exception will be raised.
87
49
  #
88
- def decrypt( buf, ref )
89
- case @cfm
90
- when :AESV2
91
- decrypt_aes128(buf, ref)
92
- else
93
- decrypt_rc4(buf, ref)
94
- end
95
- end
96
-
97
- private
50
+ def key(pass)
51
+ pass ||= ""
52
+ encrypt_key = auth_owner_pass(pass)
53
+ encrypt_key ||= auth_user_pass(pass)
98
54
 
99
- # decrypt with RC4 algorithm
100
- # version <=3 or (version == 4 and CFM == V2)
101
- def decrypt_rc4( buf, ref )
102
- objKey = @encrypt_key.dup
103
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
104
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
105
- length = objKey.length < 16 ? objKey.length : 16
106
- rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
107
- rc4.decrypt(buf)
55
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
56
+ encrypt_key
108
57
  end
109
58
 
110
- # decrypt with AES-128-CBC algorithm
111
- # when (version == 4 and CFM == AESV2)
112
- def decrypt_aes128( buf, ref )
113
- objKey = @encrypt_key.dup
114
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
115
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
116
- objKey << 'sAlT' # Algorithm 1, b)
117
- length = objKey.length < 16 ? objKey.length : 16
118
- cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
119
- cipher.decrypt
120
- cipher.key = Digest::MD5.digest(objKey)[0,length]
121
- cipher.iv = buf[0..15]
122
- cipher.update(buf[16..-1]) + cipher.final
123
- end
59
+ private
124
60
 
125
61
  # Pads supplied password to 32bytes using PassPadBytes as specified on
126
62
  # pp61 of spec
@@ -152,7 +88,7 @@ class PDF::Reader
152
88
  md5 = Digest::MD5.digest(pad_pass(pass))
153
89
  if @revision > 2 then
154
90
  50.times { md5 = Digest::MD5.digest(md5) }
155
- keyBegins = md5[0, key_length]
91
+ keyBegins = md5[0, @key_length]
156
92
  #first iteration decrypt owner_key
157
93
  out = @owner_key
158
94
  #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
@@ -217,12 +153,5 @@ class PDF::Reader
217
153
  end
218
154
  end
219
155
 
220
- def build_standard_key(pass)
221
- encrypt_key = auth_owner_pass(pass)
222
- encrypt_key ||= auth_user_pass(pass)
223
-
224
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
225
- encrypt_key
226
- end
227
156
  end
228
157
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -39,7 +40,7 @@ class PDF::Reader
39
40
  # Creates a new stream with the specified dictionary and data. The dictionary
40
41
  # should be a standard ruby hash, the data should be a standard ruby string.
41
42
  def initialize(hash, data)
42
- @hash = hash
43
+ @hash = TypeCheck.cast_to_pdf_dict!(hash)
43
44
  @data = data
44
45
  @udata = nil
45
46
  end
@@ -61,7 +62,7 @@ class PDF::Reader
61
62
  end
62
63
 
63
64
  Array(hash[:Filter]).each_with_index do |filter, index|
64
- @udata = Filter.with(filter, options[index]).filter(@udata)
65
+ @udata = Filter.with(filter, options[index] || {}).filter(@udata)
65
66
  end
66
67
  end
67
68
  @udata