pdf-reader 2.2.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -79,8 +80,8 @@ class PDF::Reader
79
80
  token
80
81
  elsif operators.has_key? token
81
82
  Token.new(token)
82
- elsif token.respond_to?(:to_token)
83
- token.to_token
83
+ elsif token.frozen?
84
+ token
84
85
  elsif token =~ /\d*\.\d/
85
86
  token.to_f
86
87
  else
@@ -95,14 +96,20 @@ class PDF::Reader
95
96
  # id - the object ID to return
96
97
  # gen - the object revision number to return
97
98
  def object(id, gen)
98
- Error.assert_equal(parse_token, id)
99
+ idCheck = parse_token
100
+
101
+ # Sometimes the xref table is corrupt and points to an offset slightly too early in the file.
102
+ # check the next token, maybe we can find the start of the object we're looking for
103
+ if idCheck != id
104
+ Error.assert_equal(parse_token, id)
105
+ end
99
106
  Error.assert_equal(parse_token, gen)
100
107
  Error.str_assert(parse_token, "obj")
101
108
 
102
109
  obj = parse_token
103
110
  post_obj = parse_token
104
111
 
105
- if post_obj == "stream"
112
+ if obj.is_a?(Hash) && post_obj == "stream"
106
113
  stream(obj)
107
114
  else
108
115
  obj
@@ -120,7 +127,7 @@ class PDF::Reader
120
127
  key = parse_token
121
128
  break if key.kind_of?(Token) and key == ">>"
122
129
  raise MalformedPDFError, "unterminated dict" if @buffer.empty?
123
- raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
130
+ PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
124
131
 
125
132
  value = parse_token
126
133
  value.kind_of?(Token) and Error.str_assert_not(value, ">>")
@@ -166,7 +173,9 @@ class PDF::Reader
166
173
 
167
174
  # add a missing digit if required, as required by the spec
168
175
  str << "0" unless str.size % 2 == 0
169
- str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
176
+ str.chars.each_slice(2).map { |nibbles|
177
+ nibbles.join("").hex.chr
178
+ }.join.force_encoding("binary")
170
179
  end
171
180
  ################################################################################
172
181
  # Reads a PDF String from the buffer and converts it to a Ruby String
@@ -175,15 +184,18 @@ class PDF::Reader
175
184
  return "".dup.force_encoding("binary") if str == ")"
176
185
  Error.assert_equal(parse_token, ")")
177
186
 
178
- str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
179
- MAPPING[match] || "".dup
187
+ str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
188
+ if $2.nil? # not octal digits
189
+ MAPPING[match] || "".dup
190
+ else # must be octal digits
191
+ ($2.oct & 0xff).chr # ignore high level overflow
192
+ end
180
193
  end
181
194
  str.force_encoding("binary")
182
195
  end
183
196
 
184
197
  MAPPING = {
185
198
  "\r" => "\n",
186
- "\n\r" => "\n",
187
199
  "\r\n" => "\n",
188
200
  "\\n" => "\n",
189
201
  "\\r" => "\r",
@@ -194,24 +206,32 @@ class PDF::Reader
194
206
  "\\)" => ")",
195
207
  "\\\\" => "\\",
196
208
  "\\\n" => "",
209
+ "\\\r" => "",
210
+ "\\\r\n" => "",
197
211
  }
198
- 0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
199
- 0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
200
- 0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
201
212
 
202
213
  ################################################################################
203
214
  # Decodes the contents of a PDF Stream and returns it as a Ruby String.
204
215
  def stream(dict)
205
216
  raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
206
217
  if @objects
207
- length = @objects.deref(dict[:Length])
218
+ length = @objects.deref_integer(dict[:Length])
219
+ if dict[:Filter]
220
+ dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
221
+ end
208
222
  else
209
223
  length = dict[:Length] || 0
210
224
  end
225
+
226
+ PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
227
+
211
228
  data = @buffer.read(length, :skip_eol => true)
212
229
 
213
230
  Error.str_assert(parse_token, "endstream")
214
- Error.str_assert(parse_token, "endobj")
231
+
232
+ # We used to assert that the stream had the correct closing token, but it doesn't *really*
233
+ # matter if it's missing, and other readers seem to handle its absence just fine
234
+ # Error.str_assert(parse_token, "endobj")
215
235
 
216
236
  PDF::Reader::Stream.new(dict, data)
217
237
  end
@@ -0,0 +1,25 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs are all about positioning content on a page, so there's lots of need to
9
+ # work with a set of X,Y coordinates.
10
+ #
11
+ class Point
12
+
13
+ attr_reader :x, :y
14
+
15
+ def initialize(x, y)
16
+ @x, @y = x, y
17
+ end
18
+
19
+ def ==(other)
20
+ other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
21
+ end
22
+
23
+ end
24
+ end
25
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
11
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
12
+ #
13
+ class Rc4SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ end
18
+
19
+ ##7.6.2 General Encryption Algorithm
20
+ #
21
+ # Algorithm 1: Encryption of data using the RC4 algorithm
22
+ #
23
+ # version <=3 or (version == 4 and CFM == V2)
24
+ #
25
+ # buf - a string to decrypt
26
+ # ref - a PDF::Reader::Reference for the object to decrypt
27
+ #
28
+ def decrypt( buf, ref )
29
+ objKey = @encrypt_key.dup
30
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
31
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
34
+ rc4.decrypt(buf)
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,113 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ module PDF
6
+ class Reader
7
+
8
+ # PDFs represent rectangles all over the place. They're 4 element arrays, like this:
9
+ #
10
+ # [A, B, C, D]
11
+ #
12
+ # Four element arrays are yucky to work with though, so here's a class that's better.
13
+ # Initialize it with the 4 elements, and get utility functions (width, height, etc)
14
+ # for free.
15
+ #
16
+ # By convention the first two elements are x1, y1, the co-ords for the bottom left corner
17
+ # of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
18
+ # corner of the rectangle. It's valid for the alternative corners to be used though, so
19
+ # we don't assume which is which.
20
+ #
21
+ class Rectangle
22
+
23
+ attr_reader :bottom_left, :bottom_right, :top_left, :top_right
24
+
25
+ def initialize(x1, y1, x2, y2)
26
+ set_corners(x1, y1, x2, y2)
27
+ end
28
+
29
+ def self.from_array(arr)
30
+ if arr.size != 4
31
+ raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
32
+ end
33
+
34
+ PDF::Reader::Rectangle.new(
35
+ arr[0].to_f,
36
+ arr[1].to_f,
37
+ arr[2].to_f,
38
+ arr[3].to_f,
39
+ )
40
+ end
41
+
42
+ def ==(other)
43
+ to_a == other.to_a
44
+ end
45
+
46
+ def height
47
+ top_right.y - bottom_right.y
48
+ end
49
+
50
+ def width
51
+ bottom_right.x - bottom_left.x
52
+ end
53
+
54
+ def contains?(point)
55
+ point.x >= bottom_left.x && point.x <= top_right.x &&
56
+ point.y >= bottom_left.y && point.y <= top_right.y
57
+ end
58
+
59
+ # A pdf-style 4-number array
60
+ def to_a
61
+ [
62
+ bottom_left.x,
63
+ bottom_left.y,
64
+ top_right.x,
65
+ top_right.y,
66
+ ]
67
+ end
68
+
69
+ def apply_rotation(degrees)
70
+ return if degrees != 90 && degrees != 180 && degrees != 270
71
+
72
+ if degrees == 90
73
+ new_x1 = bottom_left.x
74
+ new_y1 = bottom_left.y - width
75
+ new_x2 = bottom_left.x + height
76
+ new_y2 = bottom_left.y
77
+ elsif degrees == 180
78
+ new_x1 = bottom_left.x - width
79
+ new_y1 = bottom_left.y - height
80
+ new_x2 = bottom_left.x
81
+ new_y2 = bottom_left.y
82
+ elsif degrees == 270
83
+ new_x1 = bottom_left.x - height
84
+ new_y1 = bottom_left.y
85
+ new_x2 = bottom_left.x
86
+ new_y2 = bottom_left.y + width
87
+ end
88
+ set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
89
+ end
90
+
91
+ private
92
+
93
+ def set_corners(x1, y1, x2, y2)
94
+ @bottom_left = PDF::Reader::Point.new(
95
+ [x1, x2].min,
96
+ [y1, y2].min,
97
+ )
98
+ @bottom_right = PDF::Reader::Point.new(
99
+ [x1, x2].max,
100
+ [y1, y2].min,
101
+ )
102
+ @top_left = PDF::Reader::Point.new(
103
+ [x1, x2].min,
104
+ [y1, y2].max,
105
+ )
106
+ @top_right = PDF::Reader::Point.new(
107
+ [x1, x2].max,
108
+ [y1, y2].max,
109
+ )
110
+ end
111
+ end
112
+ end
113
+ end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -30,7 +31,8 @@ class PDF::Reader
30
31
  ################################################################################
31
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
32
33
  class Reference
33
- attr_reader :id, :gen
34
+ attr_reader :id
35
+ attr_reader :gen
34
36
  ################################################################################
35
37
  # Create a new Reference to an object with the specified id and revision number
36
38
  def initialize(id, gen)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  # Copyright (C) 2010 James Healy (jimmy@deefa.com)
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -6,7 +7,13 @@ module PDF
6
7
 
7
8
  # mixin for common methods in Page and FormXobjects
8
9
  #
9
- module ResourceMethods
10
+ class Resources
11
+
12
+ def initialize(objects, resources)
13
+ @objects = objects
14
+ @resources = resources
15
+ end
16
+
10
17
  # Returns a Hash of color spaces that are available to this page
11
18
  #
12
19
  # NOTE: this method de-serialise objects from the underlying PDF
@@ -14,7 +21,7 @@ module PDF
14
21
  # of calling it over and over.
15
22
  #
16
23
  def color_spaces
17
- @objects.deref!(resources[:ColorSpace]) || {}
24
+ @objects.deref_hash!(@resources[:ColorSpace]) || {}
18
25
  end
19
26
 
20
27
  # Returns a Hash of fonts that are available to this page
@@ -24,7 +31,7 @@ module PDF
24
31
  # of calling it over and over.
25
32
  #
26
33
  def fonts
27
- @objects.deref!(resources[:Font]) || {}
34
+ @objects.deref_hash!(@resources[:Font]) || {}
28
35
  end
29
36
 
30
37
  # Returns a Hash of external graphic states that are available to this
@@ -35,7 +42,7 @@ module PDF
35
42
  # of calling it over and over.
36
43
  #
37
44
  def graphic_states
38
- @objects.deref!(resources[:ExtGState]) || {}
45
+ @objects.deref_hash!(@resources[:ExtGState]) || {}
39
46
  end
40
47
 
41
48
  # Returns a Hash of patterns that are available to this page
@@ -45,7 +52,7 @@ module PDF
45
52
  # of calling it over and over.
46
53
  #
47
54
  def patterns
48
- @objects.deref!(resources[:Pattern]) || {}
55
+ @objects.deref_hash!(@resources[:Pattern]) || {}
49
56
  end
50
57
 
51
58
  # Returns an Array of procedure sets that are available to this page
@@ -55,7 +62,7 @@ module PDF
55
62
  # of calling it over and over.
56
63
  #
57
64
  def procedure_sets
58
- @objects.deref!(resources[:ProcSet]) || []
65
+ @objects.deref_array!(@resources[:ProcSet]) || []
59
66
  end
60
67
 
61
68
  # Returns a Hash of properties sets that are available to this page
@@ -65,7 +72,7 @@ module PDF
65
72
  # of calling it over and over.
66
73
  #
67
74
  def properties
68
- @objects.deref!(resources[:Properties]) || {}
75
+ @objects.deref_hash!(@resources[:Properties]) || {}
69
76
  end
70
77
 
71
78
  # Returns a Hash of shadings that are available to this page
@@ -75,7 +82,7 @@ module PDF
75
82
  # of calling it over and over.
76
83
  #
77
84
  def shadings
78
- @objects.deref!(resources[:Shading]) || {}
85
+ @objects.deref_hash!(@resources[:Shading]) || {}
79
86
  end
80
87
 
81
88
  # Returns a Hash of XObjects that are available to this page
@@ -85,7 +92,8 @@ module PDF
85
92
  # of calling it over and over.
86
93
  #
87
94
  def xobjects
88
- @objects.deref!(resources[:XObject]) || {}
95
+ dict = @objects.deref_hash!(@resources[:XObject]) || {}
96
+ TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
89
97
  end
90
98
 
91
99
  end
@@ -0,0 +1,79 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
7
+ # able to decrypt the file.
8
+ class SecurityHandlerFactory
9
+
10
+ def self.build(encrypt, doc_id, password)
11
+ doc_id ||= []
12
+ password ||= ""
13
+
14
+ if encrypt.nil?
15
+ NullSecurityHandler.new
16
+ elsif standard?(encrypt)
17
+ build_standard_handler(encrypt, doc_id, password)
18
+ elsif standard_v5?(encrypt)
19
+ build_v5_handler(encrypt, doc_id, password)
20
+ else
21
+ UnimplementedSecurityHandler.new
22
+ end
23
+ end
24
+
25
+ def self.build_standard_handler(encrypt, doc_id, password)
26
+ encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
27
+ key_builder = StandardKeyBuilder.new(
28
+ key_length: (encrypt[:Length] || 40).to_i,
29
+ revision: encrypt[:R],
30
+ owner_key: encrypt[:O],
31
+ user_key: encrypt[:U],
32
+ permissions: encrypt[:P].to_i,
33
+ encrypted_metadata: encmeta,
34
+ file_id: doc_id.first,
35
+ )
36
+ cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
37
+ if cfm == :AESV2
38
+ AesV2SecurityHandler.new(key_builder.key(password))
39
+ else
40
+ Rc4SecurityHandler.new(key_builder.key(password))
41
+ end
42
+ end
43
+
44
+ def self.build_v5_handler(encrypt, doc_id, password)
45
+ key_builder = KeyBuilderV5.new(
46
+ owner_key: encrypt[:O],
47
+ user_key: encrypt[:U],
48
+ owner_encryption_key: encrypt[:OE],
49
+ user_encryption_key: encrypt[:UE],
50
+ )
51
+ AesV3SecurityHandler.new(key_builder.key(password))
52
+ end
53
+
54
+ # This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
55
+ def self.standard?(encrypt)
56
+ return false if encrypt.nil?
57
+
58
+ filter = encrypt.fetch(:Filter, :Standard)
59
+ version = encrypt.fetch(:V, 0)
60
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
61
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
62
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
63
+ end
64
+
65
+ # This handler supports both
66
+ # - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
67
+ # - AES-256 encryption defined in PDF 2.0 ('revision 6')
68
+ def self.standard_v5?(encrypt)
69
+ return false if encrypt.nil?
70
+
71
+ filter = encrypt.fetch(:Filter, :Standard)
72
+ version = encrypt.fetch(:V, 0)
73
+ revision = encrypt.fetch(:R, 0)
74
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
+ ((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
77
+ end
78
+ end
79
+ end
@@ -1,38 +1,19 @@
1
1
  # coding: utf-8
2
- # frozen_string_literal: true
3
2
 
4
- ################################################################################
5
- #
6
- # Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
7
- #
8
- # Permission is hereby granted, free of charge, to any person obtaining
9
- # a copy of this software and associated documentation files (the
10
- # "Software"), to deal in the Software without restriction, including
11
- # without limitation the rights to use, copy, modify, merge, publish,
12
- # distribute, sublicense, and/or sell copies of the Software, and to
13
- # permit persons to whom the Software is furnished to do so, subject to
14
- # the following conditions:
15
- #
16
- # The above copyright notice and this permission notice shall be
17
- # included in all copies or substantial portions of the Software.
18
- #
19
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
- # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
- # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
22
- # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
23
- # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
- # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
- # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
- #
27
- ################################################################################
28
3
  require 'digest/md5'
29
- require 'openssl'
30
4
  require 'rc4'
31
5
 
32
6
  class PDF::Reader
33
7
 
34
- # class creates interface to encrypt dictionary for use in Decrypt
35
- class StandardSecurityHandler
8
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
9
+ # password and returns a key that can decrypt the file.
10
+ #
11
+ # This can generate a key compatible with the following standard encryption algorithms:
12
+ #
13
+ # * Version 1-3, all variants
14
+ # * Version 4, V2 (RC4) and AESV2
15
+ #
16
+ class StandardKeyBuilder
36
17
 
37
18
  ## 7.6.3.3 Encryption Key Algorithm (pp61)
38
19
  #
@@ -44,9 +25,6 @@ class PDF::Reader
44
25
  0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
45
26
  0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
46
27
 
47
- attr_reader :key_length, :revision, :encrypt_key
48
- attr_reader :owner_key, :user_key, :permissions, :file_id, :password
49
-
50
28
  def initialize(opts = {})
51
29
  @key_length = opts[:key_length].to_i/8
52
30
  @revision = opts[:revision].to_i
@@ -55,72 +33,30 @@ class PDF::Reader
55
33
  @permissions = opts[:permissions].to_i
56
34
  @encryptMeta = opts.fetch(:encrypted_metadata, true)
57
35
  @file_id = opts[:file_id] || ""
58
- @encrypt_key = build_standard_key(opts[:password] || "")
59
- @cfm = opts[:cfm]
60
36
 
61
37
  if @key_length != 5 && @key_length != 16
62
- msg = "StandardSecurityHandler only supports 40 and 128 bit\
38
+ msg = "StandardKeyBuilder only supports 40 and 128 bit\
63
39
  encryption (#{@key_length * 8}bit)"
64
- raise ArgumentError, msg
40
+ raise UnsupportedFeatureError, msg
65
41
  end
66
42
  end
67
43
 
68
- # This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
69
- def self.supports?(encrypt)
70
- return false if encrypt.nil?
71
-
72
- filter = encrypt.fetch(:Filter, :Standard)
73
- version = encrypt.fetch(:V, 0)
74
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
75
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
76
- (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
77
- end
78
-
79
- ##7.6.2 General Encryption Algorithm
80
- #
81
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
82
- #
83
- # used to decrypt RC4/AES encrypted PDF streams (buf)
44
+ # Takes a string containing a user provided password.
84
45
  #
85
- # buf - a string to decrypt
86
- # ref - a PDF::Reader::Reference for the object to decrypt
46
+ # If the password matches the file, then a string containing a key suitable for
47
+ # decrypting the file will be returned. If the password doesn't match the file,
48
+ # an exception will be raised.
87
49
  #
88
- def decrypt( buf, ref )
89
- case @cfm
90
- when :AESV2
91
- decrypt_aes128(buf, ref)
92
- else
93
- decrypt_rc4(buf, ref)
94
- end
95
- end
96
-
97
- private
50
+ def key(pass)
51
+ pass ||= ""
52
+ encrypt_key = auth_owner_pass(pass)
53
+ encrypt_key ||= auth_user_pass(pass)
98
54
 
99
- # decrypt with RC4 algorithm
100
- # version <=3 or (version == 4 and CFM == V2)
101
- def decrypt_rc4( buf, ref )
102
- objKey = @encrypt_key.dup
103
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
104
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
105
- length = objKey.length < 16 ? objKey.length : 16
106
- rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
107
- rc4.decrypt(buf)
55
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
56
+ encrypt_key
108
57
  end
109
58
 
110
- # decrypt with AES-128-CBC algorithm
111
- # when (version == 4 and CFM == AESV2)
112
- def decrypt_aes128( buf, ref )
113
- objKey = @encrypt_key.dup
114
- (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
115
- (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
116
- objKey << 'sAlT' # Algorithm 1, b)
117
- length = objKey.length < 16 ? objKey.length : 16
118
- cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
119
- cipher.decrypt
120
- cipher.key = Digest::MD5.digest(objKey)[0,length]
121
- cipher.iv = buf[0..15]
122
- cipher.update(buf[16..-1]) + cipher.final
123
- end
59
+ private
124
60
 
125
61
  # Pads supplied password to 32bytes using PassPadBytes as specified on
126
62
  # pp61 of spec
@@ -152,7 +88,7 @@ class PDF::Reader
152
88
  md5 = Digest::MD5.digest(pad_pass(pass))
153
89
  if @revision > 2 then
154
90
  50.times { md5 = Digest::MD5.digest(md5) }
155
- keyBegins = md5[0, key_length]
91
+ keyBegins = md5[0, @key_length]
156
92
  #first iteration decrypt owner_key
157
93
  out = @owner_key
158
94
  #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
@@ -217,12 +153,5 @@ class PDF::Reader
217
153
  end
218
154
  end
219
155
 
220
- def build_standard_key(pass)
221
- encrypt_key = auth_owner_pass(pass)
222
- encrypt_key ||= auth_user_pass(pass)
223
-
224
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
225
- encrypt_key
226
- end
227
156
  end
228
157
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -39,7 +40,7 @@ class PDF::Reader
39
40
  # Creates a new stream with the specified dictionary and data. The dictionary
40
41
  # should be a standard ruby hash, the data should be a standard ruby string.
41
42
  def initialize(hash, data)
42
- @hash = hash
43
+ @hash = TypeCheck.cast_to_pdf_dict!(hash)
43
44
  @data = data
44
45
  @udata = nil
45
46
  end
@@ -61,7 +62,7 @@ class PDF::Reader
61
62
  end
62
63
 
63
64
  Array(hash[:Filter]).each_with_index do |filter, index|
64
- @udata = Filter.with(filter, options[index]).filter(@udata)
65
+ @udata = Filter.with(filter, options[index] || {}).filter(@udata)
65
66
  end
66
67
  end
67
68
  @udata