pdf-reader 2.6.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +30 -1
  3. data/Rakefile +1 -1
  4. data/examples/rspec.rb +1 -0
  5. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  6. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  7. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  8. data/lib/pdf/reader/buffer.rb +36 -33
  9. data/lib/pdf/reader/cid_widths.rb +1 -0
  10. data/lib/pdf/reader/cmap.rb +65 -50
  11. data/lib/pdf/reader/encoding.rb +2 -1
  12. data/lib/pdf/reader/error.rb +16 -0
  13. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  14. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  15. data/lib/pdf/reader/filter/depredict.rb +8 -6
  16. data/lib/pdf/reader/filter/flate.rb +4 -2
  17. data/lib/pdf/reader/filter/lzw.rb +2 -0
  18. data/lib/pdf/reader/filter/null.rb +1 -1
  19. data/lib/pdf/reader/filter/run_length.rb +19 -13
  20. data/lib/pdf/reader/filter.rb +11 -11
  21. data/lib/pdf/reader/font.rb +72 -16
  22. data/lib/pdf/reader/font_descriptor.rb +19 -17
  23. data/lib/pdf/reader/form_xobject.rb +15 -5
  24. data/lib/pdf/reader/glyph_hash.rb +1 -0
  25. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  26. data/lib/pdf/reader/lzw.rb +4 -2
  27. data/lib/pdf/reader/null_security_handler.rb +1 -4
  28. data/lib/pdf/reader/object_cache.rb +1 -0
  29. data/lib/pdf/reader/object_hash.rb +252 -44
  30. data/lib/pdf/reader/object_stream.rb +1 -0
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  32. data/lib/pdf/reader/page.rb +99 -19
  33. data/lib/pdf/reader/page_layout.rb +28 -32
  34. data/lib/pdf/reader/page_state.rb +12 -11
  35. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  36. data/lib/pdf/reader/pages_strategy.rb +1 -0
  37. data/lib/pdf/reader/parser.rb +26 -8
  38. data/lib/pdf/reader/point.rb +25 -0
  39. data/lib/pdf/reader/print_receiver.rb +1 -0
  40. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  41. data/lib/pdf/reader/rectangle.rb +113 -0
  42. data/lib/pdf/reader/reference.rb +1 -0
  43. data/lib/pdf/reader/register_receiver.rb +1 -0
  44. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  45. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  46. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  47. data/lib/pdf/reader/stream.rb +2 -1
  48. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  49. data/lib/pdf/reader/text_run.rb +14 -6
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  52. data/lib/pdf/reader/type_check.rb +52 -0
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  54. data/lib/pdf/reader/validating_receiver.rb +262 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  56. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  57. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  60. data/lib/pdf/reader/width_calculator.rb +1 -0
  61. data/lib/pdf/reader/xref.rb +21 -3
  62. data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
  63. data/lib/pdf/reader.rb +46 -15
  64. data/lib/pdf-reader.rb +1 -0
  65. data/rbi/pdf-reader.rbi +1978 -0
  66. metadata +22 -13
  67. data/lib/pdf/reader/orientation_detector.rb +0 -34
  68. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
 
@@ -8,6 +9,7 @@ class PDF::Reader
8
9
  module Filter # :nodoc:
9
10
  # implementation of the Flate (zlib) stream filter
10
11
  class Flate
12
+
11
13
  ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
12
14
  ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
13
15
 
@@ -32,7 +34,7 @@ class PDF::Reader
32
34
  def zlib_inflate(data)
33
35
  begin
34
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
35
- rescue Zlib::DataError
37
+ rescue Zlib::Error
36
38
  # by default, Ruby's Zlib assumes the data it's inflating
37
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
38
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -41,7 +43,7 @@ class PDF::Reader
41
43
 
42
44
  begin
43
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
44
- rescue StandardError
46
+ rescue Zlib::Error
45
47
  # swallow this one too, so we can try some other fallback options
46
48
  end
47
49
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader
6
7
  module Filter # :nodoc:
7
8
  # implementation of the LZW stream filter
8
9
  class Lzw
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
- #
5
5
  class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  #
@@ -6,6 +7,7 @@ class PDF::Reader # :nodoc:
6
7
  module Filter # :nodoc:
7
8
  # implementation of the run length stream filter
8
9
  class RunLength
10
+
9
11
  def initialize(options = {})
10
12
  @options = options
11
13
  end
@@ -20,19 +22,23 @@ class PDF::Reader # :nodoc:
20
22
  length = data.getbyte(pos)
21
23
  pos += 1
22
24
 
23
- case
24
- when length == 128
25
- break
26
- when length < 128
27
- # When the length is < 128, we copy the following length+1 bytes
28
- # literally.
29
- out << data[pos, length + 1]
30
- pos += length
31
- else
32
- # When the length is > 128, we copy the next byte (257 - length)
33
- # times; i.e., "\xFA\x00" ([250, 0]) will expand to
34
- # "\x00\x00\x00\x00\x00\x00\x00".
35
- out << data[pos, 1] * (257 - length)
25
+ unless length.nil?
26
+ case
27
+ # nothing
28
+ when length == 128
29
+ break
30
+ when length < 128
31
+ # When the length is < 128, we copy the following length+1 bytes
32
+ # literally.
33
+ out << data[pos, length + 1]
34
+ pos += length
35
+ else
36
+ # When the length is > 128, we copy the next byte (257 - length)
37
+ # times; i.e., "\xFA\x00" ([250, 0]) will expand to
38
+ # "\x00\x00\x00\x00\x00\x00\x00".
39
+ previous_byte = data[pos, 1] || ""
40
+ out << previous_byte * (257 - length)
41
+ end
36
42
  end
37
43
 
38
44
  pos += 1
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -41,17 +42,16 @@ class PDF::Reader
41
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
42
43
  #
43
44
  def self.with(name, options = {})
44
- case name.to_sym
45
- when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
46
- when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
47
- when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
48
- when :DCTDecode then PDF::Reader::Filter::Null.new(options)
49
- when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
50
- when :Fl then PDF::Reader::Filter::Flate.new(options)
51
- when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
- when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
- when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
54
- when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
45
+ case name
46
+ when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
47
+ when :ASCIIHexDecode, :AHx then PDF::Reader::Filter::AsciiHex.new(options)
48
+ when :CCITTFaxDecode, :CCF then PDF::Reader::Filter::Null.new(options)
49
+ when :DCTDecode, :DCT then PDF::Reader::Filter::Null.new(options)
50
+ when :FlateDecode, :Fl then PDF::Reader::Filter::Flate.new(options)
51
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
+ when :LZWDecode, :LZW then PDF::Reader::Filter::Lzw.new(options)
54
+ when :RunLengthDecode, :RL then PDF::Reader::Filter::RunLength.new(options)
55
55
  else
56
56
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
57
57
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -42,6 +43,7 @@ class PDF::Reader
42
43
  @tounicode = nil
43
44
 
44
45
  extract_base_info(obj)
46
+ extract_type3_info(obj)
45
47
  extract_descriptor(obj)
46
48
  extract_descendants(obj)
47
49
  @width_calc = build_width_calculator
@@ -72,8 +74,44 @@ class PDF::Reader
72
74
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
73
75
  end
74
76
 
77
+ # In most cases glyph width is converted into text space with a simple divide by 1000.
78
+ #
79
+ # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
+ #
81
+ def glyph_width_in_text_space(code_point)
82
+ glyph_width_in_glyph_space = glyph_width(code_point)
83
+
84
+ if @subtype == :Type3
85
+ x1, y1 = font_matrix_transform(0,0)
86
+ x2, y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
87
+ (x2 - x1).abs.round(2)
88
+ else
89
+ glyph_width_in_glyph_space / 1000.0
90
+ end
91
+ end
92
+
75
93
  private
76
94
 
95
+ # Only valid for Type3 fonts
96
+ def font_matrix_transform(x, y)
97
+ return x, y if @font_matrix.nil?
98
+
99
+ matrix = TransformationMatrix.new(
100
+ @font_matrix[0], @font_matrix[1],
101
+ @font_matrix[2], @font_matrix[3],
102
+ @font_matrix[4], @font_matrix[5],
103
+ )
104
+
105
+ if x == 0 && y == 0
106
+ [matrix.e, matrix.f]
107
+ else
108
+ [
109
+ (matrix.a * x) + (matrix.c * y) + (matrix.e),
110
+ (matrix.b * x) + (matrix.d * y) + (matrix.f)
111
+ ]
112
+ end
113
+ end
114
+
77
115
  def default_encoding(font_name)
78
116
  case font_name.to_s
79
117
  when "Symbol" then
@@ -111,37 +149,55 @@ class PDF::Reader
111
149
  end
112
150
  end
113
151
 
114
- def extract_base_info(obj)
115
- @subtype = @ohash.object(obj[:Subtype])
116
- @basefont = @ohash.object(obj[:BaseFont])
117
- if @ohash.object(obj[:Encoding])
118
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
119
161
  else
120
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
121
163
  end
122
- @widths = @ohash.object(obj[:Widths]) || []
123
- @first_char = @ohash.object(obj[:FirstChar])
124
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
125
173
 
126
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
127
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
128
- @cid_widths = @ohash.object(obj[:W]) || []
129
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
130
178
 
131
179
  if obj[:ToUnicode]
132
180
  # ToUnicode is optional for Type1 and Type3
133
- stream = @ohash.object(obj[:ToUnicode])
134
- if stream.is_a?(PDF::Reader::Stream)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
135
183
  @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
136
184
  end
137
185
  end
138
186
  end
139
187
 
188
+ def extract_type3_info(obj)
189
+ if @subtype == :Type3
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
193
+ end
194
+ end
195
+
140
196
  def extract_descriptor(obj)
141
197
  if obj[:FontDescriptor]
142
198
  # create a font descriptor object if we can, in other words, unless this is
143
199
  # a CID Font
144
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
145
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
146
202
  else
147
203
  @font_descriptor = nil
@@ -153,9 +209,9 @@ class PDF::Reader
153
209
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
154
210
  # A one-element array specifying the CIDFont dictionary that is the
155
211
  # descendant of this Type 0 font.
156
- descendants = @ohash.object(obj[:DescendantFonts])
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
157
213
  @descendantfonts = descendants.map { |desc|
158
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
159
215
  }
160
216
  end
161
217
 
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'ttfunk'
@@ -14,22 +15,23 @@ class PDF::Reader
14
15
  :x_height, :font_flags
15
16
 
16
17
  def initialize(ohash, fd_hash)
17
- @ascent = ohash.object(fd_hash[:Ascent]) || 0
18
- @descent = ohash.object(fd_hash[:Descent]) || 0
19
- @missing_width = ohash.object(fd_hash[:MissingWidth]) || 0
20
- @font_bounding_box = ohash.object(fd_hash[:FontBBox]) || [0,0,0,0]
21
- @avg_width = ohash.object(fd_hash[:AvgWidth]) || 0
22
- @cap_height = ohash.object(fd_hash[:CapHeight]) || 0
23
- @font_flags = ohash.object(fd_hash[:Flags]) || 0
24
- @italic_angle = ohash.object(fd_hash[:ItalicAngle])
25
- @font_name = ohash.object(fd_hash[:FontName]).to_s
26
- @leading = ohash.object(fd_hash[:Leading]) || 0
27
- @max_width = ohash.object(fd_hash[:MaxWidth]) || 0
28
- @stem_v = ohash.object(fd_hash[:StemV])
29
- @x_height = ohash.object(fd_hash[:XHeight])
30
- @font_stretch = ohash.object(fd_hash[:FontStretch]) || :Normal
31
- @font_weight = ohash.object(fd_hash[:FontWeight]) || 400
32
- @font_family = ohash.object(fd_hash[:FontFamily])
18
+ # TODO change these to typed derefs
19
+ @ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
20
+ @descent = ohash.deref_number(fd_hash[:Descent]) || 0
21
+ @missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
22
+ @font_bounding_box = ohash.deref_array_of_numbers(fd_hash[:FontBBox]) || [0,0,0,0]
23
+ @avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0
24
+ @cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0
25
+ @font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0
26
+ @italic_angle = ohash.deref_number(fd_hash[:ItalicAngle])
27
+ @font_name = ohash.deref_name(fd_hash[:FontName]).to_s
28
+ @leading = ohash.deref_number(fd_hash[:Leading]) || 0
29
+ @max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0
30
+ @stem_v = ohash.deref_number(fd_hash[:StemV])
31
+ @x_height = ohash.deref_number(fd_hash[:XHeight])
32
+ @font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal
33
+ @font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400
34
+ @font_family = ohash.deref_string(fd_hash[:FontFamily])
33
35
 
34
36
  # A FontDescriptor may have an embedded font program in FontFile
35
37
  # (Type 1 Font Program), FontFile2 (TrueType font program), or
@@ -39,7 +41,7 @@ class PDF::Reader
39
41
  # 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
40
42
  # 3) OpenType: OpenType Font Program
41
43
  # see Section 9.9, PDF 32000-1:2008, pp 288-292
42
- @font_program_stream = ohash.object(fd_hash[:FontFile2])
44
+ @font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
43
45
  #TODO handle FontFile and FontFile3
44
46
 
45
47
  @is_ttf = true if @font_program_stream
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'digest/md5'
@@ -14,15 +15,24 @@ module PDF
14
15
  # This behaves and looks much like a limited PDF::Reader::Page class.
15
16
  #
16
17
  class FormXObject
17
- include ResourceMethods
18
+ extend Forwardable
18
19
 
19
20
  attr_reader :xobject
20
21
 
22
+ def_delegators :resources, :color_spaces
23
+ def_delegators :resources, :fonts
24
+ def_delegators :resources, :graphic_states
25
+ def_delegators :resources, :patterns
26
+ def_delegators :resources, :procedure_sets
27
+ def_delegators :resources, :properties
28
+ def_delegators :resources, :shadings
29
+ def_delegators :resources, :xobjects
30
+
21
31
  def initialize(page, xobject, options = {})
22
32
  @page = page
23
33
  @objects = page.objects
24
34
  @cache = options[:cache] || {}
25
- @xobject = @objects.deref(xobject)
35
+ @xobject = @objects.deref_stream(xobject)
26
36
  end
27
37
 
28
38
  # return a hash of fonts used on this form.
@@ -33,9 +43,9 @@ module PDF
33
43
  # to most available metrics for each font.
34
44
  #
35
45
  def font_objects
36
- raw_fonts = @objects.deref(resources[:Font] || {})
46
+ raw_fonts = @objects.deref_hash(fonts)
37
47
  ::Hash[raw_fonts.map { |label, font|
38
- [label, PDF::Reader::Font.new(@objects, @objects.deref(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
39
49
  }]
40
50
  end
41
51
 
@@ -60,7 +70,7 @@ module PDF
60
70
  # Returns the resources that accompany this form.
61
71
  #
62
72
  def resources
63
- @resources ||= @objects.deref(@xobject.hash[:Resources]) || {}
73
+ @resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
64
74
  end
65
75
 
66
76
  def callback(receivers, name, params=[])
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  ################################################################################
@@ -0,0 +1,138 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
11
+ # password and returns a key that can decrypt the file.
12
+ #
13
+ # This can generate a decryption key compatible with the following standard encryption algorithms:
14
+ #
15
+ # * Version 5 (AESV3)
16
+ #
17
+ class KeyBuilderV5
18
+
19
+ def initialize(opts = {})
20
+ @key_length = 256
21
+
22
+ # hash(32B) + validation salt(8B) + key salt(8B)
23
+ @owner_key = opts[:owner_key] || ""
24
+
25
+ # hash(32B) + validation salt(8B) + key salt(8B)
26
+ @user_key = opts[:user_key] || ""
27
+
28
+ # decryption key, encrypted w/ owner password
29
+ @owner_encryption_key = opts[:owner_encryption_key] || ""
30
+
31
+ # decryption key, encrypted w/ user password
32
+ @user_encryption_key = opts[:user_encryption_key] || ""
33
+ end
34
+
35
+ # Takes a string containing a user provided password.
36
+ #
37
+ # If the password matches the file, then a string containing a key suitable for
38
+ # decrypting the file will be returned. If the password doesn't match the file,
39
+ # and exception will be raised.
40
+ #
41
+ def key(pass)
42
+ pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes
43
+
44
+ encrypt_key = auth_owner_pass(pass)
45
+ encrypt_key ||= auth_user_pass(pass)
46
+ encrypt_key ||= auth_owner_pass_r6(pass)
47
+ encrypt_key ||= auth_user_pass_r6(pass)
48
+
49
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
50
+ encrypt_key
51
+ end
52
+
53
+ private
54
+
55
+ # Algorithm 3.2a - Computing an encryption key
56
+ #
57
+ # Defined in PDF 1.7 Extension Level 3
58
+ #
59
+ # if the string is a valid user/owner password, this will return the decryption key
60
+ #
61
+ def auth_owner_pass(password)
62
+ if Digest::SHA256.digest(password + @owner_key[32..39] + @user_key) == @owner_key[0..31]
63
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
64
+ cipher.decrypt
65
+ cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1] + @user_key)
66
+ cipher.iv = "\x00" * 16
67
+ cipher.padding = 0
68
+ cipher.update(@owner_encryption_key) + cipher.final
69
+ end
70
+ end
71
+
72
+ def auth_user_pass(password)
73
+ if Digest::SHA256.digest(password + @user_key[32..39]) == @user_key[0..31]
74
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
75
+ cipher.decrypt
76
+ cipher.key = Digest::SHA256.digest(password + @user_key[40..-1])
77
+ cipher.iv = "\x00" * 16
78
+ cipher.padding = 0
79
+ cipher.update(@user_encryption_key) + cipher.final
80
+ end
81
+ end
82
+
83
+ def auth_owner_pass_r6(password)
84
+ if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
85
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
86
+ cipher.decrypt
87
+ cipher.key = r6_digest(password, @owner_key[40,8].to_s, @user_key[0, 48].to_s)
88
+ cipher.iv = "\x00" * 16
89
+ cipher.padding = 0
90
+ cipher.update(@owner_encryption_key) + cipher.final
91
+ end
92
+ end
93
+
94
+ def auth_user_pass_r6(password)
95
+ if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
96
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
97
+ cipher.decrypt
98
+ cipher.key = r6_digest(password, @user_key[40,8].to_s)
99
+ cipher.iv = "\x00" * 16
100
+ cipher.padding = 0
101
+ cipher.update(@user_encryption_key) + cipher.final
102
+ end
103
+ end
104
+
105
+ # PDF 2.0 spec, 7.6.4.3.4
106
+ # Algorithm 2.B: Computing a hash (revision 6 and later)
107
+ def r6_digest(password, salt, user_key = '')
108
+ k = Digest::SHA256.digest(password + salt + user_key)
109
+ e = ''
110
+
111
+ i = 0
112
+ while i < 64 or e.getbyte(-1).to_i > i - 32
113
+ k1 = (password + k + user_key) * 64
114
+
115
+ aes = OpenSSL::Cipher.new("aes-128-cbc").encrypt
116
+ aes.key = k[0, 16].to_s
117
+ aes.iv = k[16, 16].to_s
118
+ aes.padding = 0
119
+ e = String.new(aes.update(k1))
120
+ k = case unpack_128bit_bigendian_int(e) % 3
121
+ when 0 then Digest::SHA256.digest(e)
122
+ when 1 then Digest::SHA384.digest(e)
123
+ when 2 then Digest::SHA512.digest(e)
124
+ end
125
+ i = i + 1
126
+ end
127
+
128
+ k[0, 32].to_s
129
+ end
130
+
131
+ def unpack_128bit_bigendian_int(str)
132
+ ints = str[0,16].to_s.unpack("N*")
133
+ (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
134
+ end
135
+
136
+ end
137
+ end
138
+
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -35,9 +36,9 @@ module PDF
35
36
 
36
37
  def read
37
38
  bits_left_in_chunk = @bits_in_chunk
38
- chunk = nil
39
+ chunk = -1
39
40
  while bits_left_in_chunk > 0 and @current_pos < @data.size
40
- chunk = 0 if chunk.nil?
41
+ chunk = 0 if chunk < 0
41
42
  codepoint = @data[@current_pos, 1].unpack("C*")[0]
42
43
  current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
43
44
  dif = bits_left_in_chunk - @bits_left_in_byte
@@ -83,6 +84,7 @@ module PDF
83
84
  #
84
85
  def self.decode(data)
85
86
  stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
87
+ string_table = StringTable.new
86
88
  result = "".dup
87
89
  until (code = stream.read) == CODE_EOD
88
90
  if code == CODE_CLEAR_TABLE
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,10 +7,6 @@ class PDF::Reader
6
7
  # A null object security handler. Used when a PDF is unencrypted.
7
8
  class NullSecurityHandler
8
9
 
9
- def self.supports?(encrypt)
10
- encrypt.nil?
11
- end
12
-
13
10
  def decrypt(buf, _ref)
14
11
  buf
15
12
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'hashery/lru_hash'