pdf-reader 2.0.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d34a3033638fcf4f78ce2d71dd5f5aebc2ea7d68
4
- data.tar.gz: 912ed3986ab7e7eefce8f987cffc0d0357bcd675
2
+ SHA256:
3
+ metadata.gz: b1e63414d8a3db12b6ea802fc45893ebf35c09dd37ca02c5cc73137d7d782364
4
+ data.tar.gz: afb778860a3dd8aab83d634c393666e159101505aba843262f61f7af49cf30e5
5
5
  SHA512:
6
- metadata.gz: a24c874cae12838cf122c92338e6da1639d15187be9ef5c1f149eedd0167cc21b53544c3148efd37992535efe2d0ec4487a02fc3a668992295cecc7aa5529f2a
7
- data.tar.gz: c88b9daa21de84eb2a841cc024c499ecc7c5d4b5908ad6618494fa3e7f5d694d291ae87ceb72f5ff9f4d2db66b3bc781e1ff499485ba50290710c6e1061a76db
6
+ metadata.gz: 1c32f7ac1b0d9f0d27ec445905af7dce3544d505221d6b940b8f5f37f85eaf95fad81d40850c85788dc459b59dd1f58398b27e9ec8e72bdbb077e94f77e9f332
7
+ data.tar.gz: 88bdd1bebe08ad919344788a9a7416e782c7fb5185ef984447ab1f9c968a8fb6a24af2b95dec99da2b43d4d4861a64ba1e8584f0ec25d01c3c13ae4f81f0191c
data/CHANGELOG CHANGED
@@ -1,3 +1,7 @@
1
+ v2.1.0 (15th February 2018)
2
+ - Support extra encrypted PDF variants (thanks to Gyuchang Jun)
3
+ - various bug fixes
4
+
1
5
  v2.0.0 (25th February 2017)
2
6
  - various bug fixes
3
7
 
@@ -134,6 +134,9 @@ module PDF
134
134
 
135
135
  def page_count
136
136
  pages = @objects.deref(root[:Pages])
137
+ unless pages.kind_of?(::Hash)
138
+ raise MalformedPDFError, 'Pages structure is missing'
139
+ end
137
140
  @page_count ||= @objects.deref(pages[:Count])
138
141
  end
139
142
 
@@ -173,9 +176,13 @@ module PDF
173
176
  # methods available on each page
174
177
  #
175
178
  def pages
176
- (1..self.page_count).map { |num|
177
- PDF::Reader::Page.new(@objects, num, :cache => @cache)
178
- }
179
+ (1..self.page_count).map do |num|
180
+ begin
181
+ PDF::Reader::Page.new(@objects, num, :cache => @cache)
182
+ rescue InvalidPageError => ex
183
+ raise MalformedPDFError, "Missing data for page: #{num}"
184
+ end
185
+ end
179
186
  end
180
187
 
181
188
  # returns a single PDF::Reader::Page for the specified page.
@@ -193,7 +200,7 @@ module PDF
193
200
  def page(num)
194
201
  num = num.to_i
195
202
  if num < 1 || num > self.page_count
196
- raise ArgumentError, "valid pages are 1 .. #{self.page_count}"
203
+ raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}"
197
204
  end
198
205
  PDF::Reader::Page.new(@objects, num, :cache => @cache)
199
206
  end
@@ -219,7 +226,7 @@ module PDF
219
226
  pdfdoc_to_utf8(obj)
220
227
  end
221
228
  else
222
- obj
229
+ @objects.deref(obj)
223
230
  end
224
231
  end
225
232
 
@@ -241,7 +248,13 @@ module PDF
241
248
  end
242
249
 
243
250
  def root
244
- @root ||= @objects.deref(@objects.trailer[:Root])
251
+ @root ||= begin
252
+ obj = @objects.deref(@objects.trailer[:Root])
253
+ unless obj.kind_of?(::Hash)
254
+ raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
255
+ end
256
+ obj
257
+ end
245
258
  end
246
259
 
247
260
  end
@@ -277,6 +290,7 @@ require 'pdf/reader/reference'
277
290
  require 'pdf/reader/register_receiver'
278
291
  require 'pdf/reader/null_security_handler'
279
292
  require 'pdf/reader/standard_security_handler'
293
+ require 'pdf/reader/standard_security_handler_v5'
280
294
  require 'pdf/reader/unimplemented_security_handler'
281
295
  require 'pdf/reader/stream'
282
296
  require 'pdf/reader/text_run'
@@ -198,10 +198,6 @@ class PDF::Reader
198
198
  end
199
199
  end
200
200
 
201
- def has_mapping?
202
- @mapping.size > 0
203
- end
204
-
205
201
  def glyphlist
206
202
  @glyphlist ||= PDF::Reader::GlyphHash.new
207
203
  end
@@ -52,6 +52,10 @@ class PDF::Reader
52
52
  # the PDF spec and cannot be recovered
53
53
  class MalformedPDFError < RuntimeError; end
54
54
 
55
+ ################################################################################
56
+ # an exception that is raised when an invalid page number is used
57
+ class InvalidPageError < ArgumentError; end
58
+
55
59
  ################################################################################
56
60
  # an exception that is raised when a PDF object appears to be invalid
57
61
  class InvalidObjectError < MalformedPDFError; end
@@ -116,11 +116,10 @@ module PDF
116
116
  result
117
117
  end
118
118
 
119
- private
120
-
121
119
  def self.create_new_string(string_table,some_code, other_code)
122
120
  string_table[some_code] + string_table[other_code][0].chr
123
121
  end
122
+ private_class_method :create_new_string
124
123
 
125
124
  end
126
125
  end
@@ -300,7 +300,16 @@ class PDF::Reader
300
300
  permissions: encrypt[:P].to_i,
301
301
  encrypted_metadata: encmeta,
302
302
  file_id: (deref(trailer[:ID]) || []).first,
303
- password: opts[:password]
303
+ password: opts[:password],
304
+ cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
305
+ )
306
+ elsif StandardSecurityHandlerV5.supports?(encrypt)
307
+ StandardSecurityHandlerV5.new(
308
+ O: encrypt[:O],
309
+ U: encrypt[:U],
310
+ OE: encrypt[:OE],
311
+ UE: encrypt[:UE],
312
+ password: opts[:password]
304
313
  )
305
314
  else
306
315
  UnimplementedSecurityHandler.new
@@ -341,6 +350,10 @@ class PDF::Reader
341
350
  def get_page_objects(ref)
342
351
  obj = deref(ref)
343
352
 
353
+ unless obj.kind_of?(::Hash)
354
+ raise MalformedPDFError, "Dereferenced page object must be a dict"
355
+ end
356
+
344
357
  if obj[:Type] == :Page
345
358
  ref
346
359
  elsif obj[:Kids]
@@ -36,7 +36,7 @@ module PDF
36
36
  @cache = options[:cache] || {}
37
37
 
38
38
  unless @page_object.is_a?(::Hash)
39
- raise ArgumentError, "invalid page: #{pagenum}"
39
+ raise InvalidPageError, "Invalid page: #{pagenum}"
40
40
  end
41
41
  end
42
42
 
@@ -30,8 +30,8 @@ class PDF::Reader
30
30
  @runs.each do |run|
31
31
  x_pos = ((run.x - @x_offset) / col_multiplier).round
32
32
  y_pos = row_count - (run.y / row_multiplier).round
33
- if y_pos < row_count && y_pos >= 0 && x_pos < col_count && x_pos >= 0
34
- local_string_insert(page[y_pos], run.text, x_pos)
33
+ if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
34
+ local_string_insert(page[y_pos-1], run.text, x_pos)
35
35
  end
36
36
  end
37
37
  interesting_rows(page).map(&:rstrip).join("\n")
@@ -25,6 +25,7 @@
25
25
  #
26
26
  ################################################################################
27
27
  require 'digest/md5'
28
+ require 'openssl'
28
29
  require 'rc4'
29
30
 
30
31
  class PDF::Reader
@@ -54,6 +55,7 @@ class PDF::Reader
54
55
  @encryptMeta = opts.fetch(:encrypted_metadata, true)
55
56
  @file_id = opts[:file_id] || ""
56
57
  @encrypt_key = build_standard_key(opts[:password] || "")
58
+ @cfm = opts[:cfm]
57
59
 
58
60
  if @key_length != 5 && @key_length != 16
59
61
  msg = "StandardSecurityHandler only supports 40 and 128 bit\
@@ -62,28 +64,40 @@ class PDF::Reader
62
64
  end
63
65
  end
64
66
 
65
- # This handler supports all RC4 encryption that follows the PDF spec. It does not support
66
- # AES encryption that was added in later versions of the spec.
67
+ # This handler supports all encryption that follows the PDF spec up to version 1.5 (revision 4)
67
68
  def self.supports?(encrypt)
68
69
  return false if encrypt.nil?
69
70
 
70
71
  filter = encrypt.fetch(:Filter, :Standard)
71
72
  version = encrypt.fetch(:V, 0)
72
73
  algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
73
- filter == :Standard &&
74
- (version <= 3 || (version == 4 && algorithm != :AESV2))
74
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
75
+ (version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
75
76
  end
76
77
 
77
78
  ##7.6.2 General Encryption Algorithm
78
79
  #
79
80
  # Algorithm 1: Encryption of data using the RC4 or AES algorithms
80
81
  #
81
- # used to decrypt RC4 encrypted PDF streams (buf)
82
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
82
83
  #
83
84
  # buf - a string to decrypt
84
85
  # ref - a PDF::Reader::Reference for the object to decrypt
85
86
  #
86
87
  def decrypt( buf, ref )
88
+ case @cfm
89
+ when :AESV2
90
+ decrypt_aes128(buf, ref)
91
+ else
92
+ decrypt_rc4(buf, ref)
93
+ end
94
+ end
95
+
96
+ private
97
+
98
+ # decrypt with RC4 algorithm
99
+ # version <=3 or (version == 4 and CFM == V2)
100
+ def decrypt_rc4( buf, ref )
87
101
  objKey = @encrypt_key.dup
88
102
  (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
89
103
  (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
@@ -92,7 +106,20 @@ class PDF::Reader
92
106
  rc4.decrypt(buf)
93
107
  end
94
108
 
95
- private
109
+ # decrypt with AES-128-CBC algorithm
110
+ # when (version == 4 and CFM == AESV2)
111
+ def decrypt_aes128( buf, ref )
112
+ objKey = @encrypt_key.dup
113
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
114
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
115
+ objKey << 'sAlT' # Algorithm 1, b)
116
+ length = objKey.length < 16 ? objKey.length : 16
117
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
118
+ cipher.decrypt
119
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
120
+ cipher.iv = buf[0..15]
121
+ cipher.update(buf[16..-1]) + cipher.final
122
+ end
96
123
 
97
124
  # Pads supplied password to 32bytes using PassPadBytes as specified on
98
125
  # pp61 of spec
@@ -125,9 +152,9 @@ class PDF::Reader
125
152
  if @revision > 2 then
126
153
  50.times { md5 = Digest::MD5.digest(md5) }
127
154
  keyBegins = md5[0, key_length]
128
- #first itteration decrypt owner_key
155
+ #first iteration decrypt owner_key
129
156
  out = @owner_key
130
- #RC4 keyed with (keyBegins XOR with itteration #) to decrypt previous out
157
+ #RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
131
158
  19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
132
159
  else
133
160
  out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
@@ -0,0 +1,89 @@
1
+ # coding: utf-8
2
+ require 'digest'
3
+ require 'openssl'
4
+
5
+ class PDF::Reader
6
+
7
+ # class creates interface to encrypt dictionary for use in Decrypt
8
+ class StandardSecurityHandlerV5
9
+
10
+ attr_reader :key_length, :encrypt_key
11
+
12
+ def initialize(opts = {})
13
+ @key_length = 256
14
+ @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
15
+ @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
16
+ @OE = opts[:OE] # decryption key, encrypted w/ owner password
17
+ @UE = opts[:UE] # decryption key, encrypted w/ user password
18
+ @encrypt_key = build_standard_key(opts[:password] || '')
19
+ end
20
+
21
+ # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
22
+ def self.supports?(encrypt)
23
+ return false if encrypt.nil?
24
+
25
+ filter = encrypt.fetch(:Filter, :Standard)
26
+ version = encrypt.fetch(:V, 0)
27
+ revision = encrypt.fetch(:R, 0)
28
+ algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
29
+ (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
30
+ ((version == 5) && (revision == 5) && (algorithm == :AESV3))
31
+ end
32
+
33
+ ##7.6.2 General Encryption Algorithm
34
+ #
35
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
36
+ #
37
+ # used to decrypt AES-256 encrypted PDF streams (buf)
38
+ #
39
+ # buf - a string to decrypt
40
+ # ref - a PDF::Reader::Reference for the object to decrypt
41
+ #
42
+ def decrypt( buf, ref )
43
+ cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
44
+ cipher.decrypt
45
+ cipher.key = @encrypt_key.dup
46
+ cipher.iv = buf[0..15]
47
+ cipher.update(buf[16..-1]) + cipher.final
48
+ end
49
+
50
+ private
51
+ # Algorithm 3.2a - Computing an encryption key
52
+ #
53
+ # Defined in PDF 1.7 Extension Level 3
54
+ #
55
+ # if the string is a valid user/owner password, this will return the decryption key
56
+ #
57
+ def auth_owner_pass(password)
58
+ if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
59
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
60
+ cipher.decrypt
61
+ cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
62
+ cipher.iv = "\x00" * 16
63
+ cipher.padding = 0
64
+ cipher.update(@OE) + cipher.final
65
+ end
66
+ end
67
+
68
+ def auth_user_pass(password)
69
+ if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
70
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
71
+ cipher.decrypt
72
+ cipher.key = Digest::SHA256.digest(password + @U[40..-1])
73
+ cipher.iv = "\x00" * 16
74
+ cipher.padding = 0
75
+ cipher.update(@UE) + cipher.final
76
+ end
77
+ end
78
+
79
+ def build_standard_key(pass)
80
+ pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
81
+
82
+ encrypt_key = auth_owner_pass(pass)
83
+ encrypt_key ||= auth_user_pass(pass)
84
+
85
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
86
+ encrypt_key
87
+ end
88
+ end
89
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-02-25 00:00:00.000000000 Z
11
+ date: 2018-02-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -257,6 +257,7 @@ files:
257
257
  - lib/pdf/reader/register_receiver.rb
258
258
  - lib/pdf/reader/resource_methods.rb
259
259
  - lib/pdf/reader/standard_security_handler.rb
260
+ - lib/pdf/reader/standard_security_handler_v5.rb
260
261
  - lib/pdf/reader/stream.rb
261
262
  - lib/pdf/reader/synchronized_cache.rb
262
263
  - lib/pdf/reader/text_run.rb
@@ -295,7 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
295
296
  version: '0'
296
297
  requirements: []
297
298
  rubyforge_project:
298
- rubygems_version: 2.6.8
299
+ rubygems_version: 2.7.3
299
300
  signing_key:
300
301
  specification_version: 4
301
302
  summary: A library for accessing the content of PDF files