pdf-reader 2.0.0 → 2.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG +4 -0
- data/lib/pdf/reader.rb +20 -6
- data/lib/pdf/reader/encoding.rb +0 -4
- data/lib/pdf/reader/error.rb +4 -0
- data/lib/pdf/reader/lzw.rb +1 -2
- data/lib/pdf/reader/object_hash.rb +14 -1
- data/lib/pdf/reader/page.rb +1 -1
- data/lib/pdf/reader/page_layout.rb +2 -2
- data/lib/pdf/reader/standard_security_handler.rb +35 -8
- data/lib/pdf/reader/standard_security_handler_v5.rb +89 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b1e63414d8a3db12b6ea802fc45893ebf35c09dd37ca02c5cc73137d7d782364
|
4
|
+
data.tar.gz: afb778860a3dd8aab83d634c393666e159101505aba843262f61f7af49cf30e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c32f7ac1b0d9f0d27ec445905af7dce3544d505221d6b940b8f5f37f85eaf95fad81d40850c85788dc459b59dd1f58398b27e9ec8e72bdbb077e94f77e9f332
|
7
|
+
data.tar.gz: 88bdd1bebe08ad919344788a9a7416e782c7fb5185ef984447ab1f9c968a8fb6a24af2b95dec99da2b43d4d4861a64ba1e8584f0ec25d01c3c13ae4f81f0191c
|
data/CHANGELOG
CHANGED
data/lib/pdf/reader.rb
CHANGED
@@ -134,6 +134,9 @@ module PDF
|
|
134
134
|
|
135
135
|
def page_count
|
136
136
|
pages = @objects.deref(root[:Pages])
|
137
|
+
unless pages.kind_of?(::Hash)
|
138
|
+
raise MalformedPDFError, 'Pages structure is missing'
|
139
|
+
end
|
137
140
|
@page_count ||= @objects.deref(pages[:Count])
|
138
141
|
end
|
139
142
|
|
@@ -173,9 +176,13 @@ module PDF
|
|
173
176
|
# methods available on each page
|
174
177
|
#
|
175
178
|
def pages
|
176
|
-
(1..self.page_count).map
|
177
|
-
|
178
|
-
|
179
|
+
(1..self.page_count).map do |num|
|
180
|
+
begin
|
181
|
+
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
182
|
+
rescue InvalidPageError => ex
|
183
|
+
raise MalformedPDFError, "Missing data for page: #{num}"
|
184
|
+
end
|
185
|
+
end
|
179
186
|
end
|
180
187
|
|
181
188
|
# returns a single PDF::Reader::Page for the specified page.
|
@@ -193,7 +200,7 @@ module PDF
|
|
193
200
|
def page(num)
|
194
201
|
num = num.to_i
|
195
202
|
if num < 1 || num > self.page_count
|
196
|
-
raise
|
203
|
+
raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}"
|
197
204
|
end
|
198
205
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
199
206
|
end
|
@@ -219,7 +226,7 @@ module PDF
|
|
219
226
|
pdfdoc_to_utf8(obj)
|
220
227
|
end
|
221
228
|
else
|
222
|
-
obj
|
229
|
+
@objects.deref(obj)
|
223
230
|
end
|
224
231
|
end
|
225
232
|
|
@@ -241,7 +248,13 @@ module PDF
|
|
241
248
|
end
|
242
249
|
|
243
250
|
def root
|
244
|
-
@root ||= @objects.deref(@objects.trailer[:Root])
|
251
|
+
@root ||= begin
|
252
|
+
obj = @objects.deref(@objects.trailer[:Root])
|
253
|
+
unless obj.kind_of?(::Hash)
|
254
|
+
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
255
|
+
end
|
256
|
+
obj
|
257
|
+
end
|
245
258
|
end
|
246
259
|
|
247
260
|
end
|
@@ -277,6 +290,7 @@ require 'pdf/reader/reference'
|
|
277
290
|
require 'pdf/reader/register_receiver'
|
278
291
|
require 'pdf/reader/null_security_handler'
|
279
292
|
require 'pdf/reader/standard_security_handler'
|
293
|
+
require 'pdf/reader/standard_security_handler_v5'
|
280
294
|
require 'pdf/reader/unimplemented_security_handler'
|
281
295
|
require 'pdf/reader/stream'
|
282
296
|
require 'pdf/reader/text_run'
|
data/lib/pdf/reader/encoding.rb
CHANGED
data/lib/pdf/reader/error.rb
CHANGED
@@ -52,6 +52,10 @@ class PDF::Reader
|
|
52
52
|
# the PDF spec and cannot be recovered
|
53
53
|
class MalformedPDFError < RuntimeError; end
|
54
54
|
|
55
|
+
################################################################################
|
56
|
+
# an exception that is raised when an invalid page number is used
|
57
|
+
class InvalidPageError < ArgumentError; end
|
58
|
+
|
55
59
|
################################################################################
|
56
60
|
# an exception that is raised when a PDF object appears to be invalid
|
57
61
|
class InvalidObjectError < MalformedPDFError; end
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -116,11 +116,10 @@ module PDF
|
|
116
116
|
result
|
117
117
|
end
|
118
118
|
|
119
|
-
private
|
120
|
-
|
121
119
|
def self.create_new_string(string_table,some_code, other_code)
|
122
120
|
string_table[some_code] + string_table[other_code][0].chr
|
123
121
|
end
|
122
|
+
private_class_method :create_new_string
|
124
123
|
|
125
124
|
end
|
126
125
|
end
|
@@ -300,7 +300,16 @@ class PDF::Reader
|
|
300
300
|
permissions: encrypt[:P].to_i,
|
301
301
|
encrypted_metadata: encmeta,
|
302
302
|
file_id: (deref(trailer[:ID]) || []).first,
|
303
|
-
password: opts[:password]
|
303
|
+
password: opts[:password],
|
304
|
+
cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
305
|
+
)
|
306
|
+
elsif StandardSecurityHandlerV5.supports?(encrypt)
|
307
|
+
StandardSecurityHandlerV5.new(
|
308
|
+
O: encrypt[:O],
|
309
|
+
U: encrypt[:U],
|
310
|
+
OE: encrypt[:OE],
|
311
|
+
UE: encrypt[:UE],
|
312
|
+
password: opts[:password]
|
304
313
|
)
|
305
314
|
else
|
306
315
|
UnimplementedSecurityHandler.new
|
@@ -341,6 +350,10 @@ class PDF::Reader
|
|
341
350
|
def get_page_objects(ref)
|
342
351
|
obj = deref(ref)
|
343
352
|
|
353
|
+
unless obj.kind_of?(::Hash)
|
354
|
+
raise MalformedPDFError, "Dereferenced page object must be a dict"
|
355
|
+
end
|
356
|
+
|
344
357
|
if obj[:Type] == :Page
|
345
358
|
ref
|
346
359
|
elsif obj[:Kids]
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -30,8 +30,8 @@ class PDF::Reader
|
|
30
30
|
@runs.each do |run|
|
31
31
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
32
32
|
y_pos = row_count - (run.y / row_multiplier).round
|
33
|
-
if y_pos
|
34
|
-
local_string_insert(page[y_pos], run.text, x_pos)
|
33
|
+
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
34
|
+
local_string_insert(page[y_pos-1], run.text, x_pos)
|
35
35
|
end
|
36
36
|
end
|
37
37
|
interesting_rows(page).map(&:rstrip).join("\n")
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#
|
26
26
|
################################################################################
|
27
27
|
require 'digest/md5'
|
28
|
+
require 'openssl'
|
28
29
|
require 'rc4'
|
29
30
|
|
30
31
|
class PDF::Reader
|
@@ -54,6 +55,7 @@ class PDF::Reader
|
|
54
55
|
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
55
56
|
@file_id = opts[:file_id] || ""
|
56
57
|
@encrypt_key = build_standard_key(opts[:password] || "")
|
58
|
+
@cfm = opts[:cfm]
|
57
59
|
|
58
60
|
if @key_length != 5 && @key_length != 16
|
59
61
|
msg = "StandardSecurityHandler only supports 40 and 128 bit\
|
@@ -62,28 +64,40 @@ class PDF::Reader
|
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
65
|
-
# This handler supports all
|
66
|
-
# AES encryption that was added in later versions of the spec.
|
67
|
+
# This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
|
67
68
|
def self.supports?(encrypt)
|
68
69
|
return false if encrypt.nil?
|
69
70
|
|
70
71
|
filter = encrypt.fetch(:Filter, :Standard)
|
71
72
|
version = encrypt.fetch(:V, 0)
|
72
73
|
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
73
|
-
filter == :Standard &&
|
74
|
-
(version <= 3 || (version == 4 && algorithm
|
74
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
75
|
+
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
75
76
|
end
|
76
77
|
|
77
78
|
##7.6.2 General Encryption Algorithm
|
78
79
|
#
|
79
80
|
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
80
81
|
#
|
81
|
-
# used to decrypt RC4 encrypted PDF streams (buf)
|
82
|
+
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
82
83
|
#
|
83
84
|
# buf - a string to decrypt
|
84
85
|
# ref - a PDF::Reader::Reference for the object to decrypt
|
85
86
|
#
|
86
87
|
def decrypt( buf, ref )
|
88
|
+
case @cfm
|
89
|
+
when :AESV2
|
90
|
+
decrypt_aes128(buf, ref)
|
91
|
+
else
|
92
|
+
decrypt_rc4(buf, ref)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# decrypt with RC4 algorithm
|
99
|
+
# version <=3 or (version == 4 and CFM == V2)
|
100
|
+
def decrypt_rc4( buf, ref )
|
87
101
|
objKey = @encrypt_key.dup
|
88
102
|
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
89
103
|
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
@@ -92,7 +106,20 @@ class PDF::Reader
|
|
92
106
|
rc4.decrypt(buf)
|
93
107
|
end
|
94
108
|
|
95
|
-
|
109
|
+
# decrypt with AES-128-CBC algorithm
|
110
|
+
# when (version == 4 and CFM == AESV2)
|
111
|
+
def decrypt_aes128( buf, ref )
|
112
|
+
objKey = @encrypt_key.dup
|
113
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
114
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
115
|
+
objKey << 'sAlT' # Algorithm 1, b)
|
116
|
+
length = objKey.length < 16 ? objKey.length : 16
|
117
|
+
cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
|
118
|
+
cipher.decrypt
|
119
|
+
cipher.key = Digest::MD5.digest(objKey)[0,length]
|
120
|
+
cipher.iv = buf[0..15]
|
121
|
+
cipher.update(buf[16..-1]) + cipher.final
|
122
|
+
end
|
96
123
|
|
97
124
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
98
125
|
# pp61 of spec
|
@@ -125,9 +152,9 @@ class PDF::Reader
|
|
125
152
|
if @revision > 2 then
|
126
153
|
50.times { md5 = Digest::MD5.digest(md5) }
|
127
154
|
keyBegins = md5[0, key_length]
|
128
|
-
#first
|
155
|
+
#first iteration decrypt owner_key
|
129
156
|
out = @owner_key
|
130
|
-
#RC4 keyed with (keyBegins XOR with
|
157
|
+
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
131
158
|
19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
|
132
159
|
else
|
133
160
|
out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'digest'
|
3
|
+
require 'openssl'
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
|
7
|
+
# class creates interface to encrypt dictionary for use in Decrypt
|
8
|
+
class StandardSecurityHandlerV5
|
9
|
+
|
10
|
+
attr_reader :key_length, :encrypt_key
|
11
|
+
|
12
|
+
def initialize(opts = {})
|
13
|
+
@key_length = 256
|
14
|
+
@O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
|
15
|
+
@U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
|
16
|
+
@OE = opts[:OE] # decryption key, encrypted w/ owner password
|
17
|
+
@UE = opts[:UE] # decryption key, encrypted w/ user password
|
18
|
+
@encrypt_key = build_standard_key(opts[:password] || '')
|
19
|
+
end
|
20
|
+
|
21
|
+
# This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
|
22
|
+
def self.supports?(encrypt)
|
23
|
+
return false if encrypt.nil?
|
24
|
+
|
25
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
26
|
+
version = encrypt.fetch(:V, 0)
|
27
|
+
revision = encrypt.fetch(:R, 0)
|
28
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
29
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
30
|
+
((version == 5) && (revision == 5) && (algorithm == :AESV3))
|
31
|
+
end
|
32
|
+
|
33
|
+
##7.6.2 General Encryption Algorithm
|
34
|
+
#
|
35
|
+
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
36
|
+
#
|
37
|
+
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
38
|
+
#
|
39
|
+
# buf - a string to decrypt
|
40
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
41
|
+
#
|
42
|
+
def decrypt( buf, ref )
|
43
|
+
cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
|
44
|
+
cipher.decrypt
|
45
|
+
cipher.key = @encrypt_key.dup
|
46
|
+
cipher.iv = buf[0..15]
|
47
|
+
cipher.update(buf[16..-1]) + cipher.final
|
48
|
+
end
|
49
|
+
|
50
|
+
private
|
51
|
+
# Algorithm 3.2a - Computing an encryption key
|
52
|
+
#
|
53
|
+
# Defined in PDF 1.7 Extension Level 3
|
54
|
+
#
|
55
|
+
# if the string is a valid user/owner password, this will return the decryption key
|
56
|
+
#
|
57
|
+
def auth_owner_pass(password)
|
58
|
+
if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
|
59
|
+
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
60
|
+
cipher.decrypt
|
61
|
+
cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
|
62
|
+
cipher.iv = "\x00" * 16
|
63
|
+
cipher.padding = 0
|
64
|
+
cipher.update(@OE) + cipher.final
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
def auth_user_pass(password)
|
69
|
+
if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
|
70
|
+
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
71
|
+
cipher.decrypt
|
72
|
+
cipher.key = Digest::SHA256.digest(password + @U[40..-1])
|
73
|
+
cipher.iv = "\x00" * 16
|
74
|
+
cipher.padding = 0
|
75
|
+
cipher.update(@UE) + cipher.final
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
def build_standard_key(pass)
|
80
|
+
pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
|
81
|
+
|
82
|
+
encrypt_key = auth_owner_pass(pass)
|
83
|
+
encrypt_key ||= auth_user_pass(pass)
|
84
|
+
|
85
|
+
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
86
|
+
encrypt_key
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -257,6 +257,7 @@ files:
|
|
257
257
|
- lib/pdf/reader/register_receiver.rb
|
258
258
|
- lib/pdf/reader/resource_methods.rb
|
259
259
|
- lib/pdf/reader/standard_security_handler.rb
|
260
|
+
- lib/pdf/reader/standard_security_handler_v5.rb
|
260
261
|
- lib/pdf/reader/stream.rb
|
261
262
|
- lib/pdf/reader/synchronized_cache.rb
|
262
263
|
- lib/pdf/reader/text_run.rb
|
@@ -295,7 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
296
|
version: '0'
|
296
297
|
requirements: []
|
297
298
|
rubyforge_project:
|
298
|
-
rubygems_version: 2.
|
299
|
+
rubygems_version: 2.7.3
|
299
300
|
signing_key:
|
300
301
|
specification_version: 4
|
301
302
|
summary: A library for accessing the content of PDF files
|