pdf-reader 2.0.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG +4 -0
- data/lib/pdf/reader.rb +20 -6
- data/lib/pdf/reader/encoding.rb +0 -4
- data/lib/pdf/reader/error.rb +4 -0
- data/lib/pdf/reader/lzw.rb +1 -2
- data/lib/pdf/reader/object_hash.rb +14 -1
- data/lib/pdf/reader/page.rb +1 -1
- data/lib/pdf/reader/page_layout.rb +2 -2
- data/lib/pdf/reader/standard_security_handler.rb +35 -8
- data/lib/pdf/reader/standard_security_handler_v5.rb +89 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: b1e63414d8a3db12b6ea802fc45893ebf35c09dd37ca02c5cc73137d7d782364
|
4
|
+
data.tar.gz: afb778860a3dd8aab83d634c393666e159101505aba843262f61f7af49cf30e5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1c32f7ac1b0d9f0d27ec445905af7dce3544d505221d6b940b8f5f37f85eaf95fad81d40850c85788dc459b59dd1f58398b27e9ec8e72bdbb077e94f77e9f332
|
7
|
+
data.tar.gz: 88bdd1bebe08ad919344788a9a7416e782c7fb5185ef984447ab1f9c968a8fb6a24af2b95dec99da2b43d4d4861a64ba1e8584f0ec25d01c3c13ae4f81f0191c
|
data/CHANGELOG
CHANGED
data/lib/pdf/reader.rb
CHANGED
@@ -134,6 +134,9 @@ module PDF
|
|
134
134
|
|
135
135
|
def page_count
|
136
136
|
pages = @objects.deref(root[:Pages])
|
137
|
+
unless pages.kind_of?(::Hash)
|
138
|
+
raise MalformedPDFError, 'Pages structure is missing'
|
139
|
+
end
|
137
140
|
@page_count ||= @objects.deref(pages[:Count])
|
138
141
|
end
|
139
142
|
|
@@ -173,9 +176,13 @@ module PDF
|
|
173
176
|
# methods available on each page
|
174
177
|
#
|
175
178
|
def pages
|
176
|
-
(1..self.page_count).map
|
177
|
-
|
178
|
-
|
179
|
+
(1..self.page_count).map do |num|
|
180
|
+
begin
|
181
|
+
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
182
|
+
rescue InvalidPageError => ex
|
183
|
+
raise MalformedPDFError, "Missing data for page: #{num}"
|
184
|
+
end
|
185
|
+
end
|
179
186
|
end
|
180
187
|
|
181
188
|
# returns a single PDF::Reader::Page for the specified page.
|
@@ -193,7 +200,7 @@ module PDF
|
|
193
200
|
def page(num)
|
194
201
|
num = num.to_i
|
195
202
|
if num < 1 || num > self.page_count
|
196
|
-
raise
|
203
|
+
raise InvalidPageError, "Valid pages are 1 .. #{self.page_count}"
|
197
204
|
end
|
198
205
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
199
206
|
end
|
@@ -219,7 +226,7 @@ module PDF
|
|
219
226
|
pdfdoc_to_utf8(obj)
|
220
227
|
end
|
221
228
|
else
|
222
|
-
obj
|
229
|
+
@objects.deref(obj)
|
223
230
|
end
|
224
231
|
end
|
225
232
|
|
@@ -241,7 +248,13 @@ module PDF
|
|
241
248
|
end
|
242
249
|
|
243
250
|
def root
|
244
|
-
@root ||=
|
251
|
+
@root ||= begin
|
252
|
+
obj = @objects.deref(@objects.trailer[:Root])
|
253
|
+
unless obj.kind_of?(::Hash)
|
254
|
+
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
255
|
+
end
|
256
|
+
obj
|
257
|
+
end
|
245
258
|
end
|
246
259
|
|
247
260
|
end
|
@@ -277,6 +290,7 @@ require 'pdf/reader/reference'
|
|
277
290
|
require 'pdf/reader/register_receiver'
|
278
291
|
require 'pdf/reader/null_security_handler'
|
279
292
|
require 'pdf/reader/standard_security_handler'
|
293
|
+
require 'pdf/reader/standard_security_handler_v5'
|
280
294
|
require 'pdf/reader/unimplemented_security_handler'
|
281
295
|
require 'pdf/reader/stream'
|
282
296
|
require 'pdf/reader/text_run'
|
data/lib/pdf/reader/encoding.rb
CHANGED
data/lib/pdf/reader/error.rb
CHANGED
@@ -52,6 +52,10 @@ class PDF::Reader
|
|
52
52
|
# the PDF spec and cannot be recovered
|
53
53
|
class MalformedPDFError < RuntimeError; end
|
54
54
|
|
55
|
+
################################################################################
|
56
|
+
# an exception that is raised when an invalid page number is used
|
57
|
+
class InvalidPageError < ArgumentError; end
|
58
|
+
|
55
59
|
################################################################################
|
56
60
|
# an exception that is raised when a PDF object appears to be invalid
|
57
61
|
class InvalidObjectError < MalformedPDFError; end
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -116,11 +116,10 @@ module PDF
|
|
116
116
|
result
|
117
117
|
end
|
118
118
|
|
119
|
-
private
|
120
|
-
|
121
119
|
def self.create_new_string(string_table,some_code, other_code)
|
122
120
|
string_table[some_code] + string_table[other_code][0].chr
|
123
121
|
end
|
122
|
+
private_class_method :create_new_string
|
124
123
|
|
125
124
|
end
|
126
125
|
end
|
@@ -300,7 +300,16 @@ class PDF::Reader
|
|
300
300
|
permissions: encrypt[:P].to_i,
|
301
301
|
encrypted_metadata: encmeta,
|
302
302
|
file_id: (deref(trailer[:ID]) || []).first,
|
303
|
-
password: opts[:password]
|
303
|
+
password: opts[:password],
|
304
|
+
cfm: encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
305
|
+
)
|
306
|
+
elsif StandardSecurityHandlerV5.supports?(encrypt)
|
307
|
+
StandardSecurityHandlerV5.new(
|
308
|
+
O: encrypt[:O],
|
309
|
+
U: encrypt[:U],
|
310
|
+
OE: encrypt[:OE],
|
311
|
+
UE: encrypt[:UE],
|
312
|
+
password: opts[:password]
|
304
313
|
)
|
305
314
|
else
|
306
315
|
UnimplementedSecurityHandler.new
|
@@ -341,6 +350,10 @@ class PDF::Reader
|
|
341
350
|
def get_page_objects(ref)
|
342
351
|
obj = deref(ref)
|
343
352
|
|
353
|
+
unless obj.kind_of?(::Hash)
|
354
|
+
raise MalformedPDFError, "Dereferenced page object must be a dict"
|
355
|
+
end
|
356
|
+
|
344
357
|
if obj[:Type] == :Page
|
345
358
|
ref
|
346
359
|
elsif obj[:Kids]
|
data/lib/pdf/reader/page.rb
CHANGED
@@ -30,8 +30,8 @@ class PDF::Reader
|
|
30
30
|
@runs.each do |run|
|
31
31
|
x_pos = ((run.x - @x_offset) / col_multiplier).round
|
32
32
|
y_pos = row_count - (run.y / row_multiplier).round
|
33
|
-
if y_pos
|
34
|
-
local_string_insert(page[y_pos], run.text, x_pos)
|
33
|
+
if y_pos <= row_count && y_pos >= 0 && x_pos <= col_count && x_pos >= 0
|
34
|
+
local_string_insert(page[y_pos-1], run.text, x_pos)
|
35
35
|
end
|
36
36
|
end
|
37
37
|
interesting_rows(page).map(&:rstrip).join("\n")
|
@@ -25,6 +25,7 @@
|
|
25
25
|
#
|
26
26
|
################################################################################
|
27
27
|
require 'digest/md5'
|
28
|
+
require 'openssl'
|
28
29
|
require 'rc4'
|
29
30
|
|
30
31
|
class PDF::Reader
|
@@ -54,6 +55,7 @@ class PDF::Reader
|
|
54
55
|
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
55
56
|
@file_id = opts[:file_id] || ""
|
56
57
|
@encrypt_key = build_standard_key(opts[:password] || "")
|
58
|
+
@cfm = opts[:cfm]
|
57
59
|
|
58
60
|
if @key_length != 5 && @key_length != 16
|
59
61
|
msg = "StandardSecurityHandler only supports 40 and 128 bit\
|
@@ -62,28 +64,40 @@ class PDF::Reader
|
|
62
64
|
end
|
63
65
|
end
|
64
66
|
|
65
|
-
# This handler supports all
|
66
|
-
# AES encryption that was added in later versions of the spec.
|
67
|
+
# This handler supports all encryption schemes defined up to the PDF 1.5 spec (revision 4)
|
67
68
|
def self.supports?(encrypt)
|
68
69
|
return false if encrypt.nil?
|
69
70
|
|
70
71
|
filter = encrypt.fetch(:Filter, :Standard)
|
71
72
|
version = encrypt.fetch(:V, 0)
|
72
73
|
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
73
|
-
filter == :Standard &&
|
74
|
-
(version <= 3 || (version == 4 && algorithm
|
74
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
75
|
+
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
75
76
|
end
|
76
77
|
|
77
78
|
##7.6.2 General Encryption Algorithm
|
78
79
|
#
|
79
80
|
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
80
81
|
#
|
81
|
-
# used to decrypt RC4 encrypted PDF streams (buf)
|
82
|
+
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
82
83
|
#
|
83
84
|
# buf - a string to decrypt
|
84
85
|
# ref - a PDF::Reader::Reference for the object to decrypt
|
85
86
|
#
|
86
87
|
def decrypt( buf, ref )
|
88
|
+
case @cfm
|
89
|
+
when :AESV2
|
90
|
+
decrypt_aes128(buf, ref)
|
91
|
+
else
|
92
|
+
decrypt_rc4(buf, ref)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
private
|
97
|
+
|
98
|
+
# decrypt with RC4 algorithm
|
99
|
+
# version <=3 or (version == 4 and CFM == V2)
|
100
|
+
def decrypt_rc4( buf, ref )
|
87
101
|
objKey = @encrypt_key.dup
|
88
102
|
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
89
103
|
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
@@ -92,7 +106,20 @@ class PDF::Reader
|
|
92
106
|
rc4.decrypt(buf)
|
93
107
|
end
|
94
108
|
|
95
|
-
|
109
|
+
# decrypt with AES-128-CBC algorithm
|
110
|
+
# when (version == 4 and CFM == AESV2)
|
111
|
+
def decrypt_aes128( buf, ref )
|
112
|
+
objKey = @encrypt_key.dup
|
113
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
114
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
115
|
+
objKey << 'sAlT' # Algorithm 1, b)
|
116
|
+
length = objKey.length < 16 ? objKey.length : 16
|
117
|
+
cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
|
118
|
+
cipher.decrypt
|
119
|
+
cipher.key = Digest::MD5.digest(objKey)[0,length]
|
120
|
+
cipher.iv = buf[0..15]
|
121
|
+
cipher.update(buf[16..-1]) + cipher.final
|
122
|
+
end
|
96
123
|
|
97
124
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
98
125
|
# pp61 of spec
|
@@ -125,9 +152,9 @@ class PDF::Reader
|
|
125
152
|
if @revision > 2 then
|
126
153
|
50.times { md5 = Digest::MD5.digest(md5) }
|
127
154
|
keyBegins = md5[0, key_length]
|
128
|
-
#first
|
155
|
+
#first iteration decrypt owner_key
|
129
156
|
out = @owner_key
|
130
|
-
#RC4 keyed with (keyBegins XOR with
|
157
|
+
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
131
158
|
19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
|
132
159
|
else
|
133
160
|
out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'digest'
|
3
|
+
require 'openssl'
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
|
7
|
+
# Security handler for documents encrypted with the AES-256 scheme introduced
# in PDF 1.7 Extension Level 3 (encryption dictionary V=5, R=5, CFM=AESV3).
#
# It is built from the O, U, OE and UE entries of the encryption dictionary
# plus a password, recovers the file encryption key at construction time and
# then decrypts strings/streams with AES-256-CBC.
class StandardSecurityHandlerV5

  # key_length  - size of the file encryption key, in bits (always 256)
  # encrypt_key - the recovered 32 byte file encryption key
  attr_reader :key_length, :encrypt_key

  # opts - a Hash with the following keys:
  #   :O        - owner hash(32B) + validation salt(8B) + key salt(8B)
  #   :U        - user hash(32B) + validation salt(8B) + key salt(8B)
  #   :OE       - file encryption key, encrypted w/ owner password
  #   :UE       - file encryption key, encrypted w/ user password
  #   :password - user or owner password (defaults to the empty string)
  #
  # Raises PDF::Reader::MalformedPDFError when one of the dictionary entries is
  # missing or too short, and PDF::Reader::EncryptedPDFError when the password
  # matches neither the owner nor the user hash.
  def initialize(opts = {})
    @key_length = 256
    @O  = required_entry(opts, :O, 48)
    @U  = required_entry(opts, :U, 48)
    @OE = required_entry(opts, :OE, 32)
    @UE = required_entry(opts, :UE, 32)
    @encrypt_key = build_standard_key(opts[:password] || '')
  end

  # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
  #
  # encrypt - the (already dereferenced) encryption dictionary, or nil
  #
  # Returns true only for the Standard filter with V=5, R=5, the AESV3 crypt
  # filter and identical stream/string crypt filters.
  def self.supports?(encrypt)
    return false if encrypt.nil?

    filter    = encrypt.fetch(:Filter, :Standard)
    version   = encrypt.fetch(:V, 0)
    revision  = encrypt.fetch(:R, 0)
    algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
    (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
      ((version == 5) && (revision == 5) && (algorithm == :AESV3))
  end

  ##7.6.2 General Encryption Algorithm
  #
  # Algorithm 1: Encryption of data using the RC4 or AES algorithms
  #
  # used to decrypt AES-256 encrypted PDF strings and streams (buf)
  #
  # buf - a string to decrypt; the first 16 bytes are the CBC initialisation
  #       vector, the remainder is the ciphertext
  # ref - a PDF::Reader::Reference for the object to decrypt. Unused here (the
  #       same file key is applied to every object) but kept so all security
  #       handlers share one signature.
  #
  def decrypt( buf, ref )
    cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
    cipher.decrypt
    cipher.key = @encrypt_key.dup
    cipher.iv = buf[0..15]
    cipher.update(buf[16..-1]) + cipher.final
  end

  private

  # Fetch a mandatory encryption dictionary entry as a binary-tagged copy.
  #
  # The copy is forced to BINARY so that later concatenation with the password
  # and comparison against raw digests operate on bytes, whatever encoding the
  # parser tagged the string with.
  def required_entry(opts, name, min_bytes)
    value = opts[name]
    unless value.is_a?(::String) && value.bytesize >= min_bytes
      raise PDF::Reader::MalformedPDFError,
        "Encryption dictionary entry #{name} is missing or too short"
    end
    value.dup.force_encoding(Encoding::BINARY)
  end

  # Decrypt a wrapped file key (OE or UE) with AES-256-CBC, a zero IV and no
  # padding, as required by Algorithm 3.2a.
  def unwrap_file_key(intermediate_key, wrapped)
    cipher = OpenSSL::Cipher.new('AES-256-CBC')
    cipher.decrypt
    cipher.key = intermediate_key
    cipher.iv = "\x00" * 16
    cipher.padding = 0
    cipher.update(wrapped) + cipher.final
  end

  # Algorithm 3.2a - Computing an encryption key
  #
  # Defined in PDF 1.7 Extension Level 3
  #
  # if the string is a valid owner password, this will return the decryption
  # key, otherwise nil
  #
  def auth_owner_pass(password)
    return nil unless Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]

    unwrap_file_key(Digest::SHA256.digest(password + @O[40..-1] + @U), @OE)
  end

  # if the string is a valid user password, this will return the decryption
  # key, otherwise nil
  def auth_user_pass(password)
    return nil unless Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]

    unwrap_file_key(Digest::SHA256.digest(password + @U[40..-1]), @UE)
  end

  # Try the password as owner password first, then as user password.
  #
  # Raises PDF::Reader::EncryptedPDFError if neither matches. The password is
  # deliberately kept out of the message so it cannot leak into logs.
  def build_standard_key(pass)
    # first 127 bytes of the password, handled as raw bytes
    pass = pass.dup.force_encoding(Encoding::BINARY).byteslice(0...127)

    encrypt_key   = auth_owner_pass(pass)
    encrypt_key ||= auth_user_pass(pass)

    raise PDF::Reader::EncryptedPDFError, "Invalid password" if encrypt_key.nil?
    encrypt_key
  end
end
|
89
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-02-15 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -257,6 +257,7 @@ files:
|
|
257
257
|
- lib/pdf/reader/register_receiver.rb
|
258
258
|
- lib/pdf/reader/resource_methods.rb
|
259
259
|
- lib/pdf/reader/standard_security_handler.rb
|
260
|
+
- lib/pdf/reader/standard_security_handler_v5.rb
|
260
261
|
- lib/pdf/reader/stream.rb
|
261
262
|
- lib/pdf/reader/synchronized_cache.rb
|
262
263
|
- lib/pdf/reader/text_run.rb
|
@@ -295,7 +296,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
295
296
|
version: '0'
|
296
297
|
requirements: []
|
297
298
|
rubyforge_project:
|
298
|
-
rubygems_version: 2.
|
299
|
+
rubygems_version: 2.7.3
|
299
300
|
signing_key:
|
300
301
|
specification_version: 4
|
301
302
|
summary: A library for accessing the content of PDF files
|