pdf-reader 1.1.1 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG +87 -2
- data/{README.rdoc → README.md} +43 -31
- data/Rakefile +21 -16
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_object +4 -1
- data/bin/pdf_text +1 -3
- data/examples/callbacks.rb +2 -1
- data/examples/extract_images.rb +11 -6
- data/examples/fuzzy_paragraphs.rb +24 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -0
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -0
- data/lib/pdf/reader/afm/Courier.afm +342 -0
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -0
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -0
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -0
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -0
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -0
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -0
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -0
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -0
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -0
- data/lib/pdf/reader/buffer.rb +90 -63
- data/lib/pdf/reader/cid_widths.rb +63 -0
- data/lib/pdf/reader/cmap.rb +69 -38
- data/lib/pdf/reader/encoding.rb +74 -48
- data/lib/pdf/reader/error.rb +24 -4
- data/lib/pdf/reader/filter/ascii85.rb +28 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +30 -0
- data/lib/pdf/reader/filter/depredict.rb +141 -0
- data/lib/pdf/reader/filter/flate.rb +53 -0
- data/lib/pdf/reader/filter/lzw.rb +21 -0
- data/lib/pdf/reader/filter/null.rb +18 -0
- data/lib/pdf/reader/filter/run_length.rb +45 -0
- data/lib/pdf/reader/filter.rb +15 -234
- data/lib/pdf/reader/font.rb +107 -43
- data/lib/pdf/reader/font_descriptor.rb +80 -0
- data/lib/pdf/reader/form_xobject.rb +26 -4
- data/lib/pdf/reader/glyph_hash.rb +56 -18
- data/lib/pdf/reader/lzw.rb +6 -4
- data/lib/pdf/reader/null_security_handler.rb +17 -0
- data/lib/pdf/reader/object_cache.rb +40 -16
- data/lib/pdf/reader/object_hash.rb +94 -40
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/orientation_detector.rb +34 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +65 -0
- data/lib/pdf/reader/page.rb +48 -3
- data/lib/pdf/reader/page_layout.rb +125 -0
- data/lib/pdf/reader/page_state.rb +185 -70
- data/lib/pdf/reader/page_text_receiver.rb +70 -20
- data/lib/pdf/reader/pages_strategy.rb +4 -293
- data/lib/pdf/reader/parser.rb +37 -61
- data/lib/pdf/reader/print_receiver.rb +6 -0
- data/lib/pdf/reader/reference.rb +4 -1
- data/lib/pdf/reader/register_receiver.rb +17 -31
- data/lib/pdf/reader/resource_methods.rb +1 -0
- data/lib/pdf/reader/standard_security_handler.rb +82 -42
- data/lib/pdf/reader/standard_security_handler_v5.rb +91 -0
- data/lib/pdf/reader/stream.rb +5 -2
- data/lib/pdf/reader/synchronized_cache.rb +33 -0
- data/lib/pdf/reader/text_run.rb +99 -0
- data/lib/pdf/reader/token.rb +4 -1
- data/lib/pdf/reader/transformation_matrix.rb +195 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +17 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +67 -0
- data/lib/pdf/reader/width_calculator/composite.rb +28 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +56 -0
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +33 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +25 -0
- data/lib/pdf/reader/width_calculator.rb +12 -0
- data/lib/pdf/reader/xref.rb +41 -9
- data/lib/pdf/reader.rb +45 -104
- data/lib/pdf-reader.rb +4 -1
- metadata +220 -101
- data/bin/pdf_list_callbacks +0 -17
- data/lib/pdf/hash.rb +0 -15
- data/lib/pdf/reader/abstract_strategy.rb +0 -81
- data/lib/pdf/reader/metadata_strategy.rb +0 -56
- data/lib/pdf/reader/text_receiver.rb +0 -264
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -58,7 +61,7 @@ class PDF::Reader
|
|
58
61
|
#
|
59
62
|
# buffer - a PDF::Reader::Buffer object that contains PDF data
|
60
63
|
# objects - a PDF::Reader::ObjectHash object that can return objects from the PDF file
|
61
|
-
def initialize
|
64
|
+
def initialize(buffer, objects=nil)
|
62
65
|
@buffer = buffer
|
63
66
|
@objects = objects
|
64
67
|
end
|
@@ -67,15 +70,13 @@ class PDF::Reader
|
|
67
70
|
# object
|
68
71
|
#
|
69
72
|
# operators - a hash of supported operators to read from the underlying buffer.
|
70
|
-
def parse_token
|
73
|
+
def parse_token(operators={})
|
71
74
|
token = @buffer.token
|
72
75
|
|
73
76
|
if STRATEGIES.has_key? token
|
74
77
|
STRATEGIES[token].call(self, token)
|
75
78
|
elsif token.is_a? PDF::Reader::Reference
|
76
79
|
token
|
77
|
-
elsif token.is_a? Token
|
78
|
-
token
|
79
80
|
elsif operators.has_key? token
|
80
81
|
Token.new(token)
|
81
82
|
elsif token.respond_to?(:to_token)
|
@@ -93,13 +94,14 @@ class PDF::Reader
|
|
93
94
|
#
|
94
95
|
# id - the object ID to return
|
95
96
|
# gen - the object revision number to return
|
96
|
-
def object
|
97
|
+
def object(id, gen)
|
97
98
|
Error.assert_equal(parse_token, id)
|
98
99
|
Error.assert_equal(parse_token, gen)
|
99
100
|
Error.str_assert(parse_token, "obj")
|
100
101
|
|
101
102
|
obj = parse_token
|
102
103
|
post_obj = parse_token
|
104
|
+
|
103
105
|
if post_obj == "stream"
|
104
106
|
stream(obj)
|
105
107
|
else
|
@@ -117,6 +119,7 @@ class PDF::Reader
|
|
117
119
|
loop do
|
118
120
|
key = parse_token
|
119
121
|
break if key.kind_of?(Token) and key == ">>"
|
122
|
+
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
120
123
|
raise MalformedPDFError, "Dictionary key (#{key.inspect}) is not a name" unless key.kind_of?(Symbol)
|
121
124
|
|
122
125
|
value = parse_token
|
@@ -130,8 +133,7 @@ class PDF::Reader
|
|
130
133
|
# reads a PDF name from the buffer and converts it to a Ruby Symbol
|
131
134
|
def pdf_name
|
132
135
|
tok = @buffer.token
|
133
|
-
tok =
|
134
|
-
tok.gsub!(/#([A-Fa-f0-9]{2})/) do |match|
|
136
|
+
tok = tok.dup.gsub(/#([A-Fa-f0-9]{2})/) do |match|
|
135
137
|
match[1, 2].hex.chr
|
136
138
|
end
|
137
139
|
tok.to_sym
|
@@ -144,6 +146,7 @@ class PDF::Reader
|
|
144
146
|
loop do
|
145
147
|
item = parse_token
|
146
148
|
break if item.kind_of?(Token) and item == "]"
|
149
|
+
raise MalformedPDFError, "unterminated array" if @buffer.empty?
|
147
150
|
a << item
|
148
151
|
end
|
149
152
|
|
@@ -152,80 +155,53 @@ class PDF::Reader
|
|
152
155
|
################################################################################
|
153
156
|
# Reads a PDF hex string from the buffer and converts it to a Ruby String
|
154
157
|
def hex_string
|
155
|
-
str = ""
|
158
|
+
str = "".dup
|
156
159
|
|
157
160
|
loop do
|
158
161
|
token = @buffer.token
|
159
162
|
break if token == ">"
|
163
|
+
raise MalformedPDFError, "unterminated hex string" if @buffer.empty?
|
160
164
|
str << token
|
161
165
|
end
|
162
166
|
|
163
167
|
# add a missing digit if required, as required by the spec
|
164
168
|
str << "0" unless str.size % 2 == 0
|
165
|
-
str.scan(/../).map {|i| i.hex.chr}.join
|
169
|
+
str.scan(/../).map {|i| i.hex.chr}.join.force_encoding("binary")
|
166
170
|
end
|
167
171
|
################################################################################
|
168
172
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
169
173
|
def string
|
170
174
|
str = @buffer.token
|
171
|
-
return "" if str == ")"
|
175
|
+
return "".dup.force_encoding("binary") if str == ")"
|
172
176
|
Error.assert_equal(parse_token, ")")
|
173
177
|
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
while idx < str.size
|
178
|
-
chr = str[idx,1]
|
179
|
-
jump = 1
|
180
|
-
|
181
|
-
if chr == "\\"
|
182
|
-
jump = 2
|
183
|
-
case str[idx+1, 1]
|
184
|
-
when "" then jump = 1
|
185
|
-
when "n" then chr = "\n"
|
186
|
-
when "r" then chr = "\r"
|
187
|
-
when "t" then chr = "\t"
|
188
|
-
when "b" then chr = "\b"
|
189
|
-
when "f" then chr = "\f"
|
190
|
-
when "(" then chr = "("
|
191
|
-
when ")" then chr = ")"
|
192
|
-
when "\\" then chr = "\\"
|
193
|
-
when "\n" then
|
194
|
-
chr = ""
|
195
|
-
jump = 2
|
196
|
-
else
|
197
|
-
if str[idx+1,3].match(/\d{3}/)
|
198
|
-
jump = 4
|
199
|
-
chr = str[idx+1,3].oct.chr
|
200
|
-
elsif str[idx+1,2].match(/\d{2}/)
|
201
|
-
jump = 3
|
202
|
-
chr = ("0"+str[idx+1,2]).oct.chr
|
203
|
-
elsif str[idx+1,1].match(/\d/)
|
204
|
-
jump = 2
|
205
|
-
chr = ("00"+str[idx+1,1]).oct.chr
|
206
|
-
else
|
207
|
-
jump = 1
|
208
|
-
chr = ""
|
209
|
-
end
|
210
|
-
|
211
|
-
end
|
212
|
-
elsif chr == "\r" && str[idx+1,1] == "\n"
|
213
|
-
chr = "\n"
|
214
|
-
jump = 2
|
215
|
-
elsif chr == "\n" && str[idx+1,1] == "\r"
|
216
|
-
chr = "\n"
|
217
|
-
jump = 2
|
218
|
-
elsif chr == "\r"
|
219
|
-
chr = "\n"
|
220
|
-
end
|
221
|
-
ret << chr
|
222
|
-
idx += jump
|
178
|
+
str.gsub!(/\\([nrtbf()\\\n]|\d{1,3})?|\r\n?|\n\r/m) do |match|
|
179
|
+
MAPPING[match] || "".dup
|
223
180
|
end
|
224
|
-
|
181
|
+
str.force_encoding("binary")
|
225
182
|
end
|
183
|
+
|
184
|
+
MAPPING = {
|
185
|
+
"\r" => "\n",
|
186
|
+
"\n\r" => "\n",
|
187
|
+
"\r\n" => "\n",
|
188
|
+
"\\n" => "\n",
|
189
|
+
"\\r" => "\r",
|
190
|
+
"\\t" => "\t",
|
191
|
+
"\\b" => "\b",
|
192
|
+
"\\f" => "\f",
|
193
|
+
"\\(" => "(",
|
194
|
+
"\\)" => ")",
|
195
|
+
"\\\\" => "\\",
|
196
|
+
"\\\n" => "",
|
197
|
+
}
|
198
|
+
0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
|
199
|
+
0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
|
200
|
+
0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
|
201
|
+
|
226
202
|
################################################################################
|
227
203
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
228
|
-
def stream
|
204
|
+
def stream(dict)
|
229
205
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
230
206
|
if @objects
|
231
207
|
length = @objects.deref(dict[:Length])
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2006 Peter J Jones (pjones@pmade.com)
|
@@ -30,7 +33,7 @@ class PDF::Reader
|
|
30
33
|
attr_reader :id, :gen
|
31
34
|
################################################################################
|
32
35
|
# Create a new Reference to an object with the specified id and revision number
|
33
|
-
def initialize
|
36
|
+
def initialize(id, gen)
|
34
37
|
@id, @gen = id, gen
|
35
38
|
end
|
36
39
|
################################################################################
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
2
3
|
|
3
4
|
# Copyright (C) 2010 James Healy (jimmy@deefa.com)
|
4
5
|
|
@@ -11,10 +12,12 @@ class PDF::Reader
|
|
11
12
|
#
|
12
13
|
# Usage:
|
13
14
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
15
|
+
# PDF::Reader.open("somefile.pdf") do |reader|
|
16
|
+
# receiver = PDF::Reader::RegisterReceiver.new
|
17
|
+
# reader.page(1).walk(receiver)
|
18
|
+
# callback = receiver.first_occurance_of(:show_text)
|
19
|
+
# callback[:args].first.should == "Hellow World"
|
20
|
+
# end
|
18
21
|
#
|
19
22
|
class RegisterReceiver
|
20
23
|
|
@@ -34,18 +37,12 @@ class PDF::Reader
|
|
34
37
|
|
35
38
|
# count the number of times a callback fired
|
36
39
|
def count(methodname)
|
37
|
-
|
38
|
-
callbacks.each { |cb| counter += 1 if cb[:name] == methodname}
|
39
|
-
return counter
|
40
|
+
callbacks.count { |cb| cb[:name] == methodname}
|
40
41
|
end
|
41
42
|
|
42
43
|
# return the details for every time the specified callback was fired
|
43
44
|
def all(methodname)
|
44
|
-
|
45
|
-
callbacks.each do |cb|
|
46
|
-
ret << cb if cb[:name] == methodname
|
47
|
-
end
|
48
|
-
return ret
|
45
|
+
callbacks.select { |cb| cb[:name] == methodname }
|
49
46
|
end
|
50
47
|
|
51
48
|
def all_args(methodname)
|
@@ -54,42 +51,31 @@ class PDF::Reader
|
|
54
51
|
|
55
52
|
# return the details for the first time the specified callback was fired
|
56
53
|
def first_occurance_of(methodname)
|
57
|
-
callbacks.
|
58
|
-
return cb if cb[:name] == methodname
|
59
|
-
end
|
60
|
-
return nil
|
54
|
+
callbacks.find { |cb| cb[:name] == methodname }
|
61
55
|
end
|
62
56
|
|
63
57
|
# return the details for the final time the specified callback was fired
|
64
58
|
def final_occurance_of(methodname)
|
65
|
-
|
66
|
-
callbacks.each do |cb|
|
67
|
-
returnme = cb if cb[:name] == methodname
|
68
|
-
end
|
69
|
-
return returnme
|
59
|
+
all(methodname).last
|
70
60
|
end
|
71
61
|
|
72
62
|
# return the first occurance of a particular series of callbacks
|
73
63
|
def series(*methods)
|
74
64
|
return nil if methods.empty?
|
75
65
|
|
76
|
-
indexes = (0..(callbacks.size-1
|
66
|
+
indexes = (0..(callbacks.size-1))
|
77
67
|
method_indexes = (0..(methods.size-1))
|
78
|
-
match = nil
|
79
68
|
|
80
69
|
indexes.each do |idx|
|
81
70
|
count = methods.size
|
82
71
|
method_indexes.each do |midx|
|
83
|
-
count -= 1 if callbacks[idx+midx][:name] == methods[midx]
|
72
|
+
count -= 1 if callbacks[idx+midx] && callbacks[idx+midx][:name] == methods[midx]
|
73
|
+
end
|
74
|
+
if count == 0
|
75
|
+
return callbacks[idx, methods.size]
|
84
76
|
end
|
85
|
-
match = idx and break if count == 0
|
86
|
-
end
|
87
|
-
|
88
|
-
if match
|
89
|
-
return callbacks[match, methods.size]
|
90
|
-
else
|
91
|
-
return nil
|
92
77
|
end
|
78
|
+
nil
|
93
79
|
end
|
94
80
|
end
|
95
81
|
end
|
@@ -1,3 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
1
4
|
################################################################################
|
2
5
|
#
|
3
6
|
# Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
|
@@ -23,6 +26,7 @@
|
|
23
26
|
#
|
24
27
|
################################################################################
|
25
28
|
require 'digest/md5'
|
29
|
+
require 'openssl'
|
26
30
|
require 'rc4'
|
27
31
|
|
28
32
|
class PDF::Reader
|
@@ -40,51 +44,83 @@ class PDF::Reader
|
|
40
44
|
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
|
41
45
|
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
|
42
46
|
|
43
|
-
attr_reader :
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
@
|
50
|
-
@
|
51
|
-
@
|
52
|
-
@
|
53
|
-
@
|
54
|
-
@
|
55
|
-
@
|
56
|
-
|
57
|
-
@
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
47
|
+
attr_reader :key_length, :revision, :encrypt_key
|
48
|
+
attr_reader :owner_key, :user_key, :permissions, :file_id, :password
|
49
|
+
|
50
|
+
def initialize(opts = {})
|
51
|
+
@key_length = opts[:key_length].to_i/8
|
52
|
+
@revision = opts[:revision].to_i
|
53
|
+
@owner_key = opts[:owner_key]
|
54
|
+
@user_key = opts[:user_key]
|
55
|
+
@permissions = opts[:permissions].to_i
|
56
|
+
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
57
|
+
@file_id = opts[:file_id] || ""
|
58
|
+
@encrypt_key = build_standard_key(opts[:password] || "")
|
59
|
+
@cfm = opts[:cfm]
|
60
|
+
|
61
|
+
if @key_length != 5 && @key_length != 16
|
62
|
+
msg = "StandardSecurityHandler only supports 40 and 128 bit\
|
63
|
+
encryption (#{@key_length * 8}bit)"
|
64
|
+
raise ArgumentError, msg
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
# This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
|
69
|
+
def self.supports?(encrypt)
|
70
|
+
return false if encrypt.nil?
|
71
|
+
|
72
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
73
|
+
version = encrypt.fetch(:V, 0)
|
74
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
75
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
76
|
+
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
67
77
|
end
|
68
78
|
|
69
79
|
##7.6.2 General Encryption Algorithm
|
70
80
|
#
|
71
81
|
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
72
82
|
#
|
73
|
-
# used to decrypt RC4 encrypted PDF streams (buf)
|
83
|
+
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
74
84
|
#
|
75
85
|
# buf - a string to decrypt
|
76
86
|
# ref - a PDF::Reader::Reference for the object to decrypt
|
77
87
|
#
|
78
88
|
def decrypt( buf, ref )
|
89
|
+
case @cfm
|
90
|
+
when :AESV2
|
91
|
+
decrypt_aes128(buf, ref)
|
92
|
+
else
|
93
|
+
decrypt_rc4(buf, ref)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
private
|
98
|
+
|
99
|
+
# decrypt with RC4 algorithm
|
100
|
+
# version <=3 or (version == 4 and CFM == V2)
|
101
|
+
def decrypt_rc4( buf, ref )
|
79
102
|
objKey = @encrypt_key.dup
|
80
103
|
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
81
104
|
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
82
105
|
length = objKey.length < 16 ? objKey.length : 16
|
83
|
-
rc4 = RC4.new( Digest::MD5.digest(objKey)[
|
106
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
84
107
|
rc4.decrypt(buf)
|
85
108
|
end
|
86
109
|
|
87
|
-
|
110
|
+
# decrypt with AES-128-CBC algorithm
|
111
|
+
# when (version == 4 and CFM == AESV2)
|
112
|
+
def decrypt_aes128( buf, ref )
|
113
|
+
objKey = @encrypt_key.dup
|
114
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
115
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
116
|
+
objKey << 'sAlT' # Algorithm 1, b)
|
117
|
+
length = objKey.length < 16 ? objKey.length : 16
|
118
|
+
cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
|
119
|
+
cipher.decrypt
|
120
|
+
cipher.key = Digest::MD5.digest(objKey)[0,length]
|
121
|
+
cipher.iv = buf[0..15]
|
122
|
+
cipher.update(buf[16..-1]) + cipher.final
|
123
|
+
end
|
88
124
|
|
89
125
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
90
126
|
# pp61 of spec
|
@@ -92,7 +128,7 @@ class PDF::Reader
|
|
92
128
|
if p.nil? || p.empty?
|
93
129
|
PassPadBytes.pack('C*')
|
94
130
|
else
|
95
|
-
p[
|
131
|
+
p[0, 32] + PassPadBytes[0, 32-p.length].pack('C*')
|
96
132
|
end
|
97
133
|
end
|
98
134
|
|
@@ -116,13 +152,13 @@ class PDF::Reader
|
|
116
152
|
md5 = Digest::MD5.digest(pad_pass(pass))
|
117
153
|
if @revision > 2 then
|
118
154
|
50.times { md5 = Digest::MD5.digest(md5) }
|
119
|
-
keyBegins = md5[
|
120
|
-
#first
|
155
|
+
keyBegins = md5[0, key_length]
|
156
|
+
#first iteration decrypt owner_key
|
121
157
|
out = @owner_key
|
122
|
-
#RC4 keyed with (keyBegins XOR with
|
158
|
+
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
123
159
|
19.downto(0).each { |i| out=RC4.new(xor_each_byte(keyBegins,i)).decrypt(out) }
|
124
160
|
else
|
125
|
-
out = RC4.new( md5[
|
161
|
+
out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
|
126
162
|
end
|
127
163
|
# c) check output as user password
|
128
164
|
auth_user_pass( out )
|
@@ -140,12 +176,12 @@ class PDF::Reader
|
|
140
176
|
#
|
141
177
|
def auth_user_pass(pass)
|
142
178
|
keyBegins = make_file_key(pass)
|
143
|
-
if @revision
|
179
|
+
if @revision >= 3
|
144
180
|
#initialize out for first iteration
|
145
181
|
out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
|
146
182
|
#zero doesn't matter -> so from 0-19
|
147
|
-
20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).
|
148
|
-
pass = @user_key[
|
183
|
+
20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
|
184
|
+
pass = @user_key[0, 16] == out
|
149
185
|
else
|
150
186
|
pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
|
151
187
|
end
|
@@ -161,20 +197,24 @@ class PDF::Reader
|
|
161
197
|
(0..24).step(8){|e| @buf << (@permissions >> e & 0xFF)}
|
162
198
|
# e) add the file ID
|
163
199
|
@buf << @file_id
|
164
|
-
# f) if revision
|
165
|
-
if @revision
|
166
|
-
@buf << [
|
200
|
+
# f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
|
201
|
+
if @revision >= 4 && !@encryptMeta
|
202
|
+
@buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
|
167
203
|
end
|
168
204
|
# b) init MD5 digest + g) finish the hash
|
169
205
|
md5 = Digest::MD5.digest(@buf)
|
170
206
|
# h) spin hash 50 times
|
171
|
-
if @revision
|
207
|
+
if @revision >= 3
|
172
208
|
50.times {
|
173
|
-
md5 = Digest::MD5.digest(md5[
|
209
|
+
md5 = Digest::MD5.digest(md5[0, @key_length])
|
174
210
|
}
|
175
211
|
end
|
176
|
-
# i) n = key_length revision
|
177
|
-
|
212
|
+
# i) n = key_length revision >= 3, n = 5 revision == 2
|
213
|
+
if @revision < 3
|
214
|
+
md5[0, 5]
|
215
|
+
else
|
216
|
+
md5[0, @key_length]
|
217
|
+
end
|
178
218
|
end
|
179
219
|
|
180
220
|
def build_standard_key(pass)
|