pdf-reader 2.2.0 → 2.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +90 -0
- data/README.md +18 -3
- data/Rakefile +1 -1
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +91 -47
- data/lib/pdf/reader/cid_widths.rb +7 -4
- data/lib/pdf/reader/cmap.rb +83 -59
- data/lib/pdf/reader/encoding.rb +17 -14
- data/lib/pdf/reader/error.rb +15 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +12 -10
- data/lib/pdf/reader/filter/flate.rb +30 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +11 -11
- data/lib/pdf/reader/font.rb +89 -26
- data/lib/pdf/reader/font_descriptor.rb +22 -18
- data/lib/pdf/reader/form_xobject.rb +18 -5
- data/lib/pdf/reader/glyph_hash.rb +28 -13
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +28 -11
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +292 -63
- data/lib/pdf/reader/object_stream.rb +3 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
- data/lib/pdf/reader/page.rb +143 -16
- data/lib/pdf/reader/page_layout.rb +43 -39
- data/lib/pdf/reader/page_state.rb +26 -17
- data/lib/pdf/reader/page_text_receiver.rb +74 -4
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +34 -14
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +3 -1
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +3 -2
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +40 -5
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +8 -7
- data/lib/pdf/reader/type_check.rb +98 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
- data/lib/pdf/reader/width_calculator/composite.rb +6 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +37 -11
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +49 -24
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +2048 -0
- metadata +39 -23
- data/lib/pdf/hash.rb +0 -20
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
data/lib/pdf/reader/parser.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -79,8 +80,8 @@ class PDF::Reader
|
|
79
80
|
token
|
80
81
|
elsif operators.has_key? token
|
81
82
|
Token.new(token)
|
82
|
-
elsif token.
|
83
|
-
token
|
83
|
+
elsif token.frozen?
|
84
|
+
token
|
84
85
|
elsif token =~ /\d*\.\d/
|
85
86
|
token.to_f
|
86
87
|
else
|
@@ -95,14 +96,20 @@ class PDF::Reader
|
|
95
96
|
# id - the object ID to return
|
96
97
|
# gen - the object revision number to return
|
97
98
|
def object(id, gen)
|
98
|
-
|
99
|
+
idCheck = parse_token
|
100
|
+
|
101
|
+
# Sometimes the xref table is corrupt and points to an offset slightly too early in the file.
|
102
|
+
# check the next token, maybe we can find the start of the object we're looking for
|
103
|
+
if idCheck != id
|
104
|
+
Error.assert_equal(parse_token, id)
|
105
|
+
end
|
99
106
|
Error.assert_equal(parse_token, gen)
|
100
107
|
Error.str_assert(parse_token, "obj")
|
101
108
|
|
102
109
|
obj = parse_token
|
103
110
|
post_obj = parse_token
|
104
111
|
|
105
|
-
if post_obj == "stream"
|
112
|
+
if obj.is_a?(Hash) && post_obj == "stream"
|
106
113
|
stream(obj)
|
107
114
|
else
|
108
115
|
obj
|
@@ -120,7 +127,7 @@ class PDF::Reader
|
|
120
127
|
key = parse_token
|
121
128
|
break if key.kind_of?(Token) and key == ">>"
|
122
129
|
raise MalformedPDFError, "unterminated dict" if @buffer.empty?
|
123
|
-
|
130
|
+
PDF::Reader::Error.validate_type_as_malformed(key, "Dictionary key", Symbol)
|
124
131
|
|
125
132
|
value = parse_token
|
126
133
|
value.kind_of?(Token) and Error.str_assert_not(value, ">>")
|
@@ -166,7 +173,9 @@ class PDF::Reader
|
|
166
173
|
|
167
174
|
# add a missing digit if required, as required by the spec
|
168
175
|
str << "0" unless str.size % 2 == 0
|
169
|
-
str.
|
176
|
+
str.chars.each_slice(2).map { |nibbles|
|
177
|
+
nibbles.join("").hex.chr
|
178
|
+
}.join.force_encoding("binary")
|
170
179
|
end
|
171
180
|
################################################################################
|
172
181
|
# Reads a PDF String from the buffer and converts it to a Ruby String
|
@@ -175,15 +184,18 @@ class PDF::Reader
|
|
175
184
|
return "".dup.force_encoding("binary") if str == ")"
|
176
185
|
Error.assert_equal(parse_token, ")")
|
177
186
|
|
178
|
-
str.gsub!(/\\([nrtbf()\\\n]
|
179
|
-
|
187
|
+
str.gsub!(/\\(\r\n|[nrtbf()\\\n\r]|([0-7]{1,3}))?|\r\n?/m) do |match|
|
188
|
+
if $2.nil? # not octal digits
|
189
|
+
MAPPING[match] || "".dup
|
190
|
+
else # must be octal digits
|
191
|
+
($2.oct & 0xff).chr # ignore high level overflow
|
192
|
+
end
|
180
193
|
end
|
181
194
|
str.force_encoding("binary")
|
182
195
|
end
|
183
196
|
|
184
197
|
MAPPING = {
|
185
198
|
"\r" => "\n",
|
186
|
-
"\n\r" => "\n",
|
187
199
|
"\r\n" => "\n",
|
188
200
|
"\\n" => "\n",
|
189
201
|
"\\r" => "\r",
|
@@ -194,24 +206,32 @@ class PDF::Reader
|
|
194
206
|
"\\)" => ")",
|
195
207
|
"\\\\" => "\\",
|
196
208
|
"\\\n" => "",
|
209
|
+
"\\\r" => "",
|
210
|
+
"\\\r\n" => "",
|
197
211
|
}
|
198
|
-
0.upto(9) { |n| MAPPING["\\00"+n.to_s] = ("00"+n.to_s).oct.chr }
|
199
|
-
0.upto(99) { |n| MAPPING["\\0"+n.to_s] = ("0"+n.to_s).oct.chr }
|
200
|
-
0.upto(377) { |n| MAPPING["\\"+n.to_s] = n.to_s.oct.chr }
|
201
212
|
|
202
213
|
################################################################################
|
203
214
|
# Decodes the contents of a PDF Stream and returns it as a Ruby String.
|
204
215
|
def stream(dict)
|
205
216
|
raise MalformedPDFError, "PDF malformed, missing stream length" unless dict.has_key?(:Length)
|
206
217
|
if @objects
|
207
|
-
length = @objects.
|
218
|
+
length = @objects.deref_integer(dict[:Length])
|
219
|
+
if dict[:Filter]
|
220
|
+
dict[:Filter] = @objects.deref_name_or_array(dict[:Filter])
|
221
|
+
end
|
208
222
|
else
|
209
223
|
length = dict[:Length] || 0
|
210
224
|
end
|
225
|
+
|
226
|
+
PDF::Reader::Error.validate_type_as_malformed(length, "length", Numeric)
|
227
|
+
|
211
228
|
data = @buffer.read(length, :skip_eol => true)
|
212
229
|
|
213
230
|
Error.str_assert(parse_token, "endstream")
|
214
|
-
|
231
|
+
|
232
|
+
# We used to assert that the stream had the correct closing token, but it doesn't *really*
|
233
|
+
# matter if it's missing, and other readers seem to handle its absence just fine
|
234
|
+
# Error.str_assert(parse_token, "endobj")
|
215
235
|
|
216
236
|
PDF::Reader::Stream.new(dict, data)
|
217
237
|
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs are all about positioning content on a page, so there's lots of need to
|
9
|
+
# work with a set of X,Y coordinates.
|
10
|
+
#
|
11
|
+
class Point
|
12
|
+
|
13
|
+
attr_reader :x, :y
|
14
|
+
|
15
|
+
def initialize(x, y)
|
16
|
+
@x, @y = x, y
|
17
|
+
end
|
18
|
+
|
19
|
+
def ==(other)
|
20
|
+
other.respond_to?(:x) && other.respond_to?(:y) && x == other.x && y == other.y
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Decrypts data using the RC4 algorithm defined in the PDF spec. Requires
|
11
|
+
# a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
|
12
|
+
#
|
13
|
+
class Rc4SecurityHandler
|
14
|
+
|
15
|
+
def initialize(key)
|
16
|
+
@encrypt_key = key
|
17
|
+
end
|
18
|
+
|
19
|
+
##7.6.2 General Encryption Algorithm
|
20
|
+
#
|
21
|
+
# Algorithm 1: Encryption of data using the RC4 algorithm
|
22
|
+
#
|
23
|
+
# version <=3 or (version == 4 and CFM == V2)
|
24
|
+
#
|
25
|
+
# buf - a string to decrypt
|
26
|
+
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
|
+
#
|
28
|
+
def decrypt( buf, ref )
|
29
|
+
objKey = @encrypt_key.dup
|
30
|
+
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
31
|
+
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
32
|
+
length = objKey.length < 16 ? objKey.length : 16
|
33
|
+
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
34
|
+
rc4.decrypt(buf)
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# PDFs represent rectangles all over the place. They're 4 element arrays, like this:
|
9
|
+
#
|
10
|
+
# [A, B, C, D]
|
11
|
+
#
|
12
|
+
# Four element arrays are yucky to work with though, so here's a class that's better.
|
13
|
+
# Initialize it with the 4 elements, and get utility functions (width, height, etc)
|
14
|
+
# for free.
|
15
|
+
#
|
16
|
+
# By convention the first two elements are x1, y1, the co-ords for the bottom left corner
|
17
|
+
# of the rectangle. The third and fourth elements are x2, y2, the co-ords for the top right
|
18
|
+
# corner of the rectangle. It's valid for the alternative corners to be used though, so
|
19
|
+
# we don't assume which is which.
|
20
|
+
#
|
21
|
+
class Rectangle
|
22
|
+
|
23
|
+
attr_reader :bottom_left, :bottom_right, :top_left, :top_right
|
24
|
+
|
25
|
+
def initialize(x1, y1, x2, y2)
|
26
|
+
set_corners(x1, y1, x2, y2)
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.from_array(arr)
|
30
|
+
if arr.size != 4
|
31
|
+
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
32
|
+
end
|
33
|
+
|
34
|
+
PDF::Reader::Rectangle.new(
|
35
|
+
arr[0].to_f,
|
36
|
+
arr[1].to_f,
|
37
|
+
arr[2].to_f,
|
38
|
+
arr[3].to_f,
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
def ==(other)
|
43
|
+
to_a == other.to_a
|
44
|
+
end
|
45
|
+
|
46
|
+
def height
|
47
|
+
top_right.y - bottom_right.y
|
48
|
+
end
|
49
|
+
|
50
|
+
def width
|
51
|
+
bottom_right.x - bottom_left.x
|
52
|
+
end
|
53
|
+
|
54
|
+
def contains?(point)
|
55
|
+
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
|
+
point.y >= bottom_left.y && point.y <= top_right.y
|
57
|
+
end
|
58
|
+
|
59
|
+
# A pdf-style 4-number array
|
60
|
+
def to_a
|
61
|
+
[
|
62
|
+
bottom_left.x,
|
63
|
+
bottom_left.y,
|
64
|
+
top_right.x,
|
65
|
+
top_right.y,
|
66
|
+
]
|
67
|
+
end
|
68
|
+
|
69
|
+
def apply_rotation(degrees)
|
70
|
+
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
|
+
|
72
|
+
if degrees == 90
|
73
|
+
new_x1 = bottom_left.x
|
74
|
+
new_y1 = bottom_left.y - width
|
75
|
+
new_x2 = bottom_left.x + height
|
76
|
+
new_y2 = bottom_left.y
|
77
|
+
elsif degrees == 180
|
78
|
+
new_x1 = bottom_left.x - width
|
79
|
+
new_y1 = bottom_left.y - height
|
80
|
+
new_x2 = bottom_left.x
|
81
|
+
new_y2 = bottom_left.y
|
82
|
+
elsif degrees == 270
|
83
|
+
new_x1 = bottom_left.x - height
|
84
|
+
new_y1 = bottom_left.y
|
85
|
+
new_x2 = bottom_left.x
|
86
|
+
new_y2 = bottom_left.y + width
|
87
|
+
end
|
88
|
+
set_corners(new_x1 || 0, new_y1 || 0, new_x2 || 0, new_y2 || 0)
|
89
|
+
end
|
90
|
+
|
91
|
+
private
|
92
|
+
|
93
|
+
def set_corners(x1, y1, x2, y2)
|
94
|
+
@bottom_left = PDF::Reader::Point.new(
|
95
|
+
[x1, x2].min,
|
96
|
+
[y1, y2].min,
|
97
|
+
)
|
98
|
+
@bottom_right = PDF::Reader::Point.new(
|
99
|
+
[x1, x2].max,
|
100
|
+
[y1, y2].min,
|
101
|
+
)
|
102
|
+
@top_left = PDF::Reader::Point.new(
|
103
|
+
[x1, x2].min,
|
104
|
+
[y1, y2].max,
|
105
|
+
)
|
106
|
+
@top_right = PDF::Reader::Point.new(
|
107
|
+
[x1, x2].max,
|
108
|
+
[y1, y2].max,
|
109
|
+
)
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
113
|
+
end
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -30,7 +31,8 @@ class PDF::Reader
|
|
30
31
|
################################################################################
|
31
32
|
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
32
33
|
class Reference
|
33
|
-
attr_reader :id
|
34
|
+
attr_reader :id
|
35
|
+
attr_reader :gen
|
34
36
|
################################################################################
|
35
37
|
# Create a new Reference to an object with the specified id and revision number
|
36
38
|
def initialize(id, gen)
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -6,7 +7,13 @@ module PDF
|
|
6
7
|
|
7
8
|
# mixin for common methods in Page and FormXobjects
|
8
9
|
#
|
9
|
-
|
10
|
+
class Resources
|
11
|
+
|
12
|
+
def initialize(objects, resources)
|
13
|
+
@objects = objects
|
14
|
+
@resources = resources
|
15
|
+
end
|
16
|
+
|
10
17
|
# Returns a Hash of color spaces that are available to this page
|
11
18
|
#
|
12
19
|
# NOTE: this method de-serialises objects from the underlying PDF
|
@@ -14,7 +21,7 @@ module PDF
|
|
14
21
|
# of calling it over and over.
|
15
22
|
#
|
16
23
|
def color_spaces
|
17
|
-
@objects.
|
24
|
+
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
18
25
|
end
|
19
26
|
|
20
27
|
# Returns a Hash of fonts that are available to this page
|
@@ -24,7 +31,7 @@ module PDF
|
|
24
31
|
# of calling it over and over.
|
25
32
|
#
|
26
33
|
def fonts
|
27
|
-
@objects.
|
34
|
+
@objects.deref_hash!(@resources[:Font]) || {}
|
28
35
|
end
|
29
36
|
|
30
37
|
# Returns a Hash of external graphic states that are available to this
|
@@ -35,7 +42,7 @@ module PDF
|
|
35
42
|
# of calling it over and over.
|
36
43
|
#
|
37
44
|
def graphic_states
|
38
|
-
@objects.
|
45
|
+
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
39
46
|
end
|
40
47
|
|
41
48
|
# Returns a Hash of patterns that are available to this page
|
@@ -45,7 +52,7 @@ module PDF
|
|
45
52
|
# of calling it over and over.
|
46
53
|
#
|
47
54
|
def patterns
|
48
|
-
@objects.
|
55
|
+
@objects.deref_hash!(@resources[:Pattern]) || {}
|
49
56
|
end
|
50
57
|
|
51
58
|
# Returns an Array of procedure sets that are available to this page
|
@@ -55,7 +62,7 @@ module PDF
|
|
55
62
|
# of calling it over and over.
|
56
63
|
#
|
57
64
|
def procedure_sets
|
58
|
-
@objects.
|
65
|
+
@objects.deref_array!(@resources[:ProcSet]) || []
|
59
66
|
end
|
60
67
|
|
61
68
|
# Returns a Hash of properties sets that are available to this page
|
@@ -65,7 +72,7 @@ module PDF
|
|
65
72
|
# of calling it over and over.
|
66
73
|
#
|
67
74
|
def properties
|
68
|
-
@objects.
|
75
|
+
@objects.deref_hash!(@resources[:Properties]) || {}
|
69
76
|
end
|
70
77
|
|
71
78
|
# Returns a Hash of shadings that are available to this page
|
@@ -75,7 +82,7 @@ module PDF
|
|
75
82
|
# of calling it over and over.
|
76
83
|
#
|
77
84
|
def shadings
|
78
|
-
@objects.
|
85
|
+
@objects.deref_hash!(@resources[:Shading]) || {}
|
79
86
|
end
|
80
87
|
|
81
88
|
# Returns a Hash of XObjects that are available to this page
|
@@ -85,7 +92,8 @@ module PDF
|
|
85
92
|
# of calling it over and over.
|
86
93
|
#
|
87
94
|
def xobjects
|
88
|
-
@objects.
|
95
|
+
dict = @objects.deref_hash!(@resources[:XObject]) || {}
|
96
|
+
TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
|
89
97
|
end
|
90
98
|
|
91
99
|
end
|
@@ -0,0 +1,79 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
class PDF::Reader
|
6
|
+
# Examines the Encrypt entry of a PDF trailer (if any) and returns an object that's
|
7
|
+
# able to decrypt the file.
|
8
|
+
class SecurityHandlerFactory
|
9
|
+
|
10
|
+
def self.build(encrypt, doc_id, password)
|
11
|
+
doc_id ||= []
|
12
|
+
password ||= ""
|
13
|
+
|
14
|
+
if encrypt.nil?
|
15
|
+
NullSecurityHandler.new
|
16
|
+
elsif standard?(encrypt)
|
17
|
+
build_standard_handler(encrypt, doc_id, password)
|
18
|
+
elsif standard_v5?(encrypt)
|
19
|
+
build_v5_handler(encrypt, doc_id, password)
|
20
|
+
else
|
21
|
+
UnimplementedSecurityHandler.new
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def self.build_standard_handler(encrypt, doc_id, password)
|
26
|
+
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
27
|
+
key_builder = StandardKeyBuilder.new(
|
28
|
+
key_length: (encrypt[:Length] || 40).to_i,
|
29
|
+
revision: encrypt[:R],
|
30
|
+
owner_key: encrypt[:O],
|
31
|
+
user_key: encrypt[:U],
|
32
|
+
permissions: encrypt[:P].to_i,
|
33
|
+
encrypted_metadata: encmeta,
|
34
|
+
file_id: doc_id.first,
|
35
|
+
)
|
36
|
+
cfm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
37
|
+
if cfm == :AESV2
|
38
|
+
AesV2SecurityHandler.new(key_builder.key(password))
|
39
|
+
else
|
40
|
+
Rc4SecurityHandler.new(key_builder.key(password))
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
def self.build_v5_handler(encrypt, doc_id, password)
|
45
|
+
key_builder = KeyBuilderV5.new(
|
46
|
+
owner_key: encrypt[:O],
|
47
|
+
user_key: encrypt[:U],
|
48
|
+
owner_encryption_key: encrypt[:OE],
|
49
|
+
user_encryption_key: encrypt[:UE],
|
50
|
+
)
|
51
|
+
AesV3SecurityHandler.new(key_builder.key(password))
|
52
|
+
end
|
53
|
+
|
54
|
+
# This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
|
55
|
+
def self.standard?(encrypt)
|
56
|
+
return false if encrypt.nil?
|
57
|
+
|
58
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
59
|
+
version = encrypt.fetch(:V, 0)
|
60
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
61
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
62
|
+
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
63
|
+
end
|
64
|
+
|
65
|
+
# This handler supports both
|
66
|
+
# - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
|
67
|
+
# - AES-256 encryption defined in PDF 2.0 ('revision 6')
|
68
|
+
def self.standard_v5?(encrypt)
|
69
|
+
return false if encrypt.nil?
|
70
|
+
|
71
|
+
filter = encrypt.fetch(:Filter, :Standard)
|
72
|
+
version = encrypt.fetch(:V, 0)
|
73
|
+
revision = encrypt.fetch(:R, 0)
|
74
|
+
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
75
|
+
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
76
|
+
((version == 5) && (revision == 5 || revision == 6) && (algorithm == :AESV3))
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
@@ -1,38 +1,19 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
2
|
|
4
|
-
################################################################################
|
5
|
-
#
|
6
|
-
# Copyright (C) 2011 Evan J Brunner (ejbrun@appittome.com)
|
7
|
-
#
|
8
|
-
# Permission is hereby granted, free of charge, to any person obtaining
|
9
|
-
# a copy of this software and associated documentation files (the
|
10
|
-
# "Software"), to deal in the Software without restriction, including
|
11
|
-
# without limitation the rights to use, copy, modify, merge, publish,
|
12
|
-
# distribute, sublicense, and/or sell copies of the Software, and to
|
13
|
-
# permit persons to whom the Software is furnished to do so, subject to
|
14
|
-
# the following conditions:
|
15
|
-
#
|
16
|
-
# The above copyright notice and this permission notice shall be
|
17
|
-
# included in all copies or substantial portions of the Software.
|
18
|
-
#
|
19
|
-
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
20
|
-
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
21
|
-
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
22
|
-
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
23
|
-
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
24
|
-
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
25
|
-
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
26
|
-
#
|
27
|
-
################################################################################
|
28
3
|
require 'digest/md5'
|
29
|
-
require 'openssl'
|
30
4
|
require 'rc4'
|
31
5
|
|
32
6
|
class PDF::Reader
|
33
7
|
|
34
|
-
#
|
35
|
-
|
8
|
+
# Processes the Encrypt dict from an encrypted PDF and a user provided
|
9
|
+
# password and returns a key that can decrypt the file.
|
10
|
+
#
|
11
|
+
# This can generate a key compatible with the following standard encryption algorithms:
|
12
|
+
#
|
13
|
+
# * Version 1-3, all variants
|
14
|
+
# * Version 4, V2 (RC4) and AESV2
|
15
|
+
#
|
16
|
+
class StandardKeyBuilder
|
36
17
|
|
37
18
|
## 7.6.3.3 Encryption Key Algorithm (pp61)
|
38
19
|
#
|
@@ -44,9 +25,6 @@ class PDF::Reader
|
|
44
25
|
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
|
45
26
|
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
|
46
27
|
|
47
|
-
attr_reader :key_length, :revision, :encrypt_key
|
48
|
-
attr_reader :owner_key, :user_key, :permissions, :file_id, :password
|
49
|
-
|
50
28
|
def initialize(opts = {})
|
51
29
|
@key_length = opts[:key_length].to_i/8
|
52
30
|
@revision = opts[:revision].to_i
|
@@ -55,72 +33,30 @@ class PDF::Reader
|
|
55
33
|
@permissions = opts[:permissions].to_i
|
56
34
|
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
57
35
|
@file_id = opts[:file_id] || ""
|
58
|
-
@encrypt_key = build_standard_key(opts[:password] || "")
|
59
|
-
@cfm = opts[:cfm]
|
60
36
|
|
61
37
|
if @key_length != 5 && @key_length != 16
|
62
|
-
msg = "
|
38
|
+
msg = "StandardKeyBuilder only supports 40 and 128 bit\
|
63
39
|
encryption (#{@key_length * 8}bit)"
|
64
|
-
raise
|
40
|
+
raise UnsupportedFeatureError, msg
|
65
41
|
end
|
66
42
|
end
|
67
43
|
|
68
|
-
#
|
69
|
-
def self.supports?(encrypt)
|
70
|
-
return false if encrypt.nil?
|
71
|
-
|
72
|
-
filter = encrypt.fetch(:Filter, :Standard)
|
73
|
-
version = encrypt.fetch(:V, 0)
|
74
|
-
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
75
|
-
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
76
|
-
(version <= 3 || (version == 4 && ((algorithm == :V2) || (algorithm == :AESV2))))
|
77
|
-
end
|
78
|
-
|
79
|
-
##7.6.2 General Encryption Algorithm
|
80
|
-
#
|
81
|
-
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
82
|
-
#
|
83
|
-
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
44
|
+
# Takes a string containing a user provided password.
|
84
45
|
#
|
85
|
-
#
|
86
|
-
#
|
46
|
+
# If the password matches the file, then a string containing a key suitable for
|
47
|
+
# decrypting the file will be returned. If the password doesn't match the file,
|
48
|
+
# an exception will be raised.
|
87
49
|
#
|
88
|
-
def
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
else
|
93
|
-
decrypt_rc4(buf, ref)
|
94
|
-
end
|
95
|
-
end
|
96
|
-
|
97
|
-
private
|
50
|
+
def key(pass)
|
51
|
+
pass ||= ""
|
52
|
+
encrypt_key = auth_owner_pass(pass)
|
53
|
+
encrypt_key ||= auth_user_pass(pass)
|
98
54
|
|
99
|
-
|
100
|
-
|
101
|
-
def decrypt_rc4( buf, ref )
|
102
|
-
objKey = @encrypt_key.dup
|
103
|
-
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
104
|
-
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
105
|
-
length = objKey.length < 16 ? objKey.length : 16
|
106
|
-
rc4 = RC4.new( Digest::MD5.digest(objKey)[0,length] )
|
107
|
-
rc4.decrypt(buf)
|
55
|
+
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
56
|
+
encrypt_key
|
108
57
|
end
|
109
58
|
|
110
|
-
|
111
|
-
# when (version == 4 and CFM == AESV2)
|
112
|
-
def decrypt_aes128( buf, ref )
|
113
|
-
objKey = @encrypt_key.dup
|
114
|
-
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
115
|
-
(0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
|
116
|
-
objKey << 'sAlT' # Algorithm 1, b)
|
117
|
-
length = objKey.length < 16 ? objKey.length : 16
|
118
|
-
cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
|
119
|
-
cipher.decrypt
|
120
|
-
cipher.key = Digest::MD5.digest(objKey)[0,length]
|
121
|
-
cipher.iv = buf[0..15]
|
122
|
-
cipher.update(buf[16..-1]) + cipher.final
|
123
|
-
end
|
59
|
+
private
|
124
60
|
|
125
61
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
126
62
|
# pp61 of spec
|
@@ -152,7 +88,7 @@ class PDF::Reader
|
|
152
88
|
md5 = Digest::MD5.digest(pad_pass(pass))
|
153
89
|
if @revision > 2 then
|
154
90
|
50.times { md5 = Digest::MD5.digest(md5) }
|
155
|
-
keyBegins = md5[0, key_length]
|
91
|
+
keyBegins = md5[0, @key_length]
|
156
92
|
#first iteration decrypt owner_key
|
157
93
|
out = @owner_key
|
158
94
|
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
@@ -217,12 +153,5 @@ class PDF::Reader
|
|
217
153
|
end
|
218
154
|
end
|
219
155
|
|
220
|
-
def build_standard_key(pass)
|
221
|
-
encrypt_key = auth_owner_pass(pass)
|
222
|
-
encrypt_key ||= auth_user_pass(pass)
|
223
|
-
|
224
|
-
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
225
|
-
encrypt_key
|
226
|
-
end
|
227
156
|
end
|
228
157
|
end
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
################################################################################
|
@@ -39,7 +40,7 @@ class PDF::Reader
|
|
39
40
|
# Creates a new stream with the specified dictionary and data. The dictionary
|
40
41
|
# should be a standard ruby hash, the data should be a standard ruby string.
|
41
42
|
def initialize(hash, data)
|
42
|
-
@hash = hash
|
43
|
+
@hash = TypeCheck.cast_to_pdf_dict!(hash)
|
43
44
|
@data = data
|
44
45
|
@udata = nil
|
45
46
|
end
|
@@ -61,7 +62,7 @@ class PDF::Reader
|
|
61
62
|
end
|
62
63
|
|
63
64
|
Array(hash[:Filter]).each_with_index do |filter, index|
|
64
|
-
@udata = Filter.with(filter, options[index]).filter(@udata)
|
65
|
+
@udata = Filter.with(filter, options[index] || {}).filter(@udata)
|
65
66
|
end
|
66
67
|
end
|
67
68
|
@udata
|