pdf-reader 2.5.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +42 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +90 -46
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +19 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +2 -1
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +36 -37
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +23 -12
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +27 -4
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +21 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -0,0 +1,245 @@
|
|
1
|
+
# -----------------------------------------------------------
|
2
|
+
# Copyright 2002-2019 Adobe (http://www.adobe.com/).
|
3
|
+
#
|
4
|
+
# Redistribution and use in source and binary forms, with or
|
5
|
+
# without modification, are permitted provided that the
|
6
|
+
# following conditions are met:
|
7
|
+
#
|
8
|
+
# Redistributions of source code must retain the above
|
9
|
+
# copyright notice, this list of conditions and the following
|
10
|
+
# disclaimer.
|
11
|
+
#
|
12
|
+
# Redistributions in binary form must reproduce the above
|
13
|
+
# copyright notice, this list of conditions and the following
|
14
|
+
# disclaimer in the documentation and/or other materials
|
15
|
+
# provided with the distribution.
|
16
|
+
#
|
17
|
+
# Neither the name of Adobe nor the names of its contributors
|
18
|
+
# may be used to endorse or promote products derived from this
|
19
|
+
# software without specific prior written permission.
|
20
|
+
#
|
21
|
+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
|
22
|
+
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
|
23
|
+
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
24
|
+
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
25
|
+
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
|
26
|
+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
27
|
+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
28
|
+
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
29
|
+
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
30
|
+
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
31
|
+
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
|
32
|
+
# OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
33
|
+
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
34
|
+
# -----------------------------------------------------------
|
35
|
+
# Name: ITC Zapf Dingbats Glyph List
|
36
|
+
# Table version: 2.0
|
37
|
+
# Date: September 20, 2002
|
38
|
+
# URL: https://github.com/adobe-type-tools/agl-aglfn
|
39
|
+
#
|
40
|
+
# Format: two semicolon-delimited fields:
|
41
|
+
# (1) glyph name--upper/lowercase letters and digits
|
42
|
+
# (2) Unicode scalar value--four uppercase hexadecimal digits
|
43
|
+
#
|
44
|
+
a100;275E
|
45
|
+
a101;2761
|
46
|
+
a102;2762
|
47
|
+
a103;2763
|
48
|
+
a104;2764
|
49
|
+
a105;2710
|
50
|
+
a106;2765
|
51
|
+
a107;2766
|
52
|
+
a108;2767
|
53
|
+
a109;2660
|
54
|
+
a10;2721
|
55
|
+
a110;2665
|
56
|
+
a111;2666
|
57
|
+
a112;2663
|
58
|
+
a117;2709
|
59
|
+
a118;2708
|
60
|
+
a119;2707
|
61
|
+
a11;261B
|
62
|
+
a120;2460
|
63
|
+
a121;2461
|
64
|
+
a122;2462
|
65
|
+
a123;2463
|
66
|
+
a124;2464
|
67
|
+
a125;2465
|
68
|
+
a126;2466
|
69
|
+
a127;2467
|
70
|
+
a128;2468
|
71
|
+
a129;2469
|
72
|
+
a12;261E
|
73
|
+
a130;2776
|
74
|
+
a131;2777
|
75
|
+
a132;2778
|
76
|
+
a133;2779
|
77
|
+
a134;277A
|
78
|
+
a135;277B
|
79
|
+
a136;277C
|
80
|
+
a137;277D
|
81
|
+
a138;277E
|
82
|
+
a139;277F
|
83
|
+
a13;270C
|
84
|
+
a140;2780
|
85
|
+
a141;2781
|
86
|
+
a142;2782
|
87
|
+
a143;2783
|
88
|
+
a144;2784
|
89
|
+
a145;2785
|
90
|
+
a146;2786
|
91
|
+
a147;2787
|
92
|
+
a148;2788
|
93
|
+
a149;2789
|
94
|
+
a14;270D
|
95
|
+
a150;278A
|
96
|
+
a151;278B
|
97
|
+
a152;278C
|
98
|
+
a153;278D
|
99
|
+
a154;278E
|
100
|
+
a155;278F
|
101
|
+
a156;2790
|
102
|
+
a157;2791
|
103
|
+
a158;2792
|
104
|
+
a159;2793
|
105
|
+
a15;270E
|
106
|
+
a160;2794
|
107
|
+
a161;2192
|
108
|
+
a162;27A3
|
109
|
+
a163;2194
|
110
|
+
a164;2195
|
111
|
+
a165;2799
|
112
|
+
a166;279B
|
113
|
+
a167;279C
|
114
|
+
a168;279D
|
115
|
+
a169;279E
|
116
|
+
a16;270F
|
117
|
+
a170;279F
|
118
|
+
a171;27A0
|
119
|
+
a172;27A1
|
120
|
+
a173;27A2
|
121
|
+
a174;27A4
|
122
|
+
a175;27A5
|
123
|
+
a176;27A6
|
124
|
+
a177;27A7
|
125
|
+
a178;27A8
|
126
|
+
a179;27A9
|
127
|
+
a17;2711
|
128
|
+
a180;27AB
|
129
|
+
a181;27AD
|
130
|
+
a182;27AF
|
131
|
+
a183;27B2
|
132
|
+
a184;27B3
|
133
|
+
a185;27B5
|
134
|
+
a186;27B8
|
135
|
+
a187;27BA
|
136
|
+
a188;27BB
|
137
|
+
a189;27BC
|
138
|
+
a18;2712
|
139
|
+
a190;27BD
|
140
|
+
a191;27BE
|
141
|
+
a192;279A
|
142
|
+
a193;27AA
|
143
|
+
a194;27B6
|
144
|
+
a195;27B9
|
145
|
+
a196;2798
|
146
|
+
a197;27B4
|
147
|
+
a198;27B7
|
148
|
+
a199;27AC
|
149
|
+
a19;2713
|
150
|
+
a1;2701
|
151
|
+
a200;27AE
|
152
|
+
a201;27B1
|
153
|
+
a202;2703
|
154
|
+
a203;2750
|
155
|
+
a204;2752
|
156
|
+
a205;276E
|
157
|
+
a206;2770
|
158
|
+
a20;2714
|
159
|
+
a21;2715
|
160
|
+
a22;2716
|
161
|
+
a23;2717
|
162
|
+
a24;2718
|
163
|
+
a25;2719
|
164
|
+
a26;271A
|
165
|
+
a27;271B
|
166
|
+
a28;271C
|
167
|
+
a29;2722
|
168
|
+
a2;2702
|
169
|
+
a30;2723
|
170
|
+
a31;2724
|
171
|
+
a32;2725
|
172
|
+
a33;2726
|
173
|
+
a34;2727
|
174
|
+
a35;2605
|
175
|
+
a36;2729
|
176
|
+
a37;272A
|
177
|
+
a38;272B
|
178
|
+
a39;272C
|
179
|
+
a3;2704
|
180
|
+
a40;272D
|
181
|
+
a41;272E
|
182
|
+
a42;272F
|
183
|
+
a43;2730
|
184
|
+
a44;2731
|
185
|
+
a45;2732
|
186
|
+
a46;2733
|
187
|
+
a47;2734
|
188
|
+
a48;2735
|
189
|
+
a49;2736
|
190
|
+
a4;260E
|
191
|
+
a50;2737
|
192
|
+
a51;2738
|
193
|
+
a52;2739
|
194
|
+
a53;273A
|
195
|
+
a54;273B
|
196
|
+
a55;273C
|
197
|
+
a56;273D
|
198
|
+
a57;273E
|
199
|
+
a58;273F
|
200
|
+
a59;2740
|
201
|
+
a5;2706
|
202
|
+
a60;2741
|
203
|
+
a61;2742
|
204
|
+
a62;2743
|
205
|
+
a63;2744
|
206
|
+
a64;2745
|
207
|
+
a65;2746
|
208
|
+
a66;2747
|
209
|
+
a67;2748
|
210
|
+
a68;2749
|
211
|
+
a69;274A
|
212
|
+
a6;271D
|
213
|
+
a70;274B
|
214
|
+
a71;25CF
|
215
|
+
a72;274D
|
216
|
+
a73;25A0
|
217
|
+
a74;274F
|
218
|
+
a75;2751
|
219
|
+
a76;25B2
|
220
|
+
a77;25BC
|
221
|
+
a78;25C6
|
222
|
+
a79;2756
|
223
|
+
a7;271E
|
224
|
+
a81;25D7
|
225
|
+
a82;2758
|
226
|
+
a83;2759
|
227
|
+
a84;275A
|
228
|
+
a85;276F
|
229
|
+
a86;2771
|
230
|
+
a87;2772
|
231
|
+
a88;2773
|
232
|
+
a89;2768
|
233
|
+
a8;271F
|
234
|
+
a90;2769
|
235
|
+
a91;276C
|
236
|
+
a92;276D
|
237
|
+
a93;276A
|
238
|
+
a94;276B
|
239
|
+
a95;2774
|
240
|
+
a96;2775
|
241
|
+
a97;275B
|
242
|
+
a98;275C
|
243
|
+
a99;275D
|
244
|
+
a9;2720
|
245
|
+
# END
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
require 'digest/md5'
|
6
|
+
require 'rc4'
|
7
|
+
|
8
|
+
class PDF::Reader
|
9
|
+
|
10
|
+
# Processes the Encrypt dict from an encrypted PDF and a user provided
|
11
|
+
# password and returns a key that can decrypt the file.
|
12
|
+
#
|
13
|
+
# This can generate a decryption key compatible with the following standard encryption algorithms:
|
14
|
+
#
|
15
|
+
# * Version 5 (AESV3)
|
16
|
+
#
|
17
|
+
class KeyBuilderV5

  # Stores the values from the Encrypt dictionary that are needed to recover
  # the file decryption key.
  #
  # opts may contain String values under :owner_key, :user_key,
  # :owner_encryption_key and :user_encryption_key. Any entry that is missing
  # falls back to an empty string.
  def initialize(opts = {})
    @key_length = 256

    # hash(32B) + validation salt(8B) + key salt(8B)
    @owner_key = opts[:owner_key] || ""

    # hash(32B) + validation salt(8B) + key salt(8B)
    @user_key = opts[:user_key] || ""

    # decryption key, encrypted w/ owner password
    @owner_encryption_key = opts[:owner_encryption_key] || ""

    # decryption key, encrypted w/ user password
    @user_encryption_key = opts[:user_encryption_key] || ""
  end

  # Takes a string containing a user provided password.
  #
  # If the password matches the file, then a string containing a key suitable for
  # decrypting the file will be returned. If the password doesn't match the file,
  # an exception (PDF::Reader::EncryptedPDFError) will be raised.
  #
  # The password is tried, in order, as: owner password, user password,
  # owner password (revision 6 hashing), user password (revision 6 hashing).
  #
  def key(pass)
    pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes

    # Hash the raw bytes of the password. The salts and digests it gets
    # concatenated with are binary strings; joining a non-ASCII UTF-8 string
    # with high-byte binary data would raise Encoding::CompatibilityError.
    raw_pass = pass.b

    encrypt_key   = auth_owner_pass(raw_pass)
    encrypt_key ||= auth_user_pass(raw_pass)
    encrypt_key ||= auth_owner_pass_r6(raw_pass)
    encrypt_key ||= auth_user_pass_r6(raw_pass)

    # NOTE(review): the message echoes the supplied password; kept as-is because
    # callers may match on the text, but consider whether it should be logged.
    raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
    encrypt_key
  end

  private

  # Algorithm 3.2a - Computing an encryption key
  #
  # Defined in PDF 1.7 Extension Level 3
  #
  # if the string is a valid user/owner password, this will return the decryption key
  #
  # Returns nil when the password does not validate against the owner key.
  # Slices are passed through to_s so that a short or empty key string makes
  # the check fail cleanly instead of raising on nil.
  def auth_owner_pass(password)
    if Digest::SHA256.digest(password + @owner_key[32..39].to_s + @user_key) == @owner_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1].to_s + @user_key)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@owner_encryption_key) + cipher.final
    end
  end

  # Same check as auth_owner_pass, but against the user key and without mixing
  # the user key into the hash input. Returns the decryption key, or nil when
  # the password does not validate.
  def auth_user_pass(password)
    if Digest::SHA256.digest(password + @user_key[32..39].to_s) == @user_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = Digest::SHA256.digest(password + @user_key[40..-1].to_s)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@user_encryption_key) + cipher.final
    end
  end

  # Owner password check using the revision 6 hash (r6_digest) in place of a
  # single SHA-256 round. Returns the decryption key, or nil on mismatch.
  def auth_owner_pass_r6(password)
    if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = r6_digest(password, @owner_key[40,8].to_s, @user_key[0, 48].to_s)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@owner_encryption_key) + cipher.final
    end
  end

  # User password check using the revision 6 hash (r6_digest).
  # Returns the decryption key, or nil on mismatch.
  def auth_user_pass_r6(password)
    if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = r6_digest(password, @user_key[40,8].to_s)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@user_encryption_key) + cipher.final
    end
  end

  # PDF 2.0 spec, 7.6.4.3.4
  # Algorithm 2.B: Computing a hash (revision 6 and later)
  #
  # Starts from SHA-256 of password + salt + user_key, then repeatedly
  # AES-128-CBC encrypts 64 copies of (password + k + user_key) and re-hashes
  # the result with SHA-256/384/512, chosen by the first 16 bytes of the
  # ciphertext modulo 3. Returns the first 32 bytes of the final hash.
  def r6_digest(password, salt, user_key = '')
    k = Digest::SHA256.digest(password + salt + user_key)
    e = ''

    i = 0
    # At least 64 rounds; afterwards keep going while the last ciphertext byte
    # is greater than (round number - 32).
    while i < 64 || e.getbyte(-1).to_i > i - 32
      k1 = (password + k + user_key) * 64

      aes = OpenSSL::Cipher.new("aes-128-cbc").encrypt
      aes.key = k[0, 16].to_s
      aes.iv = k[16, 16].to_s
      aes.padding = 0
      e = String.new(aes.update(k1))
      k = case unpack_128bit_bigendian_int(e) % 3
          when 0 then Digest::SHA256.digest(e)
          when 1 then Digest::SHA384.digest(e)
          when 2 then Digest::SHA512.digest(e)
          end
      i = i + 1
    end

    k[0, 32].to_s
  end

  # Interprets the first 16 bytes of str as one unsigned big-endian integer.
  # Missing 32-bit words (str shorter than 16 bytes) count as zero.
  def unpack_128bit_bigendian_int(str)
    ints = str[0,16].to_s.unpack("N*")
    (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
  end

end
|
137
|
+
end
|
138
|
+
|
data/lib/pdf/reader/lzw.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: true
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
module PDF
|
@@ -35,9 +36,9 @@ module PDF
|
|
35
36
|
|
36
37
|
def read
|
37
38
|
bits_left_in_chunk = @bits_in_chunk
|
38
|
-
chunk =
|
39
|
+
chunk = -1
|
39
40
|
while bits_left_in_chunk > 0 and @current_pos < @data.size
|
40
|
-
chunk = 0 if chunk
|
41
|
+
chunk = 0 if chunk < 0
|
41
42
|
codepoint = @data[@current_pos, 1].unpack("C*")[0]
|
42
43
|
current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
|
43
44
|
dif = bits_left_in_chunk - @bits_left_in_byte
|
@@ -83,6 +84,7 @@ module PDF
|
|
83
84
|
#
|
84
85
|
def self.decode(data)
|
85
86
|
stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
|
87
|
+
string_table = StringTable.new
|
86
88
|
result = "".dup
|
87
89
|
until (code = stream.read) == CODE_EOD
|
88
90
|
if code == CODE_CLEAR_TABLE
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
# frozen_string_literal: true
|
3
4
|
|
4
5
|
class PDF::Reader
|
@@ -6,10 +7,6 @@ class PDF::Reader
|
|
6
7
|
# A null object security handler. Used when a PDF is unencrypted.
|
7
8
|
class NullSecurityHandler
|
8
9
|
|
9
|
-
def self.supports?(encrypt)
|
10
|
-
encrypt.nil?
|
11
|
-
end
|
12
|
-
|
13
10
|
def decrypt(buf, _ref)
|
14
11
|
buf
|
15
12
|
end
|