pdf-reader 2.5.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -0,0 +1,245 @@
1
+ # -----------------------------------------------------------
2
+ # Copyright 2002-2019 Adobe (http://www.adobe.com/).
3
+ #
4
+ # Redistribution and use in source and binary forms, with or
5
+ # without modification, are permitted provided that the
6
+ # following conditions are met:
7
+ #
8
+ # Redistributions of source code must retain the above
9
+ # copyright notice, this list of conditions and the following
10
+ # disclaimer.
11
+ #
12
+ # Redistributions in binary form must reproduce the above
13
+ # copyright notice, this list of conditions and the following
14
+ # disclaimer in the documentation and/or other materials
15
+ # provided with the distribution.
16
+ #
17
+ # Neither the name of Adobe nor the names of its contributors
18
+ # may be used to endorse or promote products derived from this
19
+ # software without specific prior written permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
22
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
23
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28
+ # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
+ # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
+ # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+ # -----------------------------------------------------------
35
+ # Name: ITC Zapf Dingbats Glyph List
36
+ # Table version: 2.0
37
+ # Date: September 20, 2002
38
+ # URL: https://github.com/adobe-type-tools/agl-aglfn
39
+ #
40
+ # Format: two semicolon-delimited fields:
41
+ # (1) glyph name--upper/lowercase letters and digits
42
+ # (2) Unicode scalar value--four uppercase hexadecimal digits
43
+ #
44
+ a100;275E
45
+ a101;2761
46
+ a102;2762
47
+ a103;2763
48
+ a104;2764
49
+ a105;2710
50
+ a106;2765
51
+ a107;2766
52
+ a108;2767
53
+ a109;2660
54
+ a10;2721
55
+ a110;2665
56
+ a111;2666
57
+ a112;2663
58
+ a117;2709
59
+ a118;2708
60
+ a119;2707
61
+ a11;261B
62
+ a120;2460
63
+ a121;2461
64
+ a122;2462
65
+ a123;2463
66
+ a124;2464
67
+ a125;2465
68
+ a126;2466
69
+ a127;2467
70
+ a128;2468
71
+ a129;2469
72
+ a12;261E
73
+ a130;2776
74
+ a131;2777
75
+ a132;2778
76
+ a133;2779
77
+ a134;277A
78
+ a135;277B
79
+ a136;277C
80
+ a137;277D
81
+ a138;277E
82
+ a139;277F
83
+ a13;270C
84
+ a140;2780
85
+ a141;2781
86
+ a142;2782
87
+ a143;2783
88
+ a144;2784
89
+ a145;2785
90
+ a146;2786
91
+ a147;2787
92
+ a148;2788
93
+ a149;2789
94
+ a14;270D
95
+ a150;278A
96
+ a151;278B
97
+ a152;278C
98
+ a153;278D
99
+ a154;278E
100
+ a155;278F
101
+ a156;2790
102
+ a157;2791
103
+ a158;2792
104
+ a159;2793
105
+ a15;270E
106
+ a160;2794
107
+ a161;2192
108
+ a162;27A3
109
+ a163;2194
110
+ a164;2195
111
+ a165;2799
112
+ a166;279B
113
+ a167;279C
114
+ a168;279D
115
+ a169;279E
116
+ a16;270F
117
+ a170;279F
118
+ a171;27A0
119
+ a172;27A1
120
+ a173;27A2
121
+ a174;27A4
122
+ a175;27A5
123
+ a176;27A6
124
+ a177;27A7
125
+ a178;27A8
126
+ a179;27A9
127
+ a17;2711
128
+ a180;27AB
129
+ a181;27AD
130
+ a182;27AF
131
+ a183;27B2
132
+ a184;27B3
133
+ a185;27B5
134
+ a186;27B8
135
+ a187;27BA
136
+ a188;27BB
137
+ a189;27BC
138
+ a18;2712
139
+ a190;27BD
140
+ a191;27BE
141
+ a192;279A
142
+ a193;27AA
143
+ a194;27B6
144
+ a195;27B9
145
+ a196;2798
146
+ a197;27B4
147
+ a198;27B7
148
+ a199;27AC
149
+ a19;2713
150
+ a1;2701
151
+ a200;27AE
152
+ a201;27B1
153
+ a202;2703
154
+ a203;2750
155
+ a204;2752
156
+ a205;276E
157
+ a206;2770
158
+ a20;2714
159
+ a21;2715
160
+ a22;2716
161
+ a23;2717
162
+ a24;2718
163
+ a25;2719
164
+ a26;271A
165
+ a27;271B
166
+ a28;271C
167
+ a29;2722
168
+ a2;2702
169
+ a30;2723
170
+ a31;2724
171
+ a32;2725
172
+ a33;2726
173
+ a34;2727
174
+ a35;2605
175
+ a36;2729
176
+ a37;272A
177
+ a38;272B
178
+ a39;272C
179
+ a3;2704
180
+ a40;272D
181
+ a41;272E
182
+ a42;272F
183
+ a43;2730
184
+ a44;2731
185
+ a45;2732
186
+ a46;2733
187
+ a47;2734
188
+ a48;2735
189
+ a49;2736
190
+ a4;260E
191
+ a50;2737
192
+ a51;2738
193
+ a52;2739
194
+ a53;273A
195
+ a54;273B
196
+ a55;273C
197
+ a56;273D
198
+ a57;273E
199
+ a58;273F
200
+ a59;2740
201
+ a5;2706
202
+ a60;2741
203
+ a61;2742
204
+ a62;2743
205
+ a63;2744
206
+ a64;2745
207
+ a65;2746
208
+ a66;2747
209
+ a67;2748
210
+ a68;2749
211
+ a69;274A
212
+ a6;271D
213
+ a70;274B
214
+ a71;25CF
215
+ a72;274D
216
+ a73;25A0
217
+ a74;274F
218
+ a75;2751
219
+ a76;25B2
220
+ a77;25BC
221
+ a78;25C6
222
+ a79;2756
223
+ a7;271E
224
+ a81;25D7
225
+ a82;2758
226
+ a83;2759
227
+ a84;275A
228
+ a85;276F
229
+ a86;2771
230
+ a87;2772
231
+ a88;2773
232
+ a89;2768
233
+ a8;271F
234
+ a90;2769
235
+ a91;276C
236
+ a92;276D
237
+ a93;276A
238
+ a94;276B
239
+ a95;2774
240
+ a96;2775
241
+ a97;275B
242
+ a98;275C
243
+ a99;275D
244
+ a9;2720
245
+ # END
@@ -0,0 +1,138 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
11
+ # password and returns a key that can decrypt the file.
12
+ #
13
+ # This can generate a decryption key compatible with the following standard encryption algorithms:
14
+ #
15
+ # * Version 5 (AESV3)
16
+ #
17
+ class KeyBuilderV5
18
+
19
+ def initialize(opts = {})
20
+ @key_length = 256
21
+
22
+ # hash(32B) + validation salt(8B) + key salt(8B)
23
+ @owner_key = opts[:owner_key] || ""
24
+
25
+ # hash(32B) + validation salt(8B) + key salt(8B)
26
+ @user_key = opts[:user_key] || ""
27
+
28
+ # decryption key, encrypted w/ owner password
29
+ @owner_encryption_key = opts[:owner_encryption_key] || ""
30
+
31
+ # decryption key, encrypted w/ user password
32
+ @user_encryption_key = opts[:user_encryption_key] || ""
33
+ end
34
+
35
+ # Takes a string containing a user provided password.
36
+ #
37
+ # If the password matches the file, then a string containing a key suitable for
38
+ # decrypting the file will be returned. If the password doesn't match the file,
39
+ # an exception will be raised.
40
+ #
41
+ def key(pass)
42
+ pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes
43
+
44
+ encrypt_key = auth_owner_pass(pass)
45
+ encrypt_key ||= auth_user_pass(pass)
46
+ encrypt_key ||= auth_owner_pass_r6(pass)
47
+ encrypt_key ||= auth_user_pass_r6(pass)
48
+
49
+ raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
50
+ encrypt_key
51
+ end
52
+
53
+ private
54
+
55
+ # Algorithm 3.2a - Computing an encryption key
56
+ #
57
+ # Defined in PDF 1.7 Extension Level 3
58
+ #
59
+ # If the string is a valid user/owner password, this returns the decryption key.
60
+ #
61
+ def auth_owner_pass(password)
62
+ if Digest::SHA256.digest(password + @owner_key[32..39] + @user_key) == @owner_key[0..31]
63
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
64
+ cipher.decrypt
65
+ cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1] + @user_key)
66
+ cipher.iv = "\x00" * 16
67
+ cipher.padding = 0
68
+ cipher.update(@owner_encryption_key) + cipher.final
69
+ end
70
+ end
71
+
72
+ def auth_user_pass(password)
73
+ if Digest::SHA256.digest(password + @user_key[32..39]) == @user_key[0..31]
74
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
75
+ cipher.decrypt
76
+ cipher.key = Digest::SHA256.digest(password + @user_key[40..-1])
77
+ cipher.iv = "\x00" * 16
78
+ cipher.padding = 0
79
+ cipher.update(@user_encryption_key) + cipher.final
80
+ end
81
+ end
82
+
83
+ def auth_owner_pass_r6(password)
84
+ if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
85
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
86
+ cipher.decrypt
87
+ cipher.key = r6_digest(password, @owner_key[40,8].to_s, @user_key[0, 48].to_s)
88
+ cipher.iv = "\x00" * 16
89
+ cipher.padding = 0
90
+ cipher.update(@owner_encryption_key) + cipher.final
91
+ end
92
+ end
93
+
94
+ def auth_user_pass_r6(password)
95
+ if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
96
+ cipher = OpenSSL::Cipher.new('AES-256-CBC')
97
+ cipher.decrypt
98
+ cipher.key = r6_digest(password, @user_key[40,8].to_s)
99
+ cipher.iv = "\x00" * 16
100
+ cipher.padding = 0
101
+ cipher.update(@user_encryption_key) + cipher.final
102
+ end
103
+ end
104
+
105
+ # PDF 2.0 spec, 7.6.4.3.4
106
+ # Algorithm 2.B: Computing a hash (revision 6 and later)
107
+ def r6_digest(password, salt, user_key = '')
108
+ k = Digest::SHA256.digest(password + salt + user_key)
109
+ e = ''
110
+
111
+ i = 0
112
+ while i < 64 or e.getbyte(-1).to_i > i - 32
113
+ k1 = (password + k + user_key) * 64
114
+
115
+ aes = OpenSSL::Cipher.new("aes-128-cbc").encrypt
116
+ aes.key = k[0, 16].to_s
117
+ aes.iv = k[16, 16].to_s
118
+ aes.padding = 0
119
+ e = String.new(aes.update(k1))
120
+ k = case unpack_128bit_bigendian_int(e) % 3
121
+ when 0 then Digest::SHA256.digest(e)
122
+ when 1 then Digest::SHA384.digest(e)
123
+ when 2 then Digest::SHA512.digest(e)
124
+ end
125
+ i = i + 1
126
+ end
127
+
128
+ k[0, 32].to_s
129
+ end
130
+
131
+ def unpack_128bit_bigendian_int(str)
132
+ ints = str[0,16].to_s.unpack("N*")
133
+ (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
134
+ end
135
+
136
+ end
137
+ end
138
+
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -35,9 +36,9 @@ module PDF
35
36
 
36
37
  def read
37
38
  bits_left_in_chunk = @bits_in_chunk
38
- chunk = nil
39
+ chunk = -1
39
40
  while bits_left_in_chunk > 0 and @current_pos < @data.size
40
- chunk = 0 if chunk.nil?
41
+ chunk = 0 if chunk < 0
41
42
  codepoint = @data[@current_pos, 1].unpack("C*")[0]
42
43
  current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
43
44
  dif = bits_left_in_chunk - @bits_left_in_byte
@@ -83,6 +84,7 @@ module PDF
83
84
  #
84
85
  def self.decode(data)
85
86
  stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
87
+ string_table = StringTable.new
86
88
  result = "".dup
87
89
  until (code = stream.read) == CODE_EOD
88
90
  if code == CODE_CLEAR_TABLE
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,10 +7,6 @@ class PDF::Reader
6
7
  # A null object security handler. Used when a PDF is unencrypted.
7
8
  class NullSecurityHandler
8
9
 
9
- def self.supports?(encrypt)
10
- encrypt.nil?
11
- end
12
-
13
10
  def decrypt(buf, _ref)
14
11
  buf
15
12
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'hashery/lru_hash'