pdf-reader 2.5.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
@@ -0,0 +1,245 @@
1
+ # -----------------------------------------------------------
2
+ # Copyright 2002-2019 Adobe (http://www.adobe.com/).
3
+ #
4
+ # Redistribution and use in source and binary forms, with or
5
+ # without modification, are permitted provided that the
6
+ # following conditions are met:
7
+ #
8
+ # Redistributions of source code must retain the above
9
+ # copyright notice, this list of conditions and the following
10
+ # disclaimer.
11
+ #
12
+ # Redistributions in binary form must reproduce the above
13
+ # copyright notice, this list of conditions and the following
14
+ # disclaimer in the documentation and/or other materials
15
+ # provided with the distribution.
16
+ #
17
+ # Neither the name of Adobe nor the names of its contributors
18
+ # may be used to endorse or promote products derived from this
19
+ # software without specific prior written permission.
20
+ #
21
+ # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
22
+ # CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
23
+ # INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
24
+ # MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25
+ # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
26
+ # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
27
+ # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
28
+ # NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29
+ # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30
+ # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31
+ # CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
32
+ # OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
33
+ # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
34
+ # -----------------------------------------------------------
35
+ # Name: ITC Zapf Dingbats Glyph List
36
+ # Table version: 2.0
37
+ # Date: September 20, 2002
38
+ # URL: https://github.com/adobe-type-tools/agl-aglfn
39
+ #
40
+ # Format: two semicolon-delimited fields:
41
+ # (1) glyph name--upper/lowercase letters and digits
42
+ # (2) Unicode scalar value--four uppercase hexadecimal digits
43
+ #
44
+ a100;275E
45
+ a101;2761
46
+ a102;2762
47
+ a103;2763
48
+ a104;2764
49
+ a105;2710
50
+ a106;2765
51
+ a107;2766
52
+ a108;2767
53
+ a109;2660
54
+ a10;2721
55
+ a110;2665
56
+ a111;2666
57
+ a112;2663
58
+ a117;2709
59
+ a118;2708
60
+ a119;2707
61
+ a11;261B
62
+ a120;2460
63
+ a121;2461
64
+ a122;2462
65
+ a123;2463
66
+ a124;2464
67
+ a125;2465
68
+ a126;2466
69
+ a127;2467
70
+ a128;2468
71
+ a129;2469
72
+ a12;261E
73
+ a130;2776
74
+ a131;2777
75
+ a132;2778
76
+ a133;2779
77
+ a134;277A
78
+ a135;277B
79
+ a136;277C
80
+ a137;277D
81
+ a138;277E
82
+ a139;277F
83
+ a13;270C
84
+ a140;2780
85
+ a141;2781
86
+ a142;2782
87
+ a143;2783
88
+ a144;2784
89
+ a145;2785
90
+ a146;2786
91
+ a147;2787
92
+ a148;2788
93
+ a149;2789
94
+ a14;270D
95
+ a150;278A
96
+ a151;278B
97
+ a152;278C
98
+ a153;278D
99
+ a154;278E
100
+ a155;278F
101
+ a156;2790
102
+ a157;2791
103
+ a158;2792
104
+ a159;2793
105
+ a15;270E
106
+ a160;2794
107
+ a161;2192
108
+ a162;27A3
109
+ a163;2194
110
+ a164;2195
111
+ a165;2799
112
+ a166;279B
113
+ a167;279C
114
+ a168;279D
115
+ a169;279E
116
+ a16;270F
117
+ a170;279F
118
+ a171;27A0
119
+ a172;27A1
120
+ a173;27A2
121
+ a174;27A4
122
+ a175;27A5
123
+ a176;27A6
124
+ a177;27A7
125
+ a178;27A8
126
+ a179;27A9
127
+ a17;2711
128
+ a180;27AB
129
+ a181;27AD
130
+ a182;27AF
131
+ a183;27B2
132
+ a184;27B3
133
+ a185;27B5
134
+ a186;27B8
135
+ a187;27BA
136
+ a188;27BB
137
+ a189;27BC
138
+ a18;2712
139
+ a190;27BD
140
+ a191;27BE
141
+ a192;279A
142
+ a193;27AA
143
+ a194;27B6
144
+ a195;27B9
145
+ a196;2798
146
+ a197;27B4
147
+ a198;27B7
148
+ a199;27AC
149
+ a19;2713
150
+ a1;2701
151
+ a200;27AE
152
+ a201;27B1
153
+ a202;2703
154
+ a203;2750
155
+ a204;2752
156
+ a205;276E
157
+ a206;2770
158
+ a20;2714
159
+ a21;2715
160
+ a22;2716
161
+ a23;2717
162
+ a24;2718
163
+ a25;2719
164
+ a26;271A
165
+ a27;271B
166
+ a28;271C
167
+ a29;2722
168
+ a2;2702
169
+ a30;2723
170
+ a31;2724
171
+ a32;2725
172
+ a33;2726
173
+ a34;2727
174
+ a35;2605
175
+ a36;2729
176
+ a37;272A
177
+ a38;272B
178
+ a39;272C
179
+ a3;2704
180
+ a40;272D
181
+ a41;272E
182
+ a42;272F
183
+ a43;2730
184
+ a44;2731
185
+ a45;2732
186
+ a46;2733
187
+ a47;2734
188
+ a48;2735
189
+ a49;2736
190
+ a4;260E
191
+ a50;2737
192
+ a51;2738
193
+ a52;2739
194
+ a53;273A
195
+ a54;273B
196
+ a55;273C
197
+ a56;273D
198
+ a57;273E
199
+ a58;273F
200
+ a59;2740
201
+ a5;2706
202
+ a60;2741
203
+ a61;2742
204
+ a62;2743
205
+ a63;2744
206
+ a64;2745
207
+ a65;2746
208
+ a66;2747
209
+ a67;2748
210
+ a68;2749
211
+ a69;274A
212
+ a6;271D
213
+ a70;274B
214
+ a71;25CF
215
+ a72;274D
216
+ a73;25A0
217
+ a74;274F
218
+ a75;2751
219
+ a76;25B2
220
+ a77;25BC
221
+ a78;25C6
222
+ a79;2756
223
+ a7;271E
224
+ a81;25D7
225
+ a82;2758
226
+ a83;2759
227
+ a84;275A
228
+ a85;276F
229
+ a86;2771
230
+ a87;2772
231
+ a88;2773
232
+ a89;2768
233
+ a8;271F
234
+ a90;2769
235
+ a91;276C
236
+ a92;276D
237
+ a93;276A
238
+ a94;276B
239
+ a95;2774
240
+ a96;2775
241
+ a97;275B
242
+ a98;275C
243
+ a99;275D
244
+ a9;2720
245
+ # END
@@ -0,0 +1,138 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+ require 'rc4'
7
+
8
+ class PDF::Reader
9
+
10
+ # Processes the Encrypt dict from an encrypted PDF and a user provided
11
+ # password and returns a key that can decrypt the file.
12
+ #
13
+ # This can generate a decryption key compatible with the following standard encryption algorithms:
14
+ #
15
+ # * Version 5 (AESV3)
16
+ #
17
class KeyBuilderV5

  # Stores the values from the Encrypt dictionary that are needed to
  # authenticate a password and recover the file decryption key.
  #
  # opts - Hash of (binary) strings. Any entry that is missing or nil
  #        defaults to an empty string:
  #   :owner_key            - hash(32B) + validation salt(8B) + key salt(8B)
  #   :user_key             - hash(32B) + validation salt(8B) + key salt(8B)
  #   :owner_encryption_key - decryption key, encrypted w/ owner password
  #   :user_encryption_key  - decryption key, encrypted w/ user password
  #
  def initialize(opts = {})
    @key_length = 256

    # hash(32B) + validation salt(8B) + key salt(8B)
    @owner_key = opts[:owner_key] || ""

    # hash(32B) + validation salt(8B) + key salt(8B)
    @user_key = opts[:user_key] || ""

    # decryption key, encrypted w/ owner password
    @owner_encryption_key = opts[:owner_encryption_key] || ""

    # decryption key, encrypted w/ user password
    @user_encryption_key = opts[:user_encryption_key] || ""
  end

  # Takes a string containing a user provided password.
  #
  # If the password matches the file, then a string containing a key suitable for
  # decrypting the file will be returned. If the password doesn't match the file,
  # an exception (PDF::Reader::EncryptedPDFError) will be raised.
  #
  # The password is tried, in order, as: owner password, user password,
  # owner password (revision 6 hash) and user password (revision 6 hash).
  #
  def key(pass)
    pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes

    encrypt_key = auth_owner_pass(pass)
    encrypt_key ||= auth_user_pass(pass)
    encrypt_key ||= auth_owner_pass_r6(pass)
    encrypt_key ||= auth_user_pass_r6(pass)

    # NOTE(review): the message embeds the supplied password, which may end up
    # in logs or error trackers. Left unchanged so callers matching on the
    # message keep working - consider dropping the password in a future release.
    raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
    encrypt_key
  end

  private

  # Algorithm 3.2a - Computing an encryption key
  #
  # Defined in PDF 1.7 Extension Level 3
  #
  # if the string is a valid owner password, this will return the decryption key.
  # Otherwise nil is returned.
  #
  # Slices of the owner key are passed through #to_s because slicing an empty
  # or truncated key returns nil, and String#+ raises TypeError on nil. A
  # missing owner key must simply fail to authenticate so the remaining
  # checks in #key still run.
  #
  def auth_owner_pass(password)
    validation_salt = @owner_key[32..39].to_s
    key_salt        = @owner_key[40..-1].to_s
    if Digest::SHA256.digest(password + validation_salt + @user_key) == @owner_key[0..31].to_s
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = Digest::SHA256.digest(password + key_salt + @user_key)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@owner_encryption_key) + cipher.final
    end
  end

  # Same check as auth_owner_pass, but against the user key. Returns the
  # decryption key if the password is a valid user password, otherwise nil.
  #
  # As above, the key slices are nil-guarded with #to_s so an empty or
  # truncated user key fails authentication instead of raising TypeError.
  #
  def auth_user_pass(password)
    validation_salt = @user_key[32..39].to_s
    key_salt        = @user_key[40..-1].to_s
    if Digest::SHA256.digest(password + validation_salt) == @user_key[0..31].to_s
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = Digest::SHA256.digest(password + key_salt)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@user_encryption_key) + cipher.final
    end
  end

  # Owner password check using the revision 6 hash (see r6_digest). The first
  # 48 bytes of the user key are mixed into the hash. Returns the decryption
  # key on success, otherwise nil.
  #
  def auth_owner_pass_r6(password)
    if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = r6_digest(password, @owner_key[40,8].to_s, @user_key[0, 48].to_s)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@owner_encryption_key) + cipher.final
    end
  end

  # User password check using the revision 6 hash (see r6_digest). Returns the
  # decryption key on success, otherwise nil.
  #
  def auth_user_pass_r6(password)
    if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
      cipher = OpenSSL::Cipher.new('AES-256-CBC')
      cipher.decrypt
      cipher.key = r6_digest(password, @user_key[40,8].to_s)
      cipher.iv = "\x00" * 16
      cipher.padding = 0
      cipher.update(@user_encryption_key) + cipher.final
    end
  end

  # PDF 2.0 spec, 7.6.4.3.4
  # Algorithm 2.B: Computing a hash (revision 6 and later)
  #
  # password - the candidate password
  # salt     - the validation salt or key salt taken from the owner/user key
  # user_key - extra data mixed into the hash (used for owner password checks)
  #
  # Returns the first 32 bytes of the final round's digest.
  #
  def r6_digest(password, salt, user_key = '')
    k = Digest::SHA256.digest(password + salt + user_key)
    e = ''

    i = 0
    # Always run 64 rounds, then keep going for as long as the last byte of
    # the most recent AES output is greater than (round number - 32).
    while i < 64 || e.getbyte(-1).to_i > i - 32
      k1 = (password + k + user_key) * 64

      # The current digest supplies both the AES key (first 16 bytes) and
      # the IV (next 16 bytes) for this round.
      aes = OpenSSL::Cipher.new("aes-128-cbc").encrypt
      aes.key = k[0, 16].to_s
      aes.iv = k[16, 16].to_s
      aes.padding = 0
      e = String.new(aes.update(k1))

      # The first 16 bytes of the AES output, read as a big-endian integer
      # modulo 3, select the hash function for the next digest.
      k = case unpack_128bit_bigendian_int(e) % 3
          when 0 then Digest::SHA256.digest(e)
          when 1 then Digest::SHA384.digest(e)
          when 2 then Digest::SHA512.digest(e)
          end
      i = i + 1
    end

    k[0, 32].to_s
  end

  # Interprets the first 16 bytes of str as an unsigned big-endian integer.
  # Missing 32-bit words (when str is shorter than 16 bytes) count as zero.
  #
  def unpack_128bit_bigendian_int(str)
    ints = str[0,16].to_s.unpack("N*")
    (ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
  end

end
137
+ end
138
+
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  module PDF
@@ -35,9 +36,9 @@ module PDF
35
36
 
36
37
  def read
37
38
  bits_left_in_chunk = @bits_in_chunk
38
- chunk = nil
39
+ chunk = -1
39
40
  while bits_left_in_chunk > 0 and @current_pos < @data.size
40
- chunk = 0 if chunk.nil?
41
+ chunk = 0 if chunk < 0
41
42
  codepoint = @data[@current_pos, 1].unpack("C*")[0]
42
43
  current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
43
44
  dif = bits_left_in_chunk - @bits_left_in_byte
@@ -83,6 +84,7 @@ module PDF
83
84
  #
84
85
  def self.decode(data)
85
86
  stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
87
+ string_table = StringTable.new
86
88
  result = "".dup
87
89
  until (code = stream.read) == CODE_EOD
88
90
  if code == CODE_CLEAR_TABLE
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: strict
2
3
  # frozen_string_literal: true
3
4
 
4
5
  class PDF::Reader
@@ -6,10 +7,6 @@ class PDF::Reader
6
7
  # A null object security handler. Used when a PDF is unencrypted.
7
8
  class NullSecurityHandler
8
9
 
9
- def self.supports?(encrypt)
10
- encrypt.nil?
11
- end
12
-
13
10
  def decrypt(buf, _ref)
14
11
  buf
15
12
  end
@@ -1,4 +1,5 @@
1
1
  # coding: utf-8
2
+ # typed: true
2
3
  # frozen_string_literal: true
3
4
 
4
5
  require 'hashery/lru_hash'