pdf-reader 2.6.0 → 2.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +30 -1
  3. data/Rakefile +1 -1
  4. data/examples/rspec.rb +1 -0
  5. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  6. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  7. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  8. data/lib/pdf/reader/buffer.rb +36 -33
  9. data/lib/pdf/reader/cid_widths.rb +1 -0
  10. data/lib/pdf/reader/cmap.rb +65 -50
  11. data/lib/pdf/reader/encoding.rb +2 -1
  12. data/lib/pdf/reader/error.rb +16 -0
  13. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  14. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  15. data/lib/pdf/reader/filter/depredict.rb +8 -6
  16. data/lib/pdf/reader/filter/flate.rb +4 -2
  17. data/lib/pdf/reader/filter/lzw.rb +2 -0
  18. data/lib/pdf/reader/filter/null.rb +1 -1
  19. data/lib/pdf/reader/filter/run_length.rb +19 -13
  20. data/lib/pdf/reader/filter.rb +11 -11
  21. data/lib/pdf/reader/font.rb +72 -16
  22. data/lib/pdf/reader/font_descriptor.rb +19 -17
  23. data/lib/pdf/reader/form_xobject.rb +15 -5
  24. data/lib/pdf/reader/glyph_hash.rb +1 -0
  25. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  26. data/lib/pdf/reader/lzw.rb +4 -2
  27. data/lib/pdf/reader/null_security_handler.rb +1 -4
  28. data/lib/pdf/reader/object_cache.rb +1 -0
  29. data/lib/pdf/reader/object_hash.rb +252 -44
  30. data/lib/pdf/reader/object_stream.rb +1 -0
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  32. data/lib/pdf/reader/page.rb +99 -19
  33. data/lib/pdf/reader/page_layout.rb +28 -32
  34. data/lib/pdf/reader/page_state.rb +12 -11
  35. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  36. data/lib/pdf/reader/pages_strategy.rb +1 -0
  37. data/lib/pdf/reader/parser.rb +26 -8
  38. data/lib/pdf/reader/point.rb +25 -0
  39. data/lib/pdf/reader/print_receiver.rb +1 -0
  40. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  41. data/lib/pdf/reader/rectangle.rb +113 -0
  42. data/lib/pdf/reader/reference.rb +1 -0
  43. data/lib/pdf/reader/register_receiver.rb +1 -0
  44. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  45. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  46. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  47. data/lib/pdf/reader/stream.rb +2 -1
  48. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  49. data/lib/pdf/reader/text_run.rb +14 -6
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  52. data/lib/pdf/reader/type_check.rb +52 -0
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  54. data/lib/pdf/reader/validating_receiver.rb +262 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  56. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  57. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  60. data/lib/pdf/reader/width_calculator.rb +1 -0
  61. data/lib/pdf/reader/xref.rb +21 -3
  62. data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
  63. data/lib/pdf/reader.rb +46 -15
  64. data/lib/pdf-reader.rb +1 -0
  65. data/rbi/pdf-reader.rbi +1978 -0
  66. metadata +22 -13
  67. data/lib/pdf/reader/orientation_detector.rb +0 -34
  68. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.0
4
+ version: 2.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-12 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -200,6 +200,8 @@ files:
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
202
  - lib/pdf/reader.rb
203
+ - lib/pdf/reader/aes_v2_security_handler.rb
204
+ - lib/pdf/reader/aes_v3_security_handler.rb
203
205
  - lib/pdf/reader/afm/Courier-Bold.afm
204
206
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
205
207
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -215,6 +217,7 @@ files:
215
217
  - lib/pdf/reader/afm/Times-Italic.afm
216
218
  - lib/pdf/reader/afm/Times-Roman.afm
217
219
  - lib/pdf/reader/afm/ZapfDingbats.afm
220
+ - lib/pdf/reader/bounding_rectangle_runs_filter.rb
218
221
  - lib/pdf/reader/buffer.rb
219
222
  - lib/pdf/reader/cid_widths.rb
220
223
  - lib/pdf/reader/cmap.rb
@@ -241,12 +244,12 @@ files:
241
244
  - lib/pdf/reader/glyph_hash.rb
242
245
  - lib/pdf/reader/glyphlist-zapfdingbats.txt
243
246
  - lib/pdf/reader/glyphlist.txt
247
+ - lib/pdf/reader/key_builder_v5.rb
244
248
  - lib/pdf/reader/lzw.rb
245
249
  - lib/pdf/reader/null_security_handler.rb
246
250
  - lib/pdf/reader/object_cache.rb
247
251
  - lib/pdf/reader/object_hash.rb
248
252
  - lib/pdf/reader/object_stream.rb
249
- - lib/pdf/reader/orientation_detector.rb
250
253
  - lib/pdf/reader/overlapping_runs_filter.rb
251
254
  - lib/pdf/reader/page.rb
252
255
  - lib/pdf/reader/page_layout.rb
@@ -254,18 +257,23 @@ files:
254
257
  - lib/pdf/reader/page_text_receiver.rb
255
258
  - lib/pdf/reader/pages_strategy.rb
256
259
  - lib/pdf/reader/parser.rb
260
+ - lib/pdf/reader/point.rb
257
261
  - lib/pdf/reader/print_receiver.rb
262
+ - lib/pdf/reader/rc4_security_handler.rb
263
+ - lib/pdf/reader/rectangle.rb
258
264
  - lib/pdf/reader/reference.rb
259
265
  - lib/pdf/reader/register_receiver.rb
260
- - lib/pdf/reader/resource_methods.rb
261
- - lib/pdf/reader/standard_security_handler.rb
262
- - lib/pdf/reader/standard_security_handler_v5.rb
266
+ - lib/pdf/reader/resources.rb
267
+ - lib/pdf/reader/security_handler_factory.rb
268
+ - lib/pdf/reader/standard_key_builder.rb
263
269
  - lib/pdf/reader/stream.rb
264
270
  - lib/pdf/reader/synchronized_cache.rb
265
271
  - lib/pdf/reader/text_run.rb
266
272
  - lib/pdf/reader/token.rb
267
273
  - lib/pdf/reader/transformation_matrix.rb
274
+ - lib/pdf/reader/type_check.rb
268
275
  - lib/pdf/reader/unimplemented_security_handler.rb
276
+ - lib/pdf/reader/validating_receiver.rb
269
277
  - lib/pdf/reader/width_calculator.rb
270
278
  - lib/pdf/reader/width_calculator/built_in.rb
271
279
  - lib/pdf/reader/width_calculator/composite.rb
@@ -274,15 +282,16 @@ files:
274
282
  - lib/pdf/reader/width_calculator/type_zero.rb
275
283
  - lib/pdf/reader/xref.rb
276
284
  - lib/pdf/reader/zero_width_runs_filter.rb
285
+ - rbi/pdf-reader.rbi
277
286
  homepage: https://github.com/yob/pdf-reader
278
287
  licenses:
279
288
  - MIT
280
289
  metadata:
281
290
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
282
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.6.0/CHANGELOG
283
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.6.0
284
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.6.0
285
- post_install_message:
291
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.9.1/CHANGELOG
292
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.9.1
293
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.9.1
294
+ post_install_message:
286
295
  rdoc_options:
287
296
  - "--title"
288
297
  - PDF::Reader Documentation
@@ -302,8 +311,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
302
311
  - !ruby/object:Gem::Version
303
312
  version: '0'
304
313
  requirements: []
305
- rubygems_version: 3.1.4
306
- signing_key:
314
+ rubygems_version: 3.2.32
315
+ signing_key:
307
316
  specification_version: 4
308
317
  summary: A library for accessing the content of PDF files
309
318
  test_files: []
@@ -1,34 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- class PDF::Reader
5
- # Small util class for detecting the orientation of a single PDF page. Accounts
6
- # for any page rotation that is in place.
7
- #
8
- # OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
9
- # => "portrait"
10
- #
11
- class OrientationDetector
12
- def initialize(attributes)
13
- @attributes = attributes
14
- end
15
-
16
- def orientation
17
- @orientation ||= detect_orientation
18
- end
19
-
20
- private
21
-
22
- def detect_orientation
23
- llx,lly,urx,ury = @attributes[:MediaBox]
24
- rotation = @attributes[:Rotate].to_i
25
- width = (urx.to_i - llx.to_i).abs
26
- height = (ury.to_i - lly.to_i).abs
27
- if width > height
28
- (rotation % 180).zero? ? 'landscape' : 'portrait'
29
- else
30
- (rotation % 180).zero? ? 'portrait' : 'landscape'
31
- end
32
- end
33
- end
34
- end
@@ -1,91 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- require 'digest'
5
- require 'openssl'
6
-
7
- class PDF::Reader
8
-
9
- # class creates interface to encrypt dictionary for use in Decrypt
10
- class StandardSecurityHandlerV5
11
-
12
- attr_reader :key_length, :encrypt_key
13
-
14
- def initialize(opts = {})
15
- @key_length = 256
16
- @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
- @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
- @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
- @UE = opts[:UE] # decryption key, encrypted w/ user password
20
- @encrypt_key = build_standard_key(opts[:password] || '')
21
- end
22
-
23
- # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
- def self.supports?(encrypt)
25
- return false if encrypt.nil?
26
-
27
- filter = encrypt.fetch(:Filter, :Standard)
28
- version = encrypt.fetch(:V, 0)
29
- revision = encrypt.fetch(:R, 0)
30
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
- ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
- end
34
-
35
- ##7.6.2 General Encryption Algorithm
36
- #
37
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
- #
39
- # used to decrypt RC4/AES encrypted PDF streams (buf)
40
- #
41
- # buf - a string to decrypt
42
- # ref - a PDF::Reader::Reference for the object to decrypt
43
- #
44
- def decrypt( buf, ref )
45
- cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
- cipher.decrypt
47
- cipher.key = @encrypt_key.dup
48
- cipher.iv = buf[0..15]
49
- cipher.update(buf[16..-1]) + cipher.final
50
- end
51
-
52
- private
53
- # Algorithm 3.2a - Computing an encryption key
54
- #
55
- # Defined in PDF 1.7 Extension Level 3
56
- #
57
- # if the string is a valid user/owner password, this will return the decryption key
58
- #
59
- def auth_owner_pass(password)
60
- if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
- cipher.decrypt
63
- cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
- cipher.iv = "\x00" * 16
65
- cipher.padding = 0
66
- cipher.update(@OE) + cipher.final
67
- end
68
- end
69
-
70
- def auth_user_pass(password)
71
- if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
- cipher.decrypt
74
- cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
- cipher.iv = "\x00" * 16
76
- cipher.padding = 0
77
- cipher.update(@UE) + cipher.final
78
- end
79
- end
80
-
81
- def build_standard_key(pass)
82
- pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
-
84
- encrypt_key = auth_owner_pass(pass)
85
- encrypt_key ||= auth_user_pass(pass)
86
-
87
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
- encrypt_key
89
- end
90
- end
91
- end