pdf-reader 2.6.0 → 2.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +30 -1
  3. data/Rakefile +1 -1
  4. data/examples/rspec.rb +1 -0
  5. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  6. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  7. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  8. data/lib/pdf/reader/buffer.rb +36 -33
  9. data/lib/pdf/reader/cid_widths.rb +1 -0
  10. data/lib/pdf/reader/cmap.rb +65 -50
  11. data/lib/pdf/reader/encoding.rb +2 -1
  12. data/lib/pdf/reader/error.rb +16 -0
  13. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  14. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  15. data/lib/pdf/reader/filter/depredict.rb +8 -6
  16. data/lib/pdf/reader/filter/flate.rb +4 -2
  17. data/lib/pdf/reader/filter/lzw.rb +2 -0
  18. data/lib/pdf/reader/filter/null.rb +1 -1
  19. data/lib/pdf/reader/filter/run_length.rb +19 -13
  20. data/lib/pdf/reader/filter.rb +11 -11
  21. data/lib/pdf/reader/font.rb +72 -16
  22. data/lib/pdf/reader/font_descriptor.rb +19 -17
  23. data/lib/pdf/reader/form_xobject.rb +15 -5
  24. data/lib/pdf/reader/glyph_hash.rb +1 -0
  25. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  26. data/lib/pdf/reader/lzw.rb +4 -2
  27. data/lib/pdf/reader/null_security_handler.rb +1 -4
  28. data/lib/pdf/reader/object_cache.rb +1 -0
  29. data/lib/pdf/reader/object_hash.rb +252 -44
  30. data/lib/pdf/reader/object_stream.rb +1 -0
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  32. data/lib/pdf/reader/page.rb +99 -19
  33. data/lib/pdf/reader/page_layout.rb +28 -32
  34. data/lib/pdf/reader/page_state.rb +12 -11
  35. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  36. data/lib/pdf/reader/pages_strategy.rb +1 -0
  37. data/lib/pdf/reader/parser.rb +26 -8
  38. data/lib/pdf/reader/point.rb +25 -0
  39. data/lib/pdf/reader/print_receiver.rb +1 -0
  40. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  41. data/lib/pdf/reader/rectangle.rb +113 -0
  42. data/lib/pdf/reader/reference.rb +1 -0
  43. data/lib/pdf/reader/register_receiver.rb +1 -0
  44. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  45. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  46. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  47. data/lib/pdf/reader/stream.rb +2 -1
  48. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  49. data/lib/pdf/reader/text_run.rb +14 -6
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  52. data/lib/pdf/reader/type_check.rb +52 -0
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  54. data/lib/pdf/reader/validating_receiver.rb +262 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  56. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  57. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  60. data/lib/pdf/reader/width_calculator.rb +1 -0
  61. data/lib/pdf/reader/xref.rb +21 -3
  62. data/lib/pdf/reader/zero_width_runs_filter.rb +2 -0
  63. data/lib/pdf/reader.rb +46 -15
  64. data/lib/pdf-reader.rb +1 -0
  65. data/rbi/pdf-reader.rbi +1978 -0
  66. metadata +22 -13
  67. data/lib/pdf/reader/orientation_detector.rb +0 -34
  68. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.6.0
4
+ version: 2.9.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-12 00:00:00.000000000 Z
11
+ date: 2022-02-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -200,6 +200,8 @@ files:
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
202
  - lib/pdf/reader.rb
203
+ - lib/pdf/reader/aes_v2_security_handler.rb
204
+ - lib/pdf/reader/aes_v3_security_handler.rb
203
205
  - lib/pdf/reader/afm/Courier-Bold.afm
204
206
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
205
207
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -215,6 +217,7 @@ files:
215
217
  - lib/pdf/reader/afm/Times-Italic.afm
216
218
  - lib/pdf/reader/afm/Times-Roman.afm
217
219
  - lib/pdf/reader/afm/ZapfDingbats.afm
220
+ - lib/pdf/reader/bounding_rectangle_runs_filter.rb
218
221
  - lib/pdf/reader/buffer.rb
219
222
  - lib/pdf/reader/cid_widths.rb
220
223
  - lib/pdf/reader/cmap.rb
@@ -241,12 +244,12 @@ files:
241
244
  - lib/pdf/reader/glyph_hash.rb
242
245
  - lib/pdf/reader/glyphlist-zapfdingbats.txt
243
246
  - lib/pdf/reader/glyphlist.txt
247
+ - lib/pdf/reader/key_builder_v5.rb
244
248
  - lib/pdf/reader/lzw.rb
245
249
  - lib/pdf/reader/null_security_handler.rb
246
250
  - lib/pdf/reader/object_cache.rb
247
251
  - lib/pdf/reader/object_hash.rb
248
252
  - lib/pdf/reader/object_stream.rb
249
- - lib/pdf/reader/orientation_detector.rb
250
253
  - lib/pdf/reader/overlapping_runs_filter.rb
251
254
  - lib/pdf/reader/page.rb
252
255
  - lib/pdf/reader/page_layout.rb
@@ -254,18 +257,23 @@ files:
254
257
  - lib/pdf/reader/page_text_receiver.rb
255
258
  - lib/pdf/reader/pages_strategy.rb
256
259
  - lib/pdf/reader/parser.rb
260
+ - lib/pdf/reader/point.rb
257
261
  - lib/pdf/reader/print_receiver.rb
262
+ - lib/pdf/reader/rc4_security_handler.rb
263
+ - lib/pdf/reader/rectangle.rb
258
264
  - lib/pdf/reader/reference.rb
259
265
  - lib/pdf/reader/register_receiver.rb
260
- - lib/pdf/reader/resource_methods.rb
261
- - lib/pdf/reader/standard_security_handler.rb
262
- - lib/pdf/reader/standard_security_handler_v5.rb
266
+ - lib/pdf/reader/resources.rb
267
+ - lib/pdf/reader/security_handler_factory.rb
268
+ - lib/pdf/reader/standard_key_builder.rb
263
269
  - lib/pdf/reader/stream.rb
264
270
  - lib/pdf/reader/synchronized_cache.rb
265
271
  - lib/pdf/reader/text_run.rb
266
272
  - lib/pdf/reader/token.rb
267
273
  - lib/pdf/reader/transformation_matrix.rb
274
+ - lib/pdf/reader/type_check.rb
268
275
  - lib/pdf/reader/unimplemented_security_handler.rb
276
+ - lib/pdf/reader/validating_receiver.rb
269
277
  - lib/pdf/reader/width_calculator.rb
270
278
  - lib/pdf/reader/width_calculator/built_in.rb
271
279
  - lib/pdf/reader/width_calculator/composite.rb
@@ -274,15 +282,16 @@ files:
274
282
  - lib/pdf/reader/width_calculator/type_zero.rb
275
283
  - lib/pdf/reader/xref.rb
276
284
  - lib/pdf/reader/zero_width_runs_filter.rb
285
+ - rbi/pdf-reader.rbi
277
286
  homepage: https://github.com/yob/pdf-reader
278
287
  licenses:
279
288
  - MIT
280
289
  metadata:
281
290
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
282
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.6.0/CHANGELOG
283
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.6.0
284
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.6.0
285
- post_install_message:
291
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.9.1/CHANGELOG
292
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.9.1
293
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.9.1
294
+ post_install_message:
286
295
  rdoc_options:
287
296
  - "--title"
288
297
  - PDF::Reader Documentation
@@ -302,8 +311,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
302
311
  - !ruby/object:Gem::Version
303
312
  version: '0'
304
313
  requirements: []
305
- rubygems_version: 3.1.4
306
- signing_key:
314
+ rubygems_version: 3.2.32
315
+ signing_key:
307
316
  specification_version: 4
308
317
  summary: A library for accessing the content of PDF files
309
318
  test_files: []
data/lib/pdf/reader/orientation_detector.rb DELETED
@@ -1,34 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- class PDF::Reader
5
- # Small util class for detecting the orientation of a single PDF page. Accounts
6
- # for any page rotation that is in place.
7
- #
8
- # OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
9
- # => "portrait"
10
- #
11
- class OrientationDetector
12
- def initialize(attributes)
13
- @attributes = attributes
14
- end
15
-
16
- def orientation
17
- @orientation ||= detect_orientation
18
- end
19
-
20
- private
21
-
22
- def detect_orientation
23
- llx,lly,urx,ury = @attributes[:MediaBox]
24
- rotation = @attributes[:Rotate].to_i
25
- width = (urx.to_i - llx.to_i).abs
26
- height = (ury.to_i - lly.to_i).abs
27
- if width > height
28
- (rotation % 180).zero? ? 'landscape' : 'portrait'
29
- else
30
- (rotation % 180).zero? ? 'portrait' : 'landscape'
31
- end
32
- end
33
- end
34
- end
data/lib/pdf/reader/standard_security_handler_v5.rb DELETED
@@ -1,91 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- require 'digest'
5
- require 'openssl'
6
-
7
- class PDF::Reader
8
-
9
- # class creates interface to encrypt dictionary for use in Decrypt
10
- class StandardSecurityHandlerV5
11
-
12
- attr_reader :key_length, :encrypt_key
13
-
14
- def initialize(opts = {})
15
- @key_length = 256
16
- @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
- @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
- @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
- @UE = opts[:UE] # decryption key, encrypted w/ user password
20
- @encrypt_key = build_standard_key(opts[:password] || '')
21
- end
22
-
23
- # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
- def self.supports?(encrypt)
25
- return false if encrypt.nil?
26
-
27
- filter = encrypt.fetch(:Filter, :Standard)
28
- version = encrypt.fetch(:V, 0)
29
- revision = encrypt.fetch(:R, 0)
30
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
- ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
- end
34
-
35
- ##7.6.2 General Encryption Algorithm
36
- #
37
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
- #
39
- # used to decrypt RC4/AES encrypted PDF streams (buf)
40
- #
41
- # buf - a string to decrypt
42
- # ref - a PDF::Reader::Reference for the object to decrypt
43
- #
44
- def decrypt( buf, ref )
45
- cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
- cipher.decrypt
47
- cipher.key = @encrypt_key.dup
48
- cipher.iv = buf[0..15]
49
- cipher.update(buf[16..-1]) + cipher.final
50
- end
51
-
52
- private
53
- # Algorithm 3.2a - Computing an encryption key
54
- #
55
- # Defined in PDF 1.7 Extension Level 3
56
- #
57
- # if the string is a valid user/owner password, this will return the decryption key
58
- #
59
- def auth_owner_pass(password)
60
- if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
- cipher.decrypt
63
- cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
- cipher.iv = "\x00" * 16
65
- cipher.padding = 0
66
- cipher.update(@OE) + cipher.final
67
- end
68
- end
69
-
70
- def auth_user_pass(password)
71
- if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
- cipher.decrypt
74
- cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
- cipher.iv = "\x00" * 16
76
- cipher.padding = 0
77
- cipher.update(@UE) + cipher.final
78
- end
79
- end
80
-
81
- def build_standard_key(pass)
82
- pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
-
84
- encrypt_key = auth_owner_pass(pass)
85
- encrypt_key ||= auth_user_pass(pass)
86
-
87
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
- encrypt_key
89
- end
90
- end
91
- end