pdf-reader 2.5.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +42 -0
  3. data/README.md +16 -1
  4. data/Rakefile +1 -1
  5. data/examples/extract_fonts.rb +12 -7
  6. data/examples/rspec.rb +1 -0
  7. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  8. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  9. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  10. data/lib/pdf/reader/buffer.rb +90 -46
  11. data/lib/pdf/reader/cid_widths.rb +1 -0
  12. data/lib/pdf/reader/cmap.rb +65 -50
  13. data/lib/pdf/reader/encoding.rb +3 -2
  14. data/lib/pdf/reader/error.rb +19 -3
  15. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  16. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  17. data/lib/pdf/reader/filter/depredict.rb +11 -9
  18. data/lib/pdf/reader/filter/flate.rb +4 -2
  19. data/lib/pdf/reader/filter/lzw.rb +2 -0
  20. data/lib/pdf/reader/filter/null.rb +1 -1
  21. data/lib/pdf/reader/filter/run_length.rb +19 -13
  22. data/lib/pdf/reader/filter.rb +2 -1
  23. data/lib/pdf/reader/font.rb +72 -16
  24. data/lib/pdf/reader/font_descriptor.rb +19 -17
  25. data/lib/pdf/reader/form_xobject.rb +15 -5
  26. data/lib/pdf/reader/glyph_hash.rb +16 -9
  27. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  28. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  29. data/lib/pdf/reader/lzw.rb +4 -2
  30. data/lib/pdf/reader/null_security_handler.rb +1 -4
  31. data/lib/pdf/reader/object_cache.rb +1 -0
  32. data/lib/pdf/reader/object_hash.rb +252 -44
  33. data/lib/pdf/reader/object_stream.rb +1 -0
  34. data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
  35. data/lib/pdf/reader/page.rb +99 -19
  36. data/lib/pdf/reader/page_layout.rb +36 -37
  37. data/lib/pdf/reader/page_state.rb +12 -11
  38. data/lib/pdf/reader/page_text_receiver.rb +57 -10
  39. data/lib/pdf/reader/pages_strategy.rb +1 -0
  40. data/lib/pdf/reader/parser.rb +23 -12
  41. data/lib/pdf/reader/point.rb +25 -0
  42. data/lib/pdf/reader/print_receiver.rb +1 -0
  43. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  44. data/lib/pdf/reader/rectangle.rb +113 -0
  45. data/lib/pdf/reader/reference.rb +1 -0
  46. data/lib/pdf/reader/register_receiver.rb +1 -0
  47. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
  48. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  49. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  50. data/lib/pdf/reader/stream.rb +2 -1
  51. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  52. data/lib/pdf/reader/text_run.rb +14 -6
  53. data/lib/pdf/reader/token.rb +1 -0
  54. data/lib/pdf/reader/transformation_matrix.rb +1 -0
  55. data/lib/pdf/reader/type_check.rb +52 -0
  56. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  57. data/lib/pdf/reader/validating_receiver.rb +262 -0
  58. data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
  59. data/lib/pdf/reader/width_calculator/composite.rb +1 -0
  60. data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
  61. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
  62. data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
  63. data/lib/pdf/reader/width_calculator.rb +1 -0
  64. data/lib/pdf/reader/xref.rb +27 -4
  65. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  66. data/lib/pdf/reader.rb +46 -15
  67. data/lib/pdf-reader.rb +1 -0
  68. data/rbi/pdf-reader.rbi +1978 -0
  69. metadata +21 -10
  70. data/lib/pdf/reader/orientation_detector.rb +0 -34
  71. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.5.0
4
+ version: 2.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-06-06 00:00:00.000000000 Z
11
+ date: 2022-01-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -200,6 +200,8 @@ files:
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
202
  - lib/pdf/reader.rb
203
+ - lib/pdf/reader/aes_v2_security_handler.rb
204
+ - lib/pdf/reader/aes_v3_security_handler.rb
203
205
  - lib/pdf/reader/afm/Courier-Bold.afm
204
206
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
205
207
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -215,6 +217,7 @@ files:
215
217
  - lib/pdf/reader/afm/Times-Italic.afm
216
218
  - lib/pdf/reader/afm/Times-Roman.afm
217
219
  - lib/pdf/reader/afm/ZapfDingbats.afm
220
+ - lib/pdf/reader/bounding_rectangle_runs_filter.rb
218
221
  - lib/pdf/reader/buffer.rb
219
222
  - lib/pdf/reader/cid_widths.rb
220
223
  - lib/pdf/reader/cmap.rb
@@ -239,13 +242,14 @@ files:
239
242
  - lib/pdf/reader/font_descriptor.rb
240
243
  - lib/pdf/reader/form_xobject.rb
241
244
  - lib/pdf/reader/glyph_hash.rb
245
+ - lib/pdf/reader/glyphlist-zapfdingbats.txt
242
246
  - lib/pdf/reader/glyphlist.txt
247
+ - lib/pdf/reader/key_builder_v5.rb
243
248
  - lib/pdf/reader/lzw.rb
244
249
  - lib/pdf/reader/null_security_handler.rb
245
250
  - lib/pdf/reader/object_cache.rb
246
251
  - lib/pdf/reader/object_hash.rb
247
252
  - lib/pdf/reader/object_stream.rb
248
- - lib/pdf/reader/orientation_detector.rb
249
253
  - lib/pdf/reader/overlapping_runs_filter.rb
250
254
  - lib/pdf/reader/page.rb
251
255
  - lib/pdf/reader/page_layout.rb
@@ -253,18 +257,23 @@ files:
253
257
  - lib/pdf/reader/page_text_receiver.rb
254
258
  - lib/pdf/reader/pages_strategy.rb
255
259
  - lib/pdf/reader/parser.rb
260
+ - lib/pdf/reader/point.rb
256
261
  - lib/pdf/reader/print_receiver.rb
262
+ - lib/pdf/reader/rc4_security_handler.rb
263
+ - lib/pdf/reader/rectangle.rb
257
264
  - lib/pdf/reader/reference.rb
258
265
  - lib/pdf/reader/register_receiver.rb
259
- - lib/pdf/reader/resource_methods.rb
260
- - lib/pdf/reader/standard_security_handler.rb
261
- - lib/pdf/reader/standard_security_handler_v5.rb
266
+ - lib/pdf/reader/resources.rb
267
+ - lib/pdf/reader/security_handler_factory.rb
268
+ - lib/pdf/reader/standard_key_builder.rb
262
269
  - lib/pdf/reader/stream.rb
263
270
  - lib/pdf/reader/synchronized_cache.rb
264
271
  - lib/pdf/reader/text_run.rb
265
272
  - lib/pdf/reader/token.rb
266
273
  - lib/pdf/reader/transformation_matrix.rb
274
+ - lib/pdf/reader/type_check.rb
267
275
  - lib/pdf/reader/unimplemented_security_handler.rb
276
+ - lib/pdf/reader/validating_receiver.rb
268
277
  - lib/pdf/reader/width_calculator.rb
269
278
  - lib/pdf/reader/width_calculator/built_in.rb
270
279
  - lib/pdf/reader/width_calculator/composite.rb
@@ -272,14 +281,16 @@ files:
272
281
  - lib/pdf/reader/width_calculator/type_one_or_three.rb
273
282
  - lib/pdf/reader/width_calculator/type_zero.rb
274
283
  - lib/pdf/reader/xref.rb
284
+ - lib/pdf/reader/zero_width_runs_filter.rb
285
+ - rbi/pdf-reader.rbi
275
286
  homepage: https://github.com/yob/pdf-reader
276
287
  licenses:
277
288
  - MIT
278
289
  metadata:
279
290
  bug_tracker_uri: https://github.com/yob/pdf-reader/issues
280
- changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
281
- documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
282
- source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
291
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.9.0/CHANGELOG
292
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.9.0
293
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.9.0
283
294
  post_install_message:
284
295
  rdoc_options:
285
296
  - "--title"
@@ -300,7 +311,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
300
311
  - !ruby/object:Gem::Version
301
312
  version: '0'
302
313
  requirements: []
303
- rubygems_version: 3.2.3
314
+ rubygems_version: 3.1.6
304
315
  signing_key:
305
316
  specification_version: 4
306
317
  summary: A library for accessing the content of PDF files
@@ -1,34 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- class PDF::Reader
5
- # Small util class for detecting the orientation of a single PDF page. Accounts
6
- # for any page rotation that is in place.
7
- #
8
- # OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
9
- # => "portrait"
10
- #
11
- class OrientationDetector
12
- def initialize(attributes)
13
- @attributes = attributes
14
- end
15
-
16
- def orientation
17
- @orientation ||= detect_orientation
18
- end
19
-
20
- private
21
-
22
- def detect_orientation
23
- llx,lly,urx,ury = @attributes[:MediaBox]
24
- rotation = @attributes[:Rotate].to_i
25
- width = (urx.to_i - llx.to_i).abs
26
- height = (ury.to_i - lly.to_i).abs
27
- if width > height
28
- (rotation % 180).zero? ? 'landscape' : 'portrait'
29
- else
30
- (rotation % 180).zero? ? 'portrait' : 'landscape'
31
- end
32
- end
33
- end
34
- end
@@ -1,91 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- require 'digest'
5
- require 'openssl'
6
-
7
- class PDF::Reader
8
-
9
- # class creates interface to encrypt dictionary for use in Decrypt
10
- class StandardSecurityHandlerV5
11
-
12
- attr_reader :key_length, :encrypt_key
13
-
14
- def initialize(opts = {})
15
- @key_length = 256
16
- @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
- @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
- @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
- @UE = opts[:UE] # decryption key, encrypted w/ user password
20
- @encrypt_key = build_standard_key(opts[:password] || '')
21
- end
22
-
23
- # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
- def self.supports?(encrypt)
25
- return false if encrypt.nil?
26
-
27
- filter = encrypt.fetch(:Filter, :Standard)
28
- version = encrypt.fetch(:V, 0)
29
- revision = encrypt.fetch(:R, 0)
30
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
- ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
- end
34
-
35
- ##7.6.2 General Encryption Algorithm
36
- #
37
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
- #
39
- # used to decrypt RC4/AES encrypted PDF streams (buf)
40
- #
41
- # buf - a string to decrypt
42
- # ref - a PDF::Reader::Reference for the object to decrypt
43
- #
44
- def decrypt( buf, ref )
45
- cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
- cipher.decrypt
47
- cipher.key = @encrypt_key.dup
48
- cipher.iv = buf[0..15]
49
- cipher.update(buf[16..-1]) + cipher.final
50
- end
51
-
52
- private
53
- # Algorithm 3.2a - Computing an encryption key
54
- #
55
- # Defined in PDF 1.7 Extension Level 3
56
- #
57
- # if the string is a valid user/owner password, this will return the decryption key
58
- #
59
- def auth_owner_pass(password)
60
- if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
- cipher.decrypt
63
- cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
- cipher.iv = "\x00" * 16
65
- cipher.padding = 0
66
- cipher.update(@OE) + cipher.final
67
- end
68
- end
69
-
70
- def auth_user_pass(password)
71
- if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
- cipher.decrypt
74
- cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
- cipher.iv = "\x00" * 16
76
- cipher.padding = 0
77
- cipher.update(@UE) + cipher.final
78
- end
79
- end
80
-
81
- def build_standard_key(pass)
82
- pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
-
84
- encrypt_key = auth_owner_pass(pass)
85
- encrypt_key ||= auth_user_pass(pass)
86
-
87
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
- encrypt_key
89
- end
90
- end
91
- end