pdf-reader 2.2.0 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-18 00:00:00.000000000 Z
11
+ date: 2022-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -167,7 +167,7 @@ dependencies:
167
167
  description: The PDF::Reader library implements a PDF parser conforming as much as
168
168
  possible to the PDF specification from Adobe
169
169
  email:
170
- - jimmy@deefa.com
170
+ - james@yob.id.au
171
171
  executables:
172
172
  - pdf_object
173
173
  - pdf_text
@@ -199,8 +199,9 @@ files:
199
199
  - examples/text.rb
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
- - lib/pdf/hash.rb
203
202
  - lib/pdf/reader.rb
203
+ - lib/pdf/reader/aes_v2_security_handler.rb
204
+ - lib/pdf/reader/aes_v3_security_handler.rb
204
205
  - lib/pdf/reader/afm/Courier-Bold.afm
205
206
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
206
207
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -209,12 +210,14 @@ files:
209
210
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
210
211
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
211
212
  - lib/pdf/reader/afm/Helvetica.afm
213
+ - lib/pdf/reader/afm/MustRead.html
212
214
  - lib/pdf/reader/afm/Symbol.afm
213
215
  - lib/pdf/reader/afm/Times-Bold.afm
214
216
  - lib/pdf/reader/afm/Times-BoldItalic.afm
215
217
  - lib/pdf/reader/afm/Times-Italic.afm
216
218
  - lib/pdf/reader/afm/Times-Roman.afm
217
219
  - lib/pdf/reader/afm/ZapfDingbats.afm
220
+ - lib/pdf/reader/bounding_rectangle_runs_filter.rb
218
221
  - lib/pdf/reader/buffer.rb
219
222
  - lib/pdf/reader/cid_widths.rb
220
223
  - lib/pdf/reader/cmap.rb
@@ -239,31 +242,39 @@ files:
239
242
  - lib/pdf/reader/font_descriptor.rb
240
243
  - lib/pdf/reader/form_xobject.rb
241
244
  - lib/pdf/reader/glyph_hash.rb
245
+ - lib/pdf/reader/glyphlist-zapfdingbats.txt
242
246
  - lib/pdf/reader/glyphlist.txt
247
+ - lib/pdf/reader/key_builder_v5.rb
243
248
  - lib/pdf/reader/lzw.rb
249
+ - lib/pdf/reader/no_text_filter.rb
244
250
  - lib/pdf/reader/null_security_handler.rb
245
251
  - lib/pdf/reader/object_cache.rb
246
252
  - lib/pdf/reader/object_hash.rb
247
253
  - lib/pdf/reader/object_stream.rb
248
- - lib/pdf/reader/orientation_detector.rb
254
+ - lib/pdf/reader/overlapping_runs_filter.rb
249
255
  - lib/pdf/reader/page.rb
250
256
  - lib/pdf/reader/page_layout.rb
251
257
  - lib/pdf/reader/page_state.rb
252
258
  - lib/pdf/reader/page_text_receiver.rb
253
259
  - lib/pdf/reader/pages_strategy.rb
254
260
  - lib/pdf/reader/parser.rb
261
+ - lib/pdf/reader/point.rb
255
262
  - lib/pdf/reader/print_receiver.rb
263
+ - lib/pdf/reader/rc4_security_handler.rb
264
+ - lib/pdf/reader/rectangle.rb
256
265
  - lib/pdf/reader/reference.rb
257
266
  - lib/pdf/reader/register_receiver.rb
258
- - lib/pdf/reader/resource_methods.rb
259
- - lib/pdf/reader/standard_security_handler.rb
260
- - lib/pdf/reader/standard_security_handler_v5.rb
267
+ - lib/pdf/reader/resources.rb
268
+ - lib/pdf/reader/security_handler_factory.rb
269
+ - lib/pdf/reader/standard_key_builder.rb
261
270
  - lib/pdf/reader/stream.rb
262
271
  - lib/pdf/reader/synchronized_cache.rb
263
272
  - lib/pdf/reader/text_run.rb
264
273
  - lib/pdf/reader/token.rb
265
274
  - lib/pdf/reader/transformation_matrix.rb
275
+ - lib/pdf/reader/type_check.rb
266
276
  - lib/pdf/reader/unimplemented_security_handler.rb
277
+ - lib/pdf/reader/validating_receiver.rb
267
278
  - lib/pdf/reader/width_calculator.rb
268
279
  - lib/pdf/reader/width_calculator/built_in.rb
269
280
  - lib/pdf/reader/width_calculator/composite.rb
@@ -271,11 +282,17 @@ files:
271
282
  - lib/pdf/reader/width_calculator/type_one_or_three.rb
272
283
  - lib/pdf/reader/width_calculator/type_zero.rb
273
284
  - lib/pdf/reader/xref.rb
274
- homepage: http://github.com/yob/pdf-reader
285
+ - lib/pdf/reader/zero_width_runs_filter.rb
286
+ - rbi/pdf-reader.rbi
287
+ homepage: https://github.com/yob/pdf-reader
275
288
  licenses:
276
289
  - MIT
277
- metadata: {}
278
- post_install_message:
290
+ metadata:
291
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
292
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
293
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
294
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
295
+ post_install_message:
279
296
  rdoc_options:
280
297
  - "--title"
281
298
  - PDF::Reader Documentation
@@ -288,16 +305,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
305
  requirements:
289
306
  - - ">="
290
307
  - !ruby/object:Gem::Version
291
- version: 1.9.3
308
+ version: '2.0'
292
309
  required_rubygems_version: !ruby/object:Gem::Requirement
293
310
  requirements:
294
311
  - - ">="
295
312
  - !ruby/object:Gem::Version
296
313
  version: '0'
297
314
  requirements: []
298
- rubyforge_project:
299
- rubygems_version: 2.7.6
300
- signing_key:
315
+ rubygems_version: 3.2.32
316
+ signing_key:
301
317
  specification_version: 4
302
318
  summary: A library for accessing the content of PDF files
303
319
  test_files: []
data/lib/pdf/hash.rb DELETED
@@ -1,20 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- module PDF
5
- # This class is deprecated, please stop using it.
6
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
7
- def initialize(input)
8
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
9
- super
10
- end
11
-
12
- def version
13
- warn <<-EOS
14
- DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
15
- use PDF::Reader::ObjectHash#pdf_version instead
16
- EOS
17
- pdf_version
18
- end
19
- end
20
- end
data/lib/pdf/reader/orientation_detector.rb DELETED
@@ -1,34 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- class PDF::Reader
5
- # Small util class for detecting the orientation of a single PDF page. Accounts
6
- # for any page rotation that is in place.
7
- #
8
- # OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
9
- # => "portrait"
10
- #
11
- class OrientationDetector
12
- def initialize(attributes)
13
- @attributes = attributes
14
- end
15
-
16
- def orientation
17
- @orientation ||= detect_orientation
18
- end
19
-
20
- private
21
-
22
- def detect_orientation
23
- llx,lly,urx,ury = @attributes[:MediaBox]
24
- rotation = @attributes[:Rotate].to_i
25
- width = urx.to_i - llx.to_i
26
- height = ury.to_i - lly.to_i
27
- if width > height
28
- [0,180].include?(rotation) ? 'landscape' : 'portrait'
29
- else
30
- [0,180].include?(rotation) ? 'portrait' : 'landscape'
31
- end
32
- end
33
- end
34
- end
data/lib/pdf/reader/standard_security_handler_v5.rb DELETED
@@ -1,91 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- require 'digest'
5
- require 'openssl'
6
-
7
- class PDF::Reader
8
-
9
- # class creates interface to encrypt dictionary for use in Decrypt
10
- class StandardSecurityHandlerV5
11
-
12
- attr_reader :key_length, :encrypt_key
13
-
14
- def initialize(opts = {})
15
- @key_length = 256
16
- @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
- @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
- @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
- @UE = opts[:UE] # decryption key, encrypted w/ user password
20
- @encrypt_key = build_standard_key(opts[:password] || '')
21
- end
22
-
23
- # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
- def self.supports?(encrypt)
25
- return false if encrypt.nil?
26
-
27
- filter = encrypt.fetch(:Filter, :Standard)
28
- version = encrypt.fetch(:V, 0)
29
- revision = encrypt.fetch(:R, 0)
30
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
- ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
- end
34
-
35
- ##7.6.2 General Encryption Algorithm
36
- #
37
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
- #
39
- # used to decrypt RC4/AES encrypted PDF streams (buf)
40
- #
41
- # buf - a string to decrypt
42
- # ref - a PDF::Reader::Reference for the object to decrypt
43
- #
44
- def decrypt( buf, ref )
45
- cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
- cipher.decrypt
47
- cipher.key = @encrypt_key.dup
48
- cipher.iv = buf[0..15]
49
- cipher.update(buf[16..-1]) + cipher.final
50
- end
51
-
52
- private
53
- # Algorithm 3.2a - Computing an encryption key
54
- #
55
- # Defined in PDF 1.7 Extension Level 3
56
- #
57
- # if the string is a valid user/owner password, this will return the decryption key
58
- #
59
- def auth_owner_pass(password)
60
- if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
- cipher.decrypt
63
- cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
- cipher.iv = "\x00" * 16
65
- cipher.padding = 0
66
- cipher.update(@OE) + cipher.final
67
- end
68
- end
69
-
70
- def auth_user_pass(password)
71
- if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
- cipher.decrypt
74
- cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
- cipher.iv = "\x00" * 16
76
- cipher.padding = 0
77
- cipher.update(@UE) + cipher.final
78
- end
79
- end
80
-
81
- def build_standard_key(pass)
82
- pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
-
84
- encrypt_key = auth_owner_pass(pass)
85
- encrypt_key ||= auth_user_pass(pass)
86
-
87
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
- encrypt_key
89
- end
90
- end
91
- end