pdf-reader 2.2.0 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (90) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +90 -0
  3. data/README.md +18 -3
  4. data/Rakefile +1 -1
  5. data/bin/pdf_callbacks +1 -1
  6. data/bin/pdf_text +1 -1
  7. data/examples/extract_fonts.rb +12 -7
  8. data/examples/rspec.rb +1 -0
  9. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  10. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  11. data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
  12. data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
  13. data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
  14. data/lib/pdf/reader/afm/Courier.afm +342 -342
  15. data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
  16. data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
  17. data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
  18. data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
  19. data/lib/pdf/reader/afm/MustRead.html +19 -0
  20. data/lib/pdf/reader/afm/Symbol.afm +213 -213
  21. data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
  22. data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
  23. data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
  24. data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
  25. data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
  26. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  27. data/lib/pdf/reader/buffer.rb +91 -47
  28. data/lib/pdf/reader/cid_widths.rb +7 -4
  29. data/lib/pdf/reader/cmap.rb +83 -59
  30. data/lib/pdf/reader/encoding.rb +17 -14
  31. data/lib/pdf/reader/error.rb +15 -3
  32. data/lib/pdf/reader/filter/ascii85.rb +7 -1
  33. data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
  34. data/lib/pdf/reader/filter/depredict.rb +12 -10
  35. data/lib/pdf/reader/filter/flate.rb +30 -16
  36. data/lib/pdf/reader/filter/lzw.rb +2 -0
  37. data/lib/pdf/reader/filter/null.rb +1 -1
  38. data/lib/pdf/reader/filter/run_length.rb +19 -13
  39. data/lib/pdf/reader/filter.rb +11 -11
  40. data/lib/pdf/reader/font.rb +89 -26
  41. data/lib/pdf/reader/font_descriptor.rb +22 -18
  42. data/lib/pdf/reader/form_xobject.rb +18 -5
  43. data/lib/pdf/reader/glyph_hash.rb +28 -13
  44. data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
  45. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  46. data/lib/pdf/reader/lzw.rb +28 -11
  47. data/lib/pdf/reader/no_text_filter.rb +14 -0
  48. data/lib/pdf/reader/null_security_handler.rb +1 -4
  49. data/lib/pdf/reader/object_cache.rb +1 -0
  50. data/lib/pdf/reader/object_hash.rb +292 -63
  51. data/lib/pdf/reader/object_stream.rb +3 -2
  52. data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
  53. data/lib/pdf/reader/page.rb +143 -16
  54. data/lib/pdf/reader/page_layout.rb +43 -39
  55. data/lib/pdf/reader/page_state.rb +26 -17
  56. data/lib/pdf/reader/page_text_receiver.rb +74 -4
  57. data/lib/pdf/reader/pages_strategy.rb +1 -0
  58. data/lib/pdf/reader/parser.rb +34 -14
  59. data/lib/pdf/reader/point.rb +25 -0
  60. data/lib/pdf/reader/print_receiver.rb +1 -0
  61. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  62. data/lib/pdf/reader/rectangle.rb +113 -0
  63. data/lib/pdf/reader/reference.rb +3 -1
  64. data/lib/pdf/reader/register_receiver.rb +1 -0
  65. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
  66. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  67. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
  68. data/lib/pdf/reader/stream.rb +3 -2
  69. data/lib/pdf/reader/synchronized_cache.rb +1 -0
  70. data/lib/pdf/reader/text_run.rb +40 -5
  71. data/lib/pdf/reader/token.rb +1 -0
  72. data/lib/pdf/reader/transformation_matrix.rb +8 -7
  73. data/lib/pdf/reader/type_check.rb +98 -0
  74. data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
  75. data/lib/pdf/reader/validating_receiver.rb +262 -0
  76. data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
  77. data/lib/pdf/reader/width_calculator/composite.rb +6 -1
  78. data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
  79. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
  80. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
  81. data/lib/pdf/reader/width_calculator.rb +1 -0
  82. data/lib/pdf/reader/xref.rb +37 -11
  83. data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
  84. data/lib/pdf/reader.rb +49 -24
  85. data/lib/pdf-reader.rb +1 -0
  86. data/rbi/pdf-reader.rbi +2048 -0
  87. metadata +39 -23
  88. data/lib/pdf/hash.rb +0 -20
  89. data/lib/pdf/reader/orientation_detector.rb +0 -34
  90. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata CHANGED
@@ -1,29 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pdf-reader
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-12-18 00:00:00.000000000 Z
11
+ date: 2022-10-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ">="
17
+ - - "<"
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '13.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ">="
24
+ - - "<"
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '13.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0.2'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ir_b
70
+ name: pry
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -100,14 +100,14 @@ dependencies:
100
100
  requirements:
101
101
  - - "~>"
102
102
  - !ruby/object:Gem::Version
103
- version: 1.0.0
103
+ version: '1.0'
104
104
  type: :runtime
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
- version: 1.0.0
110
+ version: '1.0'
111
111
  - !ruby/object:Gem::Dependency
112
112
  name: ruby-rc4
113
113
  requirement: !ruby/object:Gem::Requirement
@@ -167,7 +167,7 @@ dependencies:
167
167
  description: The PDF::Reader library implements a PDF parser conforming as much as
168
168
  possible to the PDF specification from Adobe
169
169
  email:
170
- - jimmy@deefa.com
170
+ - james@yob.id.au
171
171
  executables:
172
172
  - pdf_object
173
173
  - pdf_text
@@ -199,8 +199,9 @@ files:
199
199
  - examples/text.rb
200
200
  - examples/version.rb
201
201
  - lib/pdf-reader.rb
202
- - lib/pdf/hash.rb
203
202
  - lib/pdf/reader.rb
203
+ - lib/pdf/reader/aes_v2_security_handler.rb
204
+ - lib/pdf/reader/aes_v3_security_handler.rb
204
205
  - lib/pdf/reader/afm/Courier-Bold.afm
205
206
  - lib/pdf/reader/afm/Courier-BoldOblique.afm
206
207
  - lib/pdf/reader/afm/Courier-Oblique.afm
@@ -209,12 +210,14 @@ files:
209
210
  - lib/pdf/reader/afm/Helvetica-BoldOblique.afm
210
211
  - lib/pdf/reader/afm/Helvetica-Oblique.afm
211
212
  - lib/pdf/reader/afm/Helvetica.afm
213
+ - lib/pdf/reader/afm/MustRead.html
212
214
  - lib/pdf/reader/afm/Symbol.afm
213
215
  - lib/pdf/reader/afm/Times-Bold.afm
214
216
  - lib/pdf/reader/afm/Times-BoldItalic.afm
215
217
  - lib/pdf/reader/afm/Times-Italic.afm
216
218
  - lib/pdf/reader/afm/Times-Roman.afm
217
219
  - lib/pdf/reader/afm/ZapfDingbats.afm
220
+ - lib/pdf/reader/bounding_rectangle_runs_filter.rb
218
221
  - lib/pdf/reader/buffer.rb
219
222
  - lib/pdf/reader/cid_widths.rb
220
223
  - lib/pdf/reader/cmap.rb
@@ -239,31 +242,39 @@ files:
239
242
  - lib/pdf/reader/font_descriptor.rb
240
243
  - lib/pdf/reader/form_xobject.rb
241
244
  - lib/pdf/reader/glyph_hash.rb
245
+ - lib/pdf/reader/glyphlist-zapfdingbats.txt
242
246
  - lib/pdf/reader/glyphlist.txt
247
+ - lib/pdf/reader/key_builder_v5.rb
243
248
  - lib/pdf/reader/lzw.rb
249
+ - lib/pdf/reader/no_text_filter.rb
244
250
  - lib/pdf/reader/null_security_handler.rb
245
251
  - lib/pdf/reader/object_cache.rb
246
252
  - lib/pdf/reader/object_hash.rb
247
253
  - lib/pdf/reader/object_stream.rb
248
- - lib/pdf/reader/orientation_detector.rb
254
+ - lib/pdf/reader/overlapping_runs_filter.rb
249
255
  - lib/pdf/reader/page.rb
250
256
  - lib/pdf/reader/page_layout.rb
251
257
  - lib/pdf/reader/page_state.rb
252
258
  - lib/pdf/reader/page_text_receiver.rb
253
259
  - lib/pdf/reader/pages_strategy.rb
254
260
  - lib/pdf/reader/parser.rb
261
+ - lib/pdf/reader/point.rb
255
262
  - lib/pdf/reader/print_receiver.rb
263
+ - lib/pdf/reader/rc4_security_handler.rb
264
+ - lib/pdf/reader/rectangle.rb
256
265
  - lib/pdf/reader/reference.rb
257
266
  - lib/pdf/reader/register_receiver.rb
258
- - lib/pdf/reader/resource_methods.rb
259
- - lib/pdf/reader/standard_security_handler.rb
260
- - lib/pdf/reader/standard_security_handler_v5.rb
267
+ - lib/pdf/reader/resources.rb
268
+ - lib/pdf/reader/security_handler_factory.rb
269
+ - lib/pdf/reader/standard_key_builder.rb
261
270
  - lib/pdf/reader/stream.rb
262
271
  - lib/pdf/reader/synchronized_cache.rb
263
272
  - lib/pdf/reader/text_run.rb
264
273
  - lib/pdf/reader/token.rb
265
274
  - lib/pdf/reader/transformation_matrix.rb
275
+ - lib/pdf/reader/type_check.rb
266
276
  - lib/pdf/reader/unimplemented_security_handler.rb
277
+ - lib/pdf/reader/validating_receiver.rb
267
278
  - lib/pdf/reader/width_calculator.rb
268
279
  - lib/pdf/reader/width_calculator/built_in.rb
269
280
  - lib/pdf/reader/width_calculator/composite.rb
@@ -271,11 +282,17 @@ files:
271
282
  - lib/pdf/reader/width_calculator/type_one_or_three.rb
272
283
  - lib/pdf/reader/width_calculator/type_zero.rb
273
284
  - lib/pdf/reader/xref.rb
274
- homepage: http://github.com/yob/pdf-reader
285
+ - lib/pdf/reader/zero_width_runs_filter.rb
286
+ - rbi/pdf-reader.rbi
287
+ homepage: https://github.com/yob/pdf-reader
275
288
  licenses:
276
289
  - MIT
277
- metadata: {}
278
- post_install_message:
290
+ metadata:
291
+ bug_tracker_uri: https://github.com/yob/pdf-reader/issues
292
+ changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
293
+ documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
294
+ source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
295
+ post_install_message:
279
296
  rdoc_options:
280
297
  - "--title"
281
298
  - PDF::Reader Documentation
@@ -288,16 +305,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
288
305
  requirements:
289
306
  - - ">="
290
307
  - !ruby/object:Gem::Version
291
- version: 1.9.3
308
+ version: '2.0'
292
309
  required_rubygems_version: !ruby/object:Gem::Requirement
293
310
  requirements:
294
311
  - - ">="
295
312
  - !ruby/object:Gem::Version
296
313
  version: '0'
297
314
  requirements: []
298
- rubyforge_project:
299
- rubygems_version: 2.7.6
300
- signing_key:
315
+ rubygems_version: 3.2.32
316
+ signing_key:
301
317
  specification_version: 4
302
318
  summary: A library for accessing the content of PDF files
303
319
  test_files: []
data/lib/pdf/hash.rb DELETED
@@ -1,20 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- module PDF
5
- # This class is deprecated, please stop using it.
6
- class Hash < ::PDF::Reader::ObjectHash # :nodoc:
7
- def initialize(input)
8
- warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
9
- super
10
- end
11
-
12
- def version
13
- warn <<-EOS
14
- DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
15
- use PDF::Reader::ObjectHash#pdf_version instead
16
- EOS
17
- pdf_version
18
- end
19
- end
20
- end
data/lib/pdf/reader/orientation_detector.rb DELETED
@@ -1,34 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- class PDF::Reader
5
- # Small util class for detecting the orientation of a single PDF page. Accounts
6
- # for any page rotation that is in place.
7
- #
8
- # OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
9
- # => "portrait"
10
- #
11
- class OrientationDetector
12
- def initialize(attributes)
13
- @attributes = attributes
14
- end
15
-
16
- def orientation
17
- @orientation ||= detect_orientation
18
- end
19
-
20
- private
21
-
22
- def detect_orientation
23
- llx,lly,urx,ury = @attributes[:MediaBox]
24
- rotation = @attributes[:Rotate].to_i
25
- width = urx.to_i - llx.to_i
26
- height = ury.to_i - lly.to_i
27
- if width > height
28
- [0,180].include?(rotation) ? 'landscape' : 'portrait'
29
- else
30
- [0,180].include?(rotation) ? 'portrait' : 'landscape'
31
- end
32
- end
33
- end
34
- end
@@ -1,91 +0,0 @@
1
- # coding: utf-8
2
- # frozen_string_literal: true
3
-
4
- require 'digest'
5
- require 'openssl'
6
-
7
- class PDF::Reader
8
-
9
- # class creates interface to encrypt dictionary for use in Decrypt
10
- class StandardSecurityHandlerV5
11
-
12
- attr_reader :key_length, :encrypt_key
13
-
14
- def initialize(opts = {})
15
- @key_length = 256
16
- @O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
17
- @U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
18
- @OE = opts[:OE] # decryption key, encrypted w/ owner password
19
- @UE = opts[:UE] # decryption key, encrypted w/ user password
20
- @encrypt_key = build_standard_key(opts[:password] || '')
21
- end
22
-
23
- # This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
24
- def self.supports?(encrypt)
25
- return false if encrypt.nil?
26
-
27
- filter = encrypt.fetch(:Filter, :Standard)
28
- version = encrypt.fetch(:V, 0)
29
- revision = encrypt.fetch(:R, 0)
30
- algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
31
- (filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
32
- ((version == 5) && (revision == 5) && (algorithm == :AESV3))
33
- end
34
-
35
- ##7.6.2 General Encryption Algorithm
36
- #
37
- # Algorithm 1: Encryption of data using the RC4 or AES algorithms
38
- #
39
- # used to decrypt RC4/AES encrypted PDF streams (buf)
40
- #
41
- # buf - a string to decrypt
42
- # ref - a PDF::Reader::Reference for the object to decrypt
43
- #
44
- def decrypt( buf, ref )
45
- cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
46
- cipher.decrypt
47
- cipher.key = @encrypt_key.dup
48
- cipher.iv = buf[0..15]
49
- cipher.update(buf[16..-1]) + cipher.final
50
- end
51
-
52
- private
53
- # Algorithm 3.2a - Computing an encryption key
54
- #
55
- # Defined in PDF 1.7 Extension Level 3
56
- #
57
- # if the string is a valid user/owner password, this will return the decryption key
58
- #
59
- def auth_owner_pass(password)
60
- if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
61
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
62
- cipher.decrypt
63
- cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
64
- cipher.iv = "\x00" * 16
65
- cipher.padding = 0
66
- cipher.update(@OE) + cipher.final
67
- end
68
- end
69
-
70
- def auth_user_pass(password)
71
- if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
72
- cipher = OpenSSL::Cipher.new('AES-256-CBC')
73
- cipher.decrypt
74
- cipher.key = Digest::SHA256.digest(password + @U[40..-1])
75
- cipher.iv = "\x00" * 16
76
- cipher.padding = 0
77
- cipher.update(@UE) + cipher.final
78
- end
79
- end
80
-
81
- def build_standard_key(pass)
82
- pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
83
-
84
- encrypt_key = auth_owner_pass(pass)
85
- encrypt_key ||= auth_user_pass(pass)
86
-
87
- raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
88
- encrypt_key
89
- end
90
- end
91
- end