pdf-reader 2.2.0 → 2.11.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +90 -0
- data/README.md +18 -3
- data/Rakefile +1 -1
- data/bin/pdf_callbacks +1 -1
- data/bin/pdf_text +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/afm/Courier-Bold.afm +342 -342
- data/lib/pdf/reader/afm/Courier-BoldOblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier-Oblique.afm +342 -342
- data/lib/pdf/reader/afm/Courier.afm +342 -342
- data/lib/pdf/reader/afm/Helvetica-Bold.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-BoldOblique.afm +2827 -2827
- data/lib/pdf/reader/afm/Helvetica-Oblique.afm +3051 -3051
- data/lib/pdf/reader/afm/Helvetica.afm +3051 -3051
- data/lib/pdf/reader/afm/MustRead.html +19 -0
- data/lib/pdf/reader/afm/Symbol.afm +213 -213
- data/lib/pdf/reader/afm/Times-Bold.afm +2588 -2588
- data/lib/pdf/reader/afm/Times-BoldItalic.afm +2384 -2384
- data/lib/pdf/reader/afm/Times-Italic.afm +2667 -2667
- data/lib/pdf/reader/afm/Times-Roman.afm +2419 -2419
- data/lib/pdf/reader/afm/ZapfDingbats.afm +225 -225
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +91 -47
- data/lib/pdf/reader/cid_widths.rb +7 -4
- data/lib/pdf/reader/cmap.rb +83 -59
- data/lib/pdf/reader/encoding.rb +17 -14
- data/lib/pdf/reader/error.rb +15 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +12 -10
- data/lib/pdf/reader/filter/flate.rb +30 -16
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +11 -11
- data/lib/pdf/reader/font.rb +89 -26
- data/lib/pdf/reader/font_descriptor.rb +22 -18
- data/lib/pdf/reader/form_xobject.rb +18 -5
- data/lib/pdf/reader/glyph_hash.rb +28 -13
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +28 -11
- data/lib/pdf/reader/no_text_filter.rb +14 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +292 -63
- data/lib/pdf/reader/object_stream.rb +3 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +72 -0
- data/lib/pdf/reader/page.rb +143 -16
- data/lib/pdf/reader/page_layout.rb +43 -39
- data/lib/pdf/reader/page_state.rb +26 -17
- data/lib/pdf/reader/page_text_receiver.rb +74 -4
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +34 -14
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +3 -1
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +17 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +3 -2
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +40 -5
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +8 -7
- data/lib/pdf/reader/type_check.rb +98 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +27 -17
- data/lib/pdf/reader/width_calculator/composite.rb +6 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +10 -11
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -4
- data/lib/pdf/reader/width_calculator/type_zero.rb +6 -2
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +37 -11
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +49 -24
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +2048 -0
- metadata +39 -23
- data/lib/pdf/hash.rb +0 -20
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata
CHANGED
@@ -1,29 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-10-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- - "
|
17
|
+
- - "<"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
19
|
+
version: '13.0'
|
20
20
|
type: :development
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
|
-
- - "
|
24
|
+
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
26
|
+
version: '13.0'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: rspec
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0.2'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: pry
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: 1.0
|
103
|
+
version: '1.0'
|
104
104
|
type: :runtime
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: 1.0
|
110
|
+
version: '1.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: ruby-rc4
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -167,7 +167,7 @@ dependencies:
|
|
167
167
|
description: The PDF::Reader library implements a PDF parser conforming as much as
|
168
168
|
possible to the PDF specification from Adobe
|
169
169
|
email:
|
170
|
-
-
|
170
|
+
- james@yob.id.au
|
171
171
|
executables:
|
172
172
|
- pdf_object
|
173
173
|
- pdf_text
|
@@ -199,8 +199,9 @@ files:
|
|
199
199
|
- examples/text.rb
|
200
200
|
- examples/version.rb
|
201
201
|
- lib/pdf-reader.rb
|
202
|
-
- lib/pdf/hash.rb
|
203
202
|
- lib/pdf/reader.rb
|
203
|
+
- lib/pdf/reader/aes_v2_security_handler.rb
|
204
|
+
- lib/pdf/reader/aes_v3_security_handler.rb
|
204
205
|
- lib/pdf/reader/afm/Courier-Bold.afm
|
205
206
|
- lib/pdf/reader/afm/Courier-BoldOblique.afm
|
206
207
|
- lib/pdf/reader/afm/Courier-Oblique.afm
|
@@ -209,12 +210,14 @@ files:
|
|
209
210
|
- lib/pdf/reader/afm/Helvetica-BoldOblique.afm
|
210
211
|
- lib/pdf/reader/afm/Helvetica-Oblique.afm
|
211
212
|
- lib/pdf/reader/afm/Helvetica.afm
|
213
|
+
- lib/pdf/reader/afm/MustRead.html
|
212
214
|
- lib/pdf/reader/afm/Symbol.afm
|
213
215
|
- lib/pdf/reader/afm/Times-Bold.afm
|
214
216
|
- lib/pdf/reader/afm/Times-BoldItalic.afm
|
215
217
|
- lib/pdf/reader/afm/Times-Italic.afm
|
216
218
|
- lib/pdf/reader/afm/Times-Roman.afm
|
217
219
|
- lib/pdf/reader/afm/ZapfDingbats.afm
|
220
|
+
- lib/pdf/reader/bounding_rectangle_runs_filter.rb
|
218
221
|
- lib/pdf/reader/buffer.rb
|
219
222
|
- lib/pdf/reader/cid_widths.rb
|
220
223
|
- lib/pdf/reader/cmap.rb
|
@@ -239,31 +242,39 @@ files:
|
|
239
242
|
- lib/pdf/reader/font_descriptor.rb
|
240
243
|
- lib/pdf/reader/form_xobject.rb
|
241
244
|
- lib/pdf/reader/glyph_hash.rb
|
245
|
+
- lib/pdf/reader/glyphlist-zapfdingbats.txt
|
242
246
|
- lib/pdf/reader/glyphlist.txt
|
247
|
+
- lib/pdf/reader/key_builder_v5.rb
|
243
248
|
- lib/pdf/reader/lzw.rb
|
249
|
+
- lib/pdf/reader/no_text_filter.rb
|
244
250
|
- lib/pdf/reader/null_security_handler.rb
|
245
251
|
- lib/pdf/reader/object_cache.rb
|
246
252
|
- lib/pdf/reader/object_hash.rb
|
247
253
|
- lib/pdf/reader/object_stream.rb
|
248
|
-
- lib/pdf/reader/
|
254
|
+
- lib/pdf/reader/overlapping_runs_filter.rb
|
249
255
|
- lib/pdf/reader/page.rb
|
250
256
|
- lib/pdf/reader/page_layout.rb
|
251
257
|
- lib/pdf/reader/page_state.rb
|
252
258
|
- lib/pdf/reader/page_text_receiver.rb
|
253
259
|
- lib/pdf/reader/pages_strategy.rb
|
254
260
|
- lib/pdf/reader/parser.rb
|
261
|
+
- lib/pdf/reader/point.rb
|
255
262
|
- lib/pdf/reader/print_receiver.rb
|
263
|
+
- lib/pdf/reader/rc4_security_handler.rb
|
264
|
+
- lib/pdf/reader/rectangle.rb
|
256
265
|
- lib/pdf/reader/reference.rb
|
257
266
|
- lib/pdf/reader/register_receiver.rb
|
258
|
-
- lib/pdf/reader/
|
259
|
-
- lib/pdf/reader/
|
260
|
-
- lib/pdf/reader/
|
267
|
+
- lib/pdf/reader/resources.rb
|
268
|
+
- lib/pdf/reader/security_handler_factory.rb
|
269
|
+
- lib/pdf/reader/standard_key_builder.rb
|
261
270
|
- lib/pdf/reader/stream.rb
|
262
271
|
- lib/pdf/reader/synchronized_cache.rb
|
263
272
|
- lib/pdf/reader/text_run.rb
|
264
273
|
- lib/pdf/reader/token.rb
|
265
274
|
- lib/pdf/reader/transformation_matrix.rb
|
275
|
+
- lib/pdf/reader/type_check.rb
|
266
276
|
- lib/pdf/reader/unimplemented_security_handler.rb
|
277
|
+
- lib/pdf/reader/validating_receiver.rb
|
267
278
|
- lib/pdf/reader/width_calculator.rb
|
268
279
|
- lib/pdf/reader/width_calculator/built_in.rb
|
269
280
|
- lib/pdf/reader/width_calculator/composite.rb
|
@@ -271,11 +282,17 @@ files:
|
|
271
282
|
- lib/pdf/reader/width_calculator/type_one_or_three.rb
|
272
283
|
- lib/pdf/reader/width_calculator/type_zero.rb
|
273
284
|
- lib/pdf/reader/xref.rb
|
274
|
-
|
285
|
+
- lib/pdf/reader/zero_width_runs_filter.rb
|
286
|
+
- rbi/pdf-reader.rbi
|
287
|
+
homepage: https://github.com/yob/pdf-reader
|
275
288
|
licenses:
|
276
289
|
- MIT
|
277
|
-
metadata:
|
278
|
-
|
290
|
+
metadata:
|
291
|
+
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
292
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.11.0/CHANGELOG
|
293
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.11.0
|
294
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.11.0
|
295
|
+
post_install_message:
|
279
296
|
rdoc_options:
|
280
297
|
- "--title"
|
281
298
|
- PDF::Reader Documentation
|
@@ -288,16 +305,15 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
288
305
|
requirements:
|
289
306
|
- - ">="
|
290
307
|
- !ruby/object:Gem::Version
|
291
|
-
version:
|
308
|
+
version: '2.0'
|
292
309
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
293
310
|
requirements:
|
294
311
|
- - ">="
|
295
312
|
- !ruby/object:Gem::Version
|
296
313
|
version: '0'
|
297
314
|
requirements: []
|
298
|
-
|
299
|
-
|
300
|
-
signing_key:
|
315
|
+
rubygems_version: 3.2.32
|
316
|
+
signing_key:
|
301
317
|
specification_version: 4
|
302
318
|
summary: A library for accessing the content of PDF files
|
303
319
|
test_files: []
|
data/lib/pdf/hash.rb
DELETED
@@ -1,20 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
module PDF
|
5
|
-
# This class is deprecated, please stop using it.
|
6
|
-
class Hash < ::PDF::Reader::ObjectHash # :nodoc:
|
7
|
-
def initialize(input)
|
8
|
-
warn "DEPRECATION NOTICE: PDF::Hash has been deprecated, use PDF::Reader::ObjectHash instead"
|
9
|
-
super
|
10
|
-
end
|
11
|
-
|
12
|
-
def version
|
13
|
-
warn <<-EOS
|
14
|
-
DEPRECATION NOTICE: PDF::Hash#version has been deprecated,
|
15
|
-
use PDF::Reader::ObjectHash#pdf_version instead
|
16
|
-
EOS
|
17
|
-
pdf_version
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
class PDF::Reader
|
5
|
-
# Small util class for detecting the orientation of a single PDF page. Accounts
|
6
|
-
# for any page rotation that is in place.
|
7
|
-
#
|
8
|
-
# OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
|
9
|
-
# => "portrait"
|
10
|
-
#
|
11
|
-
class OrientationDetector
|
12
|
-
def initialize(attributes)
|
13
|
-
@attributes = attributes
|
14
|
-
end
|
15
|
-
|
16
|
-
def orientation
|
17
|
-
@orientation ||= detect_orientation
|
18
|
-
end
|
19
|
-
|
20
|
-
private
|
21
|
-
|
22
|
-
def detect_orientation
|
23
|
-
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
|
-
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = urx.to_i - llx.to_i
|
26
|
-
height = ury.to_i - lly.to_i
|
27
|
-
if width > height
|
28
|
-
[0,180].include?(rotation) ? 'landscape' : 'portrait'
|
29
|
-
else
|
30
|
-
[0,180].include?(rotation) ? 'portrait' : 'landscape'
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
@@ -1,91 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'digest'
|
5
|
-
require 'openssl'
|
6
|
-
|
7
|
-
class PDF::Reader
|
8
|
-
|
9
|
-
# class creates interface to encrypt dictionary for use in Decrypt
|
10
|
-
class StandardSecurityHandlerV5
|
11
|
-
|
12
|
-
attr_reader :key_length, :encrypt_key
|
13
|
-
|
14
|
-
def initialize(opts = {})
|
15
|
-
@key_length = 256
|
16
|
-
@O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
|
17
|
-
@U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
|
18
|
-
@OE = opts[:OE] # decryption key, encrypted w/ owner password
|
19
|
-
@UE = opts[:UE] # decryption key, encrypted w/ user password
|
20
|
-
@encrypt_key = build_standard_key(opts[:password] || '')
|
21
|
-
end
|
22
|
-
|
23
|
-
# This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
|
24
|
-
def self.supports?(encrypt)
|
25
|
-
return false if encrypt.nil?
|
26
|
-
|
27
|
-
filter = encrypt.fetch(:Filter, :Standard)
|
28
|
-
version = encrypt.fetch(:V, 0)
|
29
|
-
revision = encrypt.fetch(:R, 0)
|
30
|
-
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
31
|
-
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
32
|
-
((version == 5) && (revision == 5) && (algorithm == :AESV3))
|
33
|
-
end
|
34
|
-
|
35
|
-
##7.6.2 General Encryption Algorithm
|
36
|
-
#
|
37
|
-
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
38
|
-
#
|
39
|
-
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
40
|
-
#
|
41
|
-
# buf - a string to decrypt
|
42
|
-
# ref - a PDF::Reader::Reference for the object to decrypt
|
43
|
-
#
|
44
|
-
def decrypt( buf, ref )
|
45
|
-
cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
|
46
|
-
cipher.decrypt
|
47
|
-
cipher.key = @encrypt_key.dup
|
48
|
-
cipher.iv = buf[0..15]
|
49
|
-
cipher.update(buf[16..-1]) + cipher.final
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
# Algorithm 3.2a - Computing an encryption key
|
54
|
-
#
|
55
|
-
# Defined in PDF 1.7 Extension Level 3
|
56
|
-
#
|
57
|
-
# if the string is a valid user/owner password, this will return the decryption key
|
58
|
-
#
|
59
|
-
def auth_owner_pass(password)
|
60
|
-
if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
|
61
|
-
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
62
|
-
cipher.decrypt
|
63
|
-
cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
|
64
|
-
cipher.iv = "\x00" * 16
|
65
|
-
cipher.padding = 0
|
66
|
-
cipher.update(@OE) + cipher.final
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def auth_user_pass(password)
|
71
|
-
if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
|
72
|
-
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
73
|
-
cipher.decrypt
|
74
|
-
cipher.key = Digest::SHA256.digest(password + @U[40..-1])
|
75
|
-
cipher.iv = "\x00" * 16
|
76
|
-
cipher.padding = 0
|
77
|
-
cipher.update(@UE) + cipher.final
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def build_standard_key(pass)
|
82
|
-
pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
|
83
|
-
|
84
|
-
encrypt_key = auth_owner_pass(pass)
|
85
|
-
encrypt_key ||= auth_user_pass(pass)
|
86
|
-
|
87
|
-
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
88
|
-
encrypt_key
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|