pdf-reader 2.5.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +42 -0
- data/README.md +16 -1
- data/Rakefile +1 -1
- data/examples/extract_fonts.rb +12 -7
- data/examples/rspec.rb +1 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +90 -46
- data/lib/pdf/reader/cid_widths.rb +1 -0
- data/lib/pdf/reader/cmap.rb +65 -50
- data/lib/pdf/reader/encoding.rb +3 -2
- data/lib/pdf/reader/error.rb +19 -3
- data/lib/pdf/reader/filter/ascii85.rb +7 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +6 -1
- data/lib/pdf/reader/filter/depredict.rb +11 -9
- data/lib/pdf/reader/filter/flate.rb +4 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +1 -1
- data/lib/pdf/reader/filter/run_length.rb +19 -13
- data/lib/pdf/reader/filter.rb +2 -1
- data/lib/pdf/reader/font.rb +72 -16
- data/lib/pdf/reader/font_descriptor.rb +19 -17
- data/lib/pdf/reader/form_xobject.rb +15 -5
- data/lib/pdf/reader/glyph_hash.rb +16 -9
- data/lib/pdf/reader/glyphlist-zapfdingbats.txt +245 -0
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/lzw.rb +4 -2
- data/lib/pdf/reader/null_security_handler.rb +1 -4
- data/lib/pdf/reader/object_cache.rb +1 -0
- data/lib/pdf/reader/object_hash.rb +252 -44
- data/lib/pdf/reader/object_stream.rb +1 -0
- data/lib/pdf/reader/overlapping_runs_filter.rb +11 -4
- data/lib/pdf/reader/page.rb +99 -19
- data/lib/pdf/reader/page_layout.rb +36 -37
- data/lib/pdf/reader/page_state.rb +12 -11
- data/lib/pdf/reader/page_text_receiver.rb +57 -10
- data/lib/pdf/reader/pages_strategy.rb +1 -0
- data/lib/pdf/reader/parser.rb +23 -12
- data/lib/pdf/reader/point.rb +25 -0
- data/lib/pdf/reader/print_receiver.rb +1 -0
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +113 -0
- data/lib/pdf/reader/reference.rb +1 -0
- data/lib/pdf/reader/register_receiver.rb +1 -0
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +16 -9
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -94
- data/lib/pdf/reader/stream.rb +2 -1
- data/lib/pdf/reader/synchronized_cache.rb +1 -0
- data/lib/pdf/reader/text_run.rb +14 -6
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +1 -0
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +1 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +1 -0
- data/lib/pdf/reader/width_calculator/composite.rb +1 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +2 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +1 -0
- data/lib/pdf/reader/width_calculator/type_zero.rb +1 -0
- data/lib/pdf/reader/width_calculator.rb +1 -0
- data/lib/pdf/reader/xref.rb +27 -4
- data/lib/pdf/reader/zero_width_runs_filter.rb +13 -0
- data/lib/pdf/reader.rb +46 -15
- data/lib/pdf-reader.rb +1 -0
- data/rbi/pdf-reader.rbi +1978 -0
- metadata +21 -10
- data/lib/pdf/reader/orientation_detector.rb +0 -34
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -91
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pdf-reader
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.5.0
|
4
|
+
version: 2.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-24 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -200,6 +200,8 @@ files:
|
|
200
200
|
- examples/version.rb
|
201
201
|
- lib/pdf-reader.rb
|
202
202
|
- lib/pdf/reader.rb
|
203
|
+
- lib/pdf/reader/aes_v2_security_handler.rb
|
204
|
+
- lib/pdf/reader/aes_v3_security_handler.rb
|
203
205
|
- lib/pdf/reader/afm/Courier-Bold.afm
|
204
206
|
- lib/pdf/reader/afm/Courier-BoldOblique.afm
|
205
207
|
- lib/pdf/reader/afm/Courier-Oblique.afm
|
@@ -215,6 +217,7 @@ files:
|
|
215
217
|
- lib/pdf/reader/afm/Times-Italic.afm
|
216
218
|
- lib/pdf/reader/afm/Times-Roman.afm
|
217
219
|
- lib/pdf/reader/afm/ZapfDingbats.afm
|
220
|
+
- lib/pdf/reader/bounding_rectangle_runs_filter.rb
|
218
221
|
- lib/pdf/reader/buffer.rb
|
219
222
|
- lib/pdf/reader/cid_widths.rb
|
220
223
|
- lib/pdf/reader/cmap.rb
|
@@ -239,13 +242,14 @@ files:
|
|
239
242
|
- lib/pdf/reader/font_descriptor.rb
|
240
243
|
- lib/pdf/reader/form_xobject.rb
|
241
244
|
- lib/pdf/reader/glyph_hash.rb
|
245
|
+
- lib/pdf/reader/glyphlist-zapfdingbats.txt
|
242
246
|
- lib/pdf/reader/glyphlist.txt
|
247
|
+
- lib/pdf/reader/key_builder_v5.rb
|
243
248
|
- lib/pdf/reader/lzw.rb
|
244
249
|
- lib/pdf/reader/null_security_handler.rb
|
245
250
|
- lib/pdf/reader/object_cache.rb
|
246
251
|
- lib/pdf/reader/object_hash.rb
|
247
252
|
- lib/pdf/reader/object_stream.rb
|
248
|
-
- lib/pdf/reader/orientation_detector.rb
|
249
253
|
- lib/pdf/reader/overlapping_runs_filter.rb
|
250
254
|
- lib/pdf/reader/page.rb
|
251
255
|
- lib/pdf/reader/page_layout.rb
|
@@ -253,18 +257,23 @@ files:
|
|
253
257
|
- lib/pdf/reader/page_text_receiver.rb
|
254
258
|
- lib/pdf/reader/pages_strategy.rb
|
255
259
|
- lib/pdf/reader/parser.rb
|
260
|
+
- lib/pdf/reader/point.rb
|
256
261
|
- lib/pdf/reader/print_receiver.rb
|
262
|
+
- lib/pdf/reader/rc4_security_handler.rb
|
263
|
+
- lib/pdf/reader/rectangle.rb
|
257
264
|
- lib/pdf/reader/reference.rb
|
258
265
|
- lib/pdf/reader/register_receiver.rb
|
259
|
-
- lib/pdf/reader/resource_methods.rb
|
260
|
-
- lib/pdf/reader/standard_security_handler.rb
|
261
|
-
- lib/pdf/reader/standard_security_handler_v5.rb
|
266
|
+
- lib/pdf/reader/resources.rb
|
267
|
+
- lib/pdf/reader/security_handler_factory.rb
|
268
|
+
- lib/pdf/reader/standard_key_builder.rb
|
262
269
|
- lib/pdf/reader/stream.rb
|
263
270
|
- lib/pdf/reader/synchronized_cache.rb
|
264
271
|
- lib/pdf/reader/text_run.rb
|
265
272
|
- lib/pdf/reader/token.rb
|
266
273
|
- lib/pdf/reader/transformation_matrix.rb
|
274
|
+
- lib/pdf/reader/type_check.rb
|
267
275
|
- lib/pdf/reader/unimplemented_security_handler.rb
|
276
|
+
- lib/pdf/reader/validating_receiver.rb
|
268
277
|
- lib/pdf/reader/width_calculator.rb
|
269
278
|
- lib/pdf/reader/width_calculator/built_in.rb
|
270
279
|
- lib/pdf/reader/width_calculator/composite.rb
|
@@ -272,14 +281,16 @@ files:
|
|
272
281
|
- lib/pdf/reader/width_calculator/type_one_or_three.rb
|
273
282
|
- lib/pdf/reader/width_calculator/type_zero.rb
|
274
283
|
- lib/pdf/reader/xref.rb
|
284
|
+
- lib/pdf/reader/zero_width_runs_filter.rb
|
285
|
+
- rbi/pdf-reader.rbi
|
275
286
|
homepage: https://github.com/yob/pdf-reader
|
276
287
|
licenses:
|
277
288
|
- MIT
|
278
289
|
metadata:
|
279
290
|
bug_tracker_uri: https://github.com/yob/pdf-reader/issues
|
280
|
-
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.5.0/CHANGELOG
|
281
|
-
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.5.0
|
282
|
-
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.5.0
|
291
|
+
changelog_uri: https://github.com/yob/pdf-reader/blob/v2.9.0/CHANGELOG
|
292
|
+
documentation_uri: https://www.rubydoc.info/gems/pdf-reader/2.9.0
|
293
|
+
source_code_uri: https://github.com/yob/pdf-reader/tree/v2.9.0
|
283
294
|
post_install_message:
|
284
295
|
rdoc_options:
|
285
296
|
- "--title"
|
@@ -300,7 +311,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
300
311
|
- !ruby/object:Gem::Version
|
301
312
|
version: '0'
|
302
313
|
requirements: []
|
303
|
-
rubygems_version: 3.
|
314
|
+
rubygems_version: 3.1.6
|
304
315
|
signing_key:
|
305
316
|
specification_version: 4
|
306
317
|
summary: A library for accessing the content of PDF files
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
class PDF::Reader
|
5
|
-
# Small util class for detecting the orientation of a single PDF page. Accounts
|
6
|
-
# for any page rotation that is in place.
|
7
|
-
#
|
8
|
-
# OrientationDetector.new(:MediaBox => [0,0,612,792]).orientation
|
9
|
-
# => "portrait"
|
10
|
-
#
|
11
|
-
class OrientationDetector
|
12
|
-
def initialize(attributes)
|
13
|
-
@attributes = attributes
|
14
|
-
end
|
15
|
-
|
16
|
-
def orientation
|
17
|
-
@orientation ||= detect_orientation
|
18
|
-
end
|
19
|
-
|
20
|
-
private
|
21
|
-
|
22
|
-
def detect_orientation
|
23
|
-
llx,lly,urx,ury = @attributes[:MediaBox]
|
24
|
-
rotation = @attributes[:Rotate].to_i
|
25
|
-
width = (urx.to_i - llx.to_i).abs
|
26
|
-
height = (ury.to_i - lly.to_i).abs
|
27
|
-
if width > height
|
28
|
-
(rotation % 180).zero? ? 'landscape' : 'portrait'
|
29
|
-
else
|
30
|
-
(rotation % 180).zero? ? 'portrait' : 'landscape'
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
@@ -1,91 +0,0 @@
|
|
1
|
-
# coding: utf-8
|
2
|
-
# frozen_string_literal: true
|
3
|
-
|
4
|
-
require 'digest'
|
5
|
-
require 'openssl'
|
6
|
-
|
7
|
-
class PDF::Reader
|
8
|
-
|
9
|
-
# class creates interface to encrypt dictionary for use in Decrypt
|
10
|
-
class StandardSecurityHandlerV5
|
11
|
-
|
12
|
-
attr_reader :key_length, :encrypt_key
|
13
|
-
|
14
|
-
def initialize(opts = {})
|
15
|
-
@key_length = 256
|
16
|
-
@O = opts[:O] # hash(32B) + validation salt(8B) + key salt(8B)
|
17
|
-
@U = opts[:U] # hash(32B) + validation salt(8B) + key salt(8B)
|
18
|
-
@OE = opts[:OE] # decryption key, encrypted w/ owner password
|
19
|
-
@UE = opts[:UE] # decryption key, encrypted w/ user password
|
20
|
-
@encrypt_key = build_standard_key(opts[:password] || '')
|
21
|
-
end
|
22
|
-
|
23
|
-
# This handler supports AES-256 encryption defined in PDF 1.7 Extension Level 3
|
24
|
-
def self.supports?(encrypt)
|
25
|
-
return false if encrypt.nil?
|
26
|
-
|
27
|
-
filter = encrypt.fetch(:Filter, :Standard)
|
28
|
-
version = encrypt.fetch(:V, 0)
|
29
|
-
revision = encrypt.fetch(:R, 0)
|
30
|
-
algorithm = encrypt.fetch(:CF, {}).fetch(encrypt[:StmF], {}).fetch(:CFM, nil)
|
31
|
-
(filter == :Standard) && (encrypt[:StmF] == encrypt[:StrF]) &&
|
32
|
-
((version == 5) && (revision == 5) && (algorithm == :AESV3))
|
33
|
-
end
|
34
|
-
|
35
|
-
##7.6.2 General Encryption Algorithm
|
36
|
-
#
|
37
|
-
# Algorithm 1: Encryption of data using the RC4 or AES algorithms
|
38
|
-
#
|
39
|
-
# used to decrypt RC4/AES encrypted PDF streams (buf)
|
40
|
-
#
|
41
|
-
# buf - a string to decrypt
|
42
|
-
# ref - a PDF::Reader::Reference for the object to decrypt
|
43
|
-
#
|
44
|
-
def decrypt( buf, ref )
|
45
|
-
cipher = OpenSSL::Cipher.new("AES-#{@key_length}-CBC")
|
46
|
-
cipher.decrypt
|
47
|
-
cipher.key = @encrypt_key.dup
|
48
|
-
cipher.iv = buf[0..15]
|
49
|
-
cipher.update(buf[16..-1]) + cipher.final
|
50
|
-
end
|
51
|
-
|
52
|
-
private
|
53
|
-
# Algorithm 3.2a - Computing an encryption key
|
54
|
-
#
|
55
|
-
# Defined in PDF 1.7 Extension Level 3
|
56
|
-
#
|
57
|
-
# if the string is a valid user/owner password, this will return the decryption key
|
58
|
-
#
|
59
|
-
def auth_owner_pass(password)
|
60
|
-
if Digest::SHA256.digest(password + @O[32..39] + @U) == @O[0..31]
|
61
|
-
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
62
|
-
cipher.decrypt
|
63
|
-
cipher.key = Digest::SHA256.digest(password + @O[40..-1] + @U)
|
64
|
-
cipher.iv = "\x00" * 16
|
65
|
-
cipher.padding = 0
|
66
|
-
cipher.update(@OE) + cipher.final
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def auth_user_pass(password)
|
71
|
-
if Digest::SHA256.digest(password + @U[32..39]) == @U[0..31]
|
72
|
-
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
73
|
-
cipher.decrypt
|
74
|
-
cipher.key = Digest::SHA256.digest(password + @U[40..-1])
|
75
|
-
cipher.iv = "\x00" * 16
|
76
|
-
cipher.padding = 0
|
77
|
-
cipher.update(@UE) + cipher.final
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def build_standard_key(pass)
|
82
|
-
pass = pass.byteslice(0...127) # UTF-8 encoded password. first 127 bytes
|
83
|
-
|
84
|
-
encrypt_key = auth_owner_pass(pass)
|
85
|
-
encrypt_key ||= auth_user_pass(pass)
|
86
|
-
|
87
|
-
raise PDF::Reader::EncryptedPDFError, "Invalid password (#{pass})" if encrypt_key.nil?
|
88
|
-
encrypt_key
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|