pdf-reader 2.9.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +39 -0
- data/README.md +33 -33
- data/Rakefile +2 -2
- data/lib/pdf/reader/advanced_text_run_filter.rb +152 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +39 -22
- data/lib/pdf/reader/cid_widths.rb +14 -6
- data/lib/pdf/reader/cmap.rb +16 -5
- data/lib/pdf/reader/encoding.rb +42 -18
- data/lib/pdf/reader/error.rb +6 -4
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +6 -2
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +99 -32
- data/lib/pdf/reader/font_descriptor.rb +79 -24
- data/lib/pdf/reader/form_xobject.rb +15 -1
- data/lib/pdf/reader/glyph_hash.rb +41 -8
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +42 -16
- data/lib/pdf/reader/no_text_filter.rb +15 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +129 -16
- data/lib/pdf/reader/object_stream.rb +22 -5
- data/lib/pdf/reader/overlapping_runs_filter.rb +8 -2
- data/lib/pdf/reader/page.rb +66 -13
- data/lib/pdf/reader/page_layout.rb +26 -9
- data/lib/pdf/reader/page_state.rb +12 -3
- data/lib/pdf/reader/page_text_receiver.rb +16 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +52 -13
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +13 -3
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +12 -2
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +6 -3
- data/lib/pdf/reader/text_run.rb +33 -3
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +41 -10
- data/lib/pdf/reader/type_check.rb +53 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +13 -5
- data/lib/pdf/reader/width_calculator/composite.rb +11 -3
- data/lib/pdf/reader/width_calculator/true_type.rb +14 -12
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +8 -5
- data/lib/pdf/reader/width_calculator/type_zero.rb +8 -3
- data/lib/pdf/reader/xref.rb +31 -10
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +24 -12
- data/rbi/pdf-reader.rbi +1504 -1480
- metadata +34 -17
@@ -8,16 +8,12 @@ class PDF::Reader
|
|
8
8
|
#
|
9
9
|
class PrintReceiver
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
def initialize
|
14
|
-
@callbacks = []
|
15
|
-
end
|
16
|
-
|
11
|
+
#: (untyped) -> bool
|
17
12
|
def respond_to?(meth)
|
18
13
|
true
|
19
14
|
end
|
20
15
|
|
16
|
+
#: (Symbol, *untyped) -> void
|
21
17
|
def method_missing(methodname, *args)
|
22
18
|
puts "#{methodname} => #{args.inspect}"
|
23
19
|
end
|
@@ -12,6 +12,7 @@ class PDF::Reader
|
|
12
12
|
#
|
13
13
|
class Rc4SecurityHandler
|
14
14
|
|
15
|
+
#: (String) -> void
|
15
16
|
def initialize(key)
|
16
17
|
@encrypt_key = key
|
17
18
|
end
|
@@ -25,6 +26,7 @@ class PDF::Reader
|
|
25
26
|
# buf - a string to decrypt
|
26
27
|
# ref - a PDF::Reader::Reference for the object to decrypt
|
27
28
|
#
|
29
|
+
#: (String, PDF::Reader::Reference) -> String
|
28
30
|
def decrypt( buf, ref )
|
29
31
|
objKey = @encrypt_key.dup
|
30
32
|
(0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
|
data/lib/pdf/reader/rectangle.rb
CHANGED
@@ -20,12 +20,28 @@ module PDF
|
|
20
20
|
#
|
21
21
|
class Rectangle
|
22
22
|
|
23
|
-
|
23
|
+
#: PDF::Reader::Point
|
24
|
+
attr_reader :bottom_left
|
24
25
|
|
26
|
+
#: PDF::Reader::Point
|
27
|
+
attr_reader :bottom_right
|
28
|
+
|
29
|
+
#: PDF::Reader::Point
|
30
|
+
attr_reader :top_left
|
31
|
+
|
32
|
+
#: PDF::Reader::Point
|
33
|
+
attr_reader :top_right
|
34
|
+
|
35
|
+
#: (Numeric, Numeric, Numeric, Numeric) -> void
|
25
36
|
def initialize(x1, y1, x2, y2)
|
37
|
+
@bottom_left = Point.new(0,0) #: PDF::Reader::Point
|
38
|
+
@bottom_right = Point.new(0,0) #: PDF::Reader::Point
|
39
|
+
@top_left = Point.new(0,0) #: PDF::Reader::Point
|
40
|
+
@top_right = Point.new(0,0) #: PDF::Reader::Point
|
26
41
|
set_corners(x1, y1, x2, y2)
|
27
42
|
end
|
28
43
|
|
44
|
+
#: (Array[Numeric]) -> PDF::Reader::Rectangle
|
29
45
|
def self.from_array(arr)
|
30
46
|
if arr.size != 4
|
31
47
|
raise ArgumentError, "Only 4-element Arrays can be converted to a Rectangle"
|
@@ -39,24 +55,29 @@ module PDF
|
|
39
55
|
)
|
40
56
|
end
|
41
57
|
|
58
|
+
#: (PDF::Reader::Rectangle) -> bool
|
42
59
|
def ==(other)
|
43
60
|
to_a == other.to_a
|
44
61
|
end
|
45
62
|
|
63
|
+
#: () -> Numeric
|
46
64
|
def height
|
47
65
|
top_right.y - bottom_right.y
|
48
66
|
end
|
49
67
|
|
68
|
+
#: () -> Numeric
|
50
69
|
def width
|
51
70
|
bottom_right.x - bottom_left.x
|
52
71
|
end
|
53
72
|
|
73
|
+
#: (PDF::Reader::Point) -> bool
|
54
74
|
def contains?(point)
|
55
75
|
point.x >= bottom_left.x && point.x <= top_right.x &&
|
56
76
|
point.y >= bottom_left.y && point.y <= top_right.y
|
57
77
|
end
|
58
78
|
|
59
79
|
# A pdf-style 4-number array
|
80
|
+
#: () -> Array[Numeric]
|
60
81
|
def to_a
|
61
82
|
[
|
62
83
|
bottom_left.x,
|
@@ -66,6 +87,7 @@ module PDF
|
|
66
87
|
]
|
67
88
|
end
|
68
89
|
|
90
|
+
#: (Integer) -> void
|
69
91
|
def apply_rotation(degrees)
|
70
92
|
return if degrees != 90 && degrees != 180 && degrees != 270
|
71
93
|
|
@@ -90,6 +112,7 @@ module PDF
|
|
90
112
|
|
91
113
|
private
|
92
114
|
|
115
|
+
#: (Numeric, Numeric, Numeric, Numeric) -> void
|
93
116
|
def set_corners(x1, y1, x2, y2)
|
94
117
|
@bottom_left = PDF::Reader::Point.new(
|
95
118
|
[x1, x2].min,
|
data/lib/pdf/reader/reference.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -31,25 +31,34 @@ class PDF::Reader
|
|
31
31
|
################################################################################
|
32
32
|
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
33
33
|
class Reference
|
34
|
-
|
34
|
+
#: Integer
|
35
|
+
attr_reader :id
|
36
|
+
|
37
|
+
#: Integer
|
38
|
+
attr_reader :gen
|
35
39
|
################################################################################
|
36
40
|
# Create a new Reference to an object with the specified id and revision number
|
41
|
+
#: (Integer, Integer) -> void
|
37
42
|
def initialize(id, gen)
|
38
|
-
@id
|
43
|
+
@id = id
|
44
|
+
@gen = gen
|
39
45
|
end
|
40
46
|
################################################################################
|
41
47
|
# returns the current Reference object in an array with a single element
|
48
|
+
#: () -> Array[PDF::Reader::Reference]
|
42
49
|
def to_a
|
43
50
|
[self]
|
44
51
|
end
|
45
52
|
################################################################################
|
46
53
|
# returns the ID of this reference. Use with caution, ignores the generation id
|
54
|
+
#: () -> Integer
|
47
55
|
def to_i
|
48
56
|
self.id
|
49
57
|
end
|
50
58
|
################################################################################
|
51
59
|
# returns true if the provided object points to the same PDF Object as the
|
52
60
|
# current object
|
61
|
+
#: (Object) -> bool
|
53
62
|
def ==(obj)
|
54
63
|
return false unless obj.kind_of?(PDF::Reader::Reference)
|
55
64
|
|
@@ -60,6 +69,7 @@ class PDF::Reader
|
|
60
69
|
# returns a hash based on the PDF::Reference this object points to. Two
|
61
70
|
# different Reference objects that point to the same PDF Object will
|
62
71
|
# return an identical hash
|
72
|
+
#: () -> Integer
|
63
73
|
def hash
|
64
74
|
"#{self.id}:#{self.gen}".hash
|
65
75
|
end
|
@@ -22,45 +22,55 @@ class PDF::Reader
|
|
22
22
|
#
|
23
23
|
class RegisterReceiver
|
24
24
|
|
25
|
+
#: Array[Hash[Symbol, untyped]]
|
25
26
|
attr_accessor :callbacks
|
26
27
|
|
28
|
+
#: () -> void
|
27
29
|
def initialize
|
28
|
-
@callbacks = []
|
30
|
+
@callbacks = [] #: Array[Hash[Symbol, untyped]]
|
29
31
|
end
|
30
32
|
|
33
|
+
#: (untyped) -> bool
|
31
34
|
def respond_to?(meth)
|
32
35
|
true
|
33
36
|
end
|
34
37
|
|
38
|
+
#: (Symbol, *untyped) -> void
|
35
39
|
def method_missing(methodname, *args)
|
36
40
|
callbacks << {:name => methodname.to_sym, :args => args}
|
37
41
|
end
|
38
42
|
|
39
43
|
# count the number of times a callback fired
|
44
|
+
#: (Symbol) -> Integer
|
40
45
|
def count(methodname)
|
41
46
|
callbacks.count { |cb| cb[:name] == methodname}
|
42
47
|
end
|
43
48
|
|
44
49
|
# return the details for every time the specified callback was fired
|
50
|
+
#: (Symbol) -> Array[Hash[Symbol, untyped]]
|
45
51
|
def all(methodname)
|
46
52
|
callbacks.select { |cb| cb[:name] == methodname }
|
47
53
|
end
|
48
54
|
|
55
|
+
#: (Symbol) -> Array[Array[untyped]]
|
49
56
|
def all_args(methodname)
|
50
57
|
all(methodname).map { |cb| cb[:args] }
|
51
58
|
end
|
52
59
|
|
53
60
|
# return the details for the first time the specified callback was fired
|
61
|
+
#: (Symbol) -> Hash[Symbol, untyped]?
|
54
62
|
def first_occurance_of(methodname)
|
55
63
|
callbacks.find { |cb| cb[:name] == methodname }
|
56
64
|
end
|
57
65
|
|
58
66
|
# return the details for the final time the specified callback was fired
|
67
|
+
#: (Symbol) -> Hash[Symbol, untyped]?
|
59
68
|
def final_occurance_of(methodname)
|
60
69
|
all(methodname).last
|
61
70
|
end
|
62
71
|
|
63
72
|
# return the first occurance of a particular series of callbacks
|
73
|
+
#: (*Symbol) -> Array[Hash[Symbol, untyped]]?
|
64
74
|
def series(*methods)
|
65
75
|
return nil if methods.empty?
|
66
76
|
|
@@ -70,7 +80,10 @@ class PDF::Reader
|
|
70
80
|
indexes.each do |idx|
|
71
81
|
count = methods.size
|
72
82
|
method_indexes.each do |midx|
|
73
|
-
|
83
|
+
res = callbacks[idx+midx]
|
84
|
+
if res && res[:name] == methods[midx]
|
85
|
+
count -= 1
|
86
|
+
end
|
74
87
|
end
|
75
88
|
if count == 0
|
76
89
|
return callbacks[idx, methods.size]
|
data/lib/pdf/reader/resources.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
module PDF
|
@@ -9,6 +9,7 @@ module PDF
|
|
9
9
|
#
|
10
10
|
class Resources
|
11
11
|
|
12
|
+
#: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
|
12
13
|
def initialize(objects, resources)
|
13
14
|
@objects = objects
|
14
15
|
@resources = resources
|
@@ -20,6 +21,7 @@ module PDF
|
|
20
21
|
# with no caching. You will want to cache the results instead
|
21
22
|
# of calling it over and over.
|
22
23
|
#
|
24
|
+
#: () -> Hash[Symbol, untyped]
|
23
25
|
def color_spaces
|
24
26
|
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
25
27
|
end
|
@@ -30,6 +32,7 @@ module PDF
|
|
30
32
|
# with no caching. You will want to cache the results instead
|
31
33
|
# of calling it over and over.
|
32
34
|
#
|
35
|
+
#: () -> Hash[Symbol, untyped]
|
33
36
|
def fonts
|
34
37
|
@objects.deref_hash!(@resources[:Font]) || {}
|
35
38
|
end
|
@@ -41,6 +44,7 @@ module PDF
|
|
41
44
|
# with no caching. You will want to cache the results instead
|
42
45
|
# of calling it over and over.
|
43
46
|
#
|
47
|
+
#: () -> Hash[Symbol, untyped]
|
44
48
|
def graphic_states
|
45
49
|
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
46
50
|
end
|
@@ -51,6 +55,7 @@ module PDF
|
|
51
55
|
# with no caching. You will want to cache the results instead
|
52
56
|
# of calling it over and over.
|
53
57
|
#
|
58
|
+
#: () -> Hash[Symbol, untyped]
|
54
59
|
def patterns
|
55
60
|
@objects.deref_hash!(@resources[:Pattern]) || {}
|
56
61
|
end
|
@@ -61,6 +66,7 @@ module PDF
|
|
61
66
|
# with no caching. You will want to cache the results instead
|
62
67
|
# of calling it over and over.
|
63
68
|
#
|
69
|
+
#: () -> Array[Symbol]
|
64
70
|
def procedure_sets
|
65
71
|
@objects.deref_array!(@resources[:ProcSet]) || []
|
66
72
|
end
|
@@ -71,6 +77,7 @@ module PDF
|
|
71
77
|
# with no caching. You will want to cache the results instead
|
72
78
|
# of calling it over and over.
|
73
79
|
#
|
80
|
+
#: () -> Hash[Symbol, untyped]
|
74
81
|
def properties
|
75
82
|
@objects.deref_hash!(@resources[:Properties]) || {}
|
76
83
|
end
|
@@ -81,6 +88,7 @@ module PDF
|
|
81
88
|
# with no caching. You will want to cache the results instead
|
82
89
|
# of calling it over and over.
|
83
90
|
#
|
91
|
+
#: () -> Hash[Symbol, untyped]
|
84
92
|
def shadings
|
85
93
|
@objects.deref_hash!(@resources[:Shading]) || {}
|
86
94
|
end
|
@@ -91,8 +99,10 @@ module PDF
|
|
91
99
|
# with no caching. You will want to cache the results instead
|
92
100
|
# of calling it over and over.
|
93
101
|
#
|
102
|
+
#: () -> Hash[Symbol, PDF::Reader::Stream]
|
94
103
|
def xobjects
|
95
|
-
@objects.deref_hash!(@resources[:XObject]) || {}
|
104
|
+
dict = @objects.deref_hash!(@resources[:XObject]) || {}
|
105
|
+
TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
|
96
106
|
end
|
97
107
|
|
98
108
|
end
|
@@ -7,6 +7,13 @@ class PDF::Reader
|
|
7
7
|
# able to decrypt the file.
|
8
8
|
class SecurityHandlerFactory
|
9
9
|
|
10
|
+
#: (Hash[Symbol, untyped], Array[untyped] | nil, String | nil) -> (
|
11
|
+
#| NullSecurityHandler |
|
12
|
+
#| AesV2SecurityHandler |
|
13
|
+
#| Rc4SecurityHandler |
|
14
|
+
#| AesV3SecurityHandler |
|
15
|
+
#| UnimplementedSecurityHandler
|
16
|
+
#| )
|
10
17
|
def self.build(encrypt, doc_id, password)
|
11
18
|
doc_id ||= []
|
12
19
|
password ||= ""
|
@@ -22,6 +29,9 @@ class PDF::Reader
|
|
22
29
|
end
|
23
30
|
end
|
24
31
|
|
32
|
+
#: (Hash[Symbol, untyped], Array[untyped], String) -> (
|
33
|
+
#| AesV2SecurityHandler | Rc4SecurityHandler
|
34
|
+
#| )
|
25
35
|
def self.build_standard_handler(encrypt, doc_id, password)
|
26
36
|
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
27
37
|
key_builder = StandardKeyBuilder.new(
|
@@ -41,6 +51,7 @@ class PDF::Reader
|
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
54
|
+
#: (Hash[Symbol, untyped], Array[untyped], String) -> (AesV3SecurityHandler)
|
44
55
|
def self.build_v5_handler(encrypt, doc_id, password)
|
45
56
|
key_builder = KeyBuilderV5.new(
|
46
57
|
owner_key: encrypt[:O],
|
@@ -52,6 +63,7 @@ class PDF::Reader
|
|
52
63
|
end
|
53
64
|
|
54
65
|
# This handler supports all encryption that follows upto PDF 1.5 spec (revision 4)
|
66
|
+
#: (Hash[Symbol, untyped]) -> bool
|
55
67
|
def self.standard?(encrypt)
|
56
68
|
return false if encrypt.nil?
|
57
69
|
|
@@ -65,6 +77,7 @@ class PDF::Reader
|
|
65
77
|
# This handler supports both
|
66
78
|
# - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
|
67
79
|
# - AES-256 encryption defined in PDF 2.0 ('revision 6')
|
80
|
+
#: (Hash[Symbol, untyped]) -> untyped
|
68
81
|
def self.standard_v5?(encrypt)
|
69
82
|
return false if encrypt.nil?
|
70
83
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
|
3
4
|
require 'digest/md5'
|
4
5
|
require 'rc4'
|
@@ -23,16 +24,17 @@ class PDF::Reader
|
|
23
24
|
PassPadBytes = [ 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
|
24
25
|
0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
|
25
26
|
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
|
26
|
-
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
|
27
|
+
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ] #: Array[Integer]
|
27
28
|
|
29
|
+
#: (?Hash[Symbol, untyped]) -> void
|
28
30
|
def initialize(opts = {})
|
29
|
-
@key_length = opts[:key_length].to_i/8
|
30
|
-
@revision = opts[:revision].to_i
|
31
|
-
@owner_key = opts[:owner_key]
|
32
|
-
@user_key = opts[:user_key]
|
33
|
-
@permissions = opts[:permissions].to_i
|
34
|
-
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
35
|
-
@file_id = opts[:file_id] || ""
|
31
|
+
@key_length = opts[:key_length].to_i/8 #: Integer
|
32
|
+
@revision = opts[:revision].to_i #: Integer
|
33
|
+
@owner_key = opts[:owner_key] #: String?
|
34
|
+
@user_key = opts[:user_key] #: String?
|
35
|
+
@permissions = opts[:permissions].to_i #: Integer
|
36
|
+
@encryptMeta = opts.fetch(:encrypted_metadata, true) #: bool
|
37
|
+
@file_id = opts[:file_id] || "" #: String
|
36
38
|
|
37
39
|
if @key_length != 5 && @key_length != 16
|
38
40
|
msg = "StandardKeyBuilder only supports 40 and 128 bit\
|
@@ -47,8 +49,8 @@ class PDF::Reader
|
|
47
49
|
# decrypting the file will be returned. If the password doesn't match the file,
|
48
50
|
# and exception will be raised.
|
49
51
|
#
|
50
|
-
|
51
|
-
|
52
|
+
#: (?String) -> String
|
53
|
+
def key(pass = "")
|
52
54
|
encrypt_key = auth_owner_pass(pass)
|
53
55
|
encrypt_key ||= auth_user_pass(pass)
|
54
56
|
|
@@ -60,14 +62,17 @@ class PDF::Reader
|
|
60
62
|
|
61
63
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
62
64
|
# pp61 of spec
|
65
|
+
#
|
66
|
+
#: (?String?) -> String
|
63
67
|
def pad_pass(p="")
|
64
|
-
if p.nil? || p.empty?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
68
|
+
return PassPadBytes.pack('C*') if p.nil? || p.empty?
|
69
|
+
|
70
|
+
pTrimmedLength = [p.bytesize, 32].min
|
71
|
+
pad = PassPadBytes[0, 32 - pTrimmedLength] || []
|
72
|
+
p[0, 32].to_s + pad.pack('C*')
|
69
73
|
end
|
70
74
|
|
75
|
+
#: (String, Integer) -> String
|
71
76
|
def xor_each_byte(buf, int)
|
72
77
|
buf.each_byte.map{ |b| b^int}.pack("C*")
|
73
78
|
end
|
@@ -84,6 +89,7 @@ class PDF::Reader
|
|
84
89
|
# if the supplied password is not a valid owner password for this document
|
85
90
|
# then it returns nil
|
86
91
|
#
|
92
|
+
#: (String) -> String?
|
87
93
|
def auth_owner_pass(pass)
|
88
94
|
md5 = Digest::MD5.digest(pad_pass(pass))
|
89
95
|
if @revision > 2 then
|
@@ -92,7 +98,13 @@ class PDF::Reader
|
|
92
98
|
#first iteration decrypt owner_key
|
93
99
|
out = @owner_key
|
94
100
|
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
95
|
-
19.downto(0).each { |i|
|
101
|
+
19.downto(0).each { |i|
|
102
|
+
# The RC4 gem doen't have type annotations, so the type checker doesn't
|
103
|
+
# know decrypt() returns a string
|
104
|
+
out = TypeCheck.cast_to_string!(
|
105
|
+
RC4.new(xor_each_byte(keyBegins,i)).decrypt(out)
|
106
|
+
)
|
107
|
+
}
|
96
108
|
else
|
97
109
|
out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
|
98
110
|
end
|
@@ -110,6 +122,7 @@ class PDF::Reader
|
|
110
122
|
# if the supplied password is not a valid user password for this document
|
111
123
|
# then it returns nil
|
112
124
|
#
|
125
|
+
#: (String) -> String?
|
113
126
|
def auth_user_pass(pass)
|
114
127
|
keyBegins = make_file_key(pass)
|
115
128
|
if @revision >= 3
|
@@ -117,28 +130,29 @@ class PDF::Reader
|
|
117
130
|
out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
|
118
131
|
#zero doesn't matter -> so from 0-19
|
119
132
|
20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
|
120
|
-
pass = @user_key[0, 16] == out
|
133
|
+
pass = @user_key.to_s[0, 16] == out
|
121
134
|
else
|
122
135
|
pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
|
123
136
|
end
|
124
137
|
pass ? keyBegins : nil
|
125
138
|
end
|
126
139
|
|
140
|
+
#: (String) -> String
|
127
141
|
def make_file_key( user_pass )
|
128
142
|
# a) if there's a password, pad it to 32 bytes, else, just use the padding.
|
129
|
-
|
143
|
+
buf = pad_pass(user_pass)
|
130
144
|
# c) add owner key
|
131
|
-
|
145
|
+
buf << @owner_key
|
132
146
|
# d) add permissions 1 byte at a time, in little-endian order
|
133
|
-
(0..24).step(8){|e|
|
147
|
+
(0..24).step(8){|e| buf << (@permissions >> e & 0xFF)}
|
134
148
|
# e) add the file ID
|
135
|
-
|
149
|
+
buf << @file_id
|
136
150
|
# f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
|
137
151
|
if @revision >= 4 && !@encryptMeta
|
138
|
-
|
152
|
+
buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
|
139
153
|
end
|
140
154
|
# b) init MD5 digest + g) finish the hash
|
141
|
-
md5 = Digest::MD5.digest(
|
155
|
+
md5 = Digest::MD5.digest(buf)
|
142
156
|
# h) spin hash 50 times
|
143
157
|
if @revision >= 3
|
144
158
|
50.times {
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -34,18 +34,24 @@ class PDF::Reader
|
|
34
34
|
# compression, etc) and a stream of bytes.
|
35
35
|
#
|
36
36
|
class Stream
|
37
|
-
|
37
|
+
#: Hash[Symbol, untyped]
|
38
|
+
attr_accessor :hash
|
39
|
+
|
40
|
+
#: String
|
41
|
+
attr_accessor :data
|
38
42
|
|
39
43
|
################################################################################
|
40
44
|
# Creates a new stream with the specified dictionary and data. The dictionary
|
41
45
|
# should be a standard ruby hash, the data should be a standard ruby string.
|
46
|
+
#: (Hash[Symbol, untyped], String) -> void
|
42
47
|
def initialize(hash, data)
|
43
|
-
@hash = hash
|
48
|
+
@hash = TypeCheck.cast_to_pdf_dict!(hash) #: Hash[Symbol, untyped]
|
44
49
|
@data = data
|
45
|
-
@udata = nil
|
50
|
+
@udata = nil #: String | nil
|
46
51
|
end
|
47
52
|
################################################################################
|
48
53
|
# apply this streams filters to its data and return the result.
|
54
|
+
#: () -> String
|
49
55
|
def unfiltered_data
|
50
56
|
return @udata if @udata
|
51
57
|
@udata = data.dup
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# encoding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
# utilities.rb : General-purpose utility classes which don't fit anywhere else
|
@@ -20,13 +20,16 @@ class PDF::Reader
|
|
20
20
|
# same time, we don't want to throw away thread safety We have two
|
21
21
|
# interchangeable thread-safe cache implementations:
|
22
22
|
class SynchronizedCache
|
23
|
+
#: () -> void
|
23
24
|
def initialize
|
24
|
-
@cache = {}
|
25
|
-
@mutex = Mutex.new
|
25
|
+
@cache = {} #: Hash[Object, untyped]
|
26
|
+
@mutex = Mutex.new #: Mutex
|
26
27
|
end
|
28
|
+
#: (Object) -> untyped
|
27
29
|
def [](key)
|
28
30
|
@mutex.synchronize { @cache[key] }
|
29
31
|
end
|
32
|
+
#: (Object, (Object | nil)) -> untyped
|
30
33
|
def []=(key,value)
|
31
34
|
@mutex.synchronize { @cache[key] = value }
|
32
35
|
end
|
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
class PDF::Reader
|
@@ -7,19 +7,34 @@ class PDF::Reader
|
|
7
7
|
class TextRun
|
8
8
|
include Comparable
|
9
9
|
|
10
|
-
|
10
|
+
#: PDF::Reader::Point
|
11
|
+
attr_reader :origin
|
12
|
+
|
13
|
+
#: Numeric
|
14
|
+
attr_reader :width
|
15
|
+
|
16
|
+
#: Numeric
|
17
|
+
attr_reader :font_size
|
18
|
+
|
19
|
+
#: String
|
20
|
+
attr_reader :text
|
11
21
|
|
12
22
|
alias :to_s :text
|
13
23
|
|
24
|
+
#: (Numeric, Numeric, Numeric, Numeric, String) -> void
|
14
25
|
def initialize(x, y, width, font_size, text)
|
15
|
-
@origin = PDF::Reader::Point.new(x, y)
|
26
|
+
@origin = PDF::Reader::Point.new(x, y) #: PDF::Reader::Point
|
16
27
|
@width = width
|
17
28
|
@font_size = font_size
|
18
29
|
@text = text
|
30
|
+
@endx = nil #: Numeric | nil
|
31
|
+
@endy = nil #: Numeric | nil
|
32
|
+
@mergable_range = nil #: Range[Numeric] | nil
|
19
33
|
end
|
20
34
|
|
21
35
|
# Allows collections of TextRun objects to be sorted. They will be sorted
|
22
36
|
# in order of their position on a cartesian plain - Top Left to Bottom Right
|
37
|
+
#: (PDF::Reader::Point) -> Numeric
|
23
38
|
def <=>(other)
|
24
39
|
if x == other.x && y == other.y
|
25
40
|
0
|
@@ -31,33 +46,42 @@ class PDF::Reader
|
|
31
46
|
-1
|
32
47
|
elsif x > other.x
|
33
48
|
1
|
49
|
+
else
|
50
|
+
0 # Unreachable?
|
34
51
|
end
|
35
52
|
end
|
36
53
|
|
54
|
+
#: () -> Numeric
|
37
55
|
def x
|
38
56
|
@origin.x
|
39
57
|
end
|
40
58
|
|
59
|
+
#: () -> Numeric
|
41
60
|
def y
|
42
61
|
@origin.y
|
43
62
|
end
|
44
63
|
|
64
|
+
#: () -> Numeric
|
45
65
|
def endx
|
46
66
|
@endx ||= @origin.x + width
|
47
67
|
end
|
48
68
|
|
69
|
+
#: () -> Numeric
|
49
70
|
def endy
|
50
71
|
@endy ||= @origin.y + font_size
|
51
72
|
end
|
52
73
|
|
74
|
+
#: () -> Numeric
|
53
75
|
def mean_character_width
|
54
76
|
@width / character_count
|
55
77
|
end
|
56
78
|
|
79
|
+
#: (PDF::Reader::TextRun) -> bool
|
57
80
|
def mergable?(other)
|
58
81
|
y.to_i == other.y.to_i && font_size == other.font_size && mergable_range.include?(other.x)
|
59
82
|
end
|
60
83
|
|
84
|
+
#: (PDF::Reader::TextRun) -> PDF::Reader::TextRun
|
61
85
|
def +(other)
|
62
86
|
raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)
|
63
87
|
|
@@ -68,16 +92,19 @@ class PDF::Reader
|
|
68
92
|
end
|
69
93
|
end
|
70
94
|
|
95
|
+
#: () -> String
|
71
96
|
def inspect
|
72
97
|
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
|
73
98
|
end
|
74
99
|
|
100
|
+
#: (PDF::Reader::TextRun) -> bool
|
75
101
|
def intersect?(other_run)
|
76
102
|
x <= other_run.endx && endx >= other_run.x &&
|
77
103
|
endy >= other_run.y && y <= other_run.endy
|
78
104
|
end
|
79
105
|
|
80
106
|
# return what percentage of this text run is overlapped by another run
|
107
|
+
#: (PDF::Reader::TextRun) -> Numeric
|
81
108
|
def intersection_area_percent(other_run)
|
82
109
|
return 0 unless intersect?(other_run)
|
83
110
|
|
@@ -90,16 +117,19 @@ class PDF::Reader
|
|
90
117
|
|
91
118
|
private
|
92
119
|
|
120
|
+
#: () -> Numeric
|
93
121
|
def area
|
94
122
|
(endx - x) * (endy - y)
|
95
123
|
end
|
96
124
|
|
125
|
+
#: () -> Range[Numeric]
|
97
126
|
def mergable_range
|
98
127
|
@mergable_range ||= Range.new(endx - 3, endx + font_size)
|
99
128
|
end
|
100
129
|
|
101
130
|
# Assume string encoding is marked correctly and we can trust String#size to return a
|
102
131
|
# character count
|
132
|
+
#: () -> Float
|
103
133
|
def character_count
|
104
134
|
@text.size.to_f
|
105
135
|
end
|
data/lib/pdf/reader/token.rb
CHANGED