pdf-reader 2.14.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +22 -0
- data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +35 -17
- data/lib/pdf/reader/cid_widths.rb +7 -1
- data/lib/pdf/reader/cmap.rb +14 -3
- data/lib/pdf/reader/encoding.rb +37 -12
- data/lib/pdf/reader/error.rb +6 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +4 -0
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +90 -22
- data/lib/pdf/reader/font_descriptor.rb +76 -23
- data/lib/pdf/reader/form_xobject.rb +11 -0
- data/lib/pdf/reader/glyph_hash.rb +34 -9
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +17 -6
- data/lib/pdf/reader/no_text_filter.rb +1 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +116 -9
- data/lib/pdf/reader/object_stream.rb +19 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
- data/lib/pdf/reader/page.rb +41 -7
- data/lib/pdf/reader/page_layout.rb +25 -8
- data/lib/pdf/reader/page_state.rb +5 -2
- data/lib/pdf/reader/page_text_receiver.rb +6 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +51 -10
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +10 -1
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +9 -0
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +5 -2
- data/lib/pdf/reader/text_run.rb +28 -1
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +33 -2
- data/lib/pdf/reader/type_check.rb +10 -3
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
- data/lib/pdf/reader/width_calculator/composite.rb +5 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
- data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
- data/lib/pdf/reader/xref.rb +28 -7
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +18 -2
- data/rbi/pdf-reader.rbi +1502 -1594
- metadata +17 -11
data/lib/pdf/reader/reference.rb
CHANGED
@@ -31,26 +31,34 @@ class PDF::Reader
|
|
31
31
|
################################################################################
|
32
32
|
# An internal PDF::Reader class that represents an indirect reference to a PDF Object
|
33
33
|
class Reference
|
34
|
+
#: Integer
|
34
35
|
attr_reader :id
|
36
|
+
|
37
|
+
#: Integer
|
35
38
|
attr_reader :gen
|
36
39
|
################################################################################
|
37
40
|
# Create a new Reference to an object with the specified id and revision number
|
41
|
+
#: (Integer, Integer) -> void
|
38
42
|
def initialize(id, gen)
|
39
|
-
@id
|
43
|
+
@id = id
|
44
|
+
@gen = gen
|
40
45
|
end
|
41
46
|
################################################################################
|
42
47
|
# returns the current Reference object in an array with a single element
|
48
|
+
#: () -> Array[PDF::Reader::Reference]
|
43
49
|
def to_a
|
44
50
|
[self]
|
45
51
|
end
|
46
52
|
################################################################################
|
47
53
|
# returns the ID of this reference. Use with caution, ignores the generation id
|
54
|
+
#: () -> Integer
|
48
55
|
def to_i
|
49
56
|
self.id
|
50
57
|
end
|
51
58
|
################################################################################
|
52
59
|
# returns true if the provided object points to the same PDF Object as the
|
53
60
|
# current object
|
61
|
+
#: (Object) -> bool
|
54
62
|
def ==(obj)
|
55
63
|
return false unless obj.kind_of?(PDF::Reader::Reference)
|
56
64
|
|
@@ -61,6 +69,7 @@ class PDF::Reader
|
|
61
69
|
# returns a hash based on the PDF::Reference this object points to. Two
|
62
70
|
# different Reference objects that point to the same PDF Object will
|
63
71
|
# return an identical hash
|
72
|
+
#: () -> Integer
|
64
73
|
def hash
|
65
74
|
"#{self.id}:#{self.gen}".hash
|
66
75
|
end
|
@@ -22,45 +22,55 @@ class PDF::Reader
|
|
22
22
|
#
|
23
23
|
class RegisterReceiver
|
24
24
|
|
25
|
+
#: Array[Hash[Symbol, untyped]]
|
25
26
|
attr_accessor :callbacks
|
26
27
|
|
28
|
+
#: () -> void
|
27
29
|
def initialize
|
28
|
-
@callbacks = []
|
30
|
+
@callbacks = [] #: Array[Hash[Symbol, untyped]]
|
29
31
|
end
|
30
32
|
|
33
|
+
#: (untyped) -> bool
|
31
34
|
def respond_to?(meth)
|
32
35
|
true
|
33
36
|
end
|
34
37
|
|
38
|
+
#: (Symbol, *untyped) -> void
|
35
39
|
def method_missing(methodname, *args)
|
36
40
|
callbacks << {:name => methodname.to_sym, :args => args}
|
37
41
|
end
|
38
42
|
|
39
43
|
# count the number of times a callback fired
|
44
|
+
#: (Symbol) -> Integer
|
40
45
|
def count(methodname)
|
41
46
|
callbacks.count { |cb| cb[:name] == methodname}
|
42
47
|
end
|
43
48
|
|
44
49
|
# return the details for every time the specified callback was fired
|
50
|
+
#: (Symbol) -> Array[Hash[Symbol, untyped]]
|
45
51
|
def all(methodname)
|
46
52
|
callbacks.select { |cb| cb[:name] == methodname }
|
47
53
|
end
|
48
54
|
|
55
|
+
#: (Symbol) -> Array[Array[untyped]]
|
49
56
|
def all_args(methodname)
|
50
57
|
all(methodname).map { |cb| cb[:args] }
|
51
58
|
end
|
52
59
|
|
53
60
|
# return the details for the first time the specified callback was fired
|
61
|
+
#: (Symbol) -> Hash[Symbol, untyped]?
|
54
62
|
def first_occurance_of(methodname)
|
55
63
|
callbacks.find { |cb| cb[:name] == methodname }
|
56
64
|
end
|
57
65
|
|
58
66
|
# return the details for the final time the specified callback was fired
|
67
|
+
#: (Symbol) -> Hash[Symbol, untyped]?
|
59
68
|
def final_occurance_of(methodname)
|
60
69
|
all(methodname).last
|
61
70
|
end
|
62
71
|
|
63
72
|
# return the first occurrence of a particular series of callbacks
|
73
|
+
#: (*Symbol) -> Array[Hash[Symbol, untyped]]?
|
64
74
|
def series(*methods)
|
65
75
|
return nil if methods.empty?
|
66
76
|
|
@@ -70,7 +80,10 @@ class PDF::Reader
|
|
70
80
|
indexes.each do |idx|
|
71
81
|
count = methods.size
|
72
82
|
method_indexes.each do |midx|
|
73
|
-
|
83
|
+
res = callbacks[idx+midx]
|
84
|
+
if res && res[:name] == methods[midx]
|
85
|
+
count -= 1
|
86
|
+
end
|
74
87
|
end
|
75
88
|
if count == 0
|
76
89
|
return callbacks[idx, methods.size]
|
data/lib/pdf/reader/resources.rb
CHANGED
@@ -9,6 +9,7 @@ module PDF
|
|
9
9
|
#
|
10
10
|
class Resources
|
11
11
|
|
12
|
+
#: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
|
12
13
|
def initialize(objects, resources)
|
13
14
|
@objects = objects
|
14
15
|
@resources = resources
|
@@ -20,6 +21,7 @@ module PDF
|
|
20
21
|
# with no caching. You will want to cache the results instead
|
21
22
|
# of calling it over and over.
|
22
23
|
#
|
24
|
+
#: () -> Hash[Symbol, untyped]
|
23
25
|
def color_spaces
|
24
26
|
@objects.deref_hash!(@resources[:ColorSpace]) || {}
|
25
27
|
end
|
@@ -30,6 +32,7 @@ module PDF
|
|
30
32
|
# with no caching. You will want to cache the results instead
|
31
33
|
# of calling it over and over.
|
32
34
|
#
|
35
|
+
#: () -> Hash[Symbol, untyped]
|
33
36
|
def fonts
|
34
37
|
@objects.deref_hash!(@resources[:Font]) || {}
|
35
38
|
end
|
@@ -41,6 +44,7 @@ module PDF
|
|
41
44
|
# with no caching. You will want to cache the results instead
|
42
45
|
# of calling it over and over.
|
43
46
|
#
|
47
|
+
#: () -> Hash[Symbol, untyped]
|
44
48
|
def graphic_states
|
45
49
|
@objects.deref_hash!(@resources[:ExtGState]) || {}
|
46
50
|
end
|
@@ -51,6 +55,7 @@ module PDF
|
|
51
55
|
# with no caching. You will want to cache the results instead
|
52
56
|
# of calling it over and over.
|
53
57
|
#
|
58
|
+
#: () -> Hash[Symbol, untyped]
|
54
59
|
def patterns
|
55
60
|
@objects.deref_hash!(@resources[:Pattern]) || {}
|
56
61
|
end
|
@@ -61,6 +66,7 @@ module PDF
|
|
61
66
|
# with no caching. You will want to cache the results instead
|
62
67
|
# of calling it over and over.
|
63
68
|
#
|
69
|
+
#: () -> Array[Symbol]
|
64
70
|
def procedure_sets
|
65
71
|
@objects.deref_array!(@resources[:ProcSet]) || []
|
66
72
|
end
|
@@ -71,6 +77,7 @@ module PDF
|
|
71
77
|
# with no caching. You will want to cache the results instead
|
72
78
|
# of calling it over and over.
|
73
79
|
#
|
80
|
+
#: () -> Hash[Symbol, untyped]
|
74
81
|
def properties
|
75
82
|
@objects.deref_hash!(@resources[:Properties]) || {}
|
76
83
|
end
|
@@ -81,6 +88,7 @@ module PDF
|
|
81
88
|
# with no caching. You will want to cache the results instead
|
82
89
|
# of calling it over and over.
|
83
90
|
#
|
91
|
+
#: () -> Hash[Symbol, untyped]
|
84
92
|
def shadings
|
85
93
|
@objects.deref_hash!(@resources[:Shading]) || {}
|
86
94
|
end
|
@@ -91,6 +99,7 @@ module PDF
|
|
91
99
|
# with no caching. You will want to cache the results instead
|
92
100
|
# of calling it over and over.
|
93
101
|
#
|
102
|
+
#: () -> Hash[Symbol, PDF::Reader::Stream]
|
94
103
|
def xobjects
|
95
104
|
dict = @objects.deref_hash!(@resources[:XObject]) || {}
|
96
105
|
TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
|
@@ -7,6 +7,13 @@ class PDF::Reader
|
|
7
7
|
# able to decrypt the file.
|
8
8
|
class SecurityHandlerFactory
|
9
9
|
|
10
|
+
#: (Hash[Symbol, untyped], Array[untyped] | nil, String | nil) -> (
|
11
|
+
#| NullSecurityHandler |
|
12
|
+
#| AesV2SecurityHandler |
|
13
|
+
#| Rc4SecurityHandler |
|
14
|
+
#| AesV3SecurityHandler |
|
15
|
+
#| UnimplementedSecurityHandler
|
16
|
+
#| )
|
10
17
|
def self.build(encrypt, doc_id, password)
|
11
18
|
doc_id ||= []
|
12
19
|
password ||= ""
|
@@ -22,6 +29,9 @@ class PDF::Reader
|
|
22
29
|
end
|
23
30
|
end
|
24
31
|
|
32
|
+
#: (Hash[Symbol, untyped], Array[untyped], String) -> (
|
33
|
+
#| AesV2SecurityHandler | Rc4SecurityHandler
|
34
|
+
#| )
|
25
35
|
def self.build_standard_handler(encrypt, doc_id, password)
|
26
36
|
encmeta = !encrypt.has_key?(:EncryptMetadata) || encrypt[:EncryptMetadata].to_s == "true"
|
27
37
|
key_builder = StandardKeyBuilder.new(
|
@@ -41,6 +51,7 @@ class PDF::Reader
|
|
41
51
|
end
|
42
52
|
end
|
43
53
|
|
54
|
+
#: (Hash[Symbol, untyped], Array[untyped], String) -> (AesV3SecurityHandler)
|
44
55
|
def self.build_v5_handler(encrypt, doc_id, password)
|
45
56
|
key_builder = KeyBuilderV5.new(
|
46
57
|
owner_key: encrypt[:O],
|
@@ -52,6 +63,7 @@ class PDF::Reader
|
|
52
63
|
end
|
53
64
|
|
54
65
|
# This handler supports all encryption that follows up to the PDF 1.5 spec (revision 4)
|
66
|
+
#: (Hash[Symbol, untyped]) -> bool
|
55
67
|
def self.standard?(encrypt)
|
56
68
|
return false if encrypt.nil?
|
57
69
|
|
@@ -65,6 +77,7 @@ class PDF::Reader
|
|
65
77
|
# This handler supports both
|
66
78
|
# - AES-256 encryption defined in PDF 1.7 Extension Level 3 ('revision 5')
|
67
79
|
# - AES-256 encryption defined in PDF 2.0 ('revision 6')
|
80
|
+
#: (Hash[Symbol, untyped]) -> untyped
|
68
81
|
def self.standard_v5?(encrypt)
|
69
82
|
return false if encrypt.nil?
|
70
83
|
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
# typed: strict
|
2
3
|
|
3
4
|
require 'digest/md5'
|
4
5
|
require 'rc4'
|
@@ -23,16 +24,17 @@ class PDF::Reader
|
|
23
24
|
PassPadBytes = [ 0x28, 0xbf, 0x4e, 0x5e, 0x4e, 0x75, 0x8a, 0x41,
|
24
25
|
0x64, 0x00, 0x4e, 0x56, 0xff, 0xfa, 0x01, 0x08,
|
25
26
|
0x2e, 0x2e, 0x00, 0xb6, 0xd0, 0x68, 0x3e, 0x80,
|
26
|
-
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ]
|
27
|
+
0x2f, 0x0c, 0xa9, 0xfe, 0x64, 0x53, 0x69, 0x7a ] #: Array[Integer]
|
27
28
|
|
29
|
+
#: (?Hash[Symbol, untyped]) -> void
|
28
30
|
def initialize(opts = {})
|
29
|
-
@key_length = opts[:key_length].to_i/8
|
30
|
-
@revision = opts[:revision].to_i
|
31
|
-
@owner_key = opts[:owner_key]
|
32
|
-
@user_key = opts[:user_key]
|
33
|
-
@permissions = opts[:permissions].to_i
|
34
|
-
@encryptMeta = opts.fetch(:encrypted_metadata, true)
|
35
|
-
@file_id = opts[:file_id] || ""
|
31
|
+
@key_length = opts[:key_length].to_i/8 #: Integer
|
32
|
+
@revision = opts[:revision].to_i #: Integer
|
33
|
+
@owner_key = opts[:owner_key] #: String?
|
34
|
+
@user_key = opts[:user_key] #: String?
|
35
|
+
@permissions = opts[:permissions].to_i #: Integer
|
36
|
+
@encryptMeta = opts.fetch(:encrypted_metadata, true) #: bool
|
37
|
+
@file_id = opts[:file_id] || "" #: String
|
36
38
|
|
37
39
|
if @key_length != 5 && @key_length != 16
|
38
40
|
msg = "StandardKeyBuilder only supports 40 and 128 bit\
|
@@ -47,8 +49,8 @@ class PDF::Reader
|
|
47
49
|
# decrypting the file will be returned. If the password doesn't match the file,
|
48
50
|
# an exception will be raised.
|
49
51
|
#
|
50
|
-
|
51
|
-
|
52
|
+
#: (?String) -> String
|
53
|
+
def key(pass = "")
|
52
54
|
encrypt_key = auth_owner_pass(pass)
|
53
55
|
encrypt_key ||= auth_user_pass(pass)
|
54
56
|
|
@@ -60,14 +62,17 @@ class PDF::Reader
|
|
60
62
|
|
61
63
|
# Pads supplied password to 32bytes using PassPadBytes as specified on
|
62
64
|
# pp61 of spec
|
65
|
+
#
|
66
|
+
#: (?String?) -> String
|
63
67
|
def pad_pass(p="")
|
64
|
-
if p.nil? || p.empty?
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
68
|
+
return PassPadBytes.pack('C*') if p.nil? || p.empty?
|
69
|
+
|
70
|
+
pTrimmedLength = [p.bytesize, 32].min
|
71
|
+
pad = PassPadBytes[0, 32 - pTrimmedLength] || []
|
72
|
+
p[0, 32].to_s + pad.pack('C*')
|
69
73
|
end
|
70
74
|
|
75
|
+
#: (String, Integer) -> String
|
71
76
|
def xor_each_byte(buf, int)
|
72
77
|
buf.each_byte.map{ |b| b^int}.pack("C*")
|
73
78
|
end
|
@@ -84,6 +89,7 @@ class PDF::Reader
|
|
84
89
|
# if the supplied password is not a valid owner password for this document
|
85
90
|
# then it returns nil
|
86
91
|
#
|
92
|
+
#: (String) -> String?
|
87
93
|
def auth_owner_pass(pass)
|
88
94
|
md5 = Digest::MD5.digest(pad_pass(pass))
|
89
95
|
if @revision > 2 then
|
@@ -92,7 +98,13 @@ class PDF::Reader
|
|
92
98
|
#first iteration decrypt owner_key
|
93
99
|
out = @owner_key
|
94
100
|
#RC4 keyed with (keyBegins XOR with iteration #) to decrypt previous out
|
95
|
-
19.downto(0).each { |i|
|
101
|
+
19.downto(0).each { |i|
|
102
|
+
# The RC4 gem doesn't have type annotations, so the type checker doesn't
|
103
|
+
# know decrypt() returns a string
|
104
|
+
out = TypeCheck.cast_to_string!(
|
105
|
+
RC4.new(xor_each_byte(keyBegins,i)).decrypt(out)
|
106
|
+
)
|
107
|
+
}
|
96
108
|
else
|
97
109
|
out = RC4.new( md5[0, 5] ).decrypt( @owner_key )
|
98
110
|
end
|
@@ -110,6 +122,7 @@ class PDF::Reader
|
|
110
122
|
# if the supplied password is not a valid user password for this document
|
111
123
|
# then it returns nil
|
112
124
|
#
|
125
|
+
#: (String) -> String?
|
113
126
|
def auth_user_pass(pass)
|
114
127
|
keyBegins = make_file_key(pass)
|
115
128
|
if @revision >= 3
|
@@ -117,28 +130,29 @@ class PDF::Reader
|
|
117
130
|
out = Digest::MD5.digest(PassPadBytes.pack("C*") + @file_id)
|
118
131
|
#zero doesn't matter -> so from 0-19
|
119
132
|
20.times{ |i| out=RC4.new(xor_each_byte(keyBegins, i)).encrypt(out) }
|
120
|
-
pass = @user_key[0, 16] == out
|
133
|
+
pass = @user_key.to_s[0, 16] == out
|
121
134
|
else
|
122
135
|
pass = RC4.new(keyBegins).encrypt(PassPadBytes.pack("C*")) == @user_key
|
123
136
|
end
|
124
137
|
pass ? keyBegins : nil
|
125
138
|
end
|
126
139
|
|
140
|
+
#: (String) -> String
|
127
141
|
def make_file_key( user_pass )
|
128
142
|
# a) if there's a password, pad it to 32 bytes, else, just use the padding.
|
129
|
-
|
143
|
+
buf = pad_pass(user_pass)
|
130
144
|
# c) add owner key
|
131
|
-
|
145
|
+
buf << @owner_key
|
132
146
|
# d) add permissions 1 byte at a time, in little-endian order
|
133
|
-
(0..24).step(8){|e|
|
147
|
+
(0..24).step(8){|e| buf << (@permissions >> e & 0xFF)}
|
134
148
|
# e) add the file ID
|
135
|
-
|
149
|
+
buf << @file_id
|
136
150
|
# f) if revision >= 4 and metadata not encrypted then add 4 bytes of 0xFF
|
137
151
|
if @revision >= 4 && !@encryptMeta
|
138
|
-
|
152
|
+
buf << [0xFF,0xFF,0xFF,0xFF].pack('C*')
|
139
153
|
end
|
140
154
|
# b) init MD5 digest + g) finish the hash
|
141
|
-
md5 = Digest::MD5.digest(
|
155
|
+
md5 = Digest::MD5.digest(buf)
|
142
156
|
# h) spin hash 50 times
|
143
157
|
if @revision >= 3
|
144
158
|
50.times {
|
data/lib/pdf/reader/stream.rb
CHANGED
@@ -34,18 +34,24 @@ class PDF::Reader
|
|
34
34
|
# compression, etc) and a stream of bytes.
|
35
35
|
#
|
36
36
|
class Stream
|
37
|
-
|
37
|
+
#: Hash[Symbol, untyped]
|
38
|
+
attr_accessor :hash
|
39
|
+
|
40
|
+
#: String
|
41
|
+
attr_accessor :data
|
38
42
|
|
39
43
|
################################################################################
|
40
44
|
# Creates a new stream with the specified dictionary and data. The dictionary
|
41
45
|
# should be a standard ruby hash, the data should be a standard ruby string.
|
46
|
+
#: (Hash[Symbol, untyped], String) -> void
|
42
47
|
def initialize(hash, data)
|
43
|
-
@hash = TypeCheck.cast_to_pdf_dict!(hash)
|
48
|
+
@hash = TypeCheck.cast_to_pdf_dict!(hash) #: Hash[Symbol, untyped]
|
44
49
|
@data = data
|
45
|
-
@udata = nil
|
50
|
+
@udata = nil #: String | nil
|
46
51
|
end
|
47
52
|
################################################################################
|
48
53
|
# apply this streams filters to its data and return the result.
|
54
|
+
#: () -> String
|
49
55
|
def unfiltered_data
|
50
56
|
return @udata if @udata
|
51
57
|
@udata = data.dup
|
@@ -20,13 +20,16 @@ class PDF::Reader
|
|
20
20
|
# same time, we don't want to throw away thread safety. We have two
|
21
21
|
# interchangeable thread-safe cache implementations:
|
22
22
|
class SynchronizedCache
|
23
|
+
#: () -> void
|
23
24
|
def initialize
|
24
|
-
@cache = {}
|
25
|
-
@mutex = Mutex.new
|
25
|
+
@cache = {} #: Hash[Object, untyped]
|
26
|
+
@mutex = Mutex.new #: Mutex
|
26
27
|
end
|
28
|
+
#: (Object) -> untyped
|
27
29
|
def [](key)
|
28
30
|
@mutex.synchronize { @cache[key] }
|
29
31
|
end
|
32
|
+
#: (Object, (Object | nil)) -> untyped
|
30
33
|
def []=(key,value)
|
31
34
|
@mutex.synchronize { @cache[key] = value }
|
32
35
|
end
|
data/lib/pdf/reader/text_run.rb
CHANGED
@@ -7,22 +7,34 @@ class PDF::Reader
|
|
7
7
|
class TextRun
|
8
8
|
include Comparable
|
9
9
|
|
10
|
+
#: PDF::Reader::Point
|
10
11
|
attr_reader :origin
|
12
|
+
|
13
|
+
#: Numeric
|
11
14
|
attr_reader :width
|
15
|
+
|
16
|
+
#: Numeric
|
12
17
|
attr_reader :font_size
|
18
|
+
|
19
|
+
#: String
|
13
20
|
attr_reader :text
|
14
21
|
|
15
22
|
alias :to_s :text
|
16
23
|
|
24
|
+
#: (Numeric, Numeric, Numeric, Numeric, String) -> void
|
17
25
|
def initialize(x, y, width, font_size, text)
|
18
|
-
@origin = PDF::Reader::Point.new(x, y)
|
26
|
+
@origin = PDF::Reader::Point.new(x, y) #: PDF::Reader::Point
|
19
27
|
@width = width
|
20
28
|
@font_size = font_size
|
21
29
|
@text = text
|
30
|
+
@endx = nil #: Numeric | nil
|
31
|
+
@endy = nil #: Numeric | nil
|
32
|
+
@mergable_range = nil #: Range[Numeric] | nil
|
22
33
|
end
|
23
34
|
|
24
35
|
# Allows collections of TextRun objects to be sorted. They will be sorted
|
25
36
|
# in order of their position on a Cartesian plane - Top Left to Bottom Right
|
37
|
+
#: (PDF::Reader::Point) -> Numeric
|
26
38
|
def <=>(other)
|
27
39
|
if x == other.x && y == other.y
|
28
40
|
0
|
@@ -34,33 +46,42 @@ class PDF::Reader
|
|
34
46
|
-1
|
35
47
|
elsif x > other.x
|
36
48
|
1
|
49
|
+
else
|
50
|
+
0 # Unreachable?
|
37
51
|
end
|
38
52
|
end
|
39
53
|
|
54
|
+
#: () -> Numeric
|
40
55
|
def x
|
41
56
|
@origin.x
|
42
57
|
end
|
43
58
|
|
59
|
+
#: () -> Numeric
|
44
60
|
def y
|
45
61
|
@origin.y
|
46
62
|
end
|
47
63
|
|
64
|
+
#: () -> Numeric
|
48
65
|
def endx
|
49
66
|
@endx ||= @origin.x + width
|
50
67
|
end
|
51
68
|
|
69
|
+
#: () -> Numeric
|
52
70
|
def endy
|
53
71
|
@endy ||= @origin.y + font_size
|
54
72
|
end
|
55
73
|
|
74
|
+
#: () -> Numeric
|
56
75
|
def mean_character_width
|
57
76
|
@width / character_count
|
58
77
|
end
|
59
78
|
|
79
|
+
#: (PDF::Reader::TextRun) -> bool
|
60
80
|
def mergable?(other)
|
61
81
|
y.to_i == other.y.to_i && font_size == other.font_size && mergable_range.include?(other.x)
|
62
82
|
end
|
63
83
|
|
84
|
+
#: (PDF::Reader::TextRun) -> PDF::Reader::TextRun
|
64
85
|
def +(other)
|
65
86
|
raise ArgumentError, "#{other} cannot be merged with this run" unless mergable?(other)
|
66
87
|
|
@@ -71,16 +92,19 @@ class PDF::Reader
|
|
71
92
|
end
|
72
93
|
end
|
73
94
|
|
95
|
+
#: () -> String
|
74
96
|
def inspect
|
75
97
|
"#{text} w:#{width} f:#{font_size} @#{x},#{y}"
|
76
98
|
end
|
77
99
|
|
100
|
+
#: (PDF::Reader::TextRun) -> bool
|
78
101
|
def intersect?(other_run)
|
79
102
|
x <= other_run.endx && endx >= other_run.x &&
|
80
103
|
endy >= other_run.y && y <= other_run.endy
|
81
104
|
end
|
82
105
|
|
83
106
|
# return what percentage of this text run is overlapped by another run
|
107
|
+
#: (PDF::Reader::TextRun) -> Numeric
|
84
108
|
def intersection_area_percent(other_run)
|
85
109
|
return 0 unless intersect?(other_run)
|
86
110
|
|
@@ -93,16 +117,19 @@ class PDF::Reader
|
|
93
117
|
|
94
118
|
private
|
95
119
|
|
120
|
+
#: () -> Numeric
|
96
121
|
def area
|
97
122
|
(endx - x) * (endy - y)
|
98
123
|
end
|
99
124
|
|
125
|
+
#: () -> Range[Numeric]
|
100
126
|
def mergable_range
|
101
127
|
@mergable_range ||= Range.new(endx - 3, endx + font_size)
|
102
128
|
end
|
103
129
|
|
104
130
|
# Assume string encoding is marked correctly and we can trust String#size to return a
|
105
131
|
# character count
|
132
|
+
#: () -> Float
|
106
133
|
def character_count
|
107
134
|
@text.size.to_f
|
108
135
|
end
|
data/lib/pdf/reader/token.rb
CHANGED
@@ -14,16 +14,40 @@ class PDF::Reader
|
|
14
14
|
# only 6 numbers. This is important to save CPU time, memory and GC pressure
|
15
15
|
# caused by allocating too many unnecessary objects.
|
16
16
|
class TransformationMatrix
|
17
|
-
|
17
|
+
#: Numeric
|
18
|
+
attr_reader :a
|
18
19
|
|
20
|
+
#: Numeric
|
21
|
+
attr_reader :b
|
22
|
+
|
23
|
+
#: Numeric
|
24
|
+
attr_reader :c
|
25
|
+
|
26
|
+
#: Numeric
|
27
|
+
attr_reader :d
|
28
|
+
|
29
|
+
#: Numeric
|
30
|
+
attr_reader :e
|
31
|
+
|
32
|
+
#: Numeric
|
33
|
+
attr_reader :f
|
34
|
+
|
35
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
19
36
|
def initialize(a, b, c, d, e, f)
|
20
|
-
@a
|
37
|
+
@a = a
|
38
|
+
@b = b
|
39
|
+
@c = c
|
40
|
+
@d = d
|
41
|
+
@e = e
|
42
|
+
@f = f
|
21
43
|
end
|
22
44
|
|
45
|
+
#: () -> String
|
23
46
|
def inspect
|
24
47
|
"#{a}, #{b}, 0,\n#{c}, #{d}, #{0},\n#{e}, #{f}, 1"
|
25
48
|
end
|
26
49
|
|
50
|
+
#: () -> [Numeric]
|
27
51
|
def to_a
|
28
52
|
[@a,@b,0,
|
29
53
|
@c,@d,0,
|
@@ -51,6 +75,7 @@ class PDF::Reader
|
|
51
75
|
# displacement to speed up processing documents that use vertical
|
52
76
|
# writing systems
|
53
77
|
#
|
78
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> PDF::Reader::TransformationMatrix
|
54
79
|
def multiply!(a,b,c, d,e,f)
|
55
80
|
if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
|
56
81
|
# the identity matrix, no effect
|
@@ -90,6 +115,7 @@ class PDF::Reader
|
|
90
115
|
# [ 3 4 0 ] x [ 0 1 0 ]
|
91
116
|
# [ 5 6 1 ] [ e2 0 1 ]
|
92
117
|
#
|
118
|
+
#: (Numeric) -> void
|
93
119
|
def horizontal_displacement_multiply!(e2)
|
94
120
|
@e = @e + e2
|
95
121
|
end
|
@@ -105,6 +131,7 @@ class PDF::Reader
|
|
105
131
|
# [ 0 1 0 ] x [ 3 4 0 ]
|
106
132
|
# [ 5 0 1 ] [ 5 6 1 ]
|
107
133
|
#
|
134
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
108
135
|
def horizontal_displacement_multiply_reversed!(a2,b2,c2,d2,e2,f2)
|
109
136
|
newa = a2
|
110
137
|
newb = b2
|
@@ -124,6 +151,7 @@ class PDF::Reader
|
|
124
151
|
# [ 3 4 0 ] x [ 0 5 0 ]
|
125
152
|
# [ 5 6 1 ] [ 0 0 1 ]
|
126
153
|
#
|
154
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
127
155
|
def xy_scaling_multiply!(a2,b2,c2,d2,e2,f2)
|
128
156
|
newa = @a * a2
|
129
157
|
newb = @b * d2
|
@@ -143,6 +171,7 @@ class PDF::Reader
|
|
143
171
|
# [ 0 5 0 ] x [ 3 4 0 ]
|
144
172
|
# [ 0 0 1 ] [ 5 6 1 ]
|
145
173
|
#
|
174
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
146
175
|
def xy_scaling_multiply_reversed!(a2,b2,c2,d2,e2,f2)
|
147
176
|
newa = @a * a2
|
148
177
|
newb = @a * b2
|
@@ -163,6 +192,7 @@ class PDF::Reader
|
|
163
192
|
# [ c d 0 ] x [ c d 0 ]
|
164
193
|
# [ e f 1 ] [ e f 1 ]
|
165
194
|
#
|
195
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
166
196
|
def regular_multiply!(a2,b2,c2,d2,e2,f2)
|
167
197
|
newa = (@a * a2) + (@b * c2) + (e2 * 0)
|
168
198
|
newb = (@a * b2) + (@b * d2) + (f2 * 0)
|
@@ -183,6 +213,7 @@ class PDF::Reader
|
|
183
213
|
# [ c d 0 ] x [ c d 0 ]
|
184
214
|
# [ e f 1 ] [ e f 1 ]
|
185
215
|
#
|
216
|
+
#: (Numeric, Numeric, Numeric, Numeric, Numeric, Numeric) -> void
|
186
217
|
def faster_multiply!(a2,b2,c2, d2,e2,f2)
|
187
218
|
newa = (@a * a2) + (@b * c2)
|
188
219
|
newb = (@a * b2) + (@b * d2)
|
@@ -9,6 +9,7 @@ module PDF
|
|
9
9
|
#
|
10
10
|
class TypeCheck
|
11
11
|
|
12
|
+
#: (untyped) -> Integer
|
12
13
|
def self.cast_to_int!(obj)
|
13
14
|
if obj.is_a?(Integer)
|
14
15
|
obj
|
@@ -21,6 +22,7 @@ module PDF
|
|
21
22
|
end
|
22
23
|
end
|
23
24
|
|
25
|
+
#: (untyped) -> Numeric
|
24
26
|
def self.cast_to_numeric!(obj)
|
25
27
|
if obj.is_a?(Numeric)
|
26
28
|
obj
|
@@ -35,6 +37,7 @@ module PDF
|
|
35
37
|
end
|
36
38
|
end
|
37
39
|
|
40
|
+
#: (untyped) -> String
|
38
41
|
def self.cast_to_string!(string)
|
39
42
|
if string.is_a?(String)
|
40
43
|
string
|
@@ -47,6 +50,7 @@ module PDF
|
|
47
50
|
end
|
48
51
|
end
|
49
52
|
|
53
|
+
#: (untyped) -> Symbol | nil
|
50
54
|
def self.cast_to_symbol(obj)
|
51
55
|
if obj.is_a?(Symbol)
|
52
56
|
obj
|
@@ -59,15 +63,17 @@ module PDF
|
|
59
63
|
end
|
60
64
|
end
|
61
65
|
|
66
|
+
#: (untyped) -> Symbol
|
62
67
|
def self.cast_to_symbol!(obj)
|
63
68
|
res = cast_to_symbol(obj)
|
64
|
-
if res
|
65
|
-
res
|
66
|
-
else
|
69
|
+
if res.nil?
|
67
70
|
raise MalformedPDFError, "Unable to cast to symbol"
|
71
|
+
else
|
72
|
+
res
|
68
73
|
end
|
69
74
|
end
|
70
75
|
|
76
|
+
#: (untyped) -> Hash[Symbol, untyped]
|
71
77
|
def self.cast_to_pdf_dict!(obj)
|
72
78
|
if obj.is_a?(Hash)
|
73
79
|
obj
|
@@ -78,6 +84,7 @@ module PDF
|
|
78
84
|
end
|
79
85
|
end
|
80
86
|
|
87
|
+
#: (untyped) -> Hash[Symbol, PDF::Reader::Stream]
|
81
88
|
def self.cast_to_pdf_dict_with_stream_values!(obj)
|
82
89
|
if obj.is_a?(Hash)
|
83
90
|
result = Hash.new
|