pdf-reader 2.14.0 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +22 -0
- data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +35 -17
- data/lib/pdf/reader/cid_widths.rb +7 -1
- data/lib/pdf/reader/cmap.rb +14 -3
- data/lib/pdf/reader/encoding.rb +37 -12
- data/lib/pdf/reader/error.rb +6 -0
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +4 -0
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +90 -22
- data/lib/pdf/reader/font_descriptor.rb +76 -23
- data/lib/pdf/reader/form_xobject.rb +11 -0
- data/lib/pdf/reader/glyph_hash.rb +34 -9
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +17 -6
- data/lib/pdf/reader/no_text_filter.rb +1 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +116 -9
- data/lib/pdf/reader/object_stream.rb +19 -2
- data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
- data/lib/pdf/reader/page.rb +41 -7
- data/lib/pdf/reader/page_layout.rb +25 -8
- data/lib/pdf/reader/page_state.rb +5 -2
- data/lib/pdf/reader/page_text_receiver.rb +6 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +51 -10
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +10 -1
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +9 -0
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +5 -2
- data/lib/pdf/reader/text_run.rb +28 -1
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +33 -2
- data/lib/pdf/reader/type_check.rb +10 -3
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
- data/lib/pdf/reader/width_calculator/composite.rb +5 -1
- data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
- data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
- data/lib/pdf/reader/xref.rb +28 -7
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +18 -2
- data/rbi/pdf-reader.rbi +1502 -1594
- metadata +17 -11
data/lib/pdf/reader/encoding.rb
CHANGED
@@ -32,25 +32,31 @@ class PDF::Reader
|
|
32
32
|
# convert strings of various PDF-dialect encodings into UTF-8.
|
33
33
|
class Encoding # :nodoc:
|
34
34
|
CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
|
35
|
-
24,25,26,27,28,29,30,31]
|
36
|
-
UNKNOWN_CHAR = 0x25AF # ▯
|
35
|
+
24,25,26,27,28,29,30,31] #: Array[Integer]
|
36
|
+
UNKNOWN_CHAR = 0x25AF #: Integer # ▯
|
37
37
|
|
38
|
+
#: String
|
38
39
|
attr_reader :unpack
|
39
40
|
|
41
|
+
#: (Hash[Symbol, untyped] | Symbol | nil) -> void
|
40
42
|
def initialize(enc)
|
41
|
-
|
42
|
-
@
|
43
|
+
# maps from character codes to Unicode codepoints
|
44
|
+
@mapping = default_mapping #: Hash[Integer, Integer]
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
+
# maps from character codes to UTF-8 strings.
|
47
|
+
@string_cache = {} #: Hash[Integer, String]
|
48
|
+
|
49
|
+
@enc_name = :StandardEncoding #: Symbol
|
50
|
+
if enc.kind_of?(Hash)
|
51
|
+
@enc_name = enc[:Encoding] || enc[:BaseEncoding]
|
46
52
|
elsif enc && enc.respond_to?(:to_sym)
|
47
|
-
enc.to_sym
|
48
|
-
else
|
49
|
-
:StandardEncoding
|
53
|
+
@enc_name = enc.to_sym
|
50
54
|
end
|
51
55
|
|
52
|
-
@unpack = get_unpack(@enc_name)
|
53
|
-
@map_file = get_mapping_file(@enc_name)
|
56
|
+
@unpack = get_unpack(@enc_name) #: String
|
57
|
+
@map_file = get_mapping_file(@enc_name) #: String | nil
|
58
|
+
@differences = nil #: Hash[Integer, Integer] | nil
|
59
|
+
@glyphlist = nil #: PDF::Reader::GlyphHash | nil
|
54
60
|
|
55
61
|
load_mapping(@map_file) if @map_file
|
56
62
|
|
@@ -68,6 +74,7 @@ class PDF::Reader
|
|
68
74
|
# To save space the following array is also valid and equivalent to the previous one
|
69
75
|
#
|
70
76
|
# [25, :A, :B]
|
77
|
+
#: (Array[Integer | Symbol]) -> Hash[Integer, Integer]
|
71
78
|
def differences=(diff)
|
72
79
|
PDF::Reader::Error.validate_type(diff, "diff", Array)
|
73
80
|
|
@@ -85,6 +92,7 @@ class PDF::Reader
|
|
85
92
|
@differences
|
86
93
|
end
|
87
94
|
|
95
|
+
#: () -> Hash[Integer, Integer]
|
88
96
|
def differences
|
89
97
|
# this method is only used by the spec tests
|
90
98
|
@differences ||= {}
|
@@ -101,6 +109,7 @@ class PDF::Reader
|
|
101
109
|
# * pack the final array of Unicode codepoints into a utf-8 string
|
102
110
|
# * mark the string as utf-8 if we're running on a M17N aware VM
|
103
111
|
#
|
112
|
+
#: (String) -> String
|
104
113
|
def to_utf8(str)
|
105
114
|
if utf8_conversion_impossible?
|
106
115
|
little_boxes(str.unpack(unpack).size)
|
@@ -109,6 +118,7 @@ class PDF::Reader
|
|
109
118
|
end
|
110
119
|
end
|
111
120
|
|
121
|
+
#: (Integer) -> String
|
112
122
|
def int_to_utf8_string(glyph_code)
|
113
123
|
@string_cache[glyph_code] ||= internal_int_to_utf8_string(glyph_code)
|
114
124
|
end
|
@@ -118,13 +128,19 @@ class PDF::Reader
|
|
118
128
|
# int_to_name(65)
|
119
129
|
# => [:A]
|
120
130
|
#
|
131
|
+
#: (Integer) -> Array[Symbol]
|
121
132
|
def int_to_name(glyph_code)
|
122
133
|
if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
123
134
|
[]
|
124
135
|
elsif differences[glyph_code]
|
125
136
|
[differences[glyph_code]]
|
126
137
|
elsif @mapping[glyph_code]
|
127
|
-
|
138
|
+
val = @mapping[glyph_code]
|
139
|
+
if val
|
140
|
+
glyphlist.unicode_to_name(val)
|
141
|
+
else
|
142
|
+
[]
|
143
|
+
end
|
128
144
|
else
|
129
145
|
[]
|
130
146
|
end
|
@@ -137,6 +153,7 @@ class PDF::Reader
|
|
137
153
|
# - leaves all other bytes <= 255 unchanged
|
138
154
|
#
|
139
155
|
# Each specific encoding will change this default as required for their glyphs
|
156
|
+
#: () -> Hash[Integer, Integer]
|
140
157
|
def default_mapping
|
141
158
|
all_bytes = (0..255).to_a
|
142
159
|
tuples = all_bytes.map {|i|
|
@@ -146,6 +163,7 @@ class PDF::Reader
|
|
146
163
|
mapping
|
147
164
|
end
|
148
165
|
|
166
|
+
#: (Integer) -> String
|
149
167
|
def internal_int_to_utf8_string(glyph_code)
|
150
168
|
ret = [
|
151
169
|
@mapping[glyph_code.to_i] || glyph_code.to_i
|
@@ -154,10 +172,12 @@ class PDF::Reader
|
|
154
172
|
ret
|
155
173
|
end
|
156
174
|
|
175
|
+
#: () -> bool
|
157
176
|
def utf8_conversion_impossible?
|
158
177
|
@enc_name == :"Identity-H" || @enc_name == :"Identity-V"
|
159
178
|
end
|
160
179
|
|
180
|
+
#: (Integer) -> String
|
161
181
|
def little_boxes(times)
|
162
182
|
codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
|
163
183
|
ret = codepoints.pack("U*")
|
@@ -165,12 +185,14 @@ class PDF::Reader
|
|
165
185
|
ret
|
166
186
|
end
|
167
187
|
|
188
|
+
#: (String) -> String
|
168
189
|
def convert_to_utf8(str)
|
169
190
|
ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
|
170
191
|
ret.force_encoding("UTF-8")
|
171
192
|
ret
|
172
193
|
end
|
173
194
|
|
195
|
+
#: (Symbol) -> String
|
174
196
|
def get_unpack(enc)
|
175
197
|
case enc
|
176
198
|
when :"Identity-H", :"Identity-V", :UTF16Encoding
|
@@ -180,6 +202,7 @@ class PDF::Reader
|
|
180
202
|
end
|
181
203
|
end
|
182
204
|
|
205
|
+
#: (Symbol) -> String?
|
183
206
|
def get_mapping_file(enc)
|
184
207
|
case enc
|
185
208
|
when :"Identity-H", :"Identity-V", :UTF16Encoding then
|
@@ -201,10 +224,12 @@ class PDF::Reader
|
|
201
224
|
end
|
202
225
|
end
|
203
226
|
|
227
|
+
#: () -> PDF::Reader::GlyphHash
|
204
228
|
def glyphlist
|
205
229
|
@glyphlist ||= PDF::Reader::GlyphHash.new
|
206
230
|
end
|
207
231
|
|
232
|
+
#: (String) -> void
|
208
233
|
def load_mapping(file)
|
209
234
|
File.open(file, "r:BINARY") do |f|
|
210
235
|
f.each do |l|
|
data/lib/pdf/reader/error.rb
CHANGED
@@ -31,30 +31,36 @@ class PDF::Reader
|
|
31
31
|
# are valid
|
32
32
|
class Error # :nodoc:
|
33
33
|
################################################################################
|
34
|
+
#: (untyped, untyped, ?untyped) -> untyped
|
34
35
|
def self.str_assert(lvalue, rvalue, chars=nil)
|
35
36
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
36
37
|
lvalue = lvalue[0,chars] if chars
|
37
38
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
38
39
|
end
|
39
40
|
################################################################################
|
41
|
+
#: (untyped, untyped, ?untyped) -> untyped
|
40
42
|
def self.str_assert_not(lvalue, rvalue, chars=nil)
|
41
43
|
raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
|
42
44
|
lvalue = lvalue[0,chars] if chars
|
43
45
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
|
44
46
|
end
|
45
47
|
################################################################################
|
48
|
+
#: (untyped, untyped) -> untyped
|
46
49
|
def self.assert_equal(lvalue, rvalue)
|
47
50
|
raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
|
48
51
|
end
|
49
52
|
################################################################################
|
53
|
+
#: (Object, String, Module) -> void
|
50
54
|
def self.validate_type(object, name, klass)
|
51
55
|
raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
52
56
|
end
|
53
57
|
################################################################################
|
58
|
+
#: (Object, String, Module) -> void
|
54
59
|
def self.validate_type_as_malformed(object, name, klass)
|
55
60
|
raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
|
56
61
|
end
|
57
62
|
################################################################################
|
63
|
+
#: (Object, String) -> void
|
58
64
|
def self.validate_not_nil(object, name)
|
59
65
|
raise ArgumentError, "#{object} must not be nil" if object.nil?
|
60
66
|
end
|
@@ -9,6 +9,7 @@ class PDF::Reader
|
|
9
9
|
# implementation of the Ascii85 filter
|
10
10
|
class Ascii85
|
11
11
|
|
12
|
+
#: (?Hash[untyped, untyped]) -> void
|
12
13
|
def initialize(options = {})
|
13
14
|
@options = options
|
14
15
|
end
|
@@ -17,6 +18,7 @@ class PDF::Reader
|
|
17
18
|
# Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
|
18
19
|
# rubygem.
|
19
20
|
#
|
21
|
+
#: (String) -> String
|
20
22
|
def filter(data)
|
21
23
|
data = "<~#{data}" unless data.to_s[0,2] == "<~"
|
22
24
|
if defined?(::Ascii85Native)
|
@@ -8,6 +8,7 @@ class PDF::Reader
|
|
8
8
|
# implementation of the AsciiHex stream filter
|
9
9
|
class AsciiHex
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
@@ -15,6 +16,7 @@ class PDF::Reader
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data using the AsciiHex algorithm.
|
17
18
|
#
|
19
|
+
#: (String) -> String
|
18
20
|
def filter(data)
|
19
21
|
data.chop! if data[-1,1] == ">"
|
20
22
|
data = data[1,data.size] if data[0,1] == "<"
|
@@ -8,6 +8,7 @@ class PDF::Reader
|
|
8
8
|
# improve compression
|
9
9
|
class Depredict
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
@@ -16,6 +17,7 @@ class PDF::Reader
|
|
16
17
|
# Streams can be preprocessed to improve compression. This reverses the
|
17
18
|
# preprocessing
|
18
19
|
#
|
20
|
+
#: (String) -> String
|
19
21
|
def filter(data)
|
20
22
|
predictor = @options[:Predictor].to_i
|
21
23
|
|
@@ -34,6 +36,7 @@ class PDF::Reader
|
|
34
36
|
private
|
35
37
|
|
36
38
|
################################################################################
|
39
|
+
#: (untyped) -> String
|
37
40
|
def tiff_depredict(data)
|
38
41
|
data = data.unpack("C*")
|
39
42
|
unfiltered = ''
|
@@ -60,6 +63,7 @@ class PDF::Reader
|
|
60
63
|
unfiltered
|
61
64
|
end
|
62
65
|
################################################################################
|
66
|
+
#: (untyped) -> String
|
63
67
|
def png_depredict(data)
|
64
68
|
return data if @options[:Predictor].to_i < 10
|
65
69
|
|
@@ -10,15 +10,17 @@ class PDF::Reader
|
|
10
10
|
# implementation of the Flate (zlib) stream filter
|
11
11
|
class Flate
|
12
12
|
|
13
|
-
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
|
14
|
-
ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
|
13
|
+
ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 #: Integer # Zlib::MAX_WBITS + 32
|
14
|
+
ZLIB_RAW_DEFLATE = -15 #: Integer # Zlib::MAX_WBITS * -1
|
15
15
|
|
16
|
+
#: (?Hash[untyped, untyped]) -> void
|
16
17
|
def initialize(options = {})
|
17
18
|
@options = options
|
18
19
|
end
|
19
20
|
|
20
21
|
################################################################################
|
21
22
|
# Decode the specified data with the Zlib compression algorithm
|
23
|
+
#: (String) -> String
|
22
24
|
def filter(data)
|
23
25
|
deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
|
24
26
|
|
@@ -31,6 +33,7 @@ class PDF::Reader
|
|
31
33
|
|
32
34
|
private
|
33
35
|
|
36
|
+
#: (untyped) -> untyped
|
34
37
|
def zlib_inflate(data)
|
35
38
|
begin
|
36
39
|
return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
|
@@ -8,12 +8,14 @@ class PDF::Reader
|
|
8
8
|
# implementation of the LZW stream filter
|
9
9
|
class Lzw
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
14
15
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data with the LZW compression algorithm
|
18
|
+
#: (String) -> String
|
17
19
|
def filter(data)
|
18
20
|
data = PDF::Reader::LZW.decode(data)
|
19
21
|
Depredict.new(@options).filter(data)
|
@@ -6,10 +6,12 @@ class PDF::Reader
|
|
6
6
|
module Filter # :nodoc:
|
7
7
|
# implementation of the null stream filter
|
8
8
|
class Null
|
9
|
+
#: (?Hash[untyped, untyped]) -> void
|
9
10
|
def initialize(options = {})
|
10
11
|
@options = options
|
11
12
|
end
|
12
13
|
|
14
|
+
#: (String) -> String
|
13
15
|
def filter(data)
|
14
16
|
data
|
15
17
|
end
|
@@ -8,12 +8,14 @@ class PDF::Reader # :nodoc:
|
|
8
8
|
# implementation of the run length stream filter
|
9
9
|
class RunLength
|
10
10
|
|
11
|
+
#: (?Hash[untyped, untyped]) -> void
|
11
12
|
def initialize(options = {})
|
12
13
|
@options = options
|
13
14
|
end
|
14
15
|
|
15
16
|
################################################################################
|
16
17
|
# Decode the specified data with the RunLengthDecode compression algorithm
|
18
|
+
#: (String) -> String
|
17
19
|
def filter(data)
|
18
20
|
pos = 0
|
19
21
|
out = "".dup
|
data/lib/pdf/reader/filter.rb
CHANGED
@@ -41,6 +41,7 @@ class PDF::Reader
|
|
41
41
|
# Filters that are only used to encode image data are accepted, but the data is
|
42
42
|
# returned untouched. At this stage PDF::Reader has no need to decode images.
|
43
43
|
#
|
44
|
+
#: (Symbol, ?Hash[untyped, untyped]) -> untyped
|
44
45
|
def self.with(name, options = {})
|
45
46
|
case name
|
46
47
|
when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
|
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -29,48 +29,99 @@
|
|
29
29
|
|
30
30
|
require 'pdf/reader/width_calculator'
|
31
31
|
|
32
|
+
|
32
33
|
class PDF::Reader
|
33
34
|
# Represents a single font PDF object and provides some useful methods
|
34
35
|
# for extracting info. Mainly used for converting text to UTF-8.
|
35
36
|
#
|
36
37
|
class Font
|
37
|
-
|
38
|
-
|
39
|
-
|
38
|
+
#: type widthCalculator = (
|
39
|
+
#| PDF::Reader::WidthCalculator::TypeZero |
|
40
|
+
#| PDF::Reader::WidthCalculator::BuiltIn |
|
41
|
+
#| PDF::Reader::WidthCalculator::TypeOneOrThree |
|
42
|
+
#| PDF::Reader::WidthCalculator::TrueType |
|
43
|
+
#| PDF::Reader::WidthCalculator::Composite
|
44
|
+
#| )
|
45
|
+
|
46
|
+
#: Symbol?
|
47
|
+
attr_accessor :subtype
|
48
|
+
|
49
|
+
#: PDF::Reader::Encoding
|
50
|
+
attr_accessor :encoding
|
51
|
+
|
52
|
+
#: Array[PDF::Reader::Font]
|
53
|
+
attr_accessor :descendantfonts
|
54
|
+
|
55
|
+
#: PDF::Reader::CMap | nil
|
56
|
+
attr_accessor :tounicode
|
57
|
+
|
58
|
+
#: Array[Numeric]
|
59
|
+
attr_reader :widths
|
60
|
+
|
61
|
+
#: Integer?
|
62
|
+
attr_reader :first_char
|
40
63
|
|
64
|
+
#: Integer?
|
65
|
+
attr_reader :last_char
|
66
|
+
|
67
|
+
#: Symbol?
|
68
|
+
attr_reader :basefont
|
69
|
+
|
70
|
+
#: PDF::Reader::FontDescriptor?
|
71
|
+
attr_reader :font_descriptor
|
72
|
+
|
73
|
+
#: Array[Numeric]
|
74
|
+
attr_reader :cid_widths
|
75
|
+
|
76
|
+
#: Numeric
|
77
|
+
attr_reader :cid_default_width
|
78
|
+
|
79
|
+
#: (PDF::Reader::ObjectHash, Hash[Symbol, untyped]) -> void
|
41
80
|
def initialize(ohash, obj)
|
42
81
|
@ohash = ohash
|
43
|
-
@tounicode = nil
|
82
|
+
@tounicode = nil #: PDF::Reader::CMap | nil
|
83
|
+
@descendantfonts = [] #: Array[PDF::Reader::Font]
|
84
|
+
@widths = [] #: Array[Numeric]
|
85
|
+
@first_char = nil #: Integer?
|
86
|
+
@last_char = nil #: Integer?
|
87
|
+
@basefont = nil #: Symbol?
|
88
|
+
@font_descriptor = nil #: PDF::Reader::FontDescriptor?
|
89
|
+
@cid_widths = [] #: Array[Numeric]
|
90
|
+
@cid_default_width = 0 #: Numeric
|
91
|
+
@encoding = PDF::Reader::Encoding.new(:StandardEncoding) #: PDF::Reader::Encoding
|
92
|
+
@cached_widths = {} #: Hash[Integer, Numeric]
|
93
|
+
@font_matrix = nil #: Array[Numeric] | nil
|
44
94
|
|
45
95
|
extract_base_info(obj)
|
46
96
|
extract_type3_info(obj)
|
47
97
|
extract_descriptor(obj)
|
48
98
|
extract_descendants(obj)
|
49
|
-
@width_calc = build_width_calculator
|
50
|
-
|
51
|
-
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
99
|
+
@width_calc = build_width_calculator #: widthCalculator
|
52
100
|
end
|
53
101
|
|
102
|
+
#: (Integer | String | Array[Integer | String]) -> String
|
54
103
|
def to_utf8(params)
|
55
104
|
if @tounicode
|
56
|
-
to_utf8_via_cmap(params)
|
105
|
+
to_utf8_via_cmap(params, @tounicode)
|
57
106
|
else
|
58
107
|
to_utf8_via_encoding(params)
|
59
108
|
end
|
60
109
|
end
|
61
110
|
|
111
|
+
#: (String) -> (Array[Integer | Float | String | nil] | nil)
|
62
112
|
def unpack(data)
|
63
113
|
data.unpack(encoding.unpack)
|
64
114
|
end
|
65
115
|
|
66
116
|
# looks up the specified codepoint and returns a value that is in (pdf)
|
67
117
|
# glyph space, which is 1000 glyph units = 1 text space unit
|
118
|
+
#: (Integer | String) -> Numeric
|
68
119
|
def glyph_width(code_point)
|
69
120
|
if code_point.is_a?(String)
|
70
|
-
code_point = code_point
|
121
|
+
code_point = unpack_string_to_array_of_ints(code_point, encoding.unpack).first
|
122
|
+
raise MalformedPDFError, "code point missing" if code_point.nil?
|
71
123
|
end
|
72
124
|
|
73
|
-
@cached_widths ||= {}
|
74
125
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
75
126
|
end
|
76
127
|
|
@@ -78,6 +129,7 @@ class PDF::Reader
|
|
78
129
|
#
|
79
130
|
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
131
|
#
|
132
|
+
#: (Integer | String) -> Numeric
|
81
133
|
def glyph_width_in_text_space(code_point)
|
82
134
|
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
135
|
|
@@ -93,13 +145,14 @@ class PDF::Reader
|
|
93
145
|
private
|
94
146
|
|
95
147
|
# Only valid for Type3 fonts
|
148
|
+
#: (Numeric, Numeric) -> [Numeric, Numeric]
|
96
149
|
def font_matrix_transform(x, y)
|
97
150
|
return x, y if @font_matrix.nil?
|
98
151
|
|
99
152
|
matrix = TransformationMatrix.new(
|
100
|
-
@font_matrix[0], @font_matrix[1],
|
101
|
-
@font_matrix[2], @font_matrix[3],
|
102
|
-
@font_matrix[4], @font_matrix[5],
|
153
|
+
@font_matrix[0] || 0, @font_matrix[1] || 0,
|
154
|
+
@font_matrix[2] || 0, @font_matrix[3] || 0,
|
155
|
+
@font_matrix[4] || 0, @font_matrix[5] || 0,
|
103
156
|
)
|
104
157
|
|
105
158
|
if x == 0 && y == 0
|
@@ -112,6 +165,7 @@ class PDF::Reader
|
|
112
165
|
end
|
113
166
|
end
|
114
167
|
|
168
|
+
#: (Symbol | String | nil) -> PDF::Reader::Encoding
|
115
169
|
def default_encoding(font_name)
|
116
170
|
case font_name.to_s
|
117
171
|
when "Symbol" then
|
@@ -123,6 +177,7 @@ class PDF::Reader
|
|
123
177
|
end
|
124
178
|
end
|
125
179
|
|
180
|
+
#: () -> widthCalculator
|
126
181
|
def build_width_calculator
|
127
182
|
if @subtype == :Type0
|
128
183
|
PDF::Reader::WidthCalculator::TypeZero.new(self)
|
@@ -149,6 +204,7 @@ class PDF::Reader
|
|
149
204
|
end
|
150
205
|
end
|
151
206
|
|
207
|
+
#: (Hash[Symbol, untyped]) -> PDF::Reader::Encoding
|
152
208
|
def build_encoding(obj)
|
153
209
|
if obj[:Encoding].is_a?(Symbol)
|
154
210
|
# one of the standard encodings, referenced by name
|
@@ -163,6 +219,7 @@ class PDF::Reader
|
|
163
219
|
end
|
164
220
|
end
|
165
221
|
|
222
|
+
#: (Hash[Symbol, untyped]) -> void
|
166
223
|
def extract_base_info(obj)
|
167
224
|
@subtype = @ohash.deref_name(obj[:Subtype])
|
168
225
|
@basefont = @ohash.deref_name(obj[:BaseFont])
|
@@ -185,6 +242,7 @@ class PDF::Reader
|
|
185
242
|
end
|
186
243
|
end
|
187
244
|
|
245
|
+
#: (Hash[Symbol, untyped]) -> void
|
188
246
|
def extract_type3_info(obj)
|
189
247
|
if @subtype == :Type3
|
190
248
|
@font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
|
@@ -193,46 +251,50 @@ class PDF::Reader
|
|
193
251
|
end
|
194
252
|
end
|
195
253
|
|
254
|
+
#: (Hash[Symbol, untyped]) -> void
|
196
255
|
def extract_descriptor(obj)
|
197
256
|
if obj[:FontDescriptor]
|
198
257
|
# create a font descriptor object if we can, in other words, unless this is
|
199
258
|
# a CID Font
|
200
|
-
fd = @ohash.deref_hash(obj[:FontDescriptor])
|
259
|
+
fd = @ohash.deref_hash(obj[:FontDescriptor]) || {}
|
201
260
|
@font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
|
202
261
|
else
|
203
262
|
@font_descriptor = nil
|
204
263
|
end
|
205
264
|
end
|
206
265
|
|
266
|
+
#: (Hash[Symbol, untyped]) -> void
|
207
267
|
def extract_descendants(obj)
|
208
268
|
# per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
|
209
269
|
# A one-element array specifying the CIDFont dictionary that is the
|
210
270
|
# descendant of this Type 0 font.
|
211
271
|
if obj[:DescendantFonts]
|
212
|
-
descendants = @ohash.deref_array(obj[:DescendantFonts])
|
272
|
+
descendants = @ohash.deref_array(obj[:DescendantFonts]) || []
|
213
273
|
@descendantfonts = descendants.map { |desc|
|
214
|
-
PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
|
274
|
+
PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc) || {})
|
215
275
|
}
|
216
276
|
else
|
217
277
|
@descendantfonts = []
|
218
278
|
end
|
219
279
|
end
|
220
280
|
|
221
|
-
|
281
|
+
#: (Integer | String | Array[Integer | String], PDF::Reader::CMap) -> String
|
282
|
+
def to_utf8_via_cmap(params, cmap)
|
222
283
|
case params
|
223
284
|
when Integer
|
224
285
|
[
|
225
|
-
|
286
|
+
cmap.decode(params)
|
226
287
|
].flatten.pack("U*")
|
227
288
|
when String
|
228
|
-
params
|
229
|
-
|
289
|
+
unpack_string_to_array_of_ints(params, encoding.unpack).map { |code_point|
|
290
|
+
cmap.decode(code_point)
|
230
291
|
}.flatten.pack("U*")
|
231
292
|
when Array
|
232
|
-
params.collect { |param| to_utf8_via_cmap(param) }.join("")
|
293
|
+
params.collect { |param| to_utf8_via_cmap(param, cmap) }.join("")
|
233
294
|
end
|
234
295
|
end
|
235
296
|
|
297
|
+
#: (Integer | String | Array[Integer | String]) -> String
|
236
298
|
def to_utf8_via_encoding(params)
|
237
299
|
if encoding.kind_of?(String)
|
238
300
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
|
@@ -248,5 +310,11 @@ class PDF::Reader
|
|
248
310
|
end
|
249
311
|
end
|
250
312
|
|
313
|
+
#: (String, String) -> Array[Integer]
|
314
|
+
def unpack_string_to_array_of_ints(unpack_me, unpack_arg)
|
315
|
+
unpack_me.unpack(unpack_arg).map { |code_point|
|
316
|
+
code_point = TypeCheck.cast_to_int!(code_point)
|
317
|
+
}
|
318
|
+
end
|
251
319
|
end
|
252
320
|
end
|