pdf-reader 2.9.2 → 2.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +39 -0
- data/README.md +33 -33
- data/Rakefile +2 -2
- data/lib/pdf/reader/advanced_text_run_filter.rb +152 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
- data/lib/pdf/reader/buffer.rb +39 -22
- data/lib/pdf/reader/cid_widths.rb +14 -6
- data/lib/pdf/reader/cmap.rb +16 -5
- data/lib/pdf/reader/encoding.rb +42 -18
- data/lib/pdf/reader/error.rb +6 -4
- data/lib/pdf/reader/filter/ascii85.rb +2 -0
- data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
- data/lib/pdf/reader/filter/depredict.rb +6 -2
- data/lib/pdf/reader/filter/flate.rb +5 -2
- data/lib/pdf/reader/filter/lzw.rb +2 -0
- data/lib/pdf/reader/filter/null.rb +2 -0
- data/lib/pdf/reader/filter/run_length.rb +2 -0
- data/lib/pdf/reader/filter.rb +1 -0
- data/lib/pdf/reader/font.rb +99 -32
- data/lib/pdf/reader/font_descriptor.rb +79 -24
- data/lib/pdf/reader/form_xobject.rb +15 -1
- data/lib/pdf/reader/glyph_hash.rb +41 -8
- data/lib/pdf/reader/key_builder_v5.rb +17 -9
- data/lib/pdf/reader/lzw.rb +42 -16
- data/lib/pdf/reader/no_text_filter.rb +15 -0
- data/lib/pdf/reader/null_security_handler.rb +1 -0
- data/lib/pdf/reader/object_cache.rb +7 -2
- data/lib/pdf/reader/object_hash.rb +129 -16
- data/lib/pdf/reader/object_stream.rb +22 -5
- data/lib/pdf/reader/overlapping_runs_filter.rb +8 -2
- data/lib/pdf/reader/page.rb +66 -13
- data/lib/pdf/reader/page_layout.rb +26 -9
- data/lib/pdf/reader/page_state.rb +12 -3
- data/lib/pdf/reader/page_text_receiver.rb +16 -2
- data/lib/pdf/reader/pages_strategy.rb +1 -1
- data/lib/pdf/reader/parser.rb +52 -13
- data/lib/pdf/reader/point.rb +9 -2
- data/lib/pdf/reader/print_receiver.rb +2 -6
- data/lib/pdf/reader/rc4_security_handler.rb +2 -0
- data/lib/pdf/reader/rectangle.rb +24 -1
- data/lib/pdf/reader/reference.rb +13 -3
- data/lib/pdf/reader/register_receiver.rb +15 -2
- data/lib/pdf/reader/resources.rb +12 -2
- data/lib/pdf/reader/security_handler_factory.rb +13 -0
- data/lib/pdf/reader/standard_key_builder.rb +37 -23
- data/lib/pdf/reader/stream.rb +9 -3
- data/lib/pdf/reader/synchronized_cache.rb +6 -3
- data/lib/pdf/reader/text_run.rb +33 -3
- data/lib/pdf/reader/token.rb +1 -0
- data/lib/pdf/reader/transformation_matrix.rb +41 -10
- data/lib/pdf/reader/type_check.rb +53 -0
- data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
- data/lib/pdf/reader/validating_receiver.rb +29 -0
- data/lib/pdf/reader/width_calculator/built_in.rb +13 -5
- data/lib/pdf/reader/width_calculator/composite.rb +11 -3
- data/lib/pdf/reader/width_calculator/true_type.rb +14 -12
- data/lib/pdf/reader/width_calculator/type_one_or_three.rb +8 -5
- data/lib/pdf/reader/width_calculator/type_zero.rb +8 -3
- data/lib/pdf/reader/xref.rb +31 -10
- data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
- data/lib/pdf/reader.rb +24 -12
- data/rbi/pdf-reader.rbi +1504 -1480
- metadata +34 -17
data/lib/pdf/reader/font.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -29,48 +29,99 @@
|
|
29
29
|
|
30
30
|
require 'pdf/reader/width_calculator'
|
31
31
|
|
32
|
+
|
32
33
|
class PDF::Reader
|
33
34
|
# Represents a single font PDF object and provides some useful methods
|
34
35
|
# for extracting info. Mainly used for converting text to UTF-8.
|
35
36
|
#
|
36
37
|
class Font
|
37
|
-
|
38
|
-
|
39
|
-
|
38
|
+
#: type widthCalculator = (
|
39
|
+
#| PDF::Reader::WidthCalculator::TypeZero |
|
40
|
+
#| PDF::Reader::WidthCalculator::BuiltIn |
|
41
|
+
#| PDF::Reader::WidthCalculator::TypeOneOrThree |
|
42
|
+
#| PDF::Reader::WidthCalculator::TrueType |
|
43
|
+
#| PDF::Reader::WidthCalculator::Composite
|
44
|
+
#| )
|
45
|
+
|
46
|
+
#: Symbol?
|
47
|
+
attr_accessor :subtype
|
48
|
+
|
49
|
+
#: PDF::Reader::Encoding
|
50
|
+
attr_accessor :encoding
|
51
|
+
|
52
|
+
#: Array[PDF::Reader::Font]
|
53
|
+
attr_accessor :descendantfonts
|
54
|
+
|
55
|
+
#: PDF::Reader::CMap | nil
|
56
|
+
attr_accessor :tounicode
|
57
|
+
|
58
|
+
#: Array[Numeric]
|
59
|
+
attr_reader :widths
|
60
|
+
|
61
|
+
#: Integer?
|
62
|
+
attr_reader :first_char
|
63
|
+
|
64
|
+
#: Integer?
|
65
|
+
attr_reader :last_char
|
66
|
+
|
67
|
+
#: Symbol?
|
68
|
+
attr_reader :basefont
|
69
|
+
|
70
|
+
#: PDF::Reader::FontDescriptor?
|
71
|
+
attr_reader :font_descriptor
|
40
72
|
|
73
|
+
#: Array[Numeric]
|
74
|
+
attr_reader :cid_widths
|
75
|
+
|
76
|
+
#: Numeric
|
77
|
+
attr_reader :cid_default_width
|
78
|
+
|
79
|
+
#: (PDF::Reader::ObjectHash, Hash[Symbol, untyped]) -> void
|
41
80
|
def initialize(ohash, obj)
|
42
81
|
@ohash = ohash
|
43
|
-
@tounicode = nil
|
82
|
+
@tounicode = nil #: PDF::Reader::CMap | nil
|
83
|
+
@descendantfonts = [] #: Array[PDF::Reader::Font]
|
84
|
+
@widths = [] #: Array[Numeric]
|
85
|
+
@first_char = nil #: Integer?
|
86
|
+
@last_char = nil #: Integer?
|
87
|
+
@basefont = nil #: Symbol?
|
88
|
+
@font_descriptor = nil #: PDF::Reader::FontDescriptor?
|
89
|
+
@cid_widths = [] #: Array[Numeric]
|
90
|
+
@cid_default_width = 0 #: Numeric
|
91
|
+
@encoding = PDF::Reader::Encoding.new(:StandardEncoding) #: PDF::Reader::Encoding
|
92
|
+
@cached_widths = {} #: Hash[Integer, Numeric]
|
93
|
+
@font_matrix = nil #: Array[Numeric] | nil
|
44
94
|
|
45
95
|
extract_base_info(obj)
|
46
96
|
extract_type3_info(obj)
|
47
97
|
extract_descriptor(obj)
|
48
98
|
extract_descendants(obj)
|
49
|
-
@width_calc = build_width_calculator
|
50
|
-
|
51
|
-
@encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
|
99
|
+
@width_calc = build_width_calculator #: widthCalculator
|
52
100
|
end
|
53
101
|
|
102
|
+
#: (Integer | String | Array[Integer | String]) -> String
|
54
103
|
def to_utf8(params)
|
55
104
|
if @tounicode
|
56
|
-
to_utf8_via_cmap(params)
|
105
|
+
to_utf8_via_cmap(params, @tounicode)
|
57
106
|
else
|
58
107
|
to_utf8_via_encoding(params)
|
59
108
|
end
|
60
109
|
end
|
61
110
|
|
111
|
+
#: (String) -> (Array[Integer | Float | String | nil] | nil)
|
62
112
|
def unpack(data)
|
63
113
|
data.unpack(encoding.unpack)
|
64
114
|
end
|
65
115
|
|
66
116
|
# looks up the specified codepoint and returns a value that is in (pdf)
|
67
117
|
# glyph space, which is 1000 glyph units = 1 text space unit
|
118
|
+
#: (Integer | String) -> Numeric
|
68
119
|
def glyph_width(code_point)
|
69
120
|
if code_point.is_a?(String)
|
70
|
-
code_point = code_point
|
121
|
+
code_point = unpack_string_to_array_of_ints(code_point, encoding.unpack).first
|
122
|
+
raise MalformedPDFError, "code point missing" if code_point.nil?
|
71
123
|
end
|
72
124
|
|
73
|
-
@cached_widths ||= {}
|
74
125
|
@cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
|
75
126
|
end
|
76
127
|
|
@@ -78,12 +129,13 @@ class PDF::Reader
|
|
78
129
|
#
|
79
130
|
# However, Type3 fonts provide their own FontMatrix that's used for the transformation.
|
80
131
|
#
|
132
|
+
#: (Integer | String) -> Numeric
|
81
133
|
def glyph_width_in_text_space(code_point)
|
82
134
|
glyph_width_in_glyph_space = glyph_width(code_point)
|
83
135
|
|
84
136
|
if @subtype == :Type3
|
85
|
-
x1,
|
86
|
-
x2,
|
137
|
+
x1, _y1 = font_matrix_transform(0,0)
|
138
|
+
x2, _y2 = font_matrix_transform(glyph_width_in_glyph_space, 0)
|
87
139
|
(x2 - x1).abs.round(2)
|
88
140
|
else
|
89
141
|
glyph_width_in_glyph_space / 1000.0
|
@@ -93,13 +145,14 @@ class PDF::Reader
|
|
93
145
|
private
|
94
146
|
|
95
147
|
# Only valid for Type3 fonts
|
148
|
+
#: (Numeric, Numeric) -> [Numeric, Numeric]
|
96
149
|
def font_matrix_transform(x, y)
|
97
150
|
return x, y if @font_matrix.nil?
|
98
151
|
|
99
152
|
matrix = TransformationMatrix.new(
|
100
|
-
@font_matrix[0], @font_matrix[1],
|
101
|
-
@font_matrix[2], @font_matrix[3],
|
102
|
-
@font_matrix[4], @font_matrix[5],
|
153
|
+
@font_matrix[0] || 0, @font_matrix[1] || 0,
|
154
|
+
@font_matrix[2] || 0, @font_matrix[3] || 0,
|
155
|
+
@font_matrix[4] || 0, @font_matrix[5] || 0,
|
103
156
|
)
|
104
157
|
|
105
158
|
if x == 0 && y == 0
|
@@ -112,6 +165,7 @@ class PDF::Reader
|
|
112
165
|
end
|
113
166
|
end
|
114
167
|
|
168
|
+
#: (Symbol | String | nil) -> PDF::Reader::Encoding
|
115
169
|
def default_encoding(font_name)
|
116
170
|
case font_name.to_s
|
117
171
|
when "Symbol" then
|
@@ -123,6 +177,7 @@ class PDF::Reader
|
|
123
177
|
end
|
124
178
|
end
|
125
179
|
|
180
|
+
#: () -> widthCalculator
|
126
181
|
def build_width_calculator
|
127
182
|
if @subtype == :Type0
|
128
183
|
PDF::Reader::WidthCalculator::TypeZero.new(self)
|
@@ -149,6 +204,7 @@ class PDF::Reader
|
|
149
204
|
end
|
150
205
|
end
|
151
206
|
|
207
|
+
#: (Hash[Symbol, untyped]) -> PDF::Reader::Encoding
|
152
208
|
def build_encoding(obj)
|
153
209
|
if obj[:Encoding].is_a?(Symbol)
|
154
210
|
# one of the standard encodings, referenced by name
|
@@ -163,6 +219,7 @@ class PDF::Reader
|
|
163
219
|
end
|
164
220
|
end
|
165
221
|
|
222
|
+
#: (Hash[Symbol, untyped]) -> void
|
166
223
|
def extract_base_info(obj)
|
167
224
|
@subtype = @ohash.deref_name(obj[:Subtype])
|
168
225
|
@basefont = @ohash.deref_name(obj[:BaseFont])
|
@@ -185,6 +242,7 @@ class PDF::Reader
|
|
185
242
|
end
|
186
243
|
end
|
187
244
|
|
245
|
+
#: (Hash[Symbol, untyped]) -> void
|
188
246
|
def extract_type3_info(obj)
|
189
247
|
if @subtype == :Type3
|
190
248
|
@font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
|
@@ -193,45 +251,50 @@ class PDF::Reader
|
|
193
251
|
end
|
194
252
|
end
|
195
253
|
|
254
|
+
#: (Hash[Symbol, untyped]) -> void
|
196
255
|
def extract_descriptor(obj)
|
197
256
|
if obj[:FontDescriptor]
|
198
257
|
# create a font descriptor object if we can, in other words, unless this is
|
199
258
|
# a CID Font
|
200
|
-
fd = @ohash.deref_hash(obj[:FontDescriptor])
|
259
|
+
fd = @ohash.deref_hash(obj[:FontDescriptor]) || {}
|
201
260
|
@font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
|
202
261
|
else
|
203
262
|
@font_descriptor = nil
|
204
263
|
end
|
205
264
|
end
|
206
265
|
|
266
|
+
#: (Hash[Symbol, untyped]) -> void
|
207
267
|
def extract_descendants(obj)
|
208
|
-
return unless obj[:DescendantFonts]
|
209
268
|
# per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
|
210
269
|
# A one-element array specifying the CIDFont dictionary that is the
|
211
270
|
# descendant of this Type 0 font.
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
271
|
+
if obj[:DescendantFonts]
|
272
|
+
descendants = @ohash.deref_array(obj[:DescendantFonts]) || []
|
273
|
+
@descendantfonts = descendants.map { |desc|
|
274
|
+
PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc) || {})
|
275
|
+
}
|
276
|
+
else
|
277
|
+
@descendantfonts = []
|
278
|
+
end
|
216
279
|
end
|
217
280
|
|
218
|
-
|
281
|
+
#: (Integer | String | Array[Integer | String], PDF::Reader::CMap) -> String
|
282
|
+
def to_utf8_via_cmap(params, cmap)
|
219
283
|
case params
|
220
284
|
when Integer
|
221
285
|
[
|
222
|
-
|
286
|
+
cmap.decode(params)
|
223
287
|
].flatten.pack("U*")
|
224
288
|
when String
|
225
|
-
params
|
226
|
-
|
289
|
+
unpack_string_to_array_of_ints(params, encoding.unpack).map { |code_point|
|
290
|
+
cmap.decode(code_point)
|
227
291
|
}.flatten.pack("U*")
|
228
292
|
when Array
|
229
|
-
params.collect { |param| to_utf8_via_cmap(param) }
|
230
|
-
else
|
231
|
-
params
|
293
|
+
params.collect { |param| to_utf8_via_cmap(param, cmap) }.join("")
|
232
294
|
end
|
233
295
|
end
|
234
296
|
|
297
|
+
#: (Integer | String | Array[Integer | String]) -> String
|
235
298
|
def to_utf8_via_encoding(params)
|
236
299
|
if encoding.kind_of?(String)
|
237
300
|
raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
|
@@ -243,11 +306,15 @@ class PDF::Reader
|
|
243
306
|
when String
|
244
307
|
encoding.to_utf8(params)
|
245
308
|
when Array
|
246
|
-
params.collect { |param| to_utf8_via_encoding(param) }
|
247
|
-
else
|
248
|
-
params
|
309
|
+
params.collect { |param| to_utf8_via_encoding(param) }.join("")
|
249
310
|
end
|
250
311
|
end
|
251
312
|
|
313
|
+
#: (String, String) -> Array[Integer]
|
314
|
+
def unpack_string_to_array_of_ints(unpack_me, unpack_arg)
|
315
|
+
unpack_me.unpack(unpack_arg).map { |code_point|
|
316
|
+
code_point = TypeCheck.cast_to_int!(code_point)
|
317
|
+
}
|
318
|
+
end
|
252
319
|
end
|
253
320
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
require 'ttfunk'
|
@@ -9,29 +9,75 @@ class PDF::Reader
|
|
9
9
|
# Font descriptors are outlined in Section 9.8, PDF 32000-1:2008, pp 281-288
|
10
10
|
class FontDescriptor
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
:avg_width, :max_width, :missing_width, :italic_angle, :stem_v,
|
15
|
-
:x_height, :font_flags
|
12
|
+
#: String
|
13
|
+
attr_reader :font_name
|
16
14
|
|
15
|
+
#: String?
|
16
|
+
attr_reader :font_family
|
17
|
+
|
18
|
+
#: Symbol
|
19
|
+
attr_reader :font_stretch
|
20
|
+
|
21
|
+
#: Numeric
|
22
|
+
attr_reader :font_weight
|
23
|
+
|
24
|
+
#: Array[Numeric]
|
25
|
+
attr_reader :font_bounding_box
|
26
|
+
|
27
|
+
#: Numeric
|
28
|
+
attr_reader :cap_height
|
29
|
+
|
30
|
+
#: Numeric
|
31
|
+
attr_reader :ascent
|
32
|
+
|
33
|
+
#: Numeric
|
34
|
+
attr_reader :descent
|
35
|
+
|
36
|
+
#: Numeric
|
37
|
+
attr_reader :leading
|
38
|
+
|
39
|
+
#: Numeric
|
40
|
+
attr_reader :avg_width
|
41
|
+
|
42
|
+
#: Numeric
|
43
|
+
attr_reader :max_width
|
44
|
+
|
45
|
+
#: Numeric
|
46
|
+
attr_reader :missing_width
|
47
|
+
|
48
|
+
#: Numeric?
|
49
|
+
attr_reader :italic_angle
|
50
|
+
|
51
|
+
#: Numeric?
|
52
|
+
attr_reader :stem_v
|
53
|
+
|
54
|
+
#: Numeric?
|
55
|
+
attr_reader :x_height
|
56
|
+
|
57
|
+
#: Integer
|
58
|
+
attr_reader :font_flags
|
59
|
+
|
60
|
+
#: (PDF::Reader::ObjectHash, Hash[untyped, untyped]) -> void
|
17
61
|
def initialize(ohash, fd_hash)
|
18
62
|
# TODO change these to typed derefs
|
19
|
-
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0
|
20
|
-
@descent = ohash.deref_number(fd_hash[:Descent]) || 0
|
21
|
-
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0
|
22
|
-
@font_bounding_box = ohash.deref_array_of_numbers(
|
23
|
-
|
24
|
-
|
25
|
-
@
|
26
|
-
@
|
27
|
-
@
|
28
|
-
@
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@
|
32
|
-
@
|
33
|
-
@
|
34
|
-
@
|
63
|
+
@ascent = ohash.deref_number(fd_hash[:Ascent]) || 0 #: Numeric
|
64
|
+
@descent = ohash.deref_number(fd_hash[:Descent]) || 0 #: Numeric
|
65
|
+
@missing_width = ohash.deref_number(fd_hash[:MissingWidth]) || 0 #: Numeric
|
66
|
+
@font_bounding_box = ohash.deref_array_of_numbers(
|
67
|
+
fd_hash[:FontBBox]
|
68
|
+
) || [0,0,0,0] #: Array[Numeric]
|
69
|
+
@avg_width = ohash.deref_number(fd_hash[:AvgWidth]) || 0 #: Numeric
|
70
|
+
@cap_height = ohash.deref_number(fd_hash[:CapHeight]) || 0 #: Numeric
|
71
|
+
@font_flags = ohash.deref_integer(fd_hash[:Flags]) || 0 #: Integer
|
72
|
+
@italic_angle = ohash.deref_number(fd_hash[:ItalicAngle]) #: Numeric?
|
73
|
+
@font_name = ohash.deref_name(fd_hash[:FontName]).to_s #: String
|
74
|
+
@leading = ohash.deref_number(fd_hash[:Leading]) || 0 #: Numeric
|
75
|
+
@max_width = ohash.deref_number(fd_hash[:MaxWidth]) || 0 #: Numeric
|
76
|
+
@stem_v = ohash.deref_number(fd_hash[:StemV]) #: Numeric?
|
77
|
+
@x_height = ohash.deref_number(fd_hash[:XHeight]) #: Numeric?
|
78
|
+
@font_stretch = ohash.deref_name(fd_hash[:FontStretch]) || :Normal #: Symbol
|
79
|
+
@font_weight = ohash.deref_number(fd_hash[:FontWeight]) || 400 #: Numeric
|
80
|
+
@font_family = ohash.deref_string(fd_hash[:FontFamily]) #: String?
|
35
81
|
|
36
82
|
# A FontDescriptor may have an embedded font program in FontFile
|
37
83
|
# (Type 1 Font Program), FontFile2 (TrueType font program), or
|
@@ -41,12 +87,15 @@ class PDF::Reader
|
|
41
87
|
# 2) CIDFontType0C: Type 0 Font Program in Compact Font Format
|
42
88
|
# 3) OpenType: OpenType Font Program
|
43
89
|
# see Section 9.9, PDF 32000-1:2008, pp 288-292
|
44
|
-
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2])
|
90
|
+
@font_program_stream = ohash.deref_stream(fd_hash[:FontFile2]) #: PDF::Reader::Stream?
|
45
91
|
#TODO handle FontFile and FontFile3
|
92
|
+
@ttf_program_stream = nil #: TTFunk::File?
|
46
93
|
|
47
|
-
@is_ttf = true
|
94
|
+
@is_ttf = @font_program_stream ? true : false #: bool
|
95
|
+
@glyph_to_pdf_sf = nil #: Numeric?
|
48
96
|
end
|
49
97
|
|
98
|
+
#: (Integer) -> Numeric
|
50
99
|
def glyph_width(char_code)
|
51
100
|
if @is_ttf
|
52
101
|
if ttf_program_stream.cmap.unicode.length > 0
|
@@ -56,13 +105,16 @@ class PDF::Reader
|
|
56
105
|
end
|
57
106
|
char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
|
58
107
|
if char_metric
|
59
|
-
|
108
|
+
char_metric.advance_width
|
109
|
+
else
|
110
|
+
0
|
60
111
|
end
|
61
112
|
end
|
62
113
|
end
|
63
114
|
|
64
115
|
# PDF states that a glyph is 1000 units wide, true type doesn't enforce
|
65
116
|
# any behavior, but uses units/em to define how wide the 'M' is (the widest letter)
|
117
|
+
#: () -> Numeric
|
66
118
|
def glyph_to_pdf_scale_factor
|
67
119
|
if @is_ttf
|
68
120
|
@glyph_to_pdf_sf ||= (1.0 / ttf_program_stream.header.units_per_em) * 1000.0
|
@@ -74,7 +126,10 @@ class PDF::Reader
|
|
74
126
|
|
75
127
|
private
|
76
128
|
|
129
|
+
#: () -> TTFunk::File
|
77
130
|
def ttf_program_stream
|
131
|
+
raise MalformedPDFError, "No font_program_stream" unless @font_program_stream
|
132
|
+
|
78
133
|
@ttf_program_stream ||= TTFunk::File.new(@font_program_stream.unfiltered_data)
|
79
134
|
end
|
80
135
|
end
|
@@ -17,6 +17,7 @@ module PDF
|
|
17
17
|
class FormXObject
|
18
18
|
extend Forwardable
|
19
19
|
|
20
|
+
#: untyped
|
20
21
|
attr_reader :xobject
|
21
22
|
|
22
23
|
def_delegators :resources, :color_spaces
|
@@ -28,6 +29,7 @@ module PDF
|
|
28
29
|
def_delegators :resources, :shadings
|
29
30
|
def_delegators :resources, :xobjects
|
30
31
|
|
32
|
+
#: (untyped, untyped, ?Hash[untyped, untyped]) -> void
|
31
33
|
def initialize(page, xobject, options = {})
|
32
34
|
@page = page
|
33
35
|
@objects = page.objects
|
@@ -42,10 +44,11 @@ module PDF
|
|
42
44
|
# The values are a PDF::Reader::Font instances that provide access
|
43
45
|
# to most available metrics for each font.
|
44
46
|
#
|
47
|
+
#: () -> untyped
|
45
48
|
def font_objects
|
46
49
|
raw_fonts = @objects.deref_hash(fonts)
|
47
50
|
::Hash[raw_fonts.map { |label, font|
|
48
|
-
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
|
51
|
+
[label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
|
49
52
|
}]
|
50
53
|
end
|
51
54
|
|
@@ -54,13 +57,18 @@ module PDF
|
|
54
57
|
#
|
55
58
|
# See the comments on PDF::Reader::Page#walk for more detail.
|
56
59
|
#
|
60
|
+
#: (*untyped) -> untyped
|
57
61
|
def walk(*receivers)
|
62
|
+
receivers = receivers.map { |receiver|
|
63
|
+
ValidatingReceiver.new(receiver)
|
64
|
+
}
|
58
65
|
content_stream(receivers, raw_content)
|
59
66
|
end
|
60
67
|
|
61
68
|
# returns the raw content stream for this page. This is plumbing, nothing to
|
62
69
|
# see here unless you're a PDF nerd like me.
|
63
70
|
#
|
71
|
+
#: () -> untyped
|
64
72
|
def raw_content
|
65
73
|
@xobject.unfiltered_data
|
66
74
|
end
|
@@ -69,24 +77,29 @@ module PDF
|
|
69
77
|
|
70
78
|
# Returns the resources that accompany this form.
|
71
79
|
#
|
80
|
+
#: () -> untyped
|
72
81
|
def resources
|
73
82
|
@resources ||= Resources.new(@objects, @objects.deref_hash(@xobject.hash[:Resources]) || {})
|
74
83
|
end
|
75
84
|
|
85
|
+
#: (untyped, untyped, ?Array[untyped]) -> untyped
|
76
86
|
def callback(receivers, name, params=[])
|
77
87
|
receivers.each do |receiver|
|
78
88
|
receiver.send(name, *params) if receiver.respond_to?(name)
|
79
89
|
end
|
80
90
|
end
|
81
91
|
|
92
|
+
#: () -> untyped
|
82
93
|
def content_stream_md5
|
83
94
|
@content_stream_md5 ||= Digest::MD5.hexdigest(raw_content)
|
84
95
|
end
|
85
96
|
|
97
|
+
#: () -> untyped
|
86
98
|
def cached_tokens_key
|
87
99
|
@cached_tokens_key ||= "tokens-#{content_stream_md5}"
|
88
100
|
end
|
89
101
|
|
102
|
+
#: () -> untyped
|
90
103
|
def tokens
|
91
104
|
@cache[cached_tokens_key] ||= begin
|
92
105
|
buffer = Buffer.new(StringIO.new(raw_content), :content_stream => true)
|
@@ -99,6 +112,7 @@ module PDF
|
|
99
112
|
end
|
100
113
|
end
|
101
114
|
|
115
|
+
#: (untyped, untyped) -> untyped
|
102
116
|
def content_stream(receivers, instructions)
|
103
117
|
params = []
|
104
118
|
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
# typed:
|
2
|
+
# typed: strict
|
3
3
|
# frozen_string_literal: true
|
4
4
|
|
5
5
|
################################################################################
|
@@ -32,11 +32,38 @@ class PDF::Reader
|
|
32
32
|
# The mapping is read from a data file on disk the first time it's needed.
|
33
33
|
#
|
34
34
|
class GlyphHash # :nodoc:
|
35
|
+
@@by_codepoint_cache = nil #: Hash[Integer, Array[Symbol]] | nil
|
36
|
+
@@by_name_cache = nil #: Hash[Symbol, Integer] | nil
|
37
|
+
|
38
|
+
# An internal class for returning multiple pieces of data and keep sorbet happy
|
39
|
+
class ReturnData
|
40
|
+
#: Hash[Symbol, Integer]
|
41
|
+
attr_reader :by_name
|
42
|
+
|
43
|
+
#: Hash[Integer, Array[Symbol]]
|
44
|
+
attr_reader :by_codepoint
|
45
|
+
|
46
|
+
#:(Hash[Symbol, Integer], Hash[Integer, Array[Symbol]]) -> void
|
47
|
+
def initialize(by_name, by_codepoint)
|
48
|
+
@by_name = by_name
|
49
|
+
@by_codepoint = by_codepoint
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
#: () -> void
|
35
54
|
def initialize
|
55
|
+
@@by_codepoint_cache ||= nil
|
56
|
+
@@by_name_cache ||= nil
|
57
|
+
|
36
58
|
# only parse the glyph list once, and cache the results (for performance)
|
37
|
-
|
38
|
-
|
39
|
-
|
59
|
+
if @@by_codepoint_cache != nil && @@by_name_cache != nil
|
60
|
+
@by_name = @@by_name_cache #: Hash[Symbol, Integer]
|
61
|
+
@by_codepoint = @@by_codepoint_cache #: Hash[Integer, Array[Symbol]]
|
62
|
+
else
|
63
|
+
res = load_adobe_glyph_mapping
|
64
|
+
@by_name = @@by_name_cache ||= res.by_name
|
65
|
+
@by_codepoint = @@by_codepoint_cache ||= res.by_codepoint
|
66
|
+
end
|
40
67
|
end
|
41
68
|
|
42
69
|
# attempt to convert a PDF Name to a unicode codepoint. Returns nil
|
@@ -59,6 +86,7 @@ class PDF::Reader
|
|
59
86
|
# h.name_to_unicode(:34)
|
60
87
|
# => 34
|
61
88
|
#
|
89
|
+
#: (Symbol | nil) -> (Integer | nil)
|
62
90
|
def name_to_unicode(name)
|
63
91
|
return nil unless name.is_a?(Symbol)
|
64
92
|
|
@@ -96,6 +124,7 @@ class PDF::Reader
|
|
96
124
|
# h.unicode_to_name(34)
|
97
125
|
# => [:34]
|
98
126
|
#
|
127
|
+
#: (Integer) -> Array[Symbol]
|
99
128
|
def unicode_to_name(codepoint)
|
100
129
|
@by_codepoint[codepoint.to_i] || []
|
101
130
|
end
|
@@ -105,9 +134,10 @@ class PDF::Reader
|
|
105
134
|
# returns a hash that maps glyph names to unicode codepoints. The mapping is based on
|
106
135
|
# a text file supplied by Adobe at:
|
107
136
|
# https://github.com/adobe-type-tools/agl-aglfn
|
137
|
+
#: () -> ReturnData
|
108
138
|
def load_adobe_glyph_mapping
|
109
|
-
keyed_by_name = {}
|
110
|
-
keyed_by_codepoint = {}
|
139
|
+
keyed_by_name = {} #: Hash[Symbol, Integer]
|
140
|
+
keyed_by_codepoint = {} #: Hash[Integer, Array[Symbol]]
|
111
141
|
|
112
142
|
paths = [
|
113
143
|
File.dirname(__FILE__) + "/glyphlist.txt",
|
@@ -121,13 +151,16 @@ class PDF::Reader
|
|
121
151
|
cp = "0x#{code}".hex
|
122
152
|
keyed_by_name[name.to_sym] = cp
|
123
153
|
keyed_by_codepoint[cp] ||= []
|
124
|
-
keyed_by_codepoint[cp]
|
154
|
+
arr = keyed_by_codepoint[cp]
|
155
|
+
if arr
|
156
|
+
arr.push(name.to_sym)
|
157
|
+
end
|
125
158
|
end
|
126
159
|
end
|
127
160
|
end
|
128
161
|
end
|
129
162
|
|
130
|
-
|
163
|
+
ReturnData.new(keyed_by_name.freeze, keyed_by_codepoint.freeze)
|
131
164
|
end
|
132
165
|
|
133
166
|
end
|
@@ -16,20 +16,21 @@ class PDF::Reader
|
|
16
16
|
#
|
17
17
|
class KeyBuilderV5
|
18
18
|
|
19
|
+
#: (?Hash[Symbol, String]) -> void
|
19
20
|
def initialize(opts = {})
|
20
|
-
@key_length = 256
|
21
|
+
@key_length = 256 #: Integer
|
21
22
|
|
22
23
|
# hash(32B) + validation salt(8B) + key salt(8B)
|
23
|
-
@owner_key = opts[:owner_key] || ""
|
24
|
+
@owner_key = opts[:owner_key] || "" #: String
|
24
25
|
|
25
26
|
# hash(32B) + validation salt(8B) + key salt(8B)
|
26
|
-
@user_key = opts[:user_key] || ""
|
27
|
+
@user_key = opts[:user_key] || "" #: String
|
27
28
|
|
28
29
|
# decryption key, encrypted w/ owner password
|
29
|
-
@owner_encryption_key = opts[:owner_encryption_key] || ""
|
30
|
+
@owner_encryption_key = opts[:owner_encryption_key] || "" #: String
|
30
31
|
|
31
32
|
# decryption key, encrypted w/ user password
|
32
|
-
@user_encryption_key = opts[:user_encryption_key] || ""
|
33
|
+
@user_encryption_key = opts[:user_encryption_key] || "" #: String
|
33
34
|
end
|
34
35
|
|
35
36
|
# Takes a string containing a user provided password.
|
@@ -38,6 +39,7 @@ class PDF::Reader
|
|
38
39
|
# decrypting the file will be returned. If the password doesn't match the file,
|
39
40
|
# and exception will be raised.
|
40
41
|
#
|
42
|
+
#: (String) -> String
|
41
43
|
def key(pass)
|
42
44
|
pass = pass.byteslice(0...127).to_s # UTF-8 encoded password. first 127 bytes
|
43
45
|
|
@@ -58,28 +60,31 @@ class PDF::Reader
|
|
58
60
|
#
|
59
61
|
# if the string is a valid user/owner password, this will return the decryption key
|
60
62
|
#
|
63
|
+
#: (String) -> (String | nil)
|
61
64
|
def auth_owner_pass(password)
|
62
|
-
if Digest::SHA256.digest(password + @owner_key[32..39] + @user_key) == @owner_key[0..31]
|
65
|
+
if Digest::SHA256.digest(password + @owner_key[32..39].to_s + @user_key) == @owner_key[0..31]
|
63
66
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
64
67
|
cipher.decrypt
|
65
|
-
cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1] + @user_key)
|
68
|
+
cipher.key = Digest::SHA256.digest(password + @owner_key[40..-1].to_s + @user_key)
|
66
69
|
cipher.iv = "\x00" * 16
|
67
70
|
cipher.padding = 0
|
68
71
|
cipher.update(@owner_encryption_key) + cipher.final
|
69
72
|
end
|
70
73
|
end
|
71
74
|
|
75
|
+
#: (String) -> (String | nil)
|
72
76
|
def auth_user_pass(password)
|
73
|
-
if Digest::SHA256.digest(password + @user_key[32..39]) == @user_key[0..31]
|
77
|
+
if Digest::SHA256.digest(password + @user_key[32..39].to_s) == @user_key[0..31]
|
74
78
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
75
79
|
cipher.decrypt
|
76
|
-
cipher.key = Digest::SHA256.digest(password + @user_key[40..-1])
|
80
|
+
cipher.key = Digest::SHA256.digest(password + @user_key[40..-1].to_s)
|
77
81
|
cipher.iv = "\x00" * 16
|
78
82
|
cipher.padding = 0
|
79
83
|
cipher.update(@user_encryption_key) + cipher.final
|
80
84
|
end
|
81
85
|
end
|
82
86
|
|
87
|
+
#: (String) -> (String | nil)
|
83
88
|
def auth_owner_pass_r6(password)
|
84
89
|
if r6_digest(password, @owner_key[32..39].to_s, @user_key[0,48].to_s) == @owner_key[0..31]
|
85
90
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
@@ -91,6 +96,7 @@ class PDF::Reader
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
|
99
|
+
#: (String) -> (String | nil)
|
94
100
|
def auth_user_pass_r6(password)
|
95
101
|
if r6_digest(password, @user_key[32..39].to_s) == @user_key[0..31]
|
96
102
|
cipher = OpenSSL::Cipher.new('AES-256-CBC')
|
@@ -104,6 +110,7 @@ class PDF::Reader
|
|
104
110
|
|
105
111
|
# PDF 2.0 spec, 7.6.4.3.4
|
106
112
|
# Algorithm 2.B: Computing a hash (revision 6 and later)
|
113
|
+
#: (String, String, ?String) -> String
|
107
114
|
def r6_digest(password, salt, user_key = '')
|
108
115
|
k = Digest::SHA256.digest(password + salt + user_key)
|
109
116
|
e = ''
|
@@ -128,6 +135,7 @@ class PDF::Reader
|
|
128
135
|
k[0, 32].to_s
|
129
136
|
end
|
130
137
|
|
138
|
+
#: (String) -> Integer
|
131
139
|
def unpack_128bit_bigendian_int(str)
|
132
140
|
ints = str[0,16].to_s.unpack("N*")
|
133
141
|
(ints[0].to_i << 96) + (ints[1].to_i << 64) + (ints[2].to_i << 32) + ints[3].to_i
|