pdf-reader 2.8.0 → 2.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG +5 -0
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/buffer.rb +36 -34
- data/lib/pdf/reader/cmap.rb +64 -51
- data/lib/pdf/reader/error.rb +8 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
- data/lib/pdf/reader/filter/depredict.rb +1 -1
- data/lib/pdf/reader/filter/flate.rb +3 -3
- data/lib/pdf/reader/filter/lzw.rb +1 -1
- data/lib/pdf/reader/filter/null.rb +1 -2
- data/lib/pdf/reader/filter/run_length.rb +1 -1
- data/lib/pdf/reader/filter.rb +1 -1
- data/lib/pdf/reader/font.rb +29 -17
- data/lib/pdf/reader/font_descriptor.rb +18 -17
- data/lib/pdf/reader/form_xobject.rb +14 -5
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/null_security_handler.rb +0 -4
- data/lib/pdf/reader/object_hash.rb +247 -42
- data/lib/pdf/reader/page.rb +38 -20
- data/lib/pdf/reader/page_state.rb +1 -1
- data/lib/pdf/reader/page_text_receiver.rb +4 -1
- data/lib/pdf/reader/parser.rb +9 -6
- data/lib/pdf/reader/point.rb +1 -1
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +2 -2
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
- data/lib/pdf/reader/stream.rb +2 -2
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
- data/lib/pdf/reader/xref.rb +20 -3
- data/lib/pdf/reader.rb +17 -9
- data/rbi/pdf-reader.rbi +388 -173
- metadata +15 -9
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -0,0 +1,262 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
  class Reader

    # Page#walk will execute the content stream of a page, calling methods on a receiver class
    # provided by the user. Each operator has a specific set of parameters it expects, and we
    # wrap the user's receiver class in this one to verify the PDF uses valid parameters.
    #
    # Without these checks, users can't be confident about the number of parameters they'll receive
    # for an operator, or what the type of those parameters will be. Everyone ends up building their
    # own type safety guard clauses and it's tedious.
    #
    # Not all operators have type safety implemented yet, but we can expand the number over time.
    #
    # How the wrappers below behave:
    #
    # * Each explicit wrapper forwards through call_wrapped, so a callback is silently skipped
    #   (returning nil) when the wrapped receiver does not respond to it.
    # * Surplus operands supplied by the PDF are dropped; only the operands named in each wrapper
    #   are forwarded.
    # * Operands are passed through the TypeCheck.cast_to_* helpers before being forwarded. Those
    #   helpers are defined elsewhere in the library; presumably the bang variants raise when a
    #   value cannot be converted -- confirm in type_check.rb.
    class ValidatingReceiver

      # wrapped - the user supplied receiver object that callbacks are forwarded to
      def initialize(wrapped)
        @wrapped = wrapped
      end

      # Forwards the page to the wrapped receiver, if it implements page=
      def page=(page)
        call_wrapped(:page=, page)
      end

      #####################################################
      # Graphics State Operators
      #####################################################

      # Takes no operands; anything passed in args is ignored.
      def save_graphics_state(*args)
        call_wrapped(:save_graphics_state)
      end

      # Takes no operands; anything passed in args is ignored.
      def restore_graphics_state(*args)
        call_wrapped(:restore_graphics_state)
      end

      #####################################################
      # Matrix Operators
      #####################################################

      # Forwards the first six operands, each cast to a numeric.
      def concatenate_matrix(*args)
        a, b, c, d, e, f = *args
        call_wrapped(
          :concatenate_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      #####################################################
      # Text Object Operators
      #####################################################

      # Takes no operands; anything passed in args is ignored.
      def begin_text_object(*args)
        call_wrapped(:begin_text_object)
      end

      # Takes no operands; anything passed in args is ignored.
      def end_text_object(*args)
        call_wrapped(:end_text_object)
      end

      #####################################################
      # Text State Operators
      #####################################################

      # Forwards the first operand, cast to a numeric.
      def set_character_spacing(*args)
        char_spacing, _ = *args
        call_wrapped(
          :set_character_spacing,
          TypeCheck.cast_to_numeric!(char_spacing)
        )
      end

      # Forwards the first operand, cast to a numeric.
      def set_horizontal_text_scaling(*args)
        h_scaling, _ = *args
        call_wrapped(
          :set_horizontal_text_scaling,
          TypeCheck.cast_to_numeric!(h_scaling)
        )
      end

      # Forwards the font label (cast to a symbol) and the size (cast to a numeric).
      def set_text_font_and_size(*args)
        label, size, _ = *args
        call_wrapped(
          :set_text_font_and_size,
          TypeCheck.cast_to_symbol(label),
          TypeCheck.cast_to_numeric!(size)
        )
      end

      # Forwards the first operand, cast to a numeric.
      def set_text_leading(*args)
        leading, _ = *args
        call_wrapped(
          :set_text_leading,
          TypeCheck.cast_to_numeric!(leading)
        )
      end

      # Forwards the first operand, cast to a numeric.
      def set_text_rendering_mode(*args)
        mode, _ = *args
        call_wrapped(
          :set_text_rendering_mode,
          TypeCheck.cast_to_numeric!(mode)
        )
      end

      # Forwards the first operand, cast to a numeric.
      def set_text_rise(*args)
        rise, _ = *args
        call_wrapped(
          :set_text_rise,
          TypeCheck.cast_to_numeric!(rise)
        )
      end

      # Forwards the first operand, cast to a numeric.
      def set_word_spacing(*args)
        word_spacing, _ = *args
        call_wrapped(
          :set_word_spacing,
          TypeCheck.cast_to_numeric!(word_spacing)
        )
      end

      #####################################################
      # Text Positioning Operators
      #####################################################

      # Forwards the first two operands, each cast to a numeric.
      def move_text_position(*args) # Td
        x, y, _ = *args
        call_wrapped(
          :move_text_position,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      # Forwards the first two operands, each cast to a numeric.
      def move_text_position_and_set_leading(*args) # TD
        x, y, _ = *args
        call_wrapped(
          :move_text_position_and_set_leading,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      # Forwards the first six operands, each cast to a numeric.
      def set_text_matrix_and_text_line_matrix(*args) # Tm
        a, b, c, d, e, f = *args
        call_wrapped(
          :set_text_matrix_and_text_line_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      # Takes no operands; anything passed in args is ignored.
      def move_to_start_of_next_line(*args) # T*
        call_wrapped(:move_to_start_of_next_line)
      end

      #####################################################
      # Text Showing Operators
      #####################################################

      # Forwards the first operand, cast to a string.
      def show_text(*args) # Tj (AWAY)
        string, _ = *args
        call_wrapped(
          :show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      # Forwards the first operand untouched, provided it is an Array. The elements of the
      # Array are not validated here.
      #
      # Raises MalformedPDFError when the first operand is not an Array.
      def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
        params, _ = *args
        unless params.is_a?(Array)
          raise MalformedPDFError, "TJ operator expects a single Array argument"
        end

        call_wrapped(
          :show_text_with_positioning,
          params
        )
      end

      # Forwards the first operand, cast to a string.
      def move_to_next_line_and_show_text(*args) # '
        string, _ = *args
        call_wrapped(
          :move_to_next_line_and_show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      # Forwards two numerics (aw, ac) followed by a string.
      def set_spacing_next_line_show_text(*args) # "
        aw, ac, string = *args
        call_wrapped(
          :set_spacing_next_line_show_text,
          TypeCheck.cast_to_numeric!(aw),
          TypeCheck.cast_to_numeric!(ac),
          TypeCheck.cast_to_string!(string)
        )
      end

      #####################################################
      # Form XObject Operators
      #####################################################

      # Forwards the first operand, cast to a symbol.
      def invoke_xobject(*args)
        label, _ = *args

        call_wrapped(
          :invoke_xobject,
          TypeCheck.cast_to_symbol(label)
        )
      end

      #####################################################
      # Inline Image Operators
      #####################################################

      # Takes no operands; anything passed in args is ignored.
      def begin_inline_image(*args)
        call_wrapped(:begin_inline_image)
      end

      # Forwards every operand as-is, with no type checking.
      def begin_inline_image_data(*args)
        # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
        # number of elements
        @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
      end

      # Forwards the first operand, cast to a string.
      def end_inline_image(*args)
        data, _ = *args

        call_wrapped(
          :end_inline_image,
          TypeCheck.cast_to_string!(data)
        )
      end

      #####################################################
      # Final safety net for any operators that don't have type checking enabled yet
      #####################################################

      # Mirrors the wrapped receiver, so callers only invoke callbacks the user implemented.
      def respond_to?(meth)
        @wrapped.respond_to?(meth)
      end

      # Forwards any operator that has no explicit wrapper above straight to the wrapped
      # receiver, with its operands unchecked.
      #
      # The method is dispatched by name with send, the same mechanism call_wrapped uses.
      # Invoking `call` on the wrapped object would be wrong: user receivers are plain objects
      # that define operator callbacks, not callables.
      def method_missing(methodname, *args)
        @wrapped.send(methodname, *args)
      end

      private

      # Invokes methodname on the wrapped receiver when it responds to it, returning the
      # callback's result. Returns nil without calling anything otherwise.
      def call_wrapped(methodname, *args)
        @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
      end
    end
  end
end
|
@@ -30,7 +30,7 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
32
32
|
# which is undesirable in this case.
|
33
|
-
if @font.first_char <= code_point
|
33
|
+
if @font.first_char && @font.first_char <= code_point
|
34
34
|
@font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
|
35
35
|
else
|
36
36
|
@missing_width.to_f
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -104,13 +104,18 @@ class PDF::Reader
|
|
104
104
|
buf = new_buffer(offset)
|
105
105
|
tok_one = buf.token
|
106
106
|
|
107
|
+
# we have a traditional xref table
|
107
108
|
return load_xref_table(buf) if tok_one == "xref" || tok_one == "ref"
|
108
109
|
|
109
110
|
tok_two = buf.token
|
110
111
|
tok_three = buf.token
|
111
112
|
|
113
|
+
# we have an XRef stream
|
112
114
|
if tok_one.to_i >= 0 && tok_two.to_i >= 0 && tok_three == "obj"
|
113
115
|
buf = new_buffer(offset)
|
116
|
+
# Maybe we should be parsing the ObjectHash second argument to the Parser here,
|
117
|
+
# to handle the case where an XRef Stream has the Length specified via an
|
118
|
+
# indirect object
|
114
119
|
stream = PDF::Reader::Parser.new(buf).object(tok_one.to_i, tok_two.to_i)
|
115
120
|
return load_xref_stream(stream)
|
116
121
|
end
|
@@ -126,6 +131,10 @@ class PDF::Reader
|
|
126
131
|
|
127
132
|
while !params.include?("trailer") && !params.include?(nil)
|
128
133
|
if params.size == 2
|
134
|
+
unless params[0].to_s.match(/\A\d+\z/)
|
135
|
+
raise MalformedPDFError, "invalid xref table, expected object ID"
|
136
|
+
end
|
137
|
+
|
129
138
|
objid, count = params[0].to_i, params[1].to_i
|
130
139
|
count.times do
|
131
140
|
offset = buf.token.to_i
|
@@ -143,7 +152,7 @@ class PDF::Reader
|
|
143
152
|
params << buf.token
|
144
153
|
end
|
145
154
|
|
146
|
-
trailer = Parser.new(buf
|
155
|
+
trailer = Parser.new(buf).parse_token
|
147
156
|
|
148
157
|
unless trailer.kind_of?(Hash)
|
149
158
|
raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
|
@@ -168,8 +177,16 @@ class PDF::Reader
|
|
168
177
|
[:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
|
169
178
|
}]
|
170
179
|
|
171
|
-
widths
|
172
|
-
|
180
|
+
widths = stream.hash[:W]
|
181
|
+
|
182
|
+
PDF::Reader::Error.validate_type_as_malformed(widths, "xref stream widths", Array)
|
183
|
+
|
184
|
+
entry_length = widths.inject(0) { |s, w|
|
185
|
+
unless w.is_a?(Integer)
|
186
|
+
w = 0
|
187
|
+
end
|
188
|
+
s + w
|
189
|
+
}
|
173
190
|
raw_data = StringIO.new(stream.unfiltered_data)
|
174
191
|
if stream.hash[:Index]
|
175
192
|
index = stream.hash[:Index]
|
data/lib/pdf/reader.rb
CHANGED
@@ -124,7 +124,7 @@ module PDF
|
|
124
124
|
# Return a Hash with some basic information about the PDF file
|
125
125
|
#
|
126
126
|
def info
|
127
|
-
dict = @objects.
|
127
|
+
dict = @objects.deref_hash(@objects.trailer[:Info]) || {}
|
128
128
|
doc_strings_to_utf8(dict)
|
129
129
|
end
|
130
130
|
|
@@ -132,7 +132,7 @@ module PDF
|
|
132
132
|
# always present.
|
133
133
|
#
|
134
134
|
def metadata
|
135
|
-
stream = @objects.
|
135
|
+
stream = @objects.deref_stream(root[:Metadata])
|
136
136
|
if stream.nil?
|
137
137
|
nil
|
138
138
|
else
|
@@ -145,11 +145,11 @@ module PDF
|
|
145
145
|
# The number of pages in this PDF
|
146
146
|
#
|
147
147
|
def page_count
|
148
|
-
pages = @objects.
|
148
|
+
pages = @objects.deref_hash(root[:Pages])
|
149
149
|
unless pages.kind_of?(::Hash)
|
150
150
|
raise MalformedPDFError, "Pages structure is missing #{pages.class}"
|
151
151
|
end
|
152
|
-
@page_count ||= @objects.
|
152
|
+
@page_count ||= @objects.deref_integer(pages[:Count]) || 0
|
153
153
|
end
|
154
154
|
|
155
155
|
# The PDF version this file uses
|
@@ -190,6 +190,8 @@ module PDF
|
|
190
190
|
# methods available on each page
|
191
191
|
#
|
192
192
|
def pages
|
193
|
+
return [] if page_count <= 0
|
194
|
+
|
193
195
|
(1..self.page_count).map do |num|
|
194
196
|
begin
|
195
197
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
@@ -240,7 +242,7 @@ module PDF
|
|
240
242
|
pdfdoc_to_utf8(obj)
|
241
243
|
end
|
242
244
|
else
|
243
|
-
|
245
|
+
obj
|
244
246
|
end
|
245
247
|
end
|
246
248
|
|
@@ -271,7 +273,7 @@ module PDF
|
|
271
273
|
|
272
274
|
def root
|
273
275
|
@root ||= begin
|
274
|
-
obj = @objects.
|
276
|
+
obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
|
275
277
|
unless obj.kind_of?(::Hash)
|
276
278
|
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
277
279
|
end
|
@@ -283,7 +285,7 @@ module PDF
|
|
283
285
|
end
|
284
286
|
################################################################################
|
285
287
|
|
286
|
-
require 'pdf/reader/
|
288
|
+
require 'pdf/reader/resources'
|
287
289
|
require 'pdf/reader/buffer'
|
288
290
|
require 'pdf/reader/bounding_rectangle_runs_filter'
|
289
291
|
require 'pdf/reader/cid_widths'
|
@@ -314,13 +316,19 @@ require 'pdf/reader/rectangle'
|
|
314
316
|
require 'pdf/reader/reference'
|
315
317
|
require 'pdf/reader/register_receiver'
|
316
318
|
require 'pdf/reader/null_security_handler'
|
317
|
-
require 'pdf/reader/
|
318
|
-
require 'pdf/reader/
|
319
|
+
require 'pdf/reader/security_handler_factory'
|
320
|
+
require 'pdf/reader/standard_key_builder'
|
321
|
+
require 'pdf/reader/key_builder_v5'
|
322
|
+
require 'pdf/reader/aes_v2_security_handler'
|
323
|
+
require 'pdf/reader/aes_v3_security_handler'
|
324
|
+
require 'pdf/reader/rc4_security_handler'
|
319
325
|
require 'pdf/reader/unimplemented_security_handler'
|
320
326
|
require 'pdf/reader/stream'
|
321
327
|
require 'pdf/reader/text_run'
|
328
|
+
require 'pdf/reader/type_check'
|
322
329
|
require 'pdf/reader/page_state'
|
323
330
|
require 'pdf/reader/page_text_receiver'
|
324
331
|
require 'pdf/reader/token'
|
325
332
|
require 'pdf/reader/xref'
|
326
333
|
require 'pdf/reader/page'
|
334
|
+
require 'pdf/reader/validating_receiver'
|