pdf-reader 2.7.0 → 2.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG +20 -0
- data/Rakefile +1 -1
- data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
- data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
- data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
- data/lib/pdf/reader/buffer.rb +36 -34
- data/lib/pdf/reader/cmap.rb +64 -51
- data/lib/pdf/reader/error.rb +8 -0
- data/lib/pdf/reader/filter/ascii85.rb +1 -1
- data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
- data/lib/pdf/reader/filter/depredict.rb +1 -1
- data/lib/pdf/reader/filter/flate.rb +3 -3
- data/lib/pdf/reader/filter/lzw.rb +1 -1
- data/lib/pdf/reader/filter/null.rb +1 -2
- data/lib/pdf/reader/filter/run_length.rb +1 -1
- data/lib/pdf/reader/filter.rb +10 -11
- data/lib/pdf/reader/font.rb +71 -16
- data/lib/pdf/reader/font_descriptor.rb +18 -17
- data/lib/pdf/reader/form_xobject.rb +14 -5
- data/lib/pdf/reader/key_builder_v5.rb +138 -0
- data/lib/pdf/reader/null_security_handler.rb +0 -4
- data/lib/pdf/reader/object_hash.rb +251 -44
- data/lib/pdf/reader/page.rb +51 -22
- data/lib/pdf/reader/page_layout.rb +14 -28
- data/lib/pdf/reader/page_state.rb +1 -1
- data/lib/pdf/reader/page_text_receiver.rb +52 -10
- data/lib/pdf/reader/parser.rb +22 -7
- data/lib/pdf/reader/point.rb +1 -1
- data/lib/pdf/reader/rc4_security_handler.rb +38 -0
- data/lib/pdf/reader/rectangle.rb +20 -2
- data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
- data/lib/pdf/reader/security_handler_factory.rb +79 -0
- data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
- data/lib/pdf/reader/stream.rb +2 -2
- data/lib/pdf/reader/text_run.rb +13 -6
- data/lib/pdf/reader/type_check.rb +52 -0
- data/lib/pdf/reader/validating_receiver.rb +262 -0
- data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
- data/lib/pdf/reader/xref.rb +20 -3
- data/lib/pdf/reader.rb +32 -11
- data/rbi/pdf-reader.rbi +408 -174
- metadata +16 -9
- data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -0,0 +1,262 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
# typed: strict
|
3
|
+
# frozen_string_literal: true
|
4
|
+
|
5
|
+
module PDF
|
6
|
+
class Reader
|
7
|
+
|
8
|
+
# Page#walk will execute the content stream of a page, calling methods on a receiver class
|
9
|
+
# provided by the user. Each operator has a specific set of parameters it expects, and we
|
10
|
+
# wrap the user's receiver class in this one to verify the PDF uses valid parameters.
|
11
|
+
#
|
12
|
+
# Without these checks, users can't be confident about the number of parameters they'll receive
|
13
|
+
# for an operator, or what the type of those parameters will be. Everyone ends up building their
|
14
|
+
# own type safety guard clauses and it's tedious.
|
15
|
+
#
|
16
|
+
# Not all operators have type safety implemented yet, but we can expand the number over time.
|
17
|
+
class ValidatingReceiver
|
18
|
+
|
19
|
+
def initialize(wrapped)
|
20
|
+
@wrapped = wrapped
|
21
|
+
end
|
22
|
+
|
23
|
+
def page=(page)
|
24
|
+
call_wrapped(:page=, page)
|
25
|
+
end
|
26
|
+
|
27
|
+
#####################################################
|
28
|
+
# Graphics State Operators
|
29
|
+
#####################################################
|
30
|
+
def save_graphics_state(*args)
|
31
|
+
call_wrapped(:save_graphics_state)
|
32
|
+
end
|
33
|
+
|
34
|
+
def restore_graphics_state(*args)
|
35
|
+
call_wrapped(:restore_graphics_state)
|
36
|
+
end
|
37
|
+
|
38
|
+
#####################################################
|
39
|
+
# Matrix Operators
|
40
|
+
#####################################################
|
41
|
+
|
42
|
+
def concatenate_matrix(*args)
|
43
|
+
a, b, c, d, e, f = *args
|
44
|
+
call_wrapped(
|
45
|
+
:concatenate_matrix,
|
46
|
+
TypeCheck.cast_to_numeric!(a),
|
47
|
+
TypeCheck.cast_to_numeric!(b),
|
48
|
+
TypeCheck.cast_to_numeric!(c),
|
49
|
+
TypeCheck.cast_to_numeric!(d),
|
50
|
+
TypeCheck.cast_to_numeric!(e),
|
51
|
+
TypeCheck.cast_to_numeric!(f),
|
52
|
+
)
|
53
|
+
end
|
54
|
+
|
55
|
+
#####################################################
|
56
|
+
# Text Object Operators
|
57
|
+
#####################################################
|
58
|
+
|
59
|
+
def begin_text_object(*args)
|
60
|
+
call_wrapped(:begin_text_object)
|
61
|
+
end
|
62
|
+
|
63
|
+
def end_text_object(*args)
|
64
|
+
call_wrapped(:end_text_object)
|
65
|
+
end
|
66
|
+
|
67
|
+
#####################################################
|
68
|
+
# Text State Operators
|
69
|
+
#####################################################
|
70
|
+
def set_character_spacing(*args)
|
71
|
+
char_spacing, _ = *args
|
72
|
+
call_wrapped(
|
73
|
+
:set_character_spacing,
|
74
|
+
TypeCheck.cast_to_numeric!(char_spacing)
|
75
|
+
)
|
76
|
+
end
|
77
|
+
|
78
|
+
def set_horizontal_text_scaling(*args)
|
79
|
+
h_scaling, _ = *args
|
80
|
+
call_wrapped(
|
81
|
+
:set_horizontal_text_scaling,
|
82
|
+
TypeCheck.cast_to_numeric!(h_scaling)
|
83
|
+
)
|
84
|
+
end
|
85
|
+
|
86
|
+
def set_text_font_and_size(*args)
|
87
|
+
label, size, _ = *args
|
88
|
+
call_wrapped(
|
89
|
+
:set_text_font_and_size,
|
90
|
+
TypeCheck.cast_to_symbol(label),
|
91
|
+
TypeCheck.cast_to_numeric!(size)
|
92
|
+
)
|
93
|
+
end
|
94
|
+
|
95
|
+
def set_text_leading(*args)
|
96
|
+
leading, _ = *args
|
97
|
+
call_wrapped(
|
98
|
+
:set_text_leading,
|
99
|
+
TypeCheck.cast_to_numeric!(leading)
|
100
|
+
)
|
101
|
+
end
|
102
|
+
|
103
|
+
def set_text_rendering_mode(*args)
|
104
|
+
mode, _ = *args
|
105
|
+
call_wrapped(
|
106
|
+
:set_text_rendering_mode,
|
107
|
+
TypeCheck.cast_to_numeric!(mode)
|
108
|
+
)
|
109
|
+
end
|
110
|
+
|
111
|
+
def set_text_rise(*args)
|
112
|
+
rise, _ = *args
|
113
|
+
call_wrapped(
|
114
|
+
:set_text_rise,
|
115
|
+
TypeCheck.cast_to_numeric!(rise)
|
116
|
+
)
|
117
|
+
end
|
118
|
+
|
119
|
+
def set_word_spacing(*args)
|
120
|
+
word_spacing, _ = *args
|
121
|
+
call_wrapped(
|
122
|
+
:set_word_spacing,
|
123
|
+
TypeCheck.cast_to_numeric!(word_spacing)
|
124
|
+
)
|
125
|
+
end
|
126
|
+
|
127
|
+
#####################################################
|
128
|
+
# Text Positioning Operators
|
129
|
+
#####################################################
|
130
|
+
|
131
|
+
def move_text_position(*args) # Td
|
132
|
+
x, y, _ = *args
|
133
|
+
call_wrapped(
|
134
|
+
:move_text_position,
|
135
|
+
TypeCheck.cast_to_numeric!(x),
|
136
|
+
TypeCheck.cast_to_numeric!(y)
|
137
|
+
)
|
138
|
+
end
|
139
|
+
|
140
|
+
def move_text_position_and_set_leading(*args) # TD
|
141
|
+
x, y, _ = *args
|
142
|
+
call_wrapped(
|
143
|
+
:move_text_position_and_set_leading,
|
144
|
+
TypeCheck.cast_to_numeric!(x),
|
145
|
+
TypeCheck.cast_to_numeric!(y)
|
146
|
+
)
|
147
|
+
end
|
148
|
+
|
149
|
+
def set_text_matrix_and_text_line_matrix(*args) # Tm
|
150
|
+
a, b, c, d, e, f = *args
|
151
|
+
call_wrapped(
|
152
|
+
:set_text_matrix_and_text_line_matrix,
|
153
|
+
TypeCheck.cast_to_numeric!(a),
|
154
|
+
TypeCheck.cast_to_numeric!(b),
|
155
|
+
TypeCheck.cast_to_numeric!(c),
|
156
|
+
TypeCheck.cast_to_numeric!(d),
|
157
|
+
TypeCheck.cast_to_numeric!(e),
|
158
|
+
TypeCheck.cast_to_numeric!(f),
|
159
|
+
)
|
160
|
+
end
|
161
|
+
|
162
|
+
def move_to_start_of_next_line(*args) # T*
|
163
|
+
call_wrapped(:move_to_start_of_next_line)
|
164
|
+
end
|
165
|
+
|
166
|
+
#####################################################
|
167
|
+
# Text Showing Operators
|
168
|
+
#####################################################
|
169
|
+
def show_text(*args) # Tj (AWAY)
|
170
|
+
string, _ = *args
|
171
|
+
call_wrapped(
|
172
|
+
:show_text,
|
173
|
+
TypeCheck.cast_to_string!(string)
|
174
|
+
)
|
175
|
+
end
|
176
|
+
|
177
|
+
def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
|
178
|
+
params, _ = *args
|
179
|
+
unless params.is_a?(Array)
|
180
|
+
raise MalformedPDFError, "TJ operator expects a single Array argument"
|
181
|
+
end
|
182
|
+
|
183
|
+
call_wrapped(
|
184
|
+
:show_text_with_positioning,
|
185
|
+
params
|
186
|
+
)
|
187
|
+
end
|
188
|
+
|
189
|
+
def move_to_next_line_and_show_text(*args) # '
|
190
|
+
string, _ = *args
|
191
|
+
call_wrapped(
|
192
|
+
:move_to_next_line_and_show_text,
|
193
|
+
TypeCheck.cast_to_string!(string)
|
194
|
+
)
|
195
|
+
end
|
196
|
+
|
197
|
+
def set_spacing_next_line_show_text(*args) # "
|
198
|
+
aw, ac, string = *args
|
199
|
+
call_wrapped(
|
200
|
+
:set_spacing_next_line_show_text,
|
201
|
+
TypeCheck.cast_to_numeric!(aw),
|
202
|
+
TypeCheck.cast_to_numeric!(ac),
|
203
|
+
TypeCheck.cast_to_string!(string)
|
204
|
+
)
|
205
|
+
end
|
206
|
+
|
207
|
+
#####################################################
|
208
|
+
# Form XObject Operators
|
209
|
+
#####################################################
|
210
|
+
|
211
|
+
def invoke_xobject(*args)
|
212
|
+
label, _ = *args
|
213
|
+
|
214
|
+
call_wrapped(
|
215
|
+
:invoke_xobject,
|
216
|
+
TypeCheck.cast_to_symbol(label)
|
217
|
+
)
|
218
|
+
end
|
219
|
+
|
220
|
+
#####################################################
|
221
|
+
# Inline Image Operators
|
222
|
+
#####################################################
|
223
|
+
|
224
|
+
def begin_inline_image(*args)
|
225
|
+
call_wrapped(:begin_inline_image)
|
226
|
+
end
|
227
|
+
|
228
|
+
def begin_inline_image_data(*args)
|
229
|
+
# We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
|
230
|
+
# number of elements
|
231
|
+
@wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
|
232
|
+
end
|
233
|
+
|
234
|
+
def end_inline_image(*args)
|
235
|
+
data, _ = *args
|
236
|
+
|
237
|
+
call_wrapped(
|
238
|
+
:end_inline_image,
|
239
|
+
TypeCheck.cast_to_string!(data)
|
240
|
+
)
|
241
|
+
end
|
242
|
+
|
243
|
+
#####################################################
|
244
|
+
# Final safety net for any operators that don't have type checking enabled yet
|
245
|
+
#####################################################
|
246
|
+
|
247
|
+
def respond_to?(meth)
|
248
|
+
@wrapped.respond_to?(meth)
|
249
|
+
end
|
250
|
+
|
251
|
+
def method_missing(methodname, *args)
|
252
|
+
@wrapped.send(methodname, *args)
|
253
|
+
end
|
254
|
+
|
255
|
+
private
|
256
|
+
|
257
|
+
def call_wrapped(methodname, *args)
|
258
|
+
@wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
|
259
|
+
end
|
260
|
+
end
|
261
|
+
end
|
262
|
+
end
|
@@ -30,7 +30,7 @@ class PDF::Reader
|
|
30
30
|
|
31
31
|
# in ruby a negative index is valid, and will go from the end of the array
|
32
32
|
# which is undesirable in this case.
|
33
|
-
if @font.first_char <= code_point
|
33
|
+
if @font.first_char && @font.first_char <= code_point
|
34
34
|
@font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
|
35
35
|
else
|
36
36
|
@missing_width.to_f
|
data/lib/pdf/reader/xref.rb
CHANGED
@@ -104,13 +104,18 @@ class PDF::Reader
|
|
104
104
|
buf = new_buffer(offset)
|
105
105
|
tok_one = buf.token
|
106
106
|
|
107
|
+
# we have a traditional xref table
|
107
108
|
return load_xref_table(buf) if tok_one == "xref" || tok_one == "ref"
|
108
109
|
|
109
110
|
tok_two = buf.token
|
110
111
|
tok_three = buf.token
|
111
112
|
|
113
|
+
# we have an XRef stream
|
112
114
|
if tok_one.to_i >= 0 && tok_two.to_i >= 0 && tok_three == "obj"
|
113
115
|
buf = new_buffer(offset)
|
116
|
+
# Maybe we should be passing the ObjectHash as the second argument to the Parser here,
|
117
|
+
# to handle the case where an XRef Stream has the Length specified via an
|
118
|
+
# indirect object
|
114
119
|
stream = PDF::Reader::Parser.new(buf).object(tok_one.to_i, tok_two.to_i)
|
115
120
|
return load_xref_stream(stream)
|
116
121
|
end
|
@@ -126,6 +131,10 @@ class PDF::Reader
|
|
126
131
|
|
127
132
|
while !params.include?("trailer") && !params.include?(nil)
|
128
133
|
if params.size == 2
|
134
|
+
unless params[0].to_s.match(/\A\d+\z/)
|
135
|
+
raise MalformedPDFError, "invalid xref table, expected object ID"
|
136
|
+
end
|
137
|
+
|
129
138
|
objid, count = params[0].to_i, params[1].to_i
|
130
139
|
count.times do
|
131
140
|
offset = buf.token.to_i
|
@@ -143,7 +152,7 @@ class PDF::Reader
|
|
143
152
|
params << buf.token
|
144
153
|
end
|
145
154
|
|
146
|
-
trailer = Parser.new(buf
|
155
|
+
trailer = Parser.new(buf).parse_token
|
147
156
|
|
148
157
|
unless trailer.kind_of?(Hash)
|
149
158
|
raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
|
@@ -168,8 +177,16 @@ class PDF::Reader
|
|
168
177
|
[:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
|
169
178
|
}]
|
170
179
|
|
171
|
-
widths
|
172
|
-
|
180
|
+
widths = stream.hash[:W]
|
181
|
+
|
182
|
+
PDF::Reader::Error.validate_type_as_malformed(widths, "xref stream widths", Array)
|
183
|
+
|
184
|
+
entry_length = widths.inject(0) { |s, w|
|
185
|
+
unless w.is_a?(Integer)
|
186
|
+
w = 0
|
187
|
+
end
|
188
|
+
s + w
|
189
|
+
}
|
173
190
|
raw_data = StringIO.new(stream.unfiltered_data)
|
174
191
|
if stream.hash[:Index]
|
175
192
|
index = stream.hash[:Index]
|
data/lib/pdf/reader.rb
CHANGED
@@ -112,19 +112,27 @@ module PDF
|
|
112
112
|
#
|
113
113
|
# reader = PDF::Reader.new("somefile.pdf", :password => "apples")
|
114
114
|
#
|
115
|
+
# Using this method directly is supported, but it's more common to use
|
116
|
+
# `PDF::Reader.open`
|
117
|
+
#
|
115
118
|
def initialize(input, opts = {})
|
116
119
|
@cache = PDF::Reader::ObjectCache.new
|
117
120
|
opts.merge!(:cache => @cache)
|
118
121
|
@objects = PDF::Reader::ObjectHash.new(input, opts)
|
119
122
|
end
|
120
123
|
|
124
|
+
# Return a Hash with some basic information about the PDF file
|
125
|
+
#
|
121
126
|
def info
|
122
|
-
dict = @objects.
|
127
|
+
dict = @objects.deref_hash(@objects.trailer[:Info]) || {}
|
123
128
|
doc_strings_to_utf8(dict)
|
124
129
|
end
|
125
130
|
|
131
|
+
# Return a Hash with extra metadata provided by the author of the PDF file. Not
|
132
|
+
# always present.
|
133
|
+
#
|
126
134
|
def metadata
|
127
|
-
stream = @objects.
|
135
|
+
stream = @objects.deref_stream(root[:Metadata])
|
128
136
|
if stream.nil?
|
129
137
|
nil
|
130
138
|
else
|
@@ -134,20 +142,24 @@ module PDF
|
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
145
|
+
# The number of pages in this PDF
|
146
|
+
#
|
137
147
|
def page_count
|
138
|
-
pages = @objects.
|
148
|
+
pages = @objects.deref_hash(root[:Pages])
|
139
149
|
unless pages.kind_of?(::Hash)
|
140
150
|
raise MalformedPDFError, "Pages structure is missing #{pages.class}"
|
141
151
|
end
|
142
|
-
@page_count ||= @objects.
|
152
|
+
@page_count ||= @objects.deref_integer(pages[:Count]) || 0
|
143
153
|
end
|
144
154
|
|
155
|
+
# The PDF version this file uses
|
156
|
+
#
|
145
157
|
def pdf_version
|
146
158
|
@objects.pdf_version
|
147
159
|
end
|
148
160
|
|
149
|
-
# syntactic sugar for opening a PDF file. Accepts the
|
150
|
-
# as new().
|
161
|
+
# syntactic sugar for opening a PDF file and the most common approach. Accepts the
|
162
|
+
# same arguments as new().
|
151
163
|
#
|
152
164
|
# PDF::Reader.open("somefile.pdf") do |reader|
|
153
165
|
# puts reader.pdf_version
|
@@ -178,6 +190,8 @@ module PDF
|
|
178
190
|
# methods available on each page
|
179
191
|
#
|
180
192
|
def pages
|
193
|
+
return [] if page_count <= 0
|
194
|
+
|
181
195
|
(1..self.page_count).map do |num|
|
182
196
|
begin
|
183
197
|
PDF::Reader::Page.new(@objects, num, :cache => @cache)
|
@@ -228,7 +242,7 @@ module PDF
|
|
228
242
|
pdfdoc_to_utf8(obj)
|
229
243
|
end
|
230
244
|
else
|
231
|
-
|
245
|
+
obj
|
232
246
|
end
|
233
247
|
end
|
234
248
|
|
@@ -259,7 +273,7 @@ module PDF
|
|
259
273
|
|
260
274
|
def root
|
261
275
|
@root ||= begin
|
262
|
-
obj = @objects.
|
276
|
+
obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
|
263
277
|
unless obj.kind_of?(::Hash)
|
264
278
|
raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
|
265
279
|
end
|
@@ -271,8 +285,9 @@ module PDF
|
|
271
285
|
end
|
272
286
|
################################################################################
|
273
287
|
|
274
|
-
require 'pdf/reader/
|
288
|
+
require 'pdf/reader/resources'
|
275
289
|
require 'pdf/reader/buffer'
|
290
|
+
require 'pdf/reader/bounding_rectangle_runs_filter'
|
276
291
|
require 'pdf/reader/cid_widths'
|
277
292
|
require 'pdf/reader/cmap'
|
278
293
|
require 'pdf/reader/encoding'
|
@@ -301,13 +316,19 @@ require 'pdf/reader/rectangle'
|
|
301
316
|
require 'pdf/reader/reference'
|
302
317
|
require 'pdf/reader/register_receiver'
|
303
318
|
require 'pdf/reader/null_security_handler'
|
304
|
-
require 'pdf/reader/
|
305
|
-
require 'pdf/reader/
|
319
|
+
require 'pdf/reader/security_handler_factory'
|
320
|
+
require 'pdf/reader/standard_key_builder'
|
321
|
+
require 'pdf/reader/key_builder_v5'
|
322
|
+
require 'pdf/reader/aes_v2_security_handler'
|
323
|
+
require 'pdf/reader/aes_v3_security_handler'
|
324
|
+
require 'pdf/reader/rc4_security_handler'
|
306
325
|
require 'pdf/reader/unimplemented_security_handler'
|
307
326
|
require 'pdf/reader/stream'
|
308
327
|
require 'pdf/reader/text_run'
|
328
|
+
require 'pdf/reader/type_check'
|
309
329
|
require 'pdf/reader/page_state'
|
310
330
|
require 'pdf/reader/page_text_receiver'
|
311
331
|
require 'pdf/reader/token'
|
312
332
|
require 'pdf/reader/xref'
|
313
333
|
require 'pdf/reader/page'
|
334
|
+
require 'pdf/reader/validating_receiver'
|