pdf-reader 2.8.0 → 2.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +9 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +10 -11
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +20 -8
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -0,0 +1,52 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
module PDF
  class Reader

    # Coerces untrusted values (usually objects parsed out of a PDF file) into a
    # known Ruby type, so callers can rely on the type they receive.
    #
    class TypeCheck

      # Returns obj untouched when it is already Numeric, and 0 when it is nil.
      # Anything else is converted with #to_f when the object offers it, falling
      # back to #to_i.
      #
      # Raises MalformedPDFError when obj supports neither conversion.
      def self.cast_to_numeric!(obj)
        return obj if obj.is_a?(Numeric)
        return 0 if obj.nil?
        return obj.to_f if obj.respond_to?(:to_f)
        return obj.to_i if obj.respond_to?(:to_i)

        raise MalformedPDFError, "Unable to cast to numeric"
      end

      # Returns string untouched when it is already a String, and "" when it is
      # nil. Anything else is converted with #to_s.
      #
      # Raises MalformedPDFError when the value does not respond to #to_s.
      def self.cast_to_string!(string)
        return string if string.is_a?(String)
        return "" if string.nil?
        return string.to_s if string.respond_to?(:to_s)

        raise MalformedPDFError, "Unable to cast to string"
      end

      # Returns obj untouched when it is already a Symbol. nil is passed through
      # as nil rather than being converted. Anything else is converted with
      # #to_sym.
      #
      # Raises MalformedPDFError when obj does not respond to #to_sym.
      def self.cast_to_symbol(obj)
        return obj if obj.is_a?(Symbol)
        return nil if obj.nil?
        return obj.to_sym if obj.respond_to?(:to_sym)

        raise MalformedPDFError, "Unable to cast to symbol"
      end
    end
  end
end
52
+
@@ -0,0 +1,262 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
module PDF
  class Reader

    # Page#walk executes the content stream of a page, calling methods on a receiver object
    # provided by the user. Each operator expects a specific set of parameters, and we wrap
    # the user's receiver in this class to verify the PDF supplies valid parameters.
    #
    # Without these checks, users can't be confident about the number of parameters they'll
    # receive for an operator, or what the type of those parameters will be. Everyone ends up
    # building their own type safety guard clauses, which is tedious.
    #
    # Every validated operator follows the same pattern: surplus arguments are discarded, the
    # expected ones are cast via TypeCheck, and the call is forwarded only when the wrapped
    # receiver responds to that operator.
    #
    # Not all operators have type safety implemented yet, but we can expand the number over time.
    class ValidatingReceiver

      # wrapped - the user supplied receiver that validated calls are forwarded to.
      def initialize(wrapped)
        @wrapped = wrapped
      end

      # Forwards the page to the wrapped receiver, if it defines #page=.
      def page=(page)
        call_wrapped(:page=, page)
      end

      #####################################################
      # Graphics State Operators
      #####################################################

      # Takes no operands; anything the PDF supplied is dropped.
      def save_graphics_state(*args)
        call_wrapped(:save_graphics_state)
      end

      # Takes no operands; anything the PDF supplied is dropped.
      def restore_graphics_state(*args)
        call_wrapped(:restore_graphics_state)
      end

      #####################################################
      # Matrix Operators
      #####################################################

      # Forwards exactly six numeric operands. Missing operands arrive as nil, which
      # TypeCheck.cast_to_numeric! is expected to handle.
      def concatenate_matrix(*args)
        a, b, c, d, e, f = *args
        call_wrapped(
          :concatenate_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      #####################################################
      # Text Object Operators
      #####################################################

      # Takes no operands; anything the PDF supplied is dropped.
      def begin_text_object(*args)
        call_wrapped(:begin_text_object)
      end

      # Takes no operands; anything the PDF supplied is dropped.
      def end_text_object(*args)
        call_wrapped(:end_text_object)
      end

      #####################################################
      # Text State Operators
      #####################################################

      # Forwards a single numeric operand.
      def set_character_spacing(*args)
        char_spacing, _ = *args
        call_wrapped(
          :set_character_spacing,
          TypeCheck.cast_to_numeric!(char_spacing)
        )
      end

      # Forwards a single numeric operand.
      def set_horizontal_text_scaling(*args)
        h_scaling, _ = *args
        call_wrapped(
          :set_horizontal_text_scaling,
          TypeCheck.cast_to_numeric!(h_scaling)
        )
      end

      # Forwards the font label cast to a symbol and the size cast to a numeric.
      def set_text_font_and_size(*args)
        label, size, _ = *args
        call_wrapped(
          :set_text_font_and_size,
          TypeCheck.cast_to_symbol(label),
          TypeCheck.cast_to_numeric!(size)
        )
      end

      # Forwards a single numeric operand.
      def set_text_leading(*args)
        leading, _ = *args
        call_wrapped(
          :set_text_leading,
          TypeCheck.cast_to_numeric!(leading)
        )
      end

      # Forwards a single numeric operand.
      def set_text_rendering_mode(*args)
        mode, _ = *args
        call_wrapped(
          :set_text_rendering_mode,
          TypeCheck.cast_to_numeric!(mode)
        )
      end

      # Forwards a single numeric operand.
      def set_text_rise(*args)
        rise, _ = *args
        call_wrapped(
          :set_text_rise,
          TypeCheck.cast_to_numeric!(rise)
        )
      end

      # Forwards a single numeric operand.
      def set_word_spacing(*args)
        word_spacing, _ = *args
        call_wrapped(
          :set_word_spacing,
          TypeCheck.cast_to_numeric!(word_spacing)
        )
      end

      #####################################################
      # Text Positioning Operators
      #####################################################

      # Forwards two numeric operands.
      def move_text_position(*args) # Td
        x, y, _ = *args
        call_wrapped(
          :move_text_position,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      # Forwards two numeric operands.
      def move_text_position_and_set_leading(*args) # TD
        x, y, _ = *args
        call_wrapped(
          :move_text_position_and_set_leading,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      # Forwards exactly six numeric operands.
      def set_text_matrix_and_text_line_matrix(*args) # Tm
        a, b, c, d, e, f = *args
        call_wrapped(
          :set_text_matrix_and_text_line_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      # Takes no operands; anything the PDF supplied is dropped.
      def move_to_start_of_next_line(*args) # T*
        call_wrapped(:move_to_start_of_next_line)
      end

      #####################################################
      # Text Showing Operators
      #####################################################

      # Forwards a single string operand.
      def show_text(*args) # Tj (AWAY)
        string, _ = *args
        call_wrapped(
          :show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      # Forwards the operand untouched, but only when it is an Array. The elements of the
      # array are not validated here.
      #
      # Raises MalformedPDFError when the first operand is not an Array.
      def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
        params, _ = *args
        unless params.is_a?(Array)
          raise MalformedPDFError, "TJ operator expects a single Array argument"
        end

        call_wrapped(
          :show_text_with_positioning,
          params
        )
      end

      # Forwards a single string operand.
      def move_to_next_line_and_show_text(*args) # '
        string, _ = *args
        call_wrapped(
          :move_to_next_line_and_show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      # Forwards two numeric operands followed by a string operand.
      def set_spacing_next_line_show_text(*args) # "
        aw, ac, string = *args
        call_wrapped(
          :set_spacing_next_line_show_text,
          TypeCheck.cast_to_numeric!(aw),
          TypeCheck.cast_to_numeric!(ac),
          TypeCheck.cast_to_string!(string)
        )
      end

      #####################################################
      # Form XObject Operators
      #####################################################

      # Forwards the XObject label cast to a symbol.
      def invoke_xobject(*args)
        label, _ = *args

        call_wrapped(
          :invoke_xobject,
          TypeCheck.cast_to_symbol(label)
        )
      end

      #####################################################
      # Inline Image Operators
      #####################################################

      # Takes no operands; anything the PDF supplied is dropped.
      def begin_inline_image(*args)
        call_wrapped(:begin_inline_image)
      end

      # Forwards every operand as-is, without validation.
      def begin_inline_image_data(*args)
        # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
        # number of elements
        @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
      end

      # Forwards a single string operand.
      def end_inline_image(*args)
        data, _ = *args

        call_wrapped(
          :end_inline_image,
          TypeCheck.cast_to_string!(data)
        )
      end

      #####################################################
      # Final safety net for any operators that don't have type checking enabled yet
      #####################################################

      # Reports what the *wrapped* receiver responds to, not this wrapper. That keeps callers
      # that test respond_to? before dispatching from invoking operators the user never
      # implemented.
      #
      # include_all mirrors the optional second parameter of Object#respond_to?. Accepting it
      # keeps the override compatible with the standard signature; the previous one-parameter
      # form raised ArgumentError for respond_to?(name, true).
      #
      # NOTE(review): if a sorbet RBI signature exists for this method, it needs the same
      # optional parameter.
      def respond_to?(meth, include_all = false)
        @wrapped.respond_to?(meth, include_all)
      end

      # Operators without an explicit validating method are passed straight through.
      def method_missing(methodname, *args)
        @wrapped.send(methodname, *args)
      end

      private

      # Companion to method_missing, so that Object#method can resolve operators that are
      # only reachable through delegation.
      def respond_to_missing?(methodname, include_private = false)
        @wrapped.respond_to?(methodname, include_private) || super
      end

      # Sends methodname to the wrapped receiver, silently doing nothing when the receiver
      # doesn't implement it.
      def call_wrapped(methodname, *args)
        @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
      end
    end
  end
end
@@ -30,7 +30,7 @@ class PDF::Reader
30
30
 
31
31
  # in ruby a negative index is valid, and will go from the end of the array
32
32
  # which is undesirable in this case.
33
- if @font.first_char <= code_point
33
+ if @font.first_char && @font.first_char <= code_point
34
34
  @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
35
35
  else
36
36
  @missing_width.to_f
@@ -104,13 +104,18 @@ class PDF::Reader
104
104
  buf = new_buffer(offset)
105
105
  tok_one = buf.token
106
106
 
107
+ # we have a traditional xref table
107
108
  return load_xref_table(buf) if tok_one == "xref" || tok_one == "ref"
108
109
 
109
110
  tok_two = buf.token
110
111
  tok_three = buf.token
111
112
 
113
+ # we have an XRef stream
112
114
  if tok_one.to_i >= 0 && tok_two.to_i >= 0 && tok_three == "obj"
113
115
  buf = new_buffer(offset)
116
+ # Maybe we should be passing the ObjectHash as the second argument to the Parser here,
117
+ # to handle the case where an XRef Stream has the Length specified via an
118
+ # indirect object
114
119
  stream = PDF::Reader::Parser.new(buf).object(tok_one.to_i, tok_two.to_i)
115
120
  return load_xref_stream(stream)
116
121
  end
@@ -126,6 +131,10 @@ class PDF::Reader
126
131
 
127
132
  while !params.include?("trailer") && !params.include?(nil)
128
133
  if params.size == 2
134
+ unless params[0].to_s.match(/\A\d+\z/)
135
+ raise MalformedPDFError, "invalid xref table, expected object ID"
136
+ end
137
+
129
138
  objid, count = params[0].to_i, params[1].to_i
130
139
  count.times do
131
140
  offset = buf.token.to_i
@@ -143,7 +152,7 @@ class PDF::Reader
143
152
  params << buf.token
144
153
  end
145
154
 
146
- trailer = Parser.new(buf, self).parse_token
155
+ trailer = Parser.new(buf).parse_token
147
156
 
148
157
  unless trailer.kind_of?(Hash)
149
158
  raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
@@ -168,8 +177,16 @@ class PDF::Reader
168
177
  [:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
169
178
  }]
170
179
 
171
- widths = stream.hash[:W]
172
- entry_length = widths.inject(0) { |s, w| s + w }
180
+ widths = stream.hash[:W]
181
+
182
+ PDF::Reader::Error.validate_type_as_malformed(widths, "xref stream widths", Array)
183
+
184
+ entry_length = widths.inject(0) { |s, w|
185
+ unless w.is_a?(Integer)
186
+ w = 0
187
+ end
188
+ s + w
189
+ }
173
190
  raw_data = StringIO.new(stream.unfiltered_data)
174
191
  if stream.hash[:Index]
175
192
  index = stream.hash[:Index]
data/lib/pdf/reader.rb CHANGED
@@ -124,7 +124,7 @@ module PDF
124
124
  # Return a Hash with some basic information about the PDF file
125
125
  #
126
126
  def info
127
- dict = @objects.deref(@objects.trailer[:Info])
127
+ dict = @objects.deref_hash(@objects.trailer[:Info]) || {}
128
128
  doc_strings_to_utf8(dict)
129
129
  end
130
130
 
@@ -132,7 +132,7 @@ module PDF
132
132
  # always present.
133
133
  #
134
134
  def metadata
135
- stream = @objects.deref(root[:Metadata])
135
+ stream = @objects.deref_stream(root[:Metadata])
136
136
  if stream.nil?
137
137
  nil
138
138
  else
@@ -145,11 +145,11 @@ module PDF
145
145
  # The number of pages in this PDF
146
146
  #
147
147
  def page_count
148
- pages = @objects.deref(root[:Pages])
148
+ pages = @objects.deref_hash(root[:Pages])
149
149
  unless pages.kind_of?(::Hash)
150
150
  raise MalformedPDFError, "Pages structure is missing #{pages.class}"
151
151
  end
152
- @page_count ||= @objects.deref(pages[:Count])
152
+ @page_count ||= @objects.deref_integer(pages[:Count]) || 0
153
153
  end
154
154
 
155
155
  # The PDF version this file uses
@@ -190,6 +190,8 @@ module PDF
190
190
  # methods available on each page
191
191
  #
192
192
  def pages
193
+ return [] if page_count <= 0
194
+
193
195
  (1..self.page_count).map do |num|
194
196
  begin
195
197
  PDF::Reader::Page.new(@objects, num, :cache => @cache)
@@ -240,7 +242,7 @@ module PDF
240
242
  pdfdoc_to_utf8(obj)
241
243
  end
242
244
  else
243
- @objects.deref(obj)
245
+ obj
244
246
  end
245
247
  end
246
248
 
@@ -271,7 +273,7 @@ module PDF
271
273
 
272
274
  def root
273
275
  @root ||= begin
274
- obj = @objects.deref(@objects.trailer[:Root])
276
+ obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
275
277
  unless obj.kind_of?(::Hash)
276
278
  raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
277
279
  end
@@ -283,7 +285,7 @@ module PDF
283
285
  end
284
286
  ################################################################################
285
287
 
286
- require 'pdf/reader/resource_methods'
288
+ require 'pdf/reader/resources'
287
289
  require 'pdf/reader/buffer'
288
290
  require 'pdf/reader/bounding_rectangle_runs_filter'
289
291
  require 'pdf/reader/cid_widths'
@@ -314,13 +316,19 @@ require 'pdf/reader/rectangle'
314
316
  require 'pdf/reader/reference'
315
317
  require 'pdf/reader/register_receiver'
316
318
  require 'pdf/reader/null_security_handler'
317
- require 'pdf/reader/standard_security_handler'
318
- require 'pdf/reader/standard_security_handler_v5'
319
+ require 'pdf/reader/security_handler_factory'
320
+ require 'pdf/reader/standard_key_builder'
321
+ require 'pdf/reader/key_builder_v5'
322
+ require 'pdf/reader/aes_v2_security_handler'
323
+ require 'pdf/reader/aes_v3_security_handler'
324
+ require 'pdf/reader/rc4_security_handler'
319
325
  require 'pdf/reader/unimplemented_security_handler'
320
326
  require 'pdf/reader/stream'
321
327
  require 'pdf/reader/text_run'
328
+ require 'pdf/reader/type_check'
322
329
  require 'pdf/reader/page_state'
323
330
  require 'pdf/reader/page_text_receiver'
324
331
  require 'pdf/reader/token'
325
332
  require 'pdf/reader/xref'
326
333
  require 'pdf/reader/page'
334
+ require 'pdf/reader/validating_receiver'