pdf-reader 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +5 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +1 -1
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +9 -6
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
@@ -0,0 +1,262 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
module PDF
  class Reader

    # Page#walk will execute the content stream of a page, calling methods on a receiver class
    # provided by the user. Each operator has a specific set of parameters it expects, and we
    # wrap the user's receiver class in this one to verify the PDF uses valid parameters.
    #
    # Without these checks, users can't be confident about the number of parameters they'll receive
    # for an operator, or what the type of those parameters will be. Everyone ends up building their
    # own type safety guard clauses and it's tedious.
    #
    # Every explicit wrapper below accepts *args so that surplus operands in a content stream
    # are silently dropped instead of raising ArgumentError; only the expected operands are
    # validated and forwarded. A callback is forwarded only when the wrapped receiver responds
    # to it.
    #
    # Not all operators have type safety implemented yet, but we can expand the number over time.
    class ValidatingReceiver

      # wrapped - the user supplied receiver that validated callbacks are forwarded to
      def initialize(wrapped)
        @wrapped = wrapped
      end

      # Forwards the page to the wrapped receiver, if it implements page=
      def page=(page)
        call_wrapped(:page=, page)
      end

      #####################################################
      # Graphics State Operators
      #####################################################
      def save_graphics_state(*args)
        call_wrapped(:save_graphics_state)
      end

      def restore_graphics_state(*args)
        call_wrapped(:restore_graphics_state)
      end

      #####################################################
      # Matrix Operators
      #####################################################

      # Expects six numeric operands
      def concatenate_matrix(*args)
        a, b, c, d, e, f = *args
        call_wrapped(
          :concatenate_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      #####################################################
      # Text Object Operators
      #####################################################

      def begin_text_object(*args)
        call_wrapped(:begin_text_object)
      end

      def end_text_object(*args)
        call_wrapped(:end_text_object)
      end

      #####################################################
      # Text State Operators
      #####################################################
      def set_character_spacing(*args)
        char_spacing, _ = *args
        call_wrapped(
          :set_character_spacing,
          TypeCheck.cast_to_numeric!(char_spacing)
        )
      end

      def set_horizontal_text_scaling(*args)
        h_scaling, _ = *args
        call_wrapped(
          :set_horizontal_text_scaling,
          TypeCheck.cast_to_numeric!(h_scaling)
        )
      end

      # Expects a font label followed by a numeric size
      def set_text_font_and_size(*args)
        label, size, _ = *args
        call_wrapped(
          :set_text_font_and_size,
          TypeCheck.cast_to_symbol(label),
          TypeCheck.cast_to_numeric!(size)
        )
      end

      def set_text_leading(*args)
        leading, _ = *args
        call_wrapped(
          :set_text_leading,
          TypeCheck.cast_to_numeric!(leading)
        )
      end

      def set_text_rendering_mode(*args)
        mode, _ = *args
        call_wrapped(
          :set_text_rendering_mode,
          TypeCheck.cast_to_numeric!(mode)
        )
      end

      def set_text_rise(*args)
        rise, _ = *args
        call_wrapped(
          :set_text_rise,
          TypeCheck.cast_to_numeric!(rise)
        )
      end

      def set_word_spacing(*args)
        word_spacing, _ = *args
        call_wrapped(
          :set_word_spacing,
          TypeCheck.cast_to_numeric!(word_spacing)
        )
      end

      #####################################################
      # Text Positioning Operators
      #####################################################

      def move_text_position(*args) # Td
        x, y, _ = *args
        call_wrapped(
          :move_text_position,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      def move_text_position_and_set_leading(*args) # TD
        x, y, _ = *args
        call_wrapped(
          :move_text_position_and_set_leading,
          TypeCheck.cast_to_numeric!(x),
          TypeCheck.cast_to_numeric!(y)
        )
      end

      def set_text_matrix_and_text_line_matrix(*args) # Tm
        a, b, c, d, e, f = *args
        call_wrapped(
          :set_text_matrix_and_text_line_matrix,
          TypeCheck.cast_to_numeric!(a),
          TypeCheck.cast_to_numeric!(b),
          TypeCheck.cast_to_numeric!(c),
          TypeCheck.cast_to_numeric!(d),
          TypeCheck.cast_to_numeric!(e),
          TypeCheck.cast_to_numeric!(f),
        )
      end

      def move_to_start_of_next_line(*args) # T*
        call_wrapped(:move_to_start_of_next_line)
      end

      #####################################################
      # Text Showing Operators
      #####################################################
      def show_text(*args) # Tj (AWAY)
        string, _ = *args
        call_wrapped(
          :show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      # Raises MalformedPDFError unless the first operand is an Array. The array
      # elements themselves are forwarded without further validation.
      def show_text_with_positioning(*args) # TJ [(A) 120 (WA) 20 (Y)]
        params, _ = *args
        unless params.is_a?(Array)
          raise MalformedPDFError, "TJ operator expects a single Array argument"
        end

        call_wrapped(
          :show_text_with_positioning,
          params
        )
      end

      def move_to_next_line_and_show_text(*args) # '
        string, _ = *args
        call_wrapped(
          :move_to_next_line_and_show_text,
          TypeCheck.cast_to_string!(string)
        )
      end

      def set_spacing_next_line_show_text(*args) # "
        aw, ac, string = *args
        call_wrapped(
          :set_spacing_next_line_show_text,
          TypeCheck.cast_to_numeric!(aw),
          TypeCheck.cast_to_numeric!(ac),
          TypeCheck.cast_to_string!(string)
        )
      end

      #####################################################
      # Form XObject Operators
      #####################################################

      def invoke_xobject(*args)
        label, _ = *args

        call_wrapped(
          :invoke_xobject,
          TypeCheck.cast_to_symbol(label)
        )
      end

      #####################################################
      # Inline Image Operators
      #####################################################

      def begin_inline_image(*args)
        call_wrapped(:begin_inline_image)
      end

      def begin_inline_image_data(*args)
        # We can't use call_wrapped() here because sorbet won't allow splat args with a dynamic
        # number of elements
        @wrapped.begin_inline_image_data(*args) if @wrapped.respond_to?(:begin_inline_image_data)
      end

      def end_inline_image(*args)
        data, _ = *args

        call_wrapped(
          :end_inline_image,
          TypeCheck.cast_to_string!(data)
        )
      end

      #####################################################
      # Final safety net for any operators that don't have type checking enabled yet
      #####################################################

      # This wrapper advertises exactly the callbacks the wrapped receiver supports.
      # include_all is optional so both the one and two argument forms of
      # respond_to? work; it is passed straight through to the wrapped receiver.
      def respond_to?(meth, include_all = false)
        @wrapped.respond_to?(meth, include_all)
      end

      # Companion to method_missing, so that Object#method agrees with the
      # delegation performed below.
      def respond_to_missing?(meth, include_all = false)
        @wrapped.respond_to?(meth, include_all) || super
      end

      # Operators without an explicit wrapper above are forwarded untouched.
      # The callback is dispatched by name with send (as call_wrapped does);
      # invoking #call on the wrapped receiver would not reach the callback.
      def method_missing(methodname, *args, &block)
        @wrapped.send(methodname, *args, &block)
      end

      private

      # Invoke methodname on the wrapped receiver, but only if it implements it.
      # Returns nil when the wrapped receiver doesn't respond to methodname.
      def call_wrapped(methodname, *args)
        @wrapped.send(methodname, *args) if @wrapped.respond_to?(methodname)
      end
    end
  end
end
@@ -30,7 +30,7 @@ class PDF::Reader
30
30
 
31
31
  # in ruby a negative index is valid, and will go from the end of the array
32
32
  # which is undesirable in this case.
33
- if @font.first_char <= code_point
33
+ if @font.first_char && @font.first_char <= code_point
34
34
  @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
35
35
  else
36
36
  @missing_width.to_f
@@ -104,13 +104,18 @@ class PDF::Reader
104
104
  buf = new_buffer(offset)
105
105
  tok_one = buf.token
106
106
 
107
+ # we have a traditional xref table
107
108
  return load_xref_table(buf) if tok_one == "xref" || tok_one == "ref"
108
109
 
109
110
  tok_two = buf.token
110
111
  tok_three = buf.token
111
112
 
113
+ # we have an XRef stream
112
114
  if tok_one.to_i >= 0 && tok_two.to_i >= 0 && tok_three == "obj"
113
115
  buf = new_buffer(offset)
116
+ # Maybe we should be passing the ObjectHash as the second argument to the Parser here,
117
+ # to handle the case where an XRef Stream has the Length specified via an
118
+ # indirect object
114
119
  stream = PDF::Reader::Parser.new(buf).object(tok_one.to_i, tok_two.to_i)
115
120
  return load_xref_stream(stream)
116
121
  end
@@ -126,6 +131,10 @@ class PDF::Reader
126
131
 
127
132
  while !params.include?("trailer") && !params.include?(nil)
128
133
  if params.size == 2
134
+ unless params[0].to_s.match(/\A\d+\z/)
135
+ raise MalformedPDFError, "invalid xref table, expected object ID"
136
+ end
137
+
129
138
  objid, count = params[0].to_i, params[1].to_i
130
139
  count.times do
131
140
  offset = buf.token.to_i
@@ -143,7 +152,7 @@ class PDF::Reader
143
152
  params << buf.token
144
153
  end
145
154
 
146
- trailer = Parser.new(buf, self).parse_token
155
+ trailer = Parser.new(buf).parse_token
147
156
 
148
157
  unless trailer.kind_of?(Hash)
149
158
  raise MalformedPDFError, "PDF malformed, trailer should be a dictionary"
@@ -168,8 +177,16 @@ class PDF::Reader
168
177
  [:Size, :Prev, :Root, :Encrypt, :Info, :ID].include?(key)
169
178
  }]
170
179
 
171
- widths = stream.hash[:W]
172
- entry_length = widths.inject(0) { |s, w| s + w }
180
+ widths = stream.hash[:W]
181
+
182
+ PDF::Reader::Error.validate_type_as_malformed(widths, "xref stream widths", Array)
183
+
184
+ entry_length = widths.inject(0) { |s, w|
185
+ unless w.is_a?(Integer)
186
+ w = 0
187
+ end
188
+ s + w
189
+ }
173
190
  raw_data = StringIO.new(stream.unfiltered_data)
174
191
  if stream.hash[:Index]
175
192
  index = stream.hash[:Index]
data/lib/pdf/reader.rb CHANGED
@@ -124,7 +124,7 @@ module PDF
124
124
  # Return a Hash with some basic information about the PDF file
125
125
  #
126
126
  def info
127
- dict = @objects.deref(@objects.trailer[:Info])
127
+ dict = @objects.deref_hash(@objects.trailer[:Info]) || {}
128
128
  doc_strings_to_utf8(dict)
129
129
  end
130
130
 
@@ -132,7 +132,7 @@ module PDF
132
132
  # always present.
133
133
  #
134
134
  def metadata
135
- stream = @objects.deref(root[:Metadata])
135
+ stream = @objects.deref_stream(root[:Metadata])
136
136
  if stream.nil?
137
137
  nil
138
138
  else
@@ -145,11 +145,11 @@ module PDF
145
145
  # To number of pages in this PDF
146
146
  #
147
147
  def page_count
148
- pages = @objects.deref(root[:Pages])
148
+ pages = @objects.deref_hash(root[:Pages])
149
149
  unless pages.kind_of?(::Hash)
150
150
  raise MalformedPDFError, "Pages structure is missing #{pages.class}"
151
151
  end
152
- @page_count ||= @objects.deref(pages[:Count])
152
+ @page_count ||= @objects.deref_integer(pages[:Count]) || 0
153
153
  end
154
154
 
155
155
  # The PDF version this file uses
@@ -190,6 +190,8 @@ module PDF
190
190
  # methods available on each page
191
191
  #
192
192
  def pages
193
+ return [] if page_count <= 0
194
+
193
195
  (1..self.page_count).map do |num|
194
196
  begin
195
197
  PDF::Reader::Page.new(@objects, num, :cache => @cache)
@@ -240,7 +242,7 @@ module PDF
240
242
  pdfdoc_to_utf8(obj)
241
243
  end
242
244
  else
243
- @objects.deref(obj)
245
+ obj
244
246
  end
245
247
  end
246
248
 
@@ -271,7 +273,7 @@ module PDF
271
273
 
272
274
  def root
273
275
  @root ||= begin
274
- obj = @objects.deref(@objects.trailer[:Root])
276
+ obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
275
277
  unless obj.kind_of?(::Hash)
276
278
  raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
277
279
  end
@@ -283,7 +285,7 @@ module PDF
283
285
  end
284
286
  ################################################################################
285
287
 
286
- require 'pdf/reader/resource_methods'
288
+ require 'pdf/reader/resources'
287
289
  require 'pdf/reader/buffer'
288
290
  require 'pdf/reader/bounding_rectangle_runs_filter'
289
291
  require 'pdf/reader/cid_widths'
@@ -314,13 +316,19 @@ require 'pdf/reader/rectangle'
314
316
  require 'pdf/reader/reference'
315
317
  require 'pdf/reader/register_receiver'
316
318
  require 'pdf/reader/null_security_handler'
317
- require 'pdf/reader/standard_security_handler'
318
- require 'pdf/reader/standard_security_handler_v5'
319
+ require 'pdf/reader/security_handler_factory'
320
+ require 'pdf/reader/standard_key_builder'
321
+ require 'pdf/reader/key_builder_v5'
322
+ require 'pdf/reader/aes_v2_security_handler'
323
+ require 'pdf/reader/aes_v3_security_handler'
324
+ require 'pdf/reader/rc4_security_handler'
319
325
  require 'pdf/reader/unimplemented_security_handler'
320
326
  require 'pdf/reader/stream'
321
327
  require 'pdf/reader/text_run'
328
+ require 'pdf/reader/type_check'
322
329
  require 'pdf/reader/page_state'
323
330
  require 'pdf/reader/page_text_receiver'
324
331
  require 'pdf/reader/token'
325
332
  require 'pdf/reader/xref'
326
333
  require 'pdf/reader/page'
334
+ require 'pdf/reader/validating_receiver'