pdf-reader 2.8.0 → 2.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +5 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +1 -1
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +9 -6
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6182ffd59631afba6a2c234547a428382b1ec2d7b414d89830b1143f1a0e1704
4
- data.tar.gz: 6c0e6a7d32cf24912edc3aa96d72b7f70497d2fdd0e0913b86f871bbf9fa104f
3
+ metadata.gz: 2b4616131d0ad73c4ef2c4992ae79d4fde420d6857aba60e8dfac9b088a0b915
4
+ data.tar.gz: f93f481d7f76af426420dbf507a88e8ecead8ec84690781f42de3b7b5ffbd1bd
5
5
  SHA512:
6
- metadata.gz: 42dafbe0c36ce838da4c3120bf2187efde647e486971896d9a9c59c37dac3da0f2ccf3ecd98d8dd1d3acc5404bfcf26e64a327d7797648646afd6b40be02fec2
7
- data.tar.gz: 40f0b0958024b558d6aca7eb2b3b6f042f034059c8fca52ce97fab7d55a39c313797605341331c65efd1099a1310ccbe386c354024dbd3cbc61c1d96c423842d
6
+ metadata.gz: 86dbe3450a11e0deb3f5db98625375b252cc25f289d76c98b5de48342d1b4957de81c1c2b6cce53d7d09738e9576bd48213c92166d48911c1f45ad6a77f195a5
7
+ data.tar.gz: ee852ff644a095bae93eb7cc30c6d070c8c6adda4f9bfadecf938bf3ba2723fed08c75a3bf15ba30fcf8fded7ad6a5b74dad8a3f512823798686350f24b912eb
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v2.9.0 (24th January 2022)
2
+ - Support additional encryption standards (http://github.com/yob/pdf-reader/pull/419)
3
+ - Return CropBox correctly from Page#rectangles (https://github.com/yob/pdf-reader/pull/420)
4
+ - For sorbet users, additional type annotations are included in the gem
5
+
1
6
  v2.8.0 (28th December 2021)
2
7
  - Add PDF::Reader::Page#runs for extracting text from a page with positioning metadata (http://github.com/yob/pdf-reader/pull/411)
3
8
  - Add options to PDF::Reader::Page#text to make some behaviour configurable (http://github.com/yob/pdf-reader/pull/411)
@@ -0,0 +1,41 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+
7
+ class PDF::Reader
8
+
9
+ # Decrypts data using the AESV2 algorithm defined in the PDF spec. Requires
10
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
11
+ #
12
+ class AesV2SecurityHandler
13
+
14
+ def initialize(key)
15
+ @encrypt_key = key
16
+ end
17
+
18
+ ##7.6.2 General Encryption Algorithm
19
+ #
20
+ # Algorithm 1: Encryption of data using the AES-128-CBC algorithm
21
+ #
22
+ # version == 4 and CFM == AESV2
23
+ #
24
+ # buf - a string to decrypt
25
+ # ref - a PDF::Reader::Reference for the object to decrypt
26
+ #
27
+ def decrypt( buf, ref )
28
+ objKey = @encrypt_key.dup
29
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
30
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
31
+ objKey << 'sAlT' # Algorithm 1, b)
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
34
+ cipher.decrypt
35
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
36
+ cipher.iv = buf[0..15]
37
+ cipher.update(buf[16..-1]) + cipher.final
38
+ end
39
+
40
+ end
41
+ end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest'
6
+ require 'openssl'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the AESV3 algorithm defined in the PDF 1.7, Extension Level 3 spec.
11
+ # Requires a decryption key, which is usually generated by PDF::Reader::KeyBuilderV5
12
+ #
13
+ class AesV3SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ @cipher = "AES-256-CBC"
18
+ end
19
+
20
+ ##7.6.2 General Encryption Algorithm
21
+ #
22
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
23
+ #
24
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
25
+ #
26
+ # buf - a string to decrypt
27
+ # ref - a PDF::Reader::Reference for the object to decrypt
28
+ #
29
+ def decrypt( buf, ref )
30
+ cipher = OpenSSL::Cipher.new(@cipher)
31
+ cipher.decrypt
32
+ cipher.key = @encrypt_key.dup
33
+ cipher.iv = buf[0..15]
34
+ cipher.update(buf[16..-1]) + cipher.final
35
+ end
36
+
37
+ end
38
+ end
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -59,6 +59,9 @@ class PDF::Reader
59
59
  # Allow for this here
60
60
  TRAILING_BYTECOUNT = 5000
61
61
 
62
+ # must match whole tokens
63
+ DIGITS_ONLY = %r{\A\d+\z}
64
+
62
65
  attr_reader :pos
63
66
 
64
67
  # Creates a new buffer.
@@ -143,13 +146,20 @@ class PDF::Reader
143
146
  @io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END) rescue @io.seek(0)
144
147
  data = @io.read(TRAILING_BYTECOUNT)
145
148
 
149
+ raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil?
150
+
146
151
  # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
147
152
  lines = data.split(/[\n\r]+/).reverse
148
153
  eof_index = lines.index { |l| l.strip[/^%%EOF/] }
149
154
 
150
155
  raise MalformedPDFError, "PDF does not contain EOF marker" if eof_index.nil?
151
156
  raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size-1
152
- lines[eof_index+1].to_i
157
+ offset = lines[eof_index+1].to_i
158
+
159
+ # a byte offset < 0 doesn't make much sense. This is unlikely to happen, but in theory some
160
+ # corrupted PDFs might have a line that looks like a negative int preceding the `%%EOF`
161
+ raise MalformedPDFError, "invalid xref offset" if offset < 0
162
+ offset
153
163
  end
154
164
 
155
165
  private
@@ -230,13 +240,12 @@ class PDF::Reader
230
240
  return if @tokens.size < 3
231
241
  return if @tokens[2] != "R"
232
242
 
233
- # must match whole tokens
234
- digits_only = %r{\A\d+\z}
235
- if @tokens[0].match(digits_only) && @tokens[1].match(digits_only)
236
- @tokens[0] = PDF::Reader::Reference.new(@tokens[0].to_i, @tokens[1].to_i)
237
- @tokens[1] = nil
238
- @tokens[2] = nil
239
- @tokens.compact!
243
+ token_one = @tokens[0]
244
+ token_two = @tokens[1]
245
+ if token_one.is_a?(String) && token_two.is_a?(String) && token_one.match(DIGITS_ONLY) && token_two.match(DIGITS_ONLY)
246
+ @tokens[0] = PDF::Reader::Reference.new(token_one.to_i, token_two.to_i)
247
+ @tokens.delete_at(2)
248
+ @tokens.delete_at(1)
240
249
  end
241
250
  end
242
251
 
@@ -246,7 +255,7 @@ class PDF::Reader
246
255
  # This is to reduce the chance of accidentally matching an embedded EI
247
256
  def prepare_inline_token
248
257
  idstart = @io.pos
249
- chr = prevchr = nil
258
+ prevchr = ''
250
259
  eisize = 0 # how many chars in the end marker
251
260
  seeking = 'E' # what are we looking for now?
252
261
  loop do
@@ -264,11 +273,11 @@ class PDF::Reader
264
273
  end
265
274
  when 'I'
266
275
  if chr == 'I'
267
- seeking = :END
276
+ seeking = ''
268
277
  else
269
278
  seeking = 'E'
270
279
  end
271
- when :END
280
+ when ''
272
281
  if WHITE_SPACE.include? chr
273
282
  eisize += 1 # Drop trailer
274
283
  break
@@ -276,28 +285,28 @@ class PDF::Reader
276
285
  seeking = 'E'
277
286
  end
278
287
  end
279
- prevchr = chr
288
+ prevchr = chr.is_a?(String) ? chr : ''
280
289
  end
281
- unless seeking == :END
290
+ unless seeking == ''
282
291
  raise MalformedPDFError, "EI terminator not found"
283
292
  end
284
293
  eiend = @io.pos
285
294
  @io.seek(idstart, IO::SEEK_SET)
286
295
  str = @io.read(eiend - eisize - idstart) # get the ID content
287
- @tokens << string_token(str)
296
+ @tokens << str.freeze if str
288
297
  end
289
298
 
290
299
  # if we're currently inside a hex string, read hex nibbles until
291
300
  # we find a closing >
292
301
  #
293
302
  def prepare_hex_token
303
+ finished = :false
294
304
  str = "".dup
295
- finished = false
296
305
 
297
- while !finished
306
+ until finished == :true
298
307
  byte = @io.getbyte
299
308
  if byte.nil?
300
- finished = true # unbalanced params
309
+ finished = :true # unbalanced params
301
310
  elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
302
311
  str << byte
303
312
  elsif byte <= 32
@@ -306,7 +315,7 @@ class PDF::Reader
306
315
  @tokens << str if str.size > 0
307
316
  @tokens << ">" if byte != 0x3E # '>'
308
317
  @tokens << byte.chr
309
- finished = true
318
+ finished = :true
310
319
  end
311
320
  end
312
321
  end
@@ -353,14 +362,17 @@ class PDF::Reader
353
362
  def prepare_regular_token
354
363
  tok = "".dup
355
364
 
356
- while byte = @io.getbyte
365
+ loop do
366
+ byte = @io.getbyte
367
+
357
368
  case byte
369
+ when nil
370
+ break
358
371
  when 0x25
359
372
  # comment, ignore everything until the next EOL char
360
- done = false
361
- while !done
362
- byte = @io.getbyte
363
- done = true if byte.nil? || byte == 0x0A || byte == 0x0D
373
+ loop do
374
+ commentbyte = @io.getbyte
375
+ break if commentbyte.nil? || commentbyte == 0x0A || commentbyte == 0x0D
364
376
  end
365
377
  when *TOKEN_WHITESPACE
366
378
  # white space, token finished
@@ -430,15 +442,5 @@ class PDF::Reader
430
442
  byte
431
443
  end
432
444
 
433
- # for a handful of tokens we want to tell the parser how to convert them
434
- # into higher level tokens. This methods adds a to_token() method
435
- # to tokens that should remain as strings.
436
- #
437
- def string_token(token)
438
- def token.to_token
439
- to_s
440
- end
441
- token
442
- end
443
445
  end
444
446
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -35,15 +35,15 @@ class PDF::Reader
35
35
  class CMap # :nodoc:
36
36
 
37
37
  CMAP_KEYWORDS = {
38
- "begincodespacerange" => 1,
39
- "endcodespacerange" => 1,
40
- "beginbfchar" => 1,
41
- "endbfchar" => 1,
42
- "beginbfrange" => 1,
43
- "endbfrange" => 1,
44
- "begin" => 1,
45
- "begincmap" => 1,
46
- "def" => 1
38
+ "begincodespacerange" => :noop,
39
+ "endcodespacerange" => :noop,
40
+ "beginbfchar" => :noop,
41
+ "endbfchar" => :noop,
42
+ "beginbfrange" => :noop,
43
+ "endbfrange" => :noop,
44
+ "begin" => :noop,
45
+ "begincmap" => :noop,
46
+ "def" => :noop
47
47
  }
48
48
 
49
49
  attr_reader :map
@@ -53,30 +53,6 @@ class PDF::Reader
53
53
  process_data(data)
54
54
  end
55
55
 
56
- def process_data(data)
57
- parser = build_parser(data)
58
- mode = :none
59
- instructions = []
60
-
61
- while token = parser.parse_token(CMAP_KEYWORDS)
62
- if token == "beginbfchar"
63
- mode = :char
64
- elsif token == "endbfchar"
65
- process_bfchar_instructions(instructions)
66
- instructions = []
67
- mode = :none
68
- elsif token == "beginbfrange"
69
- mode = :range
70
- elsif token == "endbfrange"
71
- process_bfrange_instructions(instructions)
72
- instructions = []
73
- mode = :none
74
- elsif mode == :char || mode == :range
75
- instructions << token
76
- end
77
- end
78
- end
79
-
80
56
  def size
81
57
  @map.size
82
58
  end
@@ -86,13 +62,40 @@ class PDF::Reader
86
62
  # Returns an array of Integers.
87
63
  #
88
64
  def decode(c)
89
- # TODO: implement the conversion
90
- return c unless Integer === c
91
- @map[c]
65
+ @map.fetch(c, [])
92
66
  end
93
67
 
94
68
  private
95
69
 
70
+ def process_data(data, initial_mode = :none)
71
+ parser = build_parser(data)
72
+ mode = initial_mode
73
+ instructions = []
74
+
75
+ while token = parser.parse_token(CMAP_KEYWORDS)
76
+ if token.is_a?(String) || token.is_a?(Array)
77
+ if token == "beginbfchar"
78
+ mode = :char
79
+ elsif token == "endbfchar"
80
+ process_bfchar_instructions(instructions)
81
+ instructions = []
82
+ mode = :none
83
+ elsif token == "beginbfrange"
84
+ mode = :range
85
+ elsif token == "endbfrange"
86
+ process_bfrange_instructions(instructions)
87
+ instructions = []
88
+ mode = :none
89
+ elsif mode == :char
90
+ instructions << token.to_s
91
+ elsif mode == :range
92
+ instructions << token
93
+ end
94
+ end
95
+ end
96
+ end
97
+
98
+
96
99
  def build_parser(instructions)
97
100
  buffer = Buffer.new(StringIO.new(instructions))
98
101
  Parser.new(buffer)
@@ -107,7 +110,6 @@ class PDF::Reader
107
110
  # exception when we try converting broken UTF-16 to UTF-8
108
111
  #
109
112
  def str_to_int(str)
110
- return nil if str.nil? || str.size == 0
111
113
  unpacked_string = if str.bytesize == 1 # UTF-8
112
114
  str.unpack("C*")
113
115
  else # UTF-16
@@ -115,12 +117,15 @@ class PDF::Reader
115
117
  end
116
118
  result = []
117
119
  while unpacked_string.any? do
118
- if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
120
+ if unpacked_string.size >= 2 &&
121
+ unpacked_string.first.to_i > 0xD800 &&
122
+ unpacked_string.first.to_i < 0xDBFF
119
123
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
120
124
  # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
121
125
  # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
122
- points = [unpacked_string.shift, unpacked_string.shift]
123
- result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
126
+ point_one = unpacked_string.shift.to_i
127
+ point_two = unpacked_string.shift.to_i
128
+ result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
124
129
  else
125
130
  result << unpacked_string.shift
126
131
  end
@@ -130,9 +135,11 @@ class PDF::Reader
130
135
 
131
136
  def process_bfchar_instructions(instructions)
132
137
  instructions.each_slice(2) do |one, two|
133
- find = str_to_int(one)
134
- replace = str_to_int(two)
135
- @map[find.first] = replace
138
+ find = str_to_int(one.to_s)
139
+ replace = str_to_int(two.to_s)
140
+ if find.any? && replace.any?
141
+ @map[find.first.to_i] = replace
142
+ end
136
143
  end
137
144
  end
138
145
 
@@ -143,30 +150,36 @@ class PDF::Reader
143
150
  elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
144
151
  bfrange_type_two(start, finish, to)
145
152
  else
146
- raise "invalid bfrange section"
153
+ raise MalformedPDFError, "invalid bfrange section"
147
154
  end
148
155
  end
149
156
  end
150
157
 
151
158
  def bfrange_type_one(start_code, end_code, dst)
152
- start_code = str_to_int(start_code)[0]
153
- end_code = str_to_int(end_code)[0]
159
+ start_code = str_to_int(start_code).first
160
+ end_code = str_to_int(end_code).first
154
161
  dst = str_to_int(dst)
155
162
 
163
+ return if start_code.nil? || end_code.nil?
164
+
156
165
  # add all values in the range to our mapping
157
166
  (start_code..end_code).each_with_index do |val, idx|
158
- @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
167
+ @map[val] = dst.length == 1 ? [dst[0].to_i + idx] : [dst[0].to_i, dst[1].to_i + 1]
159
168
  end
160
169
  end
161
170
 
162
171
  def bfrange_type_two(start_code, end_code, dst)
163
- start_code = str_to_int(start_code)[0]
164
- end_code = str_to_int(end_code)[0]
172
+ start_code = str_to_int(start_code).first
173
+ end_code = str_to_int(end_code).first
174
+
175
+ return if start_code.nil? || end_code.nil?
176
+
165
177
  from_range = (start_code..end_code)
166
178
 
167
179
  # add all values in the range to our mapping
168
180
  from_range.each_with_index do |val, idx|
169
- @map[val] = str_to_int(dst[idx])
181
+ dst_char = dst[idx]
182
+ @map[val.to_i] = str_to_int(dst_char) if dst_char
170
183
  end
171
184
  end
172
185
  end
@@ -51,9 +51,17 @@ class PDF::Reader
51
51
  raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
52
  end
53
53
  ################################################################################
54
+ def self.validate_type_as_malformed(object, name, klass)
55
+ raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
+ end
57
+ ################################################################################
54
58
  def self.validate_not_nil(object, name)
55
59
  raise ArgumentError, "#{object} must not be nil" if object.nil?
56
60
  end
61
+ ################################################################################
62
+ def self.validate_not_nil_as_malformed(object, name)
63
+ raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
+ end
57
65
  end
58
66
 
59
67
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'ascii85'
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -9,7 +9,7 @@ class PDF::Reader
9
9
  class Depredict
10
10
 
11
11
  def initialize(options = {})
12
- @options = options || {}
12
+ @options = options
13
13
  end
14
14
 
15
15
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
 
@@ -34,7 +34,7 @@ class PDF::Reader
34
34
  def zlib_inflate(data)
35
35
  begin
36
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
37
- rescue Zlib::DataError
37
+ rescue Zlib::Error
38
38
  # by default, Ruby's Zlib assumes the data it's inflating
39
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
40
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -43,7 +43,7 @@ class PDF::Reader
43
43
 
44
44
  begin
45
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
46
- rescue StandardError
46
+ rescue Zlib::Error
47
47
  # swallow this one too, so we can try some other fallback options
48
48
  end
49
49
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -1,8 +1,7 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
- #
6
5
  class PDF::Reader
7
6
  module Filter # :nodoc:
8
7
  # implementation of the null stream filter
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -42,7 +42,7 @@ class PDF::Reader
42
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
43
43
  #
44
44
  def self.with(name, options = {})
45
- case name.to_sym
45
+ case name
46
46
  when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
47
47
  when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
48
48
  when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
@@ -149,27 +149,37 @@ class PDF::Reader
149
149
  end
150
150
  end
151
151
 
152
- def extract_base_info(obj)
153
- @subtype = @ohash.object(obj[:Subtype])
154
- @basefont = @ohash.object(obj[:BaseFont])
155
- if @ohash.object(obj[:Encoding])
156
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
157
161
  else
158
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
159
163
  end
160
- @widths = @ohash.object(obj[:Widths]) || []
161
- @first_char = @ohash.object(obj[:FirstChar])
162
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
163
173
 
164
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
165
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
166
- @cid_widths = @ohash.object(obj[:W]) || []
167
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
168
178
 
169
179
  if obj[:ToUnicode]
170
180
  # ToUnicode is optional for Type1 and Type3
171
- stream = @ohash.object(obj[:ToUnicode])
172
- if stream.is_a?(PDF::Reader::Stream)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
173
183
  @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
174
184
  end
175
185
  end
@@ -177,7 +187,9 @@ class PDF::Reader
177
187
 
178
188
  def extract_type3_info(obj)
179
189
  if @subtype == :Type3
180
- @font_matrix = @ohash.object(obj[:FontMatrix]) || [ 0.001, 0, 0, 0.001, 0, 0 ]
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
181
193
  end
182
194
  end
183
195
 
@@ -185,7 +197,7 @@ class PDF::Reader
185
197
  if obj[:FontDescriptor]
186
198
  # create a font descriptor object if we can, in other words, unless this is
187
199
  # a CID Font
188
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
189
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
190
202
  else
191
203
  @font_descriptor = nil
@@ -197,9 +209,9 @@ class PDF::Reader
197
209
  # per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
198
210
  # A one-element array specifying the CIDFont dictionary that is the
199
211
  # descendant of this Type 0 font.
200
- descendants = @ohash.object(obj[:DescendantFonts])
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
201
213
  @descendantfonts = descendants.map { |desc|
202
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
203
215
  }
204
216
  end
205
217