pdf-reader 2.8.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +5 -0
  3. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  4. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  5. data/lib/pdf/reader/buffer.rb +36 -34
  6. data/lib/pdf/reader/cmap.rb +64 -51
  7. data/lib/pdf/reader/error.rb +8 -0
  8. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  9. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  10. data/lib/pdf/reader/filter/depredict.rb +1 -1
  11. data/lib/pdf/reader/filter/flate.rb +3 -3
  12. data/lib/pdf/reader/filter/lzw.rb +1 -1
  13. data/lib/pdf/reader/filter/null.rb +1 -2
  14. data/lib/pdf/reader/filter/run_length.rb +1 -1
  15. data/lib/pdf/reader/filter.rb +1 -1
  16. data/lib/pdf/reader/font.rb +29 -17
  17. data/lib/pdf/reader/font_descriptor.rb +18 -17
  18. data/lib/pdf/reader/form_xobject.rb +14 -5
  19. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  20. data/lib/pdf/reader/null_security_handler.rb +0 -4
  21. data/lib/pdf/reader/object_hash.rb +247 -42
  22. data/lib/pdf/reader/page.rb +38 -20
  23. data/lib/pdf/reader/page_state.rb +1 -1
  24. data/lib/pdf/reader/page_text_receiver.rb +4 -1
  25. data/lib/pdf/reader/parser.rb +9 -6
  26. data/lib/pdf/reader/point.rb +1 -1
  27. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  28. data/lib/pdf/reader/rectangle.rb +2 -2
  29. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  30. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  31. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  32. data/lib/pdf/reader/stream.rb +2 -2
  33. data/lib/pdf/reader/type_check.rb +52 -0
  34. data/lib/pdf/reader/validating_receiver.rb +262 -0
  35. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  36. data/lib/pdf/reader/xref.rb +20 -3
  37. data/lib/pdf/reader.rb +17 -9
  38. data/rbi/pdf-reader.rbi +388 -173
  39. metadata +15 -9
  40. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6182ffd59631afba6a2c234547a428382b1ec2d7b414d89830b1143f1a0e1704
4
- data.tar.gz: 6c0e6a7d32cf24912edc3aa96d72b7f70497d2fdd0e0913b86f871bbf9fa104f
3
+ metadata.gz: 2b4616131d0ad73c4ef2c4992ae79d4fde420d6857aba60e8dfac9b088a0b915
4
+ data.tar.gz: f93f481d7f76af426420dbf507a88e8ecead8ec84690781f42de3b7b5ffbd1bd
5
5
  SHA512:
6
- metadata.gz: 42dafbe0c36ce838da4c3120bf2187efde647e486971896d9a9c59c37dac3da0f2ccf3ecd98d8dd1d3acc5404bfcf26e64a327d7797648646afd6b40be02fec2
7
- data.tar.gz: 40f0b0958024b558d6aca7eb2b3b6f042f034059c8fca52ce97fab7d55a39c313797605341331c65efd1099a1310ccbe386c354024dbd3cbc61c1d96c423842d
6
+ metadata.gz: 86dbe3450a11e0deb3f5db98625375b252cc25f289d76c98b5de48342d1b4957de81c1c2b6cce53d7d09738e9576bd48213c92166d48911c1f45ad6a77f195a5
7
+ data.tar.gz: ee852ff644a095bae93eb7cc30c6d070c8c6adda4f9bfadecf938bf3ba2723fed08c75a3bf15ba30fcf8fded7ad6a5b74dad8a3f512823798686350f24b912eb
data/CHANGELOG CHANGED
@@ -1,3 +1,8 @@
1
+ v2.9.0 (24th January 2022)
2
+ - Support additional encryption standards (http://github.com/yob/pdf-reader/pull/419)
3
+ - Return CropBox correctly from Page#rectangles (https://github.com/yob/pdf-reader/pull/420)
4
+ - For sorbet users, additional type annotations are included in the gem
5
+
1
6
  v2.8.0 (28th December 2021)
2
7
  - Add PDF::Reader::Page#runs for extracting text from a page with positioning metadata (http://github.com/yob/pdf-reader/pull/411)
3
8
  - Add options to PDF::Reader::Page#text to make some behaviour configurable (http://github.com/yob/pdf-reader/pull/411)
@@ -0,0 +1,41 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+
7
+ class PDF::Reader
8
+
9
+ # Decrypts data using the AESV2 algorithm defined in the PDF spec. Requires
10
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
11
+ #
12
+ class AesV2SecurityHandler
13
+
14
+ def initialize(key)
15
+ @encrypt_key = key
16
+ end
17
+
18
+ ##7.6.2 General Encryption Algorithm
19
+ #
20
+ # Algorithm 1: Encryption of data using the AES-128-CBC algorithm
21
+ #
22
+ # version == 4 and CFM == AESV2
23
+ #
24
+ # buf - a string to decrypt
25
+ # ref - a PDF::Reader::Reference for the object to decrypt
26
+ #
27
+ def decrypt( buf, ref )
28
+ objKey = @encrypt_key.dup
29
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
30
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
31
+ objKey << 'sAlT' # Algorithm 1, b)
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
34
+ cipher.decrypt
35
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
36
+ cipher.iv = buf[0..15]
37
+ cipher.update(buf[16..-1]) + cipher.final
38
+ end
39
+
40
+ end
41
+ end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest'
6
+ require 'openssl'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the AESV3 algorithm defined in the PDF 1.7, Extension Level 3 spec.
11
+ # Requires a decryption key, which is usually generated by PDF::Reader::KeyBuilderV5
12
+ #
13
+ class AesV3SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ @cipher = "AES-256-CBC"
18
+ end
19
+
20
+ ##7.6.2 General Encryption Algorithm
21
+ #
22
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
23
+ #
24
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
25
+ #
26
+ # buf - a string to decrypt
27
+ # ref - a PDF::Reader::Reference for the object to decrypt
28
+ #
29
+ def decrypt( buf, ref )
30
+ cipher = OpenSSL::Cipher.new(@cipher)
31
+ cipher.decrypt
32
+ cipher.key = @encrypt_key.dup
33
+ cipher.iv = buf[0..15]
34
+ cipher.update(buf[16..-1]) + cipher.final
35
+ end
36
+
37
+ end
38
+ end
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -59,6 +59,9 @@ class PDF::Reader
59
59
  # Allow for this here
60
60
  TRAILING_BYTECOUNT = 5000
61
61
 
62
+ # must match whole tokens
63
+ DIGITS_ONLY = %r{\A\d+\z}
64
+
62
65
  attr_reader :pos
63
66
 
64
67
  # Creates a new buffer.
@@ -143,13 +146,20 @@ class PDF::Reader
143
146
  @io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END) rescue @io.seek(0)
144
147
  data = @io.read(TRAILING_BYTECOUNT)
145
148
 
149
+ raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil?
150
+
146
151
  # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
147
152
  lines = data.split(/[\n\r]+/).reverse
148
153
  eof_index = lines.index { |l| l.strip[/^%%EOF/] }
149
154
 
150
155
  raise MalformedPDFError, "PDF does not contain EOF marker" if eof_index.nil?
151
156
  raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size-1
152
- lines[eof_index+1].to_i
157
+ offset = lines[eof_index+1].to_i
158
+
159
+ # a byte offset < 0 doesn't make much sense. This is unlikely to happen, but in theory some
160
+ # corrupted PDFs might have a line that looks like a negative int preceding the `%%EOF`
161
+ raise MalformedPDFError, "invalid xref offset" if offset < 0
162
+ offset
153
163
  end
154
164
 
155
165
  private
@@ -230,13 +240,12 @@ class PDF::Reader
230
240
  return if @tokens.size < 3
231
241
  return if @tokens[2] != "R"
232
242
 
233
- # must match whole tokens
234
- digits_only = %r{\A\d+\z}
235
- if @tokens[0].match(digits_only) && @tokens[1].match(digits_only)
236
- @tokens[0] = PDF::Reader::Reference.new(@tokens[0].to_i, @tokens[1].to_i)
237
- @tokens[1] = nil
238
- @tokens[2] = nil
239
- @tokens.compact!
243
+ token_one = @tokens[0]
244
+ token_two = @tokens[1]
245
+ if token_one.is_a?(String) && token_two.is_a?(String) && token_one.match(DIGITS_ONLY) && token_two.match(DIGITS_ONLY)
246
+ @tokens[0] = PDF::Reader::Reference.new(token_one.to_i, token_two.to_i)
247
+ @tokens.delete_at(2)
248
+ @tokens.delete_at(1)
240
249
  end
241
250
  end
242
251
 
@@ -246,7 +255,7 @@ class PDF::Reader
246
255
  # This is to reduce the chance of accidentally matching an embedded EI
247
256
  def prepare_inline_token
248
257
  idstart = @io.pos
249
- chr = prevchr = nil
258
+ prevchr = ''
250
259
  eisize = 0 # how many chars in the end marker
251
260
  seeking = 'E' # what are we looking for now?
252
261
  loop do
@@ -264,11 +273,11 @@ class PDF::Reader
264
273
  end
265
274
  when 'I'
266
275
  if chr == 'I'
267
- seeking = :END
276
+ seeking = ''
268
277
  else
269
278
  seeking = 'E'
270
279
  end
271
- when :END
280
+ when ''
272
281
  if WHITE_SPACE.include? chr
273
282
  eisize += 1 # Drop trailer
274
283
  break
@@ -276,28 +285,28 @@ class PDF::Reader
276
285
  seeking = 'E'
277
286
  end
278
287
  end
279
- prevchr = chr
288
+ prevchr = chr.is_a?(String) ? chr : ''
280
289
  end
281
- unless seeking == :END
290
+ unless seeking == ''
282
291
  raise MalformedPDFError, "EI terminator not found"
283
292
  end
284
293
  eiend = @io.pos
285
294
  @io.seek(idstart, IO::SEEK_SET)
286
295
  str = @io.read(eiend - eisize - idstart) # get the ID content
287
- @tokens << string_token(str)
296
+ @tokens << str.freeze if str
288
297
  end
289
298
 
290
299
  # if we're currently inside a hex string, read hex nibbles until
291
300
  # we find a closing >
292
301
  #
293
302
  def prepare_hex_token
303
+ finished = :false
294
304
  str = "".dup
295
- finished = false
296
305
 
297
- while !finished
306
+ until finished == :true
298
307
  byte = @io.getbyte
299
308
  if byte.nil?
300
- finished = true # unbalanced params
309
+ finished = :true # unbalanced params
301
310
  elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
302
311
  str << byte
303
312
  elsif byte <= 32
@@ -306,7 +315,7 @@ class PDF::Reader
306
315
  @tokens << str if str.size > 0
307
316
  @tokens << ">" if byte != 0x3E # '>'
308
317
  @tokens << byte.chr
309
- finished = true
318
+ finished = :true
310
319
  end
311
320
  end
312
321
  end
@@ -353,14 +362,17 @@ class PDF::Reader
353
362
  def prepare_regular_token
354
363
  tok = "".dup
355
364
 
356
- while byte = @io.getbyte
365
+ loop do
366
+ byte = @io.getbyte
367
+
357
368
  case byte
369
+ when nil
370
+ break
358
371
  when 0x25
359
372
  # comment, ignore everything until the next EOL char
360
- done = false
361
- while !done
362
- byte = @io.getbyte
363
- done = true if byte.nil? || byte == 0x0A || byte == 0x0D
373
+ loop do
374
+ commentbyte = @io.getbyte
375
+ break if commentbyte.nil? || commentbyte == 0x0A || commentbyte == 0x0D
364
376
  end
365
377
  when *TOKEN_WHITESPACE
366
378
  # white space, token finished
@@ -430,15 +442,5 @@ class PDF::Reader
430
442
  byte
431
443
  end
432
444
 
433
- # for a handful of tokens we want to tell the parser how to convert them
434
- # into higher level tokens. This methods adds a to_token() method
435
- # to tokens that should remain as strings.
436
- #
437
- def string_token(token)
438
- def token.to_token
439
- to_s
440
- end
441
- token
442
- end
443
445
  end
444
446
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -35,15 +35,15 @@ class PDF::Reader
35
35
  class CMap # :nodoc:
36
36
 
37
37
  CMAP_KEYWORDS = {
38
- "begincodespacerange" => 1,
39
- "endcodespacerange" => 1,
40
- "beginbfchar" => 1,
41
- "endbfchar" => 1,
42
- "beginbfrange" => 1,
43
- "endbfrange" => 1,
44
- "begin" => 1,
45
- "begincmap" => 1,
46
- "def" => 1
38
+ "begincodespacerange" => :noop,
39
+ "endcodespacerange" => :noop,
40
+ "beginbfchar" => :noop,
41
+ "endbfchar" => :noop,
42
+ "beginbfrange" => :noop,
43
+ "endbfrange" => :noop,
44
+ "begin" => :noop,
45
+ "begincmap" => :noop,
46
+ "def" => :noop
47
47
  }
48
48
 
49
49
  attr_reader :map
@@ -53,30 +53,6 @@ class PDF::Reader
53
53
  process_data(data)
54
54
  end
55
55
 
56
- def process_data(data)
57
- parser = build_parser(data)
58
- mode = :none
59
- instructions = []
60
-
61
- while token = parser.parse_token(CMAP_KEYWORDS)
62
- if token == "beginbfchar"
63
- mode = :char
64
- elsif token == "endbfchar"
65
- process_bfchar_instructions(instructions)
66
- instructions = []
67
- mode = :none
68
- elsif token == "beginbfrange"
69
- mode = :range
70
- elsif token == "endbfrange"
71
- process_bfrange_instructions(instructions)
72
- instructions = []
73
- mode = :none
74
- elsif mode == :char || mode == :range
75
- instructions << token
76
- end
77
- end
78
- end
79
-
80
56
  def size
81
57
  @map.size
82
58
  end
@@ -86,13 +62,40 @@ class PDF::Reader
86
62
  # Returns an array of Integers.
87
63
  #
88
64
  def decode(c)
89
- # TODO: implement the conversion
90
- return c unless Integer === c
91
- @map[c]
65
+ @map.fetch(c, [])
92
66
  end
93
67
 
94
68
  private
95
69
 
70
+ def process_data(data, initial_mode = :none)
71
+ parser = build_parser(data)
72
+ mode = initial_mode
73
+ instructions = []
74
+
75
+ while token = parser.parse_token(CMAP_KEYWORDS)
76
+ if token.is_a?(String) || token.is_a?(Array)
77
+ if token == "beginbfchar"
78
+ mode = :char
79
+ elsif token == "endbfchar"
80
+ process_bfchar_instructions(instructions)
81
+ instructions = []
82
+ mode = :none
83
+ elsif token == "beginbfrange"
84
+ mode = :range
85
+ elsif token == "endbfrange"
86
+ process_bfrange_instructions(instructions)
87
+ instructions = []
88
+ mode = :none
89
+ elsif mode == :char
90
+ instructions << token.to_s
91
+ elsif mode == :range
92
+ instructions << token
93
+ end
94
+ end
95
+ end
96
+ end
97
+
98
+
96
99
  def build_parser(instructions)
97
100
  buffer = Buffer.new(StringIO.new(instructions))
98
101
  Parser.new(buffer)
@@ -107,7 +110,6 @@ class PDF::Reader
107
110
  # exception when we try converting broken UTF-16 to UTF-8
108
111
  #
109
112
  def str_to_int(str)
110
- return nil if str.nil? || str.size == 0
111
113
  unpacked_string = if str.bytesize == 1 # UTF-8
112
114
  str.unpack("C*")
113
115
  else # UTF-16
@@ -115,12 +117,15 @@ class PDF::Reader
115
117
  end
116
118
  result = []
117
119
  while unpacked_string.any? do
118
- if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
120
+ if unpacked_string.size >= 2 &&
121
+ unpacked_string.first.to_i > 0xD800 &&
122
+ unpacked_string.first.to_i < 0xDBFF
119
123
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
120
124
  # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
121
125
  # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
122
- points = [unpacked_string.shift, unpacked_string.shift]
123
- result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
126
+ point_one = unpacked_string.shift.to_i
127
+ point_two = unpacked_string.shift.to_i
128
+ result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
124
129
  else
125
130
  result << unpacked_string.shift
126
131
  end
@@ -130,9 +135,11 @@ class PDF::Reader
130
135
 
131
136
  def process_bfchar_instructions(instructions)
132
137
  instructions.each_slice(2) do |one, two|
133
- find = str_to_int(one)
134
- replace = str_to_int(two)
135
- @map[find.first] = replace
138
+ find = str_to_int(one.to_s)
139
+ replace = str_to_int(two.to_s)
140
+ if find.any? && replace.any?
141
+ @map[find.first.to_i] = replace
142
+ end
136
143
  end
137
144
  end
138
145
 
@@ -143,30 +150,36 @@ class PDF::Reader
143
150
  elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
144
151
  bfrange_type_two(start, finish, to)
145
152
  else
146
- raise "invalid bfrange section"
153
+ raise MalformedPDFError, "invalid bfrange section"
147
154
  end
148
155
  end
149
156
  end
150
157
 
151
158
  def bfrange_type_one(start_code, end_code, dst)
152
- start_code = str_to_int(start_code)[0]
153
- end_code = str_to_int(end_code)[0]
159
+ start_code = str_to_int(start_code).first
160
+ end_code = str_to_int(end_code).first
154
161
  dst = str_to_int(dst)
155
162
 
163
+ return if start_code.nil? || end_code.nil?
164
+
156
165
  # add all values in the range to our mapping
157
166
  (start_code..end_code).each_with_index do |val, idx|
158
- @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
167
+ @map[val] = dst.length == 1 ? [dst[0].to_i + idx] : [dst[0].to_i, dst[1].to_i + 1]
159
168
  end
160
169
  end
161
170
 
162
171
  def bfrange_type_two(start_code, end_code, dst)
163
- start_code = str_to_int(start_code)[0]
164
- end_code = str_to_int(end_code)[0]
172
+ start_code = str_to_int(start_code).first
173
+ end_code = str_to_int(end_code).first
174
+
175
+ return if start_code.nil? || end_code.nil?
176
+
165
177
  from_range = (start_code..end_code)
166
178
 
167
179
  # add all values in the range to our mapping
168
180
  from_range.each_with_index do |val, idx|
169
- @map[val] = str_to_int(dst[idx])
181
+ dst_char = dst[idx]
182
+ @map[val.to_i] = str_to_int(dst_char) if dst_char
170
183
  end
171
184
  end
172
185
  end
@@ -51,9 +51,17 @@ class PDF::Reader
51
51
  raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
52
  end
53
53
  ################################################################################
54
+ def self.validate_type_as_malformed(object, name, klass)
55
+ raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
+ end
57
+ ################################################################################
54
58
  def self.validate_not_nil(object, name)
55
59
  raise ArgumentError, "#{object} must not be nil" if object.nil?
56
60
  end
61
+ ################################################################################
62
+ def self.validate_not_nil_as_malformed(object, name)
63
+ raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
+ end
57
65
  end
58
66
 
59
67
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'ascii85'
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -9,7 +9,7 @@ class PDF::Reader
9
9
  class Depredict
10
10
 
11
11
  def initialize(options = {})
12
- @options = options || {}
12
+ @options = options
13
13
  end
14
14
 
15
15
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
 
@@ -34,7 +34,7 @@ class PDF::Reader
34
34
  def zlib_inflate(data)
35
35
  begin
36
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
37
- rescue Zlib::DataError
37
+ rescue Zlib::Error
38
38
  # by default, Ruby's Zlib assumes the data it's inflating
39
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
40
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -43,7 +43,7 @@ class PDF::Reader
43
43
 
44
44
  begin
45
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
46
- rescue StandardError
46
+ rescue Zlib::Error
47
47
  # swallow this one too, so we can try some other fallback options
48
48
  end
49
49
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -1,8 +1,7 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
- #
6
5
  class PDF::Reader
7
6
  module Filter # :nodoc:
8
7
  # implementation of the null stream filter
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -42,7 +42,7 @@ class PDF::Reader
42
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
43
43
  #
44
44
  def self.with(name, options = {})
45
- case name.to_sym
45
+ case name
46
46
  when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
47
47
  when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
48
48
  when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
@@ -149,27 +149,37 @@ class PDF::Reader
149
149
  end
150
150
  end
151
151
 
152
- def extract_base_info(obj)
153
- @subtype = @ohash.object(obj[:Subtype])
154
- @basefont = @ohash.object(obj[:BaseFont])
155
- if @ohash.object(obj[:Encoding])
156
- @encoding = PDF::Reader::Encoding.new(@ohash.object(obj[:Encoding]))
152
+ def build_encoding(obj)
153
+ if obj[:Encoding].is_a?(Symbol)
154
+ # one of the standard encodings, referenced by name
155
+ # TODO pass in a standard shape, always a Hash
156
+ PDF::Reader::Encoding.new(obj[:Encoding])
157
+ elsif obj[:Encoding].is_a?(Hash) || obj[:Encoding].is_a?(PDF::Reader::Stream)
158
+ PDF::Reader::Encoding.new(obj[:Encoding])
159
+ elsif obj[:Encoding].nil?
160
+ default_encoding(@basefont)
157
161
  else
158
- @encoding = default_encoding(@basefont)
162
+ raise MalformedPDFError, "Unexpected type for Encoding (#{obj[:Encoding].class})"
159
163
  end
160
- @widths = @ohash.object(obj[:Widths]) || []
161
- @first_char = @ohash.object(obj[:FirstChar])
162
- @last_char = @ohash.object(obj[:LastChar])
164
+ end
165
+
166
+ def extract_base_info(obj)
167
+ @subtype = @ohash.deref_name(obj[:Subtype])
168
+ @basefont = @ohash.deref_name(obj[:BaseFont])
169
+ @encoding = build_encoding(obj)
170
+ @widths = @ohash.deref_array_of_numbers(obj[:Widths]) || []
171
+ @first_char = @ohash.deref_integer(obj[:FirstChar])
172
+ @last_char = @ohash.deref_integer(obj[:LastChar])
163
173
 
164
174
  # CID Fonts are not required to have a W or DW entry, if they don't exist,
165
175
  # the default cid width = 1000, see Section 9.7.4.1 PDF 32000-1:2008 pp 269
166
- @cid_widths = @ohash.object(obj[:W]) || []
167
- @cid_default_width = @ohash.object(obj[:DW]) || 1000
176
+ @cid_widths = @ohash.deref_array(obj[:W]) || []
177
+ @cid_default_width = @ohash.deref_number(obj[:DW]) || 1000
168
178
 
169
179
  if obj[:ToUnicode]
170
180
  # ToUnicode is optional for Type1 and Type3
171
- stream = @ohash.object(obj[:ToUnicode])
172
- if stream.is_a?(PDF::Reader::Stream)
181
+ stream = @ohash.deref_stream(obj[:ToUnicode])
182
+ if stream
173
183
  @tounicode = PDF::Reader::CMap.new(stream.unfiltered_data)
174
184
  end
175
185
  end
@@ -177,7 +187,9 @@ class PDF::Reader
177
187
 
178
188
  def extract_type3_info(obj)
179
189
  if @subtype == :Type3
180
- @font_matrix = @ohash.object(obj[:FontMatrix]) || [ 0.001, 0, 0, 0.001, 0, 0 ]
190
+ @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
191
+ 0.001, 0, 0, 0.001, 0, 0
192
+ ]
181
193
  end
182
194
  end
183
195
 
@@ -185,7 +197,7 @@ class PDF::Reader
185
197
  if obj[:FontDescriptor]
186
198
  # create a font descriptor object if we can, in other words, unless this is
187
199
  # a CID Font
188
- fd = @ohash.object(obj[:FontDescriptor])
200
+ fd = @ohash.deref_hash(obj[:FontDescriptor])
189
201
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
190
202
  else
191
203
  @font_descriptor = nil
@@ -197,9 +209,9 @@ class PDF::Reader
197
209
  # per PDF 32000-1:2008 pp. 280 :DescendantFonts is:
198
210
  # A one-element array specifying the CIDFont dictionary that is the
199
211
  # descendant of this Type 0 font.
200
- descendants = @ohash.object(obj[:DescendantFonts])
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
201
213
  @descendantfonts = descendants.map { |desc|
202
- PDF::Reader::Font.new(@ohash, @ohash.object(desc))
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
203
215
  }
204
216
  end
205
217