pdf-reader 2.7.0 → 2.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +20 -0
  3. data/Rakefile +1 -1
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +41 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +38 -0
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +16 -0
  7. data/lib/pdf/reader/buffer.rb +36 -34
  8. data/lib/pdf/reader/cmap.rb +64 -51
  9. data/lib/pdf/reader/error.rb +8 -0
  10. data/lib/pdf/reader/filter/ascii85.rb +1 -1
  11. data/lib/pdf/reader/filter/ascii_hex.rb +1 -1
  12. data/lib/pdf/reader/filter/depredict.rb +1 -1
  13. data/lib/pdf/reader/filter/flate.rb +3 -3
  14. data/lib/pdf/reader/filter/lzw.rb +1 -1
  15. data/lib/pdf/reader/filter/null.rb +1 -2
  16. data/lib/pdf/reader/filter/run_length.rb +1 -1
  17. data/lib/pdf/reader/filter.rb +10 -11
  18. data/lib/pdf/reader/font.rb +71 -16
  19. data/lib/pdf/reader/font_descriptor.rb +18 -17
  20. data/lib/pdf/reader/form_xobject.rb +14 -5
  21. data/lib/pdf/reader/key_builder_v5.rb +138 -0
  22. data/lib/pdf/reader/null_security_handler.rb +0 -4
  23. data/lib/pdf/reader/object_hash.rb +251 -44
  24. data/lib/pdf/reader/page.rb +51 -22
  25. data/lib/pdf/reader/page_layout.rb +14 -28
  26. data/lib/pdf/reader/page_state.rb +1 -1
  27. data/lib/pdf/reader/page_text_receiver.rb +52 -10
  28. data/lib/pdf/reader/parser.rb +22 -7
  29. data/lib/pdf/reader/point.rb +1 -1
  30. data/lib/pdf/reader/rc4_security_handler.rb +38 -0
  31. data/lib/pdf/reader/rectangle.rb +20 -2
  32. data/lib/pdf/reader/{resource_methods.rb → resources.rb} +15 -13
  33. data/lib/pdf/reader/security_handler_factory.rb +79 -0
  34. data/lib/pdf/reader/{standard_security_handler.rb → standard_key_builder.rb} +23 -95
  35. data/lib/pdf/reader/stream.rb +2 -2
  36. data/lib/pdf/reader/text_run.rb +13 -6
  37. data/lib/pdf/reader/type_check.rb +52 -0
  38. data/lib/pdf/reader/validating_receiver.rb +262 -0
  39. data/lib/pdf/reader/width_calculator/true_type.rb +1 -1
  40. data/lib/pdf/reader/xref.rb +20 -3
  41. data/lib/pdf/reader.rb +32 -11
  42. data/rbi/pdf-reader.rbi +408 -174
  43. metadata +16 -9
  44. data/lib/pdf/reader/standard_security_handler_v5.rb +0 -92
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5ee0d8c3c55f6a0aebb60a0a6dce92428e8371b96a6beb6d75bfe90602bffae7
4
- data.tar.gz: '0911d108353bf577aa9fd7b49b97dda1cf9d54816bf8ff6c4225281eeda63229'
3
+ metadata.gz: cc98ab07b3c66f13f663ea5faf8132b45d769912e0da737917dd054e38318ede
4
+ data.tar.gz: 0f2928d9778b5b3ea8fca5e723a2b3fa6f275df70b02f1eb4385e077c535ac78
5
5
  SHA512:
6
- metadata.gz: 917db2b1fb977b41e7b057ff3d215b8f249577254d9fe3df72f330b32ff49630874c58f480495ddcd137d9f31d014083438623cdf7260b0d7a87bbe3a5f3685a
7
- data.tar.gz: cd9832f025264e54d586e81eff69727379e8646d741f53ae61e90a5b38945d852147853891d468bab683581bdd0beb68a9b7c7f5e54e064e9a3935262ea9d651
6
+ metadata.gz: 210cd8c8cef93b0e0fac1446c091c2a62772ffe8b1786627089e5a330ca7defd501df7cccc0b48d326d38ff74318b162e512220e8a4460260bebe7da0ef8b757
7
+ data.tar.gz: 047e7f6641411557b1d3b50035dbdf55647c63deede273b6ce4442230b85372045494b81e88c1ffcaa09a7c5ea26823ee33b33c3bf82013328d0e32a95021284
data/CHANGELOG CHANGED
@@ -1,3 +1,23 @@
1
+ v2.9.2 (20th February 2022)
2
+ - Fix PDF::Reader::ObjectHash#page_references to return an Array of PDF::Reader::Reference (http://github.com/yob/pdf-reader/pull/444)
3
+
4
+ v2.9.1 (4th February 2022)
5
+ - Fix exception in Page#walk introduced in 2.9.0 (http://github.com/yob/pdf-reader/pull/442)
6
+ - Other small bug fixes
7
+
8
+ v2.9.0 (24th January 2022)
9
+ - Support additional encryption standards (http://github.com/yob/pdf-reader/pull/419)
10
+ - Return CropBox correctly from Page#rectangles (https://github.com/yob/pdf-reader/pull/420)
11
+ - For sorbet users, additional type annotations are included in the gem
12
+
13
+ v2.8.0 (28th December 2021)
14
+ - Add PDF::Reader::Page#runs for extracting text from a page with positioning metadata (http://github.com/yob/pdf-reader/pull/411)
15
+ - Add options to PDF::Reader::Page#text to make some behaviour configurable (http://github.com/yob/pdf-reader/pull/411)
16
+ - including extracting the text for only part of the page
17
+ - Improve text positioning and extraction for Type3 fonts (http://github.com/yob/pdf-reader/pull/412)
18
+ - Skip extracting text that is positioned outside the page (http://github.com/yob/pdf-reader/pull/413)
19
+ - Fix occasional crash when reading some streams (http://github.com/yob/pdf-reader/pull/405)
20
+
1
21
  v2.7.0 (13th December 2021)
2
22
  - Include RBI type files in the gem
3
23
  - Downstream users of pdf-reader who also use sorbet *should* find many parts of the API will
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
14
14
  Cane::RakeTask.new(:quality) do |cane|
15
15
  cane.abc_max = 20
16
16
  cane.style_measure = 100
17
- cane.max_violations = 32
17
+ cane.max_violations = 28
18
18
 
19
19
  cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
20
20
  end
@@ -0,0 +1,41 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest/md5'
6
+
7
+ class PDF::Reader
8
+
9
+ # Decrypts data using the AESV2 algorithm defined in the PDF spec. Requires
10
+ # a decryption key, which is usually generated by PDF::Reader::StandardKeyBuilder
11
+ #
12
+ class AesV2SecurityHandler
13
+
14
+ def initialize(key)
15
+ @encrypt_key = key
16
+ end
17
+
18
+ ##7.6.2 General Encryption Algorithm
19
+ #
20
+ # Algorithm 1: Encryption of data using the AES-128-CBC algorithm
21
+ #
22
+ # version == 4 and CFM == AESV2
23
+ #
24
+ # buf - a string to decrypt
25
+ # ref - a PDF::Reader::Reference for the object to decrypt
26
+ #
27
+ def decrypt( buf, ref )
28
+ objKey = @encrypt_key.dup
29
+ (0..2).each { |e| objKey << (ref.id >> e*8 & 0xFF ) }
30
+ (0..1).each { |e| objKey << (ref.gen >> e*8 & 0xFF ) }
31
+ objKey << 'sAlT' # Algorithm 1, b)
32
+ length = objKey.length < 16 ? objKey.length : 16
33
+ cipher = OpenSSL::Cipher.new("AES-#{length << 3}-CBC")
34
+ cipher.decrypt
35
+ cipher.key = Digest::MD5.digest(objKey)[0,length]
36
+ cipher.iv = buf[0..15]
37
+ cipher.update(buf[16..-1]) + cipher.final
38
+ end
39
+
40
+ end
41
+ end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ require 'digest'
6
+ require 'openssl'
7
+
8
+ class PDF::Reader
9
+
10
+ # Decrypts data using the AESV3 algorithm defined in the PDF 1.7, Extension Level 3 spec.
11
+ # Requires a decryption key, which is usually generated by PDF::Reader::KeyBuilderV5
12
+ #
13
+ class AesV3SecurityHandler
14
+
15
+ def initialize(key)
16
+ @encrypt_key = key
17
+ @cipher = "AES-256-CBC"
18
+ end
19
+
20
+ ##7.6.2 General Encryption Algorithm
21
+ #
22
+ # Algorithm 1: Encryption of data using the RC4 or AES algorithms
23
+ #
24
+ # used to decrypt RC4/AES encrypted PDF streams (buf)
25
+ #
26
+ # buf - a string to decrypt
27
+ # ref - a PDF::Reader::Reference for the object to decrypt
28
+ #
29
+ def decrypt( buf, ref )
30
+ cipher = OpenSSL::Cipher.new(@cipher)
31
+ cipher.decrypt
32
+ cipher.key = @encrypt_key.dup
33
+ cipher.iv = buf[0..15]
34
+ cipher.update(buf[16..-1]) + cipher.final
35
+ end
36
+
37
+ end
38
+ end
@@ -0,0 +1,16 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+
7
+ # Filter out text/characters that are positioned outside a rectangle. Usually the page
8
+ # MediaBox or CropBox, but could be a user specified rectangle too
9
+ class BoundingRectangleRunsFilter
10
+
11
+ def self.runs_within_rect(runs, rect)
12
+ runs.select { |run| rect.contains?(run.origin) }
13
+ end
14
+ end
15
+ end
16
+
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -59,6 +59,9 @@ class PDF::Reader
59
59
  # Allow for this here
60
60
  TRAILING_BYTECOUNT = 5000
61
61
 
62
+ # must match whole tokens
63
+ DIGITS_ONLY = %r{\A\d+\z}
64
+
62
65
  attr_reader :pos
63
66
 
64
67
  # Creates a new buffer.
@@ -143,13 +146,20 @@ class PDF::Reader
143
146
  @io.seek(-TRAILING_BYTECOUNT, IO::SEEK_END) rescue @io.seek(0)
144
147
  data = @io.read(TRAILING_BYTECOUNT)
145
148
 
149
+ raise MalformedPDFError, "PDF does not contain EOF marker" if data.nil?
150
+
146
151
  # the PDF 1.7 spec (section #3.4) says that EOL markers can be either \r, \n, or both.
147
152
  lines = data.split(/[\n\r]+/).reverse
148
153
  eof_index = lines.index { |l| l.strip[/^%%EOF/] }
149
154
 
150
155
  raise MalformedPDFError, "PDF does not contain EOF marker" if eof_index.nil?
151
156
  raise MalformedPDFError, "PDF EOF marker does not follow offset" if eof_index >= lines.size-1
152
- lines[eof_index+1].to_i
157
+ offset = lines[eof_index+1].to_i
158
+
159
+ # a byte offset < 0 doesn't make much sense. This is unlikely to happen, but in theory some
160
+ # corrupted PDFs might have a line that looks like a negative int preceding the `%%EOF`
161
+ raise MalformedPDFError, "invalid xref offset" if offset < 0
162
+ offset
153
163
  end
154
164
 
155
165
  private
@@ -230,13 +240,12 @@ class PDF::Reader
230
240
  return if @tokens.size < 3
231
241
  return if @tokens[2] != "R"
232
242
 
233
- # must match whole tokens
234
- digits_only = %r{\A\d+\z}
235
- if @tokens[0].match(digits_only) && @tokens[1].match(digits_only)
236
- @tokens[0] = PDF::Reader::Reference.new(@tokens[0].to_i, @tokens[1].to_i)
237
- @tokens[1] = nil
238
- @tokens[2] = nil
239
- @tokens.compact!
243
+ token_one = @tokens[0]
244
+ token_two = @tokens[1]
245
+ if token_one.is_a?(String) && token_two.is_a?(String) && token_one.match(DIGITS_ONLY) && token_two.match(DIGITS_ONLY)
246
+ @tokens[0] = PDF::Reader::Reference.new(token_one.to_i, token_two.to_i)
247
+ @tokens.delete_at(2)
248
+ @tokens.delete_at(1)
240
249
  end
241
250
  end
242
251
 
@@ -246,7 +255,7 @@ class PDF::Reader
246
255
  # This is to reduce the chance of accidentally matching an embedded EI
247
256
  def prepare_inline_token
248
257
  idstart = @io.pos
249
- chr = prevchr = nil
258
+ prevchr = ''
250
259
  eisize = 0 # how many chars in the end marker
251
260
  seeking = 'E' # what are we looking for now?
252
261
  loop do
@@ -264,11 +273,11 @@ class PDF::Reader
264
273
  end
265
274
  when 'I'
266
275
  if chr == 'I'
267
- seeking = :END
276
+ seeking = ''
268
277
  else
269
278
  seeking = 'E'
270
279
  end
271
- when :END
280
+ when ''
272
281
  if WHITE_SPACE.include? chr
273
282
  eisize += 1 # Drop trailer
274
283
  break
@@ -276,28 +285,28 @@ class PDF::Reader
276
285
  seeking = 'E'
277
286
  end
278
287
  end
279
- prevchr = chr
288
+ prevchr = chr.is_a?(String) ? chr : ''
280
289
  end
281
- unless seeking == :END
290
+ unless seeking == ''
282
291
  raise MalformedPDFError, "EI terminator not found"
283
292
  end
284
293
  eiend = @io.pos
285
294
  @io.seek(idstart, IO::SEEK_SET)
286
295
  str = @io.read(eiend - eisize - idstart) # get the ID content
287
- @tokens << string_token(str)
296
+ @tokens << str.freeze if str
288
297
  end
289
298
 
290
299
  # if we're currently inside a hex string, read hex nibbles until
291
300
  # we find a closing >
292
301
  #
293
302
  def prepare_hex_token
303
+ finished = :false
294
304
  str = "".dup
295
- finished = false
296
305
 
297
- while !finished
306
+ until finished == :true
298
307
  byte = @io.getbyte
299
308
  if byte.nil?
300
- finished = true # unbalanced params
309
+ finished = :true # unbalanced params
301
310
  elsif (48..57).include?(byte) || (65..90).include?(byte) || (97..122).include?(byte)
302
311
  str << byte
303
312
  elsif byte <= 32
@@ -306,7 +315,7 @@ class PDF::Reader
306
315
  @tokens << str if str.size > 0
307
316
  @tokens << ">" if byte != 0x3E # '>'
308
317
  @tokens << byte.chr
309
- finished = true
318
+ finished = :true
310
319
  end
311
320
  end
312
321
  end
@@ -353,14 +362,17 @@ class PDF::Reader
353
362
  def prepare_regular_token
354
363
  tok = "".dup
355
364
 
356
- while byte = @io.getbyte
365
+ loop do
366
+ byte = @io.getbyte
367
+
357
368
  case byte
369
+ when nil
370
+ break
358
371
  when 0x25
359
372
  # comment, ignore everything until the next EOL char
360
- done = false
361
- while !done
362
- byte = @io.getbyte
363
- done = true if byte.nil? || byte == 0x0A || byte == 0x0D
373
+ loop do
374
+ commentbyte = @io.getbyte
375
+ break if commentbyte.nil? || commentbyte == 0x0A || commentbyte == 0x0D
364
376
  end
365
377
  when *TOKEN_WHITESPACE
366
378
  # white space, token finished
@@ -430,15 +442,5 @@ class PDF::Reader
430
442
  byte
431
443
  end
432
444
 
433
- # for a handful of tokens we want to tell the parser how to convert them
434
- # into higher level tokens. This methods adds a to_token() method
435
- # to tokens that should remain as strings.
436
- #
437
- def string_token(token)
438
- def token.to_token
439
- to_s
440
- end
441
- token
442
- end
443
445
  end
444
446
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -35,15 +35,15 @@ class PDF::Reader
35
35
  class CMap # :nodoc:
36
36
 
37
37
  CMAP_KEYWORDS = {
38
- "begincodespacerange" => 1,
39
- "endcodespacerange" => 1,
40
- "beginbfchar" => 1,
41
- "endbfchar" => 1,
42
- "beginbfrange" => 1,
43
- "endbfrange" => 1,
44
- "begin" => 1,
45
- "begincmap" => 1,
46
- "def" => 1
38
+ "begincodespacerange" => :noop,
39
+ "endcodespacerange" => :noop,
40
+ "beginbfchar" => :noop,
41
+ "endbfchar" => :noop,
42
+ "beginbfrange" => :noop,
43
+ "endbfrange" => :noop,
44
+ "begin" => :noop,
45
+ "begincmap" => :noop,
46
+ "def" => :noop
47
47
  }
48
48
 
49
49
  attr_reader :map
@@ -53,30 +53,6 @@ class PDF::Reader
53
53
  process_data(data)
54
54
  end
55
55
 
56
- def process_data(data)
57
- parser = build_parser(data)
58
- mode = :none
59
- instructions = []
60
-
61
- while token = parser.parse_token(CMAP_KEYWORDS)
62
- if token == "beginbfchar"
63
- mode = :char
64
- elsif token == "endbfchar"
65
- process_bfchar_instructions(instructions)
66
- instructions = []
67
- mode = :none
68
- elsif token == "beginbfrange"
69
- mode = :range
70
- elsif token == "endbfrange"
71
- process_bfrange_instructions(instructions)
72
- instructions = []
73
- mode = :none
74
- elsif mode == :char || mode == :range
75
- instructions << token
76
- end
77
- end
78
- end
79
-
80
56
  def size
81
57
  @map.size
82
58
  end
@@ -86,13 +62,40 @@ class PDF::Reader
86
62
  # Returns an array of Integers.
87
63
  #
88
64
  def decode(c)
89
- # TODO: implement the conversion
90
- return c unless Integer === c
91
- @map[c]
65
+ @map.fetch(c, [])
92
66
  end
93
67
 
94
68
  private
95
69
 
70
+ def process_data(data, initial_mode = :none)
71
+ parser = build_parser(data)
72
+ mode = initial_mode
73
+ instructions = []
74
+
75
+ while token = parser.parse_token(CMAP_KEYWORDS)
76
+ if token.is_a?(String) || token.is_a?(Array)
77
+ if token == "beginbfchar"
78
+ mode = :char
79
+ elsif token == "endbfchar"
80
+ process_bfchar_instructions(instructions)
81
+ instructions = []
82
+ mode = :none
83
+ elsif token == "beginbfrange"
84
+ mode = :range
85
+ elsif token == "endbfrange"
86
+ process_bfrange_instructions(instructions)
87
+ instructions = []
88
+ mode = :none
89
+ elsif mode == :char
90
+ instructions << token.to_s
91
+ elsif mode == :range
92
+ instructions << token
93
+ end
94
+ end
95
+ end
96
+ end
97
+
98
+
96
99
  def build_parser(instructions)
97
100
  buffer = Buffer.new(StringIO.new(instructions))
98
101
  Parser.new(buffer)
@@ -107,7 +110,6 @@ class PDF::Reader
107
110
  # exception when we try converting broken UTF-16 to UTF-8
108
111
  #
109
112
  def str_to_int(str)
110
- return nil if str.nil? || str.size == 0
111
113
  unpacked_string = if str.bytesize == 1 # UTF-8
112
114
  str.unpack("C*")
113
115
  else # UTF-16
@@ -115,12 +117,15 @@ class PDF::Reader
115
117
  end
116
118
  result = []
117
119
  while unpacked_string.any? do
118
- if unpacked_string.size >= 2 && unpacked_string[0] > 0xD800 && unpacked_string[0] < 0xDBFF
120
+ if unpacked_string.size >= 2 &&
121
+ unpacked_string.first.to_i > 0xD800 &&
122
+ unpacked_string.first.to_i < 0xDBFF
119
123
  # this is a Unicode UTF-16 "Surrogate Pair" see Unicode Spec. Chapter 3.7
120
124
  # lets convert to a UTF-32. (the high bit is between 0xD800-0xDBFF, the
121
125
  # low bit is between 0xDC00-0xDFFF) for example: U+1D44E (U+D835 U+DC4E)
122
- points = [unpacked_string.shift, unpacked_string.shift]
123
- result << (points[0] - 0xD800) * 0x400 + (points[1] - 0xDC00) + 0x10000
126
+ point_one = unpacked_string.shift.to_i
127
+ point_two = unpacked_string.shift.to_i
128
+ result << (point_one - 0xD800) * 0x400 + (point_two - 0xDC00) + 0x10000
124
129
  else
125
130
  result << unpacked_string.shift
126
131
  end
@@ -130,9 +135,11 @@ class PDF::Reader
130
135
 
131
136
  def process_bfchar_instructions(instructions)
132
137
  instructions.each_slice(2) do |one, two|
133
- find = str_to_int(one)
134
- replace = str_to_int(two)
135
- @map[find.first] = replace
138
+ find = str_to_int(one.to_s)
139
+ replace = str_to_int(two.to_s)
140
+ if find.any? && replace.any?
141
+ @map[find.first.to_i] = replace
142
+ end
136
143
  end
137
144
  end
138
145
 
@@ -143,30 +150,36 @@ class PDF::Reader
143
150
  elsif start.kind_of?(String) && finish.kind_of?(String) && to.kind_of?(Array)
144
151
  bfrange_type_two(start, finish, to)
145
152
  else
146
- raise "invalid bfrange section"
153
+ raise MalformedPDFError, "invalid bfrange section"
147
154
  end
148
155
  end
149
156
  end
150
157
 
151
158
  def bfrange_type_one(start_code, end_code, dst)
152
- start_code = str_to_int(start_code)[0]
153
- end_code = str_to_int(end_code)[0]
159
+ start_code = str_to_int(start_code).first
160
+ end_code = str_to_int(end_code).first
154
161
  dst = str_to_int(dst)
155
162
 
163
+ return if start_code.nil? || end_code.nil?
164
+
156
165
  # add all values in the range to our mapping
157
166
  (start_code..end_code).each_with_index do |val, idx|
158
- @map[val] = dst.length == 1 ? [dst[0] + idx] : [dst[0], dst[1] + 1]
167
+ @map[val] = dst.length == 1 ? [dst[0].to_i + idx] : [dst[0].to_i, dst[1].to_i + 1]
159
168
  end
160
169
  end
161
170
 
162
171
  def bfrange_type_two(start_code, end_code, dst)
163
- start_code = str_to_int(start_code)[0]
164
- end_code = str_to_int(end_code)[0]
172
+ start_code = str_to_int(start_code).first
173
+ end_code = str_to_int(end_code).first
174
+
175
+ return if start_code.nil? || end_code.nil?
176
+
165
177
  from_range = (start_code..end_code)
166
178
 
167
179
  # add all values in the range to our mapping
168
180
  from_range.each_with_index do |val, idx|
169
- @map[val] = str_to_int(dst[idx])
181
+ dst_char = dst[idx]
182
+ @map[val.to_i] = str_to_int(dst_char) if dst_char
170
183
  end
171
184
  end
172
185
  end
@@ -51,9 +51,17 @@ class PDF::Reader
51
51
  raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
52
  end
53
53
  ################################################################################
54
+ def self.validate_type_as_malformed(object, name, klass)
55
+ raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
+ end
57
+ ################################################################################
54
58
  def self.validate_not_nil(object, name)
55
59
  raise ArgumentError, "#{object} must not be nil" if object.nil?
56
60
  end
61
+ ################################################################################
62
+ def self.validate_not_nil_as_malformed(object, name)
63
+ raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
+ end
57
65
  end
58
66
 
59
67
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: false
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'ascii85'
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -9,7 +9,7 @@ class PDF::Reader
9
9
  class Depredict
10
10
 
11
11
  def initialize(options = {})
12
- @options = options || {}
12
+ @options = options
13
13
  end
14
14
 
15
15
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
 
@@ -34,7 +34,7 @@ class PDF::Reader
34
34
  def zlib_inflate(data)
35
35
  begin
36
36
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
37
- rescue Zlib::DataError
37
+ rescue Zlib::Error
38
38
  # by default, Ruby's Zlib assumes the data it's inflating
39
39
  # is RFC1951 deflated data, wrapped in a RFC1950 zlib container. If that
40
40
  # fails, swallow the exception and attempt to inflate the data as a raw
@@ -43,7 +43,7 @@ class PDF::Reader
43
43
 
44
44
  begin
45
45
  return Zlib::Inflate.new(ZLIB_RAW_DEFLATE).inflate(data)
46
- rescue StandardError
46
+ rescue Zlib::Error
47
47
  # swallow this one too, so we can try some other fallback options
48
48
  end
49
49
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -1,8 +1,7 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
- #
6
5
  class PDF::Reader
7
6
  module Filter # :nodoc:
8
7
  # implementation of the null stream filter
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  #
@@ -42,17 +42,16 @@ class PDF::Reader
42
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
43
43
  #
44
44
  def self.with(name, options = {})
45
- case name.to_sym
46
- when :ASCII85Decode then PDF::Reader::Filter::Ascii85.new(options)
47
- when :ASCIIHexDecode then PDF::Reader::Filter::AsciiHex.new(options)
48
- when :CCITTFaxDecode then PDF::Reader::Filter::Null.new(options)
49
- when :DCTDecode then PDF::Reader::Filter::Null.new(options)
50
- when :FlateDecode then PDF::Reader::Filter::Flate.new(options)
51
- when :Fl then PDF::Reader::Filter::Flate.new(options)
52
- when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
53
- when :JPXDecode then PDF::Reader::Filter::Null.new(options)
54
- when :LZWDecode then PDF::Reader::Filter::Lzw.new(options)
55
- when :RunLengthDecode then PDF::Reader::Filter::RunLength.new(options)
45
+ case name
46
+ when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
47
+ when :ASCIIHexDecode, :AHx then PDF::Reader::Filter::AsciiHex.new(options)
48
+ when :CCITTFaxDecode, :CCF then PDF::Reader::Filter::Null.new(options)
49
+ when :DCTDecode, :DCT then PDF::Reader::Filter::Null.new(options)
50
+ when :FlateDecode, :Fl then PDF::Reader::Filter::Flate.new(options)
51
+ when :JBIG2Decode then PDF::Reader::Filter::Null.new(options)
52
+ when :JPXDecode then PDF::Reader::Filter::Null.new(options)
53
+ when :LZWDecode, :LZW then PDF::Reader::Filter::Lzw.new(options)
54
+ when :RunLengthDecode, :RL then PDF::Reader::Filter::RunLength.new(options)
56
55
  else
57
56
  raise UnsupportedFeatureError, "Unknown filter: #{name}"
58
57
  end