pdf-reader 2.9.2 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +8 -0
  3. data/Rakefile +1 -1
  4. data/lib/pdf/reader/buffer.rb +1 -1
  5. data/lib/pdf/reader/cid_widths.rb +7 -5
  6. data/lib/pdf/reader/cmap.rb +1 -1
  7. data/lib/pdf/reader/encoding.rb +4 -4
  8. data/lib/pdf/reader/error.rb +0 -4
  9. data/lib/pdf/reader/filter/depredict.rb +2 -2
  10. data/lib/pdf/reader/font.rb +10 -11
  11. data/lib/pdf/reader/font_descriptor.rb +3 -1
  12. data/lib/pdf/reader/form_xobject.rb +4 -1
  13. data/lib/pdf/reader/glyph_hash.rb +13 -5
  14. data/lib/pdf/reader/lzw.rb +25 -10
  15. data/lib/pdf/reader/no_text_filter.rb +14 -0
  16. data/lib/pdf/reader/object_hash.rb +15 -9
  17. data/lib/pdf/reader/object_stream.rb +3 -3
  18. data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
  19. data/lib/pdf/reader/page.rb +26 -7
  20. data/lib/pdf/reader/page_layout.rb +1 -1
  21. data/lib/pdf/reader/page_state.rb +7 -1
  22. data/lib/pdf/reader/page_text_receiver.rb +2 -0
  23. data/lib/pdf/reader/pages_strategy.rb +1 -1
  24. data/lib/pdf/reader/reference.rb +3 -2
  25. data/lib/pdf/reader/resources.rb +3 -2
  26. data/lib/pdf/reader/stream.rb +1 -1
  27. data/lib/pdf/reader/synchronized_cache.rb +1 -1
  28. data/lib/pdf/reader/text_run.rb +5 -2
  29. data/lib/pdf/reader/transformation_matrix.rb +8 -8
  30. data/lib/pdf/reader/type_check.rb +46 -0
  31. data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
  32. data/lib/pdf/reader/width_calculator/composite.rb +6 -2
  33. data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
  34. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
  35. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
  36. data/lib/pdf/reader/xref.rb +3 -3
  37. data/lib/pdf/reader.rb +5 -10
  38. data/rbi/pdf-reader.rbi +428 -358
  39. metadata +6 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc98ab07b3c66f13f663ea5faf8132b45d769912e0da737917dd054e38318ede
4
- data.tar.gz: 0f2928d9778b5b3ea8fca5e723a2b3fa6f275df70b02f1eb4385e077c535ac78
3
+ metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
4
+ data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
5
5
  SHA512:
6
- metadata.gz: 210cd8c8cef93b0e0fac1446c091c2a62772ffe8b1786627089e5a330ca7defd501df7cccc0b48d326d38ff74318b162e512220e8a4460260bebe7da0ef8b757
7
- data.tar.gz: 047e7f6641411557b1d3b50035dbdf55647c63deede273b6ce4442230b85372045494b81e88c1ffcaa09a7c5ea26823ee33b33c3bf82013328d0e32a95021284
6
+ metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
7
+ data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ v2.11.0 (26th October 2022)
2
+ - Various bug fixes
3
+ - Expanded sorbet type annotations
4
+
5
+ v2.10.0 (12th May 2022)
6
+ - Various bug fixes
7
+ - Expanded sorbet type annotations
8
+
1
9
  v2.9.2 (20th February 2022)
2
10
  - Fix PDF::Reader::ObjectHash#page_references to return an Array of PDF::Reader::Reference (http://github.com/yob/pdf-reader/pull/444)
3
11
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
14
14
  Cane::RakeTask.new(:quality) do |cane|
15
15
  cane.abc_max = 20
16
16
  cane.style_measure = 100
17
- cane.max_violations = 28
17
+ cane.max_violations = 33
18
18
 
19
19
  cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
20
20
  end
data/lib/pdf/reader/buffer.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
data/lib/pdf/reader/cid_widths.rb CHANGED
@@ -1,9 +1,7 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
- #
6
-
7
5
  require 'forwardable'
8
6
 
9
7
  class PDF::Reader
@@ -33,10 +31,10 @@ class PDF::Reader
33
31
  params << array.shift
34
32
 
35
33
  if params.size == 2 && params.last.is_a?(Array)
36
- widths.merge! parse_first_form(params.first, params.last)
34
+ widths.merge! parse_first_form(params.first.to_i, Array(params.last))
37
35
  params = []
38
36
  elsif params.size == 3
39
- widths.merge! parse_second_form(params[0], params[1], params[2])
37
+ widths.merge! parse_second_form(params[0].to_i, params[1].to_i, params[2].to_i)
40
38
  params = []
41
39
  end
42
40
  end
@@ -54,6 +52,10 @@ class PDF::Reader
54
52
 
55
53
  # this is the form 10 20 123 where all index between 10 and 20 have width 123
56
54
  def parse_second_form(first, final, width)
55
+ if first > final
56
+ raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
57
+ end
58
+
57
59
  (first..final).inject({}) { |accum, index|
58
60
  accum[index] = width
59
61
  accum
data/lib/pdf/reader/cmap.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
data/lib/pdf/reader/encoding.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -76,9 +76,9 @@ class PDF::Reader
76
76
  diff.each do |val|
77
77
  if val.kind_of?(Numeric)
78
78
  byte = val.to_i
79
- else
79
+ elsif codepoint = glyphlist.name_to_unicode(val)
80
80
  @differences[byte] = val
81
- @mapping[byte] = glyphlist.name_to_unicode(val)
81
+ @mapping[byte] = codepoint
82
82
  byte += 1
83
83
  end
84
84
  end
@@ -167,7 +167,7 @@ class PDF::Reader
167
167
  end
168
168
 
169
169
  def convert_to_utf8(str)
170
- ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
170
+ ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
171
171
  ret.force_encoding("UTF-8")
172
172
  ret
173
173
  end
data/lib/pdf/reader/error.rb CHANGED
@@ -58,10 +58,6 @@ class PDF::Reader
58
58
  def self.validate_not_nil(object, name)
59
59
  raise ArgumentError, "#{object} must not be nil" if object.nil?
60
60
  end
61
- ################################################################################
62
- def self.validate_not_nil_as_malformed(object, name)
63
- raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
- end
65
61
  end
66
62
 
67
63
  ################################################################################
data/lib/pdf/reader/filter/depredict.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -125,7 +125,7 @@ class PDF::Reader
125
125
  row_data[index] = (byte + paeth) % 256
126
126
  end
127
127
  else
128
- raise ArgumentError, "Invalid filter algorithm #{filter}"
128
+ raise MalformedPDFError, "Invalid filter algorithm #{filter}"
129
129
  end
130
130
 
131
131
  s = []
data/lib/pdf/reader/font.rb CHANGED
@@ -205,14 +205,17 @@ class PDF::Reader
205
205
  end
206
206
 
207
207
  def extract_descendants(obj)
208
- return unless obj[:DescendantFonts]
209
208
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
210
209
  # A one-element array specifying the CIDFont dictionary that is the
211
210
  # descendant of this Type 0 font.
212
- descendants = @ohash.deref_array(obj[:DescendantFonts])
213
- @descendantfonts = descendants.map { |desc|
214
- PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
- }
211
+ if obj[:DescendantFonts]
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
213
+ @descendantfonts = descendants.map { |desc|
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
+ }
216
+ else
217
+ @descendantfonts = []
218
+ end
216
219
  end
217
220
 
218
221
  def to_utf8_via_cmap(params)
@@ -226,9 +229,7 @@ class PDF::Reader
226
229
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
227
230
  }.flatten.pack("U*")
228
231
  when Array
229
- params.collect { |param| to_utf8_via_cmap(param) }
230
- else
231
- params
232
+ params.collect { |param| to_utf8_via_cmap(param) }.join("")
232
233
  end
233
234
  end
234
235
 
@@ -243,9 +244,7 @@ class PDF::Reader
243
244
  when String
244
245
  encoding.to_utf8(params)
245
246
  when Array
246
- params.collect { |param| to_utf8_via_encoding(param) }
247
- else
248
- params
247
+ params.collect { |param| to_utf8_via_encoding(param) }.join("")
249
248
  end
250
249
  end
251
250
 
data/lib/pdf/reader/font_descriptor.rb CHANGED
@@ -56,7 +56,9 @@ class PDF::Reader
56
56
  end
57
57
  char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
58
58
  if char_metric
59
- return char_metric.advance_width
59
+ char_metric.advance_width
60
+ else
61
+ 0
60
62
  end
61
63
  end
62
64
  end
data/lib/pdf/reader/form_xobject.rb CHANGED
@@ -45,7 +45,7 @@ module PDF
45
45
  def font_objects
46
46
  raw_fonts = @objects.deref_hash(fonts)
47
47
  ::Hash[raw_fonts.map { |label, font|
48
- [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
49
49
  }]
50
50
  end
51
51
 
@@ -55,6 +55,9 @@ module PDF
55
55
  # See the comments on PDF::Reader::Page#walk for more detail.
56
56
  #
57
57
  def walk(*receivers)
58
+ receivers = receivers.map { |receiver|
59
+ ValidatingReceiver.new(receiver)
60
+ }
58
61
  content_stream(receivers, raw_content)
59
62
  end
60
63
 
data/lib/pdf/reader/glyph_hash.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -33,10 +33,18 @@ class PDF::Reader
33
33
  #
34
34
  class GlyphHash # :nodoc:
35
35
  def initialize
36
+ @@by_codepoint_cache ||= nil
37
+ @@by_name_cache ||= nil
38
+
36
39
  # only parse the glyph list once, and cache the results (for performance)
37
- adobe = @@cache ||= load_adobe_glyph_mapping
38
- @by_name = adobe.first
39
- @by_codepoint = adobe.last
40
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
41
+ @by_name = @@by_name_cache
42
+ @by_codepoint = @@by_codepoint_cache
43
+ else
44
+ by_name, by_codepoint = load_adobe_glyph_mapping
45
+ @by_name = @@by_name_cache ||= by_name
46
+ @by_codepoint = @@by_codepoint_cache ||= by_codepoint
47
+ end
40
48
  end
41
49
 
42
50
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -127,7 +135,7 @@ class PDF::Reader
127
135
  end
128
136
  end
129
137
 
130
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
138
+ return keyed_by_name.freeze, keyed_by_codepoint.freeze
131
139
  end
132
140
 
133
141
  end
data/lib/pdf/reader/lzw.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -25,12 +25,14 @@ module PDF
25
25
  def initialize(data, bits_in_chunk)
26
26
  @data = data
27
27
  @data.force_encoding("BINARY")
28
- @bits_in_chunk = bits_in_chunk
28
+ set_bits_in_chunk(bits_in_chunk)
29
29
  @current_pos = 0
30
30
  @bits_left_in_byte = 8
31
31
  end
32
32
 
33
33
  def set_bits_in_chunk(bits_in_chunk)
34
+ raise MalformedPDFError, "invalid LZW bits" if bits_in_chunk < 9 || bits_in_chunk > 12
35
+
34
36
  @bits_in_chunk = bits_in_chunk
35
37
  end
36
38
 
@@ -39,7 +41,7 @@ module PDF
39
41
  chunk = -1
40
42
  while bits_left_in_chunk > 0 and @current_pos < @data.size
41
43
  chunk = 0 if chunk < 0
42
- codepoint = @data[@current_pos, 1].unpack("C*")[0]
44
+ codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
43
45
  current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
44
46
  dif = bits_left_in_chunk - @bits_left_in_byte
45
47
  if dif > 0 then current_byte <<= dif
@@ -61,21 +63,25 @@ module PDF
61
63
  CODE_CLEAR_TABLE = 256 #clear table
62
64
 
63
65
  # stores de pairs code => string
64
- class StringTable < Hash # :nodoc:
66
+ class StringTable
65
67
  attr_reader :string_table_pos
66
68
 
67
69
  def initialize
68
- super
70
+ @data = Hash.new
69
71
  @string_table_pos = 258 #initial code
70
72
  end
71
73
 
72
74
  #if code less than 258 return fixed string
73
75
  def [](key)
74
- if key > 257 then super else key.chr end
76
+ if key > 257
77
+ @data[key]
78
+ else
79
+ key.chr
80
+ end
75
81
  end
76
82
 
77
83
  def add(string)
78
- store(@string_table_pos, string)
84
+ @data.store(@string_table_pos, string)
79
85
  @string_table_pos += 1
80
86
  end
81
87
  end
@@ -83,7 +89,7 @@ module PDF
83
89
  # Decompresses a LZW compressed string.
84
90
  #
85
91
  def self.decode(data)
86
- stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
92
+ stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
87
93
  string_table = StringTable.new
88
94
  result = "".dup
89
95
  until (code = stream.read) == CODE_EOD
@@ -119,8 +125,17 @@ module PDF
119
125
  result
120
126
  end
121
127
 
122
- def self.create_new_string(string_table,some_code, other_code)
123
- string_table[some_code] + string_table[other_code][0].chr
128
+ def self.create_new_string(string_table, some_code, other_code)
129
+ raise MalformedPDFError, "invalid LZW data" if some_code.nil? || other_code.nil?
130
+
131
+ item_one = string_table[some_code]
132
+ item_two = string_table[other_code]
133
+
134
+ if item_one && item_two
135
+ item_one + item_two.chr
136
+ else
137
+ raise MalformedPDFError, "invalid LZW data"
138
+ end
124
139
  end
125
140
  private_class_method :create_new_string
126
141
 
data/lib/pdf/reader/no_text_filter.rb ADDED
@@ -0,0 +1,14 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # There's no point rendering zero-width characters
7
+ class NoTextFilter
8
+
9
+ def self.exclude_empty_strings(runs)
10
+ runs.reject { |run| run.text.to_s.size == 0 }
11
+ end
12
+ end
13
+ end
14
+
data/lib/pdf/reader/object_hash.rb CHANGED
@@ -2,6 +2,8 @@
2
2
  # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
+ require 'tempfile'
6
+
5
7
  class PDF::Reader
6
8
  # Provides low level access to the objects in a PDF file via a hash-like
7
9
  # object.
@@ -243,7 +245,7 @@ class PDF::Reader
243
245
 
244
246
  obj.tap { |obj|
245
247
  if !obj.is_a?(PDF::Reader::Stream)
246
- raise MalformedPDFError, "expected object to be an Array or nil"
248
+ raise MalformedPDFError, "expected object to be a Stream or nil"
247
249
  end
248
250
  }
249
251
  end
@@ -496,7 +498,9 @@ class PDF::Reader
496
498
  def fetch_object_stream(key)
497
499
  if xref[key].is_a?(PDF::Reader::Reference)
498
500
  container_key = xref[key]
499
- object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
501
+ stream = deref_stream(container_key)
502
+ raise MalformedPDFError, "Object Stream cannot be nil" if stream.nil?
503
+ object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
500
504
  object_streams[container_key][key.id]
501
505
  end
502
506
  end
@@ -564,7 +568,7 @@ class PDF::Reader
564
568
  end
565
569
 
566
570
  def object_streams
567
- @object_stream ||= {}
571
+ @object_streams ||= {}
568
572
  end
569
573
 
570
574
  # returns an array of object references for all pages in this object store. The ordering of
@@ -573,7 +577,9 @@ class PDF::Reader
573
577
  def get_page_objects(obj)
574
578
  derefed_obj = deref_hash(obj)
575
579
 
576
- if derefed_obj[:Type] == :Page
580
+ if derefed_obj.nil?
581
+ raise MalformedPDFError, "Expected Page or Pages object, got nil"
582
+ elsif derefed_obj[:Type] == :Page
577
583
  [obj]
578
584
  elsif derefed_obj[:Kids]
579
585
  kids = deref_array(derefed_obj[:Kids]) || []
@@ -587,18 +593,18 @@ class PDF::Reader
587
593
 
588
594
  def read_version
589
595
  @io.seek(0)
590
- _m, version = *@io.read(10).match(/PDF-(\d.\d)/)
596
+ _m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
591
597
  @io.seek(0)
592
598
  version.to_f
593
599
  end
594
600
 
595
601
  def extract_io_from(input)
596
- if input.respond_to?(:seek) && input.respond_to?(:read)
602
+ if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
597
603
  input
598
604
  elsif File.file?(input.to_s)
599
- StringIO.new read_as_binary(input)
605
+ StringIO.new read_as_binary(input.to_s)
600
606
  else
601
- raise ArgumentError, "input must be an IO-like object or a filename"
607
+ raise ArgumentError, "input must be an IO-like object or a filename (#{input.class})"
602
608
  end
603
609
  end
604
610
 
@@ -606,7 +612,7 @@ class PDF::Reader
606
612
  if File.respond_to?(:binread)
607
613
  File.binread(input.to_s)
608
614
  else
609
- File.open(input.to_s,"rb") { |f| f.read }
615
+ File.open(input.to_s,"rb") { |f| f.read } || ""
610
616
  end
611
617
  end
612
618
 
data/lib/pdf/reader/object_stream.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -24,7 +24,7 @@ class PDF::Reader
24
24
  end
25
25
 
26
26
  def size
27
- @dict[:N]
27
+ TypeCheck.cast_to_int!(@dict[:N])
28
28
  end
29
29
 
30
30
  private
@@ -40,7 +40,7 @@ class PDF::Reader
40
40
  end
41
41
 
42
42
  def first
43
- @dict[:First]
43
+ TypeCheck.cast_to_int!(@dict[:First])
44
44
  end
45
45
 
46
46
  def buffer
data/lib/pdf/reader/overlapping_runs_filter.rb CHANGED
@@ -1,6 +1,6 @@
1
- # typed: true
2
1
  # coding: utf-8
3
2
  # frozen_string_literal: true
3
+ # typed: strict
4
4
 
5
5
  class PDF::Reader
6
6
  # remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
data/lib/pdf/reader/page.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -43,10 +43,10 @@ module PDF
43
43
  #
44
44
  def initialize(objects, pagenum, options = {})
45
45
  @objects, @pagenum = objects, pagenum
46
- @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1]) || {}
47
47
  @cache = options[:cache] || {}
48
48
 
49
- unless @page_object.is_a?(::Hash)
49
+ if @page_object.empty?
50
50
  raise InvalidPageError, "Invalid page: #{pagenum}"
51
51
  end
52
52
  end
@@ -250,8 +250,8 @@ module PDF
250
250
  params = []
251
251
 
252
252
  while (token = parser.parse_token(PagesStrategy::OPERATORS))
253
- if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
254
- callback(receivers, PagesStrategy::OPERATORS[token], params)
253
+ if token.kind_of?(Token) && method_name = PagesStrategy::OPERATORS[token]
254
+ callback(receivers, method_name, params)
255
255
  params.clear
256
256
  else
257
257
  params << token
@@ -263,9 +263,26 @@ module PDF
263
263
 
264
264
  # calls the name callback method on each receiver object with params as the arguments
265
265
  #
266
+ # The silly style here is because sorbet won't let me use splat arguments
267
+ #
266
268
  def callback(receivers, name, params=[])
267
269
  receivers.each do |receiver|
268
- receiver.send(name, *params) if receiver.respond_to?(name)
270
+ if receiver.respond_to?(name)
271
+ case params.size
272
+ when 0 then receiver.send(name)
273
+ when 1 then receiver.send(name, params[0])
274
+ when 2 then receiver.send(name, params[0], params[1])
275
+ when 3 then receiver.send(name, params[0], params[1], params[2])
276
+ when 4 then receiver.send(name, params[0], params[1], params[2], params[3])
277
+ when 5 then receiver.send(name, params[0], params[1], params[2], params[3], params[4])
278
+ when 6 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5])
279
+ when 7 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6])
280
+ when 8 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7])
281
+ when 9 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8])
282
+ else
283
+ receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8], params[9])
284
+ end
285
+ end
269
286
  end
270
287
  end
271
288
 
@@ -278,7 +295,9 @@ module PDF
278
295
  []
279
296
  else
280
297
  obj = objects.deref_hash(origin)
281
- PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
298
+ if obj.nil?
299
+ raise MalformedPDFError, "parent mus not be nil"
300
+ end
282
301
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
283
302
  end
284
303
  end
data/lib/pdf/reader/page_layout.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'pdf/reader/overlapping_runs_filter'
data/lib/pdf/reader/page_state.rb CHANGED
@@ -16,7 +16,7 @@ class PDF::Reader
16
16
  :h_scaling => 1.0,
17
17
  :text_leading => 0,
18
18
  :text_font => nil,
19
- :text_font_size => nil,
19
+ :text_font_size => 0,
20
20
  :text_mode => 0,
21
21
  :text_rise => 0,
22
22
  :text_knockout => 0
@@ -32,6 +32,12 @@ class PDF::Reader
32
32
  @cs_stack = [page.color_spaces]
33
33
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
34
34
  state[:ctm] = identity_matrix
35
+
36
+ # These are only valid when inside a `BT` block and we re-initialize them on each
37
+ # `BT`. However, we need the instance variables set so PDFs with the text operators
38
+ # out order don't trigger NoMethodError when these are nil
39
+ @text_matrix = identity_matrix
40
+ @text_line_matrix = identity_matrix
35
41
  end
36
42
 
37
43
  #####################################################
data/lib/pdf/reader/page_text_receiver.rb CHANGED
@@ -62,6 +62,8 @@ module PDF
62
62
  runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
63
63
  end
64
64
 
65
+ runs = NoTextFilter.exclude_empty_strings(runs)
66
+
65
67
  if opts.fetch(:merge, true)
66
68
  runs = merge_runs(runs)
67
69
  end
data/lib/pdf/reader/pages_strategy.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
data/lib/pdf/reader/reference.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -31,7 +31,8 @@ class PDF::Reader
31
31
  ################################################################################
32
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
33
33
  class Reference
34
- attr_reader :id, :gen
34
+ attr_reader :id
35
+ attr_reader :gen
35
36
  ################################################################################
36
37
  # Create a new Reference to an object with the specified id and revision number
37
38
  def initialize(id, gen)
data/lib/pdf/reader/resources.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -92,7 +92,8 @@ module PDF
92
92
  # of calling it over and over.
93
93
  #
94
94
  def xobjects
95
- @objects.deref_hash!(@resources[:XObject]) || {}
95
+ dict = @objects.deref_hash!(@resources[:XObject]) || {}
96
+ TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
96
97
  end
97
98
 
98
99
  end
data/lib/pdf/reader/stream.rb CHANGED
@@ -40,7 +40,7 @@ class PDF::Reader
40
40
  # Creates a new stream with the specified dictionary and data. The dictionary
41
41
  # should be a standard ruby hash, the data should be a standard ruby string.
42
42
  def initialize(hash, data)
43
- @hash = hash
43
+ @hash = TypeCheck.cast_to_pdf_dict!(hash)
44
44
  @data = data
45
45
  @udata = nil
46
46
  end
data/lib/pdf/reader/synchronized_cache.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
data/lib/pdf/reader/text_run.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -7,7 +7,10 @@ class PDF::Reader
7
7
  class TextRun
8
8
  include Comparable
9
9
 
10
- attr_reader :origin, :width, :font_size, :text
10
+ attr_reader :origin
11
+ attr_reader :width
12
+ attr_reader :font_size
13
+ attr_reader :text
11
14
 
12
15
  alias :to_s :text
13
16