pdf-reader 2.9.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +8 -0
  3. data/Rakefile +1 -1
  4. data/lib/pdf/reader/buffer.rb +1 -1
  5. data/lib/pdf/reader/cid_widths.rb +7 -5
  6. data/lib/pdf/reader/cmap.rb +1 -1
  7. data/lib/pdf/reader/encoding.rb +4 -4
  8. data/lib/pdf/reader/error.rb +0 -4
  9. data/lib/pdf/reader/filter/depredict.rb +2 -2
  10. data/lib/pdf/reader/font.rb +10 -11
  11. data/lib/pdf/reader/font_descriptor.rb +3 -1
  12. data/lib/pdf/reader/form_xobject.rb +4 -1
  13. data/lib/pdf/reader/glyph_hash.rb +13 -5
  14. data/lib/pdf/reader/lzw.rb +25 -10
  15. data/lib/pdf/reader/no_text_filter.rb +14 -0
  16. data/lib/pdf/reader/object_hash.rb +15 -9
  17. data/lib/pdf/reader/object_stream.rb +3 -3
  18. data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
  19. data/lib/pdf/reader/page.rb +26 -7
  20. data/lib/pdf/reader/page_layout.rb +1 -1
  21. data/lib/pdf/reader/page_state.rb +7 -1
  22. data/lib/pdf/reader/page_text_receiver.rb +2 -0
  23. data/lib/pdf/reader/pages_strategy.rb +1 -1
  24. data/lib/pdf/reader/reference.rb +3 -2
  25. data/lib/pdf/reader/resources.rb +3 -2
  26. data/lib/pdf/reader/stream.rb +1 -1
  27. data/lib/pdf/reader/synchronized_cache.rb +1 -1
  28. data/lib/pdf/reader/text_run.rb +5 -2
  29. data/lib/pdf/reader/transformation_matrix.rb +8 -8
  30. data/lib/pdf/reader/type_check.rb +46 -0
  31. data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
  32. data/lib/pdf/reader/width_calculator/composite.rb +6 -2
  33. data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
  34. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
  35. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
  36. data/lib/pdf/reader/xref.rb +3 -3
  37. data/lib/pdf/reader.rb +5 -10
  38. data/rbi/pdf-reader.rbi +428 -358
  39. metadata +6 -5
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc98ab07b3c66f13f663ea5faf8132b45d769912e0da737917dd054e38318ede
4
- data.tar.gz: 0f2928d9778b5b3ea8fca5e723a2b3fa6f275df70b02f1eb4385e077c535ac78
3
+ metadata.gz: 2c84983c18d983798ff5f2ede514b540ee55a788229501976474b7341bf57fba
4
+ data.tar.gz: 79b8f092e72a194110062cf7d7e9425c0a6531e145009c9b7c10c2c072b3d1d5
5
5
  SHA512:
6
- metadata.gz: 210cd8c8cef93b0e0fac1446c091c2a62772ffe8b1786627089e5a330ca7defd501df7cccc0b48d326d38ff74318b162e512220e8a4460260bebe7da0ef8b757
7
- data.tar.gz: 047e7f6641411557b1d3b50035dbdf55647c63deede273b6ce4442230b85372045494b81e88c1ffcaa09a7c5ea26823ee33b33c3bf82013328d0e32a95021284
6
+ metadata.gz: '09c97a875bb46389172ed48ae8b2779ba3a8e032852b6a9943f187de13c23649e2398a5374358c62b64cf9e13bbf7f819bb5072d9aaa6882b9b94e96d23f5c13'
7
+ data.tar.gz: ed92250acee85f4e355785dd043f7774a5883550fe82b01b3cd9e10011f93a1fcdd500108b0e1f4e2af562bddd833c03ca601078b3eba8ee2e9990fd5e76305a
data/CHANGELOG CHANGED
@@ -1,3 +1,11 @@
1
+ v2.11.0 (26th October 2022)
2
+ - Various bug fixes
3
+ - Expanded sorbet type annotations
4
+
5
+ v2.10.0 (12th May 2022)
6
+ - Various bug fixes
7
+ - Expanded sorbet type annotations
8
+
1
9
  v2.9.2 (20th February 2022)
2
10
  - Fix PDF::Reader::ObjectHash#page_references to return an Array of PDF::Reader::Reference (http://github.com/yob/pdf-reader/pull/444)
3
11
 
data/Rakefile CHANGED
@@ -14,7 +14,7 @@ desc "Run cane to check quality metrics"
14
14
  Cane::RakeTask.new(:quality) do |cane|
15
15
  cane.abc_max = 20
16
16
  cane.style_measure = 100
17
- cane.max_violations = 28
17
+ cane.max_violations = 33
18
18
 
19
19
  cane.use Morecane::EncodingCheck, :encoding_glob => "{app,lib,spec}/**/*.rb"
20
20
  end
@@ -1,5 +1,5 @@
1
1
  # coding: ASCII-8BIT
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -1,9 +1,7 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
- #
6
-
7
5
  require 'forwardable'
8
6
 
9
7
  class PDF::Reader
@@ -33,10 +31,10 @@ class PDF::Reader
33
31
  params << array.shift
34
32
 
35
33
  if params.size == 2 && params.last.is_a?(Array)
36
- widths.merge! parse_first_form(params.first, params.last)
34
+ widths.merge! parse_first_form(params.first.to_i, Array(params.last))
37
35
  params = []
38
36
  elsif params.size == 3
39
- widths.merge! parse_second_form(params[0], params[1], params[2])
37
+ widths.merge! parse_second_form(params[0].to_i, params[1].to_i, params[2].to_i)
40
38
  params = []
41
39
  end
42
40
  end
@@ -54,6 +52,10 @@ class PDF::Reader
54
52
 
55
53
  # this is the form 10 20 123 where all index between 10 and 20 have width 123
56
54
  def parse_second_form(first, final, width)
55
+ if first > final
56
+ raise MalformedPDFError, "CidWidths: #{first} must be less than #{final}"
57
+ end
58
+
57
59
  (first..final).inject({}) { |accum, index|
58
60
  accum[index] = width
59
61
  accum
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -76,9 +76,9 @@ class PDF::Reader
76
76
  diff.each do |val|
77
77
  if val.kind_of?(Numeric)
78
78
  byte = val.to_i
79
- else
79
+ elsif codepoint = glyphlist.name_to_unicode(val)
80
80
  @differences[byte] = val
81
- @mapping[byte] = glyphlist.name_to_unicode(val)
81
+ @mapping[byte] = codepoint
82
82
  byte += 1
83
83
  end
84
84
  end
@@ -167,7 +167,7 @@ class PDF::Reader
167
167
  end
168
168
 
169
169
  def convert_to_utf8(str)
170
- ret = str.unpack(unpack).map! { |c| @mapping[c] || c }.pack("U*")
170
+ ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
171
171
  ret.force_encoding("UTF-8")
172
172
  ret
173
173
  end
@@ -58,10 +58,6 @@ class PDF::Reader
58
58
  def self.validate_not_nil(object, name)
59
59
  raise ArgumentError, "#{object} must not be nil" if object.nil?
60
60
  end
61
- ################################################################################
62
- def self.validate_not_nil_as_malformed(object, name)
63
- raise MalformedPDFError, "#{object} must not be nil" if object.nil?
64
- end
65
61
  end
66
62
 
67
63
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -125,7 +125,7 @@ class PDF::Reader
125
125
  row_data[index] = (byte + paeth) % 256
126
126
  end
127
127
  else
128
- raise ArgumentError, "Invalid filter algorithm #{filter}"
128
+ raise MalformedPDFError, "Invalid filter algorithm #{filter}"
129
129
  end
130
130
 
131
131
  s = []
@@ -205,14 +205,17 @@ class PDF::Reader
205
205
  end
206
206
 
207
207
  def extract_descendants(obj)
208
- return unless obj[:DescendantFonts]
209
208
  # per PDF 32000-1:2008 pp. 280 :DescendentFonts is:
210
209
  # A one-element array specifying the CIDFont dictionary that is the
211
210
  # descendant of this Type 0 font.
212
- descendants = @ohash.deref_array(obj[:DescendantFonts])
213
- @descendantfonts = descendants.map { |desc|
214
- PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
- }
211
+ if obj[:DescendantFonts]
212
+ descendants = @ohash.deref_array(obj[:DescendantFonts])
213
+ @descendantfonts = descendants.map { |desc|
214
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
215
+ }
216
+ else
217
+ @descendantfonts = []
218
+ end
216
219
  end
217
220
 
218
221
  def to_utf8_via_cmap(params)
@@ -226,9 +229,7 @@ class PDF::Reader
226
229
  @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
227
230
  }.flatten.pack("U*")
228
231
  when Array
229
- params.collect { |param| to_utf8_via_cmap(param) }
230
- else
231
- params
232
+ params.collect { |param| to_utf8_via_cmap(param) }.join("")
232
233
  end
233
234
  end
234
235
 
@@ -243,9 +244,7 @@ class PDF::Reader
243
244
  when String
244
245
  encoding.to_utf8(params)
245
246
  when Array
246
- params.collect { |param| to_utf8_via_encoding(param) }
247
- else
248
- params
247
+ params.collect { |param| to_utf8_via_encoding(param) }.join("")
249
248
  end
250
249
  end
251
250
 
@@ -56,7 +56,9 @@ class PDF::Reader
56
56
  end
57
57
  char_metric = ttf_program_stream.horizontal_metrics.metrics[glyph_id]
58
58
  if char_metric
59
- return char_metric.advance_width
59
+ char_metric.advance_width
60
+ else
61
+ 0
60
62
  end
61
63
  end
62
64
  end
@@ -45,7 +45,7 @@ module PDF
45
45
  def font_objects
46
46
  raw_fonts = @objects.deref_hash(fonts)
47
47
  ::Hash[raw_fonts.map { |label, font|
48
- [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font))]
48
+ [label, PDF::Reader::Font.new(@objects, @objects.deref_hash(font) || {})]
49
49
  }]
50
50
  end
51
51
 
@@ -55,6 +55,9 @@ module PDF
55
55
  # See the comments on PDF::Reader::Page#walk for more detail.
56
56
  #
57
57
  def walk(*receivers)
58
+ receivers = receivers.map { |receiver|
59
+ ValidatingReceiver.new(receiver)
60
+ }
58
61
  content_stream(receivers, raw_content)
59
62
  end
60
63
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -33,10 +33,18 @@ class PDF::Reader
33
33
  #
34
34
  class GlyphHash # :nodoc:
35
35
  def initialize
36
+ @@by_codepoint_cache ||= nil
37
+ @@by_name_cache ||= nil
38
+
36
39
  # only parse the glyph list once, and cache the results (for performance)
37
- adobe = @@cache ||= load_adobe_glyph_mapping
38
- @by_name = adobe.first
39
- @by_codepoint = adobe.last
40
+ if @@by_codepoint_cache != nil && @@by_name_cache != nil
41
+ @by_name = @@by_name_cache
42
+ @by_codepoint = @@by_codepoint_cache
43
+ else
44
+ by_name, by_codepoint = load_adobe_glyph_mapping
45
+ @by_name = @@by_name_cache ||= by_name
46
+ @by_codepoint = @@by_codepoint_cache ||= by_codepoint
47
+ end
40
48
  end
41
49
 
42
50
  # attempt to convert a PDF Name to a unicode codepoint. Returns nil
@@ -127,7 +135,7 @@ class PDF::Reader
127
135
  end
128
136
  end
129
137
 
130
- [keyed_by_name.freeze, keyed_by_codepoint.freeze]
138
+ return keyed_by_name.freeze, keyed_by_codepoint.freeze
131
139
  end
132
140
 
133
141
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -25,12 +25,14 @@ module PDF
25
25
  def initialize(data, bits_in_chunk)
26
26
  @data = data
27
27
  @data.force_encoding("BINARY")
28
- @bits_in_chunk = bits_in_chunk
28
+ set_bits_in_chunk(bits_in_chunk)
29
29
  @current_pos = 0
30
30
  @bits_left_in_byte = 8
31
31
  end
32
32
 
33
33
  def set_bits_in_chunk(bits_in_chunk)
34
+ raise MalformedPDFError, "invalid LZW bits" if bits_in_chunk < 9 || bits_in_chunk > 12
35
+
34
36
  @bits_in_chunk = bits_in_chunk
35
37
  end
36
38
 
@@ -39,7 +41,7 @@ module PDF
39
41
  chunk = -1
40
42
  while bits_left_in_chunk > 0 and @current_pos < @data.size
41
43
  chunk = 0 if chunk < 0
42
- codepoint = @data[@current_pos, 1].unpack("C*")[0]
44
+ codepoint = @data[@current_pos, 1].to_s.unpack("C*")[0].to_i
43
45
  current_byte = codepoint & (2**@bits_left_in_byte - 1) #clear consumed bits
44
46
  dif = bits_left_in_chunk - @bits_left_in_byte
45
47
  if dif > 0 then current_byte <<= dif
@@ -61,21 +63,25 @@ module PDF
61
63
  CODE_CLEAR_TABLE = 256 #clear table
62
64
 
63
65
  # stores de pairs code => string
64
- class StringTable < Hash # :nodoc:
66
+ class StringTable
65
67
  attr_reader :string_table_pos
66
68
 
67
69
  def initialize
68
- super
70
+ @data = Hash.new
69
71
  @string_table_pos = 258 #initial code
70
72
  end
71
73
 
72
74
  #if code less than 258 return fixed string
73
75
  def [](key)
74
- if key > 257 then super else key.chr end
76
+ if key > 257
77
+ @data[key]
78
+ else
79
+ key.chr
80
+ end
75
81
  end
76
82
 
77
83
  def add(string)
78
- store(@string_table_pos, string)
84
+ @data.store(@string_table_pos, string)
79
85
  @string_table_pos += 1
80
86
  end
81
87
  end
@@ -83,7 +89,7 @@ module PDF
83
89
  # Decompresses a LZW compressed string.
84
90
  #
85
91
  def self.decode(data)
86
- stream = BitStream.new data.to_s, 9 # size of codes between 9 and 12 bits
92
+ stream = BitStream.new(data.to_s, 9) # size of codes between 9 and 12 bits
87
93
  string_table = StringTable.new
88
94
  result = "".dup
89
95
  until (code = stream.read) == CODE_EOD
@@ -119,8 +125,17 @@ module PDF
119
125
  result
120
126
  end
121
127
 
122
- def self.create_new_string(string_table,some_code, other_code)
123
- string_table[some_code] + string_table[other_code][0].chr
128
+ def self.create_new_string(string_table, some_code, other_code)
129
+ raise MalformedPDFError, "invalid LZW data" if some_code.nil? || other_code.nil?
130
+
131
+ item_one = string_table[some_code]
132
+ item_two = string_table[other_code]
133
+
134
+ if item_one && item_two
135
+ item_one + item_two.chr
136
+ else
137
+ raise MalformedPDFError, "invalid LZW data"
138
+ end
124
139
  end
125
140
  private_class_method :create_new_string
126
141
 
@@ -0,0 +1,14 @@
1
+ # coding: utf-8
2
+ # typed: strict
3
+ # frozen_string_literal: true
4
+
5
+ class PDF::Reader
6
+ # There's no point rendering zero-width characters
7
+ class NoTextFilter
8
+
9
+ def self.exclude_empty_strings(runs)
10
+ runs.reject { |run| run.text.to_s.size == 0 }
11
+ end
12
+ end
13
+ end
14
+
@@ -2,6 +2,8 @@
2
2
  # typed: true
3
3
  # frozen_string_literal: true
4
4
 
5
+ require 'tempfile'
6
+
5
7
  class PDF::Reader
6
8
  # Provides low level access to the objects in a PDF file via a hash-like
7
9
  # object.
@@ -243,7 +245,7 @@ class PDF::Reader
243
245
 
244
246
  obj.tap { |obj|
245
247
  if !obj.is_a?(PDF::Reader::Stream)
246
- raise MalformedPDFError, "expected object to be an Array or nil"
248
+ raise MalformedPDFError, "expected object to be a Stream or nil"
247
249
  end
248
250
  }
249
251
  end
@@ -496,7 +498,9 @@ class PDF::Reader
496
498
  def fetch_object_stream(key)
497
499
  if xref[key].is_a?(PDF::Reader::Reference)
498
500
  container_key = xref[key]
499
- object_streams[container_key] ||= PDF::Reader::ObjectStream.new(object(container_key))
501
+ stream = deref_stream(container_key)
502
+ raise MalformedPDFError, "Object Stream cannot be nil" if stream.nil?
503
+ object_streams[container_key] ||= PDF::Reader::ObjectStream.new(stream)
500
504
  object_streams[container_key][key.id]
501
505
  end
502
506
  end
@@ -564,7 +568,7 @@ class PDF::Reader
564
568
  end
565
569
 
566
570
  def object_streams
567
- @object_stream ||= {}
571
+ @object_streams ||= {}
568
572
  end
569
573
 
570
574
  # returns an array of object references for all pages in this object store. The ordering of
@@ -573,7 +577,9 @@ class PDF::Reader
573
577
  def get_page_objects(obj)
574
578
  derefed_obj = deref_hash(obj)
575
579
 
576
- if derefed_obj[:Type] == :Page
580
+ if derefed_obj.nil?
581
+ raise MalformedPDFError, "Expected Page or Pages object, got nil"
582
+ elsif derefed_obj[:Type] == :Page
577
583
  [obj]
578
584
  elsif derefed_obj[:Kids]
579
585
  kids = deref_array(derefed_obj[:Kids]) || []
@@ -587,18 +593,18 @@ class PDF::Reader
587
593
 
588
594
  def read_version
589
595
  @io.seek(0)
590
- _m, version = *@io.read(10).match(/PDF-(\d.\d)/)
596
+ _m, version = *@io.read(10).to_s.match(/PDF-(\d.\d)/)
591
597
  @io.seek(0)
592
598
  version.to_f
593
599
  end
594
600
 
595
601
  def extract_io_from(input)
596
- if input.respond_to?(:seek) && input.respond_to?(:read)
602
+ if input.is_a?(IO) || input.is_a?(StringIO) || input.is_a?(Tempfile)
597
603
  input
598
604
  elsif File.file?(input.to_s)
599
- StringIO.new read_as_binary(input)
605
+ StringIO.new read_as_binary(input.to_s)
600
606
  else
601
- raise ArgumentError, "input must be an IO-like object or a filename"
607
+ raise ArgumentError, "input must be an IO-like object or a filename (#{input.class})"
602
608
  end
603
609
  end
604
610
 
@@ -606,7 +612,7 @@ class PDF::Reader
606
612
  if File.respond_to?(:binread)
607
613
  File.binread(input.to_s)
608
614
  else
609
- File.open(input.to_s,"rb") { |f| f.read }
615
+ File.open(input.to_s,"rb") { |f| f.read } || ""
610
616
  end
611
617
  end
612
618
 
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -24,7 +24,7 @@ class PDF::Reader
24
24
  end
25
25
 
26
26
  def size
27
- @dict[:N]
27
+ TypeCheck.cast_to_int!(@dict[:N])
28
28
  end
29
29
 
30
30
  private
@@ -40,7 +40,7 @@ class PDF::Reader
40
40
  end
41
41
 
42
42
  def first
43
- @dict[:First]
43
+ TypeCheck.cast_to_int!(@dict[:First])
44
44
  end
45
45
 
46
46
  def buffer
@@ -1,6 +1,6 @@
1
- # typed: true
2
1
  # coding: utf-8
3
2
  # frozen_string_literal: true
3
+ # typed: strict
4
4
 
5
5
  class PDF::Reader
6
6
  # remove duplicates from a collection of TextRun objects. This can be helpful when a PDF
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -43,10 +43,10 @@ module PDF
43
43
  #
44
44
  def initialize(objects, pagenum, options = {})
45
45
  @objects, @pagenum = objects, pagenum
46
- @page_object = objects.deref_hash(objects.page_references[pagenum - 1])
46
+ @page_object = objects.deref_hash(objects.page_references[pagenum - 1]) || {}
47
47
  @cache = options[:cache] || {}
48
48
 
49
- unless @page_object.is_a?(::Hash)
49
+ if @page_object.empty?
50
50
  raise InvalidPageError, "Invalid page: #{pagenum}"
51
51
  end
52
52
  end
@@ -250,8 +250,8 @@ module PDF
250
250
  params = []
251
251
 
252
252
  while (token = parser.parse_token(PagesStrategy::OPERATORS))
253
- if token.kind_of?(Token) and PagesStrategy::OPERATORS.has_key?(token)
254
- callback(receivers, PagesStrategy::OPERATORS[token], params)
253
+ if token.kind_of?(Token) && method_name = PagesStrategy::OPERATORS[token]
254
+ callback(receivers, method_name, params)
255
255
  params.clear
256
256
  else
257
257
  params << token
@@ -263,9 +263,26 @@ module PDF
263
263
 
264
264
  # calls the name callback method on each receiver object with params as the arguments
265
265
  #
266
+ # The silly style here is because sorbet won't let me use splat arguments
267
+ #
266
268
  def callback(receivers, name, params=[])
267
269
  receivers.each do |receiver|
268
- receiver.send(name, *params) if receiver.respond_to?(name)
270
+ if receiver.respond_to?(name)
271
+ case params.size
272
+ when 0 then receiver.send(name)
273
+ when 1 then receiver.send(name, params[0])
274
+ when 2 then receiver.send(name, params[0], params[1])
275
+ when 3 then receiver.send(name, params[0], params[1], params[2])
276
+ when 4 then receiver.send(name, params[0], params[1], params[2], params[3])
277
+ when 5 then receiver.send(name, params[0], params[1], params[2], params[3], params[4])
278
+ when 6 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5])
279
+ when 7 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6])
280
+ when 8 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7])
281
+ when 9 then receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8])
282
+ else
283
+ receiver.send(name, params[0], params[1], params[2], params[3], params[4], params[5], params[6], params[7], params[8], params[9])
284
+ end
285
+ end
269
286
  end
270
287
  end
271
288
 
@@ -278,7 +295,9 @@ module PDF
278
295
  []
279
296
  else
280
297
  obj = objects.deref_hash(origin)
281
- PDF::Reader::Error.validate_not_nil_as_malformed(obj, "parent")
298
+ if obj.nil?
299
+ raise MalformedPDFError, "parent mus not be nil"
300
+ end
282
301
  [ select_inheritable(obj) ] + ancestors(obj[:Parent])
283
302
  end
284
303
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'pdf/reader/overlapping_runs_filter'
@@ -16,7 +16,7 @@ class PDF::Reader
16
16
  :h_scaling => 1.0,
17
17
  :text_leading => 0,
18
18
  :text_font => nil,
19
- :text_font_size => nil,
19
+ :text_font_size => 0,
20
20
  :text_mode => 0,
21
21
  :text_rise => 0,
22
22
  :text_knockout => 0
@@ -32,6 +32,12 @@ class PDF::Reader
32
32
  @cs_stack = [page.color_spaces]
33
33
  @stack = [DEFAULT_GRAPHICS_STATE.dup]
34
34
  state[:ctm] = identity_matrix
35
+
36
+ # These are only valid when inside a `BT` block and we re-initialize them on each
37
+ # `BT`. However, we need the instance variables set so PDFs with the text operators
38
+ # out order don't trigger NoMethodError when these are nil
39
+ @text_matrix = identity_matrix
40
+ @text_line_matrix = identity_matrix
35
41
  end
36
42
 
37
43
  #####################################################
@@ -62,6 +62,8 @@ module PDF
62
62
  runs = OverlappingRunsFilter.exclude_redundant_runs(runs)
63
63
  end
64
64
 
65
+ runs = NoTextFilter.exclude_empty_strings(runs)
66
+
65
67
  if opts.fetch(:merge, true)
66
68
  runs = merge_runs(runs)
67
69
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -31,7 +31,8 @@ class PDF::Reader
31
31
  ################################################################################
32
32
  # An internal PDF::Reader class that represents an indirect reference to a PDF Object
33
33
  class Reference
34
- attr_reader :id, :gen
34
+ attr_reader :id
35
+ attr_reader :gen
35
36
  ################################################################################
36
37
  # Create a new Reference to an object with the specified id and revision number
37
38
  def initialize(id, gen)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  module PDF
@@ -92,7 +92,8 @@ module PDF
92
92
  # of calling it over and over.
93
93
  #
94
94
  def xobjects
95
- @objects.deref_hash!(@resources[:XObject]) || {}
95
+ dict = @objects.deref_hash!(@resources[:XObject]) || {}
96
+ TypeCheck.cast_to_pdf_dict_with_stream_values!(dict)
96
97
  end
97
98
 
98
99
  end
@@ -40,7 +40,7 @@ class PDF::Reader
40
40
  # Creates a new stream with the specified dictionary and data. The dictionary
41
41
  # should be a standard ruby hash, the data should be a standard ruby string.
42
42
  def initialize(hash, data)
43
- @hash = hash
43
+ @hash = TypeCheck.cast_to_pdf_dict!(hash)
44
44
  @data = data
45
45
  @udata = nil
46
46
  end
@@ -1,5 +1,5 @@
1
1
  # encoding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  # utilities.rb : General-purpose utility classes which don't fit anywhere else
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -7,7 +7,10 @@ class PDF::Reader
7
7
  class TextRun
8
8
  include Comparable
9
9
 
10
- attr_reader :origin, :width, :font_size, :text
10
+ attr_reader :origin
11
+ attr_reader :width
12
+ attr_reader :font_size
13
+ attr_reader :text
11
14
 
12
15
  alias :to_s :text
13
16