pdf-reader 2.9.2 → 2.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +8 -0
  3. data/Rakefile +1 -1
  4. data/lib/pdf/reader/buffer.rb +1 -1
  5. data/lib/pdf/reader/cid_widths.rb +7 -5
  6. data/lib/pdf/reader/cmap.rb +1 -1
  7. data/lib/pdf/reader/encoding.rb +4 -4
  8. data/lib/pdf/reader/error.rb +0 -4
  9. data/lib/pdf/reader/filter/depredict.rb +2 -2
  10. data/lib/pdf/reader/font.rb +10 -11
  11. data/lib/pdf/reader/font_descriptor.rb +3 -1
  12. data/lib/pdf/reader/form_xobject.rb +4 -1
  13. data/lib/pdf/reader/glyph_hash.rb +13 -5
  14. data/lib/pdf/reader/lzw.rb +25 -10
  15. data/lib/pdf/reader/no_text_filter.rb +14 -0
  16. data/lib/pdf/reader/object_hash.rb +15 -9
  17. data/lib/pdf/reader/object_stream.rb +3 -3
  18. data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
  19. data/lib/pdf/reader/page.rb +26 -7
  20. data/lib/pdf/reader/page_layout.rb +1 -1
  21. data/lib/pdf/reader/page_state.rb +7 -1
  22. data/lib/pdf/reader/page_text_receiver.rb +2 -0
  23. data/lib/pdf/reader/pages_strategy.rb +1 -1
  24. data/lib/pdf/reader/reference.rb +3 -2
  25. data/lib/pdf/reader/resources.rb +3 -2
  26. data/lib/pdf/reader/stream.rb +1 -1
  27. data/lib/pdf/reader/synchronized_cache.rb +1 -1
  28. data/lib/pdf/reader/text_run.rb +5 -2
  29. data/lib/pdf/reader/transformation_matrix.rb +8 -8
  30. data/lib/pdf/reader/type_check.rb +46 -0
  31. data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
  32. data/lib/pdf/reader/width_calculator/composite.rb +6 -2
  33. data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
  34. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
  35. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
  36. data/lib/pdf/reader/xref.rb +3 -3
  37. data/lib/pdf/reader.rb +5 -10
  38. data/rbi/pdf-reader.rbi +428 -358
  39. metadata +6 -5
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -51,7 +51,7 @@ class PDF::Reader
51
51
  # displacement to speed up processing documents that use vertical
52
52
  # writing systems
53
53
  #
54
- def multiply!(a,b=nil,c=nil, d=nil,e=nil,f=nil)
54
+ def multiply!(a,b,c, d,e,f)
55
55
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
56
56
  # the identity matrix, no effect
57
57
  self
@@ -164,12 +164,12 @@ class PDF::Reader
164
164
  # [ e f 1 ] [ e f 1 ]
165
165
  #
166
166
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
167
- newa = (@a * a2) + (@b * c2) + (0 * e2)
168
- newb = (@a * b2) + (@b * d2) + (0 * f2)
169
- newc = (@c * a2) + (@d * c2) + (0 * e2)
170
- newd = (@c * b2) + (@d * d2) + (0 * f2)
171
- newe = (@e * a2) + (@f * c2) + (1 * e2)
172
- newf = (@e * b2) + (@f * d2) + (1 * f2)
167
+ newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
+ newb = (@a * b2) + (@b * d2) + (f2 * 0)
169
+ newc = (@c * a2) + (@d * c2) + (e2 * 0)
170
+ newd = (@c * b2) + (@d * d2) + (f2 * 0)
171
+ newe = (@e * a2) + (@f * c2) + (e2 * 1)
172
+ newf = (@e * b2) + (@f * d2) + (f2 * 1)
173
173
  @a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
174
174
  end
175
175
 
@@ -9,6 +9,18 @@ module PDF
9
9
  #
10
10
  class TypeCheck
11
11
 
12
+ def self.cast_to_int!(obj)
13
+ if obj.is_a?(Integer)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_i)
18
+ obj.to_i
19
+ else
20
+ raise MalformedPDFError, "Unable to cast to integer"
21
+ end
22
+ end
23
+
12
24
  def self.cast_to_numeric!(obj)
13
25
  if obj.is_a?(Numeric)
14
26
  obj
@@ -46,6 +58,40 @@ module PDF
46
58
  raise MalformedPDFError, "Unable to cast to symbol"
47
59
  end
48
60
  end
61
+
62
+ def self.cast_to_symbol!(obj)
63
+ res = cast_to_symbol(obj)
64
+ if res
65
+ res
66
+ else
67
+ raise MalformedPDFError, "Unable to cast to symbol"
68
+ end
69
+ end
70
+
71
+ def self.cast_to_pdf_dict!(obj)
72
+ if obj.is_a?(Hash)
73
+ obj
74
+ elsif obj.respond_to?(:to_h)
75
+ obj.to_h
76
+ else
77
+ raise MalformedPDFError, "Unable to cast to hash"
78
+ end
79
+ end
80
+
81
+ def self.cast_to_pdf_dict_with_stream_values!(obj)
82
+ if obj.is_a?(Hash)
83
+ result = Hash.new
84
+ obj.each do |k, v|
85
+ raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
86
+ result[cast_to_symbol!(k)] = v
87
+ end
88
+ result
89
+ elsif obj.respond_to?(:to_h)
90
+ cast_to_pdf_dict_with_stream_values!(obj.to_h)
91
+ else
92
+ raise MalformedPDFError, "Unable to cast to hash"
93
+ end
94
+ end
49
95
  end
50
96
  end
51
97
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
@@ -53,12 +53,13 @@ class PDF::Reader
53
53
  private
54
54
 
55
55
  def control_character?(code_point)
56
- @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
56
+ match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
57
+ match ? true : false
57
58
  end
58
59
 
59
60
  def extract_basefont(font_name)
60
61
  if BUILTINS.include?(font_name)
61
- font_name
62
+ font_name.to_s
62
63
  else
63
64
  "Times-Roman"
64
65
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -22,7 +22,11 @@ class PDF::Reader
22
22
 
23
23
  w = @widths[code_point]
24
24
  # 0 is a valid width
25
- return w.to_f unless w.nil?
25
+ if w
26
+ w.to_f
27
+ else
28
+ 0
29
+ end
26
30
  end
27
31
  end
28
32
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -10,8 +10,8 @@ class PDF::Reader
10
10
  def initialize(font)
11
11
  @font = font
12
12
 
13
- if @font.font_descriptor
14
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
15
15
  else
16
16
  @missing_width = 0
17
17
  end
@@ -30,25 +30,23 @@ class PDF::Reader
30
30
 
31
31
  # in ruby a negative index is valid, and will go from the end of the array
32
32
  # which is undesireable in this case.
33
- if @font.first_char && @font.first_char <= code_point
34
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
33
+ first_char = @font.first_char
34
+ if first_char && first_char <= code_point
35
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
35
36
  else
36
37
  @missing_width.to_f
37
38
  end
38
39
  end
39
40
 
40
41
  def glyph_width_from_descriptor(code_point)
41
- return unless @font.font_descriptor
42
-
43
42
  # true type fonts will have most of their information contained
44
43
  # with-in a program inside the font descriptor, however the widths
45
44
  # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
46
45
  # so this width will need to be scaled
47
- w = @font.font_descriptor.glyph_width(code_point)
48
- if w
49
- w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
50
- else
51
- nil
46
+ if fd = @font.font_descriptor
47
+ if w = fd.glyph_width(code_point)
48
+ w.to_f * fd.glyph_to_pdf_scale_factor.to_f
49
+ end
52
50
  end
53
51
  end
54
52
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -10,8 +10,8 @@ class PDF::Reader
10
10
  def initialize(font)
11
11
  @font = font
12
12
 
13
- if @font.font_descriptor
14
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
15
15
  else
16
16
  @missing_width = 0
17
17
  end
@@ -23,8 +23,9 @@ class PDF::Reader
23
23
 
24
24
  # in ruby a negative index is valid, and will go from the end of the array
25
25
  # which is undesireable in this case.
26
- if @font.first_char <= code_point
27
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ first_char = @font.first_char
27
+ if first_char && first_char <= code_point
28
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
28
29
  else
29
30
  @missing_width.to_f
30
31
  end
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -13,13 +13,16 @@ class PDF::Reader
13
13
 
14
14
  def initialize(font)
15
15
  @font = font
16
- @descendant_font = @font.descendantfonts.first
17
16
  end
18
17
 
19
18
  def glyph_width(code_point)
20
19
  return 0 if code_point.nil? || code_point < 0
21
20
 
22
- @descendant_font.glyph_width(code_point).to_f
21
+ if descendant_font = @font.descendantfonts.first
22
+ descendant_font.glyph_width(code_point).to_f
23
+ else
24
+ 0
25
+ end
23
26
  end
24
27
  end
25
28
  end
@@ -73,7 +73,7 @@ class PDF::Reader
73
73
  #
74
74
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
75
75
  def [](ref)
76
- @xref[ref.id][ref.gen]
76
+ @xref.fetch(ref.id, {}).fetch(ref.gen)
77
77
  rescue
78
78
  raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
79
79
  end
@@ -82,8 +82,8 @@ class PDF::Reader
82
82
  def each(&block)
83
83
  ids = @xref.keys.sort
84
84
  ids.each do |id|
85
- gen = @xref[id].keys.sort[-1]
86
- yield PDF::Reader::Reference.new(id, gen)
85
+ gen = @xref.fetch(id, {}).keys.sort[-1]
86
+ yield PDF::Reader::Reference.new(id, gen.to_i)
87
87
  end
88
88
  end
89
89
  ################################################################################
data/lib/pdf/reader.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -128,7 +128,7 @@ module PDF
128
128
  doc_strings_to_utf8(dict)
129
129
  end
130
130
 
131
- # Return a Hash with extra metadata provided by the author of the PDF file. Not
131
+ # Return a String with extra XML metadata provided by the author of the PDF file. Not
132
132
  # always present.
133
133
  #
134
134
  def metadata
@@ -182,7 +182,7 @@ module PDF
182
182
  #
183
183
  # reader.pages.each do |page|
184
184
  # puts page.fonts
185
- # puts page.images
185
+ # puts page.rectangles
186
186
  # puts page.text
187
187
  # end
188
188
  #
@@ -272,13 +272,7 @@ module PDF
272
272
  end
273
273
 
274
274
  def root
275
- @root ||= begin
276
- obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
277
- unless obj.kind_of?(::Hash)
278
- raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
279
- end
280
- obj
281
- end
275
+ @root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
282
276
  end
283
277
 
284
278
  end
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
315
309
  require 'pdf/reader/rectangle'
316
310
  require 'pdf/reader/reference'
317
311
  require 'pdf/reader/register_receiver'
312
+ require 'pdf/reader/no_text_filter'
318
313
  require 'pdf/reader/null_security_handler'
319
314
  require 'pdf/reader/security_handler_factory'
320
315
  require 'pdf/reader/standard_key_builder'