pdf-reader 2.9.2 → 2.11.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +8 -0
  3. data/Rakefile +1 -1
  4. data/lib/pdf/reader/buffer.rb +1 -1
  5. data/lib/pdf/reader/cid_widths.rb +7 -5
  6. data/lib/pdf/reader/cmap.rb +1 -1
  7. data/lib/pdf/reader/encoding.rb +4 -4
  8. data/lib/pdf/reader/error.rb +0 -4
  9. data/lib/pdf/reader/filter/depredict.rb +2 -2
  10. data/lib/pdf/reader/font.rb +10 -11
  11. data/lib/pdf/reader/font_descriptor.rb +3 -1
  12. data/lib/pdf/reader/form_xobject.rb +4 -1
  13. data/lib/pdf/reader/glyph_hash.rb +13 -5
  14. data/lib/pdf/reader/lzw.rb +25 -10
  15. data/lib/pdf/reader/no_text_filter.rb +14 -0
  16. data/lib/pdf/reader/object_hash.rb +15 -9
  17. data/lib/pdf/reader/object_stream.rb +3 -3
  18. data/lib/pdf/reader/overlapping_runs_filter.rb +1 -1
  19. data/lib/pdf/reader/page.rb +26 -7
  20. data/lib/pdf/reader/page_layout.rb +1 -1
  21. data/lib/pdf/reader/page_state.rb +7 -1
  22. data/lib/pdf/reader/page_text_receiver.rb +2 -0
  23. data/lib/pdf/reader/pages_strategy.rb +1 -1
  24. data/lib/pdf/reader/reference.rb +3 -2
  25. data/lib/pdf/reader/resources.rb +3 -2
  26. data/lib/pdf/reader/stream.rb +1 -1
  27. data/lib/pdf/reader/synchronized_cache.rb +1 -1
  28. data/lib/pdf/reader/text_run.rb +5 -2
  29. data/lib/pdf/reader/transformation_matrix.rb +8 -8
  30. data/lib/pdf/reader/type_check.rb +46 -0
  31. data/lib/pdf/reader/width_calculator/built_in.rb +4 -3
  32. data/lib/pdf/reader/width_calculator/composite.rb +6 -2
  33. data/lib/pdf/reader/width_calculator/true_type.rb +10 -12
  34. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +6 -5
  35. data/lib/pdf/reader/width_calculator/type_zero.rb +6 -3
  36. data/lib/pdf/reader/xref.rb +3 -3
  37. data/lib/pdf/reader.rb +5 -10
  38. data/rbi/pdf-reader.rbi +428 -358
  39. metadata +6 -5
data/lib/pdf/reader/transformation_matrix.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -51,7 +51,7 @@ class PDF::Reader
51
51
  # displacement to speed up processing documents that use vertical
52
52
  # writing systems
53
53
  #
54
- def multiply!(a,b=nil,c=nil, d=nil,e=nil,f=nil)
54
+ def multiply!(a,b,c, d,e,f)
55
55
  if a == 1 && b == 0 && c == 0 && d == 1 && e == 0 && f == 0
56
56
  # the identity matrix, no effect
57
57
  self
@@ -164,12 +164,12 @@ class PDF::Reader
164
164
  # [ e f 1 ] [ e f 1 ]
165
165
  #
166
166
  def regular_multiply!(a2,b2,c2,d2,e2,f2)
167
- newa = (@a * a2) + (@b * c2) + (0 * e2)
168
- newb = (@a * b2) + (@b * d2) + (0 * f2)
169
- newc = (@c * a2) + (@d * c2) + (0 * e2)
170
- newd = (@c * b2) + (@d * d2) + (0 * f2)
171
- newe = (@e * a2) + (@f * c2) + (1 * e2)
172
- newf = (@e * b2) + (@f * d2) + (1 * f2)
167
+ newa = (@a * a2) + (@b * c2) + (e2 * 0)
168
+ newb = (@a * b2) + (@b * d2) + (f2 * 0)
169
+ newc = (@c * a2) + (@d * c2) + (e2 * 0)
170
+ newd = (@c * b2) + (@d * d2) + (f2 * 0)
171
+ newe = (@e * a2) + (@f * c2) + (e2 * 1)
172
+ newf = (@e * b2) + (@f * d2) + (f2 * 1)
173
173
  @a, @b, @c, @d, @e, @f = newa, newb, newc, newd, newe, newf
174
174
  end
175
175
 
data/lib/pdf/reader/type_check.rb CHANGED
@@ -9,6 +9,18 @@ module PDF
9
9
  #
10
10
  class TypeCheck
11
11
 
12
+ def self.cast_to_int!(obj)
13
+ if obj.is_a?(Integer)
14
+ obj
15
+ elsif obj.nil?
16
+ 0
17
+ elsif obj.respond_to?(:to_i)
18
+ obj.to_i
19
+ else
20
+ raise MalformedPDFError, "Unable to cast to integer"
21
+ end
22
+ end
23
+
12
24
  def self.cast_to_numeric!(obj)
13
25
  if obj.is_a?(Numeric)
14
26
  obj
@@ -46,6 +58,40 @@ module PDF
46
58
  raise MalformedPDFError, "Unable to cast to symbol"
47
59
  end
48
60
  end
61
+
62
+ def self.cast_to_symbol!(obj)
63
+ res = cast_to_symbol(obj)
64
+ if res
65
+ res
66
+ else
67
+ raise MalformedPDFError, "Unable to cast to symbol"
68
+ end
69
+ end
70
+
71
+ def self.cast_to_pdf_dict!(obj)
72
+ if obj.is_a?(Hash)
73
+ obj
74
+ elsif obj.respond_to?(:to_h)
75
+ obj.to_h
76
+ else
77
+ raise MalformedPDFError, "Unable to cast to hash"
78
+ end
79
+ end
80
+
81
+ def self.cast_to_pdf_dict_with_stream_values!(obj)
82
+ if obj.is_a?(Hash)
83
+ result = Hash.new
84
+ obj.each do |k, v|
85
+ raise MalformedPDFError, "Expected a stream" unless v.is_a?(PDF::Reader::Stream)
86
+ result[cast_to_symbol!(k)] = v
87
+ end
88
+ result
89
+ elsif obj.respond_to?(:to_h)
90
+ cast_to_pdf_dict_with_stream_values!(obj.to_h)
91
+ else
92
+ raise MalformedPDFError, "Unable to cast to hash"
93
+ end
94
+ end
49
95
  end
50
96
  end
51
97
  end
data/lib/pdf/reader/width_calculator/built_in.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  require 'afm'
@@ -53,12 +53,13 @@ class PDF::Reader
53
53
  private
54
54
 
55
55
  def control_character?(code_point)
56
- @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
56
+ match = @font.encoding.int_to_name(code_point).first.to_s[/\Acontrol..\Z/]
57
+ match ? true : false
57
58
  end
58
59
 
59
60
  def extract_basefont(font_name)
60
61
  if BUILTINS.include?(font_name)
61
- font_name
62
+ font_name.to_s
62
63
  else
63
64
  "Times-Roman"
64
65
  end
data/lib/pdf/reader/width_calculator/composite.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -22,7 +22,11 @@ class PDF::Reader
22
22
 
23
23
  w = @widths[code_point]
24
24
  # 0 is a valid width
25
- return w.to_f unless w.nil?
25
+ if w
26
+ w.to_f
27
+ else
28
+ 0
29
+ end
26
30
  end
27
31
  end
28
32
  end
data/lib/pdf/reader/width_calculator/true_type.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -10,8 +10,8 @@ class PDF::Reader
10
10
  def initialize(font)
11
11
  @font = font
12
12
 
13
- if @font.font_descriptor
14
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
15
15
  else
16
16
  @missing_width = 0
17
17
  end
@@ -30,25 +30,23 @@ class PDF::Reader
30
30
 
31
31
  # in ruby a negative index is valid, and will go from the end of the array
32
32
  # which is undesireable in this case.
33
- if @font.first_char && @font.first_char <= code_point
34
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
33
+ first_char = @font.first_char
34
+ if first_char && first_char <= code_point
35
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
35
36
  else
36
37
  @missing_width.to_f
37
38
  end
38
39
  end
39
40
 
40
41
  def glyph_width_from_descriptor(code_point)
41
- return unless @font.font_descriptor
42
-
43
42
  # true type fonts will have most of their information contained
44
43
  # with-in a program inside the font descriptor, however the widths
45
44
  # may not be in standard PDF glyph widths (1000 units => 1 text space unit)
46
45
  # so this width will need to be scaled
47
- w = @font.font_descriptor.glyph_width(code_point)
48
- if w
49
- w.to_f * @font.font_descriptor.glyph_to_pdf_scale_factor
50
- else
51
- nil
46
+ if fd = @font.font_descriptor
47
+ if w = fd.glyph_width(code_point)
48
+ w.to_f * fd.glyph_to_pdf_scale_factor.to_f
49
+ end
52
50
  end
53
51
  end
54
52
  end
data/lib/pdf/reader/width_calculator/type_one_or_three.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -10,8 +10,8 @@ class PDF::Reader
10
10
  def initialize(font)
11
11
  @font = font
12
12
 
13
- if @font.font_descriptor
14
- @missing_width = @font.font_descriptor.missing_width
13
+ if fd = @font.font_descriptor
14
+ @missing_width = fd.missing_width
15
15
  else
16
16
  @missing_width = 0
17
17
  end
@@ -23,8 +23,9 @@ class PDF::Reader
23
23
 
24
24
  # in ruby a negative index is valid, and will go from the end of the array
25
25
  # which is undesireable in this case.
26
- if @font.first_char <= code_point
27
- @font.widths.fetch(code_point - @font.first_char, @missing_width).to_f
26
+ first_char = @font.first_char
27
+ if first_char && first_char <= code_point
28
+ @font.widths.fetch(code_point - first_char, @missing_width.to_i).to_f
28
29
  else
29
30
  @missing_width.to_f
30
31
  end
data/lib/pdf/reader/width_calculator/type_zero.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  class PDF::Reader
@@ -13,13 +13,16 @@ class PDF::Reader
13
13
 
14
14
  def initialize(font)
15
15
  @font = font
16
- @descendant_font = @font.descendantfonts.first
17
16
  end
18
17
 
19
18
  def glyph_width(code_point)
20
19
  return 0 if code_point.nil? || code_point < 0
21
20
 
22
- @descendant_font.glyph_width(code_point).to_f
21
+ if descendant_font = @font.descendantfonts.first
22
+ descendant_font.glyph_width(code_point).to_f
23
+ else
24
+ 0
25
+ end
23
26
  end
24
27
  end
25
28
  end
data/lib/pdf/reader/xref.rb CHANGED
@@ -73,7 +73,7 @@ class PDF::Reader
73
73
  #
74
74
  # ref - a PDF::Reader::Reference object containing an object ID and revision number
75
75
  def [](ref)
76
- @xref[ref.id][ref.gen]
76
+ @xref.fetch(ref.id, {}).fetch(ref.gen)
77
77
  rescue
78
78
  raise InvalidObjectError, "Object #{ref.id}, Generation #{ref.gen} is invalid"
79
79
  end
@@ -82,8 +82,8 @@ class PDF::Reader
82
82
  def each(&block)
83
83
  ids = @xref.keys.sort
84
84
  ids.each do |id|
85
- gen = @xref[id].keys.sort[-1]
86
- yield PDF::Reader::Reference.new(id, gen)
85
+ gen = @xref.fetch(id, {}).keys.sort[-1]
86
+ yield PDF::Reader::Reference.new(id, gen.to_i)
87
87
  end
88
88
  end
89
89
  ################################################################################
data/lib/pdf/reader.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -128,7 +128,7 @@ module PDF
128
128
  doc_strings_to_utf8(dict)
129
129
  end
130
130
 
131
- # Return a Hash with extra metadata provided by the author of the PDF file. Not
131
+ # Return a String with extra XML metadata provided by the author of the PDF file. Not
132
132
  # always present.
133
133
  #
134
134
  def metadata
@@ -182,7 +182,7 @@ module PDF
182
182
  #
183
183
  # reader.pages.each do |page|
184
184
  # puts page.fonts
185
- # puts page.images
185
+ # puts page.rectangles
186
186
  # puts page.text
187
187
  # end
188
188
  #
@@ -272,13 +272,7 @@ module PDF
272
272
  end
273
273
 
274
274
  def root
275
- @root ||= begin
276
- obj = @objects.deref_hash(@objects.trailer[:Root]) || {}
277
- unless obj.kind_of?(::Hash)
278
- raise MalformedPDFError, "PDF malformed, trailer Root should be a dictionary"
279
- end
280
- obj
281
- end
275
+ @root ||= @objects.deref_hash(@objects.trailer[:Root]) || {}
282
276
  end
283
277
 
284
278
  end
@@ -315,6 +309,7 @@ require 'pdf/reader/print_receiver'
315
309
  require 'pdf/reader/rectangle'
316
310
  require 'pdf/reader/reference'
317
311
  require 'pdf/reader/register_receiver'
312
+ require 'pdf/reader/no_text_filter'
318
313
  require 'pdf/reader/null_security_handler'
319
314
  require 'pdf/reader/security_handler_factory'
320
315
  require 'pdf/reader/standard_key_builder'