pdf-reader 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +22 -0
  3. data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
  4. data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
  5. data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
  6. data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
  7. data/lib/pdf/reader/buffer.rb +35 -17
  8. data/lib/pdf/reader/cid_widths.rb +7 -1
  9. data/lib/pdf/reader/cmap.rb +14 -3
  10. data/lib/pdf/reader/encoding.rb +37 -12
  11. data/lib/pdf/reader/error.rb +6 -0
  12. data/lib/pdf/reader/filter/ascii85.rb +2 -0
  13. data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
  14. data/lib/pdf/reader/filter/depredict.rb +4 -0
  15. data/lib/pdf/reader/filter/flate.rb +5 -2
  16. data/lib/pdf/reader/filter/lzw.rb +2 -0
  17. data/lib/pdf/reader/filter/null.rb +2 -0
  18. data/lib/pdf/reader/filter/run_length.rb +2 -0
  19. data/lib/pdf/reader/filter.rb +1 -0
  20. data/lib/pdf/reader/font.rb +90 -22
  21. data/lib/pdf/reader/font_descriptor.rb +76 -23
  22. data/lib/pdf/reader/form_xobject.rb +11 -0
  23. data/lib/pdf/reader/glyph_hash.rb +34 -9
  24. data/lib/pdf/reader/key_builder_v5.rb +17 -9
  25. data/lib/pdf/reader/lzw.rb +17 -6
  26. data/lib/pdf/reader/no_text_filter.rb +1 -0
  27. data/lib/pdf/reader/null_security_handler.rb +1 -0
  28. data/lib/pdf/reader/object_cache.rb +7 -2
  29. data/lib/pdf/reader/object_hash.rb +116 -9
  30. data/lib/pdf/reader/object_stream.rb +19 -2
  31. data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
  32. data/lib/pdf/reader/page.rb +41 -7
  33. data/lib/pdf/reader/page_layout.rb +25 -8
  34. data/lib/pdf/reader/page_state.rb +5 -2
  35. data/lib/pdf/reader/page_text_receiver.rb +6 -2
  36. data/lib/pdf/reader/pages_strategy.rb +1 -1
  37. data/lib/pdf/reader/parser.rb +51 -10
  38. data/lib/pdf/reader/point.rb +9 -2
  39. data/lib/pdf/reader/print_receiver.rb +2 -6
  40. data/lib/pdf/reader/rc4_security_handler.rb +2 -0
  41. data/lib/pdf/reader/rectangle.rb +24 -1
  42. data/lib/pdf/reader/reference.rb +10 -1
  43. data/lib/pdf/reader/register_receiver.rb +15 -2
  44. data/lib/pdf/reader/resources.rb +9 -0
  45. data/lib/pdf/reader/security_handler_factory.rb +13 -0
  46. data/lib/pdf/reader/standard_key_builder.rb +37 -23
  47. data/lib/pdf/reader/stream.rb +9 -3
  48. data/lib/pdf/reader/synchronized_cache.rb +5 -2
  49. data/lib/pdf/reader/text_run.rb +28 -1
  50. data/lib/pdf/reader/token.rb +1 -0
  51. data/lib/pdf/reader/transformation_matrix.rb +33 -2
  52. data/lib/pdf/reader/type_check.rb +10 -3
  53. data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
  54. data/lib/pdf/reader/validating_receiver.rb +29 -0
  55. data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
  56. data/lib/pdf/reader/width_calculator/composite.rb +5 -1
  57. data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
  58. data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
  59. data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
  60. data/lib/pdf/reader/xref.rb +28 -7
  61. data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
  62. data/lib/pdf/reader.rb +18 -2
  63. data/rbi/pdf-reader.rbi +1502 -1594
  64. metadata +17 -11
@@ -32,25 +32,31 @@ class PDF::Reader
32
32
  # convert strings of various PDF-dialect encodings into UTF-8.
33
33
  class Encoding # :nodoc:
34
34
  CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
35
- 24,25,26,27,28,29,30,31]
36
- UNKNOWN_CHAR = 0x25AF # ▯
35
+ 24,25,26,27,28,29,30,31] #: Array[Integer]
36
+ UNKNOWN_CHAR = 0x25AF #: Integer # ▯
37
37
 
38
+ #: String
38
39
  attr_reader :unpack
39
40
 
41
+ #: (Hash[Symbol, untyped] | Symbol | nil) -> void
40
42
  def initialize(enc)
41
- @mapping = default_mapping # maps from character codes to Unicode codepoints
42
- @string_cache = {} # maps from character codes to UTF-8 strings.
43
+ # maps from character codes to Unicode codepoints
44
+ @mapping = default_mapping #: Hash[Integer, Integer]
43
45
 
44
- @enc_name = if enc.kind_of?(Hash)
45
- enc[:Encoding] || enc[:BaseEncoding]
46
+ # maps from character codes to UTF-8 strings.
47
+ @string_cache = {} #: Hash[Integer, String]
48
+
49
+ @enc_name = :StandardEncoding #: Symbol
50
+ if enc.kind_of?(Hash)
51
+ @enc_name = enc[:Encoding] || enc[:BaseEncoding]
46
52
  elsif enc && enc.respond_to?(:to_sym)
47
- enc.to_sym
48
- else
49
- :StandardEncoding
53
+ @enc_name = enc.to_sym
50
54
  end
51
55
 
52
- @unpack = get_unpack(@enc_name)
53
- @map_file = get_mapping_file(@enc_name)
56
+ @unpack = get_unpack(@enc_name) #: String
57
+ @map_file = get_mapping_file(@enc_name) #: String | nil
58
+ @differences = nil #: Hash[Integer, Integer] | nil
59
+ @glyphlist = nil #: PDF::Reader::GlyphHash | nil
54
60
 
55
61
  load_mapping(@map_file) if @map_file
56
62
 
@@ -68,6 +74,7 @@ class PDF::Reader
68
74
  # To save space the following array is also valid and equivalent to the previous one
69
75
  #
70
76
  # [25, :A, :B]
77
+ #: (Array[Integer | Symbol]) -> Hash[Integer, Integer]
71
78
  def differences=(diff)
72
79
  PDF::Reader::Error.validate_type(diff, "diff", Array)
73
80
 
@@ -85,6 +92,7 @@ class PDF::Reader
85
92
  @differences
86
93
  end
87
94
 
95
+ #: () -> Hash[Integer, Integer]
88
96
  def differences
89
97
  # this method is only used by the spec tests
90
98
  @differences ||= {}
@@ -101,6 +109,7 @@ class PDF::Reader
101
109
  # * pack the final array of Unicode codepoints into a utf-8 string
102
110
  # * mark the string as utf-8 if we're running on a M17N aware VM
103
111
  #
112
+ #: (String) -> String
104
113
  def to_utf8(str)
105
114
  if utf8_conversion_impossible?
106
115
  little_boxes(str.unpack(unpack).size)
@@ -109,6 +118,7 @@ class PDF::Reader
109
118
  end
110
119
  end
111
120
 
121
+ #: (Integer) -> String
112
122
  def int_to_utf8_string(glyph_code)
113
123
  @string_cache[glyph_code] ||= internal_int_to_utf8_string(glyph_code)
114
124
  end
@@ -118,13 +128,19 @@ class PDF::Reader
118
128
  # int_to_name(65)
119
129
  # => [:A]
120
130
  #
131
+ #: (Integer) -> Array[Symbol]
121
132
  def int_to_name(glyph_code)
122
133
  if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
123
134
  []
124
135
  elsif differences[glyph_code]
125
136
  [differences[glyph_code]]
126
137
  elsif @mapping[glyph_code]
127
- glyphlist.unicode_to_name(@mapping[glyph_code])
138
+ val = @mapping[glyph_code]
139
+ if val
140
+ glyphlist.unicode_to_name(val)
141
+ else
142
+ []
143
+ end
128
144
  else
129
145
  []
130
146
  end
@@ -137,6 +153,7 @@ class PDF::Reader
137
153
  # - leaves all other bytes <= 255 unchanged
138
154
  #
139
155
  # Each specific encoding will change this default as required for their glyphs
156
+ #: () -> Hash[Integer, Integer]
140
157
  def default_mapping
141
158
  all_bytes = (0..255).to_a
142
159
  tuples = all_bytes.map {|i|
@@ -146,6 +163,7 @@ class PDF::Reader
146
163
  mapping
147
164
  end
148
165
 
166
+ #: (Integer) -> String
149
167
  def internal_int_to_utf8_string(glyph_code)
150
168
  ret = [
151
169
  @mapping[glyph_code.to_i] || glyph_code.to_i
@@ -154,10 +172,12 @@ class PDF::Reader
154
172
  ret
155
173
  end
156
174
 
175
+ #: () -> bool
157
176
  def utf8_conversion_impossible?
158
177
  @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
159
178
  end
160
179
 
180
+ #: (Integer) -> String
161
181
  def little_boxes(times)
162
182
  codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
163
183
  ret = codepoints.pack("U*")
@@ -165,12 +185,14 @@ class PDF::Reader
165
185
  ret
166
186
  end
167
187
 
188
+ #: (String) -> String
168
189
  def convert_to_utf8(str)
169
190
  ret = str.unpack(unpack).map! { |c| @mapping[c.to_i] || c }.pack("U*")
170
191
  ret.force_encoding("UTF-8")
171
192
  ret
172
193
  end
173
194
 
195
+ #: (Symbol) -> String
174
196
  def get_unpack(enc)
175
197
  case enc
176
198
  when :"Identity-H", :"Identity-V", :UTF16Encoding
@@ -180,6 +202,7 @@ class PDF::Reader
180
202
  end
181
203
  end
182
204
 
205
+ #: (Symbol) -> String?
183
206
  def get_mapping_file(enc)
184
207
  case enc
185
208
  when :"Identity-H", :"Identity-V", :UTF16Encoding then
@@ -201,10 +224,12 @@ class PDF::Reader
201
224
  end
202
225
  end
203
226
 
227
+ #: () -> PDF::Reader::GlyphHash
204
228
  def glyphlist
205
229
  @glyphlist ||= PDF::Reader::GlyphHash.new
206
230
  end
207
231
 
232
+ #: (String) -> void
208
233
  def load_mapping(file)
209
234
  File.open(file, "r:BINARY") do |f|
210
235
  f.each do |l|
@@ -31,30 +31,36 @@ class PDF::Reader
31
31
  # are valid
32
32
  class Error # :nodoc:
33
33
  ################################################################################
34
+ #: (untyped, untyped, ?untyped) -> untyped
34
35
  def self.str_assert(lvalue, rvalue, chars=nil)
35
36
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
36
37
  lvalue = lvalue[0,chars] if chars
37
38
  raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
38
39
  end
39
40
  ################################################################################
41
+ #: (untyped, untyped, ?untyped) -> untyped
40
42
  def self.str_assert_not(lvalue, rvalue, chars=nil)
41
43
  raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead" if chars and !lvalue.kind_of?(String)
42
44
  lvalue = lvalue[0,chars] if chars
43
45
  raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue == rvalue
44
46
  end
45
47
  ################################################################################
48
+ #: (untyped, untyped) -> untyped
46
49
  def self.assert_equal(lvalue, rvalue)
47
50
  raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead" if lvalue != rvalue
48
51
  end
49
52
  ################################################################################
53
+ #: (Object, String, Module) -> void
50
54
  def self.validate_type(object, name, klass)
51
55
  raise ArgumentError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
52
56
  end
53
57
  ################################################################################
58
+ #: (Object, String, Module) -> void
54
59
  def self.validate_type_as_malformed(object, name, klass)
55
60
  raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}" unless object.is_a?(klass)
56
61
  end
57
62
  ################################################################################
63
+ #: (Object, String) -> void
58
64
  def self.validate_not_nil(object, name)
59
65
  raise ArgumentError, "#{object} must not be nil" if object.nil?
60
66
  end
@@ -9,6 +9,7 @@ class PDF::Reader
9
9
  # implementation of the Ascii85 filter
10
10
  class Ascii85
11
11
 
12
+ #: (?Hash[untyped, untyped]) -> void
12
13
  def initialize(options = {})
13
14
  @options = options
14
15
  end
@@ -17,6 +18,7 @@ class PDF::Reader
17
18
  # Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
18
19
  # rubygem.
19
20
  #
21
+ #: (String) -> String
20
22
  def filter(data)
21
23
  data = "<~#{data}" unless data.to_s[0,2] == "<~"
22
24
  if defined?(::Ascii85Native)
@@ -8,6 +8,7 @@ class PDF::Reader
8
8
  # implementation of the AsciiHex stream filter
9
9
  class AsciiHex
10
10
 
11
+ #: (?Hash[untyped, untyped]) -> void
11
12
  def initialize(options = {})
12
13
  @options = options
13
14
  end
@@ -15,6 +16,7 @@ class PDF::Reader
15
16
  ################################################################################
16
17
  # Decode the specified data using the AsciiHex algorithm.
17
18
  #
19
+ #: (String) -> String
18
20
  def filter(data)
19
21
  data.chop! if data[-1,1] == ">"
20
22
  data = data[1,data.size] if data[0,1] == "<"
@@ -8,6 +8,7 @@ class PDF::Reader
8
8
  # improve compression
9
9
  class Depredict
10
10
 
11
+ #: (?Hash[untyped, untyped]) -> void
11
12
  def initialize(options = {})
12
13
  @options = options
13
14
  end
@@ -16,6 +17,7 @@ class PDF::Reader
16
17
  # Streams can be preprocessed to improve compression. This reverses the
17
18
  # preprocessing
18
19
  #
20
+ #: (String) -> String
19
21
  def filter(data)
20
22
  predictor = @options[:Predictor].to_i
21
23
 
@@ -34,6 +36,7 @@ class PDF::Reader
34
36
  private
35
37
 
36
38
  ################################################################################
39
+ #: (untyped) -> String
37
40
  def tiff_depredict(data)
38
41
  data = data.unpack("C*")
39
42
  unfiltered = ''
@@ -60,6 +63,7 @@ class PDF::Reader
60
63
  unfiltered
61
64
  end
62
65
  ################################################################################
66
+ #: (untyped) -> String
63
67
  def png_depredict(data)
64
68
  return data if @options[:Predictor].to_i < 10
65
69
 
@@ -10,15 +10,17 @@ class PDF::Reader
10
10
  # implementation of the Flate (zlib) stream filter
11
11
  class Flate
12
12
 
13
- ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 # Zlib::MAX_WBITS + 32
14
- ZLIB_RAW_DEFLATE = -15 # Zlib::MAX_WBITS * -1
13
+ ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 #: Integer # Zlib::MAX_WBITS + 32
14
+ ZLIB_RAW_DEFLATE = -15 #: Integer # Zlib::MAX_WBITS * -1
15
15
 
16
+ #: (?Hash[untyped, untyped]) -> void
16
17
  def initialize(options = {})
17
18
  @options = options
18
19
  end
19
20
 
20
21
  ################################################################################
21
22
  # Decode the specified data with the Zlib compression algorithm
23
+ #: (String) -> String
22
24
  def filter(data)
23
25
  deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
24
26
 
@@ -31,6 +33,7 @@ class PDF::Reader
31
33
 
32
34
  private
33
35
 
36
+ #: (untyped) -> untyped
34
37
  def zlib_inflate(data)
35
38
  begin
36
39
  return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)
@@ -8,12 +8,14 @@ class PDF::Reader
8
8
  # implementation of the LZW stream filter
9
9
  class Lzw
10
10
 
11
+ #: (?Hash[untyped, untyped]) -> void
11
12
  def initialize(options = {})
12
13
  @options = options
13
14
  end
14
15
 
15
16
  ################################################################################
16
17
  # Decode the specified data with the LZW compression algorithm
18
+ #: (String) -> String
17
19
  def filter(data)
18
20
  data = PDF::Reader::LZW.decode(data)
19
21
  Depredict.new(@options).filter(data)
@@ -6,10 +6,12 @@ class PDF::Reader
6
6
  module Filter # :nodoc:
7
7
  # implementation of the null stream filter
8
8
  class Null
9
+ #: (?Hash[untyped, untyped]) -> void
9
10
  def initialize(options = {})
10
11
  @options = options
11
12
  end
12
13
 
14
+ #: (String) -> String
13
15
  def filter(data)
14
16
  data
15
17
  end
@@ -8,12 +8,14 @@ class PDF::Reader # :nodoc:
8
8
  # implementation of the run length stream filter
9
9
  class RunLength
10
10
 
11
+ #: (?Hash[untyped, untyped]) -> void
11
12
  def initialize(options = {})
12
13
  @options = options
13
14
  end
14
15
 
15
16
  ################################################################################
16
17
  # Decode the specified data with the RunLengthDecode compression algorithm
18
+ #: (String) -> String
17
19
  def filter(data)
18
20
  pos = 0
19
21
  out = "".dup
@@ -41,6 +41,7 @@ class PDF::Reader
41
41
  # Filters that are only used to encode image data are accepted, but the data is
42
42
  # returned untouched. At this stage PDF::Reader has no need to decode images.
43
43
  #
44
+ #: (Symbol, ?Hash[untyped, untyped]) -> untyped
44
45
  def self.with(name, options = {})
45
46
  case name
46
47
  when :ASCII85Decode, :A85 then PDF::Reader::Filter::Ascii85.new(options)
@@ -1,5 +1,5 @@
1
1
  # coding: utf-8
2
- # typed: true
2
+ # typed: strict
3
3
  # frozen_string_literal: true
4
4
 
5
5
  ################################################################################
@@ -29,48 +29,99 @@
29
29
 
30
30
  require 'pdf/reader/width_calculator'
31
31
 
32
+
32
33
  class PDF::Reader
33
34
  # Represents a single font PDF object and provides some useful methods
34
35
  # for extracting info. Mainly used for converting text to UTF-8.
35
36
  #
36
37
  class Font
37
- attr_accessor :subtype, :encoding, :descendantfonts, :tounicode
38
- attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
39
- :cid_widths, :cid_default_width
38
+ #: type widthCalculator = (
39
+ #| PDF::Reader::WidthCalculator::TypeZero |
40
+ #| PDF::Reader::WidthCalculator::BuiltIn |
41
+ #| PDF::Reader::WidthCalculator::TypeOneOrThree |
42
+ #| PDF::Reader::WidthCalculator::TrueType |
43
+ #| PDF::Reader::WidthCalculator::Composite
44
+ #| )
45
+
46
+ #: Symbol?
47
+ attr_accessor :subtype
48
+
49
+ #: PDF::Reader::Encoding
50
+ attr_accessor :encoding
51
+
52
+ #: Array[PDF::Reader::Font]
53
+ attr_accessor :descendantfonts
54
+
55
+ #: PDF::Reader::CMap | nil
56
+ attr_accessor :tounicode
57
+
58
+ #: Array[Numeric]
59
+ attr_reader :widths
60
+
61
+ #: Integer?
62
+ attr_reader :first_char
40
63
 
64
+ #: Integer?
65
+ attr_reader :last_char
66
+
67
+ #: Symbol?
68
+ attr_reader :basefont
69
+
70
+ #: PDF::Reader::FontDescriptor?
71
+ attr_reader :font_descriptor
72
+
73
+ #: Array[Numeric]
74
+ attr_reader :cid_widths
75
+
76
+ #: Numeric
77
+ attr_reader :cid_default_width
78
+
79
+ #: (PDF::Reader::ObjectHash, Hash[Symbol, untyped]) -> void
41
80
  def initialize(ohash, obj)
42
81
  @ohash = ohash
43
- @tounicode = nil
82
+ @tounicode = nil #: PDF::Reader::CMap | nil
83
+ @descendantfonts = [] #: Array[PDF::Reader::Font]
84
+ @widths = [] #: Array[Numeric]
85
+ @first_char = nil #: Integer?
86
+ @last_char = nil #: Integer?
87
+ @basefont = nil #: Symbol?
88
+ @font_descriptor = nil #: PDF::Reader::FontDescriptor?
89
+ @cid_widths = [] #: Array[Numeric]
90
+ @cid_default_width = 0 #: Numeric
91
+ @encoding = PDF::Reader::Encoding.new(:StandardEncoding) #: PDF::Reader::Encoding
92
+ @cached_widths = {} #: Hash[Integer, Numeric]
93
+ @font_matrix = nil #: Array[Numeric] | nil
44
94
 
45
95
  extract_base_info(obj)
46
96
  extract_type3_info(obj)
47
97
  extract_descriptor(obj)
48
98
  extract_descendants(obj)
49
- @width_calc = build_width_calculator
50
-
51
- @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
99
+ @width_calc = build_width_calculator #: widthCalculator
52
100
  end
53
101
 
102
+ #: (Integer | String | Array[Integer | String]) -> String
54
103
  def to_utf8(params)
55
104
  if @tounicode
56
- to_utf8_via_cmap(params)
105
+ to_utf8_via_cmap(params, @tounicode)
57
106
  else
58
107
  to_utf8_via_encoding(params)
59
108
  end
60
109
  end
61
110
 
111
+ #: (String) -> (Array[Integer | Float | String | nil] | nil)
62
112
  def unpack(data)
63
113
  data.unpack(encoding.unpack)
64
114
  end
65
115
 
66
116
  # looks up the specified codepoint and returns a value that is in (pdf)
67
117
  # glyph space, which is 1000 glyph units = 1 text space unit
118
+ #: (Integer | String) -> Numeric
68
119
  def glyph_width(code_point)
69
120
  if code_point.is_a?(String)
70
- code_point = code_point.unpack(encoding.unpack).first
121
+ code_point = unpack_string_to_array_of_ints(code_point, encoding.unpack).first
122
+ raise MalformedPDFError, "code point missing" if code_point.nil?
71
123
  end
72
124
 
73
- @cached_widths ||= {}
74
125
  @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
75
126
  end
76
127
 
@@ -78,6 +129,7 @@ class PDF::Reader
78
129
  #
79
130
  # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
80
131
  #
132
+ #: (Integer | String) -> Numeric
81
133
  def glyph_width_in_text_space(code_point)
82
134
  glyph_width_in_glyph_space = glyph_width(code_point)
83
135
 
@@ -93,13 +145,14 @@ class PDF::Reader
93
145
  private
94
146
 
95
147
  # Only valid for Type3 fonts
148
+ #: (Numeric, Numeric) -> [Numeric, Numeric]
96
149
  def font_matrix_transform(x, y)
97
150
  return x, y if @font_matrix.nil?
98
151
 
99
152
  matrix = TransformationMatrix.new(
100
- @font_matrix[0], @font_matrix[1],
101
- @font_matrix[2], @font_matrix[3],
102
- @font_matrix[4], @font_matrix[5],
153
+ @font_matrix[0] || 0, @font_matrix[1] || 0,
154
+ @font_matrix[2] || 0, @font_matrix[3] || 0,
155
+ @font_matrix[4] || 0, @font_matrix[5] || 0,
103
156
  )
104
157
 
105
158
  if x == 0 && y == 0
@@ -112,6 +165,7 @@ class PDF::Reader
112
165
  end
113
166
  end
114
167
 
168
+ #: (Symbol | String | nil) -> PDF::Reader::Encoding
115
169
  def default_encoding(font_name)
116
170
  case font_name.to_s
117
171
  when "Symbol" then
@@ -123,6 +177,7 @@ class PDF::Reader
123
177
  end
124
178
  end
125
179
 
180
+ #: () -> widthCalculator
126
181
  def build_width_calculator
127
182
  if @subtype == :Type0
128
183
  PDF::Reader::WidthCalculator::TypeZero.new(self)
@@ -149,6 +204,7 @@ class PDF::Reader
149
204
  end
150
205
  end
151
206
 
207
+ #: (Hash[Symbol, untyped]) -> PDF::Reader::Encoding
152
208
  def build_encoding(obj)
153
209
  if obj[:Encoding].is_a?(Symbol)
154
210
  # one of the standard encodings, referenced by name
@@ -163,6 +219,7 @@ class PDF::Reader
163
219
  end
164
220
  end
165
221
 
222
+ #: (Hash[Symbol, untyped]) -> void
166
223
  def extract_base_info(obj)
167
224
  @subtype = @ohash.deref_name(obj[:Subtype])
168
225
  @basefont = @ohash.deref_name(obj[:BaseFont])
@@ -185,6 +242,7 @@ class PDF::Reader
185
242
  end
186
243
  end
187
244
 
245
+ #: (Hash[Symbol, untyped]) -> void
188
246
  def extract_type3_info(obj)
189
247
  if @subtype == :Type3
190
248
  @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
@@ -193,46 +251,50 @@ class PDF::Reader
193
251
  end
194
252
  end
195
253
 
254
+ #: (Hash[Symbol, untyped]) -> void
196
255
  def extract_descriptor(obj)
197
256
  if obj[:FontDescriptor]
198
257
  # create a font descriptor object if we can, in other words, unless this is
199
258
  # a CID Font
200
- fd = @ohash.deref_hash(obj[:FontDescriptor])
259
+ fd = @ohash.deref_hash(obj[:FontDescriptor]) || {}
201
260
  @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
202
261
  else
203
262
  @font_descriptor = nil
204
263
  end
205
264
  end
206
265
 
266
+ #: (Hash[Symbol, untyped]) -> void
207
267
  def extract_descendants(obj)
208
268
  # per PDF 32000-1:2008 p. 280, :DescendantFonts is:
209
269
  # A one-element array specifying the CIDFont dictionary that is the
210
270
  # descendant of this Type 0 font.
211
271
  if obj[:DescendantFonts]
212
- descendants = @ohash.deref_array(obj[:DescendantFonts])
272
+ descendants = @ohash.deref_array(obj[:DescendantFonts]) || []
213
273
  @descendantfonts = descendants.map { |desc|
214
- PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
274
+ PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc) || {})
215
275
  }
216
276
  else
217
277
  @descendantfonts = []
218
278
  end
219
279
  end
220
280
 
221
- def to_utf8_via_cmap(params)
281
+ #: (Integer | String | Array[Integer | String], PDF::Reader::CMap) -> String
282
+ def to_utf8_via_cmap(params, cmap)
222
283
  case params
223
284
  when Integer
224
285
  [
225
- @tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
286
+ cmap.decode(params)
226
287
  ].flatten.pack("U*")
227
288
  when String
228
- params.unpack(encoding.unpack).map { |c|
229
- @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
289
+ unpack_string_to_array_of_ints(params, encoding.unpack).map { |code_point|
290
+ cmap.decode(code_point)
230
291
  }.flatten.pack("U*")
231
292
  when Array
232
- params.collect { |param| to_utf8_via_cmap(param) }.join("")
293
+ params.collect { |param| to_utf8_via_cmap(param, cmap) }.join("")
233
294
  end
234
295
  end
235
296
 
297
+ #: (Integer | String | Array[Integer | String]) -> String
236
298
  def to_utf8_via_encoding(params)
237
299
  if encoding.kind_of?(String)
238
300
  raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
@@ -248,5 +310,11 @@ class PDF::Reader
248
310
  end
249
311
  end
250
312
 
313
+ #: (String, String) -> Array[Integer]
314
+ def unpack_string_to_array_of_ints(unpack_me, unpack_arg)
315
+ unpack_me.unpack(unpack_arg).map { |code_point|
316
+ code_point = TypeCheck.cast_to_int!(code_point)
317
+ }
318
+ end
251
319
  end
252
320
  end