RubyGems - pdf-reader - Versions diffs - 2.14.0 → 2.15.0 - Mend

pdf-reader 2.14.0 → 2.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

checksums.yaml +4 -4
data/CHANGELOG +22 -0
data/lib/pdf/reader/advanced_text_run_filter.rb +17 -2
data/lib/pdf/reader/aes_v2_security_handler.rb +30 -0
data/lib/pdf/reader/aes_v3_security_handler.rb +35 -3
data/lib/pdf/reader/bounding_rectangle_runs_filter.rb +1 -0
data/lib/pdf/reader/buffer.rb +35 -17
data/lib/pdf/reader/cid_widths.rb +7 -1
data/lib/pdf/reader/cmap.rb +14 -3
data/lib/pdf/reader/encoding.rb +37 -12
data/lib/pdf/reader/error.rb +6 -0
data/lib/pdf/reader/filter/ascii85.rb +2 -0
data/lib/pdf/reader/filter/ascii_hex.rb +2 -0
data/lib/pdf/reader/filter/depredict.rb +4 -0
data/lib/pdf/reader/filter/flate.rb +5 -2
data/lib/pdf/reader/filter/lzw.rb +2 -0
data/lib/pdf/reader/filter/null.rb +2 -0
data/lib/pdf/reader/filter/run_length.rb +2 -0
data/lib/pdf/reader/filter.rb +1 -0
data/lib/pdf/reader/font.rb +90 -22
data/lib/pdf/reader/font_descriptor.rb +76 -23
data/lib/pdf/reader/form_xobject.rb +11 -0
data/lib/pdf/reader/glyph_hash.rb +34 -9
data/lib/pdf/reader/key_builder_v5.rb +17 -9
data/lib/pdf/reader/lzw.rb +17 -6
data/lib/pdf/reader/no_text_filter.rb +1 -0
data/lib/pdf/reader/null_security_handler.rb +1 -0
data/lib/pdf/reader/object_cache.rb +7 -2
data/lib/pdf/reader/object_hash.rb +116 -9
data/lib/pdf/reader/object_stream.rb +19 -2
data/lib/pdf/reader/overlapping_runs_filter.rb +7 -1
data/lib/pdf/reader/page.rb +41 -7
data/lib/pdf/reader/page_layout.rb +25 -8
data/lib/pdf/reader/page_state.rb +5 -2
data/lib/pdf/reader/page_text_receiver.rb +6 -2
data/lib/pdf/reader/pages_strategy.rb +1 -1
data/lib/pdf/reader/parser.rb +51 -10
data/lib/pdf/reader/point.rb +9 -2
data/lib/pdf/reader/print_receiver.rb +2 -6
data/lib/pdf/reader/rc4_security_handler.rb +2 -0
data/lib/pdf/reader/rectangle.rb +24 -1
data/lib/pdf/reader/reference.rb +10 -1
data/lib/pdf/reader/register_receiver.rb +15 -2
data/lib/pdf/reader/resources.rb +9 -0
data/lib/pdf/reader/security_handler_factory.rb +13 -0
data/lib/pdf/reader/standard_key_builder.rb +37 -23
data/lib/pdf/reader/stream.rb +9 -3
data/lib/pdf/reader/synchronized_cache.rb +5 -2
data/lib/pdf/reader/text_run.rb +28 -1
data/lib/pdf/reader/token.rb +1 -0
data/lib/pdf/reader/transformation_matrix.rb +33 -2
data/lib/pdf/reader/type_check.rb +10 -3
data/lib/pdf/reader/unimplemented_security_handler.rb +2 -0
data/lib/pdf/reader/validating_receiver.rb +29 -0
data/lib/pdf/reader/width_calculator/built_in.rb +10 -3
data/lib/pdf/reader/width_calculator/composite.rb +5 -1
data/lib/pdf/reader/width_calculator/true_type.rb +5 -1
data/lib/pdf/reader/width_calculator/type_one_or_three.rb +3 -1
data/lib/pdf/reader/width_calculator/type_zero.rb +2 -0
data/lib/pdf/reader/xref.rb +28 -7
data/lib/pdf/reader/zero_width_runs_filter.rb +1 -0
data/lib/pdf/reader.rb +18 -2
data/rbi/pdf-reader.rbi +1502 -1594
metadata +17 -11

data/lib/pdf/reader/encoding.rb CHANGED Viewed

@@ -32,25 +32,31 @@ class PDF::Reader
   # convert strings of various PDF-dialect encodings into UTF-8.
   class Encoding # :nodoc:
     CONTROL_CHARS = [0,1,2,3,4,5,6,7,8,11,12,14,15,16,17,18,19,20,21,22,23,
-                     24,25,26,27,28,29,30,31]
-    UNKNOWN_CHAR = 0x25AF # ▯
+                     24,25,26,27,28,29,30,31] #: Array[Integer]
+    UNKNOWN_CHAR = 0x25AF #: Integer # ▯
+    #: String
     attr_reader :unpack
+    #: (Hash[Symbol, untyped] | Symbol | nil) -> void
     def initialize(enc)
-      @mapping  = default_mapping # maps from character codes to Unicode codepoints
-      @string_cache  = {} # maps from character codes to UTF-8 strings.
+      # maps from character codes to Unicode codepoints
+      @mapping  = default_mapping #: Hash[Integer, Integer]
-      @enc_name = if enc.kind_of?(Hash)
-        enc[:Encoding] || enc[:BaseEncoding]
+      # maps from character codes to UTF-8 strings.
+      @string_cache  = {} #: Hash[Integer, String]
+      @enc_name = :StandardEncoding #: Symbol
+      if enc.kind_of?(Hash)
+        @enc_name = enc[:Encoding] || enc[:BaseEncoding]
       elsif enc && enc.respond_to?(:to_sym)
-        enc.to_sym
-      else
-        :StandardEncoding
+        @enc_name = enc.to_sym
       end
-      @unpack   = get_unpack(@enc_name)
-      @map_file = get_mapping_file(@enc_name)
+      @unpack   = get_unpack(@enc_name) #: String
+      @map_file = get_mapping_file(@enc_name) #: String | nil
+      @differences = nil #: Hash[Integer, Integer] | nil
+      @glyphlist = nil #: PDF::Reader::GlyphHash | nil
       load_mapping(@map_file) if @map_file
@@ -68,6 +74,7 @@ class PDF::Reader
     # To save space the following array is also valid and equivalent to the previous one
     #
     #   [25, :A, :B]
+    #: (Array[Integer | Symbol]) -> Hash[Integer, Integer]
     def differences=(diff)
       PDF::Reader::Error.validate_type(diff, "diff", Array)
@@ -85,6 +92,7 @@ class PDF::Reader
       @differences
     end
+    #: () -> Hash[Integer, Integer]
     def differences
       # this method is only used by the spec tests
       @differences ||= {}
@@ -101,6 +109,7 @@ class PDF::Reader
     # * pack the final array of Unicode codepoints into a utf-8 string
     # * mark the string as utf-8 if we're running on a M17N aware VM
     #
+    #: (String) -> String
     def to_utf8(str)
       if utf8_conversion_impossible?
         little_boxes(str.unpack(unpack).size)
@@ -109,6 +118,7 @@ class PDF::Reader
       end
     end
+    #: (Integer) -> String
     def int_to_utf8_string(glyph_code)
       # Return the UTF-8 string for glyph_code, converting it at most once:
       # a previously stored result in @string_cache is reused, otherwise the
       # conversion is delegated to internal_int_to_utf8_string and remembered.
       cached = @string_cache[glyph_code]
       return cached if cached

       @string_cache[glyph_code] = internal_int_to_utf8_string(glyph_code)
     end
@@ -118,13 +128,19 @@ class PDF::Reader
     #     int_to_name(65)
     #     => [:A]
     #
+    #: (Integer) -> Array[Symbol]
     def int_to_name(glyph_code)
       if @enc_name == :"Identity-H" || @enc_name == :"Identity-V"
         []
       elsif differences[glyph_code]
         [differences[glyph_code]]
       elsif @mapping[glyph_code]
-        glyphlist.unicode_to_name(@mapping[glyph_code])
+        val = @mapping[glyph_code]
+        if val
+          glyphlist.unicode_to_name(val)
+        else
+          []
+        end
       else
         []
       end
@@ -137,6 +153,7 @@ class PDF::Reader
     # - leaves all other bytes <= 255 unchanged
     #
     # Each specific encoding will change this default as required for their glyphs
+    #: () -> Hash[Integer, Integer]
     def default_mapping
       all_bytes = (0..255).to_a
       tuples = all_bytes.map {|i|
@@ -146,6 +163,7 @@ class PDF::Reader
       mapping
     end
+    #: (Integer) -> String
     def internal_int_to_utf8_string(glyph_code)
       ret = [
         @mapping[glyph_code.to_i] || glyph_code.to_i
@@ -154,10 +172,12 @@ class PDF::Reader
       ret
     end
+    #: () -> bool
     def utf8_conversion_impossible?
       # True only when this encoding is one of the two Identity encodings
       # (Identity-H or Identity-V); every other encoding name yields false.
       return true if @enc_name == :"Identity-H"

       @enc_name == :"Identity-V"
     end
+    #: (Integer) -> String
     def little_boxes(times)
       codepoints = [ PDF::Reader::Encoding::UNKNOWN_CHAR ] * times
       ret = codepoints.pack("U*")
@@ -165,12 +185,14 @@ class PDF::Reader
       ret
     end
+    #: (String) -> String
     def convert_to_utf8(str)
       # Unpack str with this encoding's unpack directive, translate each code
       # through @mapping (codes without an entry pass through unchanged), then
       # pack the result as Unicode codepoints and tag it as UTF-8.
       codepoints = str.unpack(unpack).map { |code| @mapping[code.to_i] || code }
       codepoints.pack("U*").force_encoding("UTF-8")
     end
+    #: (Symbol) -> String
     def get_unpack(enc)
       case enc
       when :"Identity-H", :"Identity-V", :UTF16Encoding
@@ -180,6 +202,7 @@ class PDF::Reader
       end
     end
+    #: (Symbol) -> String?
     def get_mapping_file(enc)
       case enc
       when :"Identity-H", :"Identity-V", :UTF16Encoding then
@@ -201,10 +224,12 @@ class PDF::Reader
       end
     end
+    #: () -> PDF::Reader::GlyphHash
     def glyphlist
       # Lazily create the PDF::Reader::GlyphHash on first call and hand back
       # the same instance on every later call.
       @glyphlist ||= PDF::Reader::GlyphHash.new
     end
+    #: (String) -> void
     def load_mapping(file)
       File.open(file, "r:BINARY") do |f|
         f.each do |l|

data/lib/pdf/reader/error.rb CHANGED Viewed

@@ -31,30 +31,36 @@ class PDF::Reader
   # are valid
   class Error # :nodoc:
     ################################################################################
+    #: (untyped, untyped, ?untyped) -> untyped
     def self.str_assert(lvalue, rvalue, chars=nil)
       # Assert that lvalue equals rvalue. When chars is given, lvalue must be
       # a String and only its first `chars` characters are compared.
       # Raises MalformedPDFError on a type or value mismatch.
       if chars
         unless lvalue.kind_of?(String)
           raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead"
         end
         lvalue = lvalue[0,chars]
       end
       if lvalue != rvalue
         raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead"
       end
     end
     ################################################################################
+    #: (untyped, untyped, ?untyped) -> untyped
     def self.str_assert_not(lvalue, rvalue, chars=nil)
       # Assert that lvalue differs from rvalue. When chars is given, lvalue
       # must be a String and only its first `chars` characters are compared.
       # Raises MalformedPDFError on a type mismatch or when the values match.
       if chars
         unless lvalue.kind_of?(String)
           raise MalformedPDFError, "PDF malformed, expected string but found #{lvalue.class} instead"
         end
         lvalue = lvalue[0,chars]
       end
       if lvalue == rvalue
         raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead"
       end
     end
     ################################################################################
+    #: (untyped, untyped) -> untyped
     def self.assert_equal(lvalue, rvalue)
       # Raise MalformedPDFError unless the two values compare equal.
       mismatch = lvalue != rvalue
       return unless mismatch

       raise MalformedPDFError, "PDF malformed, expected '#{rvalue}' but found '#{lvalue}' instead"
     end
     ################################################################################
+    #: (Object, String, Module) -> void
     def self.validate_type(object, name, klass)
       # Guard for caller-supplied arguments: raises ArgumentError, naming the
       # argument and showing the offending value, unless object is a klass.
       return if object.is_a?(klass)

       raise ArgumentError, "#{name} (#{object}) must be a #{klass}"
     end
     ################################################################################
+    #: (Object, String, Module) -> void
     def self.validate_type_as_malformed(object, name, klass)
       # Same check as a plain type validation, but a mismatch is reported as
       # MalformedPDFError rather than ArgumentError.
       return if object.is_a?(klass)

       raise MalformedPDFError, "#{name} (#{object}) must be a #{klass}"
     end
     ################################################################################
+    #: (Object, String) -> void
     def self.validate_not_nil(object, name)
       # Raises ArgumentError when object is nil; returns nil otherwise.
       # The message identifies the argument by `name`. Interpolating `object`
       # would always produce an empty string here, because the raise only
       # fires when object is nil and nil.to_s is "".
       raise ArgumentError, "#{name} must not be nil" if object.nil?
     end

data/lib/pdf/reader/filter/ascii85.rb CHANGED Viewed

@@ -9,6 +9,7 @@ class PDF::Reader
     # implementation of the Ascii85 filter
     class Ascii85
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted).
         @options = options
       end
@@ -17,6 +18,7 @@ class PDF::Reader
       # Decode the specified data using the Ascii85 algorithm. Relies on the Ascii85
       # rubygem.
       #
+      #: (String) -> String
       def filter(data)
         data = "<~#{data}" unless data.to_s[0,2] == "<~"
         if defined?(::Ascii85Native)

data/lib/pdf/reader/filter/ascii_hex.rb CHANGED Viewed

@@ -8,6 +8,7 @@ class PDF::Reader
     # implementation of the AsciiHex stream filter
     class AsciiHex
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted).
         @options = options
       end
@@ -15,6 +16,7 @@ class PDF::Reader
       ################################################################################
       # Decode the specified data using the AsciiHex algorithm.
       #
+      #: (String) -> String
       def filter(data)
         data.chop! if data[-1,1] == ">"
         data = data[1,data.size] if data[0,1] == "<"

data/lib/pdf/reader/filter/depredict.rb CHANGED Viewed

@@ -8,6 +8,7 @@ class PDF::Reader
     # improve compression
     class Depredict
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted);
         # #filter later reads the :Predictor entry from it.
         @options = options
       end
@@ -16,6 +17,7 @@ class PDF::Reader
       # Streams can be preprocessed to improve compression. This reverses the
       # preprocessing
       #
+      #: (String) -> String
       def filter(data)
         predictor = @options[:Predictor].to_i
@@ -34,6 +36,7 @@ class PDF::Reader
       private
       ################################################################################
+      #: (untyped) -> String
       def tiff_depredict(data)
         data        = data.unpack("C*")
         unfiltered  = ''
@@ -60,6 +63,7 @@ class PDF::Reader
         unfiltered
       end
       ################################################################################
+      #: (untyped) -> String
       def png_depredict(data)
         return data if @options[:Predictor].to_i < 10

data/lib/pdf/reader/filter/flate.rb CHANGED Viewed

@@ -10,15 +10,17 @@ class PDF::Reader
     # implementation of the Flate (zlib) stream filter
     class Flate
-      ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47  # Zlib::MAX_WBITS + 32
-      ZLIB_RAW_DEFLATE              = -15 # Zlib::MAX_WBITS * -1
+      ZLIB_AUTO_DETECT_ZLIB_OR_GZIP = 47 #: Integer  # Zlib::MAX_WBITS + 32
+      ZLIB_RAW_DEFLATE              = -15 #: Integer # Zlib::MAX_WBITS * -1
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted).
         @options = options
       end
       ################################################################################
       # Decode the specified data with the Zlib compression algorithm
+      #: (String) -> String
       def filter(data)
         deflated = zlib_inflate(data) || zlib_inflate(data[0, data.bytesize-1])
@@ -31,6 +33,7 @@ class PDF::Reader
       private
+      #: (untyped) -> untyped
       def zlib_inflate(data)
         begin
           return Zlib::Inflate.new(ZLIB_AUTO_DETECT_ZLIB_OR_GZIP).inflate(data)

data/lib/pdf/reader/filter/lzw.rb CHANGED Viewed

@@ -8,12 +8,14 @@ class PDF::Reader
     # implementation of the LZW stream filter
     class Lzw
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted);
         # #filter forwards it to Depredict after LZW decoding.
         @options = options
       end
       ################################################################################
       # Decode the specified data with the LZW compression algorithm
+      #: (String) -> String
       def filter(data)
         data = PDF::Reader::LZW.decode(data)
         Depredict.new(@options).filter(data)

data/lib/pdf/reader/filter/null.rb CHANGED Viewed

@@ -6,10 +6,12 @@ class PDF::Reader
   module Filter # :nodoc:
     # implementation of the null stream filter
     class Null
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted).
         @options = options
       end
+      #: (String) -> String
       def filter(data)
         # Pass-through: the data is returned exactly as received.
         data
       end

data/lib/pdf/reader/filter/run_length.rb CHANGED Viewed

@@ -8,12 +8,14 @@ class PDF::Reader # :nodoc:
     # implementation of the run length stream filter
     class RunLength
+      #: (?Hash[untyped, untyped]) -> void
       def initialize(options = {})
         # Keep the caller-supplied options hash (an empty hash when omitted).
         @options = options
       end
       ################################################################################
       # Decode the specified data with the RunLengthDecode compression algorithm
+      #: (String) -> String
       def filter(data)
         pos = 0
         out = "".dup

data/lib/pdf/reader/filter.rb CHANGED Viewed

@@ -41,6 +41,7 @@ class PDF::Reader
     # Filters that are only used to encode image data are accepted, but the data is
     # returned untouched. At this stage PDF::Reader has no need to decode images.
     #
+    #: (Symbol, ?Hash[untyped, untyped]) -> untyped
     def self.with(name, options = {})
       case name
       when :ASCII85Decode, :A85   then PDF::Reader::Filter::Ascii85.new(options)

data/lib/pdf/reader/font.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # coding: utf-8
-# typed: true
+# typed: strict
 # frozen_string_literal: true
 ################################################################################
@@ -29,48 +29,99 @@
 require 'pdf/reader/width_calculator'
 class PDF::Reader
   # Represents a single font PDF object and provides some useful methods
   # for extracting info. Mainly used for converting text to UTF-8.
   #
   class Font
-    attr_accessor :subtype, :encoding, :descendantfonts, :tounicode
-    attr_reader :widths, :first_char, :last_char, :basefont, :font_descriptor,
-                :cid_widths, :cid_default_width
+    #: type widthCalculator = (
+    #|   PDF::Reader::WidthCalculator::TypeZero |
+    #|   PDF::Reader::WidthCalculator::BuiltIn |
+    #|   PDF::Reader::WidthCalculator::TypeOneOrThree |
+    #|   PDF::Reader::WidthCalculator::TrueType |
+    #|   PDF::Reader::WidthCalculator::Composite
+    #| )
+    #: Symbol?
+    attr_accessor :subtype
+    #: PDF::Reader::Encoding
+    attr_accessor :encoding
+    #: Array[PDF::Reader::Font]
+    attr_accessor :descendantfonts
+    #: PDF::Reader::CMap | nil
+    attr_accessor :tounicode
+    #: Array[Numeric]
+    attr_reader :widths
+    #: Integer?
+    attr_reader :first_char
+    #: Integer?
+    attr_reader :last_char
+    #: Symbol?
+    attr_reader :basefont
+    #: PDF::Reader::FontDescriptor?
+    attr_reader :font_descriptor
+    #: Array[Numeric]
+    attr_reader :cid_widths
+    #: Numeric
+    attr_reader :cid_default_width
+    #: (PDF::Reader::ObjectHash, Hash[Symbol, untyped]) -> void
     def initialize(ohash, obj)
       @ohash = ohash
-      @tounicode = nil
+      @tounicode = nil #: PDF::Reader::CMap | nil
+      @descendantfonts = [] #: Array[PDF::Reader::Font]
+      @widths = [] #: Array[Numeric]
+      @first_char = nil #: Integer?
+      @last_char = nil #: Integer?
+      @basefont = nil #: Symbol?
+      @font_descriptor = nil #: PDF::Reader::FontDescriptor?
+      @cid_widths = [] #: Array[Numeric]
+      @cid_default_width = 0 #: Numeric
+      @encoding = PDF::Reader::Encoding.new(:StandardEncoding) #: PDF::Reader::Encoding
+      @cached_widths = {} #: Hash[Integer, Numeric]
+      @font_matrix = nil #: Array[Numeric] | nil
       extract_base_info(obj)
       extract_type3_info(obj)
       extract_descriptor(obj)
       extract_descendants(obj)
-      @width_calc = build_width_calculator
-      @encoding ||= PDF::Reader::Encoding.new(:StandardEncoding)
+      @width_calc = build_width_calculator #: widthCalculator
     end
+    #: (Integer | String | Array[Integer | String]) -> String
     def to_utf8(params)
       if @tounicode
-        to_utf8_via_cmap(params)
+        to_utf8_via_cmap(params, @tounicode)
       else
         to_utf8_via_encoding(params)
       end
     end
+    #: (String) -> (Array[Integer | Float | String | nil] | nil)
     def unpack(data)
       # Split data into values using the unpack directive supplied by this
       # font's encoding object.
       directive = encoding.unpack
       data.unpack(directive)
     end
     # looks up the specified codepoint and returns a value that is in (pdf)
     # glyph space, which is 1000 glyph units = 1 text space unit
+    #: (Integer | String) -> Numeric
     def glyph_width(code_point)
       if code_point.is_a?(String)
-        code_point = code_point.unpack(encoding.unpack).first
+        code_point = unpack_string_to_array_of_ints(code_point, encoding.unpack).first
+        raise MalformedPDFError, "code point missing" if code_point.nil?
       end
-      @cached_widths ||= {}
       @cached_widths[code_point] ||= @width_calc.glyph_width(code_point)
     end
@@ -78,6 +129,7 @@ class PDF::Reader
     #
     # However, Type3 fonts provide their own FontMatrix that's used for the transformation.
     #
+    #: (Integer | String) -> Numeric
     def glyph_width_in_text_space(code_point)
       glyph_width_in_glyph_space = glyph_width(code_point)
@@ -93,13 +145,14 @@ class PDF::Reader
     private
     # Only valid for Type3 fonts
+    #: (Numeric, Numeric) -> [Numeric, Numeric]
     def font_matrix_transform(x, y)
       return x, y if @font_matrix.nil?
       matrix = TransformationMatrix.new(
-        @font_matrix[0], @font_matrix[1],
-        @font_matrix[2], @font_matrix[3],
-        @font_matrix[4], @font_matrix[5],
+        @font_matrix[0] || 0, @font_matrix[1] || 0,
+        @font_matrix[2] || 0, @font_matrix[3] || 0,
+        @font_matrix[4] || 0, @font_matrix[5] || 0,
       )
       if x == 0 && y == 0
@@ -112,6 +165,7 @@ class PDF::Reader
       end
     end
+    #: (Symbol | String | nil) -> PDF::Reader::Encoding
     def default_encoding(font_name)
       case font_name.to_s
       when "Symbol" then
@@ -123,6 +177,7 @@ class PDF::Reader
       end
     end
+    #: () -> widthCalculator
     def build_width_calculator
       if @subtype == :Type0
         PDF::Reader::WidthCalculator::TypeZero.new(self)
@@ -149,6 +204,7 @@ class PDF::Reader
       end
     end
+    #: (Hash[Symbol, untyped]) -> PDF::Reader::Encoding
     def build_encoding(obj)
       if obj[:Encoding].is_a?(Symbol)
         # one of the standard encodings, referenced by name
@@ -163,6 +219,7 @@ class PDF::Reader
       end
     end
+    #: (Hash[Symbol, untyped]) -> void
     def extract_base_info(obj)
       @subtype  = @ohash.deref_name(obj[:Subtype])
       @basefont = @ohash.deref_name(obj[:BaseFont])
@@ -185,6 +242,7 @@ class PDF::Reader
       end
     end
+    #: (Hash[Symbol, untyped]) -> void
     def extract_type3_info(obj)
       if @subtype == :Type3
         @font_matrix = @ohash.deref_array_of_numbers(obj[:FontMatrix]) || [
@@ -193,46 +251,50 @@ class PDF::Reader
       end
     end
+    #: (Hash[Symbol, untyped]) -> void
     def extract_descriptor(obj)
       if obj[:FontDescriptor]
         # create a font descriptor object if we can, in other words, unless this is
         # a CID Font
-        fd = @ohash.deref_hash(obj[:FontDescriptor])
+        fd = @ohash.deref_hash(obj[:FontDescriptor]) || {}
         @font_descriptor = PDF::Reader::FontDescriptor.new(@ohash, fd)
       else
         @font_descriptor = nil
       end
     end
+    #: (Hash[Symbol, untyped]) -> void
     def extract_descendants(obj)
       # per PDF 32000-1:2008 p. 280, :DescendantFonts is:
       # A one-element array specifying the CIDFont dictionary that is the
       # descendant of this Type 0 font.
       if obj[:DescendantFonts]
-        descendants = @ohash.deref_array(obj[:DescendantFonts])
+        descendants = @ohash.deref_array(obj[:DescendantFonts]) || []
         @descendantfonts = descendants.map { |desc|
-          PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc))
+          PDF::Reader::Font.new(@ohash, @ohash.deref_hash(desc) || {})
         }
       else
         @descendantfonts = []
       end
     end
-    def to_utf8_via_cmap(params)
+    #: (Integer | String | Array[Integer | String], PDF::Reader::CMap) -> String
+    def to_utf8_via_cmap(params, cmap)
       case params
       when Integer
         [
-          @tounicode.decode(params) || PDF::Reader::Encoding::UNKNOWN_CHAR
+          cmap.decode(params)
         ].flatten.pack("U*")
       when String
-        params.unpack(encoding.unpack).map { |c|
-          @tounicode.decode(c) || PDF::Reader::Encoding::UNKNOWN_CHAR
+        unpack_string_to_array_of_ints(params, encoding.unpack).map { |code_point|
+          cmap.decode(code_point)
         }.flatten.pack("U*")
       when Array
-        params.collect { |param| to_utf8_via_cmap(param) }.join("")
+        params.collect { |param| to_utf8_via_cmap(param, cmap) }.join("")
       end
     end
+    #: (Integer | String | Array[Integer | String]) -> String
     def to_utf8_via_encoding(params)
       if encoding.kind_of?(String)
         raise UnsupportedFeatureError, "font encoding '#{encoding}' currently unsupported"
@@ -248,5 +310,11 @@ class PDF::Reader
       end
     end
+    #: (String, String) -> Array[Integer]
+    def unpack_string_to_array_of_ints(unpack_me, unpack_arg)
+      unpack_me.unpack(unpack_arg).map { |code_point|
+        code_point = TypeCheck.cast_to_int!(code_point)
+      }
+    end
   end
 end