RubyGems - reline - Versions diffs - 0.5.9 → 0.6.1 - Mend

reline 0.5.9 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (20)

checksums.yaml +4 -4
data/lib/reline/config.rb +41 -50
data/lib/reline/face.rb +1 -1
data/lib/reline/history.rb +3 -3
data/lib/reline/io/ansi.rb +77 -123
data/lib/reline/io/dumb.rb +16 -2
data/lib/reline/io/windows.rb +94 -67
data/lib/reline/io.rb +14 -0
data/lib/reline/key_actor/base.rb +10 -4
data/lib/reline/key_actor/emacs.rb +96 -96
data/lib/reline/key_actor/vi_command.rb +182 -182
data/lib/reline/key_actor/vi_insert.rb +137 -137
data/lib/reline/key_stroke.rb +26 -16
data/lib/reline/line_editor.rb +331 -534
data/lib/reline/unicode/east_asian_width.rb +1289 -1192
data/lib/reline/unicode.rb +155 -436
data/lib/reline/version.rb +1 -1
data/lib/reline.rb +46 -34
metadata +3 -7
data/lib/reline/terminfo.rb +0 -158

data/lib/reline/unicode.rb CHANGED

@@ -28,12 +28,12 @@ class Reline::Unicode
     0x19 => '^Y',
     0x1A => '^Z', # C-z
     0x1B => '^[', # C-[ C-3
+    0x1C => '^\\', # C-\
     0x1D => '^]', # C-]
     0x1E => '^^', # C-~ C-6
     0x1F => '^_', # C-_ C-7
     0x7F => '^?', # C-? C-8
   }
-  EscapedChars = EscapedPairs.keys.map(&:chr)
   NON_PRINTING_START = "\1"
   NON_PRINTING_END = "\2"
@@ -54,53 +54,44 @@ class Reline::Unicode
     }.join
   end
-  require 'reline/unicode/east_asian_width'
+  def self.safe_encode(str, encoding)
+    # Reline only supports utf-8 convertible string.
+    converted = str.encode(encoding, invalid: :replace, undef: :replace)
+    return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
+    # This code is essentially doing the same thing as
+    # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
+    # but also avoids unnecessary irreversible encoding conversion.
+    converted.gsub(/\X/) do |c|
+      c.encode(Encoding::UTF_8)
+      c
+    rescue Encoding::UndefinedConversionError
+      '?'
+    end
+  end
-  HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
-  MBCharWidthRE = /
-    (?<width_2_1>
-      [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
-    )
-  | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
-  | (?<width_0>^\p{M})
-  | (?<width_2_2>
-      #{ EastAsianWidth::TYPE_F }
-    | #{ EastAsianWidth::TYPE_W }
-    )
-  | (?<width_1>
-      #{ EastAsianWidth::TYPE_H }
-    | #{ EastAsianWidth::TYPE_NA }
-    | #{ EastAsianWidth::TYPE_N }
-    )(?!#{ HalfwidthDakutenHandakuten })
-  | (?<width_2_3>
-      (?: #{ EastAsianWidth::TYPE_H }
-        | #{ EastAsianWidth::TYPE_NA }
-        | #{ EastAsianWidth::TYPE_N })
-      #{ HalfwidthDakutenHandakuten }
-    )
-  | (?<ambiguous_width>
-      #{EastAsianWidth::TYPE_A}
-    )
-  /x
+  require 'reline/unicode/east_asian_width'
   def self.get_mbchar_width(mbchar)
     ord = mbchar.ord
-    if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
+    if ord <= 0x1F # in EscapedPairs
       return 2
-    elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
+    elsif ord <= 0x7E # printable ASCII chars
       return 1
     end
-    m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
-    case
-    when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
-    when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
-    when m[:width_3] then 3
-    when m[:width_0] then 0
-    when m[:width_1] then 1
-    when m[:ambiguous_width] then Reline.ambiguous_width
+    utf8_mbchar = mbchar.encode(Encoding::UTF_8)
+    ord = utf8_mbchar.ord
+    chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
+    size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
+    if size == -1
+      Reline.ambiguous_width
+    elsif size == 1 && utf8_mbchar.size >= 2
+      second_char_ord = utf8_mbchar[1].ord
+      # Halfwidth Dakuten Handakuten
+      # Only these two character has Letter Modifier category and can be combined in a single grapheme cluster
+      (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
     else
-      nil
+      size
     end
   end
@@ -130,9 +121,14 @@ class Reline::Unicode
     end
   end
-  def self.split_by_width(str, max_width, encoding = str.encoding, offset: 0)
+  # This method is used by IRB
+  def self.split_by_width(str, max_width)
+    lines = split_line_by_width(str, max_width)
+    [lines, lines.size]
+  end
+  def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
     lines = [String.new(encoding: encoding)]
-    height = 1
     width = offset
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
@@ -141,10 +137,8 @@ class Reline::Unicode
       case
       when non_printing_start
         in_zero_width = true
-        lines.last << NON_PRINTING_START
       when non_printing_end
         in_zero_width = false
-        lines.last << NON_PRINTING_END
       when csi
         lines.last << csi
         unless in_zero_width
@@ -156,15 +150,13 @@ class Reline::Unicode
         end
       when osc
         lines.last << osc
-        seq << osc
+        seq << osc unless in_zero_width
       when gc
         unless in_zero_width
           mbchar_width = get_mbchar_width(gc)
           if (width += mbchar_width) > max_width
             width = mbchar_width
-            lines << nil
             lines << seq.dup
-            height += 1
           end
         end
         lines.last << gc
@@ -172,11 +164,13 @@ class Reline::Unicode
     end
     # The cursor moves to next line in first
     if width == max_width
-      lines << nil
       lines << String.new(encoding: encoding)
-      height += 1
     end
-    [lines, height]
+    lines
+  end
+  def self.strip_non_printing_start_end(prompt)
+    prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) { $1 }
   end
   # Take a chunk of a String cut by width with escape sequences.
@@ -198,10 +192,8 @@ class Reline::Unicode
       case
       when non_printing_start
         in_zero_width = true
-        chunk << NON_PRINTING_START
       when non_printing_end
         in_zero_width = false
-        chunk << NON_PRINTING_END
       when csi
         has_csi = true
         chunk << csi
@@ -270,427 +262,154 @@ class Reline::Unicode
   end
   def self.em_forward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
   end
   def self.em_forward_word_with_capitalization(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    new_str = String.new
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      new_str += mbchar
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    first = true
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      if first
-        new_str += mbchar.upcase
-        first = false
-      else
-        new_str += mbchar.downcase
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width, new_str]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
   end
   def self.em_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
   end
   def self.em_big_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
   end
   def self.ed_transpose_words(line, byte_pointer)
-    right_word_start = nil
-    size = get_next_mbchar_size(line, byte_pointer)
-    mbchar = line.byteslice(byte_pointer, size)
-    if size.zero?
-      # ' aaa bbb [cursor]'
-      byte_size = 0
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-        byte_size -= size
-      end
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size -= size
-      end
-      right_word_start = byte_pointer + byte_size
-      byte_size = 0
-      while line.bytesize > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size += size
-      end
-      after_start = byte_pointer + byte_size
-    elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      # ' aaa bb[cursor]b'
-      byte_size = 0
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size -= size
-      end
-      right_word_start = byte_pointer + byte_size
-      byte_size = 0
-      while line.bytesize > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size += size
-      end
-      after_start = byte_pointer + byte_size
-    else
-      byte_size = 0
-      while (line.bytesize - 1) > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-        byte_size += size
-      end
-      if (byte_pointer + byte_size) == (line.bytesize - 1)
-        # ' aaa bbb [cursor] '
-        after_start = line.bytesize
-        while 0 < (byte_pointer + byte_size)
-          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-          byte_size -= size
-        end
-        while 0 < (byte_pointer + byte_size)
-          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-          byte_size -= size
-        end
-        right_word_start = byte_pointer + byte_size
-      else
-        # ' aaa [cursor] bbb '
-        right_word_start = byte_pointer + byte_size
-        while line.bytesize > (byte_pointer + byte_size)
-          size = get_next_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-          byte_size += size
-        end
-        after_start = byte_pointer + byte_size
-      end
-    end
-    byte_size = right_word_start - byte_pointer
-    while 0 < (byte_pointer + byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      byte_size -= size
-    end
-    middle_start = byte_pointer + byte_size
-    byte_size = middle_start - byte_pointer
-    while 0 < (byte_pointer + byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      byte_size -= size
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters
+    pos = gcs.size
+    gcs += line.byteslice(byte_pointer..).grapheme_clusters
+    pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
+    if pos == gcs.size # 'aaa  bbb [cursor] '
+      pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
+      second_word_end = gcs.size
+    else # 'aaa  [cursor]bbb'
+      pos += 1 while pos < gcs.size && word_character?(gcs[pos])
+      second_word_end = pos
+    end
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
+    second_word_start = pos
+    pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
+    first_word_end = pos
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
+    first_word_start = pos
+    [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
+      gcs.take(idx).sum(&:bytesize)
     end
-    left_word_start = byte_pointer + byte_size
-    [left_word_start, middle_start, right_word_start, after_start]
   end
   def self.vi_big_forward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonspaces = gcs.take_while { |c| !space_character?(c) }
+    spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
+    nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
   end
   def self.vi_big_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    prev_width = width
-    prev_byte_size = byte_size
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\s/
-      prev_width = width
-      prev_byte_size = byte_size
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [prev_byte_size, prev_width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    first = gcs.shift(1)
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    matched = spaces + nonspaces
+    matched.pop
+    first.sum(&:bytesize) + matched.sum(&:bytesize)
   end
   def self.vi_big_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
   end
   def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
-    if line.bytesize > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      if mbchar =~ /\w/
-        started_by = :word
-      elsif mbchar =~ /\s/
-        started_by = :space
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    return 0 if gcs.empty?
+    c = gcs.first
+    matched =
+      if word_character?(c)
+        gcs.take_while { |c| word_character?(c) }
+      elsif space_character?(c)
+        gcs.take_while { |c| space_character?(c) }
       else
-        started_by = :non_word_printable
-      end
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :space
-        break if mbchar =~ /\S/
-      when :non_word_printable
-        break if mbchar =~ /\w|\s/
+        gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
       end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    return [byte_size, width] if drop_terminate_spaces
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    return matched.sum(&:bytesize) if drop_terminate_spaces
+    spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
+    matched.sum(&:bytesize) + spaces.sum(&:bytesize)
   end
   def self.vi_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      if mbchar =~ /\w/
-        started_by = :word
-      elsif mbchar =~ /\s/
-        started_by = :space
-      else
-        started_by = :non_word_printable
-      end
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    if (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      if mbchar =~ /\w/
-        second = :word
-      elsif mbchar =~ /\s/
-        second = :space
-      else
-        second = :non_word_printable
-      end
-      second_width = get_mbchar_width(mbchar)
-      second_byte_size = size
-    else
-      return [byte_size, width]
-    end
-    if second == :space
-      width += second_width
-      byte_size += second_byte_size
-      while (line.bytesize - 1) > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        if mbchar =~ /\S/
-          if mbchar =~ /\w/
-            started_by = :word
-          else
-            started_by = :non_word_printable
-          end
-          break
-        end
-        width += get_mbchar_width(mbchar)
-        byte_size += size
-      end
-    else
-      case [started_by, second]
-      when [:word, :non_word_printable], [:non_word_printable, :word]
-        started_by = second
-      else
-        width += second_width
-        byte_size += second_byte_size
-        started_by = second
-      end
-    end
-    prev_width = width
-    prev_byte_size = byte_size
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :non_word_printable
-        break if mbchar =~ /[\w\s]/
-      end
-      prev_width = width
-      prev_byte_size = byte_size
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [prev_byte_size, prev_width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    return 0 if gcs.empty?
+    return gcs.first.bytesize if gcs.size == 1
+    start = gcs.shift
+    skips = [start]
+    if space_character?(start) || space_character?(gcs.first)
+      spaces = gcs.take_while { |c| space_character?(c) }
+      skips += spaces
+      gcs.shift(spaces.size)
+    end
+    start_with_word = word_character?(gcs.first)
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
+    matched.pop
+    skips.sum(&:bytesize) + matched.sum(&:bytesize)
   end
   def self.vi_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      if mbchar =~ /\S/
-        if mbchar =~ /\w/
-          started_by = :word
-        else
-          started_by = :non_word_printable
-        end
-        break
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :non_word_printable
-        break if mbchar =~ /[\w\s]/
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    gcs.shift(spaces.size)
+    start_with_word = word_character?(gcs.first)
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
+    spaces.sum(&:bytesize) + matched.sum(&:bytesize)
+  end
+  def self.common_prefix(list, ignore_case: false)
+    return '' if list.empty?
+    common_prefix_gcs = list.first.grapheme_clusters
+    list.each do |item|
+      gcs = item.grapheme_clusters
+      common_prefix_gcs = common_prefix_gcs.take_while.with_index do |gc, i|
+        ignore_case ? gc.casecmp?(gcs[i]) : gc == gcs[i]
       end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
     end
-    [byte_size, width]
+    common_prefix_gcs.join
   end
   def self.vi_first_print(line)
-    width = 0
-    byte_size = 0
-    while (line.bytesize - 1) > byte_size
-      size = get_next_mbchar_size(line, byte_size)
-      mbchar = line.byteslice(byte_size, size)
-      if mbchar =~ /\S/
-        break
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.grapheme_clusters
+    spaces = gcs.take_while { |c| space_character?(c) }
+    spaces.sum(&:bytesize)
+  end
+  def self.word_character?(s)
+    s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
+  rescue Encoding::UndefinedConversionError
+    false
+  end
+  def self.space_character?(s)
+    s.match?(/\s/) if s
   end
 end