RubyGems - reline - Versions diffs - 0.3.5 → 0.6.2 - Mend

reline 0.3.5 → 0.6.2

This diff shows the differences between the contents of two publicly available package versions, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Files changed (26)

checksums.yaml +4 -4
data/README.md +31 -1
data/lib/reline/config.rb +101 -124
data/lib/reline/face.rb +199 -0
data/lib/reline/history.rb +4 -4
data/lib/reline/io/ansi.rb +322 -0
data/lib/reline/io/dumb.rb +120 -0
data/lib/reline/{windows.rb → io/windows.rb} +182 -153
data/lib/reline/io.rb +55 -0
data/lib/reline/key_actor/base.rb +27 -9
data/lib/reline/key_actor/composite.rb +17 -0
data/lib/reline/key_actor/emacs.rb +103 -103
data/lib/reline/key_actor/vi_command.rb +188 -188
data/lib/reline/key_actor/vi_insert.rb +144 -144
data/lib/reline/key_actor.rb +1 -0
data/lib/reline/key_stroke.rb +94 -80
data/lib/reline/kill_ring.rb +2 -2
data/lib/reline/line_editor.rb +1177 -2110
data/lib/reline/unicode/east_asian_width.rb +1288 -1192
data/lib/reline/unicode.rb +224 -465
data/lib/reline/version.rb +1 -1
data/lib/reline.rb +167 -247
metadata +13 -11
data/lib/reline/ansi.rb +0 -357
data/lib/reline/general_io.rb +0 -113
data/lib/reline/terminfo.rb +0 -160

data/lib/reline/unicode.rb CHANGED Viewed

@@ -28,12 +28,12 @@ class Reline::Unicode
     0x19 => '^Y',
     0x1A => '^Z', # C-z
     0x1B => '^[', # C-[ C-3
+    0x1C => '^\\', # C-\
     0x1D => '^]', # C-]
     0x1E => '^^', # C-~ C-6
     0x1F => '^_', # C-_ C-7
     0x7F => '^?', # C-? C-8
   }
-  EscapedChars = EscapedPairs.keys.map(&:chr)
   NON_PRINTING_START = "\1"
   NON_PRINTING_END = "\2"
@@ -41,84 +41,63 @@ class Reline::Unicode
   OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/
   WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
-  def self.get_mbchar_byte_size_by_first_char(c)
-    # Checks UTF-8 character byte size
-    case c.ord
-    # 0b0xxxxxxx
-    when ->(code) { (code ^ 0b10000000).allbits?(0b10000000) } then 1
-    # 0b110xxxxx
-    when ->(code) { (code ^ 0b00100000).allbits?(0b11100000) } then 2
-    # 0b1110xxxx
-    when ->(code) { (code ^ 0b00010000).allbits?(0b11110000) } then 3
-    # 0b11110xxx
-    when ->(code) { (code ^ 0b00001000).allbits?(0b11111000) } then 4
-    # 0b111110xx
-    when ->(code) { (code ^ 0b00000100).allbits?(0b11111100) } then 5
-    # 0b1111110x
-    when ->(code) { (code ^ 0b00000010).allbits?(0b11111110) } then 6
-    # successor of mbchar
-    else 0
-    end
-  end
   def self.escape_for_print(str)
     str.chars.map! { |gr|
-      escaped = EscapedPairs[gr.ord]
-      if escaped && gr != -"\n" && gr != -"\t"
-        escaped
-      else
+      case gr
+      when -"\n"
         gr
+      when -"\t"
+        -'  '
+      else
+        EscapedPairs[gr.ord] || gr
       end
     }.join
   end
+  def self.safe_encode(str, encoding)
+    # Reline only supports utf-8 convertible string.
+    converted = str.encode(encoding, invalid: :replace, undef: :replace)
+    return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
+    # This code is essentially doing the same thing as
+    # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
+    # but also avoids unnecessary irreversible encoding conversion.
+    converted.gsub(/\X/) do |c|
+      c.encode(Encoding::UTF_8)
+      c
+    rescue Encoding::UndefinedConversionError
+      '?'
+    end
+  end
   require 'reline/unicode/east_asian_width'
-  HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
-  MBCharWidthRE = /
-    (?<width_2_1>
-      [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
-    )
-  | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
-  | (?<width_0>^\p{M})
-  | (?<width_2_2>
-      #{ EastAsianWidth::TYPE_F }
-    | #{ EastAsianWidth::TYPE_W }
-    )
-  | (?<width_1>
-      #{ EastAsianWidth::TYPE_H }
-    | #{ EastAsianWidth::TYPE_NA }
-    | #{ EastAsianWidth::TYPE_N }
-    )(?!#{ HalfwidthDakutenHandakuten })
-  | (?<width_2_3>
-      (?: #{ EastAsianWidth::TYPE_H }
-        | #{ EastAsianWidth::TYPE_NA }
-        | #{ EastAsianWidth::TYPE_N })
-      #{ HalfwidthDakutenHandakuten }
-    )
-  | (?<ambiguous_width>
-      #{EastAsianWidth::TYPE_A}
-    )
-  /x
+  def self.east_asian_width(ord)
+    chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
+    size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
+    size == -1 ? Reline.ambiguous_width : size
+  end
   def self.get_mbchar_width(mbchar)
     ord = mbchar.ord
-    if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
+    if ord <= 0x1F # in EscapedPairs
       return 2
-    elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
+    elsif mbchar.length == 1 && ord <= 0x7E # printable ASCII chars
       return 1
     end
-    m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
-    case
-    when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
-    when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
-    when m[:width_3] then 3
-    when m[:width_0] then 0
-    when m[:width_1] then 1
-    when m[:ambiguous_width] then Reline.ambiguous_width
-    else
-      nil
+    utf8_mbchar = mbchar.encode(Encoding::UTF_8)
+    zwj = false
+    utf8_mbchar.chars.sum do |c|
+      if zwj
+        zwj = false
+        0
+      elsif c.ord == 0x200D # Zero Width Joiner
+        zwj = true
+        0
+      else
+        east_asian_width(c.ord)
+      end
     end
   end
@@ -148,10 +127,15 @@ class Reline::Unicode
     end
   end
-  def self.split_by_width(str, max_width, encoding = str.encoding)
+  # This method is used by IRB
+  def self.split_by_width(str, max_width)
+    lines = split_line_by_width(str, max_width)
+    [lines, lines.size]
+  end
+  def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
     lines = [String.new(encoding: encoding)]
-    height = 1
-    width = 0
+    width = offset
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
     seq = String.new(encoding: encoding)
@@ -159,24 +143,26 @@ class Reline::Unicode
       case
       when non_printing_start
         in_zero_width = true
-        lines.last << NON_PRINTING_START
       when non_printing_end
         in_zero_width = false
-        lines.last << NON_PRINTING_END
       when csi
         lines.last << csi
-        seq << csi
+        unless in_zero_width
+          if csi == -"\e[m" || csi == -"\e[0m"
+            seq.clear
+          else
+            seq << csi
+          end
+        end
       when osc
         lines.last << osc
-        seq << osc
+        seq << osc unless in_zero_width
       when gc
         unless in_zero_width
           mbchar_width = get_mbchar_width(gc)
           if (width += mbchar_width) > max_width
             width = mbchar_width
-            lines << nil
             lines << seq.dup
-            height += 1
           end
         end
         lines.last << gc
@@ -184,19 +170,30 @@ class Reline::Unicode
     end
     # The cursor moves to next line in first
     if width == max_width
-      lines << nil
       lines << String.new(encoding: encoding)
-      height += 1
     end
-    [lines, height]
+    lines
+  end
+  def self.strip_non_printing_start_end(prompt)
+    prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) { $1 }
   end
   # Take a chunk of a String cut by width with escape sequences.
   def self.take_range(str, start_col, max_width)
+    take_mbchar_range(str, start_col, max_width).first
+  end
+  def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
     chunk = String.new(encoding: str.encoding)
+    end_col = start_col + width
     total_width = 0
     rest = str.encode(Encoding::UTF_8)
     in_zero_width = false
+    chunk_start_col = nil
+    chunk_end_col = nil
+    has_csi = false
     rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
       case
       when non_printing_start
@@ -204,21 +201,56 @@ class Reline::Unicode
       when non_printing_end
         in_zero_width = false
       when csi
+        has_csi = true
         chunk << csi
       when osc
         chunk << osc
       when gc
         if in_zero_width
           chunk << gc
+          next
+        end
+        mbchar_width = get_mbchar_width(gc)
+        prev_width = total_width
+        total_width += mbchar_width
+        if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
+          # Current character haven't reached start_col yet
+          next
+        elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
+          # Add preceding padding. This padding might have background color.
+          chunk << ' ' * (total_width - start_col)
+          chunk_start_col ||= start_col
+          chunk_end_col = total_width
+          next
+        elsif (cover_end ? prev_width < end_col : total_width <= end_col)
+          # Current character is in the range
+          chunk << gc
+          chunk_start_col ||= prev_width
+          chunk_end_col = total_width
+          break if total_width >= end_col
         else
-          mbchar_width = get_mbchar_width(gc)
-          total_width += mbchar_width
-          break if (start_col + max_width) < total_width
-          chunk << gc if start_col < total_width
+          # Current character exceeds end_col
+          if padding && end_col < total_width
+            # Add succeeding padding. This padding might have background color.
+            chunk << ' ' * (end_col - prev_width)
+            chunk_start_col ||= prev_width
+            chunk_end_col = end_col
+          end
+          break
         end
       end
     end
-    chunk
+    chunk_start_col ||= start_col
+    chunk_end_col ||= start_col
+    if padding && chunk_end_col < end_col
+      # Append padding. This padding should not include background color.
+      chunk << "\e[0m" if has_csi
+      chunk << ' ' * (end_col - chunk_end_col)
+      chunk_end_col = end_col
+    end
+    [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
   end
   def self.get_next_mbchar_size(line, byte_pointer)
@@ -236,427 +268,154 @@ class Reline::Unicode
   end
   def self.em_forward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
   end
   def self.em_forward_word_with_capitalization(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    new_str = String.new
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      new_str += mbchar
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    first = true
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      if first
-        new_str += mbchar.upcase
-        first = false
-      else
-        new_str += mbchar.downcase
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width, new_str]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
   end
   def self.em_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    nonwords = gcs.take_while { |c| !word_character?(c) }
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
   end
   def self.em_big_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
   end
   def self.ed_transpose_words(line, byte_pointer)
-    right_word_start = nil
-    size = get_next_mbchar_size(line, byte_pointer)
-    mbchar = line.byteslice(byte_pointer, size)
-    if size.zero?
-      # ' aaa bbb [cursor]'
-      byte_size = 0
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-        byte_size -= size
-      end
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size -= size
-      end
-      right_word_start = byte_pointer + byte_size
-      byte_size = 0
-      while line.bytesize > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size += size
-      end
-      after_start = byte_pointer + byte_size
-    elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      # ' aaa bb[cursor]b'
-      byte_size = 0
-      while 0 < (byte_pointer + byte_size)
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size -= size
-      end
-      right_word_start = byte_pointer + byte_size
-      byte_size = 0
-      while line.bytesize > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-        byte_size += size
-      end
-      after_start = byte_pointer + byte_size
-    else
-      byte_size = 0
-      while (line.bytesize - 1) > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-        byte_size += size
-      end
-      if (byte_pointer + byte_size) == (line.bytesize - 1)
-        # ' aaa bbb [cursor] '
-        after_start = line.bytesize
-        while 0 < (byte_pointer + byte_size)
-          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-          byte_size -= size
-        end
-        while 0 < (byte_pointer + byte_size)
-          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-          byte_size -= size
-        end
-        right_word_start = byte_pointer + byte_size
-      else
-        # ' aaa [cursor] bbb '
-        right_word_start = byte_pointer + byte_size
-        while line.bytesize > (byte_pointer + byte_size)
-          size = get_next_mbchar_size(line, byte_pointer + byte_size)
-          mbchar = line.byteslice(byte_pointer + byte_size, size)
-          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-          byte_size += size
-        end
-        after_start = byte_pointer + byte_size
-      end
-    end
-    byte_size = right_word_start - byte_pointer
-    while 0 < (byte_pointer + byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
-      byte_size -= size
-    end
-    middle_start = byte_pointer + byte_size
-    byte_size = middle_start - byte_pointer
-    while 0 < (byte_pointer + byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
-      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
-      byte_size -= size
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters
+    pos = gcs.size
+    gcs += line.byteslice(byte_pointer..).grapheme_clusters
+    pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
+    if pos == gcs.size # 'aaa  bbb [cursor] '
+      pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
+      second_word_end = gcs.size
+    else # 'aaa  [cursor]bbb'
+      pos += 1 while pos < gcs.size && word_character?(gcs[pos])
+      second_word_end = pos
+    end
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
+    second_word_start = pos
+    pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
+    first_word_end = pos
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
+    first_word_start = pos
+    [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
+      gcs.take(idx).sum(&:bytesize)
     end
-    left_word_start = byte_pointer + byte_size
-    [left_word_start, middle_start, right_word_start, after_start]
   end
   def self.vi_big_forward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    nonspaces = gcs.take_while { |c| !space_character?(c) }
+    spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
+    nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
   end
   def self.vi_big_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    prev_width = width
-    prev_byte_size = byte_size
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\s/
-      prev_width = width
-      prev_byte_size = byte_size
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [prev_byte_size, prev_width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    first = gcs.shift(1)
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    matched = spaces + nonspaces
+    matched.pop
+    first.sum(&:bytesize) + matched.sum(&:bytesize)
   end
   def self.vi_big_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      break if mbchar =~ /\s/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
   end
   def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
-    if line.bytesize > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      if mbchar =~ /\w/
-        started_by = :word
-      elsif mbchar =~ /\s/
-        started_by = :space
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    return 0 if gcs.empty?
+    c = gcs.first
+    matched =
+      if word_character?(c)
+        gcs.take_while { |c| word_character?(c) }
+      elsif space_character?(c)
+        gcs.take_while { |c| space_character?(c) }
       else
-        started_by = :non_word_printable
+        gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
       end
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :space
-        break if mbchar =~ /\S/
-      when :non_word_printable
-        break if mbchar =~ /\w|\s/
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    return [byte_size, width] if drop_terminate_spaces
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      break if mbchar =~ /\S/
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    return matched.sum(&:bytesize) if drop_terminate_spaces
+    spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
+    matched.sum(&:bytesize) + spaces.sum(&:bytesize)
   end
   def self.vi_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
-      size = get_next_mbchar_size(line, byte_pointer)
-      mbchar = line.byteslice(byte_pointer, size)
-      if mbchar =~ /\w/
-        started_by = :word
-      elsif mbchar =~ /\s/
-        started_by = :space
-      else
-        started_by = :non_word_printable
-      end
-      width = get_mbchar_width(mbchar)
-      byte_size = size
-    else
-      return [0, 0]
-    end
-    if (line.bytesize - 1) > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      if mbchar =~ /\w/
-        second = :word
-      elsif mbchar =~ /\s/
-        second = :space
-      else
-        second = :non_word_printable
-      end
-      second_width = get_mbchar_width(mbchar)
-      second_byte_size = size
-    else
-      return [byte_size, width]
-    end
-    if second == :space
-      width += second_width
-      byte_size += second_byte_size
-      while (line.bytesize - 1) > (byte_pointer + byte_size)
-        size = get_next_mbchar_size(line, byte_pointer + byte_size)
-        mbchar = line.byteslice(byte_pointer + byte_size, size)
-        if mbchar =~ /\S/
-          if mbchar =~ /\w/
-            started_by = :word
-          else
-            started_by = :non_word_printable
-          end
-          break
-        end
-        width += get_mbchar_width(mbchar)
-        byte_size += size
-      end
-    else
-      case [started_by, second]
-      when [:word, :non_word_printable], [:non_word_printable, :word]
-        started_by = second
-      else
-        width += second_width
-        byte_size += second_byte_size
-        started_by = second
-      end
-    end
-    prev_width = width
-    prev_byte_size = byte_size
-    while line.bytesize > (byte_pointer + byte_size)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :non_word_printable
-        break if mbchar =~ /[\w\s]/
-      end
-      prev_width = width
-      prev_byte_size = byte_size
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [prev_byte_size, prev_width]
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
+    return 0 if gcs.empty?
+    return gcs.first.bytesize if gcs.size == 1
+    start = gcs.shift
+    skips = [start]
+    if space_character?(start) || space_character?(gcs.first)
+      spaces = gcs.take_while { |c| space_character?(c) }
+      skips += spaces
+      gcs.shift(spaces.size)
+    end
+    start_with_word = word_character?(gcs.first)
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
+    matched.pop
+    skips.sum(&:bytesize) + matched.sum(&:bytesize)
   end
   def self.vi_backward_word(line, byte_pointer)
-    width = 0
-    byte_size = 0
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      if mbchar =~ /\S/
-        if mbchar =~ /\w/
-          started_by = :word
-        else
-          started_by = :non_word_printable
-        end
-        break
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    while 0 < (byte_pointer - byte_size)
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
-      case started_by
-      when :word
-        break if mbchar =~ /\W/
-      when :non_word_printable
-        break if mbchar =~ /[\w\s]/
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
+    spaces = gcs.take_while { |c| space_character?(c) }
+    gcs.shift(spaces.size)
+    start_with_word = word_character?(gcs.first)
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
+    spaces.sum(&:bytesize) + matched.sum(&:bytesize)
+  end
+  def self.common_prefix(list, ignore_case: false)
+    return '' if list.empty?
+    common_prefix_gcs = list.first.grapheme_clusters
+    list.each do |item|
+      gcs = item.grapheme_clusters
+      common_prefix_gcs = common_prefix_gcs.take_while.with_index do |gc, i|
+        ignore_case ? gc.casecmp?(gcs[i]) : gc == gcs[i]
       end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
     end
-    [byte_size, width]
+    common_prefix_gcs.join
   end
   def self.vi_first_print(line)
-    width = 0
-    byte_size = 0
-    while (line.bytesize - 1) > byte_size
-      size = get_next_mbchar_size(line, byte_size)
-      mbchar = line.byteslice(byte_size, size)
-      if mbchar =~ /\S/
-        break
-      end
-      width += get_mbchar_width(mbchar)
-      byte_size += size
-    end
-    [byte_size, width]
+    gcs = line.grapheme_clusters
+    spaces = gcs.take_while { |c| space_character?(c) }
+    spaces.sum(&:bytesize)
+  end
+  def self.word_character?(s)
+    s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
+  rescue Encoding::UndefinedConversionError
+    false
+  end
+  def self.space_character?(s)
+    s.match?(/\s/) if s
   end
 end