reline 0.5.9 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,12 +28,12 @@ class Reline::Unicode
28
28
  0x19 => '^Y',
29
29
  0x1A => '^Z', # C-z
30
30
  0x1B => '^[', # C-[ C-3
31
+ 0x1C => '^\\', # C-\
31
32
  0x1D => '^]', # C-]
32
33
  0x1E => '^^', # C-~ C-6
33
34
  0x1F => '^_', # C-_ C-7
34
35
  0x7F => '^?', # C-? C-8
35
36
  }
36
- EscapedChars = EscapedPairs.keys.map(&:chr)
37
37
 
38
38
  NON_PRINTING_START = "\1"
39
39
  NON_PRINTING_END = "\2"
@@ -54,53 +54,44 @@ class Reline::Unicode
54
54
  }.join
55
55
  end
56
56
 
57
- require 'reline/unicode/east_asian_width'
57
+ def self.safe_encode(str, encoding)
58
+ # Reline only supports strings that can be converted to UTF-8.
59
+ converted = str.encode(encoding, invalid: :replace, undef: :replace)
60
+ return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
61
+
62
+ # This code is essentially doing the same thing as
63
+ # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
64
+ # but also avoids unnecessary irreversible encoding conversion.
65
+ converted.gsub(/\X/) do |c|
66
+ c.encode(Encoding::UTF_8)
67
+ c
68
+ rescue Encoding::UndefinedConversionError
69
+ '?'
70
+ end
71
+ end
58
72
 
59
- HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
60
-
61
- MBCharWidthRE = /
62
- (?<width_2_1>
63
- [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
64
- )
65
- | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
66
- | (?<width_0>^\p{M})
67
- | (?<width_2_2>
68
- #{ EastAsianWidth::TYPE_F }
69
- | #{ EastAsianWidth::TYPE_W }
70
- )
71
- | (?<width_1>
72
- #{ EastAsianWidth::TYPE_H }
73
- | #{ EastAsianWidth::TYPE_NA }
74
- | #{ EastAsianWidth::TYPE_N }
75
- )(?!#{ HalfwidthDakutenHandakuten })
76
- | (?<width_2_3>
77
- (?: #{ EastAsianWidth::TYPE_H }
78
- | #{ EastAsianWidth::TYPE_NA }
79
- | #{ EastAsianWidth::TYPE_N })
80
- #{ HalfwidthDakutenHandakuten }
81
- )
82
- | (?<ambiguous_width>
83
- #{EastAsianWidth::TYPE_A}
84
- )
85
- /x
73
+ require 'reline/unicode/east_asian_width'
86
74
 
87
75
  def self.get_mbchar_width(mbchar)
88
76
  ord = mbchar.ord
89
- if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
77
+ if ord <= 0x1F # in EscapedPairs
90
78
  return 2
91
- elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
79
+ elsif ord <= 0x7E # printable ASCII chars
92
80
  return 1
93
81
  end
94
- m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
95
- case
96
- when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
97
- when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
98
- when m[:width_3] then 3
99
- when m[:width_0] then 0
100
- when m[:width_1] then 1
101
- when m[:ambiguous_width] then Reline.ambiguous_width
82
+ utf8_mbchar = mbchar.encode(Encoding::UTF_8)
83
+ ord = utf8_mbchar.ord
84
+ chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
85
+ size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
86
+ if size == -1
87
+ Reline.ambiguous_width
88
+ elsif size == 1 && utf8_mbchar.size >= 2
89
+ second_char_ord = utf8_mbchar[1].ord
90
+ # Halfwidth Dakuten Handakuten
91
+ # Only these two characters have the Letter Modifier category and can be combined in a single grapheme cluster
92
+ (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
102
93
  else
103
- nil
94
+ size
104
95
  end
105
96
  end
106
97
 
@@ -130,9 +121,14 @@ class Reline::Unicode
130
121
  end
131
122
  end
132
123
 
133
- def self.split_by_width(str, max_width, encoding = str.encoding, offset: 0)
124
+ # This method is used by IRB
125
+ def self.split_by_width(str, max_width)
126
+ lines = split_line_by_width(str, max_width)
127
+ [lines, lines.size]
128
+ end
129
+
130
+ def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
134
131
  lines = [String.new(encoding: encoding)]
135
- height = 1
136
132
  width = offset
137
133
  rest = str.encode(Encoding::UTF_8)
138
134
  in_zero_width = false
@@ -141,10 +137,8 @@ class Reline::Unicode
141
137
  case
142
138
  when non_printing_start
143
139
  in_zero_width = true
144
- lines.last << NON_PRINTING_START
145
140
  when non_printing_end
146
141
  in_zero_width = false
147
- lines.last << NON_PRINTING_END
148
142
  when csi
149
143
  lines.last << csi
150
144
  unless in_zero_width
@@ -156,15 +150,13 @@ class Reline::Unicode
156
150
  end
157
151
  when osc
158
152
  lines.last << osc
159
- seq << osc
153
+ seq << osc unless in_zero_width
160
154
  when gc
161
155
  unless in_zero_width
162
156
  mbchar_width = get_mbchar_width(gc)
163
157
  if (width += mbchar_width) > max_width
164
158
  width = mbchar_width
165
- lines << nil
166
159
  lines << seq.dup
167
- height += 1
168
160
  end
169
161
  end
170
162
  lines.last << gc
@@ -172,11 +164,13 @@ class Reline::Unicode
172
164
  end
173
165
  # The cursor moves to the next line first
174
166
  if width == max_width
175
- lines << nil
176
167
  lines << String.new(encoding: encoding)
177
- height += 1
178
168
  end
179
- [lines, height]
169
+ lines
170
+ end
171
+
172
+ def self.strip_non_printing_start_end(prompt)
173
+ prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) { $1 }
180
174
  end
181
175
 
182
176
  # Take a chunk of a String cut by width with escape sequences.
@@ -198,10 +192,8 @@ class Reline::Unicode
198
192
  case
199
193
  when non_printing_start
200
194
  in_zero_width = true
201
- chunk << NON_PRINTING_START
202
195
  when non_printing_end
203
196
  in_zero_width = false
204
- chunk << NON_PRINTING_END
205
197
  when csi
206
198
  has_csi = true
207
199
  chunk << csi
@@ -270,427 +262,154 @@ class Reline::Unicode
270
262
  end
271
263
 
272
264
  def self.em_forward_word(line, byte_pointer)
273
- width = 0
274
- byte_size = 0
275
- while line.bytesize > (byte_pointer + byte_size)
276
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
277
- mbchar = line.byteslice(byte_pointer + byte_size, size)
278
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
279
- width += get_mbchar_width(mbchar)
280
- byte_size += size
281
- end
282
- while line.bytesize > (byte_pointer + byte_size)
283
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
284
- mbchar = line.byteslice(byte_pointer + byte_size, size)
285
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
286
- width += get_mbchar_width(mbchar)
287
- byte_size += size
288
- end
289
- [byte_size, width]
265
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
266
+ nonwords = gcs.take_while { |c| !word_character?(c) }
267
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
268
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
290
269
  end
291
270
 
292
271
  def self.em_forward_word_with_capitalization(line, byte_pointer)
293
- width = 0
294
- byte_size = 0
295
- new_str = String.new
296
- while line.bytesize > (byte_pointer + byte_size)
297
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
298
- mbchar = line.byteslice(byte_pointer + byte_size, size)
299
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
300
- new_str += mbchar
301
- width += get_mbchar_width(mbchar)
302
- byte_size += size
303
- end
304
- first = true
305
- while line.bytesize > (byte_pointer + byte_size)
306
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
307
- mbchar = line.byteslice(byte_pointer + byte_size, size)
308
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
309
- if first
310
- new_str += mbchar.upcase
311
- first = false
312
- else
313
- new_str += mbchar.downcase
314
- end
315
- width += get_mbchar_width(mbchar)
316
- byte_size += size
317
- end
318
- [byte_size, width, new_str]
272
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
273
+ nonwords = gcs.take_while { |c| !word_character?(c) }
274
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
275
+ [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
319
276
  end
320
277
 
321
278
  def self.em_backward_word(line, byte_pointer)
322
- width = 0
323
- byte_size = 0
324
- while 0 < (byte_pointer - byte_size)
325
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
326
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
327
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
328
- width += get_mbchar_width(mbchar)
329
- byte_size += size
330
- end
331
- while 0 < (byte_pointer - byte_size)
332
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
333
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
334
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
335
- width += get_mbchar_width(mbchar)
336
- byte_size += size
337
- end
338
- [byte_size, width]
279
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
280
+ nonwords = gcs.take_while { |c| !word_character?(c) }
281
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
282
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
339
283
  end
340
284
 
341
285
  def self.em_big_backward_word(line, byte_pointer)
342
- width = 0
343
- byte_size = 0
344
- while 0 < (byte_pointer - byte_size)
345
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
346
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
347
- break if mbchar =~ /\S/
348
- width += get_mbchar_width(mbchar)
349
- byte_size += size
350
- end
351
- while 0 < (byte_pointer - byte_size)
352
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
353
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
354
- break if mbchar =~ /\s/
355
- width += get_mbchar_width(mbchar)
356
- byte_size += size
357
- end
358
- [byte_size, width]
286
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
287
+ spaces = gcs.take_while { |c| space_character?(c) }
288
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
289
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
359
290
  end
360
291
 
361
292
  def self.ed_transpose_words(line, byte_pointer)
362
- right_word_start = nil
363
- size = get_next_mbchar_size(line, byte_pointer)
364
- mbchar = line.byteslice(byte_pointer, size)
365
- if size.zero?
366
- # ' aaa bbb [cursor]'
367
- byte_size = 0
368
- while 0 < (byte_pointer + byte_size)
369
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
370
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
371
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
372
- byte_size -= size
373
- end
374
- while 0 < (byte_pointer + byte_size)
375
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
376
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
377
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
378
- byte_size -= size
379
- end
380
- right_word_start = byte_pointer + byte_size
381
- byte_size = 0
382
- while line.bytesize > (byte_pointer + byte_size)
383
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
384
- mbchar = line.byteslice(byte_pointer + byte_size, size)
385
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
386
- byte_size += size
387
- end
388
- after_start = byte_pointer + byte_size
389
- elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
390
- # ' aaa bb[cursor]b'
391
- byte_size = 0
392
- while 0 < (byte_pointer + byte_size)
393
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
394
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
395
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
396
- byte_size -= size
397
- end
398
- right_word_start = byte_pointer + byte_size
399
- byte_size = 0
400
- while line.bytesize > (byte_pointer + byte_size)
401
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
402
- mbchar = line.byteslice(byte_pointer + byte_size, size)
403
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
404
- byte_size += size
405
- end
406
- after_start = byte_pointer + byte_size
407
- else
408
- byte_size = 0
409
- while (line.bytesize - 1) > (byte_pointer + byte_size)
410
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
411
- mbchar = line.byteslice(byte_pointer + byte_size, size)
412
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
413
- byte_size += size
414
- end
415
- if (byte_pointer + byte_size) == (line.bytesize - 1)
416
- # ' aaa bbb [cursor] '
417
- after_start = line.bytesize
418
- while 0 < (byte_pointer + byte_size)
419
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
420
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
421
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
422
- byte_size -= size
423
- end
424
- while 0 < (byte_pointer + byte_size)
425
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
426
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
427
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
428
- byte_size -= size
429
- end
430
- right_word_start = byte_pointer + byte_size
431
- else
432
- # ' aaa [cursor] bbb '
433
- right_word_start = byte_pointer + byte_size
434
- while line.bytesize > (byte_pointer + byte_size)
435
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
436
- mbchar = line.byteslice(byte_pointer + byte_size, size)
437
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
438
- byte_size += size
439
- end
440
- after_start = byte_pointer + byte_size
441
- end
442
- end
443
- byte_size = right_word_start - byte_pointer
444
- while 0 < (byte_pointer + byte_size)
445
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
446
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
447
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
448
- byte_size -= size
449
- end
450
- middle_start = byte_pointer + byte_size
451
- byte_size = middle_start - byte_pointer
452
- while 0 < (byte_pointer + byte_size)
453
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
454
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
455
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
456
- byte_size -= size
293
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters
294
+ pos = gcs.size
295
+ gcs += line.byteslice(byte_pointer..).grapheme_clusters
296
+ pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
297
+ if pos == gcs.size # 'aaa bbb [cursor] '
298
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
299
+ second_word_end = gcs.size
300
+ else # 'aaa [cursor]bbb'
301
+ pos += 1 while pos < gcs.size && word_character?(gcs[pos])
302
+ second_word_end = pos
303
+ end
304
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
305
+ second_word_start = pos
306
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
307
+ first_word_end = pos
308
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
309
+ first_word_start = pos
310
+
311
+ [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
312
+ gcs.take(idx).sum(&:bytesize)
457
313
  end
458
- left_word_start = byte_pointer + byte_size
459
- [left_word_start, middle_start, right_word_start, after_start]
460
314
  end
461
315
 
462
316
  def self.vi_big_forward_word(line, byte_pointer)
463
- width = 0
464
- byte_size = 0
465
- while (line.bytesize - 1) > (byte_pointer + byte_size)
466
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
467
- mbchar = line.byteslice(byte_pointer + byte_size, size)
468
- break if mbchar =~ /\s/
469
- width += get_mbchar_width(mbchar)
470
- byte_size += size
471
- end
472
- while (line.bytesize - 1) > (byte_pointer + byte_size)
473
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
474
- mbchar = line.byteslice(byte_pointer + byte_size, size)
475
- break if mbchar =~ /\S/
476
- width += get_mbchar_width(mbchar)
477
- byte_size += size
478
- end
479
- [byte_size, width]
317
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
318
+ nonspaces = gcs.take_while { |c| !space_character?(c) }
319
+ spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
320
+ nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
480
321
  end
481
322
 
482
323
  def self.vi_big_forward_end_word(line, byte_pointer)
483
- if (line.bytesize - 1) > byte_pointer
484
- size = get_next_mbchar_size(line, byte_pointer)
485
- mbchar = line.byteslice(byte_pointer, size)
486
- width = get_mbchar_width(mbchar)
487
- byte_size = size
488
- else
489
- return [0, 0]
490
- end
491
- while (line.bytesize - 1) > (byte_pointer + byte_size)
492
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
493
- mbchar = line.byteslice(byte_pointer + byte_size, size)
494
- break if mbchar =~ /\S/
495
- width += get_mbchar_width(mbchar)
496
- byte_size += size
497
- end
498
- prev_width = width
499
- prev_byte_size = byte_size
500
- while line.bytesize > (byte_pointer + byte_size)
501
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
502
- mbchar = line.byteslice(byte_pointer + byte_size, size)
503
- break if mbchar =~ /\s/
504
- prev_width = width
505
- prev_byte_size = byte_size
506
- width += get_mbchar_width(mbchar)
507
- byte_size += size
508
- end
509
- [prev_byte_size, prev_width]
324
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
325
+ first = gcs.shift(1)
326
+ spaces = gcs.take_while { |c| space_character?(c) }
327
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
328
+ matched = spaces + nonspaces
329
+ matched.pop
330
+ first.sum(&:bytesize) + matched.sum(&:bytesize)
510
331
  end
511
332
 
512
333
  def self.vi_big_backward_word(line, byte_pointer)
513
- width = 0
514
- byte_size = 0
515
- while 0 < (byte_pointer - byte_size)
516
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
517
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
518
- break if mbchar =~ /\S/
519
- width += get_mbchar_width(mbchar)
520
- byte_size += size
521
- end
522
- while 0 < (byte_pointer - byte_size)
523
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
524
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
525
- break if mbchar =~ /\s/
526
- width += get_mbchar_width(mbchar)
527
- byte_size += size
528
- end
529
- [byte_size, width]
334
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
335
+ spaces = gcs.take_while { |c| space_character?(c) }
336
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
337
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
530
338
  end
531
339
 
532
340
  def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
533
- if line.bytesize > byte_pointer
534
- size = get_next_mbchar_size(line, byte_pointer)
535
- mbchar = line.byteslice(byte_pointer, size)
536
- if mbchar =~ /\w/
537
- started_by = :word
538
- elsif mbchar =~ /\s/
539
- started_by = :space
341
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
342
+ return 0 if gcs.empty?
343
+
344
+ c = gcs.first
345
+ matched =
346
+ if word_character?(c)
347
+ gcs.take_while { |c| word_character?(c) }
348
+ elsif space_character?(c)
349
+ gcs.take_while { |c| space_character?(c) }
540
350
  else
541
- started_by = :non_word_printable
542
- end
543
- width = get_mbchar_width(mbchar)
544
- byte_size = size
545
- else
546
- return [0, 0]
547
- end
548
- while line.bytesize > (byte_pointer + byte_size)
549
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
550
- mbchar = line.byteslice(byte_pointer + byte_size, size)
551
- case started_by
552
- when :word
553
- break if mbchar =~ /\W/
554
- when :space
555
- break if mbchar =~ /\S/
556
- when :non_word_printable
557
- break if mbchar =~ /\w|\s/
351
+ gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
558
352
  end
559
- width += get_mbchar_width(mbchar)
560
- byte_size += size
561
- end
562
- return [byte_size, width] if drop_terminate_spaces
563
- while line.bytesize > (byte_pointer + byte_size)
564
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
565
- mbchar = line.byteslice(byte_pointer + byte_size, size)
566
- break if mbchar =~ /\S/
567
- width += get_mbchar_width(mbchar)
568
- byte_size += size
569
- end
570
- [byte_size, width]
353
+
354
+ return matched.sum(&:bytesize) if drop_terminate_spaces
355
+
356
+ spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
357
+ matched.sum(&:bytesize) + spaces.sum(&:bytesize)
571
358
  end
572
359
 
573
360
  def self.vi_forward_end_word(line, byte_pointer)
574
- if (line.bytesize - 1) > byte_pointer
575
- size = get_next_mbchar_size(line, byte_pointer)
576
- mbchar = line.byteslice(byte_pointer, size)
577
- if mbchar =~ /\w/
578
- started_by = :word
579
- elsif mbchar =~ /\s/
580
- started_by = :space
581
- else
582
- started_by = :non_word_printable
583
- end
584
- width = get_mbchar_width(mbchar)
585
- byte_size = size
586
- else
587
- return [0, 0]
588
- end
589
- if (line.bytesize - 1) > (byte_pointer + byte_size)
590
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
591
- mbchar = line.byteslice(byte_pointer + byte_size, size)
592
- if mbchar =~ /\w/
593
- second = :word
594
- elsif mbchar =~ /\s/
595
- second = :space
596
- else
597
- second = :non_word_printable
598
- end
599
- second_width = get_mbchar_width(mbchar)
600
- second_byte_size = size
601
- else
602
- return [byte_size, width]
603
- end
604
- if second == :space
605
- width += second_width
606
- byte_size += second_byte_size
607
- while (line.bytesize - 1) > (byte_pointer + byte_size)
608
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
609
- mbchar = line.byteslice(byte_pointer + byte_size, size)
610
- if mbchar =~ /\S/
611
- if mbchar =~ /\w/
612
- started_by = :word
613
- else
614
- started_by = :non_word_printable
615
- end
616
- break
617
- end
618
- width += get_mbchar_width(mbchar)
619
- byte_size += size
620
- end
621
- else
622
- case [started_by, second]
623
- when [:word, :non_word_printable], [:non_word_printable, :word]
624
- started_by = second
625
- else
626
- width += second_width
627
- byte_size += second_byte_size
628
- started_by = second
629
- end
630
- end
631
- prev_width = width
632
- prev_byte_size = byte_size
633
- while line.bytesize > (byte_pointer + byte_size)
634
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
635
- mbchar = line.byteslice(byte_pointer + byte_size, size)
636
- case started_by
637
- when :word
638
- break if mbchar =~ /\W/
639
- when :non_word_printable
640
- break if mbchar =~ /[\w\s]/
641
- end
642
- prev_width = width
643
- prev_byte_size = byte_size
644
- width += get_mbchar_width(mbchar)
645
- byte_size += size
646
- end
647
- [prev_byte_size, prev_width]
361
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
362
+ return 0 if gcs.empty?
363
+ return gcs.first.bytesize if gcs.size == 1
364
+
365
+ start = gcs.shift
366
+ skips = [start]
367
+ if space_character?(start) || space_character?(gcs.first)
368
+ spaces = gcs.take_while { |c| space_character?(c) }
369
+ skips += spaces
370
+ gcs.shift(spaces.size)
371
+ end
372
+ start_with_word = word_character?(gcs.first)
373
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
374
+ matched.pop
375
+ skips.sum(&:bytesize) + matched.sum(&:bytesize)
648
376
  end
649
377
 
650
378
  def self.vi_backward_word(line, byte_pointer)
651
- width = 0
652
- byte_size = 0
653
- while 0 < (byte_pointer - byte_size)
654
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
655
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
656
- if mbchar =~ /\S/
657
- if mbchar =~ /\w/
658
- started_by = :word
659
- else
660
- started_by = :non_word_printable
661
- end
662
- break
663
- end
664
- width += get_mbchar_width(mbchar)
665
- byte_size += size
666
- end
667
- while 0 < (byte_pointer - byte_size)
668
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
669
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
670
- case started_by
671
- when :word
672
- break if mbchar =~ /\W/
673
- when :non_word_printable
674
- break if mbchar =~ /[\w\s]/
379
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
380
+ spaces = gcs.take_while { |c| space_character?(c) }
381
+ gcs.shift(spaces.size)
382
+ start_with_word = word_character?(gcs.first)
383
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
384
+ spaces.sum(&:bytesize) + matched.sum(&:bytesize)
385
+ end
386
+
387
+ def self.common_prefix(list, ignore_case: false)
388
+ return '' if list.empty?
389
+
390
+ common_prefix_gcs = list.first.grapheme_clusters
391
+ list.each do |item|
392
+ gcs = item.grapheme_clusters
393
+ common_prefix_gcs = common_prefix_gcs.take_while.with_index do |gc, i|
394
+ ignore_case ? gc.casecmp?(gcs[i]) : gc == gcs[i]
675
395
  end
676
- width += get_mbchar_width(mbchar)
677
- byte_size += size
678
396
  end
679
- [byte_size, width]
397
+ common_prefix_gcs.join
680
398
  end
681
399
 
682
400
  def self.vi_first_print(line)
683
- width = 0
684
- byte_size = 0
685
- while (line.bytesize - 1) > byte_size
686
- size = get_next_mbchar_size(line, byte_size)
687
- mbchar = line.byteslice(byte_size, size)
688
- if mbchar =~ /\S/
689
- break
690
- end
691
- width += get_mbchar_width(mbchar)
692
- byte_size += size
693
- end
694
- [byte_size, width]
401
+ gcs = line.grapheme_clusters
402
+ spaces = gcs.take_while { |c| space_character?(c) }
403
+ spaces.sum(&:bytesize)
404
+ end
405
+
406
+ def self.word_character?(s)
407
+ s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
408
+ rescue Encoding::UndefinedConversionError
409
+ false
410
+ end
411
+
412
+ def self.space_character?(s)
413
+ s.match?(/\s/) if s
695
414
  end
696
415
  end