reline 0.3.9 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,12 +28,12 @@ class Reline::Unicode
28
28
  0x19 => '^Y',
29
29
  0x1A => '^Z', # C-z
30
30
  0x1B => '^[', # C-[ C-3
31
+ 0x1C => '^\\', # C-\
31
32
  0x1D => '^]', # C-]
32
33
  0x1E => '^^', # C-~ C-6
33
34
  0x1F => '^_', # C-_ C-7
34
35
  0x7F => '^?', # C-? C-8
35
36
  }
36
- EscapedChars = EscapedPairs.keys.map(&:chr)
37
37
 
38
38
  NON_PRINTING_START = "\1"
39
39
  NON_PRINTING_END = "\2"
@@ -43,62 +43,55 @@ class Reline::Unicode
43
43
 
44
44
  def self.escape_for_print(str)
45
45
  str.chars.map! { |gr|
46
- escaped = EscapedPairs[gr.ord]
47
- if escaped && gr != -"\n" && gr != -"\t"
48
- escaped
49
- else
46
+ case gr
47
+ when -"\n"
50
48
  gr
49
+ when -"\t"
50
+ -' '
51
+ else
52
+ EscapedPairs[gr.ord] || gr
51
53
  end
52
54
  }.join
53
55
  end
54
56
 
55
- require 'reline/unicode/east_asian_width'
57
+ def self.safe_encode(str, encoding)
58
+ # Reline only supports strings that are convertible to UTF-8.
59
+ converted = str.encode(encoding, invalid: :replace, undef: :replace)
60
+ return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
56
61
 
57
- HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
58
-
59
- MBCharWidthRE = /
60
- (?<width_2_1>
61
- [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
62
- )
63
- | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
64
- | (?<width_0>^\p{M})
65
- | (?<width_2_2>
66
- #{ EastAsianWidth::TYPE_F }
67
- | #{ EastAsianWidth::TYPE_W }
68
- )
69
- | (?<width_1>
70
- #{ EastAsianWidth::TYPE_H }
71
- | #{ EastAsianWidth::TYPE_NA }
72
- | #{ EastAsianWidth::TYPE_N }
73
- )(?!#{ HalfwidthDakutenHandakuten })
74
- | (?<width_2_3>
75
- (?: #{ EastAsianWidth::TYPE_H }
76
- | #{ EastAsianWidth::TYPE_NA }
77
- | #{ EastAsianWidth::TYPE_N })
78
- #{ HalfwidthDakutenHandakuten }
79
- )
80
- | (?<ambiguous_width>
81
- #{EastAsianWidth::TYPE_A}
82
- )
83
- /x
62
+ # This code is essentially doing the same thing as
63
+ # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
64
+ # but also avoids unnecessary irreversible encoding conversion.
65
+ converted.gsub(/\X/) do |c|
66
+ c.encode(Encoding::UTF_8)
67
+ c
68
+ rescue Encoding::UndefinedConversionError
69
+ '?'
70
+ end
71
+ end
72
+
73
+ require 'reline/unicode/east_asian_width'
84
74
 
85
75
  def self.get_mbchar_width(mbchar)
86
76
  ord = mbchar.ord
87
- if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
77
+ if ord <= 0x1F # in EscapedPairs
88
78
  return 2
89
- elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
79
+ elsif ord <= 0x7E # printable ASCII chars
90
80
  return 1
91
81
  end
92
- m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
93
- case
94
- when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
95
- when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
96
- when m[:width_3] then 3
97
- when m[:width_0] then 0
98
- when m[:width_1] then 1
99
- when m[:ambiguous_width] then Reline.ambiguous_width
82
+ utf8_mbchar = mbchar.encode(Encoding::UTF_8)
83
+ ord = utf8_mbchar.ord
84
+ chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
85
+ size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
86
+ if size == -1
87
+ Reline.ambiguous_width
88
+ elsif size == 1 && utf8_mbchar.size >= 2
89
+ second_char_ord = utf8_mbchar[1].ord
90
+ # Halfwidth Dakuten Handakuten
91
+ # Only these two characters have the Letter Modifier category and can be combined in a single grapheme cluster
92
+ (second_char_ord == 0xFF9E || second_char_ord == 0xFF9F) ? 2 : 1
100
93
  else
101
- nil
94
+ size
102
95
  end
103
96
  end
104
97
 
@@ -128,10 +121,15 @@ class Reline::Unicode
128
121
  end
129
122
  end
130
123
 
131
- def self.split_by_width(str, max_width, encoding = str.encoding)
124
+ # This method is used by IRB
125
+ def self.split_by_width(str, max_width)
126
+ lines = split_line_by_width(str, max_width)
127
+ [lines, lines.size]
128
+ end
129
+
130
+ def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
132
131
  lines = [String.new(encoding: encoding)]
133
- height = 1
134
- width = 0
132
+ width = offset
135
133
  rest = str.encode(Encoding::UTF_8)
136
134
  in_zero_width = false
137
135
  seq = String.new(encoding: encoding)
@@ -139,24 +137,26 @@ class Reline::Unicode
139
137
  case
140
138
  when non_printing_start
141
139
  in_zero_width = true
142
- lines.last << NON_PRINTING_START
143
140
  when non_printing_end
144
141
  in_zero_width = false
145
- lines.last << NON_PRINTING_END
146
142
  when csi
147
143
  lines.last << csi
148
- seq << csi
144
+ unless in_zero_width
145
+ if csi == -"\e[m" || csi == -"\e[0m"
146
+ seq.clear
147
+ else
148
+ seq << csi
149
+ end
150
+ end
149
151
  when osc
150
152
  lines.last << osc
151
- seq << osc
153
+ seq << osc unless in_zero_width
152
154
  when gc
153
155
  unless in_zero_width
154
156
  mbchar_width = get_mbchar_width(gc)
155
157
  if (width += mbchar_width) > max_width
156
158
  width = mbchar_width
157
- lines << nil
158
159
  lines << seq.dup
159
- height += 1
160
160
  end
161
161
  end
162
162
  lines.last << gc
@@ -164,19 +164,30 @@ class Reline::Unicode
164
164
  end
165
165
  # The cursor moves to the next line first
166
166
  if width == max_width
167
- lines << nil
168
167
  lines << String.new(encoding: encoding)
169
- height += 1
170
168
  end
171
- [lines, height]
169
+ lines
170
+ end
171
+
172
+ def self.strip_non_printing_start_end(prompt)
173
+ prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) { $1 }
172
174
  end
173
175
 
174
176
  # Take a chunk of a String cut by width with escape sequences.
175
177
  def self.take_range(str, start_col, max_width)
178
+ take_mbchar_range(str, start_col, max_width).first
179
+ end
180
+
181
+ def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
176
182
  chunk = String.new(encoding: str.encoding)
183
+
184
+ end_col = start_col + width
177
185
  total_width = 0
178
186
  rest = str.encode(Encoding::UTF_8)
179
187
  in_zero_width = false
188
+ chunk_start_col = nil
189
+ chunk_end_col = nil
190
+ has_csi = false
180
191
  rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
181
192
  case
182
193
  when non_printing_start
@@ -184,21 +195,56 @@ class Reline::Unicode
184
195
  when non_printing_end
185
196
  in_zero_width = false
186
197
  when csi
198
+ has_csi = true
187
199
  chunk << csi
188
200
  when osc
189
201
  chunk << osc
190
202
  when gc
191
203
  if in_zero_width
192
204
  chunk << gc
205
+ next
206
+ end
207
+
208
+ mbchar_width = get_mbchar_width(gc)
209
+ prev_width = total_width
210
+ total_width += mbchar_width
211
+
212
+ if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
213
+ # Current character hasn't reached start_col yet
214
+ next
215
+ elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
216
+ # Add preceding padding. This padding might have background color.
217
+ chunk << ' '
218
+ chunk_start_col ||= start_col
219
+ chunk_end_col = total_width
220
+ next
221
+ elsif (cover_end ? prev_width < end_col : total_width <= end_col)
222
+ # Current character is in the range
223
+ chunk << gc
224
+ chunk_start_col ||= prev_width
225
+ chunk_end_col = total_width
226
+ break if total_width >= end_col
193
227
  else
194
- mbchar_width = get_mbchar_width(gc)
195
- total_width += mbchar_width
196
- break if (start_col + max_width) < total_width
197
- chunk << gc if start_col < total_width
228
+ # Current character exceeds end_col
229
+ if padding && end_col < total_width
230
+ # Add succeeding padding. This padding might have background color.
231
+ chunk << ' '
232
+ chunk_start_col ||= prev_width
233
+ chunk_end_col = end_col
234
+ end
235
+ break
198
236
  end
199
237
  end
200
238
  end
201
- chunk
239
+ chunk_start_col ||= start_col
240
+ chunk_end_col ||= start_col
241
+ if padding && chunk_end_col < end_col
242
+ # Append padding. This padding should not include background color.
243
+ chunk << "\e[0m" if has_csi
244
+ chunk << ' ' * (end_col - chunk_end_col)
245
+ chunk_end_col = end_col
246
+ end
247
+ [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
202
248
  end
203
249
 
204
250
  def self.get_next_mbchar_size(line, byte_pointer)
@@ -216,427 +262,154 @@ class Reline::Unicode
216
262
  end
217
263
 
218
264
  def self.em_forward_word(line, byte_pointer)
219
- width = 0
220
- byte_size = 0
221
- while line.bytesize > (byte_pointer + byte_size)
222
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
223
- mbchar = line.byteslice(byte_pointer + byte_size, size)
224
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
225
- width += get_mbchar_width(mbchar)
226
- byte_size += size
227
- end
228
- while line.bytesize > (byte_pointer + byte_size)
229
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
230
- mbchar = line.byteslice(byte_pointer + byte_size, size)
231
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
232
- width += get_mbchar_width(mbchar)
233
- byte_size += size
234
- end
235
- [byte_size, width]
265
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
266
+ nonwords = gcs.take_while { |c| !word_character?(c) }
267
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
268
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
236
269
  end
237
270
 
238
271
  def self.em_forward_word_with_capitalization(line, byte_pointer)
239
- width = 0
240
- byte_size = 0
241
- new_str = String.new
242
- while line.bytesize > (byte_pointer + byte_size)
243
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
244
- mbchar = line.byteslice(byte_pointer + byte_size, size)
245
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
246
- new_str += mbchar
247
- width += get_mbchar_width(mbchar)
248
- byte_size += size
249
- end
250
- first = true
251
- while line.bytesize > (byte_pointer + byte_size)
252
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
253
- mbchar = line.byteslice(byte_pointer + byte_size, size)
254
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
255
- if first
256
- new_str += mbchar.upcase
257
- first = false
258
- else
259
- new_str += mbchar.downcase
260
- end
261
- width += get_mbchar_width(mbchar)
262
- byte_size += size
263
- end
264
- [byte_size, width, new_str]
272
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
273
+ nonwords = gcs.take_while { |c| !word_character?(c) }
274
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
275
+ [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
265
276
  end
266
277
 
267
278
  def self.em_backward_word(line, byte_pointer)
268
- width = 0
269
- byte_size = 0
270
- while 0 < (byte_pointer - byte_size)
271
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
272
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
273
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
274
- width += get_mbchar_width(mbchar)
275
- byte_size += size
276
- end
277
- while 0 < (byte_pointer - byte_size)
278
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
279
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
280
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
281
- width += get_mbchar_width(mbchar)
282
- byte_size += size
283
- end
284
- [byte_size, width]
279
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
280
+ nonwords = gcs.take_while { |c| !word_character?(c) }
281
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
282
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
285
283
  end
286
284
 
287
285
  def self.em_big_backward_word(line, byte_pointer)
288
- width = 0
289
- byte_size = 0
290
- while 0 < (byte_pointer - byte_size)
291
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
292
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
293
- break if mbchar =~ /\S/
294
- width += get_mbchar_width(mbchar)
295
- byte_size += size
296
- end
297
- while 0 < (byte_pointer - byte_size)
298
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
299
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
300
- break if mbchar =~ /\s/
301
- width += get_mbchar_width(mbchar)
302
- byte_size += size
303
- end
304
- [byte_size, width]
286
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
287
+ spaces = gcs.take_while { |c| space_character?(c) }
288
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
289
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
305
290
  end
306
291
 
307
292
  def self.ed_transpose_words(line, byte_pointer)
308
- right_word_start = nil
309
- size = get_next_mbchar_size(line, byte_pointer)
310
- mbchar = line.byteslice(byte_pointer, size)
311
- if size.zero?
312
- # ' aaa bbb [cursor]'
313
- byte_size = 0
314
- while 0 < (byte_pointer + byte_size)
315
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
316
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
317
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
318
- byte_size -= size
319
- end
320
- while 0 < (byte_pointer + byte_size)
321
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
322
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
323
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
324
- byte_size -= size
325
- end
326
- right_word_start = byte_pointer + byte_size
327
- byte_size = 0
328
- while line.bytesize > (byte_pointer + byte_size)
329
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
330
- mbchar = line.byteslice(byte_pointer + byte_size, size)
331
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
332
- byte_size += size
333
- end
334
- after_start = byte_pointer + byte_size
335
- elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
336
- # ' aaa bb[cursor]b'
337
- byte_size = 0
338
- while 0 < (byte_pointer + byte_size)
339
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
340
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
341
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
342
- byte_size -= size
343
- end
344
- right_word_start = byte_pointer + byte_size
345
- byte_size = 0
346
- while line.bytesize > (byte_pointer + byte_size)
347
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
348
- mbchar = line.byteslice(byte_pointer + byte_size, size)
349
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
350
- byte_size += size
351
- end
352
- after_start = byte_pointer + byte_size
353
- else
354
- byte_size = 0
355
- while (line.bytesize - 1) > (byte_pointer + byte_size)
356
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
357
- mbchar = line.byteslice(byte_pointer + byte_size, size)
358
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
359
- byte_size += size
360
- end
361
- if (byte_pointer + byte_size) == (line.bytesize - 1)
362
- # ' aaa bbb [cursor] '
363
- after_start = line.bytesize
364
- while 0 < (byte_pointer + byte_size)
365
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
366
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
367
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
368
- byte_size -= size
369
- end
370
- while 0 < (byte_pointer + byte_size)
371
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
372
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
373
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
374
- byte_size -= size
375
- end
376
- right_word_start = byte_pointer + byte_size
377
- else
378
- # ' aaa [cursor] bbb '
379
- right_word_start = byte_pointer + byte_size
380
- while line.bytesize > (byte_pointer + byte_size)
381
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
382
- mbchar = line.byteslice(byte_pointer + byte_size, size)
383
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
384
- byte_size += size
385
- end
386
- after_start = byte_pointer + byte_size
387
- end
388
- end
389
- byte_size = right_word_start - byte_pointer
390
- while 0 < (byte_pointer + byte_size)
391
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
392
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
393
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
394
- byte_size -= size
395
- end
396
- middle_start = byte_pointer + byte_size
397
- byte_size = middle_start - byte_pointer
398
- while 0 < (byte_pointer + byte_size)
399
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
400
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
401
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
402
- byte_size -= size
293
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters
294
+ pos = gcs.size
295
+ gcs += line.byteslice(byte_pointer..).grapheme_clusters
296
+ pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
297
+ if pos == gcs.size # 'aaa bbb [cursor] '
298
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
299
+ second_word_end = gcs.size
300
+ else # 'aaa [cursor]bbb'
301
+ pos += 1 while pos < gcs.size && word_character?(gcs[pos])
302
+ second_word_end = pos
303
+ end
304
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
305
+ second_word_start = pos
306
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
307
+ first_word_end = pos
308
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
309
+ first_word_start = pos
310
+
311
+ [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
312
+ gcs.take(idx).sum(&:bytesize)
403
313
  end
404
- left_word_start = byte_pointer + byte_size
405
- [left_word_start, middle_start, right_word_start, after_start]
406
314
  end
407
315
 
408
316
  def self.vi_big_forward_word(line, byte_pointer)
409
- width = 0
410
- byte_size = 0
411
- while (line.bytesize - 1) > (byte_pointer + byte_size)
412
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
413
- mbchar = line.byteslice(byte_pointer + byte_size, size)
414
- break if mbchar =~ /\s/
415
- width += get_mbchar_width(mbchar)
416
- byte_size += size
417
- end
418
- while (line.bytesize - 1) > (byte_pointer + byte_size)
419
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
420
- mbchar = line.byteslice(byte_pointer + byte_size, size)
421
- break if mbchar =~ /\S/
422
- width += get_mbchar_width(mbchar)
423
- byte_size += size
424
- end
425
- [byte_size, width]
317
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
318
+ nonspaces = gcs.take_while { |c| !space_character?(c) }
319
+ spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
320
+ nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
426
321
  end
427
322
 
428
323
  def self.vi_big_forward_end_word(line, byte_pointer)
429
- if (line.bytesize - 1) > byte_pointer
430
- size = get_next_mbchar_size(line, byte_pointer)
431
- mbchar = line.byteslice(byte_pointer, size)
432
- width = get_mbchar_width(mbchar)
433
- byte_size = size
434
- else
435
- return [0, 0]
436
- end
437
- while (line.bytesize - 1) > (byte_pointer + byte_size)
438
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
439
- mbchar = line.byteslice(byte_pointer + byte_size, size)
440
- break if mbchar =~ /\S/
441
- width += get_mbchar_width(mbchar)
442
- byte_size += size
443
- end
444
- prev_width = width
445
- prev_byte_size = byte_size
446
- while line.bytesize > (byte_pointer + byte_size)
447
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
448
- mbchar = line.byteslice(byte_pointer + byte_size, size)
449
- break if mbchar =~ /\s/
450
- prev_width = width
451
- prev_byte_size = byte_size
452
- width += get_mbchar_width(mbchar)
453
- byte_size += size
454
- end
455
- [prev_byte_size, prev_width]
324
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
325
+ first = gcs.shift(1)
326
+ spaces = gcs.take_while { |c| space_character?(c) }
327
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
328
+ matched = spaces + nonspaces
329
+ matched.pop
330
+ first.sum(&:bytesize) + matched.sum(&:bytesize)
456
331
  end
457
332
 
458
333
  def self.vi_big_backward_word(line, byte_pointer)
459
- width = 0
460
- byte_size = 0
461
- while 0 < (byte_pointer - byte_size)
462
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
463
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
464
- break if mbchar =~ /\S/
465
- width += get_mbchar_width(mbchar)
466
- byte_size += size
467
- end
468
- while 0 < (byte_pointer - byte_size)
469
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
470
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
471
- break if mbchar =~ /\s/
472
- width += get_mbchar_width(mbchar)
473
- byte_size += size
474
- end
475
- [byte_size, width]
334
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
335
+ spaces = gcs.take_while { |c| space_character?(c) }
336
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
337
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
476
338
  end
477
339
 
478
340
  def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
479
- if line.bytesize > byte_pointer
480
- size = get_next_mbchar_size(line, byte_pointer)
481
- mbchar = line.byteslice(byte_pointer, size)
482
- if mbchar =~ /\w/
483
- started_by = :word
484
- elsif mbchar =~ /\s/
485
- started_by = :space
341
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
342
+ return 0 if gcs.empty?
343
+
344
+ c = gcs.first
345
+ matched =
346
+ if word_character?(c)
347
+ gcs.take_while { |c| word_character?(c) }
348
+ elsif space_character?(c)
349
+ gcs.take_while { |c| space_character?(c) }
486
350
  else
487
- started_by = :non_word_printable
488
- end
489
- width = get_mbchar_width(mbchar)
490
- byte_size = size
491
- else
492
- return [0, 0]
493
- end
494
- while line.bytesize > (byte_pointer + byte_size)
495
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
496
- mbchar = line.byteslice(byte_pointer + byte_size, size)
497
- case started_by
498
- when :word
499
- break if mbchar =~ /\W/
500
- when :space
501
- break if mbchar =~ /\S/
502
- when :non_word_printable
503
- break if mbchar =~ /\w|\s/
351
+ gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
504
352
  end
505
- width += get_mbchar_width(mbchar)
506
- byte_size += size
507
- end
508
- return [byte_size, width] if drop_terminate_spaces
509
- while line.bytesize > (byte_pointer + byte_size)
510
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
511
- mbchar = line.byteslice(byte_pointer + byte_size, size)
512
- break if mbchar =~ /\S/
513
- width += get_mbchar_width(mbchar)
514
- byte_size += size
515
- end
516
- [byte_size, width]
353
+
354
+ return matched.sum(&:bytesize) if drop_terminate_spaces
355
+
356
+ spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
357
+ matched.sum(&:bytesize) + spaces.sum(&:bytesize)
517
358
  end
518
359
 
519
360
  def self.vi_forward_end_word(line, byte_pointer)
520
- if (line.bytesize - 1) > byte_pointer
521
- size = get_next_mbchar_size(line, byte_pointer)
522
- mbchar = line.byteslice(byte_pointer, size)
523
- if mbchar =~ /\w/
524
- started_by = :word
525
- elsif mbchar =~ /\s/
526
- started_by = :space
527
- else
528
- started_by = :non_word_printable
529
- end
530
- width = get_mbchar_width(mbchar)
531
- byte_size = size
532
- else
533
- return [0, 0]
534
- end
535
- if (line.bytesize - 1) > (byte_pointer + byte_size)
536
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
537
- mbchar = line.byteslice(byte_pointer + byte_size, size)
538
- if mbchar =~ /\w/
539
- second = :word
540
- elsif mbchar =~ /\s/
541
- second = :space
542
- else
543
- second = :non_word_printable
544
- end
545
- second_width = get_mbchar_width(mbchar)
546
- second_byte_size = size
547
- else
548
- return [byte_size, width]
549
- end
550
- if second == :space
551
- width += second_width
552
- byte_size += second_byte_size
553
- while (line.bytesize - 1) > (byte_pointer + byte_size)
554
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
555
- mbchar = line.byteslice(byte_pointer + byte_size, size)
556
- if mbchar =~ /\S/
557
- if mbchar =~ /\w/
558
- started_by = :word
559
- else
560
- started_by = :non_word_printable
561
- end
562
- break
563
- end
564
- width += get_mbchar_width(mbchar)
565
- byte_size += size
566
- end
567
- else
568
- case [started_by, second]
569
- when [:word, :non_word_printable], [:non_word_printable, :word]
570
- started_by = second
571
- else
572
- width += second_width
573
- byte_size += second_byte_size
574
- started_by = second
575
- end
576
- end
577
- prev_width = width
578
- prev_byte_size = byte_size
579
- while line.bytesize > (byte_pointer + byte_size)
580
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
581
- mbchar = line.byteslice(byte_pointer + byte_size, size)
582
- case started_by
583
- when :word
584
- break if mbchar =~ /\W/
585
- when :non_word_printable
586
- break if mbchar =~ /[\w\s]/
587
- end
588
- prev_width = width
589
- prev_byte_size = byte_size
590
- width += get_mbchar_width(mbchar)
591
- byte_size += size
592
- end
593
- [prev_byte_size, prev_width]
361
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
362
+ return 0 if gcs.empty?
363
+ return gcs.first.bytesize if gcs.size == 1
364
+
365
+ start = gcs.shift
366
+ skips = [start]
367
+ if space_character?(start) || space_character?(gcs.first)
368
+ spaces = gcs.take_while { |c| space_character?(c) }
369
+ skips += spaces
370
+ gcs.shift(spaces.size)
371
+ end
372
+ start_with_word = word_character?(gcs.first)
373
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
374
+ matched.pop
375
+ skips.sum(&:bytesize) + matched.sum(&:bytesize)
594
376
  end
595
377
 
596
378
  def self.vi_backward_word(line, byte_pointer)
597
- width = 0
598
- byte_size = 0
599
- while 0 < (byte_pointer - byte_size)
600
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
601
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
602
- if mbchar =~ /\S/
603
- if mbchar =~ /\w/
604
- started_by = :word
605
- else
606
- started_by = :non_word_printable
607
- end
608
- break
609
- end
610
- width += get_mbchar_width(mbchar)
611
- byte_size += size
612
- end
613
- while 0 < (byte_pointer - byte_size)
614
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
615
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
616
- case started_by
617
- when :word
618
- break if mbchar =~ /\W/
619
- when :non_word_printable
620
- break if mbchar =~ /[\w\s]/
379
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
380
+ spaces = gcs.take_while { |c| space_character?(c) }
381
+ gcs.shift(spaces.size)
382
+ start_with_word = word_character?(gcs.first)
383
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
384
+ spaces.sum(&:bytesize) + matched.sum(&:bytesize)
385
+ end
386
+
387
+ def self.common_prefix(list, ignore_case: false)
388
+ return '' if list.empty?
389
+
390
+ common_prefix_gcs = list.first.grapheme_clusters
391
+ list.each do |item|
392
+ gcs = item.grapheme_clusters
393
+ common_prefix_gcs = common_prefix_gcs.take_while.with_index do |gc, i|
394
+ ignore_case ? gc.casecmp?(gcs[i]) : gc == gcs[i]
621
395
  end
622
- width += get_mbchar_width(mbchar)
623
- byte_size += size
624
396
  end
625
- [byte_size, width]
397
+ common_prefix_gcs.join
626
398
  end
627
399
 
628
400
  def self.vi_first_print(line)
629
- width = 0
630
- byte_size = 0
631
- while (line.bytesize - 1) > byte_size
632
- size = get_next_mbchar_size(line, byte_size)
633
- mbchar = line.byteslice(byte_size, size)
634
- if mbchar =~ /\S/
635
- break
636
- end
637
- width += get_mbchar_width(mbchar)
638
- byte_size += size
639
- end
640
- [byte_size, width]
401
+ gcs = line.grapheme_clusters
402
+ spaces = gcs.take_while { |c| space_character?(c) }
403
+ spaces.sum(&:bytesize)
404
+ end
405
+
406
+ def self.word_character?(s)
407
+ s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
408
+ rescue Encoding::UndefinedConversionError
409
+ false
410
+ end
411
+
412
+ def self.space_character?(s)
413
+ s.match?(/\s/) if s
641
414
  end
642
415
  end