reline 0.3.5 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -28,12 +28,12 @@ class Reline::Unicode
28
28
  0x19 => '^Y',
29
29
  0x1A => '^Z', # C-z
30
30
  0x1B => '^[', # C-[ C-3
31
+ 0x1C => '^\\', # C-\
31
32
  0x1D => '^]', # C-]
32
33
  0x1E => '^^', # C-~ C-6
33
34
  0x1F => '^_', # C-_ C-7
34
35
  0x7F => '^?', # C-? C-8
35
36
  }
36
- EscapedChars = EscapedPairs.keys.map(&:chr)
37
37
 
38
38
  NON_PRINTING_START = "\1"
39
39
  NON_PRINTING_END = "\2"
@@ -41,84 +41,63 @@ class Reline::Unicode
41
41
  OSC_REGEXP = /\e\]\d+(?:;[^;\a\e]+)*(?:\a|\e\\)/
42
42
  WIDTH_SCANNER = /\G(?:(#{NON_PRINTING_START})|(#{NON_PRINTING_END})|(#{CSI_REGEXP})|(#{OSC_REGEXP})|(\X))/o
43
43
 
44
- def self.get_mbchar_byte_size_by_first_char(c)
45
- # Checks UTF-8 character byte size
46
- case c.ord
47
- # 0b0xxxxxxx
48
- when ->(code) { (code ^ 0b10000000).allbits?(0b10000000) } then 1
49
- # 0b110xxxxx
50
- when ->(code) { (code ^ 0b00100000).allbits?(0b11100000) } then 2
51
- # 0b1110xxxx
52
- when ->(code) { (code ^ 0b00010000).allbits?(0b11110000) } then 3
53
- # 0b11110xxx
54
- when ->(code) { (code ^ 0b00001000).allbits?(0b11111000) } then 4
55
- # 0b111110xx
56
- when ->(code) { (code ^ 0b00000100).allbits?(0b11111100) } then 5
57
- # 0b1111110x
58
- when ->(code) { (code ^ 0b00000010).allbits?(0b11111110) } then 6
59
- # successor of mbchar
60
- else 0
61
- end
62
- end
63
-
64
44
  def self.escape_for_print(str)
65
45
  str.chars.map! { |gr|
66
- escaped = EscapedPairs[gr.ord]
67
- if escaped && gr != -"\n" && gr != -"\t"
68
- escaped
69
- else
46
+ case gr
47
+ when -"\n"
70
48
  gr
49
+ when -"\t"
50
+ -' '
51
+ else
52
+ EscapedPairs[gr.ord] || gr
71
53
  end
72
54
  }.join
73
55
  end
74
56
 
57
+ def self.safe_encode(str, encoding)
58
+ # Reline only supports utf-8 convertible string.
59
+ converted = str.encode(encoding, invalid: :replace, undef: :replace)
60
+ return converted if str.encoding == Encoding::UTF_8 || converted.encoding == Encoding::UTF_8 || converted.ascii_only?
61
+
62
+ # This code is essentially doing the same thing as
63
+ # `str.encode(utf8, **replace_options).encode(encoding, **replace_options)`
64
+ # but also avoids unnecessary irreversible encoding conversion.
65
+ converted.gsub(/\X/) do |c|
66
+ c.encode(Encoding::UTF_8)
67
+ c
68
+ rescue Encoding::UndefinedConversionError
69
+ '?'
70
+ end
71
+ end
72
+
75
73
  require 'reline/unicode/east_asian_width'
76
74
 
77
- HalfwidthDakutenHandakuten = /[\u{FF9E}\u{FF9F}]/
78
-
79
- MBCharWidthRE = /
80
- (?<width_2_1>
81
- [#{ EscapedChars.map {|c| "\\x%02x" % c.ord }.join }] (?# ^ + char, such as ^M, ^H, ^[, ...)
82
- )
83
- | (?<width_3>^\u{2E3B}) (?# THREE-EM DASH)
84
- | (?<width_0>^\p{M})
85
- | (?<width_2_2>
86
- #{ EastAsianWidth::TYPE_F }
87
- | #{ EastAsianWidth::TYPE_W }
88
- )
89
- | (?<width_1>
90
- #{ EastAsianWidth::TYPE_H }
91
- | #{ EastAsianWidth::TYPE_NA }
92
- | #{ EastAsianWidth::TYPE_N }
93
- )(?!#{ HalfwidthDakutenHandakuten })
94
- | (?<width_2_3>
95
- (?: #{ EastAsianWidth::TYPE_H }
96
- | #{ EastAsianWidth::TYPE_NA }
97
- | #{ EastAsianWidth::TYPE_N })
98
- #{ HalfwidthDakutenHandakuten }
99
- )
100
- | (?<ambiguous_width>
101
- #{EastAsianWidth::TYPE_A}
102
- )
103
- /x
75
+ def self.east_asian_width(ord)
76
+ chunk_index = EastAsianWidth::CHUNK_LAST.bsearch_index { |o| ord <= o }
77
+ size = EastAsianWidth::CHUNK_WIDTH[chunk_index]
78
+ size == -1 ? Reline.ambiguous_width : size
79
+ end
104
80
 
105
81
  def self.get_mbchar_width(mbchar)
106
82
  ord = mbchar.ord
107
- if (0x00 <= ord and ord <= 0x1F) # in EscapedPairs
83
+ if ord <= 0x1F # in EscapedPairs
108
84
  return 2
109
- elsif (0x20 <= ord and ord <= 0x7E) # printable ASCII chars
85
+ elsif mbchar.length == 1 && ord <= 0x7E # printable ASCII chars
110
86
  return 1
111
87
  end
112
- m = mbchar.encode(Encoding::UTF_8).match(MBCharWidthRE)
113
- case
114
- when m.nil? then 1 # TODO should be U+FFFD � REPLACEMENT CHARACTER
115
- when m[:width_2_1], m[:width_2_2], m[:width_2_3] then 2
116
- when m[:width_3] then 3
117
- when m[:width_0] then 0
118
- when m[:width_1] then 1
119
- when m[:ambiguous_width] then Reline.ambiguous_width
120
- else
121
- nil
88
+
89
+ utf8_mbchar = mbchar.encode(Encoding::UTF_8)
90
+ zwj = false
91
+ utf8_mbchar.chars.sum do |c|
92
+ if zwj
93
+ zwj = false
94
+ 0
95
+ elsif c.ord == 0x200D # Zero Width Joiner
96
+ zwj = true
97
+ 0
98
+ else
99
+ east_asian_width(c.ord)
100
+ end
122
101
  end
123
102
  end
124
103
 
@@ -148,10 +127,15 @@ class Reline::Unicode
148
127
  end
149
128
  end
150
129
 
151
- def self.split_by_width(str, max_width, encoding = str.encoding)
130
+ # This method is used by IRB
131
+ def self.split_by_width(str, max_width)
132
+ lines = split_line_by_width(str, max_width)
133
+ [lines, lines.size]
134
+ end
135
+
136
+ def self.split_line_by_width(str, max_width, encoding = str.encoding, offset: 0)
152
137
  lines = [String.new(encoding: encoding)]
153
- height = 1
154
- width = 0
138
+ width = offset
155
139
  rest = str.encode(Encoding::UTF_8)
156
140
  in_zero_width = false
157
141
  seq = String.new(encoding: encoding)
@@ -159,24 +143,26 @@ class Reline::Unicode
159
143
  case
160
144
  when non_printing_start
161
145
  in_zero_width = true
162
- lines.last << NON_PRINTING_START
163
146
  when non_printing_end
164
147
  in_zero_width = false
165
- lines.last << NON_PRINTING_END
166
148
  when csi
167
149
  lines.last << csi
168
- seq << csi
150
+ unless in_zero_width
151
+ if csi == -"\e[m" || csi == -"\e[0m"
152
+ seq.clear
153
+ else
154
+ seq << csi
155
+ end
156
+ end
169
157
  when osc
170
158
  lines.last << osc
171
- seq << osc
159
+ seq << osc unless in_zero_width
172
160
  when gc
173
161
  unless in_zero_width
174
162
  mbchar_width = get_mbchar_width(gc)
175
163
  if (width += mbchar_width) > max_width
176
164
  width = mbchar_width
177
- lines << nil
178
165
  lines << seq.dup
179
- height += 1
180
166
  end
181
167
  end
182
168
  lines.last << gc
@@ -184,19 +170,30 @@ class Reline::Unicode
184
170
  end
185
171
  # The cursor moves to next line in first
186
172
  if width == max_width
187
- lines << nil
188
173
  lines << String.new(encoding: encoding)
189
- height += 1
190
174
  end
191
- [lines, height]
175
+ lines
176
+ end
177
+
178
+ def self.strip_non_printing_start_end(prompt)
179
+ prompt.gsub(/\x01([^\x02]*)(?:\x02|\z)/) { $1 }
192
180
  end
193
181
 
194
182
  # Take a chunk of a String cut by width with escape sequences.
195
183
  def self.take_range(str, start_col, max_width)
184
+ take_mbchar_range(str, start_col, max_width).first
185
+ end
186
+
187
+ def self.take_mbchar_range(str, start_col, width, cover_begin: false, cover_end: false, padding: false)
196
188
  chunk = String.new(encoding: str.encoding)
189
+
190
+ end_col = start_col + width
197
191
  total_width = 0
198
192
  rest = str.encode(Encoding::UTF_8)
199
193
  in_zero_width = false
194
+ chunk_start_col = nil
195
+ chunk_end_col = nil
196
+ has_csi = false
200
197
  rest.scan(WIDTH_SCANNER) do |non_printing_start, non_printing_end, csi, osc, gc|
201
198
  case
202
199
  when non_printing_start
@@ -204,21 +201,56 @@ class Reline::Unicode
204
201
  when non_printing_end
205
202
  in_zero_width = false
206
203
  when csi
204
+ has_csi = true
207
205
  chunk << csi
208
206
  when osc
209
207
  chunk << osc
210
208
  when gc
211
209
  if in_zero_width
212
210
  chunk << gc
211
+ next
212
+ end
213
+
214
+ mbchar_width = get_mbchar_width(gc)
215
+ prev_width = total_width
216
+ total_width += mbchar_width
217
+
218
+ if (cover_begin || padding ? total_width <= start_col : prev_width < start_col)
219
+ # Current character haven't reached start_col yet
220
+ next
221
+ elsif padding && !cover_begin && prev_width < start_col && start_col < total_width
222
+ # Add preceding padding. This padding might have background color.
223
+ chunk << ' ' * (total_width - start_col)
224
+ chunk_start_col ||= start_col
225
+ chunk_end_col = total_width
226
+ next
227
+ elsif (cover_end ? prev_width < end_col : total_width <= end_col)
228
+ # Current character is in the range
229
+ chunk << gc
230
+ chunk_start_col ||= prev_width
231
+ chunk_end_col = total_width
232
+ break if total_width >= end_col
213
233
  else
214
- mbchar_width = get_mbchar_width(gc)
215
- total_width += mbchar_width
216
- break if (start_col + max_width) < total_width
217
- chunk << gc if start_col < total_width
234
+ # Current character exceeds end_col
235
+ if padding && end_col < total_width
236
+ # Add succeeding padding. This padding might have background color.
237
+ chunk << ' ' * (end_col - prev_width)
238
+ chunk_start_col ||= prev_width
239
+ chunk_end_col = end_col
240
+ end
241
+ break
218
242
  end
219
243
  end
220
244
  end
221
- chunk
245
+ chunk_start_col ||= start_col
246
+ chunk_end_col ||= start_col
247
+ if padding && chunk_end_col < end_col
248
+ # Append padding. This padding should not include background color.
249
+ chunk << "\e[0m" if has_csi
250
+ chunk << ' ' * (end_col - chunk_end_col)
251
+ chunk_end_col = end_col
252
+ end
253
+ [chunk, chunk_start_col, chunk_end_col - chunk_start_col]
222
254
  end
223
255
 
224
256
  def self.get_next_mbchar_size(line, byte_pointer)
@@ -236,427 +268,154 @@ class Reline::Unicode
236
268
  end
237
269
 
238
270
  def self.em_forward_word(line, byte_pointer)
239
- width = 0
240
- byte_size = 0
241
- while line.bytesize > (byte_pointer + byte_size)
242
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
243
- mbchar = line.byteslice(byte_pointer + byte_size, size)
244
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
245
- width += get_mbchar_width(mbchar)
246
- byte_size += size
247
- end
248
- while line.bytesize > (byte_pointer + byte_size)
249
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
250
- mbchar = line.byteslice(byte_pointer + byte_size, size)
251
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
252
- width += get_mbchar_width(mbchar)
253
- byte_size += size
254
- end
255
- [byte_size, width]
271
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
272
+ nonwords = gcs.take_while { |c| !word_character?(c) }
273
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
274
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
256
275
  end
257
276
 
258
277
  def self.em_forward_word_with_capitalization(line, byte_pointer)
259
- width = 0
260
- byte_size = 0
261
- new_str = String.new
262
- while line.bytesize > (byte_pointer + byte_size)
263
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
264
- mbchar = line.byteslice(byte_pointer + byte_size, size)
265
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
266
- new_str += mbchar
267
- width += get_mbchar_width(mbchar)
268
- byte_size += size
269
- end
270
- first = true
271
- while line.bytesize > (byte_pointer + byte_size)
272
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
273
- mbchar = line.byteslice(byte_pointer + byte_size, size)
274
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
275
- if first
276
- new_str += mbchar.upcase
277
- first = false
278
- else
279
- new_str += mbchar.downcase
280
- end
281
- width += get_mbchar_width(mbchar)
282
- byte_size += size
283
- end
284
- [byte_size, width, new_str]
278
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
279
+ nonwords = gcs.take_while { |c| !word_character?(c) }
280
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
281
+ [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
285
282
  end
286
283
 
287
284
  def self.em_backward_word(line, byte_pointer)
288
- width = 0
289
- byte_size = 0
290
- while 0 < (byte_pointer - byte_size)
291
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
292
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
293
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
294
- width += get_mbchar_width(mbchar)
295
- byte_size += size
296
- end
297
- while 0 < (byte_pointer - byte_size)
298
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
299
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
300
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
301
- width += get_mbchar_width(mbchar)
302
- byte_size += size
303
- end
304
- [byte_size, width]
285
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
286
+ nonwords = gcs.take_while { |c| !word_character?(c) }
287
+ words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
288
+ nonwords.sum(&:bytesize) + words.sum(&:bytesize)
305
289
  end
306
290
 
307
291
  def self.em_big_backward_word(line, byte_pointer)
308
- width = 0
309
- byte_size = 0
310
- while 0 < (byte_pointer - byte_size)
311
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
312
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
313
- break if mbchar =~ /\S/
314
- width += get_mbchar_width(mbchar)
315
- byte_size += size
316
- end
317
- while 0 < (byte_pointer - byte_size)
318
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
319
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
320
- break if mbchar =~ /\s/
321
- width += get_mbchar_width(mbchar)
322
- byte_size += size
323
- end
324
- [byte_size, width]
292
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
293
+ spaces = gcs.take_while { |c| space_character?(c) }
294
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
295
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
325
296
  end
326
297
 
327
298
  def self.ed_transpose_words(line, byte_pointer)
328
- right_word_start = nil
329
- size = get_next_mbchar_size(line, byte_pointer)
330
- mbchar = line.byteslice(byte_pointer, size)
331
- if size.zero?
332
- # ' aaa bbb [cursor]'
333
- byte_size = 0
334
- while 0 < (byte_pointer + byte_size)
335
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
336
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
337
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
338
- byte_size -= size
339
- end
340
- while 0 < (byte_pointer + byte_size)
341
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
342
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
343
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
344
- byte_size -= size
345
- end
346
- right_word_start = byte_pointer + byte_size
347
- byte_size = 0
348
- while line.bytesize > (byte_pointer + byte_size)
349
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
350
- mbchar = line.byteslice(byte_pointer + byte_size, size)
351
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
352
- byte_size += size
353
- end
354
- after_start = byte_pointer + byte_size
355
- elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
356
- # ' aaa bb[cursor]b'
357
- byte_size = 0
358
- while 0 < (byte_pointer + byte_size)
359
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
360
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
361
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
362
- byte_size -= size
363
- end
364
- right_word_start = byte_pointer + byte_size
365
- byte_size = 0
366
- while line.bytesize > (byte_pointer + byte_size)
367
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
368
- mbchar = line.byteslice(byte_pointer + byte_size, size)
369
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
370
- byte_size += size
371
- end
372
- after_start = byte_pointer + byte_size
373
- else
374
- byte_size = 0
375
- while (line.bytesize - 1) > (byte_pointer + byte_size)
376
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
377
- mbchar = line.byteslice(byte_pointer + byte_size, size)
378
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
379
- byte_size += size
380
- end
381
- if (byte_pointer + byte_size) == (line.bytesize - 1)
382
- # ' aaa bbb [cursor] '
383
- after_start = line.bytesize
384
- while 0 < (byte_pointer + byte_size)
385
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
386
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
387
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
388
- byte_size -= size
389
- end
390
- while 0 < (byte_pointer + byte_size)
391
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
392
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
393
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
394
- byte_size -= size
395
- end
396
- right_word_start = byte_pointer + byte_size
397
- else
398
- # ' aaa [cursor] bbb '
399
- right_word_start = byte_pointer + byte_size
400
- while line.bytesize > (byte_pointer + byte_size)
401
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
402
- mbchar = line.byteslice(byte_pointer + byte_size, size)
403
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
404
- byte_size += size
405
- end
406
- after_start = byte_pointer + byte_size
407
- end
408
- end
409
- byte_size = right_word_start - byte_pointer
410
- while 0 < (byte_pointer + byte_size)
411
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
412
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
413
- break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
414
- byte_size -= size
415
- end
416
- middle_start = byte_pointer + byte_size
417
- byte_size = middle_start - byte_pointer
418
- while 0 < (byte_pointer + byte_size)
419
- size = get_prev_mbchar_size(line, byte_pointer + byte_size)
420
- mbchar = line.byteslice(byte_pointer + byte_size - size, size)
421
- break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
422
- byte_size -= size
299
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters
300
+ pos = gcs.size
301
+ gcs += line.byteslice(byte_pointer..).grapheme_clusters
302
+ pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
303
+ if pos == gcs.size # 'aaa bbb [cursor] '
304
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
305
+ second_word_end = gcs.size
306
+ else # 'aaa [cursor]bbb'
307
+ pos += 1 while pos < gcs.size && word_character?(gcs[pos])
308
+ second_word_end = pos
309
+ end
310
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
311
+ second_word_start = pos
312
+ pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
313
+ first_word_end = pos
314
+ pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
315
+ first_word_start = pos
316
+
317
+ [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
318
+ gcs.take(idx).sum(&:bytesize)
423
319
  end
424
- left_word_start = byte_pointer + byte_size
425
- [left_word_start, middle_start, right_word_start, after_start]
426
320
  end
427
321
 
428
322
  def self.vi_big_forward_word(line, byte_pointer)
429
- width = 0
430
- byte_size = 0
431
- while (line.bytesize - 1) > (byte_pointer + byte_size)
432
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
433
- mbchar = line.byteslice(byte_pointer + byte_size, size)
434
- break if mbchar =~ /\s/
435
- width += get_mbchar_width(mbchar)
436
- byte_size += size
437
- end
438
- while (line.bytesize - 1) > (byte_pointer + byte_size)
439
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
440
- mbchar = line.byteslice(byte_pointer + byte_size, size)
441
- break if mbchar =~ /\S/
442
- width += get_mbchar_width(mbchar)
443
- byte_size += size
444
- end
445
- [byte_size, width]
323
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
324
+ nonspaces = gcs.take_while { |c| !space_character?(c) }
325
+ spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
326
+ nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
446
327
  end
447
328
 
448
329
  def self.vi_big_forward_end_word(line, byte_pointer)
449
- if (line.bytesize - 1) > byte_pointer
450
- size = get_next_mbchar_size(line, byte_pointer)
451
- mbchar = line.byteslice(byte_pointer, size)
452
- width = get_mbchar_width(mbchar)
453
- byte_size = size
454
- else
455
- return [0, 0]
456
- end
457
- while (line.bytesize - 1) > (byte_pointer + byte_size)
458
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
459
- mbchar = line.byteslice(byte_pointer + byte_size, size)
460
- break if mbchar =~ /\S/
461
- width += get_mbchar_width(mbchar)
462
- byte_size += size
463
- end
464
- prev_width = width
465
- prev_byte_size = byte_size
466
- while line.bytesize > (byte_pointer + byte_size)
467
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
468
- mbchar = line.byteslice(byte_pointer + byte_size, size)
469
- break if mbchar =~ /\s/
470
- prev_width = width
471
- prev_byte_size = byte_size
472
- width += get_mbchar_width(mbchar)
473
- byte_size += size
474
- end
475
- [prev_byte_size, prev_width]
330
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
331
+ first = gcs.shift(1)
332
+ spaces = gcs.take_while { |c| space_character?(c) }
333
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
334
+ matched = spaces + nonspaces
335
+ matched.pop
336
+ first.sum(&:bytesize) + matched.sum(&:bytesize)
476
337
  end
477
338
 
478
339
  def self.vi_big_backward_word(line, byte_pointer)
479
- width = 0
480
- byte_size = 0
481
- while 0 < (byte_pointer - byte_size)
482
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
483
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
484
- break if mbchar =~ /\S/
485
- width += get_mbchar_width(mbchar)
486
- byte_size += size
487
- end
488
- while 0 < (byte_pointer - byte_size)
489
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
490
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
491
- break if mbchar =~ /\s/
492
- width += get_mbchar_width(mbchar)
493
- byte_size += size
494
- end
495
- [byte_size, width]
340
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
341
+ spaces = gcs.take_while { |c| space_character?(c) }
342
+ nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
343
+ spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
496
344
  end
497
345
 
498
346
  def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
499
- if line.bytesize > byte_pointer
500
- size = get_next_mbchar_size(line, byte_pointer)
501
- mbchar = line.byteslice(byte_pointer, size)
502
- if mbchar =~ /\w/
503
- started_by = :word
504
- elsif mbchar =~ /\s/
505
- started_by = :space
347
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
348
+ return 0 if gcs.empty?
349
+
350
+ c = gcs.first
351
+ matched =
352
+ if word_character?(c)
353
+ gcs.take_while { |c| word_character?(c) }
354
+ elsif space_character?(c)
355
+ gcs.take_while { |c| space_character?(c) }
506
356
  else
507
- started_by = :non_word_printable
357
+ gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
508
358
  end
509
- width = get_mbchar_width(mbchar)
510
- byte_size = size
511
- else
512
- return [0, 0]
513
- end
514
- while line.bytesize > (byte_pointer + byte_size)
515
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
516
- mbchar = line.byteslice(byte_pointer + byte_size, size)
517
- case started_by
518
- when :word
519
- break if mbchar =~ /\W/
520
- when :space
521
- break if mbchar =~ /\S/
522
- when :non_word_printable
523
- break if mbchar =~ /\w|\s/
524
- end
525
- width += get_mbchar_width(mbchar)
526
- byte_size += size
527
- end
528
- return [byte_size, width] if drop_terminate_spaces
529
- while line.bytesize > (byte_pointer + byte_size)
530
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
531
- mbchar = line.byteslice(byte_pointer + byte_size, size)
532
- break if mbchar =~ /\S/
533
- width += get_mbchar_width(mbchar)
534
- byte_size += size
535
- end
536
- [byte_size, width]
359
+
360
+ return matched.sum(&:bytesize) if drop_terminate_spaces
361
+
362
+ spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
363
+ matched.sum(&:bytesize) + spaces.sum(&:bytesize)
537
364
  end
538
365
 
539
366
  def self.vi_forward_end_word(line, byte_pointer)
540
- if (line.bytesize - 1) > byte_pointer
541
- size = get_next_mbchar_size(line, byte_pointer)
542
- mbchar = line.byteslice(byte_pointer, size)
543
- if mbchar =~ /\w/
544
- started_by = :word
545
- elsif mbchar =~ /\s/
546
- started_by = :space
547
- else
548
- started_by = :non_word_printable
549
- end
550
- width = get_mbchar_width(mbchar)
551
- byte_size = size
552
- else
553
- return [0, 0]
554
- end
555
- if (line.bytesize - 1) > (byte_pointer + byte_size)
556
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
557
- mbchar = line.byteslice(byte_pointer + byte_size, size)
558
- if mbchar =~ /\w/
559
- second = :word
560
- elsif mbchar =~ /\s/
561
- second = :space
562
- else
563
- second = :non_word_printable
564
- end
565
- second_width = get_mbchar_width(mbchar)
566
- second_byte_size = size
567
- else
568
- return [byte_size, width]
569
- end
570
- if second == :space
571
- width += second_width
572
- byte_size += second_byte_size
573
- while (line.bytesize - 1) > (byte_pointer + byte_size)
574
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
575
- mbchar = line.byteslice(byte_pointer + byte_size, size)
576
- if mbchar =~ /\S/
577
- if mbchar =~ /\w/
578
- started_by = :word
579
- else
580
- started_by = :non_word_printable
581
- end
582
- break
583
- end
584
- width += get_mbchar_width(mbchar)
585
- byte_size += size
586
- end
587
- else
588
- case [started_by, second]
589
- when [:word, :non_word_printable], [:non_word_printable, :word]
590
- started_by = second
591
- else
592
- width += second_width
593
- byte_size += second_byte_size
594
- started_by = second
595
- end
596
- end
597
- prev_width = width
598
- prev_byte_size = byte_size
599
- while line.bytesize > (byte_pointer + byte_size)
600
- size = get_next_mbchar_size(line, byte_pointer + byte_size)
601
- mbchar = line.byteslice(byte_pointer + byte_size, size)
602
- case started_by
603
- when :word
604
- break if mbchar =~ /\W/
605
- when :non_word_printable
606
- break if mbchar =~ /[\w\s]/
607
- end
608
- prev_width = width
609
- prev_byte_size = byte_size
610
- width += get_mbchar_width(mbchar)
611
- byte_size += size
612
- end
613
- [prev_byte_size, prev_width]
367
+ gcs = line.byteslice(byte_pointer..).grapheme_clusters
368
+ return 0 if gcs.empty?
369
+ return gcs.first.bytesize if gcs.size == 1
370
+
371
+ start = gcs.shift
372
+ skips = [start]
373
+ if space_character?(start) || space_character?(gcs.first)
374
+ spaces = gcs.take_while { |c| space_character?(c) }
375
+ skips += spaces
376
+ gcs.shift(spaces.size)
377
+ end
378
+ start_with_word = word_character?(gcs.first)
379
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
380
+ matched.pop
381
+ skips.sum(&:bytesize) + matched.sum(&:bytesize)
614
382
  end
615
383
 
616
384
  def self.vi_backward_word(line, byte_pointer)
617
- width = 0
618
- byte_size = 0
619
- while 0 < (byte_pointer - byte_size)
620
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
621
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
622
- if mbchar =~ /\S/
623
- if mbchar =~ /\w/
624
- started_by = :word
625
- else
626
- started_by = :non_word_printable
627
- end
628
- break
629
- end
630
- width += get_mbchar_width(mbchar)
631
- byte_size += size
632
- end
633
- while 0 < (byte_pointer - byte_size)
634
- size = get_prev_mbchar_size(line, byte_pointer - byte_size)
635
- mbchar = line.byteslice(byte_pointer - byte_size - size, size)
636
- case started_by
637
- when :word
638
- break if mbchar =~ /\W/
639
- when :non_word_printable
640
- break if mbchar =~ /[\w\s]/
385
+ gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
386
+ spaces = gcs.take_while { |c| space_character?(c) }
387
+ gcs.shift(spaces.size)
388
+ start_with_word = word_character?(gcs.first)
389
+ matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
390
+ spaces.sum(&:bytesize) + matched.sum(&:bytesize)
391
+ end
392
+
393
+ def self.common_prefix(list, ignore_case: false)
394
+ return '' if list.empty?
395
+
396
+ common_prefix_gcs = list.first.grapheme_clusters
397
+ list.each do |item|
398
+ gcs = item.grapheme_clusters
399
+ common_prefix_gcs = common_prefix_gcs.take_while.with_index do |gc, i|
400
+ ignore_case ? gc.casecmp?(gcs[i]) : gc == gcs[i]
641
401
  end
642
- width += get_mbchar_width(mbchar)
643
- byte_size += size
644
402
  end
645
- [byte_size, width]
403
+ common_prefix_gcs.join
646
404
  end
647
405
 
648
406
  def self.vi_first_print(line)
649
- width = 0
650
- byte_size = 0
651
- while (line.bytesize - 1) > byte_size
652
- size = get_next_mbchar_size(line, byte_size)
653
- mbchar = line.byteslice(byte_size, size)
654
- if mbchar =~ /\S/
655
- break
656
- end
657
- width += get_mbchar_width(mbchar)
658
- byte_size += size
659
- end
660
- [byte_size, width]
407
+ gcs = line.grapheme_clusters
408
+ spaces = gcs.take_while { |c| space_character?(c) }
409
+ spaces.sum(&:bytesize)
410
+ end
411
+
412
+ def self.word_character?(s)
413
+ s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
414
+ rescue Encoding::UndefinedConversionError
415
+ false
416
+ end
417
+
418
+ def self.space_character?(s)
419
+ s.match?(/\s/) if s
661
420
  end
662
421
  end