ruvim 0.4.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/AGENTS.md +53 -4
  3. data/README.md +15 -6
  4. data/Rakefile +7 -0
  5. data/benchmark/cext_compare.rb +165 -0
  6. data/benchmark/chunked_load.rb +256 -0
  7. data/benchmark/file_load.rb +140 -0
  8. data/benchmark/hotspots.rb +178 -0
  9. data/docs/binding.md +3 -2
  10. data/docs/command.md +81 -9
  11. data/docs/done.md +23 -0
  12. data/docs/spec.md +105 -19
  13. data/docs/todo.md +9 -0
  14. data/docs/tutorial.md +9 -1
  15. data/docs/vim_diff.md +13 -0
  16. data/ext/ruvim/extconf.rb +5 -0
  17. data/ext/ruvim/ruvim_ext.c +519 -0
  18. data/lib/ruvim/app.rb +217 -2778
  19. data/lib/ruvim/browser.rb +104 -0
  20. data/lib/ruvim/buffer.rb +39 -28
  21. data/lib/ruvim/command_invocation.rb +2 -2
  22. data/lib/ruvim/completion_manager.rb +708 -0
  23. data/lib/ruvim/dispatcher.rb +14 -8
  24. data/lib/ruvim/display_width.rb +91 -45
  25. data/lib/ruvim/editor.rb +64 -81
  26. data/lib/ruvim/ex_command_registry.rb +3 -1
  27. data/lib/ruvim/gh/link.rb +207 -0
  28. data/lib/ruvim/git/blame.rb +16 -6
  29. data/lib/ruvim/git/branch.rb +20 -5
  30. data/lib/ruvim/git/grep.rb +107 -0
  31. data/lib/ruvim/git/handler.rb +42 -1
  32. data/lib/ruvim/global_commands.rb +175 -35
  33. data/lib/ruvim/highlighter.rb +4 -13
  34. data/lib/ruvim/key_handler.rb +1510 -0
  35. data/lib/ruvim/keymap_manager.rb +7 -7
  36. data/lib/ruvim/lang/base.rb +5 -0
  37. data/lib/ruvim/lang/c.rb +116 -0
  38. data/lib/ruvim/lang/cpp.rb +107 -0
  39. data/lib/ruvim/lang/csv.rb +4 -1
  40. data/lib/ruvim/lang/diff.rb +2 -0
  41. data/lib/ruvim/lang/dockerfile.rb +36 -0
  42. data/lib/ruvim/lang/elixir.rb +85 -0
  43. data/lib/ruvim/lang/erb.rb +30 -0
  44. data/lib/ruvim/lang/go.rb +83 -0
  45. data/lib/ruvim/lang/html.rb +34 -0
  46. data/lib/ruvim/lang/javascript.rb +83 -0
  47. data/lib/ruvim/lang/json.rb +6 -0
  48. data/lib/ruvim/lang/lua.rb +76 -0
  49. data/lib/ruvim/lang/makefile.rb +36 -0
  50. data/lib/ruvim/lang/markdown.rb +3 -4
  51. data/lib/ruvim/lang/ocaml.rb +77 -0
  52. data/lib/ruvim/lang/perl.rb +91 -0
  53. data/lib/ruvim/lang/python.rb +85 -0
  54. data/lib/ruvim/lang/registry.rb +102 -0
  55. data/lib/ruvim/lang/ruby.rb +7 -0
  56. data/lib/ruvim/lang/rust.rb +95 -0
  57. data/lib/ruvim/lang/scheme.rb +5 -0
  58. data/lib/ruvim/lang/sh.rb +76 -0
  59. data/lib/ruvim/lang/sql.rb +52 -0
  60. data/lib/ruvim/lang/toml.rb +36 -0
  61. data/lib/ruvim/lang/tsv.rb +4 -1
  62. data/lib/ruvim/lang/typescript.rb +53 -0
  63. data/lib/ruvim/lang/yaml.rb +62 -0
  64. data/lib/ruvim/rich_view/table_renderer.rb +3 -3
  65. data/lib/ruvim/rich_view.rb +14 -7
  66. data/lib/ruvim/screen.rb +126 -72
  67. data/lib/ruvim/stream/file_load.rb +85 -0
  68. data/lib/ruvim/stream/follow.rb +40 -0
  69. data/lib/ruvim/stream/git.rb +43 -0
  70. data/lib/ruvim/stream/run.rb +74 -0
  71. data/lib/ruvim/stream/stdin.rb +55 -0
  72. data/lib/ruvim/stream.rb +35 -0
  73. data/lib/ruvim/stream_mixer.rb +394 -0
  74. data/lib/ruvim/terminal.rb +18 -4
  75. data/lib/ruvim/text_metrics.rb +84 -65
  76. data/lib/ruvim/version.rb +1 -1
  77. data/lib/ruvim/window.rb +5 -5
  78. data/lib/ruvim.rb +23 -6
  79. data/test/app_command_test.rb +382 -0
  80. data/test/app_completion_test.rb +43 -19
  81. data/test/app_dot_repeat_test.rb +27 -3
  82. data/test/app_ex_command_test.rb +154 -0
  83. data/test/app_motion_test.rb +13 -12
  84. data/test/app_register_test.rb +2 -1
  85. data/test/app_scenario_test.rb +15 -10
  86. data/test/app_startup_test.rb +70 -27
  87. data/test/app_text_object_test.rb +2 -1
  88. data/test/app_unicode_behavior_test.rb +3 -2
  89. data/test/browser_test.rb +88 -0
  90. data/test/buffer_test.rb +24 -0
  91. data/test/cli_test.rb +63 -0
  92. data/test/command_invocation_test.rb +33 -0
  93. data/test/config_dsl_test.rb +47 -0
  94. data/test/dispatcher_test.rb +74 -4
  95. data/test/ex_command_registry_test.rb +106 -0
  96. data/test/follow_test.rb +20 -21
  97. data/test/gh_link_test.rb +141 -0
  98. data/test/git_blame_test.rb +96 -17
  99. data/test/git_grep_test.rb +64 -0
  100. data/test/highlighter_test.rb +125 -0
  101. data/test/indent_test.rb +137 -0
  102. data/test/input_screen_integration_test.rb +1 -1
  103. data/test/keyword_chars_test.rb +85 -0
  104. data/test/lang_test.rb +634 -0
  105. data/test/markdown_renderer_test.rb +5 -5
  106. data/test/on_save_hook_test.rb +12 -8
  107. data/test/render_snapshot_test.rb +78 -0
  108. data/test/rich_view_test.rb +42 -42
  109. data/test/run_command_test.rb +307 -0
  110. data/test/screen_test.rb +68 -5
  111. data/test/stream_test.rb +165 -0
  112. data/test/window_test.rb +59 -0
  113. metadata +52 -2
@@ -0,0 +1,519 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+
4
+ /* ------------------------------------------------------------------ */
5
+ /* Unicode width tables */
6
+ /* ------------------------------------------------------------------ */
7
+
8
/* Inclusive span of Unicode code points: lo <= code <= hi. */
typedef struct {
    unsigned int lo;
    unsigned int hi;
} range_t;

/*
 * Every table below is kept in ascending order with non-overlapping entries;
 * the linear lookup that consumes them stops at the first entry whose lower
 * bound is above the code point being tested.
 */

/* Combining marks. */
static const range_t combining_ranges[] = {
    {0x0300, 0x036F}, {0x1AB0, 0x1AFF}, {0x1DC0, 0x1DFF},
    {0x20D0, 0x20FF}, {0xFE20, 0xFE2F},
};
#define COMBINING_COUNT (sizeof(combining_ranges) / sizeof(combining_ranges[0]))

/* Zero-width joiner and variation selectors. */
static const range_t zero_width_ranges[] = {
    {0x200D, 0x200D}, {0xFE00, 0xFE0F}, {0xE0100, 0xE01EF},
};
#define ZERO_WIDTH_COUNT (sizeof(zero_width_ranges) / sizeof(zero_width_ranges[0]))

/* Code points treated as wide. */
static const range_t wide_ranges[] = {
    {0x1100, 0x115F}, {0x2329, 0x232A}, {0x2E80, 0xA4CF},
    {0xAC00, 0xD7A3}, {0xF900, 0xFAFF}, {0xFE10, 0xFE19},
    {0xFE30, 0xFE6F}, {0xFF00, 0xFF60}, {0xFFE0, 0xFFE6},
};
#define WIDE_COUNT (sizeof(wide_ranges) / sizeof(wide_ranges[0]))

/* Symbol and emoji ranges. */
static const range_t emoji_ranges[] = {
    {0x2600, 0x27BF}, {0x1F300, 0x1FAFF},
};
#define EMOJI_COUNT (sizeof(emoji_ranges) / sizeof(emoji_ranges[0]))

/* Code points treated as "ambiguous" width. */
static const range_t ambiguous_ranges[] = {
    {0x00A1, 0x00A1}, {0x00A4, 0x00A4}, {0x00A7, 0x00A8}, {0x00AA, 0x00AA},
    {0x00AD, 0x00AE}, {0x00B0, 0x00B4}, {0x00B6, 0x00BA}, {0x00BC, 0x00BF},
    {0x0391, 0x03A9}, {0x03B1, 0x03C9},
    {0x2010, 0x2010}, {0x2013, 0x2016}, {0x2018, 0x2019}, {0x201C, 0x201D},
    {0x2020, 0x2022}, {0x2024, 0x2027}, {0x2030, 0x2030}, {0x2032, 0x2033},
    {0x2035, 0x2035}, {0x203B, 0x203B}, {0x203E, 0x203E},
    {0x2460, 0x24E9}, {0x2500, 0x257F},
};
#define AMBIGUOUS_COUNT (sizeof(ambiguous_ranges) / sizeof(ambiguous_ranges[0]))
74
+
75
+ static inline int
76
+ in_ranges(unsigned int code, const range_t *ranges, int count)
77
+ {
78
+ for (int i = 0; i < count; i++) {
79
+ if (code < ranges[i].lo) return 0; /* sorted — early exit */
80
+ if (code <= ranges[i].hi) return 1;
81
+ }
82
+ return 0;
83
+ }
84
+
85
+ static int ambiguous_width = 1;
86
+
87
+ static int
88
+ codepoint_width(unsigned int code)
89
+ {
90
+ if (code == 0) return 0;
91
+ if (code < 0x20) return 1; /* control → 1 (caller handles display) */
92
+ if (code < 0x7F) return 1; /* printable ASCII */
93
+ if (in_ranges(code, combining_ranges, COMBINING_COUNT)) return 0;
94
+ if (in_ranges(code, zero_width_ranges, ZERO_WIDTH_COUNT)) return 0;
95
+ if (in_ranges(code, ambiguous_ranges, AMBIGUOUS_COUNT)) return ambiguous_width;
96
+ if (in_ranges(code, emoji_ranges, EMOJI_COUNT)) return 2;
97
+ if (in_ranges(code, wide_ranges, WIDE_COUNT)) return 2;
98
+ return 1;
99
+ }
100
+
101
+ /* ------------------------------------------------------------------ */
102
+ /* Ruby method: cell_width(ch, col: 0, tabstop: 2) */
103
+ /* ------------------------------------------------------------------ */
104
+
105
+ static VALUE
106
+ rb_cell_width(int argc, VALUE *argv, VALUE self)
107
+ {
108
+ VALUE ch, opts;
109
+ rb_scan_args(argc, argv, "1:", &ch, &opts);
110
+
111
+ if (NIL_P(ch) || (TYPE(ch) == T_STRING && RSTRING_LEN(ch) == 0))
112
+ return INT2FIX(1);
113
+
114
+ int col = 0, tabstop = 2;
115
+ if (!NIL_P(opts)) {
116
+ VALUE v;
117
+ static ID id_col, id_tabstop;
118
+ if (!id_col) {
119
+ id_col = rb_intern("col");
120
+ id_tabstop = rb_intern("tabstop");
121
+ }
122
+ v = rb_hash_lookup2(opts, ID2SYM(id_col), Qnil);
123
+ if (!NIL_P(v)) col = NUM2INT(v);
124
+ v = rb_hash_lookup2(opts, ID2SYM(id_tabstop), Qnil);
125
+ if (!NIL_P(v)) tabstop = NUM2INT(v);
126
+ }
127
+
128
+ if (TYPE(ch) != T_STRING) return INT2FIX(1);
129
+
130
+ const char *ptr = RSTRING_PTR(ch);
131
+ long len = RSTRING_LEN(ch);
132
+
133
+ if (len == 1 && ptr[0] == '\t') {
134
+ int w = tabstop - (col % tabstop);
135
+ if (w == 0) w = tabstop;
136
+ return INT2FIX(w);
137
+ }
138
+
139
+ /* Fast path: single-byte ASCII */
140
+ if (len == 1) return INT2FIX(1);
141
+
142
+ /* Decode first codepoint */
143
+ unsigned int code = rb_enc_codepoint_len(ptr, ptr + len, NULL,
144
+ rb_utf8_encoding());
145
+ return INT2FIX(codepoint_width(code));
146
+ }
147
+
148
+ /* ------------------------------------------------------------------ */
149
+ /* Ruby method: display_width(str, tabstop: 2, start_col: 0) */
150
+ /* ------------------------------------------------------------------ */
151
+
152
+ static VALUE
153
+ rb_display_width(int argc, VALUE *argv, VALUE self)
154
+ {
155
+ VALUE str, opts;
156
+ rb_scan_args(argc, argv, "1:", &str, &opts);
157
+
158
+ int tabstop = 2, start_col = 0;
159
+ if (!NIL_P(opts)) {
160
+ VALUE v;
161
+ static ID id_tabstop, id_start_col;
162
+ if (!id_tabstop) {
163
+ id_tabstop = rb_intern("tabstop");
164
+ id_start_col = rb_intern("start_col");
165
+ }
166
+ v = rb_hash_lookup2(opts, ID2SYM(id_tabstop), Qnil);
167
+ if (!NIL_P(v)) tabstop = NUM2INT(v);
168
+ v = rb_hash_lookup2(opts, ID2SYM(id_start_col), Qnil);
169
+ if (!NIL_P(v)) start_col = NUM2INT(v);
170
+ }
171
+
172
+ if (NIL_P(str)) str = rb_str_new("", 0);
173
+ if (TYPE(str) != T_STRING) str = rb_String(str);
174
+
175
+ const char *ptr = RSTRING_PTR(str);
176
+ const char *end = ptr + RSTRING_LEN(str);
177
+ rb_encoding *enc = rb_utf8_encoding();
178
+ int col = start_col;
179
+
180
+ while (ptr < end) {
181
+ unsigned int code;
182
+ int clen = rb_enc_precise_mbclen(ptr, end, enc);
183
+
184
+ if (!MBCLEN_CHARFOUND_P(clen)) {
185
+ /* invalid byte — skip one byte, width 1 */
186
+ ptr++;
187
+ col++;
188
+ continue;
189
+ }
190
+ clen = MBCLEN_CHARFOUND_LEN(clen);
191
+ code = rb_enc_codepoint(ptr, end, enc);
192
+ ptr += clen;
193
+
194
+ if (code == '\t') {
195
+ int w = tabstop - (col % tabstop);
196
+ if (w == 0) w = tabstop;
197
+ col += w;
198
+ } else if (clen == 1) {
199
+ col++; /* ASCII */
200
+ } else {
201
+ col += codepoint_width(code);
202
+ }
203
+ }
204
+
205
+ return INT2FIX(col - start_col);
206
+ }
207
+
208
+ /* ------------------------------------------------------------------ */
209
+ /* Ruby method: expand_tabs(str, tabstop: 2, start_col: 0) */
210
+ /* ------------------------------------------------------------------ */
211
+
212
+ static VALUE
213
+ rb_expand_tabs(int argc, VALUE *argv, VALUE self)
214
+ {
215
+ VALUE str, opts;
216
+ rb_scan_args(argc, argv, "1:", &str, &opts);
217
+
218
+ int tabstop = 2, start_col = 0;
219
+ if (!NIL_P(opts)) {
220
+ VALUE v;
221
+ static ID id_tabstop, id_start_col;
222
+ if (!id_tabstop) {
223
+ id_tabstop = rb_intern("tabstop");
224
+ id_start_col = rb_intern("start_col");
225
+ }
226
+ v = rb_hash_lookup2(opts, ID2SYM(id_tabstop), Qnil);
227
+ if (!NIL_P(v)) tabstop = NUM2INT(v);
228
+ v = rb_hash_lookup2(opts, ID2SYM(id_start_col), Qnil);
229
+ if (!NIL_P(v)) start_col = NUM2INT(v);
230
+ }
231
+
232
+ if (NIL_P(str)) str = rb_str_new("", 0);
233
+ if (TYPE(str) != T_STRING) str = rb_String(str);
234
+
235
+ const char *ptr = RSTRING_PTR(str);
236
+ const char *end = ptr + RSTRING_LEN(str);
237
+ rb_encoding *enc = rb_utf8_encoding();
238
+ int col = start_col;
239
+
240
+ VALUE out = rb_str_buf_new(RSTRING_LEN(str) + 32);
241
+ rb_enc_associate(out, enc);
242
+
243
+ while (ptr < end) {
244
+ if (*ptr == '\t') {
245
+ int w = tabstop - (col % tabstop);
246
+ if (w == 0) w = tabstop;
247
+ for (int i = 0; i < w; i++)
248
+ rb_str_cat(out, " ", 1);
249
+ col += w;
250
+ ptr++;
251
+ } else {
252
+ int clen = rb_enc_precise_mbclen(ptr, end, enc);
253
+ if (!MBCLEN_CHARFOUND_P(clen)) {
254
+ rb_str_cat(out, ptr, 1);
255
+ ptr++;
256
+ col++;
257
+ continue;
258
+ }
259
+ clen = MBCLEN_CHARFOUND_LEN(clen);
260
+ unsigned int code = rb_enc_codepoint(ptr, end, enc);
261
+ rb_str_cat(out, ptr, clen);
262
+ col += (clen == 1) ? 1 : codepoint_width(code);
263
+ ptr += clen;
264
+ }
265
+ }
266
+
267
+ return out;
268
+ }
269
+
270
+ /* ------------------------------------------------------------------ */
271
+ /* Ruby method: set_ambiguous_width(w) */
272
+ /* ------------------------------------------------------------------ */
273
+
274
+ static VALUE
275
+ rb_set_ambiguous_width(VALUE self, VALUE w)
276
+ {
277
+ ambiguous_width = NUM2INT(w);
278
+ return w;
279
+ }
280
+
281
+ /* ------------------------------------------------------------------ */
282
+ /* TextMetrics */
283
+ /* ------------------------------------------------------------------ */
284
+
285
+ static VALUE cCell = Qundef; /* RuVim::TextMetrics::Cell (lazy) */
286
+
287
+ static VALUE
288
+ get_cell_class(void)
289
+ {
290
+ if (cCell == Qundef) {
291
+ VALUE mRuVim = rb_const_get(rb_cObject, rb_intern("RuVim"));
292
+ VALUE mTM = rb_const_get(mRuVim, rb_intern("TextMetrics"));
293
+ cCell = rb_const_get(mTM, rb_intern("Cell"));
294
+ rb_gc_register_address(&cCell);
295
+ }
296
+ return cCell;
297
+ }
298
+
299
+ /* clip_cells_for_width(text, width, source_col_start: 0, tabstop: 2)
300
+ * Returns [cells_array, display_col] */
301
+ static VALUE
302
+ rb_clip_cells_for_width(int argc, VALUE *argv, VALUE self)
303
+ {
304
+ VALUE text, v_width, opts;
305
+ rb_scan_args(argc, argv, "2:", &text, &v_width, &opts);
306
+
307
+ int max_width = NUM2INT(v_width);
308
+ if (max_width < 0) max_width = 0;
309
+ int source_col_start = 0, tabstop = 2;
310
+
311
+ if (!NIL_P(opts)) {
312
+ VALUE v;
313
+ static ID id_source_col_start, id_tabstop;
314
+ if (!id_source_col_start) {
315
+ id_source_col_start = rb_intern("source_col_start");
316
+ id_tabstop = rb_intern("tabstop");
317
+ }
318
+ v = rb_hash_lookup2(opts, ID2SYM(id_source_col_start), Qnil);
319
+ if (!NIL_P(v)) source_col_start = NUM2INT(v);
320
+ v = rb_hash_lookup2(opts, ID2SYM(id_tabstop), Qnil);
321
+ if (!NIL_P(v)) tabstop = NUM2INT(v);
322
+ }
323
+
324
+ if (NIL_P(text)) text = rb_str_new("", 0);
325
+ if (TYPE(text) != T_STRING) text = rb_String(text);
326
+
327
+ const char *ptr = RSTRING_PTR(text);
328
+ const char *end = ptr + RSTRING_LEN(text);
329
+ rb_encoding *enc = rb_utf8_encoding();
330
+
331
+ VALUE cell_class = get_cell_class();
332
+ static ID id_new = 0;
333
+ if (!id_new) id_new = rb_intern("new");
334
+
335
+ VALUE cells = rb_ary_new();
336
+ int display_col = 0;
337
+ int source_col = source_col_start;
338
+ VALUE space_str = rb_str_new(" ", 1);
339
+ VALUE question_str = rb_str_new("?", 1);
340
+
341
+ while (ptr < end) {
342
+ int clen = rb_enc_precise_mbclen(ptr, end, enc);
343
+ if (!MBCLEN_CHARFOUND_P(clen)) {
344
+ /* invalid byte */
345
+ if (display_col >= max_width) break;
346
+ rb_ary_push(cells, rb_funcall(cell_class, id_new, 3,
347
+ question_str, INT2FIX(source_col), INT2FIX(1)));
348
+ display_col++;
349
+ source_col++;
350
+ ptr++;
351
+ continue;
352
+ }
353
+ clen = MBCLEN_CHARFOUND_LEN(clen);
354
+ unsigned int code = rb_enc_codepoint(ptr, end, enc);
355
+
356
+ /* Printable ASCII fast path */
357
+ if (code >= 0x20 && code <= 0x7E) {
358
+ if (display_col >= max_width) break;
359
+ VALUE ch = rb_str_new(ptr, 1);
360
+ rb_ary_push(cells, rb_funcall(cell_class, id_new, 3,
361
+ ch, INT2FIX(source_col), INT2FIX(1)));
362
+ display_col++;
363
+ source_col++;
364
+ ptr += 1;
365
+ continue;
366
+ }
367
+
368
+ /* Tab */
369
+ if (code == '\t') {
370
+ int w = tabstop - (display_col % tabstop);
371
+ if (w == 0) w = tabstop;
372
+ if (display_col + w > max_width) break;
373
+ for (int i = 0; i < w; i++) {
374
+ rb_ary_push(cells, rb_funcall(cell_class, id_new, 3,
375
+ space_str, INT2FIX(source_col), INT2FIX(1)));
376
+ }
377
+ display_col += w;
378
+ source_col++;
379
+ ptr += 1;
380
+ continue;
381
+ }
382
+
383
+ /* Control chars */
384
+ if (code < 0x20 || code == 0x7F || (code >= 0x80 && code <= 0x9F)) {
385
+ if (display_col >= max_width) break;
386
+ rb_ary_push(cells, rb_funcall(cell_class, id_new, 3,
387
+ question_str, INT2FIX(source_col), INT2FIX(1)));
388
+ display_col++;
389
+ source_col++;
390
+ ptr += clen;
391
+ continue;
392
+ }
393
+
394
+ /* Multi-byte character */
395
+ int w = codepoint_width(code);
396
+ if (display_col + w > max_width) break;
397
+ VALUE ch = rb_enc_str_new(ptr, clen, enc);
398
+ rb_ary_push(cells, rb_funcall(cell_class, id_new, 3,
399
+ ch, INT2FIX(source_col), INT2FIX(w)));
400
+ display_col += w;
401
+ source_col++;
402
+ ptr += clen;
403
+ }
404
+
405
+ VALUE result = rb_ary_new_capa(2);
406
+ rb_ary_push(result, cells);
407
+ rb_ary_push(result, INT2FIX(display_col));
408
+ return result;
409
+ }
410
+
411
+ /* char_index_for_screen_col(line, target_screen_col, tabstop: 2, align: :floor)
412
+ * Returns a character index whose screen column is <= target. */
413
+ static VALUE
414
+ rb_char_index_for_screen_col(int argc, VALUE *argv, VALUE self)
415
+ {
416
+ VALUE line, v_target, opts;
417
+ rb_scan_args(argc, argv, "2:", &line, &v_target, &opts);
418
+
419
+ int tabstop = 2;
420
+ int align_ceil = 0;
421
+
422
+ if (!NIL_P(opts)) {
423
+ VALUE v;
424
+ static ID id_tabstop, id_align, id_ceil;
425
+ if (!id_tabstop) {
426
+ id_tabstop = rb_intern("tabstop");
427
+ id_align = rb_intern("align");
428
+ id_ceil = rb_intern("ceil");
429
+ }
430
+ v = rb_hash_lookup2(opts, ID2SYM(id_tabstop), Qnil);
431
+ if (!NIL_P(v)) tabstop = NUM2INT(v);
432
+ v = rb_hash_lookup2(opts, ID2SYM(id_align), Qnil);
433
+ if (!NIL_P(v) && SYM2ID(v) == id_ceil) align_ceil = 1;
434
+ }
435
+
436
+ if (NIL_P(line)) line = rb_str_new("", 0);
437
+ if (TYPE(line) != T_STRING) line = rb_String(line);
438
+
439
+ int target = NUM2INT(v_target);
440
+ if (target < 0) target = 0;
441
+
442
+ const char *ptr = RSTRING_PTR(line);
443
+ const char *end = ptr + RSTRING_LEN(line);
444
+ rb_encoding *enc = rb_utf8_encoding();
445
+ int screen_col = 0;
446
+ int char_index = 0;
447
+
448
+ /* Walk grapheme clusters using oniguruma regex \X */
449
+ /* Simplified: walk codepoints, treating combining marks as part of
450
+ the previous character (width 0 doesn't advance screen_col). */
451
+ while (ptr < end) {
452
+ /* Measure one grapheme cluster: base char + combining marks */
453
+ int cluster_width = 0;
454
+ int cluster_chars = 0;
455
+ int first = 1;
456
+
457
+ while (ptr < end) {
458
+ int clen = rb_enc_precise_mbclen(ptr, end, enc);
459
+ if (!MBCLEN_CHARFOUND_P(clen)) {
460
+ if (first) { ptr++; cluster_chars++; cluster_width = 1; }
461
+ break;
462
+ }
463
+ clen = MBCLEN_CHARFOUND_LEN(clen);
464
+ unsigned int code = rb_enc_codepoint(ptr, end, enc);
465
+
466
+ if (!first) {
467
+ /* Check if combining/zero-width — part of cluster */
468
+ int w = codepoint_width(code);
469
+ if (w == 0) {
470
+ ptr += clen;
471
+ cluster_chars += (clen == 1 ? 1 : 1);
472
+ continue;
473
+ }
474
+ break; /* new base character — end of cluster */
475
+ }
476
+
477
+ first = 0;
478
+ ptr += clen;
479
+ cluster_chars += (clen == 1 ? 1 : 1);
480
+
481
+ if (code == '\t') {
482
+ int w = tabstop - (screen_col % tabstop);
483
+ if (w == 0) w = tabstop;
484
+ cluster_width = w;
485
+ } else {
486
+ cluster_width = (clen == 1 && code >= 0x20 && code < 0x7F)
487
+ ? 1 : codepoint_width(code);
488
+ }
489
+ }
490
+
491
+ if (screen_col + cluster_width > target) {
492
+ return INT2FIX(align_ceil ? char_index + cluster_chars : char_index);
493
+ }
494
+ screen_col += cluster_width;
495
+ char_index += cluster_chars;
496
+ }
497
+
498
+ return INT2FIX(char_index);
499
+ }
500
+
501
+ /* ------------------------------------------------------------------ */
502
+ /* Init */
503
+ /* ------------------------------------------------------------------ */
504
+
505
+ void
506
+ Init_ruvim_ext(void)
507
+ {
508
+ VALUE mRuVim = rb_define_module("RuVim");
509
+ VALUE mDW = rb_define_module_under(mRuVim, "DisplayWidthExt");
510
+
511
+ rb_define_module_function(mDW, "cell_width", rb_cell_width, -1);
512
+ rb_define_module_function(mDW, "display_width", rb_display_width, -1);
513
+ rb_define_module_function(mDW, "expand_tabs", rb_expand_tabs, -1);
514
+ rb_define_module_function(mDW, "set_ambiguous_width", rb_set_ambiguous_width, 1);
515
+
516
+ VALUE mTM = rb_define_module_under(mRuVim, "TextMetricsExt");
517
+ rb_define_module_function(mTM, "clip_cells_for_width", rb_clip_cells_for_width, -1);
518
+ rb_define_module_function(mTM, "char_index_for_screen_col", rb_char_index_for_screen_col, -1);
519
+ }