@fairyhunter13/opentui-core 0.1.91 → 0.1.94

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (570)
  1. package/3d/SpriteResourceManager.d.ts +74 -0
  2. package/3d/SpriteUtils.d.ts +13 -0
  3. package/3d/TextureUtils.d.ts +24 -0
  4. package/3d/ThreeRenderable.d.ts +40 -0
  5. package/3d/WGPURenderer.d.ts +61 -0
  6. package/3d/animation/ExplodingSpriteEffect.d.ts +71 -0
  7. package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +76 -0
  8. package/3d/animation/SpriteAnimator.d.ts +124 -0
  9. package/3d/animation/SpriteParticleGenerator.d.ts +62 -0
  10. package/3d/canvas.d.ts +44 -0
  11. package/3d/index.d.ts +12 -0
  12. package/3d/physics/PlanckPhysicsAdapter.d.ts +19 -0
  13. package/3d/physics/RapierPhysicsAdapter.d.ts +19 -0
  14. package/3d/physics/physics-interface.d.ts +27 -0
  15. package/3d.d.ts +2 -0
  16. package/3d.js +34042 -0
  17. package/3d.js.map +155 -0
  18. package/LICENSE +21 -0
  19. package/NativeSpanFeed.d.ts +41 -0
  20. package/Renderable.d.ts +334 -0
  21. package/animation/Timeline.d.ts +126 -0
  22. package/ansi.d.ts +13 -0
  23. package/buffer.d.ts +107 -0
  24. package/console.d.ts +143 -0
  25. package/edit-buffer.d.ts +98 -0
  26. package/editor-view.d.ts +73 -0
  27. package/index-e6ec7apq.js +18415 -0
  28. package/index-e6ec7apq.js.map +64 -0
  29. package/index-h066zmrb.js +12619 -0
  30. package/index-h066zmrb.js.map +43 -0
  31. package/index-ynzawt3n.js +113 -0
  32. package/index-ynzawt3n.js.map +10 -0
  33. package/index.d.ts +21 -0
  34. package/index.js +430 -0
  35. package/index.js.map +9 -0
  36. package/lib/KeyHandler.d.ts +61 -0
  37. package/lib/RGBA.d.ts +25 -0
  38. package/lib/ascii.font.d.ts +508 -0
  39. package/lib/border.d.ts +49 -0
  40. package/lib/bunfs.d.ts +7 -0
  41. package/lib/clipboard.d.ts +17 -0
  42. package/lib/clock.d.ts +15 -0
  43. package/lib/data-paths.d.ts +26 -0
  44. package/lib/debounce.d.ts +42 -0
  45. package/lib/detect-links.d.ts +6 -0
  46. package/lib/env.d.ts +42 -0
  47. package/lib/extmarks-history.d.ts +17 -0
  48. package/lib/extmarks.d.ts +89 -0
  49. package/lib/hast-styled-text.d.ts +17 -0
  50. package/lib/index.d.ts +21 -0
  51. package/lib/keymapping.d.ts +25 -0
  52. package/lib/objects-in-viewport.d.ts +24 -0
  53. package/lib/output.capture.d.ts +24 -0
  54. package/lib/parse.keypress-kitty.d.ts +2 -0
  55. package/lib/parse.keypress.d.ts +26 -0
  56. package/lib/parse.mouse.d.ts +30 -0
  57. package/lib/paste.d.ts +7 -0
  58. package/lib/queue.d.ts +15 -0
  59. package/lib/renderable.validations.d.ts +12 -0
  60. package/lib/scroll-acceleration.d.ts +43 -0
  61. package/lib/selection.d.ts +63 -0
  62. package/lib/singleton.d.ts +7 -0
  63. package/lib/stdin-parser.d.ts +76 -0
  64. package/lib/styled-text.d.ts +63 -0
  65. package/lib/terminal-capability-detection.d.ts +30 -0
  66. package/lib/terminal-palette.d.ts +50 -0
  67. package/lib/tree-sitter/assets/update.d.ts +11 -0
  68. package/lib/tree-sitter/client.d.ts +47 -0
  69. package/lib/tree-sitter/default-parsers.d.ts +2 -0
  70. package/lib/tree-sitter/download-utils.d.ts +21 -0
  71. package/lib/tree-sitter/index.d.ts +8 -0
  72. package/lib/tree-sitter/parser.worker.d.ts +1 -0
  73. package/lib/tree-sitter/parsers-config.d.ts +38 -0
  74. package/lib/tree-sitter/resolve-ft.d.ts +2 -0
  75. package/lib/tree-sitter/types.d.ts +81 -0
  76. package/lib/tree-sitter-styled-text.d.ts +14 -0
  77. package/lib/validate-dir-name.d.ts +1 -0
  78. package/lib/yoga.options.d.ts +32 -0
  79. package/package.json +51 -63
  80. package/parser.worker.js +869 -0
  81. package/parser.worker.js.map +12 -0
  82. package/plugins/core-slot.d.ts +72 -0
  83. package/plugins/registry.d.ts +38 -0
  84. package/plugins/types.d.ts +34 -0
  85. package/post/filters.d.ts +105 -0
  86. package/renderables/ASCIIFont.d.ts +52 -0
  87. package/renderables/Box.d.ts +72 -0
  88. package/renderables/Code.d.ts +78 -0
  89. package/renderables/Diff.d.ts +142 -0
  90. package/renderables/EditBufferRenderable.d.ts +162 -0
  91. package/renderables/FrameBuffer.d.ts +16 -0
  92. package/renderables/Input.d.ts +67 -0
  93. package/renderables/LineNumberRenderable.d.ts +74 -0
  94. package/renderables/Markdown.d.ts +173 -0
  95. package/renderables/ScrollBar.d.ts +77 -0
  96. package/renderables/ScrollBox.d.ts +124 -0
  97. package/renderables/Select.d.ts +115 -0
  98. package/renderables/Slider.d.ts +44 -0
  99. package/renderables/TabSelect.d.ts +96 -0
  100. package/renderables/Text.d.ts +36 -0
  101. package/renderables/TextBufferRenderable.d.ts +105 -0
  102. package/renderables/TextNode.d.ts +91 -0
  103. package/renderables/TextTable.d.ts +140 -0
  104. package/renderables/Textarea.d.ts +114 -0
  105. package/renderables/TimeToFirstDraw.d.ts +24 -0
  106. package/renderables/__tests__/renderable-test-utils.d.ts +12 -0
  107. package/renderables/composition/VRenderable.d.ts +16 -0
  108. package/renderables/composition/constructs.d.ts +35 -0
  109. package/renderables/composition/vnode.d.ts +46 -0
  110. package/renderables/index.d.ts +22 -0
  111. package/renderables/markdown-parser.d.ts +10 -0
  112. package/renderer.d.ts +388 -0
  113. package/runtime-plugin-support.d.ts +3 -0
  114. package/runtime-plugin-support.js +29 -0
  115. package/runtime-plugin-support.js.map +10 -0
  116. package/runtime-plugin.d.ts +11 -0
  117. package/runtime-plugin.js +16 -0
  118. package/runtime-plugin.js.map +9 -0
  119. package/syntax-style.d.ts +54 -0
  120. package/testing/manual-clock.d.ts +16 -0
  121. package/testing/mock-keys.d.ts +81 -0
  122. package/testing/mock-mouse.d.ts +38 -0
  123. package/testing/mock-tree-sitter-client.d.ts +23 -0
  124. package/testing/spy.d.ts +7 -0
  125. package/testing/test-recorder.d.ts +61 -0
  126. package/testing/test-renderer.d.ts +23 -0
  127. package/testing.d.ts +6 -0
  128. package/testing.js +675 -0
  129. package/testing.js.map +15 -0
  130. package/text-buffer-view.d.ts +42 -0
  131. package/text-buffer.d.ts +67 -0
  132. package/types.d.ts +131 -0
  133. package/utils.d.ts +14 -0
  134. package/zig-structs.d.ts +155 -0
  135. package/zig.d.ts +351 -0
  136. package/dev/keypress-debug-renderer.ts +0 -148
  137. package/dev/keypress-debug.ts +0 -43
  138. package/dev/print-env-vars.ts +0 -32
  139. package/dev/test-tmux-graphics-334.sh +0 -68
  140. package/dev/thai-debug-test.ts +0 -68
  141. package/docs/development.md +0 -141
  142. package/docs/env-vars.md +0 -140
  143. package/docs/getting-started.md +0 -353
  144. package/docs/renderables-vs-constructs.md +0 -159
  145. package/docs/tree-sitter.md +0 -311
  146. package/scripts/build.ts +0 -400
  147. package/scripts/publish.ts +0 -60
  148. package/src/3d/SpriteResourceManager.ts +0 -286
  149. package/src/3d/SpriteUtils.ts +0 -71
  150. package/src/3d/TextureUtils.ts +0 -196
  151. package/src/3d/ThreeRenderable.ts +0 -197
  152. package/src/3d/WGPURenderer.ts +0 -294
  153. package/src/3d/animation/ExplodingSpriteEffect.ts +0 -513
  154. package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +0 -429
  155. package/src/3d/animation/SpriteAnimator.ts +0 -633
  156. package/src/3d/animation/SpriteParticleGenerator.ts +0 -435
  157. package/src/3d/canvas.ts +0 -464
  158. package/src/3d/index.ts +0 -12
  159. package/src/3d/physics/PlanckPhysicsAdapter.ts +0 -72
  160. package/src/3d/physics/RapierPhysicsAdapter.ts +0 -66
  161. package/src/3d/physics/physics-interface.ts +0 -31
  162. package/src/3d/shaders/supersampling.wgsl +0 -201
  163. package/src/3d.ts +0 -3
  164. package/src/NativeSpanFeed.ts +0 -300
  165. package/src/Renderable.ts +0 -1698
  166. package/src/__snapshots__/buffer.test.ts.snap +0 -28
  167. package/src/animation/Timeline.test.ts +0 -2709
  168. package/src/animation/Timeline.ts +0 -598
  169. package/src/ansi.ts +0 -18
  170. package/src/benchmark/latest-all-bench-run.json +0 -707
  171. package/src/benchmark/latest-async-bench-run.json +0 -336
  172. package/src/benchmark/latest-default-bench-run.json +0 -657
  173. package/src/benchmark/latest-large-bench-run.json +0 -707
  174. package/src/benchmark/latest-quick-bench-run.json +0 -207
  175. package/src/benchmark/markdown-benchmark.ts +0 -1804
  176. package/src/benchmark/native-span-feed-async-benchmark.ts +0 -355
  177. package/src/benchmark/native-span-feed-benchmark.md +0 -56
  178. package/src/benchmark/native-span-feed-benchmark.ts +0 -596
  179. package/src/benchmark/native-span-feed-compare.ts +0 -280
  180. package/src/benchmark/renderer-benchmark.ts +0 -754
  181. package/src/benchmark/text-table-benchmark.ts +0 -947
  182. package/src/buffer.test.ts +0 -291
  183. package/src/buffer.ts +0 -519
  184. package/src/console.test.ts +0 -612
  185. package/src/console.ts +0 -1255
  186. package/src/edit-buffer.test.ts +0 -1769
  187. package/src/edit-buffer.ts +0 -411
  188. package/src/editor-view.test.ts +0 -1032
  189. package/src/editor-view.ts +0 -284
  190. package/src/examples/ascii-font-selection-demo.ts +0 -245
  191. package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
  192. package/src/examples/assets/concrete.png +0 -0
  193. package/src/examples/assets/crate.png +0 -0
  194. package/src/examples/assets/crate_emissive.png +0 -0
  195. package/src/examples/assets/forrest_background.png +0 -0
  196. package/src/examples/assets/hast-example.json +0 -1018
  197. package/src/examples/assets/heart.png +0 -0
  198. package/src/examples/assets/main_char_heavy_attack.png +0 -0
  199. package/src/examples/assets/main_char_idle.png +0 -0
  200. package/src/examples/assets/main_char_jump_end.png +0 -0
  201. package/src/examples/assets/main_char_jump_landing.png +0 -0
  202. package/src/examples/assets/main_char_jump_start.png +0 -0
  203. package/src/examples/assets/main_char_run_loop.png +0 -0
  204. package/src/examples/assets/roughness_map.jpg +0 -0
  205. package/src/examples/build.ts +0 -115
  206. package/src/examples/code-demo.ts +0 -584
  207. package/src/examples/console-demo.ts +0 -358
  208. package/src/examples/core-plugin-slots-demo.ts +0 -759
  209. package/src/examples/diff-demo.ts +0 -699
  210. package/src/examples/draggable-three-demo.ts +0 -259
  211. package/src/examples/editor-demo.ts +0 -322
  212. package/src/examples/extmarks-demo.ts +0 -204
  213. package/src/examples/focus-restore-demo.ts +0 -310
  214. package/src/examples/fonts.ts +0 -245
  215. package/src/examples/fractal-shader-demo.ts +0 -268
  216. package/src/examples/framebuffer-demo.ts +0 -674
  217. package/src/examples/full-unicode-demo.ts +0 -181
  218. package/src/examples/golden-star-demo.ts +0 -933
  219. package/src/examples/grayscale-buffer-demo.ts +0 -249
  220. package/src/examples/hast-syntax-highlighting-demo.ts +0 -129
  221. package/src/examples/index.ts +0 -925
  222. package/src/examples/input-demo.ts +0 -377
  223. package/src/examples/input-select-layout-demo.ts +0 -425
  224. package/src/examples/install.sh +0 -143
  225. package/src/examples/keypress-debug-demo.ts +0 -452
  226. package/src/examples/lib/HexList.ts +0 -122
  227. package/src/examples/lib/PaletteGrid.ts +0 -125
  228. package/src/examples/lib/standalone-keys.ts +0 -25
  229. package/src/examples/lib/tab-controller.ts +0 -243
  230. package/src/examples/lights-phong-demo.ts +0 -290
  231. package/src/examples/link-demo.ts +0 -220
  232. package/src/examples/live-state-demo.ts +0 -480
  233. package/src/examples/markdown-demo.ts +0 -620
  234. package/src/examples/mouse-interaction-demo.ts +0 -428
  235. package/src/examples/nested-zindex-demo.ts +0 -357
  236. package/src/examples/opacity-example.ts +0 -235
  237. package/src/examples/opentui-demo.ts +0 -1057
  238. package/src/examples/physx-planck-2d-demo.ts +0 -507
  239. package/src/examples/physx-rapier-2d-demo.ts +0 -526
  240. package/src/examples/relative-positioning-demo.ts +0 -323
  241. package/src/examples/scroll-example.ts +0 -214
  242. package/src/examples/scrollbox-mouse-test.ts +0 -112
  243. package/src/examples/scrollbox-overlay-hit-test.ts +0 -206
  244. package/src/examples/select-demo.ts +0 -237
  245. package/src/examples/shader-cube-demo.ts +0 -772
  246. package/src/examples/simple-layout-example.ts +0 -591
  247. package/src/examples/slider-demo.ts +0 -617
  248. package/src/examples/split-mode-demo.ts +0 -445
  249. package/src/examples/sprite-animation-demo.ts +0 -443
  250. package/src/examples/sprite-particle-generator-demo.ts +0 -486
  251. package/src/examples/static-sprite-demo.ts +0 -193
  252. package/src/examples/sticky-scroll-example.ts +0 -308
  253. package/src/examples/styled-text-demo.ts +0 -282
  254. package/src/examples/tab-select-demo.ts +0 -219
  255. package/src/examples/terminal-title.ts +0 -29
  256. package/src/examples/terminal.ts +0 -305
  257. package/src/examples/text-node-demo.ts +0 -416
  258. package/src/examples/text-selection-demo.ts +0 -377
  259. package/src/examples/text-table-demo.ts +0 -503
  260. package/src/examples/text-truncation-demo.ts +0 -481
  261. package/src/examples/text-wrap.ts +0 -757
  262. package/src/examples/texture-loading-demo.ts +0 -259
  263. package/src/examples/timeline-example.ts +0 -670
  264. package/src/examples/transparency-demo.ts +0 -241
  265. package/src/examples/vnode-composition-demo.ts +0 -404
  266. package/src/index.ts +0 -22
  267. package/src/lib/KeyHandler.integration.test.ts +0 -292
  268. package/src/lib/KeyHandler.stopPropagation.test.ts +0 -289
  269. package/src/lib/KeyHandler.test.ts +0 -662
  270. package/src/lib/KeyHandler.ts +0 -222
  271. package/src/lib/RGBA.test.ts +0 -984
  272. package/src/lib/RGBA.ts +0 -204
  273. package/src/lib/ascii.font.ts +0 -330
  274. package/src/lib/border.test.ts +0 -83
  275. package/src/lib/border.ts +0 -168
  276. package/src/lib/bunfs.test.ts +0 -27
  277. package/src/lib/bunfs.ts +0 -18
  278. package/src/lib/clipboard.test.ts +0 -41
  279. package/src/lib/clipboard.ts +0 -47
  280. package/src/lib/clock.ts +0 -31
  281. package/src/lib/data-paths.test.ts +0 -133
  282. package/src/lib/data-paths.ts +0 -109
  283. package/src/lib/debounce.ts +0 -106
  284. package/src/lib/detect-links.test.ts +0 -98
  285. package/src/lib/detect-links.ts +0 -56
  286. package/src/lib/env.test.ts +0 -228
  287. package/src/lib/env.ts +0 -209
  288. package/src/lib/extmarks-history.ts +0 -51
  289. package/src/lib/extmarks-multiwidth.test.ts +0 -322
  290. package/src/lib/extmarks.test.ts +0 -3457
  291. package/src/lib/extmarks.ts +0 -843
  292. package/src/lib/fonts/block.json +0 -405
  293. package/src/lib/fonts/grid.json +0 -265
  294. package/src/lib/fonts/huge.json +0 -741
  295. package/src/lib/fonts/pallet.json +0 -314
  296. package/src/lib/fonts/shade.json +0 -591
  297. package/src/lib/fonts/slick.json +0 -321
  298. package/src/lib/fonts/tiny.json +0 -69
  299. package/src/lib/hast-styled-text.ts +0 -59
  300. package/src/lib/index.ts +0 -21
  301. package/src/lib/keymapping.test.ts +0 -280
  302. package/src/lib/keymapping.ts +0 -87
  303. package/src/lib/objects-in-viewport.test.ts +0 -787
  304. package/src/lib/objects-in-viewport.ts +0 -153
  305. package/src/lib/output.capture.ts +0 -58
  306. package/src/lib/parse.keypress-kitty.protocol.test.ts +0 -340
  307. package/src/lib/parse.keypress-kitty.test.ts +0 -663
  308. package/src/lib/parse.keypress-kitty.ts +0 -439
  309. package/src/lib/parse.keypress.test.ts +0 -1849
  310. package/src/lib/parse.keypress.ts +0 -397
  311. package/src/lib/parse.mouse.test.ts +0 -552
  312. package/src/lib/parse.mouse.ts +0 -232
  313. package/src/lib/paste.ts +0 -16
  314. package/src/lib/queue.ts +0 -65
  315. package/src/lib/renderable.validations.test.ts +0 -87
  316. package/src/lib/renderable.validations.ts +0 -83
  317. package/src/lib/scroll-acceleration.ts +0 -98
  318. package/src/lib/selection.ts +0 -240
  319. package/src/lib/singleton.ts +0 -28
  320. package/src/lib/stdin-parser.test.ts +0 -1676
  321. package/src/lib/stdin-parser.ts +0 -1248
  322. package/src/lib/styled-text.ts +0 -178
  323. package/src/lib/terminal-capability-detection.test.ts +0 -202
  324. package/src/lib/terminal-capability-detection.ts +0 -79
  325. package/src/lib/terminal-palette.test.ts +0 -878
  326. package/src/lib/terminal-palette.ts +0 -383
  327. package/src/lib/tree-sitter/assets/README.md +0 -118
  328. package/src/lib/tree-sitter/assets/update.ts +0 -331
  329. package/src/lib/tree-sitter/assets.d.ts +0 -9
  330. package/src/lib/tree-sitter/cache.test.ts +0 -270
  331. package/src/lib/tree-sitter/client.test.ts +0 -1061
  332. package/src/lib/tree-sitter/client.ts +0 -615
  333. package/src/lib/tree-sitter/default-parsers.ts +0 -80
  334. package/src/lib/tree-sitter/download-utils.ts +0 -148
  335. package/src/lib/tree-sitter/index.ts +0 -28
  336. package/src/lib/tree-sitter/parser.worker.ts +0 -1001
  337. package/src/lib/tree-sitter/parsers-config.ts +0 -75
  338. package/src/lib/tree-sitter/resolve-ft.ts +0 -62
  339. package/src/lib/tree-sitter/types.ts +0 -81
  340. package/src/lib/tree-sitter-styled-text.test.ts +0 -1253
  341. package/src/lib/tree-sitter-styled-text.ts +0 -306
  342. package/src/lib/validate-dir-name.ts +0 -55
  343. package/src/lib/yoga.options.test.ts +0 -628
  344. package/src/lib/yoga.options.ts +0 -346
  345. package/src/plugins/core-slot.ts +0 -579
  346. package/src/plugins/registry.ts +0 -377
  347. package/src/plugins/types.ts +0 -46
  348. package/src/post/filters.ts +0 -888
  349. package/src/renderables/ASCIIFont.ts +0 -219
  350. package/src/renderables/Box.test.ts +0 -160
  351. package/src/renderables/Box.ts +0 -295
  352. package/src/renderables/Code.test.ts +0 -2062
  353. package/src/renderables/Code.ts +0 -357
  354. package/src/renderables/Diff.regression.test.ts +0 -226
  355. package/src/renderables/Diff.test.ts +0 -3027
  356. package/src/renderables/Diff.ts +0 -1209
  357. package/src/renderables/EditBufferRenderable.ts +0 -764
  358. package/src/renderables/FrameBuffer.ts +0 -47
  359. package/src/renderables/Input.test.ts +0 -1228
  360. package/src/renderables/Input.ts +0 -245
  361. package/src/renderables/LineNumberRenderable.ts +0 -675
  362. package/src/renderables/Markdown.ts +0 -1106
  363. package/src/renderables/ScrollBar.ts +0 -422
  364. package/src/renderables/ScrollBox.ts +0 -883
  365. package/src/renderables/Select.test.ts +0 -1010
  366. package/src/renderables/Select.ts +0 -523
  367. package/src/renderables/Slider.test.ts +0 -456
  368. package/src/renderables/Slider.ts +0 -347
  369. package/src/renderables/TabSelect.test.ts +0 -197
  370. package/src/renderables/TabSelect.ts +0 -455
  371. package/src/renderables/Text.selection-buffer.test.ts +0 -123
  372. package/src/renderables/Text.test.ts +0 -2660
  373. package/src/renderables/Text.ts +0 -147
  374. package/src/renderables/TextBufferRenderable.ts +0 -518
  375. package/src/renderables/TextNode.test.ts +0 -1058
  376. package/src/renderables/TextNode.ts +0 -325
  377. package/src/renderables/TextTable.test.ts +0 -1421
  378. package/src/renderables/TextTable.ts +0 -1344
  379. package/src/renderables/Textarea.ts +0 -732
  380. package/src/renderables/TimeToFirstDraw.ts +0 -89
  381. package/src/renderables/__snapshots__/Code.test.ts.snap +0 -13
  382. package/src/renderables/__snapshots__/Diff.test.ts.snap +0 -785
  383. package/src/renderables/__snapshots__/Text.test.ts.snap +0 -421
  384. package/src/renderables/__snapshots__/TextTable.test.ts.snap +0 -215
  385. package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +0 -144
  386. package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +0 -816
  387. package/src/renderables/__tests__/LineNumberRenderable.test.ts +0 -1787
  388. package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +0 -85
  389. package/src/renderables/__tests__/Markdown.test.ts +0 -2287
  390. package/src/renderables/__tests__/MultiRenderable.selection.test.ts +0 -87
  391. package/src/renderables/__tests__/Textarea.buffer.test.ts +0 -682
  392. package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +0 -675
  393. package/src/renderables/__tests__/Textarea.editing.test.ts +0 -2041
  394. package/src/renderables/__tests__/Textarea.error-handling.test.ts +0 -35
  395. package/src/renderables/__tests__/Textarea.events.test.ts +0 -738
  396. package/src/renderables/__tests__/Textarea.highlights.test.ts +0 -590
  397. package/src/renderables/__tests__/Textarea.keybinding.test.ts +0 -3149
  398. package/src/renderables/__tests__/Textarea.paste.test.ts +0 -357
  399. package/src/renderables/__tests__/Textarea.rendering.test.ts +0 -1864
  400. package/src/renderables/__tests__/Textarea.scroll.test.ts +0 -733
  401. package/src/renderables/__tests__/Textarea.selection.test.ts +0 -1590
  402. package/src/renderables/__tests__/Textarea.stress.test.ts +0 -670
  403. package/src/renderables/__tests__/Textarea.undo-redo.test.ts +0 -383
  404. package/src/renderables/__tests__/Textarea.visual-lines.test.ts +0 -310
  405. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +0 -221
  406. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +0 -89
  407. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +0 -457
  408. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +0 -158
  409. package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +0 -387
  410. package/src/renderables/__tests__/markdown-parser.test.ts +0 -217
  411. package/src/renderables/__tests__/renderable-test-utils.ts +0 -60
  412. package/src/renderables/composition/README.md +0 -8
  413. package/src/renderables/composition/VRenderable.ts +0 -32
  414. package/src/renderables/composition/constructs.ts +0 -127
  415. package/src/renderables/composition/vnode.ts +0 -289
  416. package/src/renderables/index.ts +0 -22
  417. package/src/renderables/markdown-parser.ts +0 -66
  418. package/src/renderer.ts +0 -2363
  419. package/src/runtime-plugin-support.ts +0 -39
  420. package/src/runtime-plugin.ts +0 -144
  421. package/src/syntax-style.test.ts +0 -841
  422. package/src/syntax-style.ts +0 -264
  423. package/src/testing/README.md +0 -210
  424. package/src/testing/capture-spans.test.ts +0 -194
  425. package/src/testing/integration.test.ts +0 -276
  426. package/src/testing/manual-clock.ts +0 -106
  427. package/src/testing/mock-keys.test.ts +0 -1356
  428. package/src/testing/mock-keys.ts +0 -449
  429. package/src/testing/mock-mouse.test.ts +0 -218
  430. package/src/testing/mock-mouse.ts +0 -247
  431. package/src/testing/mock-tree-sitter-client.ts +0 -73
  432. package/src/testing/spy.ts +0 -13
  433. package/src/testing/test-recorder.test.ts +0 -415
  434. package/src/testing/test-recorder.ts +0 -145
  435. package/src/testing/test-renderer.ts +0 -116
  436. package/src/testing.ts +0 -7
  437. package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +0 -481
  438. package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +0 -19
  439. package/src/tests/__snapshots__/scrollbox.test.ts.snap +0 -29
  440. package/src/tests/absolute-positioning.snapshot.test.ts +0 -638
  441. package/src/tests/allocator-stats.test.ts +0 -38
  442. package/src/tests/destroy-during-render.test.ts +0 -200
  443. package/src/tests/hover-cursor.test.ts +0 -98
  444. package/src/tests/native-span-feed-async.test.ts +0 -173
  445. package/src/tests/native-span-feed-close.test.ts +0 -120
  446. package/src/tests/native-span-feed-coverage.test.ts +0 -227
  447. package/src/tests/native-span-feed-edge-cases.test.ts +0 -352
  448. package/src/tests/native-span-feed-use-after-free.test.ts +0 -45
  449. package/src/tests/opacity.test.ts +0 -123
  450. package/src/tests/renderable.snapshot.test.ts +0 -524
  451. package/src/tests/renderable.test.ts +0 -1281
  452. package/src/tests/renderer.console-startup.test.ts +0 -65
  453. package/src/tests/renderer.control.test.ts +0 -364
  454. package/src/tests/renderer.core-slot-binding.test.ts +0 -952
  455. package/src/tests/renderer.cursor.test.ts +0 -26
  456. package/src/tests/renderer.destroy-during-render.test.ts +0 -110
  457. package/src/tests/renderer.focus-restore.test.ts +0 -228
  458. package/src/tests/renderer.focus.test.ts +0 -251
  459. package/src/tests/renderer.idle.test.ts +0 -219
  460. package/src/tests/renderer.input.test.ts +0 -2145
  461. package/src/tests/renderer.kitty-flags.test.ts +0 -195
  462. package/src/tests/renderer.mouse.test.ts +0 -1269
  463. package/src/tests/renderer.palette.test.ts +0 -629
  464. package/src/tests/renderer.selection.test.ts +0 -49
  465. package/src/tests/renderer.slot-registry.test.ts +0 -649
  466. package/src/tests/renderer.useMouse.test.ts +0 -50
  467. package/src/tests/runtime-plugin-support.fixture.ts +0 -11
  468. package/src/tests/runtime-plugin-support.test.ts +0 -28
  469. package/src/tests/runtime-plugin.fixture.ts +0 -40
  470. package/src/tests/runtime-plugin.test.ts +0 -190
  471. package/src/tests/scrollbox-culling-bug.test.ts +0 -114
  472. package/src/tests/scrollbox-hitgrid-resize.test.ts +0 -136
  473. package/src/tests/scrollbox-hitgrid.test.ts +0 -909
  474. package/src/tests/scrollbox.test.ts +0 -1530
  475. package/src/tests/wrap-resize-perf.test.ts +0 -229
  476. package/src/tests/yoga-setters.test.ts +0 -921
  477. package/src/text-buffer-view.test.ts +0 -705
  478. package/src/text-buffer-view.ts +0 -189
  479. package/src/text-buffer.test.ts +0 -347
  480. package/src/text-buffer.ts +0 -250
  481. package/src/types.ts +0 -152
  482. package/src/utils.ts +0 -88
  483. package/src/zig/ansi.zig +0 -268
  484. package/src/zig/bench/README.md +0 -50
  485. package/src/zig/bench/buffer-draw-text-buffer_bench.zig +0 -887
  486. package/src/zig/bench/edit-buffer_bench.zig +0 -476
  487. package/src/zig/bench/native-span-feed_bench.zig +0 -100
  488. package/src/zig/bench/rope-markers_bench.zig +0 -713
  489. package/src/zig/bench/rope_bench.zig +0 -514
  490. package/src/zig/bench/styled-text_bench.zig +0 -470
  491. package/src/zig/bench/text-buffer-coords_bench.zig +0 -362
  492. package/src/zig/bench/text-buffer-view_bench.zig +0 -459
  493. package/src/zig/bench/text-chunk-graphemes_bench.zig +0 -273
  494. package/src/zig/bench/utf8_bench.zig +0 -799
  495. package/src/zig/bench-utils.zig +0 -431
  496. package/src/zig/bench.zig +0 -217
  497. package/src/zig/buffer.zig +0 -2223
  498. package/src/zig/build.zig +0 -289
  499. package/src/zig/build.zig.zon +0 -16
  500. package/src/zig/edit-buffer.zig +0 -825
  501. package/src/zig/editor-view.zig +0 -802
  502. package/src/zig/event-bus.zig +0 -13
  503. package/src/zig/event-emitter.zig +0 -65
  504. package/src/zig/file-logger.zig +0 -92
  505. package/src/zig/grapheme.zig +0 -599
  506. package/src/zig/lib.zig +0 -1834
  507. package/src/zig/link.zig +0 -333
  508. package/src/zig/logger.zig +0 -43
  509. package/src/zig/mem-registry.zig +0 -125
  510. package/src/zig/native-span-feed-bench-lib.zig +0 -7
  511. package/src/zig/native-span-feed.zig +0 -708
  512. package/src/zig/renderer.zig +0 -1386
  513. package/src/zig/rope.zig +0 -1220
  514. package/src/zig/syntax-style.zig +0 -161
  515. package/src/zig/terminal.zig +0 -975
  516. package/src/zig/test.zig +0 -70
  517. package/src/zig/tests/README.md +0 -18
  518. package/src/zig/tests/buffer_test.zig +0 -2526
  519. package/src/zig/tests/edit-buffer-history_test.zig +0 -271
  520. package/src/zig/tests/edit-buffer_test.zig +0 -1689
  521. package/src/zig/tests/editor-view_test.zig +0 -3299
  522. package/src/zig/tests/event-emitter_test.zig +0 -249
  523. package/src/zig/tests/grapheme_test.zig +0 -1304
  524. package/src/zig/tests/link_test.zig +0 -190
  525. package/src/zig/tests/mem-registry_test.zig +0 -473
  526. package/src/zig/tests/memory_leak_regression_test.zig +0 -159
  527. package/src/zig/tests/native-span-feed_test.zig +0 -1264
  528. package/src/zig/tests/renderer_test.zig +0 -1010
  529. package/src/zig/tests/rope-nested_test.zig +0 -712
  530. package/src/zig/tests/rope_fuzz_test.zig +0 -238
  531. package/src/zig/tests/rope_test.zig +0 -2362
  532. package/src/zig/tests/segment-merge.test.zig +0 -148
  533. package/src/zig/tests/syntax-style_test.zig +0 -557
  534. package/src/zig/tests/terminal_test.zig +0 -719
  535. package/src/zig/tests/text-buffer-drawing_test.zig +0 -3237
  536. package/src/zig/tests/text-buffer-highlights_test.zig +0 -666
  537. package/src/zig/tests/text-buffer-iterators_test.zig +0 -776
  538. package/src/zig/tests/text-buffer-segment_test.zig +0 -320
  539. package/src/zig/tests/text-buffer-selection_test.zig +0 -1035
  540. package/src/zig/tests/text-buffer-selection_viewport_test.zig +0 -358
  541. package/src/zig/tests/text-buffer-view_test.zig +0 -3649
  542. package/src/zig/tests/text-buffer_test.zig +0 -2191
  543. package/src/zig/tests/unicode-width-map.zon +0 -3909
  544. package/src/zig/tests/utf8_no_zwj_test.zig +0 -260
  545. package/src/zig/tests/utf8_test.zig +0 -4057
  546. package/src/zig/tests/utf8_wcwidth_cursor_test.zig +0 -267
  547. package/src/zig/tests/utf8_wcwidth_test.zig +0 -357
  548. package/src/zig/tests/word-wrap-editing_test.zig +0 -498
  549. package/src/zig/tests/wrap-cache-perf_test.zig +0 -113
  550. package/src/zig/text-buffer-iterators.zig +0 -499
  551. package/src/zig/text-buffer-segment.zig +0 -404
  552. package/src/zig/text-buffer-view.zig +0 -1371
  553. package/src/zig/text-buffer.zig +0 -1180
  554. package/src/zig/utf8.zig +0 -1948
  555. package/src/zig/utils.zig +0 -9
  556. package/src/zig-structs.ts +0 -261
  557. package/src/zig.ts +0 -3843
  558. package/tsconfig.build.json +0 -22
  559. package/tsconfig.json +0 -28
  560. /package/{src/lib/tree-sitter/assets → assets}/javascript/highlights.scm +0 -0
  561. /package/{src/lib/tree-sitter/assets → assets}/javascript/tree-sitter-javascript.wasm +0 -0
  562. /package/{src/lib/tree-sitter/assets → assets}/markdown/highlights.scm +0 -0
  563. /package/{src/lib/tree-sitter/assets → assets}/markdown/injections.scm +0 -0
  564. /package/{src/lib/tree-sitter/assets → assets}/markdown/tree-sitter-markdown.wasm +0 -0
  565. /package/{src/lib/tree-sitter/assets → assets}/markdown_inline/highlights.scm +0 -0
  566. /package/{src/lib/tree-sitter/assets → assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
  567. /package/{src/lib/tree-sitter/assets → assets}/typescript/highlights.scm +0 -0
  568. /package/{src/lib/tree-sitter/assets → assets}/typescript/tree-sitter-typescript.wasm +0 -0
  569. /package/{src/lib/tree-sitter/assets → assets}/zig/highlights.scm +0 -0
  570. /package/{src/lib/tree-sitter/assets → assets}/zig/tree-sitter-zig.wasm +0 -0
package/src/zig/utf8.zig DELETED
@@ -1,1948 +0,0 @@
1
- const std = @import("std");
2
- const uucode = @import("uucode");
3
-
4
- /// The method to use when calculating the width of a grapheme
5
- pub const WidthMethod = enum {
6
- wcwidth,
7
- unicode,
8
- no_zwj,
9
- };
10
-
11
- /// Check if a byte slice contains only printable ASCII (32..126)
12
- /// Uses SIMD16 for fast checking
13
- pub fn isAsciiOnly(text: []const u8) bool {
14
- if (text.len == 0) return false;
15
-
16
- const vector_len = 16;
17
- const Vec = @Vector(vector_len, u8);
18
-
19
- const min_printable: Vec = @splat(32);
20
- const max_printable: Vec = @splat(126);
21
-
22
- var pos: usize = 0;
23
-
24
- // Process full 16-byte vectors
25
- while (pos + vector_len <= text.len) {
26
- const chunk: Vec = text[pos..][0..vector_len].*;
27
-
28
- // Check if all bytes are in [32, 126]
29
- const too_low = chunk < min_printable;
30
- const too_high = chunk > max_printable;
31
-
32
- // Check if any byte is out of range
33
- if (@reduce(.Or, too_low) or @reduce(.Or, too_high)) {
34
- return false;
35
- }
36
-
37
- pos += vector_len;
38
- }
39
-
40
- // Handle remaining bytes with scalar code
41
- while (pos < text.len) : (pos += 1) {
42
- const b = text[pos];
43
- if (b < 32 or b > 126) {
44
- return false;
45
- }
46
- }
47
-
48
- return true;
49
- }
50
-
51
- pub const LineBreakKind = enum {
52
- LF, // \n (Unix/Linux)
53
- CR, // \r (Old Mac)
54
- CRLF, // \r\n (Windows)
55
- };
56
-
57
- pub const LineBreak = struct {
58
- pos: usize,
59
- kind: LineBreakKind,
60
- };
61
-
62
- pub const LineBreakResult = struct {
63
- breaks: std.ArrayListUnmanaged(LineBreak),
64
- allocator: std.mem.Allocator,
65
-
66
- pub fn init(allocator: std.mem.Allocator) LineBreakResult {
67
- return .{
68
- .breaks = .{},
69
- .allocator = allocator,
70
- };
71
- }
72
-
73
- pub fn deinit(self: *LineBreakResult) void {
74
- self.breaks.deinit(self.allocator);
75
- }
76
-
77
- pub fn reset(self: *LineBreakResult) void {
78
- self.breaks.clearRetainingCapacity();
79
- }
80
- };
81
-
82
- pub const TabStopResult = struct {
83
- positions: std.ArrayListUnmanaged(usize),
84
- allocator: std.mem.Allocator,
85
-
86
- pub fn init(allocator: std.mem.Allocator) TabStopResult {
87
- return .{
88
- .positions = .{},
89
- .allocator = allocator,
90
- };
91
- }
92
-
93
- pub fn deinit(self: *TabStopResult) void {
94
- self.positions.deinit(self.allocator);
95
- }
96
-
97
- pub fn reset(self: *TabStopResult) void {
98
- self.positions.clearRetainingCapacity();
99
- }
100
- };
101
-
102
- pub const WrapBreak = struct {
103
- // byte_offset points at the grapheme that creates this break opportunity.
104
- // For whitespace and punctuation, this is the delimiter grapheme.
105
- // For CJK<->ASCII transitions, this is the last grapheme in the previous run.
106
- byte_offset: u32,
107
-
108
- // char_offset is grapheme-count based, not a display column.
109
- // Callers convert it to columns with charOffsetToColumn().
110
- char_offset: u32,
111
- };
112
-
113
- pub const WrapBreakResult = struct {
114
- breaks: std.ArrayListUnmanaged(WrapBreak),
115
- allocator: std.mem.Allocator,
116
-
117
- pub fn init(allocator: std.mem.Allocator) WrapBreakResult {
118
- return .{
119
- .breaks = .{},
120
- .allocator = allocator,
121
- };
122
- }
123
-
124
- pub fn deinit(self: *WrapBreakResult) void {
125
- self.breaks.deinit(self.allocator);
126
- }
127
-
128
- pub fn reset(self: *WrapBreakResult) void {
129
- self.breaks.clearRetainingCapacity();
130
- }
131
- };
132
-
133
- // Helper function to check if an ASCII byte is a wrap break point (CR/LF excluded)
134
- inline fn isAsciiWrapBreak(b: u8) bool {
135
- return switch (b) {
136
- ' ', '\t' => true, // Whitespace (no CR/LF in inputs)
137
- '-' => true, // Dash
138
- '/', '\\' => true, // Slashes
139
- '.', ',', ';', ':', '!', '?' => true, // Punctuation
140
- '(', ')', '[', ']', '{', '}' => true, // Brackets
141
- else => false,
142
- };
143
- }
144
-
145
- // Decode a UTF-8 codepoint starting at pos. Assumes valid UTF-8 input.
146
- // Returns (codepoint, length). If the remaining bytes are insufficient, returns length 1.
147
- pub inline fn decodeUtf8Unchecked(text: []const u8, pos: usize) struct { cp: u21, len: u3 } {
148
- const b0 = text[pos];
149
- if (b0 < 0x80) return .{ .cp = @intCast(b0), .len = 1 };
150
-
151
- if (pos + 1 >= text.len) return .{ .cp = 0xFFFD, .len = 1 };
152
- const b1 = text[pos + 1];
153
-
154
- if ((b0 & 0xE0) == 0xC0) {
155
- const cp2: u21 = @intCast((@as(u32, b0 & 0x1F) << 6) | @as(u32, b1 & 0x3F));
156
- return .{ .cp = cp2, .len = 2 };
157
- }
158
-
159
- if (pos + 2 >= text.len) return .{ .cp = 0xFFFD, .len = 1 };
160
- const b2 = text[pos + 2];
161
-
162
- if ((b0 & 0xF0) == 0xE0) {
163
- const cp3: u21 = @intCast((@as(u32, b0 & 0x0F) << 12) | (@as(u32, b1 & 0x3F) << 6) | @as(u32, b2 & 0x3F));
164
- return .{ .cp = cp3, .len = 3 };
165
- }
166
-
167
- if (pos + 3 >= text.len) return .{ .cp = 0xFFFD, .len = 1 };
168
- const b3 = text[pos + 3];
169
- const cp4: u21 = @intCast((@as(u32, b0 & 0x07) << 18) | (@as(u32, b1 & 0x3F) << 12) | (@as(u32, b2 & 0x3F) << 6) | @as(u32, b3 & 0x3F));
170
- return .{ .cp = cp4, .len = 4 };
171
- }
172
-
173
- // Unicode wrap-break codepoints
174
- inline fn isUnicodeWrapBreak(cp: u21) bool {
175
- return switch (cp) {
176
- 0x00A0, // NBSP
177
- 0x1680, // OGHAM SPACE MARK
178
- 0x2000...0x200A, // En quad..Hair space
179
- 0x202F, // NARROW NO-BREAK SPACE
180
- 0x205F, // MEDIUM MATHEMATICAL SPACE
181
- 0x3000, // IDEOGRAPHIC SPACE
182
- 0x200B, // ZERO WIDTH SPACE
183
- 0x00AD, // SOFT HYPHEN
184
- 0x2010, // HYPHEN
185
- 0x3001, // IDEOGRAPHIC COMMA
186
- 0x3002, // IDEOGRAPHIC FULL STOP
187
- 0xFF01, // FULLWIDTH EXCLAMATION MARK
188
- 0xFF1F, // FULLWIDTH QUESTION MARK
189
- => true,
190
- else => false,
191
- };
192
- }
193
-
194
- // WordClass keeps word-boundary behavior predictable in mixed-script text.
195
- // We split between ASCII word runs and CJK word runs, and we keep each
196
- // CJK run grouped as one unit.
197
- const WordClass = enum {
198
- ascii_word,
199
- cjk_word,
200
- other,
201
- };
202
-
203
- inline fn isAsciiWordByte(b: u8) bool {
204
- return (b >= 'a' and b <= 'z') or
205
- (b >= 'A' and b <= 'Z') or
206
- (b >= '0' and b <= '9') or
207
- b == '_';
208
- }
209
-
210
- inline fn isCjkWordCodepoint(cp: u21) bool {
211
- return
212
- // Han ideographs
213
- (cp >= 0x3400 and cp <= 0x4DBF) or
214
- (cp >= 0x4E00 and cp <= 0x9FFF) or
215
- (cp >= 0xF900 and cp <= 0xFAFF) or
216
- (cp >= 0x20000 and cp <= 0x2A6DF) or
217
- (cp >= 0x2A700 and cp <= 0x2B73F) or
218
- (cp >= 0x2B740 and cp <= 0x2B81F) or
219
- (cp >= 0x2B820 and cp <= 0x2CEAF) or
220
- (cp >= 0x2CEB0 and cp <= 0x2EBEF) or
221
- (cp >= 0x2EBF0 and cp <= 0x2EE5D) or
222
- (cp >= 0x2F800 and cp <= 0x2FA1F) or
223
- // Hiragana + Katakana
224
- (cp >= 0x3040 and cp <= 0x309F) or
225
- (cp >= 0x30A0 and cp <= 0x30FF) or
226
- (cp >= 0x31F0 and cp <= 0x31FF) or
227
- (cp >= 0xFF66 and cp <= 0xFF9D) or
228
- // Hangul
229
- (cp >= 0x1100 and cp <= 0x11FF) or
230
- (cp >= 0x3130 and cp <= 0x318F) or
231
- (cp >= 0xA960 and cp <= 0xA97F) or
232
- (cp >= 0xAC00 and cp <= 0xD7AF) or
233
- (cp >= 0xD7B0 and cp <= 0xD7FF);
234
- }
235
-
236
- inline fn classifyWordClass(cp: u21) WordClass {
237
- if (cp <= 0x7F) {
238
- return if (isAsciiWordByte(@intCast(cp))) .ascii_word else .other;
239
- }
240
- if (isCjkWordCodepoint(cp)) return .cjk_word;
241
- return .other;
242
- }
243
-
244
- pub inline fn isWordCodepoint(cp: u21) bool {
245
- return classifyWordClass(cp) != .other;
246
- }
247
-
248
- inline fn isCjkAsciiTransition(prev_class: WordClass, curr_class: WordClass) bool {
249
- return (prev_class == .cjk_word and curr_class == .ascii_word) or
250
- (prev_class == .ascii_word and curr_class == .cjk_word);
251
- }
252
-
253
- // Nothing needed here - using uucode.grapheme.isBreak directly
254
-
255
- pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method: WidthMethod) !void {
256
- // This function clears previous results and writes fresh break points.
257
- // Callers should treat `result.breaks` as replaced after the call.
258
- _ = width_method; // Currently unused, but kept for API consistency
259
- result.reset();
260
- const vector_len = 16;
261
-
262
- var pos: usize = 0;
263
- var char_offset: u32 = 0;
264
- var prev_cp: ?u21 = null; // Track previous codepoint for grapheme detection
265
- var break_state: uucode.grapheme.BreakState = .default;
266
- // We keep track of the current grapheme so we can add a break at
267
- // CJK<->ASCII transitions. The break is emitted at the previous grapheme,
268
- // so callers that add grapheme width land exactly at the run boundary.
269
- var have_current_grapheme = false;
270
- var current_grapheme_byte_offset: u32 = 0;
271
- var current_grapheme_char_offset: u32 = 0;
272
- var current_grapheme_class: WordClass = .other;
273
-
274
- while (pos + vector_len <= text.len) {
275
- const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
276
- const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
277
- const is_non_ascii = chunk >= ascii_threshold;
278
-
279
- // Fast path: all ASCII
280
- if (!@reduce(.Or, is_non_ascii)) {
281
- const first_class = classifyWordClass(text[pos]);
282
- if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, first_class)) {
283
- try result.breaks.append(result.allocator, .{
284
- .byte_offset = current_grapheme_byte_offset,
285
- .char_offset = current_grapheme_char_offset,
286
- });
287
- }
288
-
289
- // Use SIMD to find break characters
290
- var match_mask: @Vector(vector_len, bool) = @splat(false);
291
-
292
- // Check whitespace
293
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(' ')));
294
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('\t')));
295
-
296
- // Check dashes and slashes
297
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('-')));
298
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('/')));
299
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('\\')));
300
-
301
- // Check punctuation
302
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('.')));
303
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(',')));
304
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(';')));
305
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(':')));
306
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('!')));
307
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('?')));
308
-
309
- // Check brackets
310
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('(')));
311
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(')')));
312
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('[')));
313
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(']')));
314
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('{')));
315
- match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('}')));
316
-
317
- // Convert boolean mask to integer bitmask for faster iteration
318
- var bitmask: u16 = 0;
319
- inline for (0..vector_len) |i| {
320
- if (match_mask[i]) {
321
- bitmask |= @as(u16, 1) << @intCast(i);
322
- }
323
- }
324
-
325
- // Use bit manipulation to extract positions
326
- while (bitmask != 0) {
327
- const bit_pos = @ctz(bitmask);
328
- try result.breaks.append(result.allocator, .{
329
- .byte_offset = @intCast(pos + bit_pos),
330
- .char_offset = char_offset + @as(u32, @intCast(bit_pos)),
331
- });
332
- bitmask &= bitmask - 1;
333
- }
334
-
335
- pos += vector_len;
336
- const block_start_char_offset = char_offset;
337
- char_offset += vector_len;
338
- prev_cp = text[pos - 1]; // Last ASCII char
339
- break_state = .default;
340
- have_current_grapheme = true;
341
- current_grapheme_byte_offset = @intCast(pos - 1);
342
- current_grapheme_char_offset = block_start_char_offset + (vector_len - 1);
343
- current_grapheme_class = classifyWordClass(text[pos - 1]);
344
- continue;
345
- }
346
-
347
- // Slow path: mixed ASCII/non-ASCII - need grapheme-aware counting
348
- var i: usize = 0;
349
- while (i < vector_len) {
350
- const b0 = text[pos + i];
351
- if (b0 < 0x80) {
352
- const curr_cp: u21 = b0;
353
-
354
- // Check if this starts a new grapheme cluster
355
- // Skip invalid/replacement codepoints or codepoints that might be outside the grapheme table range
356
- const is_break = if (curr_cp == 0xFFFD or curr_cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
357
- if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
358
- break :blk uucode.grapheme.isBreak(p, curr_cp, &break_state);
359
- } else true;
360
-
361
- if (is_break) {
362
- const curr_class = classifyWordClass(curr_cp);
363
- if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
364
- try result.breaks.append(result.allocator, .{
365
- .byte_offset = current_grapheme_byte_offset,
366
- .char_offset = current_grapheme_char_offset,
367
- });
368
- }
369
- have_current_grapheme = true;
370
- current_grapheme_byte_offset = @intCast(pos + i);
371
- current_grapheme_char_offset = char_offset;
372
- current_grapheme_class = curr_class;
373
- }
374
-
375
- if (isAsciiWrapBreak(b0)) {
376
- try result.breaks.append(result.allocator, .{
377
- .byte_offset = @intCast(pos + i),
378
- .char_offset = char_offset,
379
- });
380
- }
381
- i += 1;
382
- if (is_break) {
383
- char_offset += 1;
384
- }
385
- prev_cp = curr_cp;
386
- } else {
387
- const dec = decodeUtf8Unchecked(text, pos + i);
388
- if (pos + i + dec.len > text.len) break;
389
- if (pos + i + dec.len > pos + vector_len) break;
390
-
391
- // Check if this starts a new grapheme cluster
392
- // Skip invalid/replacement codepoints or codepoints that might be outside the grapheme table range
393
- const is_break = if (dec.cp == 0xFFFD or dec.cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
394
- if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
395
- break :blk uucode.grapheme.isBreak(p, dec.cp, &break_state);
396
- } else true;
397
-
398
- if (is_break) {
399
- const curr_class = classifyWordClass(dec.cp);
400
- if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
401
- try result.breaks.append(result.allocator, .{
402
- .byte_offset = current_grapheme_byte_offset,
403
- .char_offset = current_grapheme_char_offset,
404
- });
405
- }
406
- have_current_grapheme = true;
407
- current_grapheme_byte_offset = @intCast(pos + i);
408
- current_grapheme_char_offset = char_offset;
409
- current_grapheme_class = curr_class;
410
- }
411
-
412
- if (isUnicodeWrapBreak(dec.cp)) {
413
- try result.breaks.append(result.allocator, .{
414
- .byte_offset = @intCast(pos + i),
415
- .char_offset = char_offset,
416
- });
417
- }
418
- i += dec.len;
419
- if (is_break) {
420
- char_offset += 1;
421
- }
422
- prev_cp = dec.cp;
423
- }
424
- }
425
- pos += i;
426
- }
427
-
428
- // Tail
429
- var i: usize = pos;
430
- while (i < text.len) {
431
- const b0 = text[i];
432
- if (b0 < 0x80) {
433
- const curr_cp: u21 = b0;
434
- const is_break = if (prev_cp) |p| blk: {
435
- if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
436
- break :blk uucode.grapheme.isBreak(p, curr_cp, &break_state);
437
- } else true;
438
-
439
- if (is_break) {
440
- const curr_class = classifyWordClass(curr_cp);
441
- if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
442
- try result.breaks.append(result.allocator, .{
443
- .byte_offset = current_grapheme_byte_offset,
444
- .char_offset = current_grapheme_char_offset,
445
- });
446
- }
447
- have_current_grapheme = true;
448
- current_grapheme_byte_offset = @intCast(i);
449
- current_grapheme_char_offset = char_offset;
450
- current_grapheme_class = curr_class;
451
- }
452
-
453
- if (isAsciiWrapBreak(b0)) {
454
- try result.breaks.append(result.allocator, .{
455
- .byte_offset = @intCast(i),
456
- .char_offset = char_offset,
457
- });
458
- }
459
- i += 1;
460
- if (is_break) {
461
- char_offset += 1;
462
- }
463
- prev_cp = curr_cp;
464
- } else {
465
- const dec = decodeUtf8Unchecked(text, i);
466
- if (i + dec.len > text.len) break;
467
-
468
- const is_break = if (dec.cp == 0xFFFD or dec.cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
469
- if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
470
- break :blk uucode.grapheme.isBreak(p, dec.cp, &break_state);
471
- } else true;
472
-
473
- if (is_break) {
474
- const curr_class = classifyWordClass(dec.cp);
475
- if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
476
- try result.breaks.append(result.allocator, .{
477
- .byte_offset = current_grapheme_byte_offset,
478
- .char_offset = current_grapheme_char_offset,
479
- });
480
- }
481
- have_current_grapheme = true;
482
- current_grapheme_byte_offset = @intCast(i);
483
- current_grapheme_char_offset = char_offset;
484
- current_grapheme_class = curr_class;
485
- }
486
-
487
- if (isUnicodeWrapBreak(dec.cp)) {
488
- try result.breaks.append(result.allocator, .{
489
- .byte_offset = @intCast(i),
490
- .char_offset = char_offset,
491
- });
492
- }
493
- i += dec.len;
494
- if (is_break) {
495
- char_offset += 1;
496
- }
497
- prev_cp = dec.cp;
498
- }
499
- }
500
- }
501
-
502
- pub fn findTabStops(text: []const u8, result: *TabStopResult) !void {
503
- result.reset();
504
- const vector_len = 16;
505
- const Vec = @Vector(vector_len, u8);
506
-
507
- const vTab: Vec = @splat('\t');
508
-
509
- var pos: usize = 0;
510
-
511
- while (pos + vector_len <= text.len) {
512
- const chunk: Vec = text[pos..][0..vector_len].*;
513
- const cmp_tab = chunk == vTab;
514
-
515
- if (@reduce(.Or, cmp_tab)) {
516
- var i: usize = 0;
517
- while (i < vector_len) : (i += 1) {
518
- if (text[pos + i] == '\t') {
519
- try result.positions.append(result.allocator, pos + i);
520
- }
521
- }
522
- }
523
- pos += vector_len;
524
- }
525
-
526
- while (pos < text.len) : (pos += 1) {
527
- if (text[pos] == '\t') {
528
- try result.positions.append(result.allocator, pos);
529
- }
530
- }
531
- }
532
-
533
- pub fn findLineBreaks(text: []const u8, result: *LineBreakResult) !void {
534
- result.reset();
535
- const vector_len = 16; // Use 16-byte vectors (SSE2/NEON compatible)
536
- const Vec = @Vector(vector_len, u8);
537
-
538
- // Prepare vector constants for '\n' and '\r'
539
- const vNL: Vec = @splat('\n');
540
- const vCR: Vec = @splat('\r');
541
-
542
- var pos: usize = 0;
543
- var prev_was_cr = false; // Track if previous chunk ended with \r
544
-
545
- // Process full vector chunks
546
- while (pos + vector_len <= text.len) {
547
- const chunk: Vec = text[pos..][0..vector_len].*;
548
- const cmp_nl = chunk == vNL;
549
- const cmp_cr = chunk == vCR;
550
-
551
- // Check if any newline or CR found
552
- if (@reduce(.Or, cmp_nl) or @reduce(.Or, cmp_cr)) {
553
- // Found a match, process this chunk
554
- var i: usize = 0;
555
- while (i < vector_len) : (i += 1) {
556
- const absolute_index = pos + i;
557
- const b = text[absolute_index];
558
- if (b == '\n') {
559
- // Skip if this is the \n part of a CRLF split across chunks
560
- if (i == 0 and prev_was_cr) {
561
- prev_was_cr = false;
562
- continue;
563
- }
564
- // Check if this is part of CRLF
565
- const kind: LineBreakKind = if (absolute_index > 0 and text[absolute_index - 1] == '\r') .CRLF else .LF;
566
- try result.breaks.append(result.allocator, .{ .pos = absolute_index, .kind = kind });
567
- } else if (b == '\r') {
568
- // Check for CRLF
569
- if (absolute_index + 1 < text.len and text[absolute_index + 1] == '\n') {
570
- try result.breaks.append(result.allocator, .{ .pos = absolute_index + 1, .kind = .CRLF });
571
- i += 1; // Skip the \n in next iteration
572
- } else {
573
- try result.breaks.append(result.allocator, .{ .pos = absolute_index, .kind = .CR });
574
- }
575
- }
576
- }
577
- // Update prev_was_cr for next chunk
578
- prev_was_cr = (text[pos + vector_len - 1] == '\r');
579
- } else {
580
- prev_was_cr = false;
581
- }
582
- pos += vector_len;
583
- }
584
-
585
- // Handle remaining bytes with scalar code
586
- while (pos < text.len) : (pos += 1) {
587
- const b = text[pos];
588
- if (b == '\n') {
589
- // Handle CRLF split at chunk boundary
590
- if (pos > 0 and text[pos - 1] == '\r') {
591
- // Already recorded at pos - 1 or will be skipped
592
- if (prev_was_cr) {
593
- prev_was_cr = false;
594
- continue;
595
- }
596
- }
597
- const kind: LineBreakKind = if (pos > 0 and text[pos - 1] == '\r') .CRLF else .LF;
598
- try result.breaks.append(result.allocator, .{ .pos = pos, .kind = kind });
599
- } else if (b == '\r') {
600
- if (pos + 1 < text.len and text[pos + 1] == '\n') {
601
- try result.breaks.append(result.allocator, .{ .pos = pos + 1, .kind = .CRLF });
602
- pos += 1;
603
- } else {
604
- try result.breaks.append(result.allocator, .{ .pos = pos, .kind = .CR });
605
- }
606
- }
607
- prev_was_cr = false;
608
- }
609
- }
610
-
611
- pub const WrapByWidthResult = struct {
612
- byte_offset: u32,
613
- grapheme_count: u32,
614
- columns_used: u32,
615
- };
616
-
617
- pub const PosByWidthResult = struct {
618
- byte_offset: u32,
619
- grapheme_count: u32,
620
- columns_used: u32,
621
- };
622
-
623
- pub inline fn eastAsianWidth(cp: u21) u32 {
624
- if (cp > 0x10FFFF) return 0;
625
- const eaw = uucode.get(.east_asian_width, cp);
626
- const width = eawToWidth(cp, eaw);
627
- return if (width > 0) @intCast(width) else 0;
628
- }
629
-
630
- /// Calculate width from east asian width property and Unicode properties
631
- /// Returns -1 for control characters (they don't contribute to width)
632
- inline fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
633
- if (cp == 0) return 0;
634
- if (cp < 32 or (cp >= 0x7F and cp < 0xA0)) return -1;
635
-
636
- const gc = uucode.get(.general_category, cp);
637
- switch (gc) {
638
- .mark_nonspacing, .mark_spacing_combining, .mark_enclosing => return 0,
639
- else => {},
640
- }
641
-
642
- if (cp == 0x200B) return 0;
643
- if (cp == 0x200C) return 0;
644
- if (cp == 0x200D) return 0;
645
- if (cp == 0x2060) return 0;
646
- if (cp == 0x034F) return 0;
647
- if (cp == 0xFEFF) return 0;
648
- if (cp >= 0x180B and cp <= 0x180D) return 0;
649
- if (cp >= 0xFE00 and cp <= 0xFE0F) return 0;
650
- if (cp >= 0xE0100 and cp <= 0xE01EF) return 0;
651
-
652
- if (eaw == .fullwidth or eaw == .wide) return 2;
653
-
654
- if (cp >= 0x1F000 and cp <= 0x1F02B) return 2;
655
- if (cp >= 0x1F030 and cp <= 0x1F093) return 2;
656
- if (cp >= 0x1F0A0 and cp <= 0x1F0AE) return 2;
657
- if (cp >= 0x1F0B1 and cp <= 0x1F0BF) return 2;
658
- if (cp >= 0x1F0C1 and cp <= 0x1F0CF) return 2;
659
- if (cp >= 0x1F0D1 and cp <= 0x1F0F5) return 2;
660
-
661
- if (cp == 0x231A or cp == 0x231B) return 2;
662
- if (cp == 0x2329 or cp == 0x232A) return 2;
663
- if (cp >= 0x23E9 and cp <= 0x23EC) return 2;
664
- if (cp == 0x23F0 or cp == 0x23F3) return 2;
665
- if (cp >= 0x25FD and cp <= 0x25FE) return 2;
666
-
667
- if (cp >= 0x2614 and cp <= 0x2615) return 2;
668
- if (cp == 0x2622 or cp == 0x2623) return 2;
669
- if (cp >= 0x2630 and cp <= 0x2637) return 2;
670
- if (cp >= 0x2648 and cp <= 0x2653) return 2;
671
- if (cp == 0x267F or cp == 0x2693 or cp == 0x269B) return 2;
672
- if (cp == 0x26A0 or cp == 0x26A1) return 2;
673
- if (cp >= 0x26AA and cp <= 0x26AB) return 2;
674
- if (cp >= 0x26BD and cp <= 0x26BE) return 2;
675
- if (cp >= 0x26C4 and cp <= 0x26C5) return 2;
676
- if (cp == 0x26CE or cp == 0x26D1 or cp == 0x26D4) return 2;
677
- if (cp == 0x26EA or cp == 0x26F2 or cp == 0x26F3) return 2;
678
- if (cp == 0x26F5 or cp == 0x26FA or cp == 0x26FD) return 2;
679
-
680
- if (cp == 0x203C or cp == 0x2049) return 2;
681
- if (cp == 0x2705 or cp >= 0x270A and cp <= 0x270B) return 2;
682
- if (cp == 0x2728 or cp == 0x274C or cp == 0x274E) return 2;
683
- if (cp >= 0x2753 and cp <= 0x2755) return 2;
684
- if (cp == 0x2757) return 2;
685
- if (cp >= 0x2760 and cp <= 0x2767) return 2;
686
- if (cp >= 0x2795 and cp <= 0x2797) return 2;
687
- if (cp == 0x27B0 or cp == 0x27BF) return 2;
688
- if (cp >= 0x2B1B and cp <= 0x2B1C) return 2;
689
- if (cp >= 0x2B50 and cp <= 0x2B50) return 2;
690
- if (cp >= 0x2B55 and cp <= 0x2B55) return 2;
691
-
692
- if (cp >= 0x1F300 and cp <= 0x1F320) return 2;
693
- if (cp >= 0x1F32D and cp <= 0x1F335) return 2;
694
- if (cp >= 0x1F337 and cp <= 0x1F37C) return 2;
695
- if (cp >= 0x1F37E and cp <= 0x1F393) return 2;
696
- if (cp >= 0x1F3A0 and cp <= 0x1F3CA) return 2;
697
- if (cp >= 0x1F3CF and cp <= 0x1F3D3) return 2;
698
- if (cp >= 0x1F3E0 and cp <= 0x1F3F0) return 2;
699
- if (cp == 0x1F3F4) return 2;
700
- if (cp >= 0x1F3F8 and cp <= 0x1F3FF) return 2;
701
- if (cp >= 0x1F400 and cp <= 0x1F43E) return 2;
702
- if (cp == 0x1F440) return 2;
703
- if (cp >= 0x1F442 and cp <= 0x1F4FC) return 2;
704
- if (cp >= 0x1F4FF and cp <= 0x1F6C5) return 2;
705
- if (cp == 0x1F6CC) return 2;
706
- if (cp >= 0x1F6D0 and cp <= 0x1F6D2) return 2;
707
- if (cp >= 0x1F6D5 and cp <= 0x1F6D7) return 2;
708
- if (cp >= 0x1F6DC and cp <= 0x1F6DF) return 2;
709
- if (cp >= 0x1F6EB and cp <= 0x1F6EC) return 2;
710
- if (cp >= 0x1F6F4 and cp <= 0x1F6FC) return 2;
711
- if (cp >= 0x1F700 and cp <= 0x1F773) return 2;
712
- if (cp >= 0x1F780 and cp <= 0x1F7D8) return 2;
713
- if (cp >= 0x1F7E0 and cp <= 0x1F7EB) return 2;
714
- if (cp >= 0x1F800 and cp <= 0x1F80B) return 2;
715
- if (cp >= 0x1F810 and cp <= 0x1F847) return 2;
716
- if (cp >= 0x1F850 and cp <= 0x1F859) return 2;
717
- if (cp >= 0x1F860 and cp <= 0x1F887) return 2;
718
- if (cp >= 0x1F890 and cp <= 0x1F8AD) return 2;
719
- if (cp >= 0x1F8B0 and cp <= 0x1F8B1) return 2;
720
- if (cp >= 0x1F90C and cp <= 0x1F93A) return 2;
721
- if (cp >= 0x1F93C and cp <= 0x1F945) return 2;
722
- if (cp >= 0x1F947 and cp <= 0x1FA53) return 2;
723
- if (cp >= 0x1FA60 and cp <= 0x1FA6D) return 2;
724
- if (cp >= 0x1FA70 and cp <= 0x1FA74) return 2;
725
- if (cp >= 0x1FA78 and cp <= 0x1FA7C) return 2;
726
- if (cp >= 0x1FA80 and cp <= 0x1FA86) return 2;
727
- if (cp >= 0x1FA90 and cp <= 0x1FAAC) return 2;
728
- if (cp >= 0x1FAB0 and cp <= 0x1FABA) return 2;
729
- if (cp >= 0x1FAC0 and cp <= 0x1FAC5) return 2;
730
- if (cp >= 0x1FAD0 and cp <= 0x1FAD9) return 2;
731
- if (cp >= 0x1FAE0 and cp <= 0x1FAE7) return 2;
732
- if (cp >= 0x1FAF0 and cp <= 0x1FAF8) return 2;
733
-
734
- return 1;
735
- }
736
-
737
- /// Calculate the display width of a byte in columns
738
- /// Used for ASCII-only fast paths
739
- inline fn asciiCharWidth(byte: u8, tab_width: u8) u32 {
740
- if (byte == '\t') {
741
- return tab_width;
742
- } else if (byte >= 32 and byte <= 126) {
743
- return 1;
744
- }
745
- return 0;
746
- }
747
-
748
- /// Calculate the display width of a character (byte or codepoint) in columns
749
- inline fn charWidth(byte: u8, codepoint: u21, tab_width: u8) u32 {
750
- if (byte == '\t') {
751
- return tab_width;
752
- } else if (byte < 0x80 and byte >= 32 and byte <= 126) {
753
- return 1;
754
- } else if (byte >= 0x80) {
755
- const eaw = uucode.get(.east_asian_width, codepoint);
756
- const w = eawToWidth(codepoint, eaw);
757
- return if (w > 0) @intCast(w) else 0;
758
- }
759
- return 0;
760
- }
761
-
762
- /// Check if a codepoint is valid for grapheme break detection
763
- inline fn isValidCodepoint(cp: u21) bool {
764
- return cp != 0xFFFD and cp <= 0x10FFFF;
765
- }
766
-
767
- /// Check if there's a grapheme break between two codepoints
768
- /// - wcwidth mode: use Unicode grapheme clustering for proper rendering,
769
- /// but calculate width using wcwidth (sum of codepoint widths)
770
- /// - no_zwj mode: use grapheme breaks but treat ZWJ as a break (ignore joining)
771
- /// - unicode mode: use standard grapheme cluster segmentation
772
- inline fn isGraphemeBreak(prev_cp: ?u21, curr_cp: u21, break_state: *uucode.grapheme.BreakState, width_method: WidthMethod) bool {
773
- // wcwidth mode uses Unicode grapheme clustering for proper rendering
774
- // (ZWJ sequences, skin tone modifiers stay together), but width is
775
- // calculated using wcwidth semantics (sum of codepoint widths)
776
- if (width_method == .wcwidth) {
777
- if (prev_cp == null) return true;
778
-
779
- if (!isValidCodepoint(curr_cp)) return true;
780
- if (!isValidCodepoint(prev_cp.?)) return true;
781
- return uucode.grapheme.isBreak(prev_cp.?, curr_cp, break_state);
782
- }
783
-
784
- if (!isValidCodepoint(curr_cp)) return true;
785
-
786
- // In no_zwj mode, treat ZWJ (U+200D) as NOT joining characters
787
- // When we see ZWJ after a character, it's part of that character's grapheme
788
- // But when we see a character after ZWJ, it starts a new grapheme
789
- if (width_method == .no_zwj) {
790
- const ZWJ: u21 = 0x200D;
791
- if (prev_cp) |p| {
792
- // If previous was ZWJ, current starts a new grapheme
793
- // Don't call uucode.grapheme.isBreak because it will say no break
794
- if (p == ZWJ) {
795
- // Reset break state since we're forcing a break
796
- break_state.* = .default;
797
- return true;
798
- }
799
- }
800
- // If current is ZWJ, don't break yet - it's part of previous grapheme
801
- // (will have width 0 anyway)
802
- }
803
-
804
- if (prev_cp) |p| {
805
- if (!isValidCodepoint(p)) return true;
806
- return uucode.grapheme.isBreak(p, curr_cp, break_state);
807
- }
808
- return true;
809
- }
810
-
811
- /// State for accumulating grapheme cluster width
812
- const GraphemeWidthState = struct {
813
- width: u32 = 0,
814
- has_width: bool = false,
815
- is_regional_indicator_pair: bool = false,
816
- has_vs16: bool = false,
817
- has_indic_virama: bool = false,
818
- width_method: WidthMethod,
819
-
820
- /// Initialize state with the first codepoint of a grapheme cluster
821
- inline fn init(first_cp: u21, first_width: u32, width_method: WidthMethod) GraphemeWidthState {
822
- return .{
823
- .width = first_width,
824
- .has_width = (first_width > 0),
825
- .is_regional_indicator_pair = (first_cp >= 0x1F1E6 and first_cp <= 0x1F1FF),
826
- .has_vs16 = false,
827
- .has_indic_virama = false,
828
- .width_method = width_method,
829
- };
830
- }
831
-
832
/// Fold one more codepoint into the grapheme cluster being measured.
/// `cp_width` is the caller-computed width of `cp`; wcwidth mode ignores it and
/// re-derives the width from the East Asian Width property instead.
inline fn addCodepoint(self: *GraphemeWidthState, cp: u21, cp_width: u32) void {
    // wcwidth mode (tmux-style): every codepoint adds its own width.
    if (self.width_method == .wcwidth) {
        const w = eawToWidth(cp, uucode.get(.east_asian_width, cp));
        if (w > 0) {
            self.width += @intCast(w);
            self.has_width = true;
        }
        return;
    }

    // unicode / no_zwj modes: grapheme-aware width.

    // Variation Selector-16 (emoji presentation) widens a width-1 cluster to 2.
    // This check must come before the nonspacing-mark check below.
    if (cp == 0xFE0F) {
        self.has_vs16 = true;
        if (self.has_width and self.width == 1) self.width = 2;
        return;
    }

    // Any nonspacing mark sets the "virama seen" flag and contributes no width.
    if (uucode.get(.general_category, cp) == .mark_nonspacing) {
        self.has_indic_virama = true;
        return;
    }

    // A second regional indicator after a leading one adds its width (flag pair).
    const cp_is_ri = cp >= 0x1F1E6 and cp <= 0x1F1FF;
    if (self.is_regional_indicator_pair and cp_is_ri) {
        self.width += cp_width;
        self.has_width = true;
        return;
    }

    // The first codepoint with a non-zero width defines the cluster width.
    if (!self.has_width and cp_width > 0) {
        self.width = cp_width;
        self.has_width = true;
        return;
    }

    // Devanagari conjunct: a base consonant following a flagged mark widens the
    // cluster, except for RA (U+0930) which adds nothing.
    const cp_is_devanagari_base = (cp >= 0x0915 and cp <= 0x0939) or (cp >= 0x0958 and cp <= 0x095F);
    if (self.has_width and self.has_indic_virama and cp_is_devanagari_base and cp_width > 0) {
        if (cp != 0x0930) self.width += cp_width;
        self.has_indic_virama = false;
    }
}
882
- };
883
-
884
/// Running state for the column-scanning loops that walk text one grapheme
/// cluster at a time.
const ClusterState = struct {
    /// Columns consumed by clusters that have been committed so far.
    columns_used: u32,
    /// Number of committed clusters.
    grapheme_count: u32,
    /// Width of the cluster currently being accumulated.
    cluster_width: u32,
    /// Byte offset at which the current cluster begins.
    cluster_start: usize,
    /// Previously processed codepoint; null before the first one.
    prev_cp: ?u21,
    break_state: uucode.grapheme.BreakState,
    width_state: GraphemeWidthState,
    width_method: WidthMethod,
    /// False until the first codepoint of the current cluster has been recorded.
    cluster_started: bool,

    /// Create an empty state; `width_state` holds a zero-width placeholder until
    /// the first cluster starts.
    fn init(width_method: WidthMethod) ClusterState {
        return .{
            .columns_used = 0,
            .grapheme_count = 0,
            .cluster_width = 0,
            .cluster_start = 0,
            .prev_cp = null,
            .break_state = .default,
            .width_state = GraphemeWidthState.init(0, 0, width_method),
            .width_method = width_method,
            .cluster_started = false,
        };
    }
};
910
-
911
/// Commit the pending cluster at a grapheme boundary while wrapping by width.
/// Returns true when the pending cluster would push the column count past
/// `max_columns`; in that case the state is left untouched so the caller can
/// report `state.cluster_start` as the wrap point. Otherwise the cluster is
/// committed and a new one is opened at `new_cluster_start`.
inline fn handleClusterForWrap(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
) bool {
    if (!is_break) return false;

    // Nothing is pending before the very first codepoint.
    if (state.prev_cp != null) {
        const end_col = state.columns_used + state.cluster_width;
        if (end_col > max_columns) return true; // signal the caller to stop
        state.columns_used = end_col;
        state.grapheme_count += 1;
    }

    state.cluster_width = 0;
    state.cluster_start = new_cluster_start;
    state.cluster_started = false;
    return false;
}
933
-
934
/// Commit the pending cluster at a grapheme boundary while locating a column
/// position. Returns true when the scan should stop before the pending cluster.
///
/// Snapping rules:
/// - include_start_before=true (selection end): a cluster is kept as long as it
///   STARTS before max_columns, so a wide grapheme straddling max_columns is
///   included whole (snap forward).
/// - include_start_before=false (selection start): a cluster is kept only if it
///   ENDS at or before max_columns, so a wide grapheme straddling max_columns is
///   excluded (snap backward).
///
/// `grapheme_count` is advanced only in the include_start_before=true mode.
inline fn handleClusterForPos(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
    include_start_before: bool,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;

        const past_limit = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (past_limit) return true;

        state.columns_used = end_col;
        if (include_start_before) state.grapheme_count += 1;
    }

    state.cluster_width = 0;
    state.cluster_start = new_cluster_start;
    state.cluster_started = false;
    return false;
}
975
-
976
/// Find the byte offset at which `text` must wrap to fit in `max_columns`.
/// Dispatches on `width_method`: `.unicode` and `.no_zwj` use grapheme cluster
/// segmentation, `.wcwidth` measures each codepoint independently.
pub fn findWrapPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    return switch (width_method) {
        .unicode, .no_zwj => findWrapPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, width_method),
        .wcwidth => findWrapPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly),
    };
}
989
-
990
/// Find the wrap position using Unicode grapheme cluster segmentation.
/// Scans `text` cluster by cluster and stops before the first cluster that would
/// exceed `max_columns`. A cluster is committed only once the following break is
/// seen, so the last cluster is handled separately after the loops.
fn findWrapPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: every byte is counted as one grapheme in one column.
    if (isASCIIOnly) {
        const taken: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = taken, .grapheme_count = taken, .columns_used = taken };
    }

    const vector_len = 16;
    var pos: usize = 0;
    var state = ClusterState.init(width_method);

    // Process 16-byte chunks while a full chunk is available.
    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const high_bit: @Vector(vector_len, u8) = @splat(0x80);

        if (!@reduce(.Or, chunk >= high_bit)) {
            // Whole chunk is ASCII: one byte per codepoint.
            for (text[pos..][0..vector_len], 0..) |b, k| {
                const cp: u21 = b;
                const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

                if (handleClusterForWrap(&state, at_break, pos + k, max_columns)) {
                    return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
                }

                const w = asciiCharWidth(b, tab_width);
                if (state.cluster_started) {
                    state.width_state.addCodepoint(cp, w);
                    state.cluster_width = state.width_state.width;
                } else {
                    state.width_state = GraphemeWidthState.init(cp, w, width_method);
                    state.cluster_width = w;
                    state.cluster_started = true;
                }
                state.prev_cp = cp;
            }
            pos += vector_len;
            continue;
        }

        // Chunk contains non-ASCII bytes: decode codepoint by codepoint.
        var consumed: usize = 0;
        while (consumed < vector_len and pos + consumed < text.len) {
            const at = pos + consumed;
            const b0 = text[at];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                cp = dec.cp;
                cp_len = dec.len;
            }

            // Leave a sequence that would run past the end to the tail loop.
            if (at + cp_len > text.len) break;

            const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

            if (handleClusterForWrap(&state, at_break, at, max_columns)) {
                return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
            }

            const w = charWidth(b0, cp, tab_width);
            if (state.cluster_started) {
                state.width_state.addCodepoint(cp, w);
                state.cluster_width = state.width_state.width;
            } else {
                state.width_state = GraphemeWidthState.init(cp, w, width_method);
                state.cluster_width = w;
                state.cluster_started = true;
            }
            state.prev_cp = cp;
            consumed += cp_len;
        }
        // A multi-byte codepoint may carry us past the chunk boundary.
        pos += consumed;
    }

    // Tail: fewer than 16 bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

        if (handleClusterForWrap(&state, at_break, pos, max_columns)) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }

        const w = charWidth(b0, cp, tab_width);
        if (state.cluster_started) {
            state.width_state.addCodepoint(cp, w);
            state.cluster_width = state.width_state.width;
        } else {
            state.width_state = GraphemeWidthState.init(cp, w, width_method);
            state.cluster_width = w;
            state.cluster_started = true;
        }
        state.prev_cp = cp;
        pos += cp_len;
    }

    // Final cluster: no trailing break arrived to commit it.
    if (state.prev_cp != null and state.cluster_width > 0) {
        const end_col = state.columns_used + state.cluster_width;
        if (end_col > max_columns) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }
        state.columns_used = end_col;
        state.grapheme_count += 1;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
1113
-
1114
/// Find the wrap position with wcwidth-style measurement: each codepoint is
/// measured on its own, with no grapheme clustering. The `grapheme_count` field
/// of the result therefore counts codepoints.
fn findWrapPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: every byte is counted as one unit in one column.
    if (isASCIIOnly) {
        const taken: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = taken, .grapheme_count = taken, .columns_used = taken };
    }

    var pos: usize = 0;
    var columns_used: u32 = 0;
    var codepoint_count: u32 = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence running past the end of the text ends the scan.
        if (pos + cp_len > text.len) break;

        const cp_width = charWidth(b0, cp, tab_width);

        // Stop once the limit is reached (so trailing zero-width codepoints are
        // not appended) or when this codepoint would exceed it.
        if (columns_used >= max_columns or columns_used + cp_width > max_columns) {
            return .{ .byte_offset = @intCast(pos), .grapheme_count = codepoint_count, .columns_used = columns_used };
        }

        columns_used += cp_width;
        codepoint_count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = codepoint_count, .columns_used = columns_used };
}
1170
-
1171
/// Find the byte position corresponding to a column, dispatching on `width_method`.
/// - include_start_before=true (selection end): units that START before
///   max_columns are included, so a width-2 grapheme straddling max_columns is
///   included whole (snap forward).
/// - include_start_before=false (selection start): units that END after
///   max_columns are excluded, so a width-2 grapheme straddling max_columns is
///   left out (snap backward).
pub fn findPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    return switch (width_method) {
        .unicode, .no_zwj => findPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, include_start_before, width_method),
        .wcwidth => findPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly, include_start_before),
    };
}
1189
-
1190
/// Find the byte position for a column using Unicode grapheme cluster segmentation.
///
/// Clusters are committed by `handleClusterForPos` when the following break is
/// seen; the last cluster has no following break and is committed after the loops
/// using the same snapping rule:
/// - include_start_before=true: keep the cluster if it starts before max_columns.
/// - include_start_before=false: keep the cluster only if it ends at or before
///   max_columns.
///
/// `grapheme_count` in the result is only advanced when include_start_before is true
/// (the ASCII fast path reports a count in both modes).
fn findPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: every byte is counted as one grapheme in one column.
    if (isASCIIOnly) {
        const taken: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = taken, .grapheme_count = taken, .columns_used = taken };
    }

    const vector_len = 16;
    var pos: usize = 0;
    var state = ClusterState.init(width_method);

    // Process 16-byte chunks while a full chunk is available.
    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const high_bit: @Vector(vector_len, u8) = @splat(0x80);

        if (!@reduce(.Or, chunk >= high_bit)) {
            // Whole chunk is ASCII: one byte per codepoint.
            for (text[pos..][0..vector_len], 0..) |b, k| {
                const cp: u21 = b;
                const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

                if (handleClusterForPos(&state, at_break, pos + k, max_columns, include_start_before)) {
                    return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
                }

                const w = asciiCharWidth(b, tab_width);
                if (state.cluster_started) {
                    state.width_state.addCodepoint(cp, w);
                    state.cluster_width = state.width_state.width;
                } else {
                    state.width_state = GraphemeWidthState.init(cp, w, width_method);
                    state.cluster_width = w;
                    state.cluster_started = true;
                }
                state.prev_cp = cp;
            }
            pos += vector_len;
            continue;
        }

        // Chunk contains non-ASCII bytes: decode codepoint by codepoint.
        var consumed: usize = 0;
        while (consumed < vector_len and pos + consumed < text.len) {
            const at = pos + consumed;
            const b0 = text[at];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                cp = dec.cp;
                cp_len = dec.len;
            }

            // Leave a sequence that would run past the end to the tail loop.
            if (at + cp_len > text.len) break;

            const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

            if (handleClusterForPos(&state, at_break, at, max_columns, include_start_before)) {
                return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
            }

            const w = charWidth(b0, cp, tab_width);
            if (state.cluster_started) {
                state.width_state.addCodepoint(cp, w);
                state.cluster_width = state.width_state.width;
            } else {
                state.width_state = GraphemeWidthState.init(cp, w, width_method);
                state.cluster_width = w;
                state.cluster_started = true;
            }
            state.prev_cp = cp;
            consumed += cp_len;
        }
        // A multi-byte codepoint may carry us past the chunk boundary.
        pos += consumed;
    }

    // Tail: fewer than 16 bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        const at_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);

        if (handleClusterForPos(&state, at_break, pos, max_columns, include_start_before)) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }

        const w = charWidth(b0, cp, tab_width);
        if (state.cluster_started) {
            state.width_state.addCodepoint(cp, w);
            state.cluster_width = state.width_state.width;
        } else {
            state.width_state = GraphemeWidthState.init(cp, w, width_method);
            state.cluster_width = w;
            state.cluster_started = true;
        }
        state.prev_cp = cp;
        pos += cp_len;
    }

    // Final cluster: apply the same snapping rule as handleClusterForPos.
    // Previously only `columns_used >= max_columns` was checked here, so with
    // include_start_before=false a trailing wide grapheme that crossed
    // max_columns was included instead of being snapped back.
    if (state.prev_cp != null and state.cluster_width > 0) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;

        const past_limit = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (past_limit) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }

        state.columns_used = end_col;
        if (include_start_before) {
            state.grapheme_count += 1;
        }
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
1316
-
1317
/// Find the byte position for a column with wcwidth-style measurement: each
/// codepoint is measured on its own, with no grapheme clustering. The
/// `grapheme_count` field of the result counts codepoints.
fn findPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: every byte is counted as one unit in one column.
    if (isASCIIOnly) {
        const taken: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = taken, .grapheme_count = taken, .columns_used = taken };
    }

    var pos: usize = 0;
    var columns_used: u32 = 0;
    var codepoint_count: u32 = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence running past the end of the text ends the scan.
        if (pos + cp_len > text.len) break;

        const start_col = columns_used;
        const end_col = start_col + charWidth(b0, cp, tab_width);

        // Selection end: stop at the first codepoint that starts at/after the limit.
        // Selection start: stop at the first codepoint that ends after the limit.
        const past_limit = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (past_limit) {
            return .{ .byte_offset = @intCast(pos), .grapheme_count = codepoint_count, .columns_used = columns_used };
        }

        columns_used = end_col;
        codepoint_count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = codepoint_count, .columns_used = columns_used };
}
1379
-
1380
/// Width of the unit starting at `byte_offset`, dispatching on `width_method`:
/// a whole grapheme cluster for `.unicode` / `.no_zwj`, a single codepoint for
/// `.wcwidth`.
pub fn getWidthAt(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .unicode, .no_zwj => getWidthAtUnicode(text, byte_offset, tab_width, width_method),
        .wcwidth => getWidthAtWCWidth(text, byte_offset, tab_width),
    };
}
1387
-
1388
/// Width of the grapheme cluster that starts at `byte_offset`.
/// Returns 0 when the offset is at or past the end of the text, and 1 when the
/// first sequence is cut off by the end of the text.
fn getWidthAtUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    var first_cp: u21 = b0;
    var first_len: usize = 1;
    if (b0 >= 0x80) {
        const dec = decodeUtf8Unchecked(text, byte_offset);
        if (byte_offset + dec.len > text.len) return 1;
        first_cp = dec.cp;
        first_len = dec.len;
    }

    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: ?u21 = first_cp;
    var state = GraphemeWidthState.init(first_cp, charWidth(b0, first_cp, tab_width), width_method);

    // Extend the cluster until the next grapheme break or the end of the text.
    var pos = byte_offset + first_len;
    while (pos < text.len) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        if (pos + cp_len > text.len) break;
        if (isGraphemeBreak(prev_cp, cp, &break_state, width_method)) break;

        state.addCodepoint(cp, charWidth(b, cp, tab_width));

        prev_cp = cp;
        pos += cp_len;
    }

    return state.width;
}
1428
-
1429
/// Width of the single codepoint at `byte_offset` (wcwidth mode treats every
/// codepoint independently). Returns 0 when the offset is at or past the end of
/// the text, and 1 when the sequence is cut off by the end of the text.
fn getWidthAtWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    if (b0 < 0x80) return charWidth(b0, b0, tab_width);

    const dec = decodeUtf8Unchecked(text, byte_offset);
    if (byte_offset + dec.len > text.len) return 1;
    return charWidth(b0, dec.cp, tab_width);
}
1445
-
1446
/// Result of a backwards grapheme lookup.
pub const PrevGraphemeResult = struct {
    /// Byte offset at which the previous unit begins.
    start_offset: usize,
    /// Display width of that unit in columns.
    width: u32,
};
1450
-
1451
/// Locate the unit that precedes `byte_offset`, dispatching on `width_method`.
/// Returns null when there is nothing before the offset.
pub fn getPrevGraphemeStart(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    return switch (width_method) {
        .unicode, .no_zwj => getPrevGraphemeStartUnicode(text, byte_offset, tab_width, width_method),
        .wcwidth => getPrevGraphemeStartWCWidth(text, byte_offset, tab_width),
    };
}
1458
-
1459
/// Locate the last codepoint with a non-zero width that starts before
/// `byte_offset`, scanning forward from the beginning of the text.
/// Returns null for an offset of 0, an offset past the end, empty text, or when
/// no codepoint before the offset has a width.
fn getPrevGraphemeStartWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0 or byte_offset > text.len) return null;

    var found: ?PrevGraphemeResult = null;
    var pos: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            // A sequence cut off by the end of the text is measured as U+FFFD.
            cp = if (pos + cp_len > text.len) 0xFFFD else dec.cp;
        }

        // Zero-width codepoints never become the result.
        const w = charWidth(b, cp, tab_width);
        if (w > 0) {
            found = .{ .start_offset = pos, .width = w };
        }
        pos += cp_len;
    }

    return found;
}
1488
-
1489
/// Locate the grapheme cluster that precedes `byte_offset`, scanning forward from
/// the beginning of the text and remembering the start of the most recent cluster.
/// Returns null for an offset of 0, an offset past the end, or empty text.
/// The reported width is computed with `getWidthAt` at the cluster start.
///
/// Cleanup relative to the earlier version (no behavior change): the
/// `byte_offset == 0` re-check after the early return, the
/// `second_to_last_grapheme_start` fallback (the recorded start is always below
/// `byte_offset`), and the validity re-check of `prev_cp` (it is only ever set
/// to a validated codepoint) were unreachable and have been removed.
fn getPrevGraphemeStartUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    var break_state: uucode.grapheme.BreakState = .default;
    var pos: usize = 0;
    var prev_cp: ?u21 = null;
    // Start of the most recent cluster; always < byte_offset because it is only
    // assigned from `pos` inside the loop (and starts at 0 with byte_offset > 0).
    var last_cluster_start: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var curr_cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            // A sequence cut off by the end of the text is treated as U+FFFD.
            curr_cp = if (pos + cp_len > text.len) 0xFFFD else dec.cp;
        }

        // Invalid codepoints are skipped: they neither start nor extend a cluster.
        if (isValidCodepoint(curr_cp)) {
            // NOTE(review): this calls uucode.grapheme.isBreak directly rather than
            // isGraphemeBreak(..., width_method) used by the other scanners, so any
            // width_method-specific break rules are not applied here - confirm intended.
            const is_break = if (prev_cp) |p|
                uucode.grapheme.isBreak(p, curr_cp, &break_state)
            else
                true;

            if (is_break) {
                last_cluster_start = pos;
            }

            prev_cp = curr_cp;
        }

        pos += cp_len;
    }

    return .{
        .start_offset = last_cluster_start,
        .width = getWidthAt(text, last_cluster_start, tab_width, width_method),
    };
}
1540
-
1541
/// Display width of `text` in columns, dispatching on `width_method`:
/// grapheme-cluster aware for `.unicode` / `.no_zwj`, per-codepoint for `.wcwidth`.
pub fn calculateTextWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .unicode, .no_zwj => calculateTextWidthUnicode(text, tab_width, isASCIIOnly, width_method),
        .wcwidth => calculateTextWidthWCWidth(text, tab_width, isASCIIOnly),
    };
}
1548
-
1549
/// Display width of `text`, summing the width of each grapheme cluster.
/// With `isASCIIOnly` set the byte length is returned directly.
fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: the width is reported as the byte count.
    if (isASCIIOnly) return @intCast(text.len);

    var total: u32 = 0;
    var pos: usize = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;
    // Assigned when the first cluster starts; only read once prev_cp is non-null.
    var cluster: GraphemeWidthState = undefined;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            // A sequence cut off by the end of the text is measured as U+FFFD.
            cp = if (pos + cp_len > text.len) 0xFFFD else dec.cp;
        }

        const at_break = isGraphemeBreak(prev_cp, cp, &break_state, width_method);
        const cp_width = charWidth(b0, cp, tab_width);

        if (at_break) {
            // Flush the finished cluster, then start a new one at this codepoint.
            if (prev_cp != null) total += cluster.width;
            cluster = GraphemeWidthState.init(cp, cp_width, width_method);
        } else {
            cluster.addCodepoint(cp, cp_width);
        }

        prev_cp = cp;
        pos += cp_len;
    }

    // Flush the last cluster.
    if (prev_cp != null) total += cluster.width;

    return total;
}
1597
-
1598
/// Display width of `text` with wcwidth-style measurement: the sum of the widths
/// of all codepoints, with no grapheme clustering.
/// With `isASCIIOnly` set the byte length is returned directly.
fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: the width is reported as the byte count.
    if (isASCIIOnly) return @intCast(text.len);

    var total: u32 = 0;
    var pos: usize = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            // A sequence cut off by the end of the text is measured as U+FFFD.
            cp = if (pos + cp_len > text.len) 0xFFFD else dec.cp;
        }

        total += charWidth(b0, cp, tab_width);
        pos += cp_len;
    }

    return total;
}
1628
-
1629
/// Cached description of one grapheme cluster.
pub const GraphemeInfo = struct {
    /// Byte offset of the cluster within the text.
    byte_offset: u32,
    /// Length of the cluster in bytes.
    byte_len: u8,
    /// Display width of the cluster in columns.
    width: u8,
    /// Column at which the cluster starts.
    col_offset: u32,
};
1636
-
1637
/// Owning container for a list of `GraphemeInfo` entries.
pub const GraphemeInfoResult = struct {
    graphemes: std.ArrayList(GraphemeInfo),

    /// Create an empty result backed by `allocator`.
    pub fn init(allocator: std.mem.Allocator) GraphemeInfoResult {
        const list = std.ArrayList(GraphemeInfo).init(allocator);
        return .{ .graphemes = list };
    }

    /// Release the list's memory.
    pub fn deinit(self: *GraphemeInfoResult) void {
        self.graphemes.deinit();
    }

    /// Drop all entries but keep the allocated capacity for reuse.
    pub fn reset(self: *GraphemeInfoResult) void {
        self.graphemes.clearRetainingCapacity();
    }
};
1654
-
1655
/// Collect grapheme information for `text` into `result`, dispatching on
/// `width_method`: `.unicode` and `.no_zwj` go to the grapheme-cluster
/// implementation, `.wcwidth` to the per-codepoint one. Entries are appended
/// using `allocator`; any error from the chosen implementation is propagated.
pub fn findGraphemeInfo(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    return switch (width_method) {
        .unicode, .no_zwj => findGraphemeInfoUnicode(text, tab_width, isASCIIOnly, width_method, allocator, result),
        .wcwidth => findGraphemeInfoWCWidth(text, tab_width, isASCIIOnly, allocator, result),
    };
}
1670
-
1671
/// Find grapheme clusters using Unicode grapheme cluster segmentation.
/// A whole cluster is treated as a single unit for width calculation.
///
/// Only clusters that contain at least one multi-byte codepoint, or that start
/// with a tab, are appended to `result`; every other cluster merely advances
/// the running column. Zero-width clusters are skipped unless `width_method`
/// is `.wcwidth`.
///
/// Parameters:
///   text         - bytes to scan; decoded with `decodeUtf8Unchecked`, so the
///                  input is presumably expected to be valid UTF-8 (TODO confirm).
///   tab_width    - forwarded to `asciiCharWidth` / `charWidth`.
///   isASCIIOnly  - caller hint; when true (and not in `.wcwidth` mode) the
///                  function returns immediately without appending anything.
///   width_method - forwarded to `isGraphemeBreak` and `GraphemeWidthState.init`.
///   allocator    - used for growing `result`.
///   result       - receives one `GraphemeInfo` per reported cluster.
///
/// Errors: propagates allocation failures from `result.append`.
fn findGraphemeInfoUnicode(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    // In wcwidth mode, always process, even when the caller says ASCII-only.
    if (isASCIIOnly and width_method != .wcwidth) return;
    if (text.len == 0) return;

    const vector_len = 16;
    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated. `cluster_width_state`
    // stays undefined until the first codepoint starts a cluster, so it is only
    // read behind a `prev_cp != null` check.
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;

    // Main loop: look at 16 bytes at a time so all-ASCII chunks can skip decoding.
    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        // Fast path: every byte in the chunk is ASCII.
        if (!@reduce(.Or, is_non_ascii)) {
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const b = text[pos + i];
                const curr_cp: u21 = b;

                if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
                    if (prev_cp != null) {
                        try appendFinishedGraphemeCluster(
                            allocator,
                            result,
                            width_method,
                            cluster_is_multibyte or cluster_is_tab,
                            cluster_start,
                            pos + i,
                            cluster_start_col,
                            cluster_width_state.width,
                        );
                        col += cluster_width_state.width;
                    }

                    cluster_start = pos + i;
                    cluster_start_col = col;
                    cluster_is_tab = (b == '\t');
                    cluster_is_multibyte = false;

                    const cp_width = asciiCharWidth(b, tab_width);
                    cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                } else {
                    // An ASCII byte extending the current cluster.
                    const cp_width = asciiCharWidth(b, tab_width);
                    cluster_width_state.addCodepoint(curr_cp, cp_width);
                }

                prev_cp = curr_cp;
            }
            pos += vector_len;
            continue;
        }

        // Slow path: the chunk mixes ASCII and non-ASCII bytes.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const b0 = text[pos + i];

            // Decode once and reuse both the codepoint and its byte length.
            var curr_cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, pos + i);
                curr_cp = dec.cp;
                cp_len = dec.len;
            }

            // The sequence claims more bytes than remain: stop here.
            if (pos + i + cp_len > text.len) break;

            if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
                if (prev_cp != null) {
                    try appendFinishedGraphemeCluster(
                        allocator,
                        result,
                        width_method,
                        cluster_is_multibyte or cluster_is_tab,
                        cluster_start,
                        pos + i,
                        cluster_start_col,
                        cluster_width_state.width,
                    );
                    col += cluster_width_state.width;
                }

                cluster_start = pos + i;
                cluster_start_col = col;
                cluster_is_tab = (b0 == '\t');
                cluster_is_multibyte = (cp_len != 1);

                const cp_width = charWidth(b0, curr_cp, tab_width);
                cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
            } else {
                cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
                const cp_width = charWidth(b0, curr_cp, tab_width);
                cluster_width_state.addCodepoint(curr_cp, cp_width);
            }

            prev_cp = curr_cp;
            i += cp_len;
        }
        // `i` may overshoot `vector_len` when a multi-byte codepoint straddles
        // the chunk boundary; advancing by `i` keeps `pos` on a codepoint start.
        pos += i;
    }

    // Tail: fewer than 16 bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];

        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        if (pos + cp_len > text.len) break;

        if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
            if (prev_cp != null) {
                try appendFinishedGraphemeCluster(
                    allocator,
                    result,
                    width_method,
                    cluster_is_multibyte or cluster_is_tab,
                    cluster_start,
                    pos,
                    cluster_start_col,
                    cluster_width_state.width,
                );
                col += cluster_width_state.width;
            }

            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);

            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
        } else {
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit the last cluster; it extends to the end of `text`.
    if (prev_cp != null) {
        try appendFinishedGraphemeCluster(
            allocator,
            result,
            width_method,
            cluster_is_multibyte or cluster_is_tab,
            cluster_start,
            text.len,
            cluster_start_col,
            cluster_width_state.width,
        );
    }
}

/// Append the finished cluster spanning bytes `[byte_start, byte_end)` to
/// `result`, provided it is reportable: `is_special` must be set (the cluster
/// holds a multi-byte codepoint or starts with a tab) and its width must be
/// non-zero unless `width_method` is `.wcwidth`. Otherwise this is a no-op.
fn appendFinishedGraphemeCluster(
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
    width_method: WidthMethod,
    is_special: bool,
    byte_start: usize,
    byte_end: usize,
    start_col: u32,
    width: u32,
) !void {
    if (!is_special) return;
    if (!(width > 0 or width_method == .wcwidth)) return;

    try result.append(allocator, GraphemeInfo{
        .byte_offset = @intCast(byte_start),
        .byte_len = @intCast(byte_end - byte_start),
        .width = @intCast(width),
        .col_offset = start_col,
    });
}
1852
-
1853
/// Find grapheme clusters for the wcwidth width method.
///
/// Cluster boundaries come from `isGraphemeBreak(..., .wcwidth)` and each
/// cluster's width is accumulated by `GraphemeWidthState` in `.wcwidth` mode.
/// NOTE(review): the earlier comments disagreed on whether this mode keeps
/// Unicode cluster boundaries (ZWJ sequences / combining marks kept together)
/// or splits every codepoint tmux-style; that is decided inside
/// `isGraphemeBreak`, which is not visible here — confirm there.
///
/// Only clusters that contain at least one multi-byte codepoint, or that start
/// with a tab, are appended to `result`; every other cluster only advances the
/// running column. Unlike the Unicode variant, zero-width clusters are reported.
///
/// Parameters:
///   text        - bytes to scan; decoded with `decodeUtf8Unchecked`.
///   tab_width   - forwarded to `charWidth`.
///   isASCIIOnly - caller hint; when true nothing is appended.
///   allocator   - used for growing `result`.
///   result      - receives one `GraphemeInfo` per reported cluster.
///
/// Errors: propagates allocation failures from `result.append`.
fn findGraphemeInfoWCWidth(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    if (isASCIIOnly) return;
    if (text.len == 0) return;

    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated. `cluster_width_state`
    // is only read once `cluster_started` is true.
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;
    var cluster_started = false;

    while (pos < text.len) {
        const b0 = text[pos];

        // Decode once and reuse both the codepoint and its byte length.
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        // The sequence claims more bytes than remain: stop scanning. (This is
        // why no replacement-character substitution is needed for that case.)
        if (pos + cp_len > text.len) break;

        const is_break = isGraphemeBreak(prev_cp, curr_cp, &break_state, .wcwidth);

        if (is_break) {
            if (cluster_started) {
                if (cluster_is_multibyte or cluster_is_tab) {
                    try result.append(allocator, GraphemeInfo{
                        .byte_offset = @intCast(cluster_start),
                        .byte_len = @intCast(pos - cluster_start),
                        .width = @intCast(cluster_width_state.width),
                        .col_offset = cluster_start_col,
                    });
                }
                // The column advances whether or not the cluster was reported.
                col += cluster_width_state.width;
            }

            // Start a new cluster at this codepoint.
            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, .wcwidth);
            cluster_started = true;
        } else {
            // Codepoint extends the current cluster.
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit the final cluster; it extends to the end of `text`. The column is
    // not advanced here because nothing reads it afterwards.
    if (cluster_started and (cluster_is_multibyte or cluster_is_tab)) {
        try result.append(allocator, GraphemeInfo{
            .byte_offset = @intCast(cluster_start),
            .byte_len = @intCast(text.len - cluster_start),
            .width = @intCast(cluster_width_state.width),
            .col_offset = cluster_start_col,
        });
    }
}