@fairyhunter13/opentui-core 0.1.112 → 0.1.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (591) hide show
  1. package/dev/keypress-debug-renderer.ts +148 -0
  2. package/dev/keypress-debug.ts +43 -0
  3. package/dev/print-env-vars.ts +32 -0
  4. package/dev/test-tmux-graphics-334.sh +68 -0
  5. package/dev/thai-debug-test.ts +68 -0
  6. package/docs/development.md +144 -0
  7. package/package.json +63 -51
  8. package/scripts/build.ts +400 -0
  9. package/scripts/publish.ts +60 -0
  10. package/src/3d/SpriteResourceManager.ts +286 -0
  11. package/src/3d/SpriteUtils.ts +70 -0
  12. package/src/3d/TextureUtils.ts +196 -0
  13. package/src/3d/ThreeRenderable.ts +197 -0
  14. package/src/3d/WGPURenderer.ts +294 -0
  15. package/src/3d/animation/ExplodingSpriteEffect.ts +513 -0
  16. package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +429 -0
  17. package/src/3d/animation/SpriteAnimator.ts +633 -0
  18. package/src/3d/animation/SpriteParticleGenerator.ts +435 -0
  19. package/src/3d/canvas.ts +464 -0
  20. package/src/3d/index.ts +12 -0
  21. package/src/3d/physics/PlanckPhysicsAdapter.ts +72 -0
  22. package/src/3d/physics/RapierPhysicsAdapter.ts +66 -0
  23. package/src/3d/physics/physics-interface.ts +31 -0
  24. package/src/3d/shaders/supersampling.wgsl +201 -0
  25. package/src/3d.ts +3 -0
  26. package/src/NativeSpanFeed.ts +300 -0
  27. package/src/Renderable.ts +1704 -0
  28. package/src/__snapshots__/buffer.test.ts.snap +28 -0
  29. package/src/animation/Timeline.test.ts +2709 -0
  30. package/src/animation/Timeline.ts +598 -0
  31. package/src/ansi.ts +18 -0
  32. package/src/benchmark/attenuation-benchmark.ts +81 -0
  33. package/src/benchmark/colormatrix-benchmark.ts +128 -0
  34. package/src/benchmark/gain-benchmark.ts +80 -0
  35. package/src/benchmark/latest-all-bench-run.json +707 -0
  36. package/src/benchmark/latest-async-bench-run.json +336 -0
  37. package/src/benchmark/latest-default-bench-run.json +657 -0
  38. package/src/benchmark/latest-large-bench-run.json +707 -0
  39. package/src/benchmark/latest-quick-bench-run.json +207 -0
  40. package/src/benchmark/markdown-benchmark.ts +1796 -0
  41. package/src/benchmark/native-span-feed-async-benchmark.ts +355 -0
  42. package/src/benchmark/native-span-feed-benchmark.md +56 -0
  43. package/src/benchmark/native-span-feed-benchmark.ts +596 -0
  44. package/src/benchmark/native-span-feed-compare.ts +280 -0
  45. package/src/benchmark/renderer-benchmark.ts +754 -0
  46. package/src/benchmark/text-table-benchmark.ts +948 -0
  47. package/src/buffer.test.ts +291 -0
  48. package/src/buffer.ts +554 -0
  49. package/src/console.test.ts +612 -0
  50. package/src/console.ts +1254 -0
  51. package/src/edit-buffer.test.ts +1769 -0
  52. package/src/edit-buffer.ts +411 -0
  53. package/src/editor-view.test.ts +1032 -0
  54. package/src/editor-view.ts +284 -0
  55. package/src/examples/ascii-font-selection-demo.ts +245 -0
  56. package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
  57. package/src/examples/assets/concrete.png +0 -0
  58. package/src/examples/assets/crate.png +0 -0
  59. package/src/examples/assets/crate_emissive.png +0 -0
  60. package/src/examples/assets/forrest_background.png +0 -0
  61. package/src/examples/assets/hast-example.json +1018 -0
  62. package/src/examples/assets/heart.png +0 -0
  63. package/src/examples/assets/main_char_heavy_attack.png +0 -0
  64. package/src/examples/assets/main_char_idle.png +0 -0
  65. package/src/examples/assets/main_char_jump_end.png +0 -0
  66. package/src/examples/assets/main_char_jump_landing.png +0 -0
  67. package/src/examples/assets/main_char_jump_start.png +0 -0
  68. package/src/examples/assets/main_char_run_loop.png +0 -0
  69. package/src/examples/assets/roughness_map.jpg +0 -0
  70. package/src/examples/build.ts +115 -0
  71. package/src/examples/code-demo.ts +924 -0
  72. package/src/examples/console-demo.ts +358 -0
  73. package/src/examples/core-plugin-slots-demo.ts +759 -0
  74. package/src/examples/diff-demo.ts +701 -0
  75. package/src/examples/draggable-three-demo.ts +259 -0
  76. package/src/examples/editor-demo.ts +322 -0
  77. package/src/examples/extmarks-demo.ts +196 -0
  78. package/src/examples/focus-restore-demo.ts +310 -0
  79. package/src/examples/fonts.ts +245 -0
  80. package/src/examples/fractal-shader-demo.ts +268 -0
  81. package/src/examples/framebuffer-demo.ts +674 -0
  82. package/src/examples/full-unicode-demo.ts +241 -0
  83. package/src/examples/golden-star-demo.ts +933 -0
  84. package/src/examples/grayscale-buffer-demo.ts +249 -0
  85. package/src/examples/hast-syntax-highlighting-demo.ts +129 -0
  86. package/src/examples/index.ts +926 -0
  87. package/src/examples/input-demo.ts +377 -0
  88. package/src/examples/input-select-layout-demo.ts +425 -0
  89. package/src/examples/install.sh +143 -0
  90. package/src/examples/keypress-debug-demo.ts +452 -0
  91. package/src/examples/lib/HexList.ts +122 -0
  92. package/src/examples/lib/PaletteGrid.ts +125 -0
  93. package/src/examples/lib/standalone-keys.ts +25 -0
  94. package/src/examples/lib/tab-controller.ts +243 -0
  95. package/src/examples/lights-phong-demo.ts +290 -0
  96. package/src/examples/link-demo.ts +220 -0
  97. package/src/examples/live-state-demo.ts +480 -0
  98. package/src/examples/markdown-demo.ts +725 -0
  99. package/src/examples/mouse-interaction-demo.ts +428 -0
  100. package/src/examples/nested-zindex-demo.ts +357 -0
  101. package/src/examples/opacity-example.ts +235 -0
  102. package/src/examples/opentui-demo.ts +1057 -0
  103. package/src/examples/physx-planck-2d-demo.ts +623 -0
  104. package/src/examples/physx-rapier-2d-demo.ts +655 -0
  105. package/src/examples/relative-positioning-demo.ts +323 -0
  106. package/src/examples/scroll-example.ts +214 -0
  107. package/src/examples/scrollbox-mouse-test.ts +112 -0
  108. package/src/examples/scrollbox-overlay-hit-test.ts +206 -0
  109. package/src/examples/select-demo.ts +237 -0
  110. package/src/examples/shader-cube-demo.ts +1015 -0
  111. package/src/examples/simple-layout-example.ts +591 -0
  112. package/src/examples/slider-demo.ts +617 -0
  113. package/src/examples/split-mode-demo.ts +453 -0
  114. package/src/examples/sprite-animation-demo.ts +443 -0
  115. package/src/examples/sprite-particle-generator-demo.ts +486 -0
  116. package/src/examples/static-sprite-demo.ts +193 -0
  117. package/src/examples/sticky-scroll-example.ts +308 -0
  118. package/src/examples/styled-text-demo.ts +282 -0
  119. package/src/examples/tab-select-demo.ts +219 -0
  120. package/src/examples/terminal-title.ts +29 -0
  121. package/src/examples/terminal.ts +305 -0
  122. package/src/examples/text-node-demo.ts +416 -0
  123. package/src/examples/text-selection-demo.ts +377 -0
  124. package/src/examples/text-table-demo.ts +503 -0
  125. package/src/examples/text-truncation-demo.ts +481 -0
  126. package/src/examples/text-wrap.ts +757 -0
  127. package/src/examples/texture-loading-demo.ts +259 -0
  128. package/src/examples/timeline-example.ts +670 -0
  129. package/src/examples/transparency-demo.ts +400 -0
  130. package/src/examples/vnode-composition-demo.ts +404 -0
  131. package/src/examples/wide-grapheme-overlay-demo.ts +280 -0
  132. package/src/index.ts +24 -0
  133. package/src/lib/KeyHandler.integration.test.ts +292 -0
  134. package/src/lib/KeyHandler.stopPropagation.test.ts +289 -0
  135. package/src/lib/KeyHandler.test.ts +662 -0
  136. package/src/lib/KeyHandler.ts +222 -0
  137. package/src/lib/RGBA.test.ts +984 -0
  138. package/src/lib/RGBA.ts +204 -0
  139. package/src/lib/ascii.font.ts +330 -0
  140. package/src/lib/border.test.ts +83 -0
  141. package/src/lib/border.ts +170 -0
  142. package/src/lib/bunfs.test.ts +27 -0
  143. package/src/lib/bunfs.ts +18 -0
  144. package/src/lib/clipboard.test.ts +41 -0
  145. package/src/lib/clipboard.ts +47 -0
  146. package/src/lib/clock.ts +35 -0
  147. package/src/lib/data-paths.test.ts +133 -0
  148. package/src/lib/data-paths.ts +109 -0
  149. package/src/lib/debounce.ts +106 -0
  150. package/src/lib/detect-links.test.ts +98 -0
  151. package/src/lib/detect-links.ts +56 -0
  152. package/src/lib/env.test.ts +228 -0
  153. package/src/lib/env.ts +209 -0
  154. package/src/lib/extmarks-history.ts +51 -0
  155. package/src/lib/extmarks-multiwidth.test.ts +322 -0
  156. package/src/lib/extmarks.test.ts +3457 -0
  157. package/src/lib/extmarks.ts +843 -0
  158. package/src/lib/fonts/block.json +405 -0
  159. package/src/lib/fonts/grid.json +265 -0
  160. package/src/lib/fonts/huge.json +741 -0
  161. package/src/lib/fonts/pallet.json +314 -0
  162. package/src/lib/fonts/shade.json +591 -0
  163. package/src/lib/fonts/slick.json +321 -0
  164. package/src/lib/fonts/tiny.json +69 -0
  165. package/src/lib/hast-styled-text.ts +59 -0
  166. package/src/lib/index.ts +21 -0
  167. package/src/lib/keymapping.test.ts +317 -0
  168. package/src/lib/keymapping.ts +115 -0
  169. package/src/lib/objects-in-viewport.test.ts +787 -0
  170. package/src/lib/objects-in-viewport.ts +153 -0
  171. package/src/lib/output.capture.ts +58 -0
  172. package/src/lib/parse.keypress-kitty.protocol.test.ts +340 -0
  173. package/src/lib/parse.keypress-kitty.test.ts +663 -0
  174. package/src/lib/parse.keypress-kitty.ts +439 -0
  175. package/src/lib/parse.keypress.test.ts +1849 -0
  176. package/src/lib/parse.keypress.ts +397 -0
  177. package/src/lib/parse.mouse.test.ts +552 -0
  178. package/src/lib/parse.mouse.ts +232 -0
  179. package/src/lib/paste.ts +16 -0
  180. package/src/lib/queue.ts +65 -0
  181. package/src/lib/renderable.validations.test.ts +87 -0
  182. package/src/lib/renderable.validations.ts +83 -0
  183. package/src/lib/scroll-acceleration.ts +98 -0
  184. package/src/lib/selection.ts +240 -0
  185. package/src/lib/singleton.ts +28 -0
  186. package/src/lib/stdin-parser.test.ts +2290 -0
  187. package/src/lib/stdin-parser.ts +1810 -0
  188. package/src/lib/styled-text.ts +178 -0
  189. package/src/lib/terminal-capability-detection.test.ts +202 -0
  190. package/src/lib/terminal-capability-detection.ts +79 -0
  191. package/src/lib/terminal-palette.test.ts +878 -0
  192. package/src/lib/terminal-palette.ts +383 -0
  193. package/src/lib/tree-sitter/assets/README.md +118 -0
  194. package/src/lib/tree-sitter/assets/update.ts +334 -0
  195. package/src/lib/tree-sitter/assets.d.ts +9 -0
  196. package/src/lib/tree-sitter/cache.test.ts +273 -0
  197. package/src/lib/tree-sitter/client.test.ts +1165 -0
  198. package/src/lib/tree-sitter/client.ts +607 -0
  199. package/src/lib/tree-sitter/default-parsers.ts +86 -0
  200. package/src/lib/tree-sitter/download-utils.ts +148 -0
  201. package/src/lib/tree-sitter/index.ts +28 -0
  202. package/src/lib/tree-sitter/parser.worker.ts +1042 -0
  203. package/src/lib/tree-sitter/parsers-config.ts +81 -0
  204. package/src/lib/tree-sitter/resolve-ft.test.ts +55 -0
  205. package/src/lib/tree-sitter/resolve-ft.ts +189 -0
  206. package/src/lib/tree-sitter/types.ts +82 -0
  207. package/src/lib/tree-sitter-styled-text.test.ts +1253 -0
  208. package/src/lib/tree-sitter-styled-text.ts +306 -0
  209. package/src/lib/validate-dir-name.ts +55 -0
  210. package/src/lib/yoga.options.test.ts +628 -0
  211. package/src/lib/yoga.options.ts +346 -0
  212. package/src/plugins/core-slot.ts +579 -0
  213. package/src/plugins/registry.ts +402 -0
  214. package/src/plugins/types.ts +46 -0
  215. package/src/post/effects.ts +930 -0
  216. package/src/post/filters.ts +489 -0
  217. package/src/post/matrices.ts +288 -0
  218. package/src/renderables/ASCIIFont.ts +219 -0
  219. package/src/renderables/Box.test.ts +205 -0
  220. package/src/renderables/Box.ts +326 -0
  221. package/src/renderables/Code.test.ts +2062 -0
  222. package/src/renderables/Code.ts +357 -0
  223. package/src/renderables/Diff.regression.test.ts +226 -0
  224. package/src/renderables/Diff.test.ts +3101 -0
  225. package/src/renderables/Diff.ts +1211 -0
  226. package/src/renderables/EditBufferRenderable.test.ts +288 -0
  227. package/src/renderables/EditBufferRenderable.ts +1166 -0
  228. package/src/renderables/FrameBuffer.ts +47 -0
  229. package/src/renderables/Input.test.ts +1228 -0
  230. package/src/renderables/Input.ts +247 -0
  231. package/src/renderables/LineNumberRenderable.ts +724 -0
  232. package/src/renderables/Markdown.ts +1393 -0
  233. package/src/renderables/ScrollBar.ts +422 -0
  234. package/src/renderables/ScrollBox.ts +883 -0
  235. package/src/renderables/Select.test.ts +1033 -0
  236. package/src/renderables/Select.ts +524 -0
  237. package/src/renderables/Slider.test.ts +456 -0
  238. package/src/renderables/Slider.ts +342 -0
  239. package/src/renderables/TabSelect.test.ts +197 -0
  240. package/src/renderables/TabSelect.ts +455 -0
  241. package/src/renderables/Text.selection-buffer.test.ts +123 -0
  242. package/src/renderables/Text.test.ts +2660 -0
  243. package/src/renderables/Text.ts +147 -0
  244. package/src/renderables/TextBufferRenderable.ts +518 -0
  245. package/src/renderables/TextNode.test.ts +1058 -0
  246. package/src/renderables/TextNode.ts +325 -0
  247. package/src/renderables/TextTable.test.ts +1421 -0
  248. package/src/renderables/TextTable.ts +1344 -0
  249. package/src/renderables/Textarea.ts +430 -0
  250. package/src/renderables/TimeToFirstDraw.ts +89 -0
  251. package/src/renderables/__snapshots__/Code.test.ts.snap +13 -0
  252. package/src/renderables/__snapshots__/Diff.test.ts.snap +785 -0
  253. package/src/renderables/__snapshots__/Text.test.ts.snap +421 -0
  254. package/src/renderables/__snapshots__/TextTable.test.ts.snap +215 -0
  255. package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +144 -0
  256. package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +816 -0
  257. package/src/renderables/__tests__/LineNumberRenderable.test.ts +1865 -0
  258. package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +85 -0
  259. package/src/renderables/__tests__/Markdown.code-colors.test.ts +242 -0
  260. package/src/renderables/__tests__/Markdown.test.ts +2518 -0
  261. package/src/renderables/__tests__/MultiRenderable.selection.test.ts +87 -0
  262. package/src/renderables/__tests__/Textarea.buffer.test.ts +682 -0
  263. package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +675 -0
  264. package/src/renderables/__tests__/Textarea.editing.test.ts +2041 -0
  265. package/src/renderables/__tests__/Textarea.error-handling.test.ts +35 -0
  266. package/src/renderables/__tests__/Textarea.events.test.ts +738 -0
  267. package/src/renderables/__tests__/Textarea.highlights.test.ts +590 -0
  268. package/src/renderables/__tests__/Textarea.keybinding.test.ts +3149 -0
  269. package/src/renderables/__tests__/Textarea.paste.test.ts +357 -0
  270. package/src/renderables/__tests__/Textarea.rendering.test.ts +1866 -0
  271. package/src/renderables/__tests__/Textarea.scroll.test.ts +733 -0
  272. package/src/renderables/__tests__/Textarea.selection.test.ts +1590 -0
  273. package/src/renderables/__tests__/Textarea.stress.test.ts +670 -0
  274. package/src/renderables/__tests__/Textarea.undo-redo.test.ts +383 -0
  275. package/src/renderables/__tests__/Textarea.visual-lines.test.ts +310 -0
  276. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +221 -0
  277. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +89 -0
  278. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +457 -0
  279. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +158 -0
  280. package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +387 -0
  281. package/src/renderables/__tests__/markdown-parser.test.ts +217 -0
  282. package/src/renderables/__tests__/renderable-test-utils.ts +60 -0
  283. package/src/renderables/composition/README.md +8 -0
  284. package/src/renderables/composition/VRenderable.ts +32 -0
  285. package/src/renderables/composition/constructs.ts +127 -0
  286. package/src/renderables/composition/vnode.ts +289 -0
  287. package/src/renderables/index.ts +23 -0
  288. package/src/renderables/markdown-parser.ts +66 -0
  289. package/src/renderer.ts +2681 -0
  290. package/src/runtime-plugin-support.ts +39 -0
  291. package/src/runtime-plugin.ts +615 -0
  292. package/src/syntax-style.test.ts +841 -0
  293. package/src/syntax-style.ts +257 -0
  294. package/src/testing/README.md +210 -0
  295. package/src/testing/capture-spans.test.ts +194 -0
  296. package/src/testing/integration.test.ts +276 -0
  297. package/src/testing/manual-clock.ts +117 -0
  298. package/src/testing/mock-keys.test.ts +1378 -0
  299. package/src/testing/mock-keys.ts +457 -0
  300. package/src/testing/mock-mouse.test.ts +218 -0
  301. package/src/testing/mock-mouse.ts +247 -0
  302. package/src/testing/mock-tree-sitter-client.ts +73 -0
  303. package/src/testing/spy.ts +13 -0
  304. package/src/testing/test-recorder.test.ts +415 -0
  305. package/src/testing/test-recorder.ts +145 -0
  306. package/src/testing/test-renderer.ts +132 -0
  307. package/src/testing.ts +7 -0
  308. package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +481 -0
  309. package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +19 -0
  310. package/src/tests/__snapshots__/scrollbox.test.ts.snap +29 -0
  311. package/src/tests/absolute-positioning.snapshot.test.ts +638 -0
  312. package/src/tests/allocator-stats.test.ts +38 -0
  313. package/src/tests/destroy-during-render.test.ts +200 -0
  314. package/src/tests/destroy-on-exit.fixture.ts +36 -0
  315. package/src/tests/destroy-on-exit.test.ts +41 -0
  316. package/src/tests/hover-cursor.test.ts +98 -0
  317. package/src/tests/native-span-feed-async.test.ts +173 -0
  318. package/src/tests/native-span-feed-close.test.ts +120 -0
  319. package/src/tests/native-span-feed-coverage.test.ts +227 -0
  320. package/src/tests/native-span-feed-edge-cases.test.ts +352 -0
  321. package/src/tests/native-span-feed-use-after-free.test.ts +45 -0
  322. package/src/tests/opacity.test.ts +123 -0
  323. package/src/tests/renderable.snapshot.test.ts +524 -0
  324. package/src/tests/renderable.test.ts +1281 -0
  325. package/src/tests/renderer.clock.test.ts +158 -0
  326. package/src/tests/renderer.console-startup.test.ts +185 -0
  327. package/src/tests/renderer.control.test.ts +425 -0
  328. package/src/tests/renderer.core-slot-binding.test.ts +952 -0
  329. package/src/tests/renderer.cursor.test.ts +26 -0
  330. package/src/tests/renderer.destroy-during-render.test.ts +147 -0
  331. package/src/tests/renderer.focus-restore.test.ts +257 -0
  332. package/src/tests/renderer.focus.test.ts +294 -0
  333. package/src/tests/renderer.idle.test.ts +219 -0
  334. package/src/tests/renderer.input.test.ts +2237 -0
  335. package/src/tests/renderer.kitty-flags.test.ts +195 -0
  336. package/src/tests/renderer.mouse.test.ts +1274 -0
  337. package/src/tests/renderer.palette.test.ts +629 -0
  338. package/src/tests/renderer.selection.test.ts +49 -0
  339. package/src/tests/renderer.slot-registry.test.ts +684 -0
  340. package/src/tests/renderer.useMouse.test.ts +47 -0
  341. package/src/tests/runtime-plugin-node-modules-cycle.fixture.ts +76 -0
  342. package/src/tests/runtime-plugin-node-modules-mjs.fixture.ts +43 -0
  343. package/src/tests/runtime-plugin-node-modules-no-bare-rewrite.fixture.ts +67 -0
  344. package/src/tests/runtime-plugin-node-modules-package-type-cache.fixture.ts +72 -0
  345. package/src/tests/runtime-plugin-node-modules-runtime-specifier.fixture.ts +44 -0
  346. package/src/tests/runtime-plugin-node-modules-scoped-package-bare-rewrite.fixture.ts +85 -0
  347. package/src/tests/runtime-plugin-path-alias.fixture.ts +43 -0
  348. package/src/tests/runtime-plugin-resolve-roots.fixture.ts +65 -0
  349. package/src/tests/runtime-plugin-support.fixture.ts +11 -0
  350. package/src/tests/runtime-plugin-support.test.ts +19 -0
  351. package/src/tests/runtime-plugin-windows-file-url.fixture.ts +30 -0
  352. package/src/tests/runtime-plugin.fixture.ts +40 -0
  353. package/src/tests/runtime-plugin.test.ts +354 -0
  354. package/src/tests/scrollbox-culling-bug.test.ts +114 -0
  355. package/src/tests/scrollbox-hitgrid-resize.test.ts +136 -0
  356. package/src/tests/scrollbox-hitgrid.test.ts +909 -0
  357. package/src/tests/scrollbox.test.ts +1530 -0
  358. package/src/tests/wrap-resize-perf.test.ts +276 -0
  359. package/src/tests/yoga-setters.test.ts +921 -0
  360. package/src/text-buffer-view.test.ts +705 -0
  361. package/src/text-buffer-view.ts +189 -0
  362. package/src/text-buffer.test.ts +347 -0
  363. package/src/text-buffer.ts +250 -0
  364. package/src/types.ts +161 -0
  365. package/src/utils.ts +88 -0
  366. package/src/zig/ansi.zig +268 -0
  367. package/src/zig/bench/README.md +50 -0
  368. package/src/zig/bench/buffer-draw-text-buffer_bench.zig +887 -0
  369. package/src/zig/bench/edit-buffer_bench.zig +476 -0
  370. package/src/zig/bench/native-span-feed_bench.zig +100 -0
  371. package/src/zig/bench/rope-markers_bench.zig +713 -0
  372. package/src/zig/bench/rope_bench.zig +514 -0
  373. package/src/zig/bench/styled-text_bench.zig +470 -0
  374. package/src/zig/bench/text-buffer-coords_bench.zig +362 -0
  375. package/src/zig/bench/text-buffer-view_bench.zig +459 -0
  376. package/src/zig/bench/text-chunk-graphemes_bench.zig +273 -0
  377. package/src/zig/bench/utf8_bench.zig +799 -0
  378. package/src/zig/bench-utils.zig +431 -0
  379. package/src/zig/bench.zig +217 -0
  380. package/src/zig/buffer-methods.zig +211 -0
  381. package/src/zig/buffer.zig +2281 -0
  382. package/src/zig/build.zig +289 -0
  383. package/src/zig/build.zig.zon +16 -0
  384. package/src/zig/edit-buffer.zig +825 -0
  385. package/src/zig/editor-view.zig +802 -0
  386. package/src/zig/event-bus.zig +13 -0
  387. package/src/zig/event-emitter.zig +65 -0
  388. package/src/zig/file-logger.zig +92 -0
  389. package/src/zig/grapheme.zig +599 -0
  390. package/src/zig/lib.zig +1854 -0
  391. package/src/zig/link.zig +333 -0
  392. package/src/zig/logger.zig +43 -0
  393. package/src/zig/mem-registry.zig +125 -0
  394. package/src/zig/native-span-feed-bench-lib.zig +7 -0
  395. package/src/zig/native-span-feed.zig +708 -0
  396. package/src/zig/renderer.zig +1393 -0
  397. package/src/zig/rope.zig +1220 -0
  398. package/src/zig/syntax-style.zig +161 -0
  399. package/src/zig/terminal.zig +987 -0
  400. package/src/zig/test.zig +72 -0
  401. package/src/zig/tests/README.md +18 -0
  402. package/src/zig/tests/buffer-methods_test.zig +1109 -0
  403. package/src/zig/tests/buffer_test.zig +2557 -0
  404. package/src/zig/tests/edit-buffer-history_test.zig +271 -0
  405. package/src/zig/tests/edit-buffer_test.zig +1689 -0
  406. package/src/zig/tests/editor-view_test.zig +3299 -0
  407. package/src/zig/tests/event-emitter_test.zig +249 -0
  408. package/src/zig/tests/grapheme_test.zig +1304 -0
  409. package/src/zig/tests/link_test.zig +190 -0
  410. package/src/zig/tests/mem-registry_test.zig +473 -0
  411. package/src/zig/tests/memory_leak_regression_test.zig +159 -0
  412. package/src/zig/tests/native-span-feed_test.zig +1264 -0
  413. package/src/zig/tests/renderer_test.zig +1017 -0
  414. package/src/zig/tests/rope-nested_test.zig +712 -0
  415. package/src/zig/tests/rope_fuzz_test.zig +238 -0
  416. package/src/zig/tests/rope_test.zig +2362 -0
  417. package/src/zig/tests/segment-merge.test.zig +148 -0
  418. package/src/zig/tests/syntax-style_test.zig +557 -0
  419. package/src/zig/tests/terminal_test.zig +754 -0
  420. package/src/zig/tests/text-buffer-drawing_test.zig +3237 -0
  421. package/src/zig/tests/text-buffer-highlights_test.zig +666 -0
  422. package/src/zig/tests/text-buffer-iterators_test.zig +776 -0
  423. package/src/zig/tests/text-buffer-segment_test.zig +320 -0
  424. package/src/zig/tests/text-buffer-selection_test.zig +1035 -0
  425. package/src/zig/tests/text-buffer-selection_viewport_test.zig +358 -0
  426. package/src/zig/tests/text-buffer-view_test.zig +3649 -0
  427. package/src/zig/tests/text-buffer_test.zig +2191 -0
  428. package/src/zig/tests/unicode-width-map.zon +3909 -0
  429. package/src/zig/tests/utf8_no_zwj_test.zig +260 -0
  430. package/src/zig/tests/utf8_test.zig +4057 -0
  431. package/src/zig/tests/utf8_wcwidth_cursor_test.zig +267 -0
  432. package/src/zig/tests/utf8_wcwidth_test.zig +357 -0
  433. package/src/zig/tests/word-wrap-editing_test.zig +498 -0
  434. package/src/zig/tests/wrap-cache-perf_test.zig +113 -0
  435. package/src/zig/text-buffer-iterators.zig +499 -0
  436. package/src/zig/text-buffer-segment.zig +404 -0
  437. package/src/zig/text-buffer-view.zig +1371 -0
  438. package/src/zig/text-buffer.zig +1180 -0
  439. package/src/zig/utf8.zig +1948 -0
  440. package/src/zig/utils.zig +9 -0
  441. package/src/zig-structs.ts +261 -0
  442. package/src/zig.ts +3884 -0
  443. package/tsconfig.build.json +24 -0
  444. package/tsconfig.json +27 -0
  445. package/3d/SpriteResourceManager.d.ts +0 -74
  446. package/3d/SpriteUtils.d.ts +0 -13
  447. package/3d/TextureUtils.d.ts +0 -24
  448. package/3d/ThreeRenderable.d.ts +0 -40
  449. package/3d/WGPURenderer.d.ts +0 -61
  450. package/3d/animation/ExplodingSpriteEffect.d.ts +0 -71
  451. package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +0 -76
  452. package/3d/animation/SpriteAnimator.d.ts +0 -124
  453. package/3d/animation/SpriteParticleGenerator.d.ts +0 -62
  454. package/3d/canvas.d.ts +0 -44
  455. package/3d/index.d.ts +0 -12
  456. package/3d/physics/PlanckPhysicsAdapter.d.ts +0 -19
  457. package/3d/physics/RapierPhysicsAdapter.d.ts +0 -19
  458. package/3d/physics/physics-interface.d.ts +0 -27
  459. package/3d.d.ts +0 -2
  460. package/3d.js +0 -34041
  461. package/3d.js.map +0 -155
  462. package/LICENSE +0 -21
  463. package/NativeSpanFeed.d.ts +0 -41
  464. package/Renderable.d.ts +0 -334
  465. package/animation/Timeline.d.ts +0 -126
  466. package/ansi.d.ts +0 -13
  467. package/buffer.d.ts +0 -111
  468. package/console.d.ts +0 -144
  469. package/edit-buffer.d.ts +0 -98
  470. package/editor-view.d.ts +0 -73
  471. package/index-8fks7yv1.js +0 -411
  472. package/index-8fks7yv1.js.map +0 -10
  473. package/index-egy5e2rs.js +0 -12267
  474. package/index-egy5e2rs.js.map +0 -42
  475. package/index-tse8gzh0.js +0 -20614
  476. package/index-tse8gzh0.js.map +0 -67
  477. package/index.d.ts +0 -23
  478. package/index.js +0 -478
  479. package/index.js.map +0 -9
  480. package/lib/KeyHandler.d.ts +0 -61
  481. package/lib/RGBA.d.ts +0 -25
  482. package/lib/ascii.font.d.ts +0 -508
  483. package/lib/border.d.ts +0 -51
  484. package/lib/bunfs.d.ts +0 -7
  485. package/lib/clipboard.d.ts +0 -17
  486. package/lib/clock.d.ts +0 -15
  487. package/lib/data-paths.d.ts +0 -26
  488. package/lib/debounce.d.ts +0 -42
  489. package/lib/detect-links.d.ts +0 -6
  490. package/lib/env.d.ts +0 -42
  491. package/lib/extmarks-history.d.ts +0 -17
  492. package/lib/extmarks.d.ts +0 -89
  493. package/lib/hast-styled-text.d.ts +0 -17
  494. package/lib/index.d.ts +0 -21
  495. package/lib/keymapping.d.ts +0 -25
  496. package/lib/objects-in-viewport.d.ts +0 -24
  497. package/lib/output.capture.d.ts +0 -24
  498. package/lib/parse.keypress-kitty.d.ts +0 -2
  499. package/lib/parse.keypress.d.ts +0 -26
  500. package/lib/parse.mouse.d.ts +0 -30
  501. package/lib/paste.d.ts +0 -7
  502. package/lib/queue.d.ts +0 -15
  503. package/lib/renderable.validations.d.ts +0 -12
  504. package/lib/scroll-acceleration.d.ts +0 -43
  505. package/lib/selection.d.ts +0 -63
  506. package/lib/singleton.d.ts +0 -7
  507. package/lib/stdin-parser.d.ts +0 -87
  508. package/lib/styled-text.d.ts +0 -63
  509. package/lib/terminal-capability-detection.d.ts +0 -30
  510. package/lib/terminal-palette.d.ts +0 -50
  511. package/lib/tree-sitter/assets/update.d.ts +0 -11
  512. package/lib/tree-sitter/client.d.ts +0 -47
  513. package/lib/tree-sitter/default-parsers.d.ts +0 -2
  514. package/lib/tree-sitter/download-utils.d.ts +0 -21
  515. package/lib/tree-sitter/index.d.ts +0 -8
  516. package/lib/tree-sitter/parser.worker.d.ts +0 -1
  517. package/lib/tree-sitter/parsers-config.d.ts +0 -53
  518. package/lib/tree-sitter/resolve-ft.d.ts +0 -5
  519. package/lib/tree-sitter/types.d.ts +0 -82
  520. package/lib/tree-sitter-styled-text.d.ts +0 -14
  521. package/lib/validate-dir-name.d.ts +0 -1
  522. package/lib/yoga.options.d.ts +0 -32
  523. package/parser.worker.js +0 -899
  524. package/parser.worker.js.map +0 -12
  525. package/plugins/core-slot.d.ts +0 -72
  526. package/plugins/registry.d.ts +0 -42
  527. package/plugins/types.d.ts +0 -34
  528. package/post/effects.d.ts +0 -147
  529. package/post/filters.d.ts +0 -65
  530. package/post/matrices.d.ts +0 -20
  531. package/renderables/ASCIIFont.d.ts +0 -52
  532. package/renderables/Box.d.ts +0 -81
  533. package/renderables/Code.d.ts +0 -78
  534. package/renderables/Diff.d.ts +0 -142
  535. package/renderables/EditBufferRenderable.d.ts +0 -237
  536. package/renderables/FrameBuffer.d.ts +0 -16
  537. package/renderables/Input.d.ts +0 -67
  538. package/renderables/LineNumberRenderable.d.ts +0 -78
  539. package/renderables/Markdown.d.ts +0 -185
  540. package/renderables/ScrollBar.d.ts +0 -77
  541. package/renderables/ScrollBox.d.ts +0 -124
  542. package/renderables/Select.d.ts +0 -115
  543. package/renderables/Slider.d.ts +0 -47
  544. package/renderables/TabSelect.d.ts +0 -96
  545. package/renderables/Text.d.ts +0 -36
  546. package/renderables/TextBufferRenderable.d.ts +0 -105
  547. package/renderables/TextNode.d.ts +0 -91
  548. package/renderables/TextTable.d.ts +0 -140
  549. package/renderables/Textarea.d.ts +0 -63
  550. package/renderables/TimeToFirstDraw.d.ts +0 -24
  551. package/renderables/__tests__/renderable-test-utils.d.ts +0 -12
  552. package/renderables/composition/VRenderable.d.ts +0 -16
  553. package/renderables/composition/constructs.d.ts +0 -35
  554. package/renderables/composition/vnode.d.ts +0 -46
  555. package/renderables/index.d.ts +0 -23
  556. package/renderables/markdown-parser.d.ts +0 -10
  557. package/renderer.d.ts +0 -419
  558. package/runtime-plugin-support.d.ts +0 -3
  559. package/runtime-plugin-support.js +0 -29
  560. package/runtime-plugin-support.js.map +0 -10
  561. package/runtime-plugin.d.ts +0 -16
  562. package/runtime-plugin.js +0 -16
  563. package/runtime-plugin.js.map +0 -9
  564. package/syntax-style.d.ts +0 -54
  565. package/testing/manual-clock.d.ts +0 -17
  566. package/testing/mock-keys.d.ts +0 -81
  567. package/testing/mock-mouse.d.ts +0 -38
  568. package/testing/mock-tree-sitter-client.d.ts +0 -23
  569. package/testing/spy.d.ts +0 -7
  570. package/testing/test-recorder.d.ts +0 -61
  571. package/testing/test-renderer.d.ts +0 -23
  572. package/testing.d.ts +0 -6
  573. package/testing.js +0 -697
  574. package/testing.js.map +0 -15
  575. package/text-buffer-view.d.ts +0 -42
  576. package/text-buffer.d.ts +0 -67
  577. package/types.d.ts +0 -139
  578. package/utils.d.ts +0 -14
  579. package/zig-structs.d.ts +0 -155
  580. package/zig.d.ts +0 -353
  581. /package/{assets → src/lib/tree-sitter/assets}/javascript/highlights.scm +0 -0
  582. /package/{assets → src/lib/tree-sitter/assets}/javascript/tree-sitter-javascript.wasm +0 -0
  583. /package/{assets → src/lib/tree-sitter/assets}/markdown/highlights.scm +0 -0
  584. /package/{assets → src/lib/tree-sitter/assets}/markdown/injections.scm +0 -0
  585. /package/{assets → src/lib/tree-sitter/assets}/markdown/tree-sitter-markdown.wasm +0 -0
  586. /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/highlights.scm +0 -0
  587. /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
  588. /package/{assets → src/lib/tree-sitter/assets}/typescript/highlights.scm +0 -0
  589. /package/{assets → src/lib/tree-sitter/assets}/typescript/tree-sitter-typescript.wasm +0 -0
  590. /package/{assets → src/lib/tree-sitter/assets}/zig/highlights.scm +0 -0
  591. /package/{assets → src/lib/tree-sitter/assets}/zig/tree-sitter-zig.wasm +0 -0
@@ -0,0 +1,1948 @@
1
+ const std = @import("std");
2
+ const uucode = @import("uucode");
3
+
4
/// The method to use when calculating the width of a grapheme.
/// The value is consulted by `isGraphemeBreak` and `GraphemeWidthState`.
pub const WidthMethod = enum {
    /// Cluster width is the sum of the widths of the cluster's codepoints.
    wcwidth,
    /// Standard grapheme cluster segmentation with grapheme-aware width.
    unicode,
    /// Like `unicode`, but a codepoint that follows ZWJ (U+200D) starts a new grapheme.
    no_zwj,
};
10
+
11
/// Report whether `text` is non-empty and consists solely of printable
/// ASCII bytes (32..126 inclusive). An empty slice yields `false`.
/// Full 16-byte blocks are checked with vector reductions; whatever is
/// left over is checked one byte at a time.
pub fn isAsciiOnly(text: []const u8) bool {
    if (text.len == 0) return false;

    const lanes = 16;
    const Block = @Vector(lanes, u8);

    // Index one past the last byte that belongs to a full block.
    const simd_end = text.len - (text.len % lanes);

    var offset: usize = 0;
    while (offset < simd_end) : (offset += lanes) {
        const block: Block = text[offset..][0..lanes].*;
        // The block is acceptable only when both its smallest and its
        // largest byte fall inside the printable range.
        if (@reduce(.Min, block) < 32) return false;
        if (@reduce(.Max, block) > 126) return false;
    }

    for (text[simd_end..]) |byte| {
        switch (byte) {
            32...126 => {},
            else => return false,
        }
    }

    return true;
}
50
+
51
/// The kind of line terminator recorded by `findLineBreaks`.
pub const LineBreakKind = enum {
    /// "\n" (Unix/Linux)
    LF,
    /// "\r" (Old Mac)
    CR,
    /// "\r\n" (Windows)
    CRLF,
};
56
+
57
/// One line terminator found by `findLineBreaks`.
pub const LineBreak = struct {
    /// Byte index of the terminator. For `.CRLF` this is the index of the
    /// '\n' byte; for `.LF` and `.CR` it is the index of the single byte.
    pos: usize,
    /// Which terminator sequence was found at `pos`.
    kind: LineBreakKind,
};
61
+
62
/// Reusable output buffer for `findLineBreaks`. The allocator passed to
/// `init` is stored so that later appends and `deinit` use the same one.
pub const LineBreakResult = struct {
    /// Line terminators found, in the order they were appended.
    breaks: std.ArrayListUnmanaged(LineBreak),
    /// Allocator backing `breaks`.
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until the first append.
    pub fn init(allocator: std.mem.Allocator) LineBreakResult {
        const empty: std.ArrayListUnmanaged(LineBreak) = .{};
        return LineBreakResult{ .breaks = empty, .allocator = allocator };
    }

    /// Release the memory held by `breaks`.
    pub fn deinit(self: *LineBreakResult) void {
        const gpa = self.allocator;
        self.breaks.deinit(gpa);
    }

    /// Drop all recorded breaks but keep the allocated capacity for reuse.
    pub fn reset(self: *LineBreakResult) void {
        self.breaks.clearRetainingCapacity();
    }
};
81
+
82
/// Reusable output buffer for `findTabStops`. The allocator passed to
/// `init` is stored so that later appends and `deinit` use the same one.
pub const TabStopResult = struct {
    /// Byte indices of the tab characters found, in ascending scan order.
    positions: std.ArrayListUnmanaged(usize),
    /// Allocator backing `positions`.
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until the first append.
    pub fn init(allocator: std.mem.Allocator) TabStopResult {
        const empty: std.ArrayListUnmanaged(usize) = .{};
        return TabStopResult{ .positions = empty, .allocator = allocator };
    }

    /// Release the memory held by `positions`.
    pub fn deinit(self: *TabStopResult) void {
        const gpa = self.allocator;
        self.positions.deinit(gpa);
    }

    /// Drop all recorded positions but keep the allocated capacity for reuse.
    pub fn reset(self: *TabStopResult) void {
        self.positions.clearRetainingCapacity();
    }
};
101
+
102
/// One wrap opportunity recorded by `findWrapBreaks`.
pub const WrapBreak = struct {
    /// Byte offset of the grapheme that creates this break opportunity.
    /// For whitespace and punctuation, this is the delimiter grapheme.
    /// For CJK<->ASCII transitions, this is the last grapheme in the previous run.
    byte_offset: u32,

    /// Grapheme-count based offset, not a display column.
    /// Callers convert it to columns with charOffsetToColumn().
    char_offset: u32,
};
112
+
113
/// Reusable output buffer for `findWrapBreaks`. The allocator passed to
/// `init` is stored so that later appends and `deinit` use the same one.
pub const WrapBreakResult = struct {
    /// Wrap opportunities found, in the order they were appended.
    breaks: std.ArrayListUnmanaged(WrapBreak),
    /// Allocator backing `breaks`.
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until the first append.
    pub fn init(allocator: std.mem.Allocator) WrapBreakResult {
        const empty: std.ArrayListUnmanaged(WrapBreak) = .{};
        return WrapBreakResult{ .breaks = empty, .allocator = allocator };
    }

    /// Release the memory held by `breaks`.
    pub fn deinit(self: *WrapBreakResult) void {
        const gpa = self.allocator;
        self.breaks.deinit(gpa);
    }

    /// Drop all recorded breaks but keep the allocated capacity for reuse.
    pub fn reset(self: *WrapBreakResult) void {
        self.breaks.clearRetainingCapacity();
    }
};
132
+
133
// Tell whether an ASCII byte is a wrap-break delimiter: space, tab, dash,
// forward/back slash, the punctuation . , ; : ! ? and the brackets ( ) [ ] { }.
// CR and LF are not delimiters here.
inline fn isAsciiWrapBreak(b: u8) bool {
    inline for (" \t-/\\.,;:!?()[]{}") |delimiter| {
        if (b == delimiter) return true;
    }
    return false;
}
144
+
145
// Decode the UTF-8 sequence that begins at text[pos]. The input is assumed
// to be valid UTF-8; continuation bytes are not validated.
// Returns the codepoint and the number of bytes consumed. When the slice
// ends before the sequence is complete, returns U+FFFD with length 1.
pub inline fn decodeUtf8Unchecked(text: []const u8, pos: usize) struct { cp: u21, len: u3 } {
    const lead = text[pos];
    if (lead < 0x80) return .{ .cp = lead, .len = 1 };

    // Sequence length implied by the lead byte. Any lead that is neither a
    // 2-byte nor a 3-byte marker is decoded as a 4-byte sequence.
    const needed: u3 = if ((lead & 0xE0) == 0xC0) 2 else if ((lead & 0xF0) == 0xE0) 3 else 4;
    if (text.len - pos < needed) return .{ .cp = 0xFFFD, .len = 1 };

    // Payload bits carried by the lead byte itself.
    var bits: u32 = switch (needed) {
        2 => lead & 0x1F,
        3 => lead & 0x0F,
        else => lead & 0x07,
    };
    // Each continuation byte contributes its low six bits.
    for (text[pos + 1 .. pos + needed]) |continuation| {
        bits = (bits << 6) | (continuation & 0x3F);
    }

    return .{ .cp = @intCast(bits), .len = needed };
}
172
+
173
// Tell whether a non-ASCII codepoint is treated as a wrap-break delimiter.
inline fn isUnicodeWrapBreak(cp: u21) bool {
    // U+2000..U+200A (en quad through hair space) and U+200B (zero width space).
    if (cp >= 0x2000 and cp <= 0x200B) return true;

    const singles = [_]u21{
        0x00A0, // NBSP
        0x00AD, // SOFT HYPHEN
        0x1680, // OGHAM SPACE MARK
        0x2010, // HYPHEN
        0x202F, // NARROW NO-BREAK SPACE
        0x205F, // MEDIUM MATHEMATICAL SPACE
        0x3000, // IDEOGRAPHIC SPACE
        0x3001, // IDEOGRAPHIC COMMA
        0x3002, // IDEOGRAPHIC FULL STOP
        0xFF01, // FULLWIDTH EXCLAMATION MARK
        0xFF1F, // FULLWIDTH QUESTION MARK
    };
    inline for (singles) |candidate| {
        if (cp == candidate) return true;
    }
    return false;
}
193
+
194
// WordClass keeps word-boundary behavior predictable in mixed-script text.
// We split between ASCII word runs and CJK word runs, and we keep each
// CJK run grouped as one unit.
const WordClass = enum {
    // ASCII letters, digits and '_' (see isAsciiWordByte).
    ascii_word,
    // Han, kana and Hangul codepoints (see isCjkWordCodepoint).
    cjk_word,
    // Everything else.
    other,
};
202
+
203
// Tell whether a byte is an ASCII word character: a letter, a digit or '_'.
inline fn isAsciiWordByte(b: u8) bool {
    return switch (b) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}
209
+
210
// Tell whether a codepoint belongs to one of the Han, kana or Hangul
// ranges that are classified as CJK word characters.
inline fn isCjkWordCodepoint(cp: u21) bool {
    return switch (cp) {
        // Han ideographs
        0x3400...0x4DBF,
        0x4E00...0x9FFF,
        0xF900...0xFAFF,
        0x20000...0x2A6DF,
        0x2A700...0x2B73F,
        0x2B740...0x2B81F,
        0x2B820...0x2CEAF,
        0x2CEB0...0x2EBEF,
        0x2EBF0...0x2EE5D,
        0x2F800...0x2FA1F,
        // Hiragana + Katakana
        0x3040...0x309F,
        0x30A0...0x30FF,
        0x31F0...0x31FF,
        0xFF66...0xFF9D,
        // Hangul
        0x1100...0x11FF,
        0x3130...0x318F,
        0xA960...0xA97F,
        0xAC00...0xD7AF,
        0xD7B0...0xD7FF,
        => true,
        else => false,
    };
}
235
+
236
// Map a codepoint to its WordClass: ASCII word characters become
// .ascii_word, CJK word codepoints become .cjk_word, all others .other.
inline fn classifyWordClass(cp: u21) WordClass {
    const is_ascii = cp <= 0x7F;
    // The narrowing cast is only evaluated for codepoints that fit in a byte.
    if (is_ascii and isAsciiWordByte(@intCast(cp))) return .ascii_word;
    if (!is_ascii and isCjkWordCodepoint(cp)) return .cjk_word;
    return .other;
}
243
+
244
/// Tell whether a codepoint counts as a word character, i.e. whether
/// `classifyWordClass` places it in the ASCII-word or CJK-word class.
pub inline fn isWordCodepoint(cp: u21) bool {
    return switch (classifyWordClass(cp)) {
        .ascii_word, .cjk_word => true,
        .other => false,
    };
}
247
+
248
// Tell whether two adjacent graphemes sit on a CJK<->ASCII word boundary,
// in either direction. Any pairing that involves .other is not a transition.
inline fn isCjkAsciiTransition(prev_class: WordClass, curr_class: WordClass) bool {
    return switch (prev_class) {
        .cjk_word => curr_class == .ascii_word,
        .ascii_word => curr_class == .cjk_word,
        .other => false,
    };
}
252
+
253
+ // No local grapheme-break helper is defined here: findWrapBreaks calls uucode.grapheme.isBreak directly.
254
+
255
// Scanning state shared by every path of findWrapBreaks. It owns the
// grapheme counter, the grapheme-break state and the description of the
// most recently started grapheme, and appends break points to `result`.
const WrapBreakScanner = struct {
    result: *WrapBreakResult,
    // Number of graphemes started so far (grapheme-count based offset).
    char_offset: u32 = 0,
    // Previous codepoint, used for grapheme-break detection.
    prev_cp: ?u21 = null,
    break_state: uucode.grapheme.BreakState = .default,
    // The current grapheme is tracked so that a break can be added at
    // CJK<->ASCII transitions. The break is emitted at the previous grapheme,
    // so callers that add grapheme width land exactly at the run boundary.
    have_current_grapheme: bool = false,
    current_grapheme_byte_offset: u32 = 0,
    current_grapheme_char_offset: u32 = 0,
    current_grapheme_class: WordClass = .other,

    // Append one break point to the result list.
    fn emit(self: *WrapBreakScanner, byte_offset: u32, char_offset: u32) !void {
        try self.result.breaks.append(self.result.allocator, .{
            .byte_offset = byte_offset,
            .char_offset = char_offset,
        });
    }

    // Emit a break at the tracked grapheme when the grapheme about to start
    // (of class `next_class`) switches between CJK and ASCII word runs.
    fn emitTransitionBreak(self: *WrapBreakScanner, next_class: WordClass) !void {
        if (self.have_current_grapheme and isCjkAsciiTransition(self.current_grapheme_class, next_class)) {
            try self.emit(self.current_grapheme_byte_offset, self.current_grapheme_char_offset);
        }
    }

    // Decide whether `cp` starts a new grapheme cluster. U+FFFD and values
    // above U+10FFFF on either side always force a break instead of being
    // handed to uucode.
    fn startsGrapheme(self: *WrapBreakScanner, cp: u21) bool {
        if (cp == 0xFFFD or cp > 0x10FFFF) return true;
        const prev = self.prev_cp orelse return true;
        if (prev == 0xFFFD or prev > 0x10FFFF) return true;
        return uucode.grapheme.isBreak(prev, cp, &self.break_state);
    }

    // Process one decoded codepoint located at `byte_offset`.
    // `is_delimiter` says whether the codepoint itself is a wrap delimiter.
    fn consume(self: *WrapBreakScanner, cp: u21, byte_offset: usize, is_delimiter: bool) !void {
        const is_break = self.startsGrapheme(cp);

        if (is_break) {
            const class = classifyWordClass(cp);
            try self.emitTransitionBreak(class);
            self.have_current_grapheme = true;
            self.current_grapheme_byte_offset = @intCast(byte_offset);
            self.current_grapheme_char_offset = self.char_offset;
            self.current_grapheme_class = class;
        }

        if (is_delimiter) {
            try self.emit(@intCast(byte_offset), self.char_offset);
        }

        if (is_break) self.char_offset += 1;
        self.prev_cp = cp;
    }

    // Process an all-ASCII 16-byte block starting at `pos`. Every byte is
    // counted as its own grapheme; delimiters are located with SIMD compares.
    fn consumeAsciiBlock(self: *WrapBreakScanner, text: []const u8, pos: usize) !void {
        const lanes = 16;
        const Block = @Vector(lanes, u8);
        const block: Block = text[pos..][0..lanes].*;

        // Only the first byte can form a transition with what came before.
        try self.emitTransitionBreak(classifyWordClass(text[pos]));

        // Same delimiter set as isAsciiWrapBreak.
        var match_mask: @Vector(lanes, bool) = @splat(false);
        inline for (" \t-/\\.,;:!?()[]{}") |delimiter| {
            match_mask = match_mask | (block == @as(Block, @splat(delimiter)));
        }

        // Convert boolean mask to integer bitmask for faster iteration
        var bitmask: u16 = 0;
        inline for (0..lanes) |lane| {
            if (match_mask[lane]) {
                bitmask |= @as(u16, 1) << @intCast(lane);
            }
        }

        // Visit the set bits from lowest to highest; each is one delimiter.
        while (bitmask != 0) : (bitmask &= bitmask - 1) {
            const lane = @ctz(bitmask);
            try self.emit(@intCast(pos + lane), self.char_offset + @as(u32, lane));
        }

        // The last byte of the block becomes the tracked grapheme.
        const last = pos + lanes - 1;
        self.prev_cp = text[last];
        self.break_state = .default;
        self.have_current_grapheme = true;
        self.current_grapheme_byte_offset = @intCast(last);
        self.current_grapheme_char_offset = self.char_offset + (lanes - 1);
        self.current_grapheme_class = classifyWordClass(text[last]);
        self.char_offset += lanes;
    }

    // Process text[start..limit] one codepoint at a time and return the
    // position reached. Stops early, without consuming it, at a multi-byte
    // sequence that would extend past `limit`.
    fn scanRange(self: *WrapBreakScanner, text: []const u8, start: usize, limit: usize) !usize {
        var at = start;
        while (at < limit) {
            const b0 = text[at];
            if (b0 < 0x80) {
                try self.consume(b0, at, isAsciiWrapBreak(b0));
                at += 1;
                continue;
            }
            const dec = decodeUtf8Unchecked(text, at);
            if (at + dec.len > limit) break;
            try self.consume(dec.cp, at, isUnicodeWrapBreak(dec.cp));
            at += dec.len;
        }
        return at;
    }
};

/// Find the wrap opportunities in `text` and store them in `result`.
///
/// This function clears previous results and writes fresh break points.
/// Callers should treat `result.breaks` as replaced after the call.
/// A break is recorded at every ASCII or Unicode delimiter grapheme and,
/// for CJK<->ASCII transitions, at the last grapheme of the previous run.
///
/// `width_method` is currently unused, but kept for API consistency.
/// Returns an error only when appending to `result.breaks` fails.
pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method: WidthMethod) !void {
    _ = width_method;
    result.reset();

    const vector_len = 16;
    const Block = @Vector(vector_len, u8);
    const ascii_threshold: Block = @splat(0x80);

    var scanner = WrapBreakScanner{ .result = result };
    var pos: usize = 0;

    while (pos + vector_len <= text.len) {
        const chunk: Block = text[pos..][0..vector_len].*;

        // Fast path: all ASCII
        if (!@reduce(.Or, chunk >= ascii_threshold)) {
            try scanner.consumeAsciiBlock(text, pos);
            pos += vector_len;
            continue;
        }

        // Slow path: mixed ASCII/non-ASCII - need grapheme-aware counting.
        // A sequence that straddles the end of this window is left for the
        // next iteration, which restarts exactly at its lead byte.
        pos = try scanner.scanRange(text, pos, pos + vector_len);
    }

    // Tail: fewer than vector_len bytes remain.
    _ = try scanner.scanRange(text, pos, text.len);
}
501
+
502
/// Record the byte index of every tab character in `text`.
///
/// Previous contents of `result` are cleared first. Indices are appended
/// in ascending order. Full 16-byte blocks that contain no tab are skipped
/// after a single vector compare.
/// Returns an error only when appending to `result.positions` fails.
pub fn findTabStops(text: []const u8, result: *TabStopResult) !void {
    result.reset();

    const lanes = 16;
    const Block = @Vector(lanes, u8);
    const tabs: Block = @splat('\t');

    var base: usize = 0;
    while (text.len - base >= lanes) : (base += lanes) {
        const block: Block = text[base..][0..lanes].*;
        if (!@reduce(.Or, block == tabs)) continue;

        for (text[base..][0..lanes], base..) |byte, index| {
            if (byte == '\t') try result.positions.append(result.allocator, index);
        }
    }

    // Remaining bytes that do not fill a whole block.
    for (text[base..], base..) |byte, index| {
        if (byte == '\t') try result.positions.append(result.allocator, index);
    }
}
532
+
533
/// Record every line terminator in `text`.
///
/// Previous contents of `result` are cleared first. A "\r\n" pair produces
/// one `.CRLF` entry whose `pos` is the index of the '\n'; a lone '\r'
/// produces `.CR` and a lone '\n' produces `.LF`, each at its own index.
/// Full 16-byte blocks without '\r' or '\n' are skipped after two vector
/// compares. Returns an error only when appending to `result.breaks` fails.
pub fn findLineBreaks(text: []const u8, result: *LineBreakResult) !void {
    result.reset();

    const lanes = 16; // 16-byte vectors
    const Block = @Vector(lanes, u8);
    const line_feeds: Block = @splat('\n');
    const carriage_returns: Block = @splat('\r');

    var base: usize = 0;
    // True when the previously processed block ended with '\r'. A '\n' at
    // the very start of the next block was then already recorded as CRLF.
    var block_ended_with_cr = false;

    while (base + lanes <= text.len) : (base += lanes) {
        const block: Block = text[base..][0..lanes].*;
        const has_terminator = @reduce(.Or, block == line_feeds) or @reduce(.Or, block == carriage_returns);
        if (!has_terminator) {
            block_ended_with_cr = false;
            continue;
        }

        var lane: usize = 0;
        while (lane < lanes) : (lane += 1) {
            const at = base + lane;
            switch (text[at]) {
                '\n' => {
                    // Second half of a CRLF split across blocks: already recorded.
                    if (lane == 0 and block_ended_with_cr) {
                        block_ended_with_cr = false;
                        continue;
                    }
                    const kind: LineBreakKind = if (at > 0 and text[at - 1] == '\r') .CRLF else .LF;
                    try result.breaks.append(result.allocator, .{ .pos = at, .kind = kind });
                },
                '\r' => {
                    const next = at + 1;
                    if (next < text.len and text[next] == '\n') {
                        try result.breaks.append(result.allocator, .{ .pos = next, .kind = .CRLF });
                        lane += 1; // step over the '\n' that was just consumed
                    } else {
                        try result.breaks.append(result.allocator, .{ .pos = at, .kind = .CR });
                    }
                },
                else => {},
            }
        }

        block_ended_with_cr = (text[base + lanes - 1] == '\r');
    }

    // Remaining bytes that do not fill a whole block.
    while (base < text.len) : (base += 1) {
        switch (text[base]) {
            '\n' => {
                const follows_cr = base > 0 and text[base - 1] == '\r';
                // Skip the '\n' of a CRLF whose '\r' closed the last full block.
                if (!(follows_cr and block_ended_with_cr)) {
                    const kind: LineBreakKind = if (follows_cr) .CRLF else .LF;
                    try result.breaks.append(result.allocator, .{ .pos = base, .kind = kind });
                }
            },
            '\r' => {
                if (base + 1 < text.len and text[base + 1] == '\n') {
                    try result.breaks.append(result.allocator, .{ .pos = base + 1, .kind = .CRLF });
                    base += 1; // step over the '\n' that was just consumed
                } else {
                    try result.breaks.append(result.allocator, .{ .pos = base, .kind = .CR });
                }
            },
            else => {},
        }
        // The flag only concerns the first byte after the last full block.
        block_ended_with_cr = false;
    }
}
610
+
611
/// Result triple of a wrap-by-width computation.
/// NOTE(review): the function that fills this in is outside the visible
/// part of the file; field meanings below are inferred from the names and
/// should be confirmed against it.
pub const WrapByWidthResult = struct {
    /// Presumably the byte offset into the text where wrapping stops.
    byte_offset: u32,
    /// Presumably the number of graphemes that fit.
    grapheme_count: u32,
    /// Presumably the number of display columns those graphemes occupy.
    columns_used: u32,
};
616
+
617
/// Result triple of a position-by-width computation.
/// NOTE(review): the function that fills this in is outside the visible
/// part of the file; field meanings below are inferred from the names and
/// should be confirmed against it.
pub const PosByWidthResult = struct {
    /// Presumably the byte offset into the text of the located position.
    byte_offset: u32,
    /// Presumably the number of graphemes before that position.
    grapheme_count: u32,
    /// Presumably the number of display columns before that position.
    columns_used: u32,
};
622
+
623
/// Display width of a single codepoint in columns.
/// Values above U+10FFFF yield 0 without any property lookup, and the
/// negative width that `eawToWidth` reports for control characters is
/// clamped to 0.
pub inline fn eastAsianWidth(cp: u21) u32 {
    if (cp > 0x10FFFF) return 0;
    const raw = eawToWidth(cp, uucode.get(.east_asian_width, cp));
    return if (raw <= 0) 0 else @intCast(raw);
}
629
+
630
/// Calculate width from east asian width property and Unicode properties.
/// Returns 0 for NUL, marks and the listed zero-width codepoints, -1 for
/// control characters (they don't contribute to width), 2 for wide or
/// fullwidth codepoints and the listed symbol/emoji ranges, and 1 otherwise.
inline fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
    // NUL is zero-width; the remaining C0 controls, DEL and C1 controls are -1.
    switch (cp) {
        0 => return 0,
        1...31, 0x7F...0x9F => return -1,
        else => {},
    }

    // Marks never advance the cursor.
    switch (uucode.get(.general_category, cp)) {
        .mark_nonspacing, .mark_spacing_combining, .mark_enclosing => return 0,
        else => {},
    }

    // Explicit zero-width codepoints, checked before the East Asian Width property.
    switch (cp) {
        0x034F,
        0x180B...0x180D,
        0x200B...0x200D,
        0x2060,
        0xFE00...0xFE0F,
        0xFEFF,
        0xE0100...0xE01EF,
        => return 0,
        else => {},
    }

    if (eaw == .fullwidth or eaw == .wide) return 2;

    // Codepoints rendered two columns wide regardless of their
    // East Asian Width property.
    return switch (cp) {
        // BMP symbols
        0x203C,
        0x2049,
        0x231A...0x231B,
        0x2329...0x232A,
        0x23E9...0x23EC,
        0x23F0,
        0x23F3,
        0x25FD...0x25FE,
        0x2614...0x2615,
        0x2622...0x2623,
        0x2630...0x2637,
        0x2648...0x2653,
        0x267F,
        0x2693,
        0x269B,
        0x26A0...0x26A1,
        0x26AA...0x26AB,
        0x26BD...0x26BE,
        0x26C4...0x26C5,
        0x26CE,
        0x26D1,
        0x26D4,
        0x26EA,
        0x26F2...0x26F3,
        0x26F5,
        0x26FA,
        0x26FD,
        0x2705,
        0x270A...0x270B,
        0x2728,
        0x274C,
        0x274E,
        0x2753...0x2755,
        0x2757,
        0x2760...0x2767,
        0x2795...0x2797,
        0x27B0,
        0x27BF,
        0x2B1B...0x2B1C,
        0x2B50,
        0x2B55,
        // Supplementary-plane symbols, U+1F000..U+1F0F5
        0x1F000...0x1F02B,
        0x1F030...0x1F093,
        0x1F0A0...0x1F0AE,
        0x1F0B1...0x1F0BF,
        0x1F0C1...0x1F0CF,
        0x1F0D1...0x1F0F5,
        // Supplementary-plane symbols, U+1F300..U+1FAF8
        0x1F300...0x1F320,
        0x1F32D...0x1F335,
        0x1F337...0x1F37C,
        0x1F37E...0x1F393,
        0x1F3A0...0x1F3CA,
        0x1F3CF...0x1F3D3,
        0x1F3E0...0x1F3F0,
        0x1F3F4,
        0x1F3F8...0x1F3FF,
        0x1F400...0x1F43E,
        0x1F440,
        0x1F442...0x1F4FC,
        0x1F4FF...0x1F6C5,
        0x1F6CC,
        0x1F6D0...0x1F6D2,
        0x1F6D5...0x1F6D7,
        0x1F6DC...0x1F6DF,
        0x1F6EB...0x1F6EC,
        0x1F6F4...0x1F6FC,
        0x1F700...0x1F773,
        0x1F780...0x1F7D8,
        0x1F7E0...0x1F7EB,
        0x1F800...0x1F80B,
        0x1F810...0x1F847,
        0x1F850...0x1F859,
        0x1F860...0x1F887,
        0x1F890...0x1F8AD,
        0x1F8B0...0x1F8B1,
        0x1F90C...0x1F93A,
        0x1F93C...0x1F945,
        0x1F947...0x1FA53,
        0x1FA60...0x1FA6D,
        0x1FA70...0x1FA74,
        0x1FA78...0x1FA7C,
        0x1FA80...0x1FA86,
        0x1FA90...0x1FAAC,
        0x1FAB0...0x1FABA,
        0x1FAC0...0x1FAC5,
        0x1FAD0...0x1FAD9,
        0x1FAE0...0x1FAE7,
        0x1FAF0...0x1FAF8,
        => 2,
        else => 1,
    };
}
736
+
737
/// Display width, in columns, of a single byte on the ASCII-only fast paths.
/// A tab counts as `tab_width` columns, printable ASCII (32..126) as one
/// column, and every other byte as zero.
inline fn asciiCharWidth(byte: u8, tab_width: u8) u32 {
    return switch (byte) {
        '\t' => tab_width,
        32...126 => 1,
        else => 0,
    };
}
747
+
748
/// Calculate the display width of a character (byte or codepoint) in columns.
///
/// `byte` is the first byte of the character and selects the path:
/// - a tab counts as `tab_width` columns;
/// - printable ASCII (32..126) counts as one column;
/// - any byte >= 0x80 defers to `eastAsianWidth(codepoint)`;
/// - every other byte (ASCII control characters) counts as zero.
///
/// Delegating to `eastAsianWidth` keeps the two functions in agreement and
/// applies its `codepoint > 0x10FFFF` guard, so that values produced by
/// `decodeUtf8Unchecked` from malformed input are reported as width 0
/// instead of being passed to the property lookup.
inline fn charWidth(byte: u8, codepoint: u21, tab_width: u8) u32 {
    return switch (byte) {
        '\t' => tab_width,
        32...126 => 1,
        0x80...0xFF => eastAsianWidth(codepoint),
        else => 0,
    };
}
761
+
762
/// Check if a codepoint is valid for grapheme break detection: it must not
/// be U+FFFD (the replacement character) and must not exceed U+10FFFF.
inline fn isValidCodepoint(cp: u21) bool {
    if (cp == 0xFFFD) return false;
    return cp <= 0x10FFFF;
}
766
+
767
/// Check if there's a grapheme break between two codepoints.
///
/// Returns true when `curr_cp` starts a new grapheme cluster:
/// - always, when there is no previous codepoint;
/// - always, when either codepoint fails `isValidCodepoint`;
/// - in no_zwj mode, when the previous codepoint is ZWJ (U+200D); the
///   break state is reset in that case because the break is forced;
/// - otherwise, whatever `uucode.grapheme.isBreak` decides.
///
/// wcwidth and unicode modes segment identically here; they differ only in
/// how the cluster width is accumulated afterwards.
inline fn isGraphemeBreak(prev_cp: ?u21, curr_cp: u21, break_state: *uucode.grapheme.BreakState, width_method: WidthMethod) bool {
    const prev = prev_cp orelse return true;

    if (!isValidCodepoint(curr_cp)) return true;

    // In no_zwj mode a ZWJ stays attached to the grapheme before it, but
    // the codepoint after it starts a new grapheme. uucode would report
    // "no break" here, so it is not consulted.
    if (width_method == .no_zwj and prev == 0x200D) {
        break_state.* = .default;
        return true;
    }

    if (!isValidCodepoint(prev)) return true;
    return uucode.grapheme.isBreak(prev, curr_cp, break_state);
}
810
+
811
/// State for accumulating grapheme cluster width
const GraphemeWidthState = struct {
    /// Width accumulated for the cluster so far, in columns.
    width: u32 = 0,
    /// Whether any codepoint has contributed a non-zero width yet.
    has_width: bool = false,
    /// Set when the cluster's first codepoint is a regional indicator.
    is_regional_indicator_pair: bool = false,
    /// Set once U+FE0F (Variation Selector-16) has been seen.
    has_vs16: bool = false,
    /// Set after a nonspacing mark; cleared when a following Devanagari
    /// base consonant is processed.
    has_indic_virama: bool = false,
    width_method: WidthMethod,

    /// Regional indicator symbols, U+1F1E6..U+1F1FF.
    inline fn isRegionalIndicator(cp: u21) bool {
        return cp >= 0x1F1E6 and cp <= 0x1F1FF;
    }

    /// Initialize state with the first codepoint of a grapheme cluster
    inline fn init(first_cp: u21, first_width: u32, width_method: WidthMethod) GraphemeWidthState {
        return GraphemeWidthState{
            .width = first_width,
            .has_width = first_width > 0,
            .is_regional_indicator_pair = isRegionalIndicator(first_cp),
            .has_vs16 = false,
            .has_indic_virama = false,
            .width_method = width_method,
        };
    }

    /// Add a codepoint to the current grapheme cluster
    inline fn addCodepoint(self: *GraphemeWidthState, cp: u21, cp_width: u32) void {
        // wcwidth mode: sum all codepoint widths (tmux-style). The width is
        // recomputed from the codepoint; `cp_width` is not used on this path.
        if (self.width_method == .wcwidth) {
            const w = eawToWidth(cp, uucode.get(.east_asian_width, cp));
            if (w > 0) {
                self.width += @intCast(w);
                self.has_width = true;
            }
            return;
        }

        // unicode and no_zwj modes: use grapheme-aware width

        // Variation Selector-16 (emoji presentation) widens a one-column base.
        if (cp == 0xFE0F) {
            self.has_vs16 = true;
            if (self.has_width and self.width == 1) self.width = 2;
            return;
        }

        // Every nonspacing mark is remembered as a potential virama.
        if (uucode.get(.general_category, cp) == .mark_nonspacing) {
            self.has_indic_virama = true;
            return;
        }

        // Second half of a regional-indicator pair adds its own width.
        if (self.is_regional_indicator_pair and isRegionalIndicator(cp)) {
            self.width += cp_width;
            self.has_width = true;
            return;
        }

        // The first codepoint with a width defines the cluster width.
        if (!self.has_width) {
            if (cp_width > 0) {
                self.width = cp_width;
                self.has_width = true;
            }
            return;
        }

        // A Devanagari base consonant after a mark: every consonant except
        // RA (U+0930) adds its width; either way the mark flag is cleared.
        const is_devanagari_base = (cp >= 0x0915 and cp <= 0x0939) or (cp >= 0x0958 and cp <= 0x095F);
        if (self.has_indic_virama and is_devanagari_base and cp_width > 0) {
            if (cp != 0x0930) self.width += cp_width;
            self.has_indic_virama = false;
        }
    }
};
883
+
884
/// Running state for the width-limited scanners (`handleClusterForWrap` / `handleClusterForPos`).
const ClusterState = struct {
    /// Columns consumed by the clusters accepted so far.
    columns_used: u32,
    /// Number of clusters counted so far.
    grapheme_count: u32,
    /// Width of the cluster currently being accumulated.
    cluster_width: u32,
    /// Byte offset at which the current cluster starts.
    cluster_start: usize,
    /// Last code point fed to the scanner; null before the first one.
    prev_cp: ?u21,
    /// State threaded through the grapheme break test.
    break_state: uucode.grapheme.BreakState,
    /// Width accumulator for the current cluster.
    width_state: GraphemeWidthState,
    /// Width method the scan was started with.
    width_method: WidthMethod,
    /// True once `width_state` has been seeded with the current cluster's first code point.
    cluster_started: bool,

    /// Create an empty scan state. `width_state` holds a zero-width placeholder until the
    /// first code point of a cluster replaces it.
    fn init(width_method: WidthMethod) ClusterState {
        return ClusterState{
            .columns_used = 0,
            .grapheme_count = 0,
            .cluster_width = 0,
            .cluster_start = 0,
            .prev_cp = null,
            .break_state = .default,
            .width_state = GraphemeWidthState.init(0, 0, width_method),
            .width_method = width_method,
            .cluster_started = false,
        };
    }
};
910
+
911
/// Commit the cluster that just ended while wrapping by width.
///
/// Does nothing unless `is_break` is set. On a break, the finished cluster (if any code point
/// has been seen) is accepted only when it still fits within `max_columns`; otherwise the
/// function returns true and leaves `state` untouched, so `state.cluster_start` still points
/// at the cluster that did not fit. After accepting, a new cluster is opened at
/// `new_cluster_start`.
inline fn handleClusterForWrap(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const end_col = state.columns_used + state.cluster_width;
        if (end_col > max_columns) return true; // would exceed the limit: stop before it
        state.columns_used = end_col;
        state.grapheme_count += 1;
    }

    // Open the next cluster.
    state.cluster_start = new_cluster_start;
    state.cluster_width = 0;
    state.cluster_started = false;
    return false;
}
933
+
934
/// Commit the cluster that just ended while locating a column position (snaps to cluster
/// boundaries). Returns true when the scan should stop; `state` is then left untouched so
/// `state.cluster_start` points at the rejected cluster.
///
/// Snapping behavior:
/// - include_start_before=true (selection end): accept a cluster when it STARTS before
///   `max_columns`, even if it extends past it. With max_columns=3, a cluster covering
///   columns 2..4 is accepted (2 < 3); one starting at column 3 is not.
/// - include_start_before=false (selection start): accept a cluster only when it ENDS at or
///   before `max_columns`. With max_columns=3, a cluster covering columns 2..4 is rejected
///   (4 > 3).
///
/// NOTE(review): `grapheme_count` is only advanced in the include_start_before=true mode;
/// confirm that callers of the other mode do not rely on the count.
inline fn handleClusterForPos(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
    include_start_before: bool,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;

        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (rejected) return true;

        state.columns_used = end_col;
        if (include_start_before) state.grapheme_count += 1;
    }

    // Open the next cluster.
    state.cluster_start = new_cluster_start;
    state.cluster_width = 0;
    state.cluster_started = false;
    return false;
}
975
+
976
/// Find the wrap position for `text` within `max_columns`.
///
/// Thin dispatcher: `.wcwidth` uses the per-codepoint scanner, while `.unicode` and `.no_zwj`
/// use the grapheme-cluster scanner (which also receives `width_method`).
pub fn findWrapPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    return switch (width_method) {
        .wcwidth => findWrapPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly),
        .unicode, .no_zwj => findWrapPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, width_method),
    };
}
989
+
990
/// Feed one code point into a wrap scan (private helper of `findWrapPosByWidthUnicode`).
///
/// Runs the grapheme break test, commits the previous cluster on a break, then adds the code
/// point to the current cluster. Returns true when the committed cluster did not fit within
/// `max_columns`; `state.cluster_start` then marks the wrap position.
inline fn wrapScanStep(
    state: *ClusterState,
    cp: u21,
    cp_width: u32,
    byte_pos: usize,
    max_columns: u32,
) bool {
    const is_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
    if (handleClusterForWrap(state, is_break, byte_pos, max_columns)) return true;

    if (state.cluster_started) {
        state.width_state.addCodepoint(cp, cp_width);
        state.cluster_width = state.width_state.width;
    } else {
        // First code point of a cluster seeds the width accumulator.
        state.width_state = GraphemeWidthState.init(cp, cp_width, state.width_method);
        state.cluster_width = cp_width;
        state.cluster_started = true;
    }
    state.prev_cp = cp;
    return false;
}

/// Find wrap position by width using Unicode grapheme cluster segmentation.
///
/// Returns the byte offset of the first cluster that does not fit within `max_columns`
/// (or `text.len` when everything fits), plus the clusters and columns accepted before it.
///
/// The text is scanned in 16-byte chunks: chunks with no byte >= 0x80 take an ASCII path
/// that skips UTF-8 decoding, other chunks and the tail are decoded per code point. Each
/// multi-byte sequence is decoded once.
fn findWrapPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: one byte == one cluster == one column.
    // NOTE(review): `tab_width` is not applied here; presumably `isASCIIOnly` is only set for
    // text without tabs — confirm against callers.
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    const vector_len = 16;
    var state = ClusterState.init(width_method);
    var pos: usize = 0;

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);

        if (!@reduce(.Or, chunk >= ascii_threshold)) {
            // Whole chunk is ASCII: no decoding needed.
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const b = text[pos + i];
                if (wrapScanStep(&state, b, asciiCharWidth(b, tab_width), pos + i, max_columns)) {
                    return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
                }
            }
            pos += vector_len;
            continue;
        }

        // Mixed ASCII / non-ASCII chunk: walk it code point by code point.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const at = pos + i;
            const b0 = text[at];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                cp = dec.cp;
                cp_len = dec.len;
            }

            // A sequence reaching past the end of the text is left for the tail loop.
            if (at + cp_len > text.len) break;

            if (wrapScanStep(&state, cp, charWidth(b0, cp, tab_width), at, max_columns)) {
                return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
            }
            i += cp_len;
        }
        pos += i; // a multi-byte sequence may carry us past the chunk boundary
    }

    // Tail: fewer than 16 bytes left.
    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        if (wrapScanStep(&state, cp, charWidth(b0, cp, tab_width), pos, max_columns)) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }
        pos += cp_len;
    }

    // The last cluster has no following break to commit it, so commit it here.
    if (state.prev_cp != null and state.cluster_width > 0) {
        if (state.columns_used + state.cluster_width > max_columns) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }
        state.columns_used += state.cluster_width;
        state.grapheme_count += 1;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
1113
+
1114
/// Find wrap position by width, treating every code point independently (wcwidth style).
///
/// Returns the byte offset of the first code point that may not be placed within
/// `max_columns`, together with the number of code points and columns accepted before it.
/// The `grapheme_count` field of the result counts code points in this mode.
fn findWrapPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: one byte == one column.
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    var columns_used: u32 = 0;
    var codepoint_count: u32 = 0;
    var pos: usize = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence reaching past the end of the text ends the scan.
        if (pos + cp_len > text.len) break;

        const cp_width = charWidth(b0, cp, tab_width);

        // Stop once the line is full (so trailing zero-width code points are not
        // appended after the limit), or when this code point would overflow it.
        const line_full = columns_used >= max_columns;
        if (line_full or columns_used + cp_width > max_columns) {
            return .{ .byte_offset = @intCast(pos), .grapheme_count = codepoint_count, .columns_used = columns_used };
        }

        columns_used += cp_width;
        codepoint_count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = codepoint_count, .columns_used = columns_used };
}
1170
+
1171
/// Find the byte position that corresponds to column `max_columns`.
///
/// Thin dispatcher on `width_method`: `.wcwidth` uses the per-codepoint scanner, `.unicode`
/// and `.no_zwj` use the grapheme-cluster scanner.
/// - include_start_before=true (selection end): units that START before `max_columns` are
///   included, so a column pointing into the middle of a wide unit snaps forward past it.
/// - include_start_before=false (selection start): only units that END at or before
///   `max_columns` are included, so a column pointing into the middle of a wide unit snaps
///   back to its start.
pub fn findPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    return switch (width_method) {
        .wcwidth => findPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly, include_start_before),
        .unicode, .no_zwj => findPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, include_start_before, width_method),
    };
}
1189
+
1190
/// Feed one code point into a position scan (private helper of `findPosByWidthUnicode`).
///
/// Runs the grapheme break test, commits the previous cluster on a break, then adds the code
/// point to the current cluster. Returns true when the committed cluster was rejected;
/// `state.cluster_start` then marks the resulting position.
inline fn posScanStep(
    state: *ClusterState,
    cp: u21,
    cp_width: u32,
    byte_pos: usize,
    max_columns: u32,
    include_start_before: bool,
) bool {
    const is_break = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
    if (handleClusterForPos(state, is_break, byte_pos, max_columns, include_start_before)) return true;

    if (state.cluster_started) {
        state.width_state.addCodepoint(cp, cp_width);
        state.cluster_width = state.width_state.width;
    } else {
        // First code point of a cluster seeds the width accumulator.
        state.width_state = GraphemeWidthState.init(cp, cp_width, state.width_method);
        state.cluster_width = cp_width;
        state.cluster_started = true;
    }
    state.prev_cp = cp;
    return false;
}

/// Find position by column width using Unicode grapheme cluster segmentation.
///
/// - include_start_before=true: clusters that start before `max_columns` are included.
/// - include_start_before=false: only clusters that end at or before `max_columns` are
///   included.
///
/// The same rule is applied to the final cluster of the text. Previously the final cluster
/// was only rejected when it started at or after `max_columns`, so in
/// include_start_before=false mode a trailing wide cluster straddling `max_columns` was
/// included, although the same cluster followed by more text was excluded.
///
/// `grapheme_count` is only advanced in include_start_before=true mode (unchanged behavior).
fn findPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: one byte == one cluster == one column.
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    const vector_len = 16;
    var state = ClusterState.init(width_method);
    var pos: usize = 0;

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);

        if (!@reduce(.Or, chunk >= ascii_threshold)) {
            // Whole chunk is ASCII: no decoding needed.
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const b = text[pos + i];
                if (posScanStep(&state, b, asciiCharWidth(b, tab_width), pos + i, max_columns, include_start_before)) {
                    return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
                }
            }
            pos += vector_len;
            continue;
        }

        // Mixed ASCII / non-ASCII chunk: walk it code point by code point, decoding once.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const at = pos + i;
            const b0 = text[at];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                cp = dec.cp;
                cp_len = dec.len;
            }

            // A sequence reaching past the end of the text is left for the tail loop.
            if (at + cp_len > text.len) break;

            if (posScanStep(&state, cp, charWidth(b0, cp, tab_width), at, max_columns, include_start_before)) {
                return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
            }
            i += cp_len;
        }
        pos += i; // a multi-byte sequence may carry us past the chunk boundary
    }

    // Tail: fewer than 16 bytes left.
    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        if (posScanStep(&state, cp, charWidth(b0, cp, tab_width), pos, max_columns, include_start_before)) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }
        pos += cp_len;
    }

    // The last cluster has no following break to commit it; apply the same acceptance rule
    // that handleClusterForPos uses for every other cluster.
    if (state.prev_cp != null and state.cluster_width > 0) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;
        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (rejected) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }
        state.columns_used = end_col;
        if (include_start_before) {
            state.grapheme_count += 1;
        }
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
1316
+
1317
/// Find position by column width, treating every code point independently (wcwidth style).
///
/// - include_start_before=true (selection end): a code point is included when it starts
///   before `max_columns`.
/// - include_start_before=false (selection start): a code point is included only when it
///   ends at or before `max_columns`.
///
/// The `grapheme_count` field of the result counts code points in this mode.
fn findPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: one byte == one column.
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    var columns_used: u32 = 0;
    var codepoint_count: u32 = 0;
    var pos: usize = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence reaching past the end of the text ends the scan.
        if (pos + cp_len > text.len) break;

        const start_col = columns_used;
        const end_col = start_col + charWidth(b0, cp, tab_width);

        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (rejected) {
            return .{ .byte_offset = @intCast(pos), .grapheme_count = codepoint_count, .columns_used = columns_used };
        }

        columns_used = end_col;
        codepoint_count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = codepoint_count, .columns_used = columns_used };
}
1379
+
1380
/// Display width of the unit starting at `byte_offset`.
///
/// Thin dispatcher on `width_method`: a single code point for `.wcwidth`, a whole grapheme
/// cluster for `.unicode` and `.no_zwj`.
pub fn getWidthAt(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .wcwidth => getWidthAtWCWidth(text, byte_offset, tab_width),
        .unicode, .no_zwj => getWidthAtUnicode(text, byte_offset, tab_width, width_method),
    };
}
1387
+
1388
/// Display width of the grapheme cluster that starts at `byte_offset`.
///
/// Returns 0 when `byte_offset` is at or past the end of `text`, and 1 when the first
/// sequence would run past the end of `text`. Otherwise code points are accumulated until
/// the next grapheme break (or the end of the text).
fn getWidthAtUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    var first_cp: u21 = b0;
    var first_len: usize = 1;
    if (b0 >= 0x80) {
        const dec = decodeUtf8Unchecked(text, byte_offset);
        if (byte_offset + dec.len > text.len) return 1;
        first_cp = dec.cp;
        first_len = dec.len;
    }

    var cluster = GraphemeWidthState.init(first_cp, charWidth(b0, first_cp, tab_width), width_method);
    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: ?u21 = first_cp;
    var pos = byte_offset + first_len;

    while (pos < text.len) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // Stop at a sequence reaching past the end of the text, or at the next cluster.
        if (pos + cp_len > text.len) break;
        if (isGraphemeBreak(prev_cp, cp, &break_state, width_method)) break;

        cluster.addCodepoint(cp, charWidth(b, cp, tab_width));
        prev_cp = cp;
        pos += cp_len;
    }

    return cluster.width;
}
1428
+
1429
/// Display width of the single code point that starts at `byte_offset` (wcwidth style:
/// code points are independent, so no cluster scan is needed).
///
/// Returns 0 when `byte_offset` is at or past the end of `text`, and 1 when the sequence
/// would run past the end of `text`.
fn getWidthAtWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    if (b0 < 0x80) {
        const ascii_cp: u21 = b0;
        return charWidth(b0, ascii_cp, tab_width);
    }

    const dec = decodeUtf8Unchecked(text, byte_offset);
    if (byte_offset + dec.len > text.len) return 1;
    return charWidth(b0, dec.cp, tab_width);
}
1445
+
1446
/// Result of looking up the unit that precedes a byte offset.
pub const PrevGraphemeResult = struct {
    /// Byte offset at which the preceding unit starts.
    start_offset: usize,
    /// Display width of that unit in columns.
    width: u32,
};
1450
+
1451
/// Locate the unit that precedes `byte_offset`.
///
/// Thin dispatcher on `width_method`: code-point based for `.wcwidth`, grapheme-cluster
/// based for `.unicode` and `.no_zwj`. Both implementations return null when
/// `byte_offset` is 0, `text` is empty, or `byte_offset` lies past the end of `text`.
pub fn getPrevGraphemeStart(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    return switch (width_method) {
        .wcwidth => getPrevGraphemeStartWCWidth(text, byte_offset, tab_width),
        .unicode, .no_zwj => getPrevGraphemeStartUnicode(text, byte_offset, tab_width, width_method),
    };
}
1458
+
1459
/// Locate the last code point with a non-zero width that starts before `byte_offset`
/// (wcwidth style). Zero-width code points are skipped, so the result may lie several code
/// points back. Returns null when no such code point exists, when `byte_offset` is 0, when
/// `text` is empty, or when `byte_offset` lies past the end of `text`.
///
/// The text is scanned forward from its start.
fn getPrevGraphemeStartWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    var found: ?PrevGraphemeResult = null;
    var pos: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            // A sequence reaching past the end of the text is measured as U+FFFD.
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        const cp_width = charWidth(b, cp, tab_width);
        if (cp_width > 0) {
            found = .{ .start_offset = pos, .width = cp_width };
        }
        pos += cp_len;
    }

    return found;
}
1488
+
1489
/// Locate the grapheme cluster that precedes `byte_offset` and measure it.
///
/// Returns null when `byte_offset` is 0, `text` is empty, or `byte_offset` lies past the
/// end of `text`. Otherwise the text is scanned forward from its start and the start of the
/// last cluster that begins before `byte_offset` is reported, with its width taken from
/// `getWidthAt`.
///
/// Code points rejected by `isValidCodepoint` are skipped: they neither start a cluster nor
/// take part in the break test.
///
/// NOTE(review): segmentation here calls `uucode.grapheme.isBreak` directly, while the width
/// comes from `getWidthAt(..., width_method)`; confirm the two agree for `.no_zwj`.
fn getPrevGraphemeStartUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: ?u21 = null;
    // Start of the most recent cluster. Every value stored here is a `pos` that satisfied
    // `pos < byte_offset`, so it is always strictly before `byte_offset`; no fallback to an
    // earlier cluster is needed.
    var last_cluster_start: usize = 0;
    var pos: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            // A sequence reaching past the end of the text is treated as U+FFFD.
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        if (isValidCodepoint(cp)) {
            // `prev_cp` only ever holds code points that passed isValidCodepoint, so it
            // needs no re-validation before the break test.
            const starts_cluster = if (prev_cp) |p|
                uucode.grapheme.isBreak(p, cp, &break_state)
            else
                true;

            if (starts_cluster) last_cluster_start = pos;
            prev_cp = cp;
        }

        pos += cp_len;
    }

    return .{
        .start_offset = last_cluster_start,
        .width = getWidthAt(text, last_cluster_start, tab_width, width_method),
    };
}
1540
+
1541
/// Display width of `text` in columns.
///
/// Thin dispatcher on `width_method`: per-codepoint summation for `.wcwidth`,
/// grapheme-cluster based measurement for `.unicode` and `.no_zwj`.
pub fn calculateTextWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .wcwidth => calculateTextWidthWCWidth(text, tab_width, isASCIIOnly),
        .unicode, .no_zwj => calculateTextWidthUnicode(text, tab_width, isASCIIOnly, width_method),
    };
}
1548
+
1549
/// Calculate text width using Unicode grapheme cluster segmentation.
///
/// Each cluster contributes the width accumulated by `GraphemeWidthState`; the total is
/// the sum over all clusters. With `isASCIIOnly` set the byte length is returned directly.
///
/// The cluster accumulator is seeded with a zero-width placeholder and the first code point
/// always opens a cluster, so the accumulator is never read or updated while undefined,
/// regardless of what the break test reports for the first code point.
fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: one byte == one column.
    if (isASCIIOnly) {
        return @intCast(text.len);
    }

    var total_width: u32 = 0;
    var pos: usize = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;
    var cluster = GraphemeWidthState.init(0, 0, width_method);

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            // Decode once; a sequence reaching past the end of the text is measured as U+FFFD.
            const dec = decodeUtf8Unchecked(text, pos);
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        // The break test is always called so its state advances exactly as before.
        const reported_break = isGraphemeBreak(prev_cp, cp, &break_state, width_method);
        const cp_width = charWidth(b0, cp, tab_width);

        if (reported_break or prev_cp == null) {
            // Commit the finished cluster (none before the first code point).
            if (prev_cp != null) {
                total_width += cluster.width;
            }
            cluster = GraphemeWidthState.init(cp, cp_width, width_method);
        } else {
            cluster.addCodepoint(cp, cp_width);
        }

        prev_cp = cp;
        pos += cp_len;
    }

    // `text` is non-empty here, so the loop ran at least once and a last cluster is pending.
    return total_width + cluster.width;
}
1597
+
1598
/// Calculate text width by summing the width of every code point independently
/// (wcwidth style). With `isASCIIOnly` set the byte length is returned directly.
fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: one byte == one column.
    if (isASCIIOnly) return @intCast(text.len);

    var width_sum: u32 = 0;
    var pos: usize = 0;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            // A sequence reaching past the end of the text is measured as U+FFFD.
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        width_sum += charWidth(b0, cp, tab_width);
        pos += cp_len;
    }

    return width_sum;
}
1628
+
1629
/// Cached description of one grapheme cluster.
pub const GraphemeInfo = struct {
    /// Byte offset of the cluster within the text.
    byte_offset: u32,
    /// Length of the cluster in bytes (u8, so at most 255).
    byte_len: u8,
    /// Display width of the cluster in columns (u8, so at most 255).
    width: u8,
    /// Column at which the cluster starts.
    col_offset: u32,
};
1636
+
1637
/// Owning container for a list of `GraphemeInfo` entries.
pub const GraphemeInfoResult = struct {
    /// The collected entries; backed by the allocator passed to `init`.
    graphemes: std.ArrayList(GraphemeInfo),

    /// Create an empty result whose list allocates from `allocator`.
    pub fn init(allocator: std.mem.Allocator) GraphemeInfoResult {
        const empty_list = std.ArrayList(GraphemeInfo).init(allocator);
        return GraphemeInfoResult{ .graphemes = empty_list };
    }

    /// Release the list's memory.
    pub fn deinit(self: *GraphemeInfoResult) void {
        self.graphemes.deinit();
    }

    /// Drop all entries but keep the allocated capacity for reuse.
    pub fn reset(self: *GraphemeInfoResult) void {
        self.graphemes.clearRetainingCapacity();
    }
};
1654
+
1655
/// Collect grapheme information for `text` into `result`, allocating with `allocator`.
///
/// Thin dispatcher on `width_method`: `.wcwidth` uses the wcwidth implementation, `.unicode`
/// and `.no_zwj` use the grapheme-cluster implementation. Errors from the selected
/// implementation are returned to the caller.
pub fn findGraphemeInfo(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    return switch (width_method) {
        .wcwidth => findGraphemeInfoWCWidth(text, tab_width, isASCIIOnly, allocator, result),
        .unicode, .no_zwj => findGraphemeInfoUnicode(text, tab_width, isASCIIOnly, width_method, allocator, result),
    };
}
1670
+
1671
/// Appends one GraphemeInfo entry describing the cluster that occupies the
/// byte range [start, end) and begins at display column `start_col`.
fn appendGraphemeClusterInfo(
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
    start: usize,
    end: usize,
    start_col: u32,
    width: u32,
) !void {
    try result.append(allocator, GraphemeInfo{
        .byte_offset = @intCast(start),
        .byte_len = @intCast(end - start),
        .width = @intCast(width),
        .col_offset = start_col,
    });
}

/// Find grapheme clusters using Unicode grapheme cluster segmentation.
/// Each cluster is treated as a single unit for width calculation.
///
/// Cluster boundaries are decided by `isGraphemeBreak`; the width of a cluster
/// is accumulated in a `GraphemeWidthState`. An entry is appended to `result`
/// only for clusters that contain a multi-byte codepoint or begin with a tab;
/// every other cluster merely advances the running column. Zero-width clusters
/// are skipped unless `width_method` is `.wcwidth`.
///
/// Returns early without touching `result` when `text` is empty, or when
/// `isASCIIOnly` is set and `width_method` is not `.wcwidth`.
/// Errors from `result.append` are propagated to the caller.
fn findGraphemeInfoUnicode(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    // The ASCII-only shortcut is bypassed for .wcwidth so that such input is
    // still scanned if this function is ever invoked with that method.
    if (isASCIIOnly and width_method != .wcwidth) {
        return;
    }

    if (text.len == 0) {
        return;
    }

    const vector_len = 16;
    // Loop-invariant: in .wcwidth mode zero-width clusters are emitted too.
    const keep_zero_width = (width_method == .wcwidth);

    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated. `cluster_width_state`
    // is only meaningful once the first codepoint has been consumed
    // (prev_cp != null).
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        // Fast path: the whole 16-byte chunk is ASCII, so every byte is one codepoint.
        if (!@reduce(.Or, is_non_ascii)) {
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const at = pos + i;
                const b = text[at];
                const curr_cp: u21 = b;

                if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
                    // Close the previous cluster (if any) before opening a new one.
                    if (prev_cp != null) {
                        if ((cluster_is_multibyte or cluster_is_tab) and
                            (keep_zero_width or cluster_width_state.width > 0))
                        {
                            try appendGraphemeClusterInfo(
                                allocator,
                                result,
                                cluster_start,
                                at,
                                cluster_start_col,
                                cluster_width_state.width,
                            );
                        }
                        col += cluster_width_state.width;
                    }

                    cluster_start = at;
                    cluster_start_col = col;
                    cluster_is_tab = (b == '\t');
                    cluster_is_multibyte = false;

                    const cp_width = asciiCharWidth(b, tab_width);
                    cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                } else {
                    // An ASCII byte extending the current cluster; expected to be
                    // rare, but handled so the width state stays consistent.
                    const cp_width = asciiCharWidth(b, tab_width);
                    cluster_width_state.addCodepoint(curr_cp, cp_width);
                }

                prev_cp = curr_cp;
            }
            pos += vector_len;
            continue;
        }

        // Slow path: the chunk mixes ASCII and non-ASCII bytes, so walk it
        // codepoint by codepoint. A codepoint may run past the chunk end; `pos`
        // is advanced by the number of bytes actually consumed.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const at = pos + i;
            const b0 = text[at];

            // Decode once and reuse both the codepoint and its byte length.
            var curr_cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                curr_cp = dec.cp;
                cp_len = dec.len;
            }

            // Sequence claims more bytes than remain: stop scanning.
            if (at + cp_len > text.len) break;

            if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
                if (prev_cp != null) {
                    if ((cluster_is_multibyte or cluster_is_tab) and
                        (keep_zero_width or cluster_width_state.width > 0))
                    {
                        try appendGraphemeClusterInfo(
                            allocator,
                            result,
                            cluster_start,
                            at,
                            cluster_start_col,
                            cluster_width_state.width,
                        );
                    }
                    col += cluster_width_state.width;
                }

                cluster_start = at;
                cluster_start_col = col;
                cluster_is_tab = (b0 == '\t');
                cluster_is_multibyte = (cp_len != 1);

                const cp_width = charWidth(b0, curr_cp, tab_width);
                cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
            } else {
                cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
                const cp_width = charWidth(b0, curr_cp, tab_width);
                cluster_width_state.addCodepoint(curr_cp, cp_width);
            }

            prev_cp = curr_cp;
            i += cp_len;
        }
        pos += i;
    }

    // Tail: fewer than `vector_len` bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];

        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        if (pos + cp_len > text.len) break;

        if (isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method)) {
            if (prev_cp != null) {
                if ((cluster_is_multibyte or cluster_is_tab) and
                    (keep_zero_width or cluster_width_state.width > 0))
                {
                    try appendGraphemeClusterInfo(
                        allocator,
                        result,
                        cluster_start,
                        pos,
                        cluster_start_col,
                        cluster_width_state.width,
                    );
                }
                col += cluster_width_state.width;
            }

            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);

            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
        } else {
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit the last open cluster; it extends to the end of `text`, including
    // any trailing bytes the loops above refused to decode.
    if (prev_cp != null and (cluster_is_multibyte or cluster_is_tab)) {
        if (keep_zero_width or cluster_width_state.width > 0) {
            try appendGraphemeClusterInfo(
                allocator,
                result,
                cluster_start,
                text.len,
                cluster_start_col,
                cluster_width_state.width,
            );
        }
    }
}
1852
+
1853
/// Find grapheme clusters for the `.wcwidth` width method.
///
/// Boundaries are whatever `isGraphemeBreak` reports when called with
/// `.wcwidth`; codepoints it does not separate are merged into one cluster
/// whose width is accumulated through a `GraphemeWidthState` initialised with
/// `.wcwidth`. An entry is appended to `result` for every cluster that contains
/// a multi-byte codepoint or begins with a tab (no zero-width filtering is
/// applied); other clusters only advance the running column.
///
/// Returns early without touching `result` when `isASCIIOnly` is set or `text`
/// is empty. Errors from `result.append` are propagated to the caller.
fn findGraphemeInfoWCWidth(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    if (isASCIIOnly) {
        return;
    }

    if (text.len == 0) {
        return;
    }

    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated; only meaningful once
    // `cluster_started` is true.
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;
    var cluster_started = false;

    while (pos < text.len) {
        const b0 = text[pos];

        // Decode once and reuse both the codepoint and its byte length.
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        // Sequence claims more bytes than remain: stop scanning. The leftover
        // bytes end up inside the final cluster's byte range below.
        if (pos + cp_len > text.len) break;

        if (isGraphemeBreak(prev_cp, curr_cp, &break_state, .wcwidth)) {
            if (cluster_started) {
                if (cluster_is_multibyte or cluster_is_tab) {
                    try result.append(allocator, GraphemeInfo{
                        .byte_offset = @intCast(cluster_start),
                        .byte_len = @intCast(pos - cluster_start),
                        .width = @intCast(cluster_width_state.width),
                        .col_offset = cluster_start_col,
                    });
                }
                // The column advances whether or not the cluster was emitted.
                col += cluster_width_state.width;
            }

            // Start a new cluster at the current codepoint.
            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, .wcwidth);
            cluster_started = true;
        } else {
            // Codepoint joins the current cluster.
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            const cp_width = charWidth(b0, curr_cp, tab_width);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit the final cluster; it extends to the end of `text`.
    if (cluster_started and (cluster_is_multibyte or cluster_is_tab)) {
        try result.append(allocator, GraphemeInfo{
            .byte_offset = @intCast(cluster_start),
            .byte_len = @intCast(text.len - cluster_start),
            .width = @intCast(cluster_width_state.width),
            .col_offset = cluster_start_col,
        });
    }
}