@fairyhunter13/opentui-core 0.1.113 → 0.1.114

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (591)
  1. package/dev/keypress-debug-renderer.ts +148 -0
  2. package/dev/keypress-debug.ts +43 -0
  3. package/dev/print-env-vars.ts +32 -0
  4. package/dev/test-tmux-graphics-334.sh +68 -0
  5. package/dev/thai-debug-test.ts +68 -0
  6. package/docs/development.md +144 -0
  7. package/package.json +62 -53
  8. package/scripts/build.ts +400 -0
  9. package/scripts/publish.ts +60 -0
  10. package/src/3d/SpriteResourceManager.ts +286 -0
  11. package/src/3d/SpriteUtils.ts +70 -0
  12. package/src/3d/TextureUtils.ts +196 -0
  13. package/src/3d/ThreeRenderable.ts +197 -0
  14. package/src/3d/WGPURenderer.ts +294 -0
  15. package/src/3d/animation/ExplodingSpriteEffect.ts +513 -0
  16. package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +429 -0
  17. package/src/3d/animation/SpriteAnimator.ts +633 -0
  18. package/src/3d/animation/SpriteParticleGenerator.ts +435 -0
  19. package/src/3d/canvas.ts +464 -0
  20. package/src/3d/index.ts +12 -0
  21. package/src/3d/physics/PlanckPhysicsAdapter.ts +72 -0
  22. package/src/3d/physics/RapierPhysicsAdapter.ts +66 -0
  23. package/src/3d/physics/physics-interface.ts +31 -0
  24. package/src/3d/shaders/supersampling.wgsl +201 -0
  25. package/src/3d.ts +3 -0
  26. package/src/NativeSpanFeed.ts +300 -0
  27. package/src/Renderable.ts +1704 -0
  28. package/src/__snapshots__/buffer.test.ts.snap +28 -0
  29. package/src/animation/Timeline.test.ts +2709 -0
  30. package/src/animation/Timeline.ts +598 -0
  31. package/src/ansi.ts +18 -0
  32. package/src/benchmark/attenuation-benchmark.ts +81 -0
  33. package/src/benchmark/colormatrix-benchmark.ts +128 -0
  34. package/src/benchmark/gain-benchmark.ts +80 -0
  35. package/src/benchmark/latest-all-bench-run.json +707 -0
  36. package/src/benchmark/latest-async-bench-run.json +336 -0
  37. package/src/benchmark/latest-default-bench-run.json +657 -0
  38. package/src/benchmark/latest-large-bench-run.json +707 -0
  39. package/src/benchmark/latest-quick-bench-run.json +207 -0
  40. package/src/benchmark/markdown-benchmark.ts +1796 -0
  41. package/src/benchmark/native-span-feed-async-benchmark.ts +355 -0
  42. package/src/benchmark/native-span-feed-benchmark.md +56 -0
  43. package/src/benchmark/native-span-feed-benchmark.ts +596 -0
  44. package/src/benchmark/native-span-feed-compare.ts +280 -0
  45. package/src/benchmark/renderer-benchmark.ts +754 -0
  46. package/src/benchmark/text-table-benchmark.ts +948 -0
  47. package/src/buffer.test.ts +291 -0
  48. package/src/buffer.ts +554 -0
  49. package/src/console.test.ts +612 -0
  50. package/src/console.ts +1254 -0
  51. package/src/edit-buffer.test.ts +1769 -0
  52. package/src/edit-buffer.ts +411 -0
  53. package/src/editor-view.test.ts +1032 -0
  54. package/src/editor-view.ts +284 -0
  55. package/src/examples/ascii-font-selection-demo.ts +245 -0
  56. package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
  57. package/src/examples/assets/concrete.png +0 -0
  58. package/src/examples/assets/crate.png +0 -0
  59. package/src/examples/assets/crate_emissive.png +0 -0
  60. package/src/examples/assets/forrest_background.png +0 -0
  61. package/src/examples/assets/hast-example.json +1018 -0
  62. package/src/examples/assets/heart.png +0 -0
  63. package/src/examples/assets/main_char_heavy_attack.png +0 -0
  64. package/src/examples/assets/main_char_idle.png +0 -0
  65. package/src/examples/assets/main_char_jump_end.png +0 -0
  66. package/src/examples/assets/main_char_jump_landing.png +0 -0
  67. package/src/examples/assets/main_char_jump_start.png +0 -0
  68. package/src/examples/assets/main_char_run_loop.png +0 -0
  69. package/src/examples/assets/roughness_map.jpg +0 -0
  70. package/src/examples/build.ts +115 -0
  71. package/src/examples/code-demo.ts +924 -0
  72. package/src/examples/console-demo.ts +358 -0
  73. package/src/examples/core-plugin-slots-demo.ts +759 -0
  74. package/src/examples/diff-demo.ts +701 -0
  75. package/src/examples/draggable-three-demo.ts +259 -0
  76. package/src/examples/editor-demo.ts +322 -0
  77. package/src/examples/extmarks-demo.ts +196 -0
  78. package/src/examples/focus-restore-demo.ts +310 -0
  79. package/src/examples/fonts.ts +245 -0
  80. package/src/examples/fractal-shader-demo.ts +268 -0
  81. package/src/examples/framebuffer-demo.ts +674 -0
  82. package/src/examples/full-unicode-demo.ts +241 -0
  83. package/src/examples/golden-star-demo.ts +933 -0
  84. package/src/examples/grayscale-buffer-demo.ts +249 -0
  85. package/src/examples/hast-syntax-highlighting-demo.ts +129 -0
  86. package/src/examples/index.ts +926 -0
  87. package/src/examples/input-demo.ts +377 -0
  88. package/src/examples/input-select-layout-demo.ts +425 -0
  89. package/src/examples/install.sh +143 -0
  90. package/src/examples/keypress-debug-demo.ts +452 -0
  91. package/src/examples/lib/HexList.ts +122 -0
  92. package/src/examples/lib/PaletteGrid.ts +125 -0
  93. package/src/examples/lib/standalone-keys.ts +25 -0
  94. package/src/examples/lib/tab-controller.ts +243 -0
  95. package/src/examples/lights-phong-demo.ts +290 -0
  96. package/src/examples/link-demo.ts +220 -0
  97. package/src/examples/live-state-demo.ts +480 -0
  98. package/src/examples/markdown-demo.ts +725 -0
  99. package/src/examples/mouse-interaction-demo.ts +428 -0
  100. package/src/examples/nested-zindex-demo.ts +357 -0
  101. package/src/examples/opacity-example.ts +235 -0
  102. package/src/examples/opentui-demo.ts +1057 -0
  103. package/src/examples/physx-planck-2d-demo.ts +623 -0
  104. package/src/examples/physx-rapier-2d-demo.ts +655 -0
  105. package/src/examples/relative-positioning-demo.ts +323 -0
  106. package/src/examples/scroll-example.ts +214 -0
  107. package/src/examples/scrollbox-mouse-test.ts +112 -0
  108. package/src/examples/scrollbox-overlay-hit-test.ts +206 -0
  109. package/src/examples/select-demo.ts +237 -0
  110. package/src/examples/shader-cube-demo.ts +1015 -0
  111. package/src/examples/simple-layout-example.ts +591 -0
  112. package/src/examples/slider-demo.ts +617 -0
  113. package/src/examples/split-mode-demo.ts +453 -0
  114. package/src/examples/sprite-animation-demo.ts +443 -0
  115. package/src/examples/sprite-particle-generator-demo.ts +486 -0
  116. package/src/examples/static-sprite-demo.ts +193 -0
  117. package/src/examples/sticky-scroll-example.ts +308 -0
  118. package/src/examples/styled-text-demo.ts +282 -0
  119. package/src/examples/tab-select-demo.ts +219 -0
  120. package/src/examples/terminal-title.ts +29 -0
  121. package/src/examples/terminal.ts +305 -0
  122. package/src/examples/text-node-demo.ts +416 -0
  123. package/src/examples/text-selection-demo.ts +377 -0
  124. package/src/examples/text-table-demo.ts +503 -0
  125. package/src/examples/text-truncation-demo.ts +481 -0
  126. package/src/examples/text-wrap.ts +757 -0
  127. package/src/examples/texture-loading-demo.ts +259 -0
  128. package/src/examples/timeline-example.ts +670 -0
  129. package/src/examples/transparency-demo.ts +400 -0
  130. package/src/examples/vnode-composition-demo.ts +404 -0
  131. package/src/examples/wide-grapheme-overlay-demo.ts +280 -0
  132. package/src/index.ts +24 -0
  133. package/src/lib/KeyHandler.integration.test.ts +292 -0
  134. package/src/lib/KeyHandler.stopPropagation.test.ts +289 -0
  135. package/src/lib/KeyHandler.test.ts +662 -0
  136. package/src/lib/KeyHandler.ts +222 -0
  137. package/src/lib/RGBA.test.ts +984 -0
  138. package/src/lib/RGBA.ts +204 -0
  139. package/src/lib/ascii.font.ts +330 -0
  140. package/src/lib/border.test.ts +83 -0
  141. package/src/lib/border.ts +170 -0
  142. package/src/lib/bunfs.test.ts +27 -0
  143. package/src/lib/bunfs.ts +18 -0
  144. package/src/lib/clipboard.test.ts +41 -0
  145. package/src/lib/clipboard.ts +47 -0
  146. package/src/lib/clock.ts +35 -0
  147. package/src/lib/data-paths.test.ts +133 -0
  148. package/src/lib/data-paths.ts +109 -0
  149. package/src/lib/debounce.ts +106 -0
  150. package/src/lib/detect-links.test.ts +98 -0
  151. package/src/lib/detect-links.ts +56 -0
  152. package/src/lib/env.test.ts +228 -0
  153. package/src/lib/env.ts +209 -0
  154. package/src/lib/extmarks-history.ts +51 -0
  155. package/src/lib/extmarks-multiwidth.test.ts +322 -0
  156. package/src/lib/extmarks.test.ts +3457 -0
  157. package/src/lib/extmarks.ts +843 -0
  158. package/src/lib/fonts/block.json +405 -0
  159. package/src/lib/fonts/grid.json +265 -0
  160. package/src/lib/fonts/huge.json +741 -0
  161. package/src/lib/fonts/pallet.json +314 -0
  162. package/src/lib/fonts/shade.json +591 -0
  163. package/src/lib/fonts/slick.json +321 -0
  164. package/src/lib/fonts/tiny.json +69 -0
  165. package/src/lib/hast-styled-text.ts +59 -0
  166. package/src/lib/index.ts +21 -0
  167. package/src/lib/keymapping.test.ts +317 -0
  168. package/src/lib/keymapping.ts +115 -0
  169. package/src/lib/objects-in-viewport.test.ts +787 -0
  170. package/src/lib/objects-in-viewport.ts +153 -0
  171. package/src/lib/output.capture.ts +58 -0
  172. package/src/lib/parse.keypress-kitty.protocol.test.ts +340 -0
  173. package/src/lib/parse.keypress-kitty.test.ts +663 -0
  174. package/src/lib/parse.keypress-kitty.ts +439 -0
  175. package/src/lib/parse.keypress.test.ts +1849 -0
  176. package/src/lib/parse.keypress.ts +397 -0
  177. package/src/lib/parse.mouse.test.ts +552 -0
  178. package/src/lib/parse.mouse.ts +232 -0
  179. package/src/lib/paste.ts +16 -0
  180. package/src/lib/queue.ts +65 -0
  181. package/src/lib/renderable.validations.test.ts +87 -0
  182. package/src/lib/renderable.validations.ts +83 -0
  183. package/src/lib/scroll-acceleration.ts +98 -0
  184. package/src/lib/selection.ts +240 -0
  185. package/src/lib/singleton.ts +28 -0
  186. package/src/lib/stdin-parser.test.ts +2290 -0
  187. package/src/lib/stdin-parser.ts +1810 -0
  188. package/src/lib/styled-text.ts +178 -0
  189. package/src/lib/terminal-capability-detection.test.ts +202 -0
  190. package/src/lib/terminal-capability-detection.ts +79 -0
  191. package/src/lib/terminal-palette.test.ts +878 -0
  192. package/src/lib/terminal-palette.ts +383 -0
  193. package/src/lib/tree-sitter/assets/README.md +118 -0
  194. package/src/lib/tree-sitter/assets/update.ts +334 -0
  195. package/src/lib/tree-sitter/assets.d.ts +9 -0
  196. package/src/lib/tree-sitter/cache.test.ts +273 -0
  197. package/src/lib/tree-sitter/client.test.ts +1165 -0
  198. package/src/lib/tree-sitter/client.ts +607 -0
  199. package/src/lib/tree-sitter/default-parsers.ts +86 -0
  200. package/src/lib/tree-sitter/download-utils.ts +148 -0
  201. package/src/lib/tree-sitter/index.ts +28 -0
  202. package/src/lib/tree-sitter/parser.worker.ts +1042 -0
  203. package/src/lib/tree-sitter/parsers-config.ts +81 -0
  204. package/src/lib/tree-sitter/resolve-ft.test.ts +55 -0
  205. package/src/lib/tree-sitter/resolve-ft.ts +189 -0
  206. package/src/lib/tree-sitter/types.ts +82 -0
  207. package/src/lib/tree-sitter-styled-text.test.ts +1253 -0
  208. package/src/lib/tree-sitter-styled-text.ts +306 -0
  209. package/src/lib/validate-dir-name.ts +55 -0
  210. package/src/lib/yoga.options.test.ts +628 -0
  211. package/src/lib/yoga.options.ts +346 -0
  212. package/src/plugins/core-slot.ts +579 -0
  213. package/src/plugins/registry.ts +402 -0
  214. package/src/plugins/types.ts +46 -0
  215. package/src/post/effects.ts +930 -0
  216. package/src/post/filters.ts +489 -0
  217. package/src/post/matrices.ts +288 -0
  218. package/src/renderables/ASCIIFont.ts +219 -0
  219. package/src/renderables/Box.test.ts +205 -0
  220. package/src/renderables/Box.ts +326 -0
  221. package/src/renderables/Code.test.ts +2062 -0
  222. package/src/renderables/Code.ts +357 -0
  223. package/src/renderables/Diff.regression.test.ts +226 -0
  224. package/src/renderables/Diff.test.ts +3101 -0
  225. package/src/renderables/Diff.ts +1211 -0
  226. package/src/renderables/EditBufferRenderable.test.ts +288 -0
  227. package/src/renderables/EditBufferRenderable.ts +1166 -0
  228. package/src/renderables/FrameBuffer.ts +47 -0
  229. package/src/renderables/Input.test.ts +1228 -0
  230. package/src/renderables/Input.ts +247 -0
  231. package/src/renderables/LineNumberRenderable.ts +724 -0
  232. package/src/renderables/Markdown.ts +1393 -0
  233. package/src/renderables/ScrollBar.ts +422 -0
  234. package/src/renderables/ScrollBox.ts +883 -0
  235. package/src/renderables/Select.test.ts +1033 -0
  236. package/src/renderables/Select.ts +524 -0
  237. package/src/renderables/Slider.test.ts +456 -0
  238. package/src/renderables/Slider.ts +342 -0
  239. package/src/renderables/TabSelect.test.ts +197 -0
  240. package/src/renderables/TabSelect.ts +455 -0
  241. package/src/renderables/Text.selection-buffer.test.ts +123 -0
  242. package/src/renderables/Text.test.ts +2660 -0
  243. package/src/renderables/Text.ts +147 -0
  244. package/src/renderables/TextBufferRenderable.ts +518 -0
  245. package/src/renderables/TextNode.test.ts +1058 -0
  246. package/src/renderables/TextNode.ts +325 -0
  247. package/src/renderables/TextTable.test.ts +1421 -0
  248. package/src/renderables/TextTable.ts +1344 -0
  249. package/src/renderables/Textarea.ts +430 -0
  250. package/src/renderables/TimeToFirstDraw.ts +89 -0
  251. package/src/renderables/__snapshots__/Code.test.ts.snap +13 -0
  252. package/src/renderables/__snapshots__/Diff.test.ts.snap +785 -0
  253. package/src/renderables/__snapshots__/Text.test.ts.snap +421 -0
  254. package/src/renderables/__snapshots__/TextTable.test.ts.snap +215 -0
  255. package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +144 -0
  256. package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +816 -0
  257. package/src/renderables/__tests__/LineNumberRenderable.test.ts +1865 -0
  258. package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +85 -0
  259. package/src/renderables/__tests__/Markdown.code-colors.test.ts +242 -0
  260. package/src/renderables/__tests__/Markdown.test.ts +2518 -0
  261. package/src/renderables/__tests__/MultiRenderable.selection.test.ts +87 -0
  262. package/src/renderables/__tests__/Textarea.buffer.test.ts +682 -0
  263. package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +675 -0
  264. package/src/renderables/__tests__/Textarea.editing.test.ts +2041 -0
  265. package/src/renderables/__tests__/Textarea.error-handling.test.ts +35 -0
  266. package/src/renderables/__tests__/Textarea.events.test.ts +738 -0
  267. package/src/renderables/__tests__/Textarea.highlights.test.ts +590 -0
  268. package/src/renderables/__tests__/Textarea.keybinding.test.ts +3149 -0
  269. package/src/renderables/__tests__/Textarea.paste.test.ts +357 -0
  270. package/src/renderables/__tests__/Textarea.rendering.test.ts +1866 -0
  271. package/src/renderables/__tests__/Textarea.scroll.test.ts +733 -0
  272. package/src/renderables/__tests__/Textarea.selection.test.ts +1590 -0
  273. package/src/renderables/__tests__/Textarea.stress.test.ts +670 -0
  274. package/src/renderables/__tests__/Textarea.undo-redo.test.ts +383 -0
  275. package/src/renderables/__tests__/Textarea.visual-lines.test.ts +310 -0
  276. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +221 -0
  277. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +89 -0
  278. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +457 -0
  279. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +158 -0
  280. package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +387 -0
  281. package/src/renderables/__tests__/markdown-parser.test.ts +217 -0
  282. package/src/renderables/__tests__/renderable-test-utils.ts +60 -0
  283. package/src/renderables/composition/README.md +8 -0
  284. package/src/renderables/composition/VRenderable.ts +32 -0
  285. package/src/renderables/composition/constructs.ts +127 -0
  286. package/src/renderables/composition/vnode.ts +289 -0
  287. package/src/renderables/index.ts +23 -0
  288. package/src/renderables/markdown-parser.ts +66 -0
  289. package/src/renderer.ts +2681 -0
  290. package/src/runtime-plugin-support.ts +39 -0
  291. package/src/runtime-plugin.ts +615 -0
  292. package/src/syntax-style.test.ts +841 -0
  293. package/src/syntax-style.ts +257 -0
  294. package/src/testing/README.md +210 -0
  295. package/src/testing/capture-spans.test.ts +194 -0
  296. package/src/testing/integration.test.ts +276 -0
  297. package/src/testing/manual-clock.ts +117 -0
  298. package/src/testing/mock-keys.test.ts +1378 -0
  299. package/src/testing/mock-keys.ts +457 -0
  300. package/src/testing/mock-mouse.test.ts +218 -0
  301. package/src/testing/mock-mouse.ts +247 -0
  302. package/src/testing/mock-tree-sitter-client.ts +73 -0
  303. package/src/testing/spy.ts +13 -0
  304. package/src/testing/test-recorder.test.ts +415 -0
  305. package/src/testing/test-recorder.ts +145 -0
  306. package/src/testing/test-renderer.ts +132 -0
  307. package/src/testing.ts +7 -0
  308. package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +481 -0
  309. package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +19 -0
  310. package/src/tests/__snapshots__/scrollbox.test.ts.snap +29 -0
  311. package/src/tests/absolute-positioning.snapshot.test.ts +638 -0
  312. package/src/tests/allocator-stats.test.ts +38 -0
  313. package/src/tests/destroy-during-render.test.ts +200 -0
  314. package/src/tests/destroy-on-exit.fixture.ts +36 -0
  315. package/src/tests/destroy-on-exit.test.ts +41 -0
  316. package/src/tests/hover-cursor.test.ts +98 -0
  317. package/src/tests/native-span-feed-async.test.ts +173 -0
  318. package/src/tests/native-span-feed-close.test.ts +120 -0
  319. package/src/tests/native-span-feed-coverage.test.ts +227 -0
  320. package/src/tests/native-span-feed-edge-cases.test.ts +352 -0
  321. package/src/tests/native-span-feed-use-after-free.test.ts +45 -0
  322. package/src/tests/opacity.test.ts +123 -0
  323. package/src/tests/renderable.snapshot.test.ts +524 -0
  324. package/src/tests/renderable.test.ts +1281 -0
  325. package/src/tests/renderer.clock.test.ts +158 -0
  326. package/src/tests/renderer.console-startup.test.ts +185 -0
  327. package/src/tests/renderer.control.test.ts +425 -0
  328. package/src/tests/renderer.core-slot-binding.test.ts +952 -0
  329. package/src/tests/renderer.cursor.test.ts +26 -0
  330. package/src/tests/renderer.destroy-during-render.test.ts +147 -0
  331. package/src/tests/renderer.focus-restore.test.ts +257 -0
  332. package/src/tests/renderer.focus.test.ts +294 -0
  333. package/src/tests/renderer.idle.test.ts +219 -0
  334. package/src/tests/renderer.input.test.ts +2237 -0
  335. package/src/tests/renderer.kitty-flags.test.ts +195 -0
  336. package/src/tests/renderer.mouse.test.ts +1274 -0
  337. package/src/tests/renderer.palette.test.ts +629 -0
  338. package/src/tests/renderer.selection.test.ts +49 -0
  339. package/src/tests/renderer.slot-registry.test.ts +684 -0
  340. package/src/tests/renderer.useMouse.test.ts +47 -0
  341. package/src/tests/runtime-plugin-node-modules-cycle.fixture.ts +76 -0
  342. package/src/tests/runtime-plugin-node-modules-mjs.fixture.ts +43 -0
  343. package/src/tests/runtime-plugin-node-modules-no-bare-rewrite.fixture.ts +67 -0
  344. package/src/tests/runtime-plugin-node-modules-package-type-cache.fixture.ts +72 -0
  345. package/src/tests/runtime-plugin-node-modules-runtime-specifier.fixture.ts +44 -0
  346. package/src/tests/runtime-plugin-node-modules-scoped-package-bare-rewrite.fixture.ts +85 -0
  347. package/src/tests/runtime-plugin-path-alias.fixture.ts +43 -0
  348. package/src/tests/runtime-plugin-resolve-roots.fixture.ts +65 -0
  349. package/src/tests/runtime-plugin-support.fixture.ts +11 -0
  350. package/src/tests/runtime-plugin-support.test.ts +19 -0
  351. package/src/tests/runtime-plugin-windows-file-url.fixture.ts +30 -0
  352. package/src/tests/runtime-plugin.fixture.ts +40 -0
  353. package/src/tests/runtime-plugin.test.ts +354 -0
  354. package/src/tests/scrollbox-culling-bug.test.ts +114 -0
  355. package/src/tests/scrollbox-hitgrid-resize.test.ts +136 -0
  356. package/src/tests/scrollbox-hitgrid.test.ts +909 -0
  357. package/src/tests/scrollbox.test.ts +1530 -0
  358. package/src/tests/wrap-resize-perf.test.ts +276 -0
  359. package/src/tests/yoga-setters.test.ts +921 -0
  360. package/src/text-buffer-view.test.ts +705 -0
  361. package/src/text-buffer-view.ts +189 -0
  362. package/src/text-buffer.test.ts +347 -0
  363. package/src/text-buffer.ts +250 -0
  364. package/src/types.ts +161 -0
  365. package/src/utils.ts +88 -0
  366. package/src/zig/ansi.zig +268 -0
  367. package/src/zig/bench/README.md +50 -0
  368. package/src/zig/bench/buffer-draw-text-buffer_bench.zig +887 -0
  369. package/src/zig/bench/edit-buffer_bench.zig +476 -0
  370. package/src/zig/bench/native-span-feed_bench.zig +100 -0
  371. package/src/zig/bench/rope-markers_bench.zig +713 -0
  372. package/src/zig/bench/rope_bench.zig +514 -0
  373. package/src/zig/bench/styled-text_bench.zig +470 -0
  374. package/src/zig/bench/text-buffer-coords_bench.zig +362 -0
  375. package/src/zig/bench/text-buffer-view_bench.zig +459 -0
  376. package/src/zig/bench/text-chunk-graphemes_bench.zig +273 -0
  377. package/src/zig/bench/utf8_bench.zig +799 -0
  378. package/src/zig/bench-utils.zig +431 -0
  379. package/src/zig/bench.zig +217 -0
  380. package/src/zig/buffer-methods.zig +211 -0
  381. package/src/zig/buffer.zig +2281 -0
  382. package/src/zig/build.zig +289 -0
  383. package/src/zig/build.zig.zon +16 -0
  384. package/src/zig/edit-buffer.zig +825 -0
  385. package/src/zig/editor-view.zig +802 -0
  386. package/src/zig/event-bus.zig +13 -0
  387. package/src/zig/event-emitter.zig +65 -0
  388. package/src/zig/file-logger.zig +92 -0
  389. package/src/zig/grapheme.zig +599 -0
  390. package/src/zig/lib.zig +1854 -0
  391. package/src/zig/link.zig +333 -0
  392. package/src/zig/logger.zig +43 -0
  393. package/src/zig/mem-registry.zig +125 -0
  394. package/src/zig/native-span-feed-bench-lib.zig +7 -0
  395. package/src/zig/native-span-feed.zig +708 -0
  396. package/src/zig/renderer.zig +1393 -0
  397. package/src/zig/rope.zig +1220 -0
  398. package/src/zig/syntax-style.zig +161 -0
  399. package/src/zig/terminal.zig +987 -0
  400. package/src/zig/test.zig +72 -0
  401. package/src/zig/tests/README.md +18 -0
  402. package/src/zig/tests/buffer-methods_test.zig +1109 -0
  403. package/src/zig/tests/buffer_test.zig +2557 -0
  404. package/src/zig/tests/edit-buffer-history_test.zig +271 -0
  405. package/src/zig/tests/edit-buffer_test.zig +1689 -0
  406. package/src/zig/tests/editor-view_test.zig +3299 -0
  407. package/src/zig/tests/event-emitter_test.zig +249 -0
  408. package/src/zig/tests/grapheme_test.zig +1304 -0
  409. package/src/zig/tests/link_test.zig +190 -0
  410. package/src/zig/tests/mem-registry_test.zig +473 -0
  411. package/src/zig/tests/memory_leak_regression_test.zig +159 -0
  412. package/src/zig/tests/native-span-feed_test.zig +1264 -0
  413. package/src/zig/tests/renderer_test.zig +1017 -0
  414. package/src/zig/tests/rope-nested_test.zig +712 -0
  415. package/src/zig/tests/rope_fuzz_test.zig +238 -0
  416. package/src/zig/tests/rope_test.zig +2362 -0
  417. package/src/zig/tests/segment-merge.test.zig +148 -0
  418. package/src/zig/tests/syntax-style_test.zig +557 -0
  419. package/src/zig/tests/terminal_test.zig +754 -0
  420. package/src/zig/tests/text-buffer-drawing_test.zig +3237 -0
  421. package/src/zig/tests/text-buffer-highlights_test.zig +666 -0
  422. package/src/zig/tests/text-buffer-iterators_test.zig +776 -0
  423. package/src/zig/tests/text-buffer-segment_test.zig +320 -0
  424. package/src/zig/tests/text-buffer-selection_test.zig +1035 -0
  425. package/src/zig/tests/text-buffer-selection_viewport_test.zig +358 -0
  426. package/src/zig/tests/text-buffer-view_test.zig +3649 -0
  427. package/src/zig/tests/text-buffer_test.zig +2191 -0
  428. package/src/zig/tests/unicode-width-map.zon +3909 -0
  429. package/src/zig/tests/utf8_no_zwj_test.zig +260 -0
  430. package/src/zig/tests/utf8_test.zig +4057 -0
  431. package/src/zig/tests/utf8_wcwidth_cursor_test.zig +267 -0
  432. package/src/zig/tests/utf8_wcwidth_test.zig +357 -0
  433. package/src/zig/tests/word-wrap-editing_test.zig +498 -0
  434. package/src/zig/tests/wrap-cache-perf_test.zig +113 -0
  435. package/src/zig/text-buffer-iterators.zig +499 -0
  436. package/src/zig/text-buffer-segment.zig +404 -0
  437. package/src/zig/text-buffer-view.zig +1371 -0
  438. package/src/zig/text-buffer.zig +1180 -0
  439. package/src/zig/utf8.zig +1948 -0
  440. package/src/zig/utils.zig +9 -0
  441. package/src/zig-structs.ts +261 -0
  442. package/src/zig.ts +3884 -0
  443. package/tsconfig.build.json +24 -0
  444. package/tsconfig.json +27 -0
  445. package/3d/SpriteResourceManager.d.ts +0 -74
  446. package/3d/SpriteUtils.d.ts +0 -13
  447. package/3d/TextureUtils.d.ts +0 -24
  448. package/3d/ThreeRenderable.d.ts +0 -40
  449. package/3d/WGPURenderer.d.ts +0 -61
  450. package/3d/animation/ExplodingSpriteEffect.d.ts +0 -71
  451. package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +0 -76
  452. package/3d/animation/SpriteAnimator.d.ts +0 -124
  453. package/3d/animation/SpriteParticleGenerator.d.ts +0 -62
  454. package/3d/canvas.d.ts +0 -44
  455. package/3d/index.d.ts +0 -12
  456. package/3d/physics/PlanckPhysicsAdapter.d.ts +0 -19
  457. package/3d/physics/RapierPhysicsAdapter.d.ts +0 -19
  458. package/3d/physics/physics-interface.d.ts +0 -27
  459. package/3d.d.ts +0 -2
  460. package/3d.js +0 -34041
  461. package/3d.js.map +0 -155
  462. package/LICENSE +0 -21
  463. package/NativeSpanFeed.d.ts +0 -41
  464. package/Renderable.d.ts +0 -334
  465. package/animation/Timeline.d.ts +0 -126
  466. package/ansi.d.ts +0 -13
  467. package/buffer.d.ts +0 -111
  468. package/console.d.ts +0 -144
  469. package/edit-buffer.d.ts +0 -98
  470. package/editor-view.d.ts +0 -73
  471. package/index-9vwc3fg6.js +0 -12260
  472. package/index-9vwc3fg6.js.map +0 -42
  473. package/index-dcj62y8t.js +0 -20614
  474. package/index-dcj62y8t.js.map +0 -67
  475. package/index-f7n39gpy.js +0 -411
  476. package/index-f7n39gpy.js.map +0 -10
  477. package/index.d.ts +0 -23
  478. package/index.js +0 -478
  479. package/index.js.map +0 -9
  480. package/lib/KeyHandler.d.ts +0 -61
  481. package/lib/RGBA.d.ts +0 -25
  482. package/lib/ascii.font.d.ts +0 -508
  483. package/lib/border.d.ts +0 -51
  484. package/lib/bunfs.d.ts +0 -7
  485. package/lib/clipboard.d.ts +0 -17
  486. package/lib/clock.d.ts +0 -15
  487. package/lib/data-paths.d.ts +0 -26
  488. package/lib/debounce.d.ts +0 -42
  489. package/lib/detect-links.d.ts +0 -6
  490. package/lib/env.d.ts +0 -42
  491. package/lib/extmarks-history.d.ts +0 -17
  492. package/lib/extmarks.d.ts +0 -89
  493. package/lib/hast-styled-text.d.ts +0 -17
  494. package/lib/index.d.ts +0 -21
  495. package/lib/keymapping.d.ts +0 -25
  496. package/lib/objects-in-viewport.d.ts +0 -24
  497. package/lib/output.capture.d.ts +0 -24
  498. package/lib/parse.keypress-kitty.d.ts +0 -2
  499. package/lib/parse.keypress.d.ts +0 -26
  500. package/lib/parse.mouse.d.ts +0 -30
  501. package/lib/paste.d.ts +0 -7
  502. package/lib/queue.d.ts +0 -15
  503. package/lib/renderable.validations.d.ts +0 -12
  504. package/lib/scroll-acceleration.d.ts +0 -43
  505. package/lib/selection.d.ts +0 -63
  506. package/lib/singleton.d.ts +0 -7
  507. package/lib/stdin-parser.d.ts +0 -87
  508. package/lib/styled-text.d.ts +0 -63
  509. package/lib/terminal-capability-detection.d.ts +0 -30
  510. package/lib/terminal-palette.d.ts +0 -50
  511. package/lib/tree-sitter/assets/update.d.ts +0 -11
  512. package/lib/tree-sitter/client.d.ts +0 -47
  513. package/lib/tree-sitter/default-parsers.d.ts +0 -2
  514. package/lib/tree-sitter/download-utils.d.ts +0 -21
  515. package/lib/tree-sitter/index.d.ts +0 -8
  516. package/lib/tree-sitter/parser.worker.d.ts +0 -1
  517. package/lib/tree-sitter/parsers-config.d.ts +0 -53
  518. package/lib/tree-sitter/resolve-ft.d.ts +0 -5
  519. package/lib/tree-sitter/types.d.ts +0 -82
  520. package/lib/tree-sitter-styled-text.d.ts +0 -14
  521. package/lib/validate-dir-name.d.ts +0 -1
  522. package/lib/yoga.options.d.ts +0 -32
  523. package/parser.worker.js +0 -899
  524. package/parser.worker.js.map +0 -12
  525. package/plugins/core-slot.d.ts +0 -72
  526. package/plugins/registry.d.ts +0 -42
  527. package/plugins/types.d.ts +0 -34
  528. package/post/effects.d.ts +0 -147
  529. package/post/filters.d.ts +0 -65
  530. package/post/matrices.d.ts +0 -20
  531. package/renderables/ASCIIFont.d.ts +0 -52
  532. package/renderables/Box.d.ts +0 -81
  533. package/renderables/Code.d.ts +0 -78
  534. package/renderables/Diff.d.ts +0 -142
  535. package/renderables/EditBufferRenderable.d.ts +0 -237
  536. package/renderables/FrameBuffer.d.ts +0 -16
  537. package/renderables/Input.d.ts +0 -67
  538. package/renderables/LineNumberRenderable.d.ts +0 -78
  539. package/renderables/Markdown.d.ts +0 -185
  540. package/renderables/ScrollBar.d.ts +0 -77
  541. package/renderables/ScrollBox.d.ts +0 -124
  542. package/renderables/Select.d.ts +0 -115
  543. package/renderables/Slider.d.ts +0 -47
  544. package/renderables/TabSelect.d.ts +0 -96
  545. package/renderables/Text.d.ts +0 -36
  546. package/renderables/TextBufferRenderable.d.ts +0 -105
  547. package/renderables/TextNode.d.ts +0 -91
  548. package/renderables/TextTable.d.ts +0 -140
  549. package/renderables/Textarea.d.ts +0 -63
  550. package/renderables/TimeToFirstDraw.d.ts +0 -24
  551. package/renderables/__tests__/renderable-test-utils.d.ts +0 -12
  552. package/renderables/composition/VRenderable.d.ts +0 -16
  553. package/renderables/composition/constructs.d.ts +0 -35
  554. package/renderables/composition/vnode.d.ts +0 -46
  555. package/renderables/index.d.ts +0 -23
  556. package/renderables/markdown-parser.d.ts +0 -10
  557. package/renderer.d.ts +0 -419
  558. package/runtime-plugin-support.d.ts +0 -3
  559. package/runtime-plugin-support.js +0 -29
  560. package/runtime-plugin-support.js.map +0 -10
  561. package/runtime-plugin.d.ts +0 -16
  562. package/runtime-plugin.js +0 -16
  563. package/runtime-plugin.js.map +0 -9
  564. package/syntax-style.d.ts +0 -54
  565. package/testing/manual-clock.d.ts +0 -17
  566. package/testing/mock-keys.d.ts +0 -81
  567. package/testing/mock-mouse.d.ts +0 -38
  568. package/testing/mock-tree-sitter-client.d.ts +0 -23
  569. package/testing/spy.d.ts +0 -7
  570. package/testing/test-recorder.d.ts +0 -61
  571. package/testing/test-renderer.d.ts +0 -23
  572. package/testing.d.ts +0 -6
  573. package/testing.js +0 -697
  574. package/testing.js.map +0 -15
  575. package/text-buffer-view.d.ts +0 -42
  576. package/text-buffer.d.ts +0 -67
  577. package/types.d.ts +0 -139
  578. package/utils.d.ts +0 -14
  579. package/zig-structs.d.ts +0 -155
  580. package/zig.d.ts +0 -353
  581. /package/{assets → src/lib/tree-sitter/assets}/javascript/highlights.scm +0 -0
  582. /package/{assets → src/lib/tree-sitter/assets}/javascript/tree-sitter-javascript.wasm +0 -0
  583. /package/{assets → src/lib/tree-sitter/assets}/markdown/highlights.scm +0 -0
  584. /package/{assets → src/lib/tree-sitter/assets}/markdown/injections.scm +0 -0
  585. /package/{assets → src/lib/tree-sitter/assets}/markdown/tree-sitter-markdown.wasm +0 -0
  586. /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/highlights.scm +0 -0
  587. /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
  588. /package/{assets → src/lib/tree-sitter/assets}/typescript/highlights.scm +0 -0
  589. /package/{assets → src/lib/tree-sitter/assets}/typescript/tree-sitter-typescript.wasm +0 -0
  590. /package/{assets → src/lib/tree-sitter/assets}/zig/highlights.scm +0 -0
  591. /package/{assets → src/lib/tree-sitter/assets}/zig/tree-sitter-zig.wasm +0 -0
@@ -0,0 +1,4057 @@
1
+ const std = @import("std");
2
+ const testing = std.testing;
3
+ const utf8 = @import("../utf8.zig");
4
+
5
+ // ============================================================================
6
+ // ASCII-ONLY DETECTION TESTS
7
+ // ============================================================================
8
+
9
+ test "isAsciiOnly: empty string" {
10
+ // Empty string is not ASCII-only by convention
11
+ try testing.expect(!utf8.isAsciiOnly(""));
12
+ }
13
+
14
+ test "isAsciiOnly: simple ASCII" {
15
+ try testing.expect(utf8.isAsciiOnly("Hello, World!"));
16
+ try testing.expect(utf8.isAsciiOnly("The quick brown fox"));
17
+ try testing.expect(utf8.isAsciiOnly("0123456789"));
18
+ try testing.expect(utf8.isAsciiOnly("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"));
19
+ }
20
+
21
+ test "isAsciiOnly: control chars rejected" {
22
+ try testing.expect(!utf8.isAsciiOnly("Hello\tWorld"));
23
+ try testing.expect(!utf8.isAsciiOnly("Hello\nWorld"));
24
+ try testing.expect(!utf8.isAsciiOnly("Hello\rWorld"));
25
+ try testing.expect(!utf8.isAsciiOnly("\x00"));
26
+ try testing.expect(!utf8.isAsciiOnly("\x1F"));
27
+ }
28
+
29
+ test "isAsciiOnly: extended ASCII rejected" {
30
+ try testing.expect(!utf8.isAsciiOnly("Hello\x7FWorld"));
31
+ try testing.expect(!utf8.isAsciiOnly("Hello\x80World"));
32
+ try testing.expect(!utf8.isAsciiOnly("Hello\xFFWorld"));
33
+ }
34
+
35
+ test "isAsciiOnly: Unicode rejected" {
36
+ try testing.expect(!utf8.isAsciiOnly("Hello 👋"));
37
+ try testing.expect(!utf8.isAsciiOnly("Hello 世界"));
38
+ try testing.expect(!utf8.isAsciiOnly("café"));
39
+ try testing.expect(!utf8.isAsciiOnly("Привет"));
40
+ }
41
+
42
+ test "isAsciiOnly: space character accepted" {
43
+ try testing.expect(utf8.isAsciiOnly(" "));
44
+ try testing.expect(utf8.isAsciiOnly("   "));
45
+ try testing.expect(utf8.isAsciiOnly("Hello World"));
46
+ }
47
+
48
+ test "isAsciiOnly: all printable ASCII chars" {
49
+ const all_printable = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
50
+ try testing.expect(utf8.isAsciiOnly(all_printable));
51
+ }
52
+
53
+ test "isAsciiOnly: SIMD boundary tests" {
54
+ try testing.expect(utf8.isAsciiOnly("0123456789abcdef"));
55
+ try testing.expect(utf8.isAsciiOnly("0123456789abcde"));
56
+ try testing.expect(utf8.isAsciiOnly("0123456789abcdefg"));
57
+ try testing.expect(utf8.isAsciiOnly("0123456789abcdef0123456789abcdef"));
58
+ try testing.expect(utf8.isAsciiOnly("0123456789abcdef0123456789abcdefX"));
59
+ }
60
+
61
+ test "isAsciiOnly: non-ASCII at different positions" {
62
+ try testing.expect(!utf8.isAsciiOnly("Hello\x00World"));
63
+ try testing.expect(!utf8.isAsciiOnly("\x00bcdefghijklmnop"));
64
+ try testing.expect(!utf8.isAsciiOnly("0123456789abcde\x00"));
65
+ try testing.expect(!utf8.isAsciiOnly("0123456789abcdef\x00"));
66
+ try testing.expect(!utf8.isAsciiOnly("0123456789abcdef0123456789\x00bcdef"));
67
+ try testing.expect(!utf8.isAsciiOnly("0123456789abcdef01234\x00"));
68
+ }
69
+
70
+ test "isAsciiOnly: large ASCII text" {
71
+ const size = 10000;
72
+ const buf = try testing.allocator.alloc(u8, size);
73
+ defer testing.allocator.free(buf);
74
+
75
+ for (buf, 0..) |*b, i| {
76
+ b.* = 32 + @as(u8, @intCast(i % 95));
77
+ }
78
+
79
+ try testing.expect(utf8.isAsciiOnly(buf));
80
+
81
+ buf[5000] = 0x80;
82
+ try testing.expect(!utf8.isAsciiOnly(buf));
83
+ }
84
+
85
+ // ============================================================================
86
+ // LINE BREAK TESTS
87
+ // ============================================================================
88
+
89
+ const LineBreakTestCase = struct {
90
+ name: []const u8,
91
+ input: []const u8,
92
+ expected: []const usize,
93
+ };
94
+
95
+ const line_break_golden_tests = [_]LineBreakTestCase{
96
+ .{
97
+ .name = "empty string",
98
+ .input = "",
99
+ .expected = &[_]usize{},
100
+ },
101
+ .{
102
+ .name = "only LF",
103
+ .input = "a\nb",
104
+ .expected = &[_]usize{1},
105
+ },
106
+ .{
107
+ .name = "only CR",
108
+ .input = "a\rb",
109
+ .expected = &[_]usize{1},
110
+ },
111
+ .{
112
+ .name = "CRLF",
113
+ .input = "a\r\nb",
114
+ .expected = &[_]usize{2}, // CRLF recorded at \n index
115
+ },
116
+ .{
117
+ .name = "ending with CR",
118
+ .input = "a\r",
119
+ .expected = &[_]usize{1},
120
+ },
121
+ .{
122
+ .name = "ending with LF",
123
+ .input = "a\n",
124
+ .expected = &[_]usize{1},
125
+ },
126
+ .{
127
+ .name = "ending with CRLF",
128
+ .input = "a\r\n",
129
+ .expected = &[_]usize{2},
130
+ },
131
+ .{
132
+ .name = "consecutive LF",
133
+ .input = "\n\n",
134
+ .expected = &[_]usize{ 0, 1 },
135
+ },
136
+ .{
137
+ .name = "consecutive CRLF",
138
+ .input = "\r\n\r\n",
139
+ .expected = &[_]usize{ 1, 3 },
140
+ },
141
+ .{
142
+ .name = "mixed breaks",
143
+ .input = "\n\r\n\r",
144
+ .expected = &[_]usize{ 0, 2, 3 },
145
+ },
146
+ .{
147
+ .name = "CR LF separate",
148
+ .input = "\r\r\n",
149
+ .expected = &[_]usize{ 0, 2 },
150
+ },
151
+ .{
152
+ .name = "very long line no breaks",
153
+ .input = "a" ** 1000,
154
+ .expected = &[_]usize{},
155
+ },
156
+ .{
157
+ .name = "multiple LF",
158
+ .input = "line1\nline2\nline3\n",
159
+ .expected = &[_]usize{ 5, 11, 17 },
160
+ },
161
+ .{
162
+ .name = "multiple CRLF",
163
+ .input = "line1\r\nline2\r\nline3\r\n",
164
+ .expected = &[_]usize{ 6, 13, 20 },
165
+ },
166
+ .{
167
+ .name = "mixed line endings",
168
+ .input = "unix\nmac\rwin\r\n",
169
+ .expected = &[_]usize{ 4, 8, 13 },
170
+ },
171
+ };
172
+
173
+ fn testLineBreaks(test_case: LineBreakTestCase, allocator: std.mem.Allocator) !void {
174
+ var result = utf8.LineBreakResult.init(allocator);
175
+ defer result.deinit();
176
+
177
+ try utf8.findLineBreaks(test_case.input, &result);
178
+
179
+ try testing.expectEqual(test_case.expected.len, result.breaks.items.len);
180
+
181
+ for (test_case.expected, 0..) |exp, i| {
182
+ try testing.expectEqual(exp, result.breaks.items[i].pos);
183
+ }
184
+ }
185
+
186
+ test "line breaks: golden tests" {
187
+ for (line_break_golden_tests) |tc| {
188
+ try testLineBreaks(tc, testing.allocator);
189
+ }
190
+ }
191
+
192
+ test "line breaks: CRLF at SIMD16 edge (15-16)" {
193
+ var buf: [32]u8 = undefined;
194
+ @memset(&buf, 'x');
195
+ buf[15] = '\r';
196
+ buf[16] = '\n';
197
+
198
+ const expected = [_]usize{16}; // CRLF recorded at \n index
199
+
200
+ try testLineBreaks(.{
201
+ .name = "CRLF@15-16",
202
+ .input = &buf,
203
+ .expected = &expected,
204
+ }, testing.allocator);
205
+ }
206
+
207
+ test "line breaks: multiple breaks around SIMD16 boundary" {
208
+ var buf: [32]u8 = undefined;
209
+ @memset(&buf, 'x');
210
+ buf[14] = '\n';
211
+ buf[15] = '\r';
212
+ buf[16] = '\n';
213
+ buf[17] = '\n';
214
+
215
+ const expected = [_]usize{ 14, 16, 17 }; // 15-16 is CRLF
216
+
217
+ try testLineBreaks(.{
218
+ .name = "multi@boundary",
219
+ .input = &buf,
220
+ .expected = &expected,
221
+ }, testing.allocator);
222
+ }
223
+
224
+ test "line breaks: multibyte adjacent to LF" {
225
+ const input = "é\n";
226
+ const expected = [_]usize{2};
227
+
228
+ try testLineBreaks(.{
229
+ .name = "é\\n",
230
+ .input = input,
231
+ .expected = &expected,
232
+ }, testing.allocator);
233
+ }
234
+
235
+ test "line breaks: multibyte adjacent to CRLF" {
236
+ const input = "漢\r\n";
237
+ const expected = [_]usize{4};
238
+
239
+ try testLineBreaks(.{
240
+ .name = "漢\\r\\n",
241
+ .input = input,
242
+ .expected = &expected,
243
+ }, testing.allocator);
244
+ }
245
+
246
+ test "line breaks: multibyte at SIMD boundary without breaks" {
247
+ var buf: [32]u8 = undefined;
248
+ @memset(&buf, 0);
249
+
250
+ const text = "Test世界Test";
251
+ @memcpy(buf[0..text.len], text);
252
+
253
+ const expected = [_]usize{};
254
+
255
+ try testLineBreaks(.{
256
+ .name = "unicode@boundary",
257
+ .input = buf[0..text.len],
258
+ .expected = &expected,
259
+ }, testing.allocator);
260
+ }
261
+
262
+ test "line breaks: realistic text" {
263
+ const sample_text =
264
+ "The quick brown fox jumps over the lazy dog.\n" ++
265
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" ++
266
+ "Windows uses CRLF line endings.\r\n" ++
267
+ "Unix uses LF line endings.\n" ++
268
+ "Classic Mac used CR line endings.\r" ++
269
+ "UTF-8 text: 世界 こんにちは\n" ++
270
+ "Multiple\n\nEmpty\n\n\nLines\n" ++
271
+ "Mixed\r\nendings\nhere\r";
272
+
273
+ var result = utf8.LineBreakResult.init(testing.allocator);
274
+ defer result.deinit();
275
+
276
+ try utf8.findLineBreaks(sample_text, &result);
277
+
278
+ // Verify we found some breaks
279
+ try testing.expect(result.breaks.items.len > 0);
280
+ }
281
+
282
+ test "line breaks: random small buffers" {
283
+ var prng = std.Random.DefaultPrng.init(42);
284
+ const random = prng.random();
285
+
286
+ var i: usize = 0;
287
+ while (i < 50) : (i += 1) {
288
+ const size = 16 + random.uintLessThan(usize, 1024);
289
+ const buf = try testing.allocator.alloc(u8, size);
290
+ defer testing.allocator.free(buf);
291
+
292
+ for (buf) |*b| {
293
+ const r = random.uintLessThan(u8, 100);
294
+ if (r < 5) {
295
+ b.* = '\n';
296
+ } else if (r < 10) {
297
+ b.* = '\r';
298
+ } else {
299
+ b.* = 'a' + random.uintLessThan(u8, 26);
300
+ }
301
+ }
302
+
303
+ var result = utf8.LineBreakResult.init(testing.allocator);
304
+ defer result.deinit();
305
+ try utf8.findLineBreaks(buf, &result);
306
+ }
307
+ }
308
+
309
+ // ============================================================================
310
+ // TAB STOP TESTS
311
+ // ============================================================================
312
+
313
+ const TabStopTestCase = struct {
314
+ name: []const u8,
315
+ input: []const u8,
316
+ expected: []const usize,
317
+ };
318
+
319
+ const tab_stop_golden_tests = [_]TabStopTestCase{
320
+ .{
321
+ .name = "empty string",
322
+ .input = "",
323
+ .expected = &[_]usize{},
324
+ },
325
+ .{
326
+ .name = "no tabs",
327
+ .input = "hello world",
328
+ .expected = &[_]usize{},
329
+ },
330
+ .{
331
+ .name = "single tab",
332
+ .input = "a\tb",
333
+ .expected = &[_]usize{1},
334
+ },
335
+ .{
336
+ .name = "multiple tabs",
337
+ .input = "a\tb\tc",
338
+ .expected = &[_]usize{ 1, 3 },
339
+ },
340
+ .{
341
+ .name = "tab at start",
342
+ .input = "\tabc",
343
+ .expected = &[_]usize{0},
344
+ },
345
+ .{
346
+ .name = "tab at end",
347
+ .input = "abc\t",
348
+ .expected = &[_]usize{3},
349
+ },
350
+ .{
351
+ .name = "consecutive tabs",
352
+ .input = "a\t\tb",
353
+ .expected = &[_]usize{ 1, 2 },
354
+ },
355
+ .{
356
+ .name = "only tabs",
357
+ .input = "\t\t\t",
358
+ .expected = &[_]usize{ 0, 1, 2 },
359
+ },
360
+ .{
361
+ .name = "tabs mixed with spaces",
362
+ .input = "a \tb \tc",
363
+ .expected = &[_]usize{ 2, 5 },
364
+ },
365
+ .{
366
+ .name = "tab with newline",
367
+ .input = "a\tb\nc\td",
368
+ .expected = &[_]usize{ 1, 5 },
369
+ },
370
+ .{
371
+ .name = "many tabs",
372
+ .input = "\ta\tb\tc\td\te\tf\t",
373
+ .expected = &[_]usize{ 0, 2, 4, 6, 8, 10, 12 },
374
+ },
375
+ };
376
+
377
+ fn testTabStops(test_case: TabStopTestCase, allocator: std.mem.Allocator) !void {
378
+ var result = utf8.TabStopResult.init(allocator);
379
+ defer result.deinit();
380
+
381
+ try utf8.findTabStops(test_case.input, &result);
382
+
383
+ try testing.expectEqual(test_case.expected.len, result.positions.items.len);
384
+
385
+ for (test_case.expected, 0..) |exp, i| {
386
+ try testing.expectEqual(exp, result.positions.items[i]);
387
+ }
388
+ }
389
+
390
+ test "tab stops: golden tests" {
391
+ for (tab_stop_golden_tests) |tc| {
392
+ try testTabStops(tc, testing.allocator);
393
+ }
394
+ }
395
+
396
+ test "tab stops: tab at SIMD16 edge (15)" {
397
+ var buf: [32]u8 = undefined;
398
+ @memset(&buf, 'x');
399
+ buf[15] = '\t';
400
+ buf[16] = 'y';
401
+
402
+ const expected = [_]usize{15};
403
+
404
+ try testTabStops(.{
405
+ .name = "tab@15",
406
+ .input = &buf,
407
+ .expected = &expected,
408
+ }, testing.allocator);
409
+ }
410
+
411
+ test "tab stops: tab at SIMD16 edge (16)" {
412
+ var buf: [32]u8 = undefined;
413
+ @memset(&buf, 'x');
414
+ buf[16] = '\t';
415
+ buf[17] = 'y';
416
+
417
+ const expected = [_]usize{16};
418
+
419
+ try testTabStops(.{
420
+ .name = "tab@16",
421
+ .input = &buf,
422
+ .expected = &expected,
423
+ }, testing.allocator);
424
+ }
425
+
426
+ test "tab stops: multiple tabs around SIMD16 boundary" {
427
+ var buf: [32]u8 = undefined;
428
+ @memset(&buf, 'x');
429
+ buf[14] = '\t';
430
+ buf[15] = '\t';
431
+ buf[16] = '\t';
432
+ buf[17] = '\t';
433
+
434
+ const expected = [_]usize{ 14, 15, 16, 17 };
435
+
436
+ try testTabStops(.{
437
+ .name = "tabs@boundary",
438
+ .input = &buf,
439
+ .expected = &expected,
440
+ }, testing.allocator);
441
+ }
442
+
443
+ test "tab stops: tabs in all SIMD lanes" {
444
+ var buf: [16]u8 = undefined;
445
+ for (&buf) |*b| {
446
+ b.* = '\t';
447
+ }
448
+
449
+ const expected = [_]usize{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
450
+
451
+ try testTabStops(.{
452
+ .name = "all_tabs",
453
+ .input = &buf,
454
+ .expected = &expected,
455
+ }, testing.allocator);
456
+ }
457
+
458
+ test "tab stops: multibyte adjacent to tab" {
459
+ const input = "é\ttest"; // é is 2 bytes: 0xC3 0xA9
460
+ const expected = [_]usize{2}; // Tab at index 2
461
+
462
+ try testTabStops(.{
463
+ .name = "é\\t",
464
+ .input = input,
465
+ .expected = &expected,
466
+ }, testing.allocator);
467
+ }
468
+
469
+ test "tab stops: CJK adjacent to tab" {
470
+ const input = "漢\ttest"; // 漢 is 3 bytes: 0xE6 0xBC 0xA2
471
+ const expected = [_]usize{3}; // Tab at index 3
472
+
473
+ try testTabStops(.{
474
+ .name = "漢\\t",
475
+ .input = input,
476
+ .expected = &expected,
477
+ }, testing.allocator);
478
+ }
479
+
480
+ test "tab stops: emoji adjacent to tab" {
481
+ const input = "👋\twave"; // 👋 is 4 bytes
482
+ const expected = [_]usize{4}; // Tab at index 4
483
+
484
+ try testTabStops(.{
485
+ .name = "emoji\\t",
486
+ .input = input,
487
+ .expected = &expected,
488
+ }, testing.allocator);
489
+ }
490
+
491
+ test "tab stops: multibyte at SIMD boundary without tabs" {
492
+ var buf: [32]u8 = undefined;
493
+ @memset(&buf, 0);
494
+
495
+ const text = "Test世界Test";
496
+ @memcpy(buf[0..text.len], text);
497
+
498
+ const expected = [_]usize{}; // No tabs
499
+
500
+ try testTabStops(.{
501
+ .name = "unicode@boundary",
502
+ .input = buf[0..text.len],
503
+ .expected = &expected,
504
+ }, testing.allocator);
505
+ }
506
+
507
+ test "tab stops: realistic code text" {
508
+ const sample_text =
509
+ "function test() {\n" ++
510
+ "\tconst x = 10;\n" ++
511
+ "\tif (x > 5) {\n" ++
512
+ "\t\treturn true;\n" ++
513
+ "\t}\n" ++
514
+ "\treturn false;\n" ++
515
+ "}\n";
516
+
517
+ var result = utf8.TabStopResult.init(testing.allocator);
518
+ defer result.deinit();
519
+
520
+ try utf8.findTabStops(sample_text, &result);
521
+
522
+ // Should find 6 tabs (including double-tab for nested return)
523
+ try testing.expectEqual(@as(usize, 6), result.positions.items.len);
524
+ }
525
+
526
+ test "tab stops: TSV data" {
527
+ const tsv_line = "name\tage\tcity\tcountry";
528
+ const expected = [_]usize{ 4, 8, 13 };
529
+
530
+ try testTabStops(.{
531
+ .name = "tsv",
532
+ .input = tsv_line,
533
+ .expected = &expected,
534
+ }, testing.allocator);
535
+ }
536
+
537
+ test "tab stops: random small buffers" {
538
+ var prng = std.Random.DefaultPrng.init(42);
539
+ const random = prng.random();
540
+
541
+ var i: usize = 0;
542
+ while (i < 50) : (i += 1) {
543
+ const size = 16 + random.uintLessThan(usize, 1024);
544
+ const buf = try testing.allocator.alloc(u8, size);
545
+ defer testing.allocator.free(buf);
546
+
547
+ for (buf) |*b| {
548
+ const r = random.uintLessThan(u8, 100);
549
+ if (r < 10) {
550
+ b.* = '\t';
551
+ } else {
552
+ b.* = 'a' + random.uintLessThan(u8, 26);
553
+ }
554
+ }
555
+
556
+ var result = utf8.TabStopResult.init(testing.allocator);
557
+ defer result.deinit();
558
+ try utf8.findTabStops(buf, &result);
559
+ }
560
+ }
561
+
562
+ test "tab stops: large buffer with periodic tabs" {
563
+ const size = 10000;
564
+ const buf = try testing.allocator.alloc(u8, size);
565
+ defer testing.allocator.free(buf);
566
+
567
+ var expected_count: usize = 0;
568
+ for (buf, 0..) |*b, idx| {
569
+ if (idx % 50 == 0) {
570
+ b.* = '\t';
571
+ expected_count += 1;
572
+ } else {
573
+ b.* = 'a' + @as(u8, @intCast(idx % 26));
574
+ }
575
+ }
576
+
577
+ var result = utf8.TabStopResult.init(testing.allocator);
578
+ defer result.deinit();
579
+ try utf8.findTabStops(buf, &result);
580
+
581
+ try testing.expectEqual(expected_count, result.positions.items.len);
582
+ }
583
+
584
+ test "tab stops: exactly 16 bytes with tab" {
585
+ const input = "0123456789abcd\tx"; // exactly 16 bytes with tab at pos 14
586
+ const expected = [_]usize{14};
587
+
588
+ try testTabStops(.{
589
+ .name = "16bytes_with_tab",
590
+ .input = input,
591
+ .expected = &expected,
592
+ }, testing.allocator);
593
+ }
594
+
595
+ test "tab stops: exactly 16 bytes no tab" {
596
+ const input = "0123456789abcdef"; // exactly 16 bytes, no tab
597
+ const expected = [_]usize{};
598
+
599
+ try testTabStops(.{
600
+ .name = "16bytes_no_tab",
601
+ .input = input,
602
+ .expected = &expected,
603
+ }, testing.allocator);
604
+ }
605
+
606
+ test "tab stops: 17 bytes with tab at 16" {
607
+ const input = "0123456789abcdef\t"; // tab at position 16
608
+ const expected = [_]usize{16};
609
+
610
+ try testTabStops(.{
611
+ .name = "tab@16",
612
+ .input = input,
613
+ .expected = &expected,
614
+ }, testing.allocator);
615
+ }
616
+
617
+ test "tab stops: result reuse" {
618
+ var result = utf8.TabStopResult.init(testing.allocator);
619
+ defer result.deinit();
620
+
621
+ // First use
622
+ try utf8.findTabStops("a\tb\tc", &result);
623
+ try testing.expectEqual(@as(usize, 2), result.positions.items.len);
624
+
625
+ // Second use - should reset automatically
626
+ try utf8.findTabStops("x\ty", &result);
627
+ try testing.expectEqual(@as(usize, 1), result.positions.items.len);
628
+ try testing.expectEqual(@as(usize, 1), result.positions.items[0]);
629
+ }
630
+
631
+ test "tab stops: mixed with other whitespace" {
632
+ const input = "  \t  \t  ";
633
+ const expected = [_]usize{ 2, 5 };
634
+
635
+ try testTabStops(.{
636
+ .name = "mixed_whitespace",
637
+ .input = input,
638
+ .expected = &expected,
639
+ }, testing.allocator);
640
+ }
641
+
642
+ test "tab stops: makefile style" {
643
+ const makefile = "target:\n\t@echo Building\n\t@gcc -o out main.c\n";
644
+
645
+ var result = utf8.TabStopResult.init(testing.allocator);
646
+ defer result.deinit();
647
+
648
+ try utf8.findTabStops(makefile, &result);
649
+
650
+ // Should find 2 tabs (one per command line)
651
+ try testing.expectEqual(@as(usize, 2), result.positions.items.len);
652
+ }
653
+
654
+ test "tab stops: tabs across multiple SIMD chunks" {
655
+ const size = 64; // 4 SIMD chunks
656
+ const buf = try testing.allocator.alloc(u8, size);
657
+ defer testing.allocator.free(buf);
658
+
659
+ @memset(buf, 'x');
660
+ buf[0] = '\t';
661
+ buf[16] = '\t';
662
+ buf[32] = '\t';
663
+ buf[48] = '\t';
664
+ buf[63] = '\t';
665
+
666
+ const expected = [_]usize{ 0, 16, 32, 48, 63 };
667
+
668
+ try testTabStops(.{
669
+ .name = "multi_chunk",
670
+ .input = buf,
671
+ .expected = &expected,
672
+ }, testing.allocator);
673
+ }
674
+
675
+ // ============================================================================
676
+ // WORD WRAP BREAK TESTS
677
+ // ============================================================================
678
+
679
+ const WrapBreakTestCase = struct {
680
+ name: []const u8,
681
+ input: []const u8,
682
+ expected: []const usize,
683
+ };
684
+
685
+ const wrap_break_golden_tests = [_]WrapBreakTestCase{
686
+ .{
687
+ .name = "empty string",
688
+ .input = "",
689
+ .expected = &[_]usize{},
690
+ },
691
+ .{
692
+ .name = "no breaks",
693
+ .input = "abcdef",
694
+ .expected = &[_]usize{},
695
+ },
696
+ .{
697
+ .name = "single space",
698
+ .input = "a b",
699
+ .expected = &[_]usize{1},
700
+ },
701
+ .{
702
+ .name = "multiple spaces",
703
+ .input = "a b c",
704
+ .expected = &[_]usize{ 1, 3 },
705
+ },
706
+ .{
707
+ .name = "tab character",
708
+ .input = "a\tb",
709
+ .expected = &[_]usize{1},
710
+ },
711
+ .{
712
+ .name = "newline",
713
+ .input = "a\nb",
714
+ .expected = &[_]usize{},
715
+ },
716
+ .{
717
+ .name = "carriage return",
718
+ .input = "a\rb",
719
+ .expected = &[_]usize{},
720
+ },
721
+ .{
722
+ .name = "dash",
723
+ .input = "pre-post",
724
+ .expected = &[_]usize{3},
725
+ },
726
+ .{
727
+ .name = "forward slash",
728
+ .input = "path/to/file",
729
+ .expected = &[_]usize{ 4, 7 },
730
+ },
731
+ .{
732
+ .name = "backslash",
733
+ .input = "path\\to\\file",
734
+ .expected = &[_]usize{ 4, 7 },
735
+ },
736
+ .{
737
+ .name = "punctuation",
738
+ .input = "Hello, world! How are you? Fine.",
739
+ .expected = &[_]usize{ 5, 6, 12, 13, 17, 21, 25, 26, 31 },
740
+ },
741
+ .{
742
+ .name = "brackets",
743
+ .input = "(a)[b]{c}",
744
+ .expected = &[_]usize{ 0, 2, 3, 5, 6, 8 },
745
+ },
746
+ .{
747
+ .name = "mixed breaks",
748
+ .input = "Hello, world! -path/file.",
749
+ .expected = &[_]usize{ 5, 6, 12, 13, 14, 19, 24 },
750
+ },
751
+ .{
752
+ .name = "consecutive spaces",
753
+ .input = "a  b",
754
+ .expected = &[_]usize{ 1, 2 },
755
+ },
756
+ .{
757
+ .name = "only spaces",
758
+ .input = "   ",
759
+ .expected = &[_]usize{ 0, 1, 2 },
760
+ },
761
+ .{
762
+ .name = "all break types",
763
+ .input = " \t-/\\.,:;!?()[]{}",
764
+ .expected = &[_]usize{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
765
+ },
766
+ .{
767
+ .name = "nbsp",
768
+ .input = "a\u{00A0}b",
769
+ .expected = &[_]usize{1},
770
+ },
771
+ .{
772
+ .name = "em space",
773
+ .input = "a\u{2003}b",
774
+ .expected = &[_]usize{1},
775
+ },
776
+ .{
777
+ .name = "ideo space",
778
+ .input = "a\u{3000}b",
779
+ .expected = &[_]usize{1},
780
+ },
781
+ .{
782
+ .name = "soft hyphen",
783
+ .input = "pre\u{00AD}post",
784
+ .expected = &[_]usize{3},
785
+ },
786
+ .{
787
+ .name = "unicode hyphen",
788
+ .input = "pre\u{2010}post",
789
+ .expected = &[_]usize{3},
790
+ },
791
+ .{
792
+ .name = "zero width space",
793
+ .input = "a\u{200B}b",
794
+ .expected = &[_]usize{1},
795
+ },
796
+ };
797
+
798
+ fn testWrapBreaks(test_case: WrapBreakTestCase, allocator: std.mem.Allocator) !void {
799
+ var result = utf8.WrapBreakResult.init(allocator);
800
+ defer result.deinit();
801
+
802
+ try utf8.findWrapBreaks(test_case.input, &result, .unicode);
803
+
804
+ try testing.expectEqual(test_case.expected.len, result.breaks.items.len);
805
+
806
+ for (test_case.expected, 0..) |exp, i| {
807
+ try testing.expectEqual(exp, result.breaks.items[i].byte_offset);
808
+ }
809
+ }
810
+
811
+ test "wrap breaks: golden tests" {
812
+ for (wrap_break_golden_tests) |tc| {
813
+ try testWrapBreaks(tc, testing.allocator);
814
+ }
815
+ }
816
+
817
+ test "wrap breaks: space at SIMD16 edge (15)" {
818
+ var buf: [32]u8 = undefined;
819
+ @memset(&buf, 'x');
820
+ buf[15] = ' ';
821
+ buf[16] = 'y';
822
+
823
+ const expected = [_]usize{15};
824
+
825
+ try testWrapBreaks(.{
826
+ .name = "space@15",
827
+ .input = &buf,
828
+ .expected = &expected,
829
+ }, testing.allocator);
830
+ }
831
+
832
+ test "wrap breaks: unicode NBSP at SIMD16 edge (15)" {
833
+ var buf: [32]u8 = undefined;
834
+ @memset(&buf, 'x');
835
+ // NBSP U+00A0 = 0xC2 0xA0
836
+ buf[15] = 0xC2;
837
+ buf[16] = 0xA0;
838
+
839
+ const expected = [_]usize{15};
840
+
841
+ try testWrapBreaks(.{
842
+ .name = "nbsp@15",
843
+ .input = &buf,
844
+ .expected = &expected,
845
+ }, testing.allocator);
846
+ }
847
+
848
+ test "wrap breaks: multiple breaks around SIMD16 boundary" {
849
+ var buf: [32]u8 = undefined;
850
+ @memset(&buf, 'x');
851
+ buf[14] = ' ';
852
+ buf[15] = '-';
853
+ buf[16] = '/';
854
+ buf[17] = '.';
855
+
856
+ const expected = [_]usize{ 14, 15, 16, 17 };
857
+
858
+ try testWrapBreaks(.{
859
+ .name = "multi@boundary",
860
+ .input = &buf,
861
+ .expected = &expected,
862
+ }, testing.allocator);
863
+ }
864
+
865
+ test "wrap breaks: multibyte adjacent to space" {
866
+ const input = "é test"; // é is 2 bytes: 0xC3 0xA9
867
+ const expected = [_]usize{2}; // Space at index 2
868
+
869
+ try testWrapBreaks(.{
870
+ .name = "é space",
871
+ .input = input,
872
+ .expected = &expected,
873
+ }, testing.allocator);
874
+ }
875
+
876
+ test "wrap breaks: multibyte adjacent to dash" {
877
+ const input = "漢-test"; // 漢 is 3 bytes: 0xE6 0xBC 0xA2
878
+ const expected = [_]usize{3}; // Dash at index 3
879
+
880
+ try testWrapBreaks(.{
881
+ .name = "漢-",
882
+ .input = input,
883
+ .expected = &expected,
884
+ }, testing.allocator);
885
+ }
886
+
887
+ test "wrap breaks: multibyte at SIMD boundary with script transitions" {
888
+ var buf: [32]u8 = undefined;
889
+ @memset(&buf, 0);
890
+
891
+ // Place UTF-8 sequences around boundary
892
+ const text = "Test世界Test";
893
+ @memcpy(buf[0..text.len], text);
894
+
895
+ // Breaks at ASCII<->CJK transitions:
896
+ // - after 't' in "Test" (byte 3)
897
+ // - after '界' before "Test" (byte 7)
898
+ const expected = [_]usize{ 3, 7 };
899
+
900
+ try testWrapBreaks(.{
901
+ .name = "unicode@boundary",
902
+ .input = buf[0..text.len],
903
+ .expected = &expected,
904
+ }, testing.allocator);
905
+ }
906
+
907
+ test "wrap breaks: realistic text" {
908
+ const sample_text =
909
+ "The quick brown fox jumps over the lazy dog.\n" ++
910
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" ++
911
+ "File paths: /usr/local/bin and C:\\Windows\\System32\n" ++
912
+ "Punctuation test: Hello, world! How are you? I'm fine.\n" ++
913
+ "Brackets test: (parentheses) [square] {curly}\n" ++
914
+ "Dashes test: pre-dash post-dash multi-word-expression\n" ++
915
+ "Mixed: Hello, /path/to-file.txt [done]!\n";
916
+
917
+ var result = utf8.WrapBreakResult.init(testing.allocator);
918
+ defer result.deinit();
919
+
920
+ try utf8.findWrapBreaks(sample_text, &result, .unicode);
921
+
922
+ // Verify we found many breaks
923
+ try testing.expect(result.breaks.items.len > 0);
924
+ }
925
+
926
+ test "wrap breaks: random small buffers" {
927
+ var prng = std.Random.DefaultPrng.init(42);
928
+ const random = prng.random();
929
+
930
+ const break_chars = " \t-/\\.,:;!?()[]{}";
931
+
932
+ var i: usize = 0;
933
+ while (i < 50) : (i += 1) {
934
+ const size = 16 + random.uintLessThan(usize, 1024);
935
+ const buf = try testing.allocator.alloc(u8, size);
936
+ defer testing.allocator.free(buf);
937
+
938
+ // Fill with ASCII letters and randomly insert breaks
939
+ for (buf) |*b| {
940
+ const r = random.uintLessThan(u8, 100);
941
+ if (r < 20) {
942
+ const break_idx = random.uintLessThan(usize, break_chars.len);
943
+ b.* = break_chars[break_idx];
944
+ } else {
945
+ b.* = 'a' + random.uintLessThan(u8, 26);
946
+ }
947
+ }
948
+
949
+ var result = utf8.WrapBreakResult.init(testing.allocator);
950
+ defer result.deinit();
951
+ try utf8.findWrapBreaks(buf, &result, .unicode);
952
+ }
953
+ }
954
+
955
+ test "wrap breaks: large buffer" {
956
+ const size = 10000;
957
+ const buf = try testing.allocator.alloc(u8, size);
958
+ defer testing.allocator.free(buf);
959
+
960
+ // Create realistic text with periodic breaks
961
+ for (buf, 0..) |*b, idx| {
962
+ if (idx % 50 == 0) {
963
+ b.* = ' ';
964
+ } else if (idx % 75 == 0) {
965
+ b.* = '-';
966
+ } else {
967
+ b.* = 'a' + @as(u8, @intCast(idx % 26));
968
+ }
969
+ }
970
+
971
+ var result = utf8.WrapBreakResult.init(testing.allocator);
972
+ defer result.deinit();
973
+ try utf8.findWrapBreaks(buf, &result, .unicode);
974
+
975
+ try testing.expect(result.breaks.items.len > 0);
976
+ }
977
+
978
+ test "wrap breaks: buffer exceeding 64KB" {
979
+ const size = 100_000;
980
+ const buf = try testing.allocator.alloc(u8, size);
981
+ defer testing.allocator.free(buf);
982
+
983
+ @memset(buf, 'a');
984
+
985
+ // Place a space at 70000; with a u16 offset this would truncate to 4464 (70000 % 65536).
986
+ const break_pos: usize = 70_000;
987
+ buf[break_pos] = ' ';
988
+
989
+ var result = utf8.WrapBreakResult.init(testing.allocator);
990
+ defer result.deinit();
991
+ try utf8.findWrapBreaks(buf, &result, .unicode);
992
+
993
+ // Should find exactly one wrap break
994
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
995
+
996
+ // The byte_offset must be the actual position, not truncated
997
+ try testing.expectEqual(@as(u32, break_pos), result.breaks.items[0].byte_offset);
998
+ try testing.expectEqual(@as(u32, break_pos), result.breaks.items[0].char_offset);
999
+ }
1000
+
1001
+ // ============================================================================
1002
+ // EDGE CASES AND INTEGRATION TESTS
1003
+ // ============================================================================
1004
+
1005
+ test "edge case: result reuse" {
1006
+ var line_result = utf8.LineBreakResult.init(testing.allocator);
1007
+ defer line_result.deinit();
1008
+
1009
+ // First use - line breaks
1010
+ try utf8.findLineBreaks("a\nb\nc", &line_result);
1011
+ try testing.expectEqual(@as(usize, 2), line_result.breaks.items.len);
1012
+
1013
+ // Second use - should reset automatically
1014
+ try utf8.findLineBreaks("x\ny", &line_result);
1015
+ try testing.expectEqual(@as(usize, 1), line_result.breaks.items.len);
1016
+ try testing.expectEqual(@as(usize, 1), line_result.breaks.items[0].pos);
1017
+
1018
+ // Third use - wrap breaks (different result type)
1019
+ var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
1020
+ defer wrap_result.deinit();
1021
+ try utf8.findWrapBreaks("a b c", &wrap_result, .unicode);
1022
+ try testing.expectEqual(@as(usize, 2), wrap_result.breaks.items.len);
1023
+ }
1024
+
1025
+ test "edge case: empty input" {
1026
+ var line_result = utf8.LineBreakResult.init(testing.allocator);
1027
+ defer line_result.deinit();
1028
+
1029
+ try utf8.findLineBreaks("", &line_result);
1030
+ try testing.expectEqual(@as(usize, 0), line_result.breaks.items.len);
1031
+
1032
+ var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
1033
+ defer wrap_result.deinit();
1034
+ try utf8.findWrapBreaks("", &wrap_result, .unicode);
1035
+ try testing.expectEqual(@as(usize, 0), wrap_result.breaks.items.len);
1036
+ }
1037
+
1038
+ test "edge case: exactly 16 bytes" {
1039
+ var line_result = utf8.LineBreakResult.init(testing.allocator);
1040
+ defer line_result.deinit();
1041
+
1042
+ const input = "0123456789abcdef"; // exactly 16 bytes
1043
+ try utf8.findLineBreaks(input, &line_result);
1044
+ try testing.expectEqual(@as(usize, 0), line_result.breaks.items.len);
1045
+
1046
+ var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
1047
+ defer wrap_result.deinit();
1048
+ try utf8.findWrapBreaks(input, &wrap_result, .unicode);
1049
+ try testing.expectEqual(@as(usize, 0), wrap_result.breaks.items.len);
1050
+ }
1051
+
1052
+ test "edge case: 17 bytes with break at 16" {
1053
+ var line_result = utf8.LineBreakResult.init(testing.allocator);
1054
+ defer line_result.deinit();
1055
+
1056
+ const input = "0123456789abcde\nx"; // 17 bytes; LF at index 15 (the 16th byte)
1057
+ try utf8.findLineBreaks(input, &line_result);
1058
+ try testing.expectEqual(@as(usize, 1), line_result.breaks.items.len);
1059
+ try testing.expectEqual(@as(usize, 15), line_result.breaks.items[0].pos);
1060
+
1061
+ var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
1062
+ defer wrap_result.deinit();
1063
+ const input2 = "0123456789abcde x"; // space at position 15
1064
+ try utf8.findWrapBreaks(input2, &wrap_result, .unicode);
1065
+ try testing.expectEqual(@as(usize, 1), wrap_result.breaks.items.len);
1066
+ try testing.expectEqual(@as(u16, 15), wrap_result.breaks.items[0].byte_offset);
1067
+ try testing.expectEqual(@as(u16, 15), wrap_result.breaks.items[0].char_offset);
1068
+ }
1069
+
1070
+ // ============================================================================
1071
+ // GRAPHEME CLUSTER TESTS
1072
+ // ============================================================================
1073
+
1074
+ test "wrap breaks: emoji with ZWJ - char offset should count grapheme not codepoints" {
1075
+ const input = "ab 👩‍🚀 cd";
1076
+
1077
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1078
+ defer result.deinit();
1079
+ try utf8.findWrapBreaks(input, &result, .unicode);
1080
+
1081
+ try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
1082
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
1083
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
1084
+ try testing.expectEqual(@as(u16, 14), result.breaks.items[1].byte_offset);
1085
+ try testing.expectEqual(@as(u16, 4), result.breaks.items[1].char_offset); // Should be 4, not 6
1086
+ }
1087
+
1088
+ test "wrap breaks: emoji with skin tone - char offset should count grapheme" {
1089
+ const input = "hi 👋🏿 bye";
1090
+
1091
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1092
+ defer result.deinit();
1093
+ try utf8.findWrapBreaks(input, &result, .unicode);
1094
+
1095
+ try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
1096
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
1097
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
1098
+ try testing.expectEqual(@as(u16, 11), result.breaks.items[1].byte_offset);
1099
+ try testing.expectEqual(@as(u16, 4), result.breaks.items[1].char_offset); // Should be 4, not 5
1100
+ }
1101
+
1102
+ test "wrap breaks: emoji with VS16 selector - char offset should count grapheme" {
1103
+ const input = "I ❤️ U";
1104
+
1105
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1106
+ defer result.deinit();
1107
+ try utf8.findWrapBreaks(input, &result, .unicode);
1108
+
1109
+ try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
1110
+ try testing.expectEqual(@as(u16, 1), result.breaks.items[0].byte_offset);
1111
+ try testing.expectEqual(@as(u16, 1), result.breaks.items[0].char_offset);
1112
+ try testing.expectEqual(@as(u16, 8), result.breaks.items[1].byte_offset);
1113
+ try testing.expectEqual(@as(u16, 3), result.breaks.items[1].char_offset); // Should be 3, not 4
1114
+ }
1115
+
1116
+ test "wrap breaks: combining diacritic - char offset should count grapheme" {
1117
+ const input = "cafe\u{0301} time";
1118
+
1119
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1120
+ defer result.deinit();
1121
+ try utf8.findWrapBreaks(input, &result, .unicode);
1122
+
1123
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1124
+ try testing.expectEqual(@as(u16, 6), result.breaks.items[0].byte_offset);
1125
+ try testing.expectEqual(@as(u16, 4), result.breaks.items[0].char_offset); // Should be 4, not 5
1126
+ }
1127
+
1128
+ test "wrap breaks: flag emoji - char offset should count grapheme" {
1129
+ const input = "USA🇺🇸 flag";
1130
+
1131
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1132
+ defer result.deinit();
1133
+ try utf8.findWrapBreaks(input, &result, .unicode);
1134
+
1135
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1136
+ try testing.expectEqual(@as(u16, 11), result.breaks.items[0].byte_offset);
1137
+ try testing.expectEqual(@as(u16, 4), result.breaks.items[0].char_offset); // 3(USA) + 1(flag) = 4
1138
+ }
1139
+
1140
+ test "wrap breaks: mixed graphemes and ASCII" {
1141
+ const input = "Hello 👋🏿 world 🇺🇸 test";
1142
+
1143
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1144
+ defer result.deinit();
1145
+ try utf8.findWrapBreaks(input, &result, .unicode);
1146
+
1147
+ try testing.expectEqual(@as(usize, 4), result.breaks.items.len);
1148
+ try testing.expectEqual(@as(u16, 5), result.breaks.items[0].byte_offset);
1149
+ try testing.expectEqual(@as(u16, 5), result.breaks.items[0].char_offset);
1150
+ try testing.expectEqual(@as(u16, 14), result.breaks.items[1].byte_offset);
1151
+ try testing.expectEqual(@as(u16, 7), result.breaks.items[1].char_offset); // 5 + 1 + 1(grapheme) = 7
1152
+ try testing.expectEqual(@as(u16, 20), result.breaks.items[2].byte_offset);
1153
+ try testing.expectEqual(@as(u16, 13), result.breaks.items[2].char_offset); // 7 + 1 + 5 = 13
1154
+ try testing.expectEqual(@as(u16, 29), result.breaks.items[3].byte_offset);
1155
+ try testing.expectEqual(@as(u16, 15), result.breaks.items[3].char_offset); // 13 + 1(space) + 1(flag grapheme, RI pair) = 15 (per uucode)
1156
+ }
1157
+
1158
+ test "wrap breaks: CJK characters keep break offsets" {
1159
+ // Ensure multibyte graphemes don't shift wrap break offsets.
1160
+ const input = "Hello 世界 test";
1161
+
1162
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1163
+ defer result.deinit();
1164
+ try utf8.findWrapBreaks(input, &result, .unicode);
1165
+
1166
+ // Should find 2 wrap breaks (2 spaces)
1167
+ try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
1168
+
1169
+ // First break: space after "Hello"
1170
+ try testing.expectEqual(@as(u16, 5), result.breaks.items[0].byte_offset);
1171
+ try testing.expectEqual(@as(u16, 5), result.breaks.items[0].char_offset);
1172
+
1173
+ // Second break: space after "世界"
1174
+ // Byte: "Hello " = 6 bytes, "世" = 3 bytes, "界" = 3 bytes, total = 12
1175
+ try testing.expectEqual(@as(u16, 12), result.breaks.items[1].byte_offset);
1176
+ try testing.expectEqual(@as(u16, 8), result.breaks.items[1].char_offset); // 6 graphemes(Hello space) + 2 graphemes(世界) = 8
1177
+ }
1178
+
1179
+ test "wrap breaks: CJK to ASCII script transition" {
1180
+ const input = "日本語abc";
1181
+
1182
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1183
+ defer result.deinit();
1184
+ try utf8.findWrapBreaks(input, &result, .unicode);
1185
+
1186
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1187
+ try testing.expectEqual(@as(u16, 6), result.breaks.items[0].byte_offset);
1188
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
1189
+ }
1190
+
1191
+ test "wrap breaks: ASCII to CJK script transition" {
1192
+ const input = "abc日本語";
1193
+
1194
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1195
+ defer result.deinit();
1196
+ try utf8.findWrapBreaks(input, &result, .unicode);
1197
+
1198
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1199
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
1200
+ try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
1201
+ }
1202
+
1203
+ test "wrap breaks: CJK punctuation before ASCII" {
1204
+ const input = "日本語。abc";
1205
+
1206
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1207
+ defer result.deinit();
1208
+ try utf8.findWrapBreaks(input, &result, .unicode);
1209
+
1210
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1211
+ try testing.expectEqual(@as(u16, 9), result.breaks.items[0].byte_offset);
1212
+ try testing.expectEqual(@as(u16, 3), result.breaks.items[0].char_offset);
1213
+ }
1214
+
1215
+ test "wrap breaks: compat ideograph to ASCII script transition" {
1216
+ const input = "丽abc";
1217
+
1218
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1219
+ defer result.deinit();
1220
+ try utf8.findWrapBreaks(input, &result, .unicode);
1221
+
1222
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1223
+ try testing.expectEqual(@as(u16, 0), result.breaks.items[0].byte_offset);
1224
+ try testing.expectEqual(@as(u16, 0), result.breaks.items[0].char_offset);
1225
+ }
1226
+
1227
+ test "wrap breaks: extension I ideograph to ASCII script transition" {
1228
+ const input = "𮯰abc";
1229
+
1230
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1231
+ defer result.deinit();
1232
+ try utf8.findWrapBreaks(input, &result, .unicode);
1233
+
1234
+ try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
1235
+ try testing.expectEqual(@as(u16, 0), result.breaks.items[0].byte_offset);
1236
+ try testing.expectEqual(@as(u16, 0), result.breaks.items[0].char_offset);
1237
+ }
1238
+
1239
+ test "wrap breaks: emoji and CJK mixed offsets" {
1240
+ const input = "🌟 Unicode test: こんにちは世界 Hello World";
1241
+
1242
+ var result = utf8.WrapBreakResult.init(testing.allocator);
1243
+ defer result.deinit();
1244
+ try utf8.findWrapBreaks(input, &result, .unicode);
1245
+
1246
+ // Find the space before "Hello"
1247
+ var space_before_hello: ?utf8.WrapBreak = null;
1248
+ for (result.breaks.items) |brk| {
1249
+ if (brk.byte_offset == 40) {
1250
+ space_before_hello = brk;
1251
+ break;
1252
+ }
1253
+ }
1254
+
1255
+ try testing.expect(space_before_hello != null);
1256
+ try testing.expectEqual(@as(u16, 40), space_before_hello.?.byte_offset);
1257
+ try testing.expectEqual(@as(u16, 23), space_before_hello.?.char_offset); // Graphemes before this space
1258
+
1259
+ // Find the space after "Hello"
1260
+ var space_after_hello: ?utf8.WrapBreak = null;
1261
+ for (result.breaks.items) |brk| {
1262
+ if (brk.byte_offset == 46) {
1263
+ space_after_hello = brk;
1264
+ break;
1265
+ }
1266
+ }
1267
+
1268
+ try testing.expect(space_after_hello != null);
1269
+ try testing.expectEqual(@as(u16, 46), space_after_hello.?.byte_offset);
1270
+ try testing.expectEqual(@as(u16, 29), space_after_hello.?.char_offset);
1271
+ }
1272
+
1273
+ // ============================================================================
1274
+ // WRAP BY WIDTH TESTS
1275
+ // ============================================================================
1276
+
1277
+ test "wrap by width: empty string" {
1278
+ const result = utf8.findWrapPosByWidth("", 10, 4, false, .unicode);
1279
+ try testing.expectEqual(@as(u32, 0), result.byte_offset);
1280
+ try testing.expectEqual(@as(u32, 0), result.grapheme_count);
1281
+ try testing.expectEqual(@as(u32, 0), result.columns_used);
1282
+ }
1283
+
1284
+ test "wrap by width: simple ASCII no wrap" {
1285
+ const result = utf8.findWrapPosByWidth("hello", 10, 4, true, .unicode);
1286
+ try testing.expectEqual(@as(u32, 5), result.byte_offset);
1287
+ try testing.expectEqual(@as(u32, 5), result.grapheme_count);
1288
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1289
+ }
1290
+
1291
+ test "wrap by width: ASCII wrap exactly at limit" {
1292
+ const result = utf8.findWrapPosByWidth("hello", 5, 4, true, .unicode);
1293
+ try testing.expectEqual(@as(u32, 5), result.byte_offset);
1294
+ try testing.expectEqual(@as(u32, 5), result.grapheme_count);
1295
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1296
+ }
1297
+
1298
+ test "wrap by width: ASCII wrap before limit" {
1299
+ const result = utf8.findWrapPosByWidth("hello world", 7, 4, true, .unicode);
1300
+ try testing.expectEqual(@as(u32, 7), result.byte_offset);
1301
+ try testing.expectEqual(@as(u32, 7), result.grapheme_count);
1302
+ try testing.expectEqual(@as(u32, 7), result.columns_used);
1303
+ }
1304
+
1305
+ test "wrap by width: East Asian wide char" {
1306
+ const result = utf8.findWrapPosByWidth("世界", 3, 4, false, .unicode);
1307
+ try testing.expectEqual(@as(u32, 3), result.byte_offset); // After first char
1308
+ try testing.expectEqual(@as(u32, 1), result.grapheme_count);
1309
+ try testing.expectEqual(@as(u32, 2), result.columns_used);
1310
+ }
1311
+
1312
+ test "wrap by width: combining mark" {
1313
+ const result = utf8.findWrapPosByWidth("e\u{0301}test", 3, 4, false, .unicode);
1314
+ try testing.expectEqual(@as(u32, 5), result.byte_offset); // After "é" (3 bytes) + "te" (2 bytes)
1315
+ try testing.expectEqual(@as(u32, 3), result.grapheme_count);
1316
+ try testing.expectEqual(@as(u32, 3), result.columns_used);
1317
+ }
1318
+
1319
+ test "wrap by width: tab handling" {
1320
+ const result = utf8.findWrapPosByWidth("a\tb", 5, 4, false, .unicode);
1321
+ try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t"
1322
+ try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab
1323
+ try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5
1324
+ }
1325
+
1326
+ fn testWrapByWidthMethodsMatch(input: []const u8, max_columns: u32, tab_width: u8, isASCIIOnly: bool) !void {
1327
+ const result = utf8.findWrapPosByWidth(input, max_columns, tab_width, isASCIIOnly, .unicode);
1328
+ // Since we only have SIMD16 in utf8.zig, just verify it doesn't crash
1329
+ _ = result;
1330
+ }
1331
+
1332
+ test "wrap by width: consistency - realistic text" {
1333
+ const sample_text =
1334
+ "The quick brown fox jumps over the lazy dog. " ++
1335
+ "Lorem ipsum dolor sit amet, consectetur adipiscing elit. " ++
1336
+ "File paths: /usr/local/bin and C:\\Windows\\System32. " ++
1337
+ "Punctuation test: Hello, world! How are you? I'm fine.";
1338
+
1339
+ const widths = [_]u32{ 10, 20, 40, 80, 120 };
1340
+ for (widths) |w| {
1341
+ try testWrapByWidthMethodsMatch(sample_text, w, 4, true);
1342
+ }
1343
+ }
1344
+
1345
+ test "wrap by width: consistency - Unicode text" {
1346
+ const unicode_text = "世界 こんにちは test 你好 CJK-mixed";
1347
+
1348
+ const widths = [_]u32{ 5, 10, 15, 20, 30 };
1349
+ for (widths) |w| {
1350
+ try testWrapByWidthMethodsMatch(unicode_text, w, 4, false);
1351
+ }
1352
+ }
1353
+
1354
+ test "wrap by width: consistency - edge cases" {
1355
+ const edge_cases = [_]struct { text: []const u8, ascii: bool }{
1356
+ .{ .text = "", .ascii = false },
1357
+ .{ .text = " ", .ascii = true },
1358
+ .{ .text = "a", .ascii = true },
1359
+ .{ .text = "abc", .ascii = true },
1360
+ .{ .text = " ", .ascii = true },
1361
+ .{ .text = "a b c d e", .ascii = true },
1362
+ .{ .text = "no-spaces-here", .ascii = true },
1363
+ .{ .text = "/usr/local/bin", .ascii = true },
1364
+ .{ .text = "世界", .ascii = false },
1365
+ .{ .text = "\t\t\t", .ascii = false },
1366
+ };
1367
+
1368
+ for (edge_cases) |input| {
1369
+ const widths = [_]u32{ 1, 5, 10, 20 };
1370
+ for (widths) |w| {
1371
+ try testWrapByWidthMethodsMatch(input.text, w, 4, input.ascii);
1372
+ }
1373
+ }
1374
+ }
1375
+
1376
+ test "wrap by width: property - random ASCII buffers" {
1377
+ var prng = std.Random.DefaultPrng.init(42);
1378
+ const random = prng.random();
1379
+
1380
+ var i: usize = 0;
1381
+ while (i < 50) : (i += 1) {
1382
+ const size = 16 + random.uintLessThan(usize, 256);
1383
+ const buf = try testing.allocator.alloc(u8, size);
1384
+ defer testing.allocator.free(buf);
1385
+
1386
+ for (buf) |*b| {
1387
+ b.* = 'a' + random.uintLessThan(u8, 26);
1388
+ }
1389
+
1390
+ const width = 10 + random.uintLessThan(u32, 70);
1391
+ try testWrapByWidthMethodsMatch(buf, width, 4, true);
1392
+ }
1393
+ }
1394
+
1395
+ test "wrap by width: boundary - SIMD16 chunk boundary" {
1396
+ var buf: [32]u8 = undefined;
1397
+ @memset(&buf, 'x');
1398
+ try testWrapByWidthMethodsMatch(&buf, 20, 4, true);
1399
+ try testWrapByWidthMethodsMatch(&buf, 10, 4, true);
1400
+ }
1401
+
1402
+ test "wrap by width: boundary - Unicode at SIMD boundary" {
1403
+ var buf: [32]u8 = undefined;
1404
+ @memset(&buf, 'a');
1405
+ const cjk = "世";
1406
+ @memcpy(buf[14..17], cjk);
1407
+ try testWrapByWidthMethodsMatch(buf[0..20], 20, 4, false);
1408
+ }
1409
+
1410
+ test "wrap by width: wide emoji exactly at column boundary" {
1411
+ const input = "Hello 🌍 World";
1412
+
1413
+ const result7 = utf8.findWrapPosByWidth(input, 7, 8, false, .unicode);
1414
+ try testing.expectEqual(@as(u32, 6), result7.byte_offset);
1415
+ try testing.expectEqual(@as(u32, 6), result7.columns_used);
1416
+
1417
+ const result8 = utf8.findWrapPosByWidth(input, 8, 8, false, .unicode);
1418
+ try testing.expectEqual(@as(u32, 10), result8.byte_offset);
1419
+ try testing.expectEqual(@as(u32, 8), result8.columns_used);
1420
+
1421
+ const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
1422
+ try testing.expectEqual(@as(u32, 6), result6.byte_offset);
1423
+ try testing.expectEqual(@as(u32, 6), result6.columns_used);
1424
+ }
1425
+
1426
+ test "wrap by width: wide emoji at start" {
1427
+ const input = "🌍 World";
1428
+
1429
+ const result1 = utf8.findWrapPosByWidth(input, 1, 8, false, .unicode);
1430
+ try testing.expectEqual(@as(u32, 0), result1.byte_offset);
1431
+ try testing.expectEqual(@as(u32, 0), result1.columns_used);
1432
+
1433
+ const result2 = utf8.findWrapPosByWidth(input, 2, 8, false, .unicode);
1434
+ try testing.expectEqual(@as(u32, 4), result2.byte_offset);
1435
+ try testing.expectEqual(@as(u32, 2), result2.columns_used);
1436
+
1437
+ const result3 = utf8.findWrapPosByWidth(input, 3, 8, false, .unicode);
1438
+ try testing.expectEqual(@as(u32, 5), result3.byte_offset);
1439
+ try testing.expectEqual(@as(u32, 3), result3.columns_used);
1440
+ }
1441
+
1442
+ test "wrap by width: multiple wide characters" {
1443
+ const input = "AB🌍CD🌎EF";
1444
+
1445
+ const result5 = utf8.findWrapPosByWidth(input, 5, 8, false, .unicode);
1446
+ try testing.expectEqual(@as(u32, 7), result5.byte_offset);
1447
+ try testing.expectEqual(@as(u32, 5), result5.columns_used);
1448
+
1449
+ const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
1450
+ try testing.expectEqual(@as(u32, 8), result6.byte_offset);
1451
+ try testing.expectEqual(@as(u32, 6), result6.columns_used);
1452
+ }
1453
+
1454
+ test "wrap by width: CJK wide characters at boundary" {
1455
+ const input = "hello世界test";
1456
+
1457
+ const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
1458
+ try testing.expectEqual(@as(u32, 5), result6.byte_offset);
1459
+ try testing.expectEqual(@as(u32, 5), result6.columns_used);
1460
+
1461
+ const result7 = utf8.findWrapPosByWidth(input, 7, 8, false, .unicode);
1462
+ try testing.expectEqual(@as(u32, 8), result7.byte_offset);
1463
+ try testing.expectEqual(@as(u32, 7), result7.columns_used);
1464
+ }
1465
+
1466
+ // ============================================================================
1467
+ // FIND POS BY WIDTH TESTS (for selection - includes graphemes that start before limit)
1468
+ // ============================================================================
1469
+
1470
+ test "find pos by width: wide emoji at boundary - INCLUDES grapheme" {
1471
+ const input = "Hello 🌍 World";
1472
+ // Layout: H(0) e(1) l(2) l(3) o(4) space(5) 🌍(6-7) space(8) W(9)...
1473
+
1474
+ // include_start_before=true (selection end): include graphemes that START before max_columns
1475
+ const result7 = utf8.findPosByWidth(input, 7, 8, false, true, .unicode);
1476
+ try testing.expectEqual(@as(u32, 10), result7.byte_offset); // After emoji (snapped forward)
1477
+ try testing.expectEqual(@as(u32, 8), result7.columns_used);
1478
+
1479
+ const result8 = utf8.findPosByWidth(input, 8, 8, false, true, .unicode);
1480
+ try testing.expectEqual(@as(u32, 10), result8.byte_offset);
1481
+ try testing.expectEqual(@as(u32, 8), result8.columns_used);
1482
+
1483
+ const result6 = utf8.findPosByWidth(input, 6, 8, false, true, .unicode);
1484
+ try testing.expectEqual(@as(u32, 6), result6.byte_offset);
1485
+ try testing.expectEqual(@as(u32, 6), result6.columns_used);
1486
+
1487
+ // include_start_before=false (selection start): exclude graphemes that cross max_columns
1488
+ const start7 = utf8.findPosByWidth(input, 7, 8, false, false, .unicode);
1489
+ try testing.expectEqual(@as(u32, 6), start7.byte_offset); // Before emoji (snapped backward)
1490
+ try testing.expectEqual(@as(u32, 6), start7.columns_used);
1491
+ }
1492
+
1493
+ test "find pos by width: start at second cell of width=2 grapheme snaps backward" {
1494
+ const input = "AB🌍CD";
1495
+ const result = utf8.findPosByWidth(input, 3, 8, false, false, .unicode);
1496
+ try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "AB", before emoji
1497
+ try testing.expectEqual(@as(u32, 2), result.columns_used);
1498
+ }
1499
+
1500
+ test "find pos by width: end at first cell of width=2 grapheme snaps forward" {
1501
+ const input = "AB🌍CD";
1502
+ const result = utf8.findPosByWidth(input, 2, 8, false, true, .unicode);
1503
+ try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "AB": the emoji starts at column 2, which is not before max_columns (2), so it is not included
1504
+ try testing.expectEqual(@as(u32, 2), result.columns_used);
1505
+
1506
+ const result3 = utf8.findPosByWidth(input, 3, 8, false, true, .unicode);
1507
+ try testing.expectEqual(@as(u32, 6), result3.byte_offset); // After "AB🌍"
1508
+ try testing.expectEqual(@as(u32, 4), result3.columns_used);
1509
+ }
1510
+
1511
+ test "find pos by width: selection boundaries with multiple wide chars" {
1512
+ const input = "A🌍B🌎C";
1513
+ const start2 = utf8.findPosByWidth(input, 2, 8, false, false, .unicode);
1514
+ try testing.expectEqual(@as(u32, 1), start2.byte_offset); // After "A", before first emoji
1515
+ try testing.expectEqual(@as(u32, 1), start2.columns_used);
1516
+
1517
+ const end5 = utf8.findPosByWidth(input, 5, 8, false, true, .unicode);
1518
+ try testing.expectEqual(@as(u32, 10), end5.byte_offset); // After "A🌍B🌎"
1519
+ try testing.expectEqual(@as(u32, 6), end5.columns_used);
1520
+ }
1521
+
1522
+ test "find pos by width: empty string" {
1523
+ const result = utf8.findPosByWidth("", 10, 4, false, true, .unicode);
1524
+ try testing.expectEqual(@as(u32, 0), result.byte_offset);
1525
+ try testing.expectEqual(@as(u32, 0), result.grapheme_count);
1526
+ try testing.expectEqual(@as(u32, 0), result.columns_used);
1527
+ }
1528
+
1529
+ test "find pos by width: simple ASCII no limit" {
1530
+ const result = utf8.findPosByWidth("hello", 10, 4, true, true, .unicode);
1531
+ try testing.expectEqual(@as(u32, 5), result.byte_offset);
1532
+ try testing.expectEqual(@as(u32, 5), result.grapheme_count);
1533
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1534
+ }
1535
+
1536
+ test "find pos by width: ASCII exactly at limit" {
1537
+ const result = utf8.findPosByWidth("hello", 5, 4, true, true, .unicode);
1538
+ try testing.expectEqual(@as(u32, 5), result.byte_offset);
1539
+ try testing.expectEqual(@as(u32, 5), result.grapheme_count);
1540
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1541
+ }
1542
+
1543
+ test "find pos by width: wide emoji at start" {
1544
+ const input = "🌍 World";
1545
+
1546
+ const result1 = utf8.findPosByWidth(input, 1, 8, false, true, .unicode);
1547
+ try testing.expectEqual(@as(u32, 4), result1.byte_offset);
1548
+ try testing.expectEqual(@as(u32, 2), result1.columns_used);
1549
+
1550
+ const result2 = utf8.findPosByWidth(input, 2, 8, false, true, .unicode);
1551
+ try testing.expectEqual(@as(u32, 4), result2.byte_offset);
1552
+ try testing.expectEqual(@as(u32, 2), result2.columns_used);
1553
+
1554
+ const result3 = utf8.findPosByWidth(input, 3, 8, false, true, .unicode);
1555
+ try testing.expectEqual(@as(u32, 5), result3.byte_offset);
1556
+ try testing.expectEqual(@as(u32, 3), result3.columns_used);
1557
+ }
1558
+
1559
+ test "find pos by width: multiple wide characters" {
1560
+ const input = "AB🌍CD🌎EF";
1561
+
1562
+ const result5 = utf8.findPosByWidth(input, 5, 8, false, true, .unicode);
1563
+ try testing.expectEqual(@as(u32, 7), result5.byte_offset);
1564
+ try testing.expectEqual(@as(u32, 5), result5.columns_used);
1565
+
1566
+ const result7 = utf8.findPosByWidth(input, 7, 8, false, true, .unicode);
1567
+ try testing.expectEqual(@as(u32, 12), result7.byte_offset);
1568
+ try testing.expectEqual(@as(u32, 8), result7.columns_used);
1569
+ }
1570
+
1571
+ test "find pos by width: CJK wide characters" {
1572
+ const input = "hello世界test";
1573
+
1574
+ const result6 = utf8.findPosByWidth(input, 6, 8, false, true, .unicode);
1575
+ try testing.expectEqual(@as(u32, 8), result6.byte_offset);
1576
+ try testing.expectEqual(@as(u32, 7), result6.columns_used);
1577
+
1578
+ const result8 = utf8.findPosByWidth(input, 8, 8, false, true, .unicode);
1579
+ try testing.expectEqual(@as(u32, 11), result8.byte_offset);
1580
+ try testing.expectEqual(@as(u32, 9), result8.columns_used);
1581
+ }
1582
+
1583
+ test "eastAsianWidth: verify all characters in test string have correct width" {
1584
+ // Test each CJK character individually to ensure width calculation is correct
1585
+
1586
+ // Test hiragana characters from "こんにちは"
1587
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x3053)); // こ
1588
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x3093)); // ん
1589
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x306B)); // に
1590
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x3061)); // ち
1591
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x306F)); // は
1592
+
1593
+ // Test kanji characters from "世界"
1594
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x4E16)); // 世
1595
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x754C)); // 界
1596
+
1597
+ // Test emoji
1598
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x1F31F)); // 🌟
1599
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x1F680)); // 🚀
1600
+
1601
+ // Test Chinese characters from "你好"
1602
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x4F60)); // 你
1603
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0x597D)); // 好
1604
+
1605
+ // Test Korean characters from "안녕하세요"
1606
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0xC548)); // 안
1607
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0xB155)); // 녕
1608
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0xD558)); // 하
1609
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0xC138)); // 세
1610
+ try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(0xC694)); // 요
1611
+
1612
+ // Test ASCII characters
1613
+ try testing.expectEqual(@as(u32, 1), utf8.eastAsianWidth('H'));
1614
+ try testing.expectEqual(@as(u32, 1), utf8.eastAsianWidth('e'));
1615
+ try testing.expectEqual(@as(u32, 1), utf8.eastAsianWidth(' '));
1616
+ try testing.expectEqual(@as(u32, 1), utf8.eastAsianWidth(':'));
1617
+ }
1618
+
1619
+ test "calculateTextWidth: verify CJK string widths character by character" {
1620
+ // Verify width of individual CJK characters
1621
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("こ", 8, false, .unicode));
1622
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("ん", 8, false, .unicode));
1623
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("に", 8, false, .unicode));
1624
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("ち", 8, false, .unicode));
1625
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("は", 8, false, .unicode));
1626
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("世", 8, false, .unicode));
1627
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("界", 8, false, .unicode));
1628
+
1629
+ // Verify cumulative widths
1630
+ try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("こん", 8, false, .unicode));
1631
+ try testing.expectEqual(@as(u32, 6), utf8.calculateTextWidth("こんに", 8, false, .unicode));
1632
+ try testing.expectEqual(@as(u32, 14), utf8.calculateTextWidth("こんにちは世界", 8, false, .unicode));
1633
+
1634
+ // Verify mixed ASCII and CJK
1635
+ try testing.expectEqual(@as(u32, 5), utf8.calculateTextWidth("Hello", 8, true, .unicode));
1636
+ try testing.expectEqual(@as(u32, 6), utf8.calculateTextWidth("Hello ", 8, true, .unicode));
1637
+ try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("Hello 世", 8, false, .unicode));
1638
+ try testing.expectEqual(@as(u32, 10), utf8.calculateTextWidth("Hello 世界", 8, false, .unicode));
1639
+ }
1640
+
1641
+ test "calculateTextWidth: step by step for emoji CJK test string" {
1642
+ // Manually verify each section
1643
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("🌟", 8, false, .unicode));
1644
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth("🌟 ", 8, false, .unicode));
1645
+ try testing.expectEqual(@as(u32, 10), utf8.calculateTextWidth("🌟 Unicode", 8, false, .unicode));
1646
+ try testing.expectEqual(@as(u32, 11), utf8.calculateTextWidth("🌟 Unicode ", 8, false, .unicode));
1647
+ try testing.expectEqual(@as(u32, 15), utf8.calculateTextWidth("🌟 Unicode test", 8, false, .unicode));
1648
+ try testing.expectEqual(@as(u32, 16), utf8.calculateTextWidth("🌟 Unicode test:", 8, false, .unicode));
1649
+ try testing.expectEqual(@as(u32, 17), utf8.calculateTextWidth("🌟 Unicode test: ", 8, false, .unicode));
1650
+
1651
+ // CJK section - verify each character adds 2 columns
1652
+ try testing.expectEqual(@as(u32, 19), utf8.calculateTextWidth("🌟 Unicode test: こ", 8, false, .unicode));
1653
+ try testing.expectEqual(@as(u32, 21), utf8.calculateTextWidth("🌟 Unicode test: こん", 8, false, .unicode));
1654
+ try testing.expectEqual(@as(u32, 23), utf8.calculateTextWidth("🌟 Unicode test: こんに", 8, false, .unicode));
1655
+ try testing.expectEqual(@as(u32, 25), utf8.calculateTextWidth("🌟 Unicode test: こんにち", 8, false, .unicode));
1656
+ try testing.expectEqual(@as(u32, 27), utf8.calculateTextWidth("🌟 Unicode test: こんにちは", 8, false, .unicode));
1657
+ try testing.expectEqual(@as(u32, 29), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世", 8, false, .unicode));
1658
+ try testing.expectEqual(@as(u32, 31), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界", 8, false, .unicode));
1659
+ try testing.expectEqual(@as(u32, 32), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界 ", 8, false, .unicode));
1660
+
1661
+ // English section
1662
+ try testing.expectEqual(@as(u32, 33), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界 H", 8, false, .unicode));
1663
+ try testing.expectEqual(@as(u32, 37), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界 Hello", 8, false, .unicode));
1664
+ try testing.expectEqual(@as(u32, 38), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界 Hello ", 8, false, .unicode));
1665
+ try testing.expectEqual(@as(u32, 43), utf8.calculateTextWidth("🌟 Unicode test: こんにちは世界 Hello World", 8, false, .unicode));
1666
+ }
1667
+
1668
+ test "find pos by width: CJK characters with English - verify column calculation" {
1669
+ // This test verifies that findPosByWidth correctly handles mixed CJK and ASCII
1670
+ const input = "🌟 Unicode test: こんにちは世界 Hello World 你好世界";
1671
+
1672
+ // Verify width calculations at key positions
1673
+ const width_before_hello = utf8.calculateTextWidth(input[0..40], 8, false, .unicode);
1674
+ try testing.expectEqual(@as(u32, 31), width_before_hello);
1675
+
1676
+ const width_including_space_before_hello = utf8.calculateTextWidth(input[0..41], 8, false, .unicode);
1677
+ try testing.expectEqual(@as(u32, 32), width_including_space_before_hello);
1678
+
1679
+ const width_up_to_hello = utf8.calculateTextWidth(input[0..46], 8, false, .unicode);
1680
+ try testing.expectEqual(@as(u32, 37), width_up_to_hello);
1681
+
1682
+ const width_including_hello_space = utf8.calculateTextWidth(input[0..47], 8, false, .unicode);
1683
+ try testing.expectEqual(@as(u32, 38), width_including_hello_space);
1684
+
1685
+ const width_up_to_world = utf8.calculateTextWidth(input[0..52], 8, false, .unicode);
1686
+ try testing.expectEqual(@as(u32, 43), width_up_to_world);
1687
+
1688
+ const width_including_world_space = utf8.calculateTextWidth(input[0..53], 8, false, .unicode);
1689
+ try testing.expectEqual(@as(u32, 44), width_including_world_space);
1690
+
1691
+ // Verify findPosByWidth returns correct positions
1692
+ const result35 = utf8.findPosByWidth(input, 35, 8, false, false, .unicode);
1693
+ try testing.expectEqual(@as(u32, 44), result35.byte_offset);
1694
+ try testing.expectEqual(@as(u32, 35), result35.columns_used);
1695
+
1696
+ const result36 = utf8.findPosByWidth(input, 36, 8, false, false, .unicode);
1697
+ try testing.expectEqual(@as(u32, 45), result36.byte_offset);
1698
+ try testing.expectEqual(@as(u32, 36), result36.columns_used);
1699
+
1700
+ const result37 = utf8.findPosByWidth(input, 37, 8, false, false, .unicode);
1701
+ try testing.expectEqual(@as(u32, 46), result37.byte_offset);
1702
+ try testing.expectEqual(@as(u32, 37), result37.columns_used);
1703
+
1704
+ const result42 = utf8.findPosByWidth(input, 42, 8, false, false, .unicode);
1705
+ try testing.expectEqual(@as(u32, 51), result42.byte_offset);
1706
+ try testing.expectEqual(@as(u32, 42), result42.columns_used);
1707
+ }
1708
+
1709
+ test "find pos by width: combining mark" {
1710
+ const result = utf8.findPosByWidth("e\u{0301}test", 3, 4, false, true, .unicode);
1711
+ try testing.expectEqual(@as(u32, 5), result.byte_offset); // After "é" (3 bytes) + "te" (2 bytes)
1712
+ try testing.expectEqual(@as(u32, 3), result.grapheme_count);
1713
+ try testing.expectEqual(@as(u32, 3), result.columns_used);
1714
+ }
1715
+
1716
+ test "find pos by width: tab handling" {
1717
+ const result = utf8.findPosByWidth("a\tb", 5, 4, false, true, .unicode);
1718
+ try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t"
1719
+ try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab
1720
+ try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5
1721
+ }
1722
+
1723
+ // ============================================================================
1724
+ // SPLIT CHUNK AT WEIGHT TESTS (include_start_before=false)
1725
+ // Tests for the exact behavior needed by splitChunkAtWeight in edit-buffer.zig
1726
+ // ============================================================================
1727
+
1728
+ test "split at weight: ASCII simple split" {
1729
+ const input = "hello world";
1730
+
1731
+ // Split at column 5 - should stop right after the final 'o' of "hello" (before the space)
1732
+ const result = utf8.findPosByWidth(input, 5, 8, true, false, .unicode);
1733
+ try testing.expectEqual(@as(u32, 5), result.byte_offset); // After "hello"
1734
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1735
+ }
1736
+
1737
+ test "split at weight: ASCII split in middle" {
1738
+ const input = "abcdefghij";
1739
+
1740
+ // Split at column 3
1741
+ const result = utf8.findPosByWidth(input, 3, 8, true, false, .unicode);
1742
+ try testing.expectEqual(@as(u32, 3), result.byte_offset); // After "abc"
1743
+ try testing.expectEqual(@as(u32, 3), result.columns_used);
1744
+ }
1745
+
1746
+ test "split at weight: wide char at boundary - exclude when starting after" {
1747
+ const input = "AB🌍CD"; // A(1) B(1) 🌍(2) C(1) D(1)
1748
+
1749
+ // Split at column 2 - should include up to B, exclude emoji
1750
+ const result2 = utf8.findPosByWidth(input, 2, 8, false, false, .unicode);
1751
+ try testing.expectEqual(@as(u32, 2), result2.byte_offset); // After "AB"
1752
+ try testing.expectEqual(@as(u32, 2), result2.columns_used);
1753
+
1754
+ const result3 = utf8.findPosByWidth(input, 3, 8, false, false, .unicode);
1755
+ try testing.expectEqual(@as(u32, 2), result3.byte_offset); // After "AB", before emoji
1756
+ try testing.expectEqual(@as(u32, 2), result3.columns_used);
1757
+ }
1758
+
1759
+ test "split at weight: CJK characters" {
1760
+ const input = "hello世界test"; // h(1) e(1) l(1) l(1) o(1) 世(2) 界(2) t(1) e(1) s(1) t(1)
1761
+
1762
+ // Split at column 5 - after "hello"
1763
+ const result5 = utf8.findPosByWidth(input, 5, 8, false, false, .unicode);
1764
+ try testing.expectEqual(@as(u32, 5), result5.byte_offset);
1765
+ try testing.expectEqual(@as(u32, 5), result5.columns_used);
1766
+
1767
+ const result6 = utf8.findPosByWidth(input, 6, 8, false, false, .unicode);
1768
+ try testing.expectEqual(@as(u32, 5), result6.byte_offset); // After "hello", before 世
1769
+ try testing.expectEqual(@as(u32, 5), result6.columns_used);
1770
+
1771
+ // Split at column 9 - should include both CJK chars
1772
+ const result9 = utf8.findPosByWidth(input, 9, 8, false, false, .unicode);
1773
+ try testing.expectEqual(@as(u32, 11), result9.byte_offset); // After "hello世界"
1774
+ try testing.expectEqual(@as(u32, 9), result9.columns_used);
1775
+ }
1776
+
1777
+ test "split at weight: combining marks" {
1778
+ const input = "cafe\u{0301}test"; // c(1) a(1) f(1) é(1) t(1) e(1) s(1) t(1)
1779
+
1780
+ // Split at column 4 - should include the combining mark with 'e'
1781
+ const result4 = utf8.findPosByWidth(input, 4, 8, false, false, .unicode);
1782
+ try testing.expectEqual(@as(u32, 6), result4.byte_offset); // After "cafe" + combining accent (6 bytes: 4 ASCII bytes + 2 bytes for U+0301)
1783
+ try testing.expectEqual(@as(u32, 4), result4.columns_used);
1784
+ }
1785
+
1786
+ test "split at weight: emoji with skin tone" {
1787
+ const input = "Hi👋🏿Bye"; // H(1) i(1) 👋🏿(wide) B(1) y(1) e(1)
1788
+
1789
+ // Split at column 2 - should stop before or after emoji depending on where it starts
1790
+ const result2 = utf8.findPosByWidth(input, 2, 8, false, false, .unicode);
1791
+ try testing.expectEqual(@as(u32, 2), result2.byte_offset); // After "Hi"
1792
+ try testing.expectEqual(@as(u32, 2), result2.columns_used);
1793
+
1794
+ // Split at column 5 - should include emoji
1795
+ const result5 = utf8.findPosByWidth(input, 5, 8, false, false, .unicode);
1796
+ // Result will stop at first grapheme that starts >= max_columns
1797
+ // Just verify it returns a reasonable offset
1798
+ try testing.expect(result5.byte_offset >= 2); // At least past "Hi"
1799
+ try testing.expect(result5.columns_used >= 2); // At least 2 columns
1800
+ }
1801
+
1802
+ test "split at weight: zero width at start" {
1803
+ const input = "hello";
1804
+
1805
+ // Split at column 0 - should return offset 0
1806
+ const result = utf8.findPosByWidth(input, 0, 8, true, false, .unicode);
1807
+ try testing.expectEqual(@as(u32, 0), result.byte_offset);
1808
+ try testing.expectEqual(@as(u32, 0), result.columns_used);
1809
+ }
1810
+
1811
+ test "split at weight: beyond end" {
1812
+ const input = "hello"; // 5 columns
1813
+
1814
+ // Split at column 10 - should return entire string
1815
+ const result = utf8.findPosByWidth(input, 10, 8, true, false, .unicode);
1816
+ try testing.expectEqual(@as(u32, 5), result.byte_offset);
1817
+ try testing.expectEqual(@as(u32, 5), result.columns_used);
1818
+ }
1819
+
1820
+ test "split at weight: tab character" {
1821
+ const input = "a\tbc"; // a(1) tab(4 fixed) b(1) c(1) = 7 columns total
1822
+
1823
+ // Split at column 4 - should stop before tab since it would exceed limit
1824
+ const result4 = utf8.findPosByWidth(input, 4, 4, false, false, .unicode);
1825
+ try testing.expectEqual(@as(u32, 1), result4.byte_offset); // After "a"
1826
+ try testing.expectEqual(@as(u32, 1), result4.columns_used); // a(1)
1827
+ }
1828
+
1829
+ test "split at weight: complex mixed content" {
1830
+ const input = "A🌍B世C"; // A(1) 🌍(2) B(1) 世(2) C(1) = 7 columns total
1831
+ const r1 = utf8.findPosByWidth(input, 1, 8, false, false, .unicode);
1832
+ try testing.expectEqual(@as(u32, 1), r1.byte_offset); // After "A"
1833
+
1834
+ const r2 = utf8.findPosByWidth(input, 2, 8, false, false, .unicode);
1835
+ try testing.expectEqual(@as(u32, 1), r2.byte_offset); // After "A"
1836
+
1837
+ const r3 = utf8.findPosByWidth(input, 3, 8, false, false, .unicode);
1838
+ try testing.expectEqual(@as(u32, 5), r3.byte_offset); // After "A🌍"
1839
+
1840
+ const r4 = utf8.findPosByWidth(input, 4, 8, false, false, .unicode);
1841
+ try testing.expectEqual(@as(u32, 6), r4.byte_offset); // After "A🌍B"
1842
+
1843
+ const r5 = utf8.findPosByWidth(input, 5, 8, false, false, .unicode);
1844
+ try testing.expectEqual(@as(u32, 6), r5.byte_offset); // After "A🌍B"
1845
+ }
1846
+
1847
+ // ============================================================================
1848
+ // GET WIDTH AT TESTS
1849
+ // ============================================================================
1850
+
1851
+ test "getWidthAt: empty string" {
1852
+ const result = utf8.getWidthAt("", 0, 8, .unicode);
1853
+ try testing.expectEqual(@as(u32, 0), result);
1854
+ }
1855
+
1856
+ test "getWidthAt: out of bounds" {
1857
+ const result = utf8.getWidthAt("hello", 10, 8, .unicode);
1858
+ try testing.expectEqual(@as(u32, 0), result);
1859
+ }
1860
+
1861
+ test "getWidthAt: simple ASCII" {
1862
+ const text = "hello";
1863
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'h'
1864
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 1, 8, .unicode)); // 'e'
1865
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 4, 8, .unicode)); // 'o'
1866
+ }
1867
+
1868
+ test "getWidthAt: tab character" {
1869
+ const text = "a\tb";
1870
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 4, .unicode)); // 'a'
1871
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 1, 4, .unicode)); // tab fixed width 4
1872
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 2, 4, .unicode)); // 'b'
1873
+ }
1874
+
1875
+ test "getWidthAt: tab at different columns" {
1876
+ const text = "\t";
1877
+ // Tab now has fixed width regardless of current_column
1878
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode)); // Tab fixed width 4
1879
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode)); // Tab fixed width 4
1880
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode)); // Tab fixed width 4
1881
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode)); // Tab fixed width 4
1882
+ try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode)); // Tab fixed width 4
1883
+ }
1884
+
1885
+ test "getWidthAt: CJK wide character" {
1886
+ const text = "世界";
1887
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // '世' (3 bytes)
1888
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 3, 8, .unicode)); // '界' (3 bytes)
1889
+ }
1890
+
1891
+ test "getWidthAt: emoji single width" {
1892
+ const text = "🌍";
1893
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // emoji
1894
+ }
1895
+
1896
+ test "getWidthAt: combining mark grapheme" {
1897
+ const text = "cafe\u{0301}"; // é with combining acute accent
1898
+ const width = utf8.getWidthAt(text, 3, 8, .unicode); // At 'e' (which has combining mark after)
1899
+ try testing.expectEqual(@as(u32, 1), width); // 'e' width 1 + combining mark width 0 = 1
1900
+ }
1901
+
1902
+ test "getWidthAt: emoji with skin tone" {
1903
+ const text = "👋🏿"; // Wave + dark skin tone modifier
1904
+ const width = utf8.getWidthAt(text, 0, 8, .unicode);
1905
+ try testing.expectEqual(@as(u32, 2), width); // Single grapheme cluster, width 2
1906
+ }
1907
+
1908
+ test "getWidthAt: emoji with ZWJ" {
1909
+ const text = "👩‍🚀"; // Woman astronaut (woman + ZWJ + rocket)
1910
+ const width = utf8.getWidthAt(text, 0, 8, .unicode);
1911
+ try testing.expectEqual(@as(u32, 2), width); // Single grapheme cluster, width 2
1912
+ }
1913
+
1914
+ test "getWidthAt: flag emoji" {
1915
+ const text = "🇺🇸"; // US flag (two regional indicators)
1916
+ const width = utf8.getWidthAt(text, 0, 8, .unicode);
1917
+ try testing.expectEqual(@as(u32, 2), width); // Entire grapheme cluster
1918
+ }
1919
+
1920
+ test "getWidthAt: mixed ASCII and CJK" {
1921
+ const text = "Hello世界";
1922
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'H'
1923
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 1, 8, .unicode)); // 'e'
1924
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 5, 8, .unicode)); // '世'
1925
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 8, 8, .unicode)); // '界'
1926
+ }
1927
+
1928
+ test "getWidthAt: emoji with VS16 selector" {
1929
+ const text = "❤️"; // Heart + VS16 selector
1930
+ const width = utf8.getWidthAt(text, 0, 8, .unicode);
1931
+ try testing.expectEqual(@as(u32, 2), width); // Single grapheme cluster, width 2
1932
+ }
1933
+
1934
+ test "getWidthAt: hiragana" {
1935
+ const text = "こんにちは";
1936
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // 'こ'
1937
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 3, 8, .unicode)); // 'ん'
1938
+ }
1939
+
1940
+ test "getWidthAt: katakana" {
1941
+ const text = "カタカナ";
1942
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // 'カ'
1943
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 3, 8, .unicode)); // 'タ'
1944
+ }
1945
+
1946
+ test "getWidthAt: fullwidth forms" {
1947
+ const text = "ＡＢＣ"; // Fullwidth A, B, C (U+FF21..U+FF23, 3 bytes each)
1948
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // Fullwidth 'A'
1949
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 3, 8, .unicode)); // Fullwidth 'B'
1950
+ }
1951
+
1952
+ test "getWidthAt: zero width at start of string" {
1953
+ const text = "a\u{0301}bc"; // a + combining accent + bc
1954
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'a' + combining = 1
1955
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 3, 8, .unicode)); // 'b'
1956
+ }
1957
+
1958
+ test "getWidthAt: control characters" {
1959
+ const text = "a\x00b";
1960
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'a'
1961
+ try testing.expectEqual(@as(u32, 0), utf8.getWidthAt(text, 1, 8, .unicode)); // null
1962
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 2, 8, .unicode)); // 'b'
1963
+ }
1964
+
1965
+ test "getWidthAt: multiple combining marks" {
1966
+ const text = "e\u{0301}\u{0302}"; // e + acute + circumflex
1967
+ const width = utf8.getWidthAt(text, 0, 8, .unicode);
1968
+ try testing.expectEqual(@as(u32, 1), width); // All combining marks part of one grapheme
1969
+ }
1970
+
1971
+ test "getWidthAt: at exact end boundary" {
1972
+ const text = "hello";
1973
+ const width = utf8.getWidthAt(text, 5, 8, .unicode); // At index 5 (past end)
1974
+ try testing.expectEqual(@as(u32, 0), width);
1975
+ }
1976
+
1977
+ test "getWidthAt: realistic mixed content" {
1978
+ const text = "Hello 世界! 👋";
1979
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'H'
1980
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 5, 8, .unicode)); // ' '
1981
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 6, 8, .unicode)); // '世'
1982
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 9, 8, .unicode)); // '界'
1983
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 12, 8, .unicode)); // '!'
1984
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 13, 8, .unicode)); // ' '
1985
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 14, 8, .unicode)); // emoji
1986
+ }
1987
+
1988
+ test "getWidthAt: grapheme at SIMD boundary" {
1989
+ var buf: [32]u8 = undefined;
1990
+ @memset(&buf, 'x');
1991
+ const cjk = "世";
1992
+ @memcpy(buf[14..17], cjk); // Place CJK char near boundary
1993
+
1994
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(&buf, 13, 8, .unicode)); // 'x'
1995
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(&buf, 14, 8, .unicode)); // '世'
1996
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(&buf, 17, 8, .unicode)); // 'x'
1997
+ }
1998
+
1999
+ test "getWidthAt: incomplete UTF-8 at end" {
2000
+ const text = "abc\xC3"; // Incomplete 2-byte sequence
2001
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'a'
2002
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 3, 8, .unicode)); // Incomplete, returns 1 for error
2003
+ }
2004
+
2005
+ test "getWidthAt: random positions in realistic text" {
2006
+ const text = "The quick brown 🦊 jumps over the lazy 犬";
2007
+
2008
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode)); // 'T'
2009
+ try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 10, 8, .unicode)); // 'b'
2010
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 16, 8, .unicode)); // fox emoji
2011
+ try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 41, 8, .unicode)); // '犬' (dog)
2012
+ }
2013
+
2014
+ // ============================================================================
2015
+ // GET PREV GRAPHEME START TESTS
2016
+ // ============================================================================
2017
+
2018
+ test "getPrevGraphemeStart: at start" {
2019
+ const text = "hello";
2020
+ const result = utf8.getPrevGraphemeStart(text, 0, 8, .unicode);
2021
+ try testing.expect(result == null);
2022
+ }
2023
+
2024
+ test "getPrevGraphemeStart: empty string" {
2025
+ const result = utf8.getPrevGraphemeStart("", 0, 8, .unicode);
2026
+ try testing.expect(result == null);
2027
+ }
2028
+
2029
+ test "getPrevGraphemeStart: out of bounds" {
2030
+ const text = "hello";
2031
+ const result = utf8.getPrevGraphemeStart(text, 100, 8, .unicode);
2032
+ try testing.expect(result == null);
2033
+ }
2034
+
2035
+ test "getPrevGraphemeStart: simple ASCII" {
2036
+ const text = "hello";
2037
+
2038
+ const r1 = utf8.getPrevGraphemeStart(text, 1, 8, .unicode);
2039
+ try testing.expect(r1 != null);
2040
+ try testing.expectEqual(@as(usize, 0), r1.?.start_offset);
2041
+ try testing.expectEqual(@as(u32, 1), r1.?.width);
2042
+
2043
+ const r2 = utf8.getPrevGraphemeStart(text, 2, 8, .unicode);
2044
+ try testing.expect(r2 != null);
2045
+ try testing.expectEqual(@as(usize, 1), r2.?.start_offset);
2046
+ try testing.expectEqual(@as(u32, 1), r2.?.width);
2047
+
2048
+ const r5 = utf8.getPrevGraphemeStart(text, 5, 8, .unicode);
2049
+ try testing.expect(r5 != null);
2050
+ try testing.expectEqual(@as(usize, 4), r5.?.start_offset);
2051
+ try testing.expectEqual(@as(u32, 1), r5.?.width);
2052
+ }
2053
+
2054
+ test "getPrevGraphemeStart: CJK wide character" {
2055
+ const text = "a世界";
2056
+
2057
+ const r1 = utf8.getPrevGraphemeStart(text, 1, 8, .unicode);
2058
+ try testing.expect(r1 != null);
2059
+ try testing.expectEqual(@as(usize, 0), r1.?.start_offset);
2060
+ try testing.expectEqual(@as(u32, 1), r1.?.width);
2061
+
2062
+ const r4 = utf8.getPrevGraphemeStart(text, 4, 8, .unicode);
2063
+ try testing.expect(r4 != null);
2064
+ try testing.expectEqual(@as(usize, 1), r4.?.start_offset);
2065
+ try testing.expectEqual(@as(u32, 2), r4.?.width);
2066
+
2067
+ const r7 = utf8.getPrevGraphemeStart(text, 7, 8, .unicode);
2068
+ try testing.expect(r7 != null);
2069
+ try testing.expectEqual(@as(usize, 4), r7.?.start_offset);
2070
+ try testing.expectEqual(@as(u32, 2), r7.?.width);
2071
+ }
2072
+
2073
+ test "getPrevGraphemeStart: combining mark" {
2074
+ const text = "cafe\u{0301}"; // café with combining acute
2075
+
2076
+ const r6 = utf8.getPrevGraphemeStart(text, 6, 8, .unicode);
2077
+ try testing.expect(r6 != null);
2078
+ try testing.expectEqual(@as(usize, 3), r6.?.start_offset);
2079
+ try testing.expectEqual(@as(u32, 1), r6.?.width);
2080
+ }
2081
+
2082
+ test "getPrevGraphemeStart: emoji with skin tone" {
2083
+ const text = "Hi👋🏿";
2084
+
2085
+ const r2 = utf8.getPrevGraphemeStart(text, 2, 8, .unicode);
2086
+ try testing.expect(r2 != null);
2087
+ try testing.expectEqual(@as(usize, 1), r2.?.start_offset);
2088
+ try testing.expectEqual(@as(u32, 1), r2.?.width);
2089
+
2090
+ const r_end = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2091
+ try testing.expect(r_end != null);
2092
+ try testing.expectEqual(@as(usize, 2), r_end.?.start_offset);
2093
+ }
2094
+
2095
+ test "getPrevGraphemeStart: emoji with ZWJ" {
2096
+ const text = "a👩‍🚀"; // a + woman astronaut
2097
+
2098
+ const r1 = utf8.getPrevGraphemeStart(text, 1, 8, .unicode);
2099
+ try testing.expect(r1 != null);
2100
+ try testing.expectEqual(@as(usize, 0), r1.?.start_offset);
2101
+ try testing.expectEqual(@as(u32, 1), r1.?.width);
2102
+
2103
+ const r_end = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2104
+ try testing.expect(r_end != null);
2105
+ try testing.expectEqual(@as(usize, 1), r_end.?.start_offset);
2106
+ }
2107
+
2108
+ test "getPrevGraphemeStart: flag emoji" {
2109
+ const text = "US🇺🇸";
2110
+
2111
+ const r_end = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2112
+ try testing.expect(r_end != null);
2113
+ try testing.expectEqual(@as(usize, 2), r_end.?.start_offset);
2114
+ }
2115
+
2116
+ test "getPrevGraphemeStart: tab handling" {
2117
+ const text = "a\tb";
2118
+
2119
+ const r2 = utf8.getPrevGraphemeStart(text, 2, 4, .unicode);
2120
+ try testing.expect(r2 != null);
2121
+ try testing.expectEqual(@as(usize, 1), r2.?.start_offset);
2122
+
2123
+ const r1 = utf8.getPrevGraphemeStart(text, 1, 4, .unicode);
2124
+ try testing.expect(r1 != null);
2125
+ try testing.expectEqual(@as(usize, 0), r1.?.start_offset);
2126
+ try testing.expectEqual(@as(u32, 1), r1.?.width);
2127
+ }
2128
+
2129
+ test "getPrevGraphemeStart: mixed content" {
2130
+ const text = "Hi世界!";
2131
+
2132
+ const r2 = utf8.getPrevGraphemeStart(text, 2, 8, .unicode);
2133
+ try testing.expect(r2 != null);
2134
+ try testing.expectEqual(@as(usize, 1), r2.?.start_offset);
2135
+
2136
+ const r5 = utf8.getPrevGraphemeStart(text, 5, 8, .unicode);
2137
+ try testing.expect(r5 != null);
2138
+ try testing.expectEqual(@as(usize, 2), r5.?.start_offset);
2139
+ try testing.expectEqual(@as(u32, 2), r5.?.width);
2140
+
2141
+ const r8 = utf8.getPrevGraphemeStart(text, 8, 8, .unicode);
2142
+ try testing.expect(r8 != null);
2143
+ try testing.expectEqual(@as(usize, 5), r8.?.start_offset);
2144
+ try testing.expectEqual(@as(u32, 2), r8.?.width);
2145
+ }
2146
+
2147
+ test "getPrevGraphemeStart: multiple combining marks" {
2148
+ const text = "e\u{0301}\u{0302}x"; // e + acute + circumflex + x
2149
+
2150
+ const r_x = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2151
+ try testing.expect(r_x != null);
2152
+ try testing.expectEqual(@as(usize, text.len - 1), r_x.?.start_offset);
2153
+
2154
+ const r_e = utf8.getPrevGraphemeStart(text, text.len - 1, 8, .unicode);
2155
+ try testing.expect(r_e != null);
2156
+ try testing.expectEqual(@as(usize, 0), r_e.?.start_offset);
2157
+ try testing.expectEqual(@as(u32, 1), r_e.?.width);
2158
+ }
2159
+
2160
+ test "getPrevGraphemeStart: hiragana" {
2161
+ const text = "こんにちは";
2162
+
2163
+ const r_last = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2164
+ try testing.expect(r_last != null);
2165
+ try testing.expectEqual(@as(usize, 12), r_last.?.start_offset);
2166
+ try testing.expectEqual(@as(u32, 2), r_last.?.width);
2167
+ }
2168
+
2169
+ test "getPrevGraphemeStart: realistic scenario" {
2170
+ const text = "Hello 世界! 👋";
2171
+
2172
+ const r_end = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
2173
+ try testing.expect(r_end != null);
2174
+ try testing.expectEqual(@as(usize, 14), r_end.?.start_offset);
2175
+
2176
+ const r_space = utf8.getPrevGraphemeStart(text, 14, 8, .unicode);
2177
+ try testing.expect(r_space != null);
2178
+ try testing.expectEqual(@as(usize, 13), r_space.?.start_offset);
2179
+ try testing.expectEqual(@as(u32, 1), r_space.?.width);
2180
+ }
2181
+
2182
+ test "getPrevGraphemeStart: consecutive wide chars" {
2183
+ const text = "世界中";
2184
+
2185
+ const r9 = utf8.getPrevGraphemeStart(text, 9, 8, .unicode);
2186
+ try testing.expect(r9 != null);
2187
+ try testing.expectEqual(@as(usize, 6), r9.?.start_offset);
2188
+ try testing.expectEqual(@as(u32, 2), r9.?.width);
2189
+
2190
+ const r6 = utf8.getPrevGraphemeStart(text, 6, 8, .unicode);
2191
+ try testing.expect(r6 != null);
2192
+ try testing.expectEqual(@as(usize, 3), r6.?.start_offset);
2193
+ try testing.expectEqual(@as(u32, 2), r6.?.width);
2194
+
2195
+ const r3 = utf8.getPrevGraphemeStart(text, 3, 8, .unicode);
2196
+ try testing.expect(r3 != null);
2197
+ try testing.expectEqual(@as(usize, 0), r3.?.start_offset);
2198
+ try testing.expectEqual(@as(u32, 2), r3.?.width);
2199
+ }
2200
+
2201
+ // ============================================================================
2202
+ // CALCULATE TEXT WIDTH TESTS (static tab width)
2203
+ // ============================================================================
2204
+
2205
+ test "calculateTextWidth: empty string" {
2206
+ const result = utf8.calculateTextWidth("", 4, false, .unicode);
2207
+ try testing.expectEqual(@as(u32, 0), result);
2208
+ }
2209
+
2210
+ test "calculateTextWidth: simple ASCII" {
2211
+ const result = utf8.calculateTextWidth("hello", 4, true, .unicode);
2212
+ try testing.expectEqual(@as(u32, 5), result);
2213
+ }
2214
+
2215
+ test "calculateTextWidth: single tab" {
2216
+ const result = utf8.calculateTextWidth("\t", 4, false, .unicode);
2217
+ try testing.expectEqual(@as(u32, 4), result);
2218
+ }
2219
+
2220
+ test "calculateTextWidth: tab with different widths" {
2221
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("\t", 2, false, .unicode));
2222
+ try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth("\t", 4, false, .unicode));
2223
+ try testing.expectEqual(@as(u32, 8), utf8.calculateTextWidth("\t", 8, false, .unicode));
2224
+ }
2225
+
2226
+ test "calculateTextWidth: multiple tabs" {
2227
+ const result = utf8.calculateTextWidth("\t\t\t", 4, false, .unicode);
2228
+ try testing.expectEqual(@as(u32, 12), result); // 3 tabs * 4 = 12
2229
+ }
2230
+
2231
+ test "calculateTextWidth: text with tabs" {
2232
+ const result = utf8.calculateTextWidth("a\tb", 4, false, .unicode);
2233
+ try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(4) + b(1) = 6
2234
+ }
2235
+
2236
+ test "calculateTextWidth: multiple tabs between text" {
2237
+ const result = utf8.calculateTextWidth("a\t\tb", 2, false, .unicode);
2238
+ try testing.expectEqual(@as(u32, 6), result); // a(1) + tab(2) + tab(2) + b(1) = 6
2239
+ }
2240
+
2241
+ test "calculateTextWidth: tab at start" {
2242
+ const result = utf8.calculateTextWidth("\tabc", 4, false, .unicode);
2243
+ try testing.expectEqual(@as(u32, 7), result); // tab(4) + a(1) + b(1) + c(1) = 7
2244
+ }
2245
+
2246
+ test "calculateTextWidth: tab at end" {
2247
+ const result = utf8.calculateTextWidth("abc\t", 4, false, .unicode);
2248
+ try testing.expectEqual(@as(u32, 7), result); // a(1) + b(1) + c(1) + tab(4) = 7
2249
+ }
2250
+
2251
+ test "calculateTextWidth: CJK with tabs" {
2252
+ const result = utf8.calculateTextWidth("世\t界", 4, false, .unicode);
2253
+ try testing.expectEqual(@as(u32, 8), result); // 世(2) + tab(4) + 界(2) = 8
2254
+ }
2255
+
2256
+ test "calculateTextWidth: emoji with tab" {
2257
+ const result = utf8.calculateTextWidth("🌍\t", 4, false, .unicode);
2258
+ try testing.expectEqual(@as(u32, 6), result); // emoji(2) + tab(4) = 6
2259
+ }
2260
+
2261
+ test "calculateTextWidth: mixed ASCII and Unicode with tabs" {
2262
+ const result = utf8.calculateTextWidth("hello\t世界", 4, false, .unicode);
2263
+ try testing.expectEqual(@as(u32, 13), result); // hello(5) + tab(4) + 世(2) + 界(2) = 13
2264
+ }
2265
+
2266
+ test "calculateTextWidth: realistic code with tabs" {
2267
+ const text = "\tif (x > 5) {\n\t\treturn true;\n\t}";
2268
+ const result = utf8.calculateTextWidth(text, 2, false, .unicode);
2269
+ // tab(2) + "if (x > 5) {" (12) + newline(0) + tab(2) + tab(2) + "return true;" (12) + newline(0) + tab(2) + "}" (1)
2270
+ // = 2 + 12 + 2 + 2 + 12 + 2 + 1 = 33
2271
+ try testing.expectEqual(@as(u32, 33), result);
2272
+ }
2273
+
2274
+ test "calculateTextWidth: only spaces" {
2275
+ const result = utf8.calculateTextWidth("     ", 4, true, .unicode);
2276
+ try testing.expectEqual(@as(u32, 5), result);
2277
+ }
2278
+
2279
+ test "calculateTextWidth: tabs and spaces mixed" {
2280
+ const result = utf8.calculateTextWidth("  \t  \t  ", 4, false, .unicode);
2281
+ try testing.expectEqual(@as(u32, 14), result); // 2 + 4 + 2 + 4 + 2 = 14
2282
+ }
2283
+
2284
+ test "calculateTextWidth: control characters" {
2285
+ const result = utf8.calculateTextWidth("a\x00b\x1Fc", 4, false, .unicode);
2286
+ try testing.expectEqual(@as(u32, 3), result); // Only printable chars: a, b, c
2287
+ }
2288
+
2289
+ test "calculateTextWidth: combining marks" {
2290
+ const result = utf8.calculateTextWidth("cafe\u{0301}", 4, false, .unicode);
2291
+ try testing.expectEqual(@as(u32, 4), result); // c(1) + a(1) + f(1) + e(1) + combining(0) = 4
2292
+ }
2293
+
2294
+ test "calculateTextWidth: scroll book and writing emojis width 2" {
2295
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("📜", 4, false, .unicode));
2296
+ }
2297
+
2298
+ test "calculateTextWidth: Devanagari नमस्ते width 4" {
2299
+ const result = utf8.calculateTextWidth("नमस्ते", 4, false, .unicode);
2300
+ try testing.expectEqual(@as(u32, 4), result);
2301
+ }
2302
+
2303
+ // ============================================================================
2304
+ // UNICODE WARNING SIGNS WIDTH TESTS
2305
+ // ============================================================================
2306
+
2307
+ test "calculateTextWidth: U+26A0 warning sign should be width 2" {
2308
+ const result = utf8.calculateTextWidth("⚠", 4, false, .unicode);
2309
+ try testing.expectEqual(@as(u32, 2), result);
2310
+ }
2311
+
2312
+ test "calculateTextWidth: U+2049 exclamation question mark should be width 2" {
2313
+ const result = utf8.calculateTextWidth("⁉", 4, false, .unicode);
2314
+ try testing.expectEqual(@as(u32, 2), result);
2315
+ }
2316
+
2317
+ test "calculateTextWidth: U+203C double exclamation mark should be width 2" {
2318
+ const result = utf8.calculateTextWidth("‼", 4, false, .unicode);
2319
+ try testing.expectEqual(@as(u32, 2), result);
2320
+ }
2321
+
2322
+ test "calculateTextWidth: U+26D1 rescue worker helmet should be width 2" {
2323
+ const result = utf8.calculateTextWidth("⛑", 4, false, .unicode);
2324
+ try testing.expectEqual(@as(u32, 2), result);
2325
+ }
2326
+
2327
+ test "calculateTextWidth: U+2622 radioactive sign should be width 2" {
2328
+ const result = utf8.calculateTextWidth("☢", 4, false, .unicode);
2329
+ try testing.expectEqual(@as(u32, 2), result);
2330
+ }
2331
+
2332
+ test "calculateTextWidth: U+2623 biohazard sign should be width 2" {
2333
+ const result = utf8.calculateTextWidth("☣", 4, false, .unicode);
2334
+ try testing.expectEqual(@as(u32, 2), result);
2335
+ }
2336
+
2337
+ test "calculateTextWidth: U+269B atom symbol should be width 2" {
2338
+ const result = utf8.calculateTextWidth("⚛", 4, false, .unicode);
2339
+ try testing.expectEqual(@as(u32, 2), result);
2340
+ }
2341
+
2342
+ // ============================================================================
2343
+ // GRAPHEME INFO TESTS (for caching multi-byte graphemes and tabs)
2344
+ // ============================================================================
2345
+
2346
+ test "findGraphemeInfo: empty string" {
2347
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2348
+ defer result.deinit(testing.allocator);
2349
+
2350
+ try utf8.findGraphemeInfo("", 4, false, .unicode, testing.allocator, &result);
2351
+ try testing.expectEqual(@as(usize, 0), result.items.len);
2352
+ }
2353
+
2354
+ test "findGraphemeInfo: ASCII-only returns empty" {
2355
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2356
+ defer result.deinit(testing.allocator);
2357
+
2358
+ try utf8.findGraphemeInfo("hello world", 4, true, .unicode, testing.allocator, &result);
2359
+ try testing.expectEqual(@as(usize, 0), result.items.len);
2360
+ }
2361
+
2362
+ test "findGraphemeInfo: ASCII with tab" {
2363
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2364
+ defer result.deinit(testing.allocator);
2365
+
2366
+ try utf8.findGraphemeInfo("hello\tworld", 4, false, .unicode, testing.allocator, &result);
2367
+
2368
+ // Should have one entry for the tab
2369
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2370
+ try testing.expectEqual(@as(u32, 5), result.items[0].byte_offset);
2371
+ try testing.expectEqual(@as(u8, 1), result.items[0].byte_len);
2372
+ try testing.expectEqual(@as(u8, 4), result.items[0].width);
2373
+ try testing.expectEqual(@as(u32, 5), result.items[0].col_offset);
2374
+ }
2375
+
2376
+ test "findGraphemeInfo: multiple tabs" {
2377
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2378
+ defer result.deinit(testing.allocator);
2379
+
2380
+ try utf8.findGraphemeInfo("a\tb\tc", 4, false, .unicode, testing.allocator, &result);
2381
+
2382
+ // Should have two entries for the tabs
2383
+ try testing.expectEqual(@as(usize, 2), result.items.len);
2384
+
2385
+ // First tab at byte 1, col 1
2386
+ try testing.expectEqual(@as(u32, 1), result.items[0].byte_offset);
2387
+ try testing.expectEqual(@as(u8, 1), result.items[0].byte_len);
2388
+ try testing.expectEqual(@as(u8, 4), result.items[0].width);
2389
+ try testing.expectEqual(@as(u32, 1), result.items[0].col_offset);
2390
+
2391
+ // Second tab at byte 3, col 6 (1 + 4 + 1)
2392
+ try testing.expectEqual(@as(u32, 3), result.items[1].byte_offset);
2393
+ try testing.expectEqual(@as(u8, 1), result.items[1].byte_len);
2394
+ try testing.expectEqual(@as(u8, 4), result.items[1].width);
2395
+ try testing.expectEqual(@as(u32, 6), result.items[1].col_offset);
2396
+ }
2397
+
2398
+ test "findGraphemeInfo: CJK characters" {
2399
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2400
+ defer result.deinit(testing.allocator);
2401
+
2402
+ const text = "hello世界";
2403
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2404
+
2405
+ // Should have two entries for the CJK characters
2406
+ try testing.expectEqual(@as(usize, 2), result.items.len);
2407
+
2408
+ // 世 at byte 5
2409
+ try testing.expectEqual(@as(u32, 5), result.items[0].byte_offset);
2410
+ try testing.expectEqual(@as(u8, 3), result.items[0].byte_len);
2411
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2412
+ try testing.expectEqual(@as(u32, 5), result.items[0].col_offset);
2413
+
2414
+ // 界 at byte 8
2415
+ try testing.expectEqual(@as(u32, 8), result.items[1].byte_offset);
2416
+ try testing.expectEqual(@as(u8, 3), result.items[1].byte_len);
2417
+ try testing.expectEqual(@as(u8, 2), result.items[1].width);
2418
+ try testing.expectEqual(@as(u32, 7), result.items[1].col_offset);
2419
+ }
2420
+
2421
+ test "findGraphemeInfo: emoji with skin tone" {
2422
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2423
+ defer result.deinit(testing.allocator);
2424
+
2425
+ const text = "Hi👋🏿Bye"; // Hi + wave + dark skin tone + Bye
2426
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2427
+
2428
+ // Should have one entry for the emoji cluster
2429
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2430
+
2431
+ try testing.expectEqual(@as(u32, 2), result.items[0].byte_offset);
2432
+ try testing.expectEqual(@as(u8, 8), result.items[0].byte_len); // 4 + 4 bytes
2433
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2434
+ try testing.expectEqual(@as(u32, 2), result.items[0].col_offset);
2435
+ }
2436
+
2437
+ test "findGraphemeInfo: emoji with ZWJ" {
2438
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2439
+ defer result.deinit(testing.allocator);
2440
+
2441
+ const text = "a👩‍🚀b"; // a + woman astronaut + b
2442
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2443
+
2444
+ // Should have one entry for the emoji cluster
2445
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2446
+
2447
+ try testing.expectEqual(@as(u32, 1), result.items[0].byte_offset);
2448
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2449
+ try testing.expectEqual(@as(u32, 1), result.items[0].col_offset);
2450
+ }
2451
+
2452
+ test "findGraphemeInfo: combining mark" {
2453
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2454
+ defer result.deinit(testing.allocator);
2455
+
2456
+ const text = "cafe\u{0301}"; // café with combining acute
2457
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2458
+
2459
+ // Should have one entry for e + combining mark
2460
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2461
+
2462
+ try testing.expectEqual(@as(u32, 3), result.items[0].byte_offset); // 'e' position
2463
+ try testing.expectEqual(@as(u8, 3), result.items[0].byte_len); // e (1 byte) + combining (2 bytes)
2464
+ try testing.expectEqual(@as(u8, 1), result.items[0].width);
2465
+ try testing.expectEqual(@as(u32, 3), result.items[0].col_offset);
2466
+ }
2467
+
2468
+ test "findGraphemeInfo: flag emoji" {
2469
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2470
+ defer result.deinit(testing.allocator);
2471
+
2472
+ const text = "US🇺🇸"; // US + flag
2473
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2474
+
2475
+ // Should have one entry for the flag (two regional indicators)
2476
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2477
+
2478
+ try testing.expectEqual(@as(u32, 2), result.items[0].byte_offset);
2479
+ try testing.expectEqual(@as(u8, 8), result.items[0].byte_len); // Two 4-byte chars
2480
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2481
+ try testing.expectEqual(@as(u32, 2), result.items[0].col_offset);
2482
+ }
2483
+
2484
+ test "findGraphemeInfo: mixed content" {
2485
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2486
+ defer result.deinit(testing.allocator);
2487
+
2488
+ const text = "Hi\t世界!"; // Hi + tab + CJK + !
2489
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2490
+
2491
+ // Should have three entries: tab, 世, 界
2492
+ try testing.expectEqual(@as(usize, 3), result.items.len);
2493
+
2494
+ // Tab at byte 2, col 2
2495
+ try testing.expectEqual(@as(u32, 2), result.items[0].byte_offset);
2496
+ try testing.expectEqual(@as(u8, 1), result.items[0].byte_len);
2497
+ try testing.expectEqual(@as(u8, 4), result.items[0].width);
2498
+ try testing.expectEqual(@as(u32, 2), result.items[0].col_offset);
2499
+
2500
+ // 世 at byte 3, col 6
2501
+ try testing.expectEqual(@as(u32, 3), result.items[1].byte_offset);
2502
+ try testing.expectEqual(@as(u8, 3), result.items[1].byte_len);
2503
+ try testing.expectEqual(@as(u8, 2), result.items[1].width);
2504
+ try testing.expectEqual(@as(u32, 6), result.items[1].col_offset);
2505
+
2506
+ // 界 at byte 6, col 8
2507
+ try testing.expectEqual(@as(u32, 6), result.items[2].byte_offset);
2508
+ try testing.expectEqual(@as(u8, 3), result.items[2].byte_len);
2509
+ try testing.expectEqual(@as(u8, 2), result.items[2].width);
2510
+ try testing.expectEqual(@as(u32, 8), result.items[2].col_offset);
2511
+ }
2512
+
2513
+ test "findGraphemeInfo: only ASCII letters no cache" {
2514
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2515
+ defer result.deinit(testing.allocator);
2516
+
2517
+ try utf8.findGraphemeInfo("abcdefghij", 4, false, .unicode, testing.allocator, &result);
2518
+
2519
+ // No special characters, should be empty
2520
+ try testing.expectEqual(@as(usize, 0), result.items.len);
2521
+ }
2522
+
2523
+ test "findGraphemeInfo: emoji with VS16" {
2524
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2525
+ defer result.deinit(testing.allocator);
2526
+
2527
+ const text = "I ❤️ U"; // I + space + heart + VS16 + space + U
2528
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2529
+
2530
+ // Should have one entry for the emoji cluster
2531
+ try testing.expectEqual(@as(usize, 1), result.items.len);
2532
+
2533
+ try testing.expectEqual(@as(u32, 2), result.items[0].byte_offset);
2534
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2535
+ try testing.expectEqual(@as(u32, 2), result.items[0].col_offset);
2536
+ }
2537
+
2538
+ test "findGraphemeInfo: realistic text" {
2539
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2540
+ defer result.deinit(testing.allocator);
2541
+
2542
+ const text = "function test() {\n\tconst 世界 = 10;\n}";
2543
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2544
+
2545
+ // Should have entries for: tab, 世, 界
2546
+ try testing.expectEqual(@as(usize, 3), result.items.len);
2547
+ }
2548
+
2549
+ test "findGraphemeInfo: hiragana" {
2550
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2551
+ defer result.deinit(testing.allocator);
2552
+
2553
+ const text = "こんにちは";
2554
+ try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
2555
+
2556
+ // Should have 5 entries (each hiragana is 3 bytes, width 2)
2557
+ try testing.expectEqual(@as(usize, 5), result.items.len);
2558
+
2559
+ // Check first character
2560
+ try testing.expectEqual(@as(u32, 0), result.items[0].byte_offset);
2561
+ try testing.expectEqual(@as(u8, 3), result.items[0].byte_len);
2562
+ try testing.expectEqual(@as(u8, 2), result.items[0].width);
2563
+ }
2564
+
2565
+ test "findGraphemeInfo: at SIMD boundary" {
2566
+ var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
2567
+ defer result.deinit(testing.allocator);
2568
+
2569
+ // Create text with multibyte char near SIMD boundary (16 bytes)
2570
+ var buf: [32]u8 = undefined;
2571
+ @memset(&buf, 'x');
2572
+ const cjk = "世";
2573
+ @memcpy(buf[14..17], cjk); // Place CJK char at boundary
2574
+
2575
+ try utf8.findGraphemeInfo(&buf, 4, false, .unicode, testing.allocator, &result);
2576
+
2577
+ // Should find the CJK character
2578
+ var found = false;
2579
+ for (result.items) |g| {
2580
+ if (g.byte_offset == 14) {
2581
+ found = true;
2582
+ try testing.expectEqual(@as(u8, 3), g.byte_len);
2583
+ try testing.expectEqual(@as(u8, 2), g.width);
2584
+ break;
2585
+ }
2586
+ }
2587
+ try testing.expect(found);
2588
+ }
2589
+
2590
+ test "calculateTextWidth: book and writing hand emojis width 2" {
2591
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("📖", 4, false, .unicode));
2592
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("✍️", 4, false, .unicode));
2593
+ }
2594
+
2595
+ test "calculateTextWidth: Devanagari script" {
2596
+ const result = utf8.calculateTextWidth("देवनागरी", 4, false, .unicode);
2597
+ try testing.expectEqual(@as(u32, 5), result);
2598
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth("प्रथम", 4, false, .unicode));
2599
+ }
2600
+
2601
+ test "calculateTextWidth: checkmark symbol" {
2602
+ const result = utf8.calculateTextWidth("✓", 4, false, .unicode);
2603
+ try testing.expectEqual(@as(u32, 1), result);
2604
+ }
2605
+
2606
+ test "calculateTextWidth: emoji with skin tone" {
2607
+ const result = utf8.calculateTextWidth("👋🏿", 4, false, .unicode);
2608
+ try testing.expectEqual(@as(u32, 2), result); // 👋🏿 is a single grapheme with width 2
2609
+ }
2610
+
2611
+ test "calculateTextWidth: emoji with ZWJ" {
2612
+ const result = utf8.calculateTextWidth("👩‍🚀", 4, false, .unicode);
2613
+ try testing.expectEqual(@as(u32, 2), result); // 👩‍🚀 is a single grapheme with width 2
2614
+ }
2615
+
2616
+ test "calculateTextWidth: emoji with VS16 selector" {
2617
+ const result = utf8.calculateTextWidth("❤️", 4, false, .unicode);
2618
+ try testing.expectEqual(@as(u32, 2), result); // ❤️ (heart + VS16) is a single grapheme with width 2
2619
+ }
2620
+
2621
+ test "calculateTextWidth: flag emoji" {
2622
+ const result = utf8.calculateTextWidth("🇺🇸", 4, false, .unicode);
2623
+ try testing.expectEqual(@as(u32, 2), result); // 🇺🇸 is a single grapheme with width 2
2624
+ }
2625
+
2626
+ test "calculateTextWidth: hiragana with tab" {
2627
+ const result = utf8.calculateTextWidth("こん\tにちは", 4, false, .unicode);
2628
+ try testing.expectEqual(@as(u32, 14), result); // こ(2) + ん(2) + tab(4) + に(2) + ち(2) + は(2) = 14
2629
+ }
2630
+
2631
+ test "calculateTextWidth: fullwidth forms with tab" {
2632
+ const result = utf8.calculateTextWidth("\u{FF21}\u{FF22}\t\u{FF23}", 4, false, .unicode); // fullwidth A, fullwidth B, tab, fullwidth C; escaped so the letters cannot be folded to ASCII
2633
+ try testing.expectEqual(@as(u32, 10), result); // U+FF21(2) + U+FF22(2) + tab(4) + U+FF23(2) = 10
2634
+ }
2635
+
2636
+ test "calculateTextWidth: ASCII fast path consistency" {
2637
+ const text_ascii = "hello world";
2638
+ const result_fast = utf8.calculateTextWidth(text_ascii, 4, true, .unicode);
2639
+ const result_slow = utf8.calculateTextWidth(text_ascii, 4, false, .unicode);
2640
+ try testing.expectEqual(result_fast, result_slow);
2641
+ }
2642
+
2643
+ test "calculateTextWidth: large text with many tabs" {
2644
+ const size = 1000;
2645
+ const buf = try testing.allocator.alloc(u8, size);
2646
+ defer testing.allocator.free(buf);
2647
+
2648
+ var expected: u32 = 0;
2649
+ for (buf, 0..) |*b, i| {
2650
+ if (i % 10 == 0) {
2651
+ b.* = '\t';
2652
+ expected += 4;
2653
+ } else {
2654
+ b.* = 'a';
2655
+ expected += 1;
2656
+ }
2657
+ }
2658
+
2659
+ const result = utf8.calculateTextWidth(buf, 4, false, .unicode);
2660
+ try testing.expectEqual(expected, result);
2661
+ }
2662
+
2663
+ test "calculateTextWidth: comparison with manual calculation" {
2664
+ const test_cases = [_]struct {
2665
+ text: []const u8,
2666
+ tab_width: u8,
2667
+ expected: u32,
2668
+ }{
2669
+ .{ .text = "\t", .tab_width = 2, .expected = 2 },
2670
+ .{ .text = "\t\t", .tab_width = 2, .expected = 4 },
2671
+ .{ .text = "a\t", .tab_width = 2, .expected = 3 },
2672
+ .{ .text = "\ta", .tab_width = 2, .expected = 3 },
2673
+ .{ .text = "a\tb", .tab_width = 2, .expected = 4 },
2674
+ .{ .text = "ab\tcd", .tab_width = 4, .expected = 8 },
2675
+ .{ .text = "\t\tx", .tab_width = 2, .expected = 5 },
2676
+ .{ .text = "世\t界", .tab_width = 2, .expected = 6 },
2677
+ };
2678
+
2679
+ for (test_cases) |tc| {
2680
+ const result = utf8.calculateTextWidth(tc.text, tc.tab_width, false, .unicode);
2681
+ try testing.expectEqual(tc.expected, result);
2682
+ }
2683
+ }
2684
+
2685
+ // ============================================================================
2686
+ // LINE WIDTH WITH GRAPHEMES TESTS
2687
+ // Testing that calculateTextWidth returns correct Unicode display widths
2688
+ // ============================================================================
2689
+
2690
+ test "calculateTextWidth: checkmark grapheme ✅" {
2691
+ // Test simple checkmark emoji
2692
+ const checkmark = "✅";
2693
+
2694
+ // Calculate width using utf8.zig's calculateTextWidth
2695
+ const width = utf8.calculateTextWidth(checkmark, 4, false, .unicode);
2696
+
2697
+ // The checkmark ✅ (U+2705) should be width 2
2698
+ try testing.expectEqual(@as(u32, 2), width);
2699
+ }
2700
+
2701
+ test "calculateTextWidth: Sanskrit text with combining marks" {
2702
+ const result = utf8.calculateTextWidth("संस्कृति", 4, false, .unicode);
2703
+ try testing.expectEqual(@as(u32, 4), result);
2704
+ }
2705
+
2706
+ test "calculateTextWidth: checkmark in text" {
2707
+ // Test checkmark in context
2708
+ const text = "Done ✅";
2709
+
2710
+ // Calculate width using utf8.zig
2711
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
2712
+
2713
+ // Should return: D(1) + o(1) + n(1) + e(1) + space(1) + ✅(2) = 7
2714
+ try testing.expectEqual(@as(u32, 7), width);
2715
+ }
2716
+
2717
+ test "calculateTextWidth: various emoji graphemes" {
2718
+ const test_cases = [_]struct {
2719
+ text: []const u8,
2720
+ name: []const u8,
2721
+ expected_width: u32,
2722
+ }{
2723
+ .{ .text = "✅", .name = "checkmark U+2705", .expected_width = 2 },
2724
+ .{ .text = "❤️", .name = "red heart U+2764+FE0F", .expected_width = 2 },
2725
+ .{ .text = "🎉", .name = "party popper U+1F389", .expected_width = 2 },
2726
+ .{ .text = "🔥", .name = "fire U+1F525", .expected_width = 2 },
2727
+ .{ .text = "💯", .name = "hundred points U+1F4AF", .expected_width = 2 },
2728
+ .{ .text = "🚀", .name = "rocket U+1F680", .expected_width = 2 },
2729
+ .{ .text = "⭐", .name = "star U+2B50", .expected_width = 2 },
2730
+ .{ .text = "👍", .name = "thumbs up U+1F44D", .expected_width = 2 },
2731
+ };
2732
+
2733
+ for (test_cases) |tc| {
2734
+ const width = utf8.calculateTextWidth(tc.text, 4, false, .unicode);
2735
+ try testing.expectEqual(tc.expected_width, width);
2736
+ }
2737
+ }
2738
+
2739
+ test "calculateTextWidth: complex graphemes with ZWJ" {
2740
+ // Woman astronaut: 👩‍🚀 (woman + ZWJ + rocket)
2741
+ const woman_astronaut = "👩‍🚀";
2742
+
2743
+ const width = utf8.calculateTextWidth(woman_astronaut, 4, false, .unicode);
2744
+
2745
+ // Should return 2 for the combined grapheme (not 4 = 2 + 0 + 2 from summing the individual codepoints)
2746
+ try testing.expectEqual(@as(u32, 2), width);
2747
+ }
2748
+
2749
+ test "calculateTextWidth: flag emoji grapheme" {
2750
+ // US flag: 🇺🇸 (two regional indicator symbols)
2751
+ const us_flag = "🇺🇸";
2752
+
2753
+ const width = utf8.calculateTextWidth(us_flag, 4, false, .unicode);
2754
+
2755
+ // Should return 2 for the flag grapheme
2756
+ try testing.expectEqual(@as(u32, 2), width);
2757
+ }
2758
+
2759
+ test "calculateTextWidth: skin tone modifier grapheme" {
2760
+ // Waving hand with dark skin tone: 👋🏿
2761
+ const wave_dark = "👋🏿";
2762
+
2763
+ const width = utf8.calculateTextWidth(wave_dark, 4, false, .unicode);
2764
+
2765
+ // Should return 2 for the combined grapheme (not 4 for individual codepoints)
2766
+ try testing.expectEqual(@as(u32, 2), width);
2767
+ }
2768
+ // ============================================================================
2769
+ // COMPREHENSIVE UNICODE GRAPHEME TESTS FOR calculateTextWidth
2770
+ // Testing various emoji, ZWJ sequences, Indic scripts, and Unicode edge cases
2771
+ // ============================================================================
2772
+
2773
+ // ----------------------------------------------------------------------------
2774
+ // Emoji Presentation Tests
2775
+ // ----------------------------------------------------------------------------
2776
+
2777
+ test "calculateTextWidth: emoji presentation with VS15 (text)" {
2778
+ // U+2764 (heart) + U+FE0E (VS15 - text presentation)
2779
+ const heart_text = "❤\u{FE0E}";
2780
+ const width = utf8.calculateTextWidth(heart_text, 4, false, .unicode);
2781
+ // With text presentation selector, should still be counted as grapheme width 2
2782
+ try testing.expectEqual(@as(u32, 2), width);
2783
+ }
2784
+
2785
+ test "calculateTextWidth: emoji presentation with VS16 (emoji)" {
2786
+ // U+2764 (heart) + U+FE0F (VS16 - emoji presentation) - already tested as ❤️
2787
+ const heart_emoji = "❤️";
2788
+ const width = utf8.calculateTextWidth(heart_emoji, 4, false, .unicode);
2789
+ try testing.expectEqual(@as(u32, 2), width);
2790
+ }
2791
+
2792
+ test "calculateTextWidth: keycap sequences" {
2793
+ // Digit + U+FE0F + U+20E3 (combining enclosing keycap)
2794
+ const keycap_1 = "1️⃣"; // U+0031 U+FE0F U+20E3
2795
+ const keycap_hash = "#️⃣"; // U+0023 U+FE0F U+20E3
2796
+
2797
+ // Keycap: base char (1) + VS16 (changes to emoji presentation, width 2) + combining keycap (0) = 2 total width
2798
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(keycap_1, 4, false, .unicode));
2799
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(keycap_hash, 4, false, .unicode));
2800
+ }
2801
+
2802
+ // ----------------------------------------------------------------------------
2803
+ // Complex ZWJ Sequences
2804
+ // ----------------------------------------------------------------------------
2805
+
2806
+ test "calculateTextWidth: family ZWJ sequences" {
2807
+ // Family: man, woman, girl, boy (4 people)
2808
+ const family = "👨‍👩‍👧‍👦"; // man + ZWJ + woman + ZWJ + girl + ZWJ + boy
2809
+ const width = utf8.calculateTextWidth(family, 4, false, .unicode);
2810
+ // Should be counted as single grapheme with width 2
2811
+ try testing.expectEqual(@as(u32, 2), width);
2812
+ }
2813
+
2814
+ test "calculateTextWidth: profession ZWJ sequences" {
2815
+ // Woman health worker: woman + ZWJ + health worker
2816
+ const health_worker = "👩‍⚕️";
2817
+ const firefighter = "👨‍🚒";
2818
+ const teacher = "👩‍🏫";
2819
+
2820
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(health_worker, 4, false, .unicode));
2821
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(firefighter, 4, false, .unicode));
2822
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(teacher, 4, false, .unicode));
2823
+ }
2824
+
2825
+ test "calculateTextWidth: couple ZWJ sequences" {
2826
+ // Kiss is used here in its single-codepoint form; couple-with-heart is the actual ZWJ sequence
2827
+ const kiss = "💏"; // Single codepoint
2828
+ const couple_with_heart = "👩‍❤️‍👨"; // woman + ZWJ + heart + VS16 + ZWJ + man
2829
+
2830
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(kiss, 4, false, .unicode));
2831
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(couple_with_heart, 4, false, .unicode));
2832
+ }
2833
+
2834
+ // ----------------------------------------------------------------------------
2835
+ // Skin Tone Modifiers (Fitzpatrick scale)
2836
+ // ----------------------------------------------------------------------------
2837
+
2838
+ test "calculateTextWidth: all skin tone modifiers" {
2839
+ // Fitzpatrick Type-1-2 (light skin tone) U+1F3FB
2840
+ const wave_light = "👋🏻";
2841
+ // Fitzpatrick Type-3 (medium-light skin tone) U+1F3FC
2842
+ const wave_medium_light = "👋🏼";
2843
+ // Fitzpatrick Type-4 (medium skin tone) U+1F3FD
2844
+ const wave_medium = "👋🏽";
2845
+ // Fitzpatrick Type-5 (medium-dark skin tone) U+1F3FE
2846
+ const wave_medium_dark = "👋🏾";
2847
+ // Fitzpatrick Type-6 (dark skin tone) U+1F3FF
2848
+ const wave_dark = "👋🏿";
2849
+
2850
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_light, 4, false, .unicode));
2851
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium_light, 4, false, .unicode));
2852
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium, 4, false, .unicode));
2853
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium_dark, 4, false, .unicode));
2854
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_dark, 4, false, .unicode));
2855
+ }
2856
+
2857
+ test "calculateTextWidth: skin tone with ZWJ" {
2858
+ // Family with skin tones: man(dark) + ZWJ + woman(light) + ZWJ + child
2859
+ const family_skin_tones = "👨🏿‍👩🏻‍👶";
2860
+ const width = utf8.calculateTextWidth(family_skin_tones, 4, false, .unicode);
2861
+ try testing.expectEqual(@as(u32, 2), width);
2862
+ }
2863
+
2864
+ // ----------------------------------------------------------------------------
2865
+ // Regional Indicator Symbols (Flags)
2866
+ // ----------------------------------------------------------------------------
2867
+
2868
+ test "calculateTextWidth: various flag emojis" {
2869
+ const flag_us = "🇺🇸"; // U+1F1FA U+1F1F8
2870
+ const flag_uk = "🇬🇧"; // U+1F1EC U+1F1E7
2871
+ const flag_jp = "🇯🇵"; // U+1F1EF U+1F1F5
2872
+ const flag_de = "🇩🇪"; // U+1F1E9 U+1F1EA
2873
+ const flag_fr = "🇫🇷"; // U+1F1EB U+1F1F7
2874
+
2875
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_us, 4, false, .unicode));
2876
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_uk, 4, false, .unicode));
2877
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_jp, 4, false, .unicode));
2878
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_de, 4, false, .unicode));
2879
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_fr, 4, false, .unicode));
2880
+ }
2881
+
2882
+ test "calculateTextWidth: multiple flags in text" {
2883
+ const text = "Flags: 🇺🇸 🇬🇧 🇯🇵";
2884
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
2885
+ // "Flags: " (7) + 🇺🇸 (2) + " " (1) + 🇬🇧 (2) + " " (1) + 🇯🇵 (2) = 15
2886
+ try testing.expectEqual(@as(u32, 15), width);
2887
+ }
2888
+
2889
+ // ----------------------------------------------------------------------------
2890
+ // Devanagari and Indic Scripts
2891
+ // ----------------------------------------------------------------------------
2892
+
2893
+ test "calculateTextWidth: Devanagari basic characters" {
2894
+ // Devanagari script (Hindi, Sanskrit, etc.)
2895
+ const namaste = "नमस्ते"; // na-ma-s-te with virama
2896
+ const width = utf8.calculateTextWidth(namaste, 4, false, .unicode);
2897
+ // Devanagari characters are typically width 1 each
2898
+ // Split after the virama this is 4 clusters: न म स् ते (the virama combines with स)
2899
+ try testing.expect(width > 0); // Exact width depends on grapheme clustering
2900
+ }
2901
+
2902
+ test "calculateTextWidth: Devanagari with combining marks" {
2903
+ // Devanagari vowel signs and nukta
2904
+ const ka = "क"; // Base character
2905
+ const ki = "कि"; // क + vowel sign i (U+093F)
2906
+ const kii = "की"; // क + vowel sign ii (U+0940)
2907
+
2908
+ try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(ka, 4, false, .unicode));
2909
+ // With combining vowel signs, should still be 1 grapheme
2910
+ try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(ki, 4, false, .unicode));
2911
+ try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(kii, 4, false, .unicode));
2912
+ }
2913
+
2914
+ test "calculateTextWidth: Devanagari conjuncts" {
2915
+ // Conjunct consonants with virama
2916
+ const kta = "क्त"; // क + virama + त (kta)
2917
+ const jna = "ज्ञ"; // ज + virama + ञ (jna)
2918
+ const ksha = "क्‍ष"; // क + virama + ZWJ + ष (kṣa with explicit ZWJ)
2919
+
2920
+ // These form single grapheme clusters but width = number of base consonants
2921
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(kta, 4, false, .unicode));
2922
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(jna, 4, false, .unicode));
2923
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(ksha, 4, false, .unicode));
2924
+ }
2925
+
2926
+ test "calculateTextWidth: Bengali script" {
2927
+ // Bengali/Bangla script
2928
+ const bangla = "বাংলা"; // Bangla
2929
+ const width = utf8.calculateTextWidth(bangla, 4, false, .unicode);
2930
+ try testing.expect(width > 0);
2931
+ }
2932
+
2933
+ test "calculateTextWidth: Tamil script" {
2934
+ // Tamil script (no conjuncts, simpler than Devanagari)
2935
+ const tamil = "தமிழ்"; // Tamil
2936
+ const width = utf8.calculateTextWidth(tamil, 4, false, .unicode);
2937
+ try testing.expect(width > 0);
2938
+ }
2939
+
2940
+ test "calculateTextWidth: Telugu script" {
2941
+ // Telugu script
2942
+ const telugu = "తెలుగు"; // Telugu
2943
+ const width = utf8.calculateTextWidth(telugu, 4, false, .unicode);
2944
+ try testing.expect(width > 0);
2945
+ }
2946
+
2947
+ // ----------------------------------------------------------------------------
2948
+ // Arabic and RTL Scripts
2949
+ // ----------------------------------------------------------------------------
2950
+
2951
+ test "calculateTextWidth: Arabic basic text" {
2952
+ // Arabic text (RTL, but width calculation is the same)
2953
+ const arabic = "مرحبا"; // Marhaba (hello)
2954
+ const width = utf8.calculateTextWidth(arabic, 4, false, .unicode);
2955
+ // Arabic characters are width 1 each
2956
+ try testing.expect(width >= 5);
2957
+ }
2958
+
2959
+ test "calculateTextWidth: Arabic with diacritics" {
2960
+ // Arabic with harakat (diacritical marks)
2961
+ const with_diacritics = "مَرْحَبًا"; // Marhaba with vowel marks
2962
+ const width = utf8.calculateTextWidth(with_diacritics, 4, false, .unicode);
2963
+ // Combining marks should not add to width
2964
+ try testing.expect(width >= 5);
2965
+ }
2966
+
2967
+ test "calculateTextWidth: Hebrew text" {
2968
+ // Hebrew text (RTL)
2969
+ const hebrew = "שלום"; // Shalom
2970
+ const width = utf8.calculateTextWidth(hebrew, 4, false, .unicode);
2971
+ try testing.expect(width >= 4);
2972
+ }
2973
+
2974
+ // ----------------------------------------------------------------------------
2975
+ // East Asian Scripts (CJK)
2976
+ // ----------------------------------------------------------------------------
2977
+
2978
+ test "calculateTextWidth: Chinese traditional characters" {
2979
+ const traditional = "繁體中文"; // Traditional Chinese
2980
+ const width = utf8.calculateTextWidth(traditional, 4, false, .unicode);
2981
+ // Each CJK character is width 2
2982
+ try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
2983
+ }
2984
+
2985
+ test "calculateTextWidth: Chinese simplified characters" {
2986
+ const simplified = "简体中文"; // Simplified Chinese
2987
+ const width = utf8.calculateTextWidth(simplified, 4, false, .unicode);
2988
+ try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
2989
+ }
2990
+
2991
+ test "calculateTextWidth: Japanese mixed scripts" {
2992
+ // Hiragana + Kanji + Katakana
2993
+ const mixed = "ひらがな漢字カタカナ"; // hiragana, kanji, katakana
2994
+ const width = utf8.calculateTextWidth(mixed, 4, false, .unicode);
2995
+ // All are width 2: 4 hiragana + 2 kanji + 4 katakana = 10 chars * 2 = 20
2996
+ try testing.expectEqual(@as(u32, 20), width);
2997
+ }
2998
+
2999
+ test "calculateTextWidth: Korean Hangul syllables" {
3000
+ const korean = "한글"; // Hangul (Korean)
3001
+ const width = utf8.calculateTextWidth(korean, 4, false, .unicode);
3002
+ // Hangul syllables are width 2
3003
+ try testing.expectEqual(@as(u32, 4), width); // 2 chars * 2 = 4
3004
+ }
3005
+
3006
+ test "calculateTextWidth: CJK with ASCII" {
3007
+ const mixed = "Hello世界World"; // ASCII + CJK + ASCII
3008
+ const width = utf8.calculateTextWidth(mixed, 4, false, .unicode);
3009
+ // "Hello" (5) + "世界" (4) + "World" (5) = 14
3010
+ try testing.expectEqual(@as(u32, 14), width);
3011
+ }
3012
+
3013
+ // ----------------------------------------------------------------------------
3014
+ // Combining Marks and Diacritics
3015
+ // ----------------------------------------------------------------------------
3016
+
3017
+ test "calculateTextWidth: multiple combining marks on one base" {
3018
+ // Base + multiple combining marks
3019
+ const multiple = "e\u{0301}\u{0302}\u{0304}"; // e + acute + circumflex + macron
3020
+ const width = utf8.calculateTextWidth(multiple, 4, false, .unicode);
3021
+ try testing.expectEqual(@as(u32, 1), width);
3022
+ }
3023
+
3024
+ test "calculateTextWidth: combining enclosing marks" {
3025
+ // Combining enclosing circle backslash U+20E0
3026
+ const enclosed = "a\u{20E0}";
3027
+ const width = utf8.calculateTextWidth(enclosed, 4, false, .unicode);
3028
+ try testing.expectEqual(@as(u32, 1), width);
3029
+ }
3030
+
3031
+ test "calculateTextWidth: Vietnamese with multiple diacritics" {
3032
+ // Vietnamese uses Latin with complex diacritics
3033
+ const vietnamese = "Tiếng Việt"; // Vietnamese language
3034
+ const width = utf8.calculateTextWidth(vietnamese, 4, false, .unicode);
3035
+ // Each base character with combining marks = 1 width
3036
+ // "Tiếng" (5) + " " (1) + "Việt" (4) = 10
3037
+ try testing.expectEqual(@as(u32, 10), width);
3038
+ }
3039
+
3040
+ // ----------------------------------------------------------------------------
3041
+ // Zero-Width Characters
3042
+ // ----------------------------------------------------------------------------
3043
+
3044
+ test "calculateTextWidth: zero width joiner (ZWJ)" {
3045
+ // ZWJ by itself (shouldn't happen, but test it) - it's a format char with width 0
3046
+ const zwj = "\u{200D}";
3047
+ const width = utf8.calculateTextWidth(zwj, 4, false, .unicode);
3048
+ try testing.expectEqual(@as(u32, 0), width); // Width of ZWJ is 0 (Cf category)
3049
+ }
3050
+
3051
+ test "calculateTextWidth: zero width non-joiner (ZWNJ)" {
3052
+ // ZWNJ U+200C
3053
+ const zwnj = "ab\u{200C}cd";
3054
+ const width = utf8.calculateTextWidth(zwnj, 4, false, .unicode);
3055
+ // ZWNJ has width 0, so should be 4 (a, b, c, d)
3056
+ try testing.expectEqual(@as(u32, 4), width);
3057
+ }
3058
+
3059
+ test "calculateTextWidth: zero width space" {
3060
+ // ZWSP U+200B is Cf (format) category with width 0
3061
+ const zwsp = "a\u{200B}b\u{200B}c";
3062
+ const width = utf8.calculateTextWidth(zwsp, 4, false, .unicode);
3063
+ // a(1) + ZWSP(0) + b(1) + ZWSP(0) + c(1) = 3
3064
+ try testing.expectEqual(@as(u32, 3), width);
3065
+ }
3066
+
3067
+ test "calculateTextWidth: word joiner" {
3068
+ // Word joiner U+2060 is Cf (format) category with width 0
3069
+ const word_joiner = "word\u{2060}joiner";
3070
+ const width = utf8.calculateTextWidth(word_joiner, 4, false, .unicode);
3071
+ // word(4) + word_joiner(0) + joiner(6) = 10
3072
+ try testing.expectEqual(@as(u32, 10), width);
3073
+ }
3074
+
3075
+ // ----------------------------------------------------------------------------
3076
+ // Special Unicode Spaces
3077
+ // ----------------------------------------------------------------------------
3078
+
3079
+ test "calculateTextWidth: various Unicode spaces" {
3080
+ // En space U+2002
3081
+ const en_space = "a\u{2002}b";
3082
+ // Em space U+2003
3083
+ const em_space = "a\u{2003}b";
3084
+ // Thin space U+2009
3085
+ const thin_space = "a\u{2009}b";
3086
+ // Hair space U+200A
3087
+ const hair_space = "a\u{200A}b";
3088
+ // Ideographic space U+3000 (CJK)
3089
+ const ideo_space = "a\u{3000}b";
3090
+
3091
+ // These are all real spaces with width 1
3092
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(en_space, 4, false, .unicode));
3093
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(em_space, 4, false, .unicode));
3094
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(thin_space, 4, false, .unicode));
3095
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(hair_space, 4, false, .unicode));
3096
+ // Ideographic space is width 2 (fullwidth)
3097
+ try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth(ideo_space, 4, false, .unicode));
3098
+ }
3099
+
3100
+ test "calculateTextWidth: non-breaking spaces" {
3101
+ // NBSP U+00A0
3102
+ const nbsp = "a\u{00A0}b";
3103
+ // Narrow NBSP U+202F
3104
+ const narrow_nbsp = "a\u{202F}b";
3105
+
3106
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(nbsp, 4, false, .unicode));
3107
+ try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(narrow_nbsp, 4, false, .unicode));
3108
+ }
3109
+
3110
+ // ----------------------------------------------------------------------------
3111
+ // Emoji Modifiers and Tags
3112
+ // ----------------------------------------------------------------------------
3113
+
3114
+ test "calculateTextWidth: emoji with multiple modifiers" {
3115
+ // Rainbow flag (black flag + rainbow)
3116
+ const rainbow_flag = "🏴‍🌈"; // U+1F3F4 U+200D U+1F308
3117
+ const width = utf8.calculateTextWidth(rainbow_flag, 4, false, .unicode);
3118
+ try testing.expectEqual(@as(u32, 2), width);
3119
+ }
3120
+
3121
+ test "calculateTextWidth: emoji tag sequences (subdivision flags)" {
3122
+ // England flag: 🏴󠁧󠁢󠁥󠁮󠁧󠁿 (black flag + tag chars + cancel tag)
3123
+ // This is complex to type, so we'll test a simpler version
3124
+ const black_flag = "🏴"; // Just the base flag
3125
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(black_flag, 4, false, .unicode));
3126
+ }
3127
+
3128
+ test "calculateTextWidth: hair style variations" {
3129
+ // Person: red hair, curly hair, white hair, bald
3130
+ const red_hair = "👩‍🦰";
3131
+ const curly_hair = "👨‍🦱";
3132
+ const white_hair = "👩‍🦳";
3133
+ const bald = "👨‍🦲";
3134
+
3135
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(red_hair, 4, false, .unicode));
3136
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(curly_hair, 4, false, .unicode));
3137
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(white_hair, 4, false, .unicode));
3138
+ try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(bald, 4, false, .unicode));
3139
+ }
3140
+
3141
+ // ----------------------------------------------------------------------------
3142
+ // Mixed Content and Real-world Scenarios
3143
+ // ----------------------------------------------------------------------------
3144
+
3145
+ test "calculateTextWidth: multilingual sentence" {
3146
+ // Mix of Latin, CJK, Arabic, Emoji
3147
+ const text = "Hello 世界! مرحبا 👋";
3148
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3149
+ // "Hello " (6) + "世界" (4) + "! " (2) + "مرحبا" (5) + " " (1) + "👋" (2) = 20
3150
+ try testing.expect(width >= 18); // Allow some flexibility for combining marks
3151
+ }
3152
+
3153
+ test "calculateTextWidth: code with emoji comments" {
3154
+ const code = "const x = 42; // ✅ works";
3155
+ const width = utf8.calculateTextWidth(code, 4, false, .unicode);
3156
+ // Most chars are width 1, checkmark is width 2
3157
+ // "const x = 42; // " (17) + "✅" (2) + " works" (6) = 25
3158
+ try testing.expectEqual(@as(u32, 25), width);
3159
+ }
3160
+
3161
+ test "calculateTextWidth: emoji sentence" {
3162
+ const text = "I ❤️ 🍕 and 🍣!";
3163
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3164
+ // "I " (2) + "❤️" (2) + " " (1) + "🍕" (2) + " and " (5) + "🍣" (2) + "!" (1) = 15
3165
+ try testing.expectEqual(@as(u32, 15), width);
3166
+ }
3167
+
3168
+ test "calculateTextWidth: social media style text" {
3169
+ const text = "#OpenTUI 🚀 is #awesome 💯!";
3170
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3171
+ // "#OpenTUI " (9) + "🚀" (2) + " is #awesome " (13) + "💯" (2) + "!" (1) = 27
3172
+ try testing.expectEqual(@as(u32, 27), width);
3173
+ }
3174
+
3175
+ // ----------------------------------------------------------------------------
3176
+ // Edge Cases and Boundaries
3177
+ // ----------------------------------------------------------------------------
3178
+
3179
+ test "calculateTextWidth: surrogate pair edge cases" {
3180
+ // Supplementary-plane code points: these need surrogate pairs in UTF-16, but this literal is UTF-8 (one 4-byte sequence each)
3181
+ const emoji = "𝕳𝖊𝖑𝖑𝖔"; // Not emoji despite the name: Mathematical Bold Fraktur letters (U+1D573 etc)
3182
+ const width = utf8.calculateTextWidth(emoji, 4, false, .unicode);
3183
+ // Five letters, expected to be width 1 each
3184
+ try testing.expectEqual(@as(u32, 5), width);
3185
+ }
3186
+
3187
+ test "calculateTextWidth: long grapheme cluster chain" {
3188
+ // Base letter followed by ten combining acute accents (U+0301).
3189
+ // The input is fixed, so comptime `++` / `**` build it without a heap-backed
3190
+ // list, an allocator, or an append loop.
3191
+ const base = "e";
3192
+ const combining_acute = "\u{0301}";
3193
+ const mark_count = 10;
3194
+
3195
+ const text = base ++ (combining_acute ** mark_count);
3196
+ // 1 byte for "e" plus 2 UTF-8 bytes per U+0301.
3197
+ try testing.expectEqual(@as(usize, 1 + 2 * mark_count), text.len);
3198
+
3199
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3200
+ // Should be treated as single grapheme
3201
+ try testing.expectEqual(@as(u32, 1), width);
3202
+ }
3203
+
3204
+ test "calculateTextWidth: all emoji skin tones in sequence" {
3205
+ const text = "👋🏻👋🏼👋🏽👋🏾👋🏿";
3206
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3207
+ // 5 emoji with skin tones, each is 1 grapheme with width 2
3208
+ try testing.expectEqual(@as(u32, 10), width); // 5 * 2 = 10
3209
+ }
3210
+
3211
+ test "calculateTextWidth: emoji zodiac signs" {
3212
+ const zodiac = "♈♉♊♋♌♍♎♏♐♑♒♓"; // All 12 zodiac signs
3213
+ const width = utf8.calculateTextWidth(zodiac, 4, false, .unicode);
3214
+ // Each zodiac symbol is width 2
3215
+ try testing.expectEqual(@as(u32, 24), width); // 12 * 2 = 24
3216
+ }
3217
+
3218
+ test "calculateTextWidth: mathematical symbols" {
3219
+ // Mathematical operators and symbols
3220
+ const math = "∀∃∈∉∋∑∏∫∂∇≠≤≥"; // Various math symbols
3221
+ const width = utf8.calculateTextWidth(math, 4, false, .unicode);
3222
+ // Most math symbols are width 1
3223
+ try testing.expect(width >= 13);
3224
+ }
3225
+
3226
+ test "calculateTextWidth: box drawing characters" {
3227
+ // Box drawing characters (width 1)
3228
+ const box = "┌─┐│└─┘"; // Simple box
3229
+ const width = utf8.calculateTextWidth(box, 4, false, .unicode);
3230
+ try testing.expectEqual(@as(u32, 7), width);
3231
+ }
3232
+
3233
+ test "calculateTextWidth: braille patterns" {
3234
+ // Braille patterns U+2800-U+28FF
3235
+ const braille = "⠀⠁⠂⠃⠄⠅⠆⠇"; // Some braille patterns
3236
+ const width = utf8.calculateTextWidth(braille, 4, false, .unicode);
3237
+ // Braille patterns are width 1
3238
+ try testing.expectEqual(@as(u32, 8), width);
3239
+ }
3240
+
3241
+ test "calculateTextWidth: musical symbols" {
3242
+ // Musical notation symbols
3243
+ const music = "𝄞𝄢𝅘𝅥𝅮"; // Treble clef, bass clef, notes (U+1D11E etc)
3244
+ const width = utf8.calculateTextWidth(music, 4, false, .unicode);
3245
+ // Exact width depends on how the note is encoded. `width` is unsigned, so `>= 0` could never fail; bound it by 2 cells per code point instead.
3246
+ try testing.expect(width <= 2 * (try std.unicode.utf8CountCodepoints(music)));
3247
+ }
3248
+
3249
+ test "calculateTextWidth: weather and nature emoji" {
3250
+ const weather = "☀️🌤️⛅🌦️🌧️⛈️"; // Sun, clouds, rain
3251
+ const width = utf8.calculateTextWidth(weather, 4, false, .unicode);
3252
+ // Each emoji is width 2
3253
+ try testing.expectEqual(@as(u32, 12), width); // 6 * 2 = 12
3254
+ }
3255
+
3256
+ test "calculateTextWidth: food emoji collection" {
3257
+ const food = "🍎🍌🍇🍓🥕🥦🍞🧀"; // Various food items
3258
+ const width = utf8.calculateTextWidth(food, 4, false, .unicode);
3259
+ // 8 emoji * 2 = 16
3260
+ try testing.expectEqual(@as(u32, 16), width);
3261
+ }
3262
+
3263
+ test "calculateTextWidth: animal emoji" {
3264
+ const animals = "🐶🐱🐭🐹🐰🦊🐻🐼"; // Various animals
3265
+ const width = utf8.calculateTextWidth(animals, 4, false, .unicode);
3266
+ try testing.expectEqual(@as(u32, 16), width); // 8 * 2 = 16
3267
+ }
3268
+
3269
+ test "calculateTextWidth: realistic chat message" {
3270
+ const message = "Hey! 👋 Can you review my PR? 🙏 It fixes the bug 🐛 we discussed earlier. Thanks! 😊";
3271
+ const width = utf8.calculateTextWidth(message, 4, false, .unicode);
3272
+ // Long string with multiple emoji - the exact width is not pinned; this is only a loose lower-bound sanity check
3273
+ try testing.expect(width > 70);
3274
+ }
3275
+
3276
+ test "calculateTextWidth: empty string with tabs" {
3277
+ const text = "";
3278
+ try testing.expectEqual(@as(u32, 0), utf8.calculateTextWidth(text, 4, false, .unicode));
3279
+ try testing.expectEqual(@as(u32, 0), utf8.calculateTextWidth(text, 8, false, .unicode));
3280
+ }
3281
+
3282
+ test "calculateTextWidth: only combining marks (invalid but should not crash)" {
3283
+ const text = "\u{0301}\u{0302}\u{0303}"; // Just combining marks, no base
3284
+ const width = utf8.calculateTextWidth(text, 4, false, .unicode);
3285
+ // `width` is unsigned, so `>= 0` could never fail. Three base-less marks may be 0 or 1 cell each, so at most 3 in total.
3286
+ try testing.expect(width <= 3);
3287
+ }
3288
+
3289
+ test "calculateTextWidth: emoji collection - celestial and symbols" {
3290
+ const celestial = "🌟🔮✨";
3291
+ const width = utf8.calculateTextWidth(celestial, 4, false, .unicode);
3292
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3293
+ }
3294
+
3295
+ test "calculateTextWidth: emoji collection - religious and gestures" {
3296
+ const religious = "🙏";
3297
+ const width = utf8.calculateTextWidth(religious, 4, false, .unicode);
3298
+ try testing.expectEqual(@as(u32, 2), width); // 1 emoji * 2 = 2
3299
+ }
3300
+
3301
+ test "calculateTextWidth: emoji collection - ZWJ sequences astronauts" {
3302
+ const astronauts = "🧑‍🚀👨‍🚀👩‍🚀";
3303
+ const width = utf8.calculateTextWidth(astronauts, 4, false, .unicode);
3304
+ try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
3305
+ }
3306
+
3307
+ test "calculateTextWidth: emoji collection - rainbow and magical creatures" {
3308
+ const magical = "🌈🦄🧚‍♀️";
3309
+ const width = utf8.calculateTextWidth(magical, 4, false, .unicode);
3310
+ try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
3311
+ }
3312
+
3313
+ test "calculateTextWidth: emoji collection - books and writing" {
3314
+ const writing = "📜📖✍️";
3315
+ const width = utf8.calculateTextWidth(writing, 4, false, .unicode);
3316
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3317
+ }
3318
+
3319
+ test "calculateTextWidth: emoji collection - Japanese culture" {
3320
+ const japanese = "🏯🎋🌸";
3321
+ const width = utf8.calculateTextWidth(japanese, 4, false, .unicode);
3322
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3323
+ }
3324
+
3325
+ test "calculateTextWidth: emoji collection - traditional Japanese items" {
3326
+ const traditional = "📯🎴🎎";
3327
+ const width = utf8.calculateTextWidth(traditional, 4, false, .unicode);
3328
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3329
+ }
3330
+
3331
+ test "calculateTextWidth: emoji collection - hearts and peace" {
3332
+ const peace = "💝🕊️☮️";
3333
+ const width = utf8.calculateTextWidth(peace, 4, false, .unicode);
3334
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3335
+ }
3336
+
3337
+ test "calculateTextWidth: emoji collection - meditation and nature" {
3338
+ const meditation = "🧘‍♂️🌳";
3339
+ const width = utf8.calculateTextWidth(meditation, 4, false, .unicode);
3340
+ try testing.expectEqual(@as(u32, 4), width); // 2 graphemes * 2 = 4
3341
+ }
3342
+
3343
+ test "calculateTextWidth: emoji collection - food and drink" {
3344
+ const food = "🍵🥟";
3345
+ const width = utf8.calculateTextWidth(food, 4, false, .unicode);
3346
+ try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
3347
+ }
3348
+
3349
+ test "calculateTextWidth: emoji collection - exotic animals" {
3350
+ const animals = "🦥🦦🦧🦨🦩🦚🦜🦝🦞🦟";
3351
+ const width = utf8.calculateTextWidth(animals, 4, false, .unicode);
3352
+ try testing.expectEqual(@as(u32, 20), width); // 10 emoji * 2 = 20
3353
+ }
3354
+
3355
+ test "calculateTextWidth: emoji collection - communication" {
3356
+ const communication = "🤫🗣️💬";
3357
+ const width = utf8.calculateTextWidth(communication, 4, false, .unicode);
3358
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3359
+ }
3360
+
3361
+ test "calculateTextWidth: emoji collection - water and nature" {
3362
+ const nature = "🌊📝🎭";
3363
+ const width = utf8.calculateTextWidth(nature, 4, false, .unicode);
3364
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3365
+ }
3366
+
3367
+ test "calculateTextWidth: emoji collection - landscape" {
3368
+ const landscape = "🏞️🌊💧";
3369
+ const width = utf8.calculateTextWidth(landscape, 4, false, .unicode);
3370
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3371
+ }
3372
+
3373
+ test "calculateTextWidth: emoji collection - circus and art" {
3374
+ const circus = "🤹‍♂️🎪🎨";
3375
+ const width = utf8.calculateTextWidth(circus, 4, false, .unicode);
3376
+ try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
3377
+ }
3378
+
3379
+ test "calculateTextWidth: emoji collection - shopping and food items" {
3380
+ const shopping = "🏪🛒💰🌶️🧄🧅";
3381
+ const width = utf8.calculateTextWidth(shopping, 4, false, .unicode);
3382
+ try testing.expectEqual(@as(u32, 12), width); // 6 emoji * 2 = 12
3383
+ }
3384
+
3385
+ test "calculateTextWidth: emoji collection - textiles and art" {
3386
+ const textiles = "🧵👘🎨🖼️";
3387
+ const width = utf8.calculateTextWidth(textiles, 4, false, .unicode);
3388
+ try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
3389
+ }
3390
+
3391
+ test "calculateTextWidth: emoji collection - prehistoric creatures" {
3392
+ const prehistoric = "🦖🦕🐉🐲";
3393
+ const width = utf8.calculateTextWidth(prehistoric, 4, false, .unicode);
3394
+ try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
3395
+ }
3396
+
3397
+ test "calculateTextWidth: emoji collection - hand gestures" {
3398
+ const hands = "🤝🤲👐";
3399
+ const width = utf8.calculateTextWidth(hands, 4, false, .unicode);
3400
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3401
+ }
3402
+
3403
+ test "calculateTextWidth: emoji collection - lanterns and lights" {
3404
+ const lanterns = "🏮🎆🎇🕯️💡";
3405
+ const width = utf8.calculateTextWidth(lanterns, 4, false, .unicode);
3406
+ try testing.expectEqual(@as(u32, 10), width); // 5 emoji * 2 = 10
3407
+ }
3408
+
3409
+ test "calculateTextWidth: emoji collection - dancers" {
3410
+ const dancers = "💃🕺🩰";
3411
+ const width = utf8.calculateTextWidth(dancers, 4, false, .unicode);
3412
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3413
+ }
3414
+
3415
+ test "calculateTextWidth: emoji collection - musical instruments" {
3416
+ const instruments = "🎻🎺🎷🎸🪕🪘";
3417
+ const width = utf8.calculateTextWidth(instruments, 4, false, .unicode);
3418
+ try testing.expectEqual(@as(u32, 12), width); // 6 emoji * 2 = 12
3419
+ }
3420
+
3421
+ test "calculateTextWidth: emoji collection - bells and shrine" {
3422
+ const bells = "🔔⛩️";
3423
+ const width = utf8.calculateTextWidth(bells, 4, false, .unicode);
3424
+ try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
3425
+ }
3426
+
3427
+ test "calculateTextWidth: emoji collection - shocked and amazed" {
3428
+ const shocked = "😵‍💫🤯✨";
3429
+ const width = utf8.calculateTextWidth(shocked, 4, false, .unicode);
3430
+ try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
3431
+ }
3432
+
3433
+ test "calculateTextWidth: emoji collection - sweets and bubble tea" {
3434
+ const sweets = "🧋🍬🍭🧁";
3435
+ const width = utf8.calculateTextWidth(sweets, 4, false, .unicode);
3436
+ try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
3437
+ }
3438
+
3439
+ test "calculateTextWidth: emoji collection - machinery and robots" {
3440
+ const machinery = "⚙️🤖🦾🦿";
3441
+ const width = utf8.calculateTextWidth(machinery, 4, false, .unicode);
3442
+ try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
3443
+ }
3444
+
3445
+ test "calculateTextWidth: emoji collection - vehicles" {
3446
+ const vehicles = "🚗🚕🚙🚌🚎";
3447
+ const width = utf8.calculateTextWidth(vehicles, 4, false, .unicode);
3448
+ try testing.expectEqual(@as(u32, 10), width); // 5 emoji * 2 = 10
3449
+ }
3450
+
3451
+ test "calculateTextWidth: emoji collection - space travel" {
3452
+ const space = "🚀🛸🛰️";
3453
+ const width = utf8.calculateTextWidth(space, 4, false, .unicode);
3454
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3455
+ }
3456
+
3457
+ test "calculateTextWidth: emoji collection - technology" {
3458
+ const tech = "🐍💻⌨️";
3459
+ const width = utf8.calculateTextWidth(tech, 4, false, .unicode);
3460
+ // 🐍(2) + 💻(2) + ⌨️(2, VS16 makes it emoji presentation) = 6
3461
+ try testing.expectEqual(@as(u32, 6), width);
3462
+ }
3463
+
3464
+ test "calculateTextWidth: emoji collection - education and brain" {
3465
+ const education = "🧠📚🎓";
3466
+ const width = utf8.calculateTextWidth(education, 4, false, .unicode);
3467
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3468
+ }
3469
+
3470
+ test "calculateTextWidth: emoji collection - professional ZWJ sequences" {
3471
+ const professionals = "👨‍💼👩‍💼👨‍🔬👩‍🔬";
3472
+ const width = utf8.calculateTextWidth(professionals, 4, false, .unicode);
3473
+ try testing.expectEqual(@as(u32, 8), width); // 4 graphemes * 2 = 8
3474
+ }
3475
+
3476
+ test "calculateTextWidth: emoji collection - earth globes" {
3477
+ const globes = "🌍🌎🌏";
3478
+ const width = utf8.calculateTextWidth(globes, 4, false, .unicode);
3479
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3480
+ }
3481
+
3482
+ test "calculateTextWidth: emoji collection - family ZWJ sequence" {
3483
+ const family = "👨‍👩‍👧‍👦";
3484
+ const width = utf8.calculateTextWidth(family, 4, false, .unicode);
3485
+ try testing.expectEqual(@as(u32, 2), width); // 1 grapheme * 2 = 2
3486
+ }
3487
+
3488
+ test "calculateTextWidth: emoji collection - elderly people" {
3489
+ const elderly = "👴👵";
3490
+ const width = utf8.calculateTextWidth(elderly, 4, false, .unicode);
3491
+ try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
3492
+ }
3493
+
3494
+ test "calculateTextWidth: emoji collection - sunrise and sunset" {
3495
+ const sunrise = "🌅🌄🌠";
3496
+ const width = utf8.calculateTextWidth(sunrise, 4, false, .unicode);
3497
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3498
+ }
3499
+
3500
+ test "calculateTextWidth: emoji collection - mountains" {
3501
+ const mountains = "🏔️⛰️🗻";
3502
+ const width = utf8.calculateTextWidth(mountains, 4, false, .unicode);
3503
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3504
+ }
3505
+
3506
+ test "calculateTextWidth: emoji collection - thoughts and dreams" {
3507
+ const dreams = "💭💤🌌";
3508
+ const width = utf8.calculateTextWidth(dreams, 4, false, .unicode);
3509
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3510
+ }
3511
+
3512
+ test "calculateTextWidth: emoji collection - campfire" {
3513
+ const campfire = "🔥🏕️";
3514
+ const width = utf8.calculateTextWidth(campfire, 4, false, .unicode);
3515
+ try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
3516
+ }
3517
+
3518
+ test "calculateTextWidth: emoji collection - cooking" {
3519
+ const cooking = "🍛🍲🥘";
3520
+ const width = utf8.calculateTextWidth(cooking, 4, false, .unicode);
3521
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3522
+ }
3523
+
3524
+ test "calculateTextWidth: emoji collection - love hearts" {
3525
+ const hearts = "❤️💕💖";
3526
+ const width = utf8.calculateTextWidth(hearts, 4, false, .unicode);
3527
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3528
+ }
3529
+
3530
+ test "calculateTextWidth: emoji collection - media" {
3531
+ const media = "📸🎞️📹";
3532
+ const width = utf8.calculateTextWidth(media, 4, false, .unicode);
3533
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3534
+ }
3535
+
3536
+ test "calculateTextWidth: emoji collection - global and handshake" {
3537
+ const global = "🌐🤝🌈";
3538
+ const width = utf8.calculateTextWidth(global, 4, false, .unicode);
3539
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3540
+ }
3541
+
3542
+ test "calculateTextWidth: emoji collection - special symbols" {
3543
+ const special = "🦩🧿🪬🫀🫁🧠";
3544
+ const width = utf8.calculateTextWidth(special, 4, false, .unicode);
3545
+ try testing.expectEqual(@as(u32, 12), width); // 6 emoji * 2 = 12
3546
+ }
3547
+
3548
+ test "calculateTextWidth: emoji collection - strength" {
3549
+ const strength = "💪✊🙌";
3550
+ const width = utf8.calculateTextWidth(strength, 4, false, .unicode);
3551
+ try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
3552
+ }
3553
+
3554
+ test "calculateTextWidth: emoji collection - entertainment" {
3555
+ const entertainment = "🎬🎭🎪✨🌟⭐";
3556
+ const width = utf8.calculateTextWidth(entertainment, 4, false, .unicode);
3557
+ try testing.expectEqual(@as(u32, 12), width); // 6 emoji * 2 = 12
3558
+ }
3559
+
3560
+ // ============================================================================
3561
+ // DEVANAGARI SCRIPT WIDTH TESTS
3562
+ // ============================================================================
3563
+
3564
+ test "calculateTextWidth: Devanagari - Sanskrit word" {
3565
+ // संस्कृति (culture/civilization)
3566
+ const sanskrit = "संस्कृति";
3567
+ const width = utf8.calculateTextWidth(sanskrit, 4, false, .unicode);
3568
+ // 4 base consonants (SA, SA, KA, TA) with combining marks = width 4
3569
+ try testing.expectEqual(@as(u32, 4), width);
3570
+ }
3571
+
3572
+ test "calculateTextWidth: Devanagari - namaste" {
3573
+ const namaste = "नमस्ते";
3574
+ const width = utf8.calculateTextWidth(namaste, 4, false, .unicode);
3575
+ // 4 base consonants: NA, MA, SA, TA = width 4
3576
+ try testing.expectEqual(@as(u32, 4), width);
3577
+ }
3578
+
3579
+ test "calculateTextWidth: Devanagari - Om symbol" {
3580
+ const om = "ॐ";
3581
+ const width = utf8.calculateTextWidth(om, 4, false, .unicode);
3582
+ try testing.expectEqual(@as(u32, 1), width);
3583
+ }
3584
+
3585
+ test "calculateTextWidth: Devanagari - mixed with ASCII" {
3586
+ const mixed = "Hello नमस्ते World";
3587
+ const width = utf8.calculateTextWidth(mixed, 4, false, .unicode);
3588
+ // "Hello "(6) + नमस्ते(4 base consonants) + " World"(6) = 16
3589
+ try testing.expectEqual(@as(u32, 16), width);
3590
+ }
3591
+
3592
+ // ============================================================================
3593
+ // CJK SCRIPT WIDTH TESTS
3594
+ // ============================================================================
3595
+
3596
+ test "calculateTextWidth: Chinese characters - kanji" {
3597
+ const kanji = "漢字";
3598
+ const width = utf8.calculateTextWidth(kanji, 4, false, .unicode);
3599
+ try testing.expectEqual(@as(u32, 4), width); // 2 chars * 2 = 4
3600
+ }
3601
+
3602
+ test "calculateTextWidth: Hiragana" {
3603
+ const hiragana = "ひらがな";
3604
+ const width = utf8.calculateTextWidth(hiragana, 4, false, .unicode);
3605
+ try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
3606
+ }
3607
+
3608
+ test "calculateTextWidth: Katakana" {
3609
+ const katakana = "カタカナ";
3610
+ const width = utf8.calculateTextWidth(katakana, 4, false, .unicode);
3611
+ try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
3612
+ }
3613
+
3614
+ test "calculateTextWidth: Korean Hangul" {
3615
+ const hangul = "한글";
3616
+ const width = utf8.calculateTextWidth(hangul, 4, false, .unicode);
3617
+ try testing.expectEqual(@as(u32, 4), width); // 2 chars * 2 = 4
3618
+ }
3619
+
3620
+ test "calculateTextWidth: Korean words - love and peace" {
3621
+ const korean = "사랑 평화";
3622
+ const width = utf8.calculateTextWidth(korean, 4, false, .unicode);
3623
+ // 사(2) + 랑(2) + space(1) + 평(2) + 화(2) = 9
3624
+ try testing.expectEqual(@as(u32, 9), width);
3625
+ }
3626
+
3627
+ // ============================================================================
3628
+ // TIBETAN SCRIPT WIDTH TESTS
3629
+ // ============================================================================
3630
+
3631
+ test "calculateTextWidth: Tibetan script" {
3632
+ const tibetan = "རྒྱ་མཚོ";
3633
+ const width = utf8.calculateTextWidth(tibetan, 4, false, .unicode);
3634
+ // Tibetan has complex combining characters
3635
+ // Base chars are width 1, subjoined letters width 0
3636
+ try testing.expect(width >= 3 and width <= 6);
3637
+ }
3638
+
3639
+ // ============================================================================
3640
+ // OTHER INDIC SCRIPTS WIDTH TESTS
3641
+ // ============================================================================
3642
+
3643
+ test "calculateTextWidth: Gujarati script" {
3644
+ const gujarati = "ગુજરાતી";
3645
+ const width = utf8.calculateTextWidth(gujarati, 4, false, .unicode);
3646
+ // ગ(1) + ુ(0) + જ(1) + ર(1) + ા(0) + ત(1) + ી(0) = 4
3647
+ try testing.expectEqual(@as(u32, 4), width);
3648
+ }
3649
+
3650
+ test "calculateTextWidth: Tamil script word" {
3651
+ const tamil = "தமிழ்";
3652
+ const width = utf8.calculateTextWidth(tamil, 4, false, .unicode);
3653
+ // த(1) + ம(1) + ி(0) + ழ(1) + ்(0) = 3
3654
+ try testing.expectEqual(@as(u32, 3), width);
3655
+ }
3656
+
3657
+ test "calculateTextWidth: Punjabi script word" {
3658
+ const punjabi = "ਪੰਜਾਬੀ";
3659
+ const width = utf8.calculateTextWidth(punjabi, 4, false, .unicode);
3660
+ // ਪ(1) + ੰ(0) + ਜ(1) + ਾ(0) + ਬ(1) + ੀ(0) = 3 base chars
3661
+ try testing.expectEqual(@as(u32, 3), width);
3662
+ }
3663
+
3664
+ test "calculateTextWidth: Telugu script word" {
3665
+ const telugu = "తెలుగు";
3666
+ const width = utf8.calculateTextWidth(telugu, 4, false, .unicode);
3667
+ // త(1) + ె(0) + ల(1) + ు(0) + గ(1) + ు(0) = 3
3668
+ try testing.expectEqual(@as(u32, 3), width);
3669
+ }
3670
+
3671
+ test "calculateTextWidth: Bengali script word" {
3672
+ const bengali = "বাংলা";
3673
+ const width = utf8.calculateTextWidth(bengali, 4, false, .unicode);
3674
+ // ব(1) + া(0) + ং(0) + ল(1) + া(0) = 2
3675
+ try testing.expectEqual(@as(u32, 2), width);
3676
+ }
3677
+
3678
+ test "calculateTextWidth: Kannada script" {
3679
+ const kannada = "ಕನ್ನಡ";
3680
+ const width = utf8.calculateTextWidth(kannada, 4, false, .unicode);
3681
+ // ಕ(1) + ನ(1) + ್(0) + ನ(1) + ಡ(1) = 4
3682
+ try testing.expectEqual(@as(u32, 4), width);
3683
+ }
3684
+
3685
+ test "calculateTextWidth: Malayalam script" {
3686
+ const malayalam = "മലയാളം";
3687
+ const width = utf8.calculateTextWidth(malayalam, 4, false, .unicode);
3688
+ // Each base letter is width 1, vowel signs width 0
3689
+ try testing.expect(width >= 4 and width <= 5);
3690
+ }
3691
+
3692
+ test "calculateTextWidth: Oriya script" {
3693
+ const oriya = "ଓଡ଼ିଆ";
3694
+ const width = utf8.calculateTextWidth(oriya, 4, false, .unicode);
3695
+ // ଓ(1) + ଡ(1) + ଼(0) + ି(0) + ଆ(1) = 3
3696
+ try testing.expectEqual(@as(u32, 3), width);
3697
+ }
3698
+
3699
+ // ============================================================================
3700
+ // THAI AND LAO SCRIPT WIDTH TESTS
3701
+ // ============================================================================
3702
+
3703
+ test "calculateTextWidth: Thai script" {
3704
+ const thai = "ภาษา";
3705
+ const width = utf8.calculateTextWidth(thai, 4, false, .unicode);
3706
+ // Thai base chars width 1, combining vowels/tones width 0
3707
+ try testing.expect(width >= 3 and width <= 4);
3708
+ }
3709
+
3710
+ test "calculateTextWidth: Thai numerals" {
3711
+ const thai_num = "๑๐๐";
3712
+ const width = utf8.calculateTextWidth(thai_num, 4, false, .unicode);
3713
+ try testing.expectEqual(@as(u32, 3), width); // 3 digits * 1 = 3
3714
+ }
3715
+
3716
+ test "calculateTextWidth: Lao script" {
3717
+ const lao = "ໂຫຍ່າກເຈົ້າ";
3718
+ const width = utf8.calculateTextWidth(lao, 4, false, .unicode);
3719
+ // Lao has complex vowels and tone marks (width 0)
3720
+ try testing.expect(width >= 5 and width <= 10);
3721
+ }
3722
+
3723
+ // ============================================================================
3724
+ // ARABIC AND OTHER SCRIPTS WIDTH TESTS
3725
+ // ============================================================================
3726
+
3727
+ test "calculateTextWidth: Arabic character" {
3728
+ const arabic = "ا";
3729
+ const width = utf8.calculateTextWidth(arabic, 4, false, .unicode);
3730
+ try testing.expectEqual(@as(u32, 1), width);
3731
+ }
3732
+
3733
+ test "calculateTextWidth: Sinhala script" {
3734
+ const sinhala = "ආහාර";
3735
+ const width = utf8.calculateTextWidth(sinhala, 4, false, .unicode);
3736
+ // Sinhala chars width 1, vowel signs width 0
3737
+ try testing.expect(width >= 3 and width <= 4);
3738
+ }
3739
+
3740
+ test "calculateTextWidth: Chinese text" {
3741
+ const chinese = "中文";
3742
+ const width = utf8.calculateTextWidth(chinese, 4, false, .unicode);
3743
+ try testing.expectEqual(@as(u32, 4), width); // 2 chars * 2 = 4
3744
+ }
3745
+
3746
+ test "calculateTextWidth: Hangul Jamo" {
3747
+ const jamo = "ㄱ";
3748
+ const width = utf8.calculateTextWidth(jamo, 4, false, .unicode);
3749
+ try testing.expectEqual(@as(u32, 2), width); // Hangul Jamo is width 2
3750
+ }
3751
+
3752
+ // ============================================================================
3753
+ // MIXED SCRIPT COMPREHENSIVE TESTS
3754
+ // ============================================================================
3755
+
3756
+ test "calculateTextWidth: realistic multilingual sentence" {
3757
+ const multilingual = "Hello 世界! नमस्ते 🙏";
3758
+ const width = utf8.calculateTextWidth(multilingual, 4, false, .unicode);
3759
+ // "Hello "(6) + 世界(4) + "! "(2) + नमस्ते(4) + " "(1) + 🙏(2) = 19
3760
+ try testing.expectEqual(@as(u32, 19), width);
3761
+ }
3762
+
3763
+ test "calculateTextWidth: all ending words from text" {
3764
+ const endings = "समाप्त끝จบముగింపుಅಂತ್ಯઅંત";
3765
+ const width = utf8.calculateTextWidth(endings, 4, false, .unicode);
3766
+ // TODO: assert the exact expected width instead of a loose lower bound
3767
+ try testing.expect(width > 10);
3768
+ }
3769
+
3770
+ test "calculateTextWidth: complex text with emojis and multiple scripts" {
3771
+ const complex = "The 🌟 journey: संस्कृति meets 漢字 🎋";
3772
+ const width = utf8.calculateTextWidth(complex, 4, false, .unicode);
3773
+ // TODO: assert the exact expected width instead of accepting any value in 30..50
3774
+ try testing.expect(width >= 30 and width <= 50);
3775
+ }
3776
+
3777
+ test "calculateTextWidth: validate against unicode-width-map.zon" {
3778
+ const zon_content = @embedFile("unicode-width-map.zon");
3779
+
3780
+ // Use arena allocator to avoid memory leaks from ZON parser string allocations
3781
+ var arena = std.heap.ArenaAllocator.init(testing.allocator);
3782
+ defer arena.deinit();
3783
+ const allocator = arena.allocator();
3784
+
3785
+ const zon_with_null = try allocator.dupeZ(u8, zon_content);
3786
+
3787
+ const WidthEntry = struct {
3788
+ codepoint: []const u8,
3789
+ width: i32,
3790
+ };
3791
+
3792
+ // Parse errors propagate via `try`; a catch that only re-returns the error adds nothing.
3793
+ const width_entries = try std.zon.parse.fromSlice(
3794
+ []const WidthEntry,
3795
+ allocator,
3796
+ zon_with_null,
3797
+ null,
3798
+ .{},
3799
+ );
3800
+
3801
+ // Count mismatches instead of failing fast so a single run reports every bad entry.
3802
+ var failures: usize = 0;
3803
+
3804
+ for (width_entries) |entry| {
3805
+ const codepoint_str = entry.codepoint;
3806
+ const expected_width = entry.width;
3807
+
3808
+ // Entries are keyed as "U+XXXX" (hex); anything else is skipped.
3809
+ if (codepoint_str.len < 3 or !std.mem.startsWith(u8, codepoint_str, "U+")) {
3810
+ continue;
3811
+ }
3812
+ const hex_str = codepoint_str[2..];
3813
+ const code_point = std.fmt.parseInt(u21, hex_str, 16) catch continue;
3814
+
3815
+ // Encode the code point as UTF-8; values utf8Encode rejects are skipped.
3816
+ var buf: [4]u8 = undefined;
3817
+ const len = std.unicode.utf8Encode(code_point, &buf) catch continue;
3818
+ const str = buf[0..len];
3819
+
3820
+ const actual_width = utf8.calculateTextWidth(str, 4, false, .unicode);
3821
+
3822
+ if (actual_width != expected_width) {
3823
+ // Name the offending entry so a failure is diagnosable from the test log.
3824
+ std.debug.print("width mismatch for {s}: expected {d}, got {d}\n", .{ codepoint_str, expected_width, actual_width });
3825
+ failures += 1;
3826
+ }
3827
+ }
3828
+
3829
+ try testing.expectEqual(@as(usize, 0), failures);
3830
+ }
3831
+
3832
test "findGraphemeInfo: comprehensive multilingual text" {
    // Stress text mixing many scripts, ZWJ emoji sequences and punctuation.
    const text =
        \\# The Celestial Journey of संस्कृति 🌟🔮✨
        \\In the beginning, there was नमस्ते 🙏 and the ancient wisdom of the ॐ symbol echoing through dimensions. The travelers 🧑‍🚀👨‍🚀👩‍🚀 embarked on their quest through the cosmos, guided by the mysterious རྒྱ་མཚོ and the luminous 🌈🦄🧚‍♀️ beings of light. They encountered the great देवनागरी scribes who wrote in flowing अक्षर characters, documenting everything in their sacred texts 📜📖✍️.
        \\## Chapter प्रथम: The Eastern Gardens 🏯🎋🌸
        \\The journey led them to the mystical lands where 漢字 (kanji) danced with ひらがな and カタカナ across ancient scrolls 📯🎴🎎. In the gardens of Seoul, they found 한글 inscriptions speaking of 사랑 (love) and 평화 (peace) 💝🕊️☮️. The monks meditated under the bodhi tree 🧘‍♂️🌳, contemplating the nature of धर्म while drinking matcha 🍵 and eating 餃子 dumplings 🥟.
        \\Strange creatures emerged from the mist: 🦥🦦🦧🦨🦩🦚🦜🦝🦞🦟. They spoke in riddles about the प्राचीन (ancient) ways and the नवीन (new) paths forward. "भविष्य में क्या है?" they asked, while the ໂຫຍ່າກເຈົ້າ whispered secrets in Lao script 🤫🗣️💬.
        \\## The संगम (Confluence) of Scripts 🌊📝🎭
        \\At the great confluence, they witnessed the merger of བོད་ཡིག (Tibetan), ગુજરાતી (Gujarati), and தமிழ் (Tamil) scripts flowing together like rivers 🏞️🌊💧. The scholars debated about ਪੰਜਾਬੀ philosophy while juggling 🤹‍♂️🎪🎨 colorful orbs that represented different తెలుగు concepts.
        \\The marketplace buzzed with activity 🏪🛒💰: merchants sold বাংলা spices 🌶️🧄🧅, ಕನ್ನಡ silks 🧵👘, and മലയാളം handicrafts 🎨🖼️. Children played with toys shaped like 🦖🦕🐉🐲 while their parents bargained using ancient ଓଡ଼ିଆ numerals and gestures 🤝🤲👐.
        \\## The Festival of ๑๐๐ Lanterns 🏮🎆🎇
        \\During the grand festival, they lit exactly ๑๐๐ (100 in Thai numerals) lanterns 🏮🕯️💡 that floated into the night sky like ascending ความหวัง (hopes). The celebration featured dancers 💃🕺🩰 performing classical moves from भरतनाट्यम tradition, their मुद्रा hand gestures telling stories of प्रेम and वीरता.
        \\Musicians played unusual instruments: the 🎻🎺🎷🎸🪕🪘 ensemble created harmonies that resonated with the वेद chants and མཆོད་རྟེན bells 🔔⛩️. The audience sat mesmerized 😵‍💫🤯✨, some sipping on bubble tea 🧋 while others enjoyed मिठाई sweets 🍬🍭🧁.
        \\## The འཕྲུལ་དེབ (Machine) Age Arrives ⚙️🤖🦾
        \\As modernity crept in, the ancient འཁོར་ལོ (wheel) gave way to 🚗🚕🚙🚌🚎 vehicles and eventually to 🚀🛸🛰️ spacecraft. The યુવાન (youth) learned to code in Python 🐍💻⌨️, but still honored their గురువు (teachers) who taught them the old ways of ज्ञान acquisition 🧠📚🎓.
        \\The সমাজ (society) transformed: robots 🤖🦾🦿 worked alongside humans 👨‍💼👩‍💼👨‍🔬👩‍🔬, and AI learned to read སྐད (languages) from across the planet 🌍🌎🌏. Yet somehow, the essence of मानवता remained intact, preserved in the கவிதை (poetry) and the ກາບແກ້ວ stories passed down through generations 👴👵👨‍👩‍👧‍👦.
        \\## The Final ಅಧ್ಯಾಯ (Chapter) 🌅🌄🌠
        \\As the sun set over the പർവ്വതങ്ങൾ (mountains) 🏔️⛰️🗻, our travelers realized that every script, every symbol—from ا to ㄱ to অ to अ—represented not just sounds, but entire civilizations' worth of विचार (thoughts) and ಕನಸು (dreams) 💭💤🌌.
        \\They gathered around the final campfire 🔥🏕️, sharing stories in ภาษา (languages) both ancient and new. Someone brought out a guitar 🎸 and started singing in ગીત form, while others prepared ආහාර (food) 🍛🍲🥘 seasoned with love ❤️💕💖 and memories 📸🎞️📹.
        \\And so they learned that whether written in দেবনাগরী, 中文, 한글, or ไทย, the human experience transcends boundaries 🌐🤝🌈. The weird emojis 🦩🧿🪬🫀🫁🧠 and complex scripts were all part of the same beautiful བསྟན་པ (teaching): that diversity is our greatest strength 💪✊🙌.
        \\The end. समाप्त. 끝. จบ. முடிவு. ముగింపు. সমাপ্তি. ഒടുക്കം. ಅಂತ್ಯ. અંત. 🎬🎭🎪✨🌟⭐
        \\
    ;

    // Width of the whole text; used as an upper bound for every column offset.
    const total_width = utf8.calculateTextWidth(text, 4, false, .unicode);

    var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer result.deinit(testing.allocator);

    try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
    try testing.expect(result.items.len > 0);

    var prev_end_byte: usize = 0;

    for (result.items) |g| {
        // Entries must be ordered and must not overlap the previous one.
        try testing.expect(g.byte_offset >= prev_end_byte);

        // The reported column must equal the width of everything before it.
        const text_before = text[0..g.byte_offset];
        const expected_col = utf8.calculateTextWidth(text_before, 4, false, .unicode);

        try testing.expectEqual(expected_col, g.col_offset);
        try testing.expect(g.col_offset <= total_width);

        prev_end_byte = g.byte_offset + g.byte_len;

        // The reported byte range must stay inside the input.
        try testing.expect(prev_end_byte <= text.len);
    }
}
3880
+
3881
+ // ============================================================================
3882
+ // THAI DIACRITICS AND COMBINING MARKS TESTS
3883
+ // ============================================================================
3884
+
3885
test "Thai: base consonants have width 1" {
    // 36 consonants, each expected to take exactly one column.
    const thai_consonants = "กขคงจฉชซญฎฏฐดตถทธนบปผฝพฟภมยรลวศษสหอฮ";
    const columns = utf8.calculateTextWidth(thai_consonants, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 36), columns);
}
3890
+
3891
test "Thai: spacing vowels have width 1" {
    // Six spacing vowels, one column each.
    const vowels = "าะแโใไ";
    const columns = utf8.calculateTextWidth(vowels, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
3896
+
3897
test "Thai: combining vowels above have width 0" {
    // The bare consonant and the consonant carrying an above-vowel must all
    // measure one column: the combining mark adds no width.
    const clusters = [_][]const u8{
        "ก", // base consonant
        "กิ", // sara i
        "กี", // sara ii
        "กึ", // sara ue
        "กื", // sara uee
        "กั", // mai han akat
    };

    for (clusters) |cluster| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(cluster, 4, false, .unicode));
    }
}
3912
+
3913
test "Thai: combining vowels below have width 0" {
    // Consonant plus a below-vowel still measures a single column.
    const clusters = [_][]const u8{
        "กุ", // sara u
        "กู", // sara uu
    };

    for (clusters) |cluster| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(cluster, 4, false, .unicode));
    }
}
3920
+
3921
test "Thai: tone marks have width 0" {
    // Each of the four tone marks attached to ก must leave the width at one column.
    const clusters = [_][]const u8{
        "ก่", // mai ek
        "ก้", // mai tho
        "ก๊", // mai tri
        "ก๋", // mai chattawa
    };

    for (clusters) |cluster| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(cluster, 4, false, .unicode));
    }
}
3932
+
3933
test "Thai: other diacritics have width 0" {
    // Remaining diacritics on ก: the cluster is still one column wide.
    const clusters = [_][]const u8{
        "ก็", // maitaikhu
        "ก์", // thanthakhat
        "กํ", // nikhahit
    };

    for (clusters) |cluster| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(cluster, 4, false, .unicode));
    }
}
3942
+
3943
test "Thai: combined vowel and tone mark" {
    // One consonant stacked with a vowel and a tone mark: a single column.
    const stacked = utf8.calculateTextWidth("กี่", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 1), stacked);

    // Two base letters, the first carrying a vowel above: two columns.
    const two_bases = utf8.calculateTextWidth("คือ", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), two_bases);
}
3950
+
3951
test "Thai: word 'ภาษาไทย' (Thai language)" {
    // The word is expected to measure seven columns.
    const columns = utf8.calculateTextWidth("ภาษาไทย", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 7), columns);
}
3955
+
3956
test "Thai: word 'อย่าง' with tone mark" {
    // The tone mark adds no column, so the expected width is four.
    const columns = utf8.calculateTextWidth("อย่าง", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), columns);
}
3960
+
3961
test "Thai: word 'อธิบาย' with vowel above" {
    // The vowel above adds no column, so the expected width is five.
    const columns = utf8.calculateTextWidth("อธิบาย", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 5), columns);
}
3965
+
3966
test "Thai: full sentence with spaces" {
    // Three space-separated Thai phrases; the whole line should span 32 columns.
    const sentence = "ภาษาไทย คืออะไร อธิบายมาอย่างละเอียด";
    const columns = utf8.calculateTextWidth(sentence, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 32), columns);
}
3970
+
3971
test "Thai: wrap by width respects combining marks" {
    const word = "คือ";

    // One column available: the break lands at byte 6, after the first
    // letter together with its combining vowel.
    const one_col = utf8.findWrapPosByWidth(word, 1, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), one_col.byte_offset);
    try testing.expectEqual(@as(u32, 1), one_col.columns_used);

    // Two columns available: the whole 9-byte word fits.
    const two_cols = utf8.findWrapPosByWidth(word, 2, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 9), two_cols.byte_offset);
    try testing.expectEqual(@as(u32, 2), two_cols.columns_used);
}
3982
+
3983
test "Thai: wrap by width with tone marks" {
    const word = "ก่อน";

    // With a budget of two columns, exactly two columns are consumed.
    const fit_two = utf8.findWrapPosByWidth(word, 2, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), fit_two.columns_used);

    // With a budget of three columns, exactly three columns are consumed.
    const fit_three = utf8.findWrapPosByWidth(word, 3, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), fit_three.columns_used);
}
3992
+
3993
test "Thai: grapheme info for combining marks" {
    // Consonant + vowel + tone mark must be reported as one entry of width 1.
    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);

    try utf8.findGraphemeInfo("กี่", 4, false, .unicode, testing.allocator, &clusters);

    const found = clusters.items;
    try testing.expectEqual(@as(usize, 1), found.len);
    try testing.expectEqual(@as(u8, 1), found[0].width);
}
4004
+
4005
test "Thai: grapheme info for word with combining marks" {
    // "คือ" must yield two entries, each one column wide.
    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);

    try utf8.findGraphemeInfo("คือ", 4, false, .unicode, testing.allocator, &clusters);

    const found = clusters.items;
    try testing.expectEqual(@as(usize, 2), found.len);
    try testing.expectEqual(@as(u8, 1), found[0].width);
    try testing.expectEqual(@as(u8, 1), found[1].width);
}
4017
+
4018
test "Thai: mixed Thai and ASCII" {
    // "Hello " (6) + ภาษาไทย (7) + " World" (6) = 19 columns.
    const line = "Hello ภาษาไทย World";
    const columns = utf8.calculateTextWidth(line, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 19), columns);
}
4022
+
4023
test "Thai: mixed Thai and emoji" {
    // Thai words around a flag emoji; the whole line should span 11 columns.
    const line = "ภาษา 🇹🇭 ไทย";
    const columns = utf8.calculateTextWidth(line, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 11), columns);
}
4027
+
4028
test "Thai: คำว่า width should be 3" {
    // Regression check: this word must measure three columns.
    const columns = utf8.calculateTextWidth("คำว่า", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), columns);
}
4032
+
4033
test "Thai: ว่ width should be 1" {
    // Consonant plus tone mark must measure one column.
    const columns = utf8.calculateTextWidth("ว่", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 1), columns);
}
4037
+
4038
test "Thai: ว่ wcwidth vs unicode mode comparison" {
    // Both width methods must agree that the cluster is one column wide.
    const cluster = "ว่";

    const via_wcwidth = utf8.calculateTextWidth(cluster, 4, false, .wcwidth);
    const via_unicode = utf8.calculateTextWidth(cluster, 4, false, .unicode);

    try testing.expectEqual(@as(u32, 1), via_wcwidth);
    try testing.expectEqual(@as(u32, 1), via_unicode);
}
4046
+
4047
test "Thai: ว่ is a single grapheme cluster" {
    // The consonant and its tone mark must be reported as one entry of width 1.
    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);

    try utf8.findGraphemeInfo("ว่", 4, false, .unicode, testing.allocator, &clusters);

    const found = clusters.items;
    try testing.expectEqual(@as(usize, 1), found.len);
    try testing.expectEqual(@as(u8, 1), found[0].width);
}