@fairyhunter13/opentui-core 0.1.113 → 0.1.114
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/keypress-debug-renderer.ts +148 -0
- package/dev/keypress-debug.ts +43 -0
- package/dev/print-env-vars.ts +32 -0
- package/dev/test-tmux-graphics-334.sh +68 -0
- package/dev/thai-debug-test.ts +68 -0
- package/docs/development.md +144 -0
- package/package.json +62 -53
- package/scripts/build.ts +400 -0
- package/scripts/publish.ts +60 -0
- package/src/3d/SpriteResourceManager.ts +286 -0
- package/src/3d/SpriteUtils.ts +70 -0
- package/src/3d/TextureUtils.ts +196 -0
- package/src/3d/ThreeRenderable.ts +197 -0
- package/src/3d/WGPURenderer.ts +294 -0
- package/src/3d/animation/ExplodingSpriteEffect.ts +513 -0
- package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +429 -0
- package/src/3d/animation/SpriteAnimator.ts +633 -0
- package/src/3d/animation/SpriteParticleGenerator.ts +435 -0
- package/src/3d/canvas.ts +464 -0
- package/src/3d/index.ts +12 -0
- package/src/3d/physics/PlanckPhysicsAdapter.ts +72 -0
- package/src/3d/physics/RapierPhysicsAdapter.ts +66 -0
- package/src/3d/physics/physics-interface.ts +31 -0
- package/src/3d/shaders/supersampling.wgsl +201 -0
- package/src/3d.ts +3 -0
- package/src/NativeSpanFeed.ts +300 -0
- package/src/Renderable.ts +1704 -0
- package/src/__snapshots__/buffer.test.ts.snap +28 -0
- package/src/animation/Timeline.test.ts +2709 -0
- package/src/animation/Timeline.ts +598 -0
- package/src/ansi.ts +18 -0
- package/src/benchmark/attenuation-benchmark.ts +81 -0
- package/src/benchmark/colormatrix-benchmark.ts +128 -0
- package/src/benchmark/gain-benchmark.ts +80 -0
- package/src/benchmark/latest-all-bench-run.json +707 -0
- package/src/benchmark/latest-async-bench-run.json +336 -0
- package/src/benchmark/latest-default-bench-run.json +657 -0
- package/src/benchmark/latest-large-bench-run.json +707 -0
- package/src/benchmark/latest-quick-bench-run.json +207 -0
- package/src/benchmark/markdown-benchmark.ts +1796 -0
- package/src/benchmark/native-span-feed-async-benchmark.ts +355 -0
- package/src/benchmark/native-span-feed-benchmark.md +56 -0
- package/src/benchmark/native-span-feed-benchmark.ts +596 -0
- package/src/benchmark/native-span-feed-compare.ts +280 -0
- package/src/benchmark/renderer-benchmark.ts +754 -0
- package/src/benchmark/text-table-benchmark.ts +948 -0
- package/src/buffer.test.ts +291 -0
- package/src/buffer.ts +554 -0
- package/src/console.test.ts +612 -0
- package/src/console.ts +1254 -0
- package/src/edit-buffer.test.ts +1769 -0
- package/src/edit-buffer.ts +411 -0
- package/src/editor-view.test.ts +1032 -0
- package/src/editor-view.ts +284 -0
- package/src/examples/ascii-font-selection-demo.ts +245 -0
- package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
- package/src/examples/assets/concrete.png +0 -0
- package/src/examples/assets/crate.png +0 -0
- package/src/examples/assets/crate_emissive.png +0 -0
- package/src/examples/assets/forrest_background.png +0 -0
- package/src/examples/assets/hast-example.json +1018 -0
- package/src/examples/assets/heart.png +0 -0
- package/src/examples/assets/main_char_heavy_attack.png +0 -0
- package/src/examples/assets/main_char_idle.png +0 -0
- package/src/examples/assets/main_char_jump_end.png +0 -0
- package/src/examples/assets/main_char_jump_landing.png +0 -0
- package/src/examples/assets/main_char_jump_start.png +0 -0
- package/src/examples/assets/main_char_run_loop.png +0 -0
- package/src/examples/assets/roughness_map.jpg +0 -0
- package/src/examples/build.ts +115 -0
- package/src/examples/code-demo.ts +924 -0
- package/src/examples/console-demo.ts +358 -0
- package/src/examples/core-plugin-slots-demo.ts +759 -0
- package/src/examples/diff-demo.ts +701 -0
- package/src/examples/draggable-three-demo.ts +259 -0
- package/src/examples/editor-demo.ts +322 -0
- package/src/examples/extmarks-demo.ts +196 -0
- package/src/examples/focus-restore-demo.ts +310 -0
- package/src/examples/fonts.ts +245 -0
- package/src/examples/fractal-shader-demo.ts +268 -0
- package/src/examples/framebuffer-demo.ts +674 -0
- package/src/examples/full-unicode-demo.ts +241 -0
- package/src/examples/golden-star-demo.ts +933 -0
- package/src/examples/grayscale-buffer-demo.ts +249 -0
- package/src/examples/hast-syntax-highlighting-demo.ts +129 -0
- package/src/examples/index.ts +926 -0
- package/src/examples/input-demo.ts +377 -0
- package/src/examples/input-select-layout-demo.ts +425 -0
- package/src/examples/install.sh +143 -0
- package/src/examples/keypress-debug-demo.ts +452 -0
- package/src/examples/lib/HexList.ts +122 -0
- package/src/examples/lib/PaletteGrid.ts +125 -0
- package/src/examples/lib/standalone-keys.ts +25 -0
- package/src/examples/lib/tab-controller.ts +243 -0
- package/src/examples/lights-phong-demo.ts +290 -0
- package/src/examples/link-demo.ts +220 -0
- package/src/examples/live-state-demo.ts +480 -0
- package/src/examples/markdown-demo.ts +725 -0
- package/src/examples/mouse-interaction-demo.ts +428 -0
- package/src/examples/nested-zindex-demo.ts +357 -0
- package/src/examples/opacity-example.ts +235 -0
- package/src/examples/opentui-demo.ts +1057 -0
- package/src/examples/physx-planck-2d-demo.ts +623 -0
- package/src/examples/physx-rapier-2d-demo.ts +655 -0
- package/src/examples/relative-positioning-demo.ts +323 -0
- package/src/examples/scroll-example.ts +214 -0
- package/src/examples/scrollbox-mouse-test.ts +112 -0
- package/src/examples/scrollbox-overlay-hit-test.ts +206 -0
- package/src/examples/select-demo.ts +237 -0
- package/src/examples/shader-cube-demo.ts +1015 -0
- package/src/examples/simple-layout-example.ts +591 -0
- package/src/examples/slider-demo.ts +617 -0
- package/src/examples/split-mode-demo.ts +453 -0
- package/src/examples/sprite-animation-demo.ts +443 -0
- package/src/examples/sprite-particle-generator-demo.ts +486 -0
- package/src/examples/static-sprite-demo.ts +193 -0
- package/src/examples/sticky-scroll-example.ts +308 -0
- package/src/examples/styled-text-demo.ts +282 -0
- package/src/examples/tab-select-demo.ts +219 -0
- package/src/examples/terminal-title.ts +29 -0
- package/src/examples/terminal.ts +305 -0
- package/src/examples/text-node-demo.ts +416 -0
- package/src/examples/text-selection-demo.ts +377 -0
- package/src/examples/text-table-demo.ts +503 -0
- package/src/examples/text-truncation-demo.ts +481 -0
- package/src/examples/text-wrap.ts +757 -0
- package/src/examples/texture-loading-demo.ts +259 -0
- package/src/examples/timeline-example.ts +670 -0
- package/src/examples/transparency-demo.ts +400 -0
- package/src/examples/vnode-composition-demo.ts +404 -0
- package/src/examples/wide-grapheme-overlay-demo.ts +280 -0
- package/src/index.ts +24 -0
- package/src/lib/KeyHandler.integration.test.ts +292 -0
- package/src/lib/KeyHandler.stopPropagation.test.ts +289 -0
- package/src/lib/KeyHandler.test.ts +662 -0
- package/src/lib/KeyHandler.ts +222 -0
- package/src/lib/RGBA.test.ts +984 -0
- package/src/lib/RGBA.ts +204 -0
- package/src/lib/ascii.font.ts +330 -0
- package/src/lib/border.test.ts +83 -0
- package/src/lib/border.ts +170 -0
- package/src/lib/bunfs.test.ts +27 -0
- package/src/lib/bunfs.ts +18 -0
- package/src/lib/clipboard.test.ts +41 -0
- package/src/lib/clipboard.ts +47 -0
- package/src/lib/clock.ts +35 -0
- package/src/lib/data-paths.test.ts +133 -0
- package/src/lib/data-paths.ts +109 -0
- package/src/lib/debounce.ts +106 -0
- package/src/lib/detect-links.test.ts +98 -0
- package/src/lib/detect-links.ts +56 -0
- package/src/lib/env.test.ts +228 -0
- package/src/lib/env.ts +209 -0
- package/src/lib/extmarks-history.ts +51 -0
- package/src/lib/extmarks-multiwidth.test.ts +322 -0
- package/src/lib/extmarks.test.ts +3457 -0
- package/src/lib/extmarks.ts +843 -0
- package/src/lib/fonts/block.json +405 -0
- package/src/lib/fonts/grid.json +265 -0
- package/src/lib/fonts/huge.json +741 -0
- package/src/lib/fonts/pallet.json +314 -0
- package/src/lib/fonts/shade.json +591 -0
- package/src/lib/fonts/slick.json +321 -0
- package/src/lib/fonts/tiny.json +69 -0
- package/src/lib/hast-styled-text.ts +59 -0
- package/src/lib/index.ts +21 -0
- package/src/lib/keymapping.test.ts +317 -0
- package/src/lib/keymapping.ts +115 -0
- package/src/lib/objects-in-viewport.test.ts +787 -0
- package/src/lib/objects-in-viewport.ts +153 -0
- package/src/lib/output.capture.ts +58 -0
- package/src/lib/parse.keypress-kitty.protocol.test.ts +340 -0
- package/src/lib/parse.keypress-kitty.test.ts +663 -0
- package/src/lib/parse.keypress-kitty.ts +439 -0
- package/src/lib/parse.keypress.test.ts +1849 -0
- package/src/lib/parse.keypress.ts +397 -0
- package/src/lib/parse.mouse.test.ts +552 -0
- package/src/lib/parse.mouse.ts +232 -0
- package/src/lib/paste.ts +16 -0
- package/src/lib/queue.ts +65 -0
- package/src/lib/renderable.validations.test.ts +87 -0
- package/src/lib/renderable.validations.ts +83 -0
- package/src/lib/scroll-acceleration.ts +98 -0
- package/src/lib/selection.ts +240 -0
- package/src/lib/singleton.ts +28 -0
- package/src/lib/stdin-parser.test.ts +2290 -0
- package/src/lib/stdin-parser.ts +1810 -0
- package/src/lib/styled-text.ts +178 -0
- package/src/lib/terminal-capability-detection.test.ts +202 -0
- package/src/lib/terminal-capability-detection.ts +79 -0
- package/src/lib/terminal-palette.test.ts +878 -0
- package/src/lib/terminal-palette.ts +383 -0
- package/src/lib/tree-sitter/assets/README.md +118 -0
- package/src/lib/tree-sitter/assets/update.ts +334 -0
- package/src/lib/tree-sitter/assets.d.ts +9 -0
- package/src/lib/tree-sitter/cache.test.ts +273 -0
- package/src/lib/tree-sitter/client.test.ts +1165 -0
- package/src/lib/tree-sitter/client.ts +607 -0
- package/src/lib/tree-sitter/default-parsers.ts +86 -0
- package/src/lib/tree-sitter/download-utils.ts +148 -0
- package/src/lib/tree-sitter/index.ts +28 -0
- package/src/lib/tree-sitter/parser.worker.ts +1042 -0
- package/src/lib/tree-sitter/parsers-config.ts +81 -0
- package/src/lib/tree-sitter/resolve-ft.test.ts +55 -0
- package/src/lib/tree-sitter/resolve-ft.ts +189 -0
- package/src/lib/tree-sitter/types.ts +82 -0
- package/src/lib/tree-sitter-styled-text.test.ts +1253 -0
- package/src/lib/tree-sitter-styled-text.ts +306 -0
- package/src/lib/validate-dir-name.ts +55 -0
- package/src/lib/yoga.options.test.ts +628 -0
- package/src/lib/yoga.options.ts +346 -0
- package/src/plugins/core-slot.ts +579 -0
- package/src/plugins/registry.ts +402 -0
- package/src/plugins/types.ts +46 -0
- package/src/post/effects.ts +930 -0
- package/src/post/filters.ts +489 -0
- package/src/post/matrices.ts +288 -0
- package/src/renderables/ASCIIFont.ts +219 -0
- package/src/renderables/Box.test.ts +205 -0
- package/src/renderables/Box.ts +326 -0
- package/src/renderables/Code.test.ts +2062 -0
- package/src/renderables/Code.ts +357 -0
- package/src/renderables/Diff.regression.test.ts +226 -0
- package/src/renderables/Diff.test.ts +3101 -0
- package/src/renderables/Diff.ts +1211 -0
- package/src/renderables/EditBufferRenderable.test.ts +288 -0
- package/src/renderables/EditBufferRenderable.ts +1166 -0
- package/src/renderables/FrameBuffer.ts +47 -0
- package/src/renderables/Input.test.ts +1228 -0
- package/src/renderables/Input.ts +247 -0
- package/src/renderables/LineNumberRenderable.ts +724 -0
- package/src/renderables/Markdown.ts +1393 -0
- package/src/renderables/ScrollBar.ts +422 -0
- package/src/renderables/ScrollBox.ts +883 -0
- package/src/renderables/Select.test.ts +1033 -0
- package/src/renderables/Select.ts +524 -0
- package/src/renderables/Slider.test.ts +456 -0
- package/src/renderables/Slider.ts +342 -0
- package/src/renderables/TabSelect.test.ts +197 -0
- package/src/renderables/TabSelect.ts +455 -0
- package/src/renderables/Text.selection-buffer.test.ts +123 -0
- package/src/renderables/Text.test.ts +2660 -0
- package/src/renderables/Text.ts +147 -0
- package/src/renderables/TextBufferRenderable.ts +518 -0
- package/src/renderables/TextNode.test.ts +1058 -0
- package/src/renderables/TextNode.ts +325 -0
- package/src/renderables/TextTable.test.ts +1421 -0
- package/src/renderables/TextTable.ts +1344 -0
- package/src/renderables/Textarea.ts +430 -0
- package/src/renderables/TimeToFirstDraw.ts +89 -0
- package/src/renderables/__snapshots__/Code.test.ts.snap +13 -0
- package/src/renderables/__snapshots__/Diff.test.ts.snap +785 -0
- package/src/renderables/__snapshots__/Text.test.ts.snap +421 -0
- package/src/renderables/__snapshots__/TextTable.test.ts.snap +215 -0
- package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +144 -0
- package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +816 -0
- package/src/renderables/__tests__/LineNumberRenderable.test.ts +1865 -0
- package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +85 -0
- package/src/renderables/__tests__/Markdown.code-colors.test.ts +242 -0
- package/src/renderables/__tests__/Markdown.test.ts +2518 -0
- package/src/renderables/__tests__/MultiRenderable.selection.test.ts +87 -0
- package/src/renderables/__tests__/Textarea.buffer.test.ts +682 -0
- package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +675 -0
- package/src/renderables/__tests__/Textarea.editing.test.ts +2041 -0
- package/src/renderables/__tests__/Textarea.error-handling.test.ts +35 -0
- package/src/renderables/__tests__/Textarea.events.test.ts +738 -0
- package/src/renderables/__tests__/Textarea.highlights.test.ts +590 -0
- package/src/renderables/__tests__/Textarea.keybinding.test.ts +3149 -0
- package/src/renderables/__tests__/Textarea.paste.test.ts +357 -0
- package/src/renderables/__tests__/Textarea.rendering.test.ts +1866 -0
- package/src/renderables/__tests__/Textarea.scroll.test.ts +733 -0
- package/src/renderables/__tests__/Textarea.selection.test.ts +1590 -0
- package/src/renderables/__tests__/Textarea.stress.test.ts +670 -0
- package/src/renderables/__tests__/Textarea.undo-redo.test.ts +383 -0
- package/src/renderables/__tests__/Textarea.visual-lines.test.ts +310 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +221 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +89 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +457 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +158 -0
- package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +387 -0
- package/src/renderables/__tests__/markdown-parser.test.ts +217 -0
- package/src/renderables/__tests__/renderable-test-utils.ts +60 -0
- package/src/renderables/composition/README.md +8 -0
- package/src/renderables/composition/VRenderable.ts +32 -0
- package/src/renderables/composition/constructs.ts +127 -0
- package/src/renderables/composition/vnode.ts +289 -0
- package/src/renderables/index.ts +23 -0
- package/src/renderables/markdown-parser.ts +66 -0
- package/src/renderer.ts +2681 -0
- package/src/runtime-plugin-support.ts +39 -0
- package/src/runtime-plugin.ts +615 -0
- package/src/syntax-style.test.ts +841 -0
- package/src/syntax-style.ts +257 -0
- package/src/testing/README.md +210 -0
- package/src/testing/capture-spans.test.ts +194 -0
- package/src/testing/integration.test.ts +276 -0
- package/src/testing/manual-clock.ts +117 -0
- package/src/testing/mock-keys.test.ts +1378 -0
- package/src/testing/mock-keys.ts +457 -0
- package/src/testing/mock-mouse.test.ts +218 -0
- package/src/testing/mock-mouse.ts +247 -0
- package/src/testing/mock-tree-sitter-client.ts +73 -0
- package/src/testing/spy.ts +13 -0
- package/src/testing/test-recorder.test.ts +415 -0
- package/src/testing/test-recorder.ts +145 -0
- package/src/testing/test-renderer.ts +132 -0
- package/src/testing.ts +7 -0
- package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +481 -0
- package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +19 -0
- package/src/tests/__snapshots__/scrollbox.test.ts.snap +29 -0
- package/src/tests/absolute-positioning.snapshot.test.ts +638 -0
- package/src/tests/allocator-stats.test.ts +38 -0
- package/src/tests/destroy-during-render.test.ts +200 -0
- package/src/tests/destroy-on-exit.fixture.ts +36 -0
- package/src/tests/destroy-on-exit.test.ts +41 -0
- package/src/tests/hover-cursor.test.ts +98 -0
- package/src/tests/native-span-feed-async.test.ts +173 -0
- package/src/tests/native-span-feed-close.test.ts +120 -0
- package/src/tests/native-span-feed-coverage.test.ts +227 -0
- package/src/tests/native-span-feed-edge-cases.test.ts +352 -0
- package/src/tests/native-span-feed-use-after-free.test.ts +45 -0
- package/src/tests/opacity.test.ts +123 -0
- package/src/tests/renderable.snapshot.test.ts +524 -0
- package/src/tests/renderable.test.ts +1281 -0
- package/src/tests/renderer.clock.test.ts +158 -0
- package/src/tests/renderer.console-startup.test.ts +185 -0
- package/src/tests/renderer.control.test.ts +425 -0
- package/src/tests/renderer.core-slot-binding.test.ts +952 -0
- package/src/tests/renderer.cursor.test.ts +26 -0
- package/src/tests/renderer.destroy-during-render.test.ts +147 -0
- package/src/tests/renderer.focus-restore.test.ts +257 -0
- package/src/tests/renderer.focus.test.ts +294 -0
- package/src/tests/renderer.idle.test.ts +219 -0
- package/src/tests/renderer.input.test.ts +2237 -0
- package/src/tests/renderer.kitty-flags.test.ts +195 -0
- package/src/tests/renderer.mouse.test.ts +1274 -0
- package/src/tests/renderer.palette.test.ts +629 -0
- package/src/tests/renderer.selection.test.ts +49 -0
- package/src/tests/renderer.slot-registry.test.ts +684 -0
- package/src/tests/renderer.useMouse.test.ts +47 -0
- package/src/tests/runtime-plugin-node-modules-cycle.fixture.ts +76 -0
- package/src/tests/runtime-plugin-node-modules-mjs.fixture.ts +43 -0
- package/src/tests/runtime-plugin-node-modules-no-bare-rewrite.fixture.ts +67 -0
- package/src/tests/runtime-plugin-node-modules-package-type-cache.fixture.ts +72 -0
- package/src/tests/runtime-plugin-node-modules-runtime-specifier.fixture.ts +44 -0
- package/src/tests/runtime-plugin-node-modules-scoped-package-bare-rewrite.fixture.ts +85 -0
- package/src/tests/runtime-plugin-path-alias.fixture.ts +43 -0
- package/src/tests/runtime-plugin-resolve-roots.fixture.ts +65 -0
- package/src/tests/runtime-plugin-support.fixture.ts +11 -0
- package/src/tests/runtime-plugin-support.test.ts +19 -0
- package/src/tests/runtime-plugin-windows-file-url.fixture.ts +30 -0
- package/src/tests/runtime-plugin.fixture.ts +40 -0
- package/src/tests/runtime-plugin.test.ts +354 -0
- package/src/tests/scrollbox-culling-bug.test.ts +114 -0
- package/src/tests/scrollbox-hitgrid-resize.test.ts +136 -0
- package/src/tests/scrollbox-hitgrid.test.ts +909 -0
- package/src/tests/scrollbox.test.ts +1530 -0
- package/src/tests/wrap-resize-perf.test.ts +276 -0
- package/src/tests/yoga-setters.test.ts +921 -0
- package/src/text-buffer-view.test.ts +705 -0
- package/src/text-buffer-view.ts +189 -0
- package/src/text-buffer.test.ts +347 -0
- package/src/text-buffer.ts +250 -0
- package/src/types.ts +161 -0
- package/src/utils.ts +88 -0
- package/src/zig/ansi.zig +268 -0
- package/src/zig/bench/README.md +50 -0
- package/src/zig/bench/buffer-draw-text-buffer_bench.zig +887 -0
- package/src/zig/bench/edit-buffer_bench.zig +476 -0
- package/src/zig/bench/native-span-feed_bench.zig +100 -0
- package/src/zig/bench/rope-markers_bench.zig +713 -0
- package/src/zig/bench/rope_bench.zig +514 -0
- package/src/zig/bench/styled-text_bench.zig +470 -0
- package/src/zig/bench/text-buffer-coords_bench.zig +362 -0
- package/src/zig/bench/text-buffer-view_bench.zig +459 -0
- package/src/zig/bench/text-chunk-graphemes_bench.zig +273 -0
- package/src/zig/bench/utf8_bench.zig +799 -0
- package/src/zig/bench-utils.zig +431 -0
- package/src/zig/bench.zig +217 -0
- package/src/zig/buffer-methods.zig +211 -0
- package/src/zig/buffer.zig +2281 -0
- package/src/zig/build.zig +289 -0
- package/src/zig/build.zig.zon +16 -0
- package/src/zig/edit-buffer.zig +825 -0
- package/src/zig/editor-view.zig +802 -0
- package/src/zig/event-bus.zig +13 -0
- package/src/zig/event-emitter.zig +65 -0
- package/src/zig/file-logger.zig +92 -0
- package/src/zig/grapheme.zig +599 -0
- package/src/zig/lib.zig +1854 -0
- package/src/zig/link.zig +333 -0
- package/src/zig/logger.zig +43 -0
- package/src/zig/mem-registry.zig +125 -0
- package/src/zig/native-span-feed-bench-lib.zig +7 -0
- package/src/zig/native-span-feed.zig +708 -0
- package/src/zig/renderer.zig +1393 -0
- package/src/zig/rope.zig +1220 -0
- package/src/zig/syntax-style.zig +161 -0
- package/src/zig/terminal.zig +987 -0
- package/src/zig/test.zig +72 -0
- package/src/zig/tests/README.md +18 -0
- package/src/zig/tests/buffer-methods_test.zig +1109 -0
- package/src/zig/tests/buffer_test.zig +2557 -0
- package/src/zig/tests/edit-buffer-history_test.zig +271 -0
- package/src/zig/tests/edit-buffer_test.zig +1689 -0
- package/src/zig/tests/editor-view_test.zig +3299 -0
- package/src/zig/tests/event-emitter_test.zig +249 -0
- package/src/zig/tests/grapheme_test.zig +1304 -0
- package/src/zig/tests/link_test.zig +190 -0
- package/src/zig/tests/mem-registry_test.zig +473 -0
- package/src/zig/tests/memory_leak_regression_test.zig +159 -0
- package/src/zig/tests/native-span-feed_test.zig +1264 -0
- package/src/zig/tests/renderer_test.zig +1017 -0
- package/src/zig/tests/rope-nested_test.zig +712 -0
- package/src/zig/tests/rope_fuzz_test.zig +238 -0
- package/src/zig/tests/rope_test.zig +2362 -0
- package/src/zig/tests/segment-merge.test.zig +148 -0
- package/src/zig/tests/syntax-style_test.zig +557 -0
- package/src/zig/tests/terminal_test.zig +754 -0
- package/src/zig/tests/text-buffer-drawing_test.zig +3237 -0
- package/src/zig/tests/text-buffer-highlights_test.zig +666 -0
- package/src/zig/tests/text-buffer-iterators_test.zig +776 -0
- package/src/zig/tests/text-buffer-segment_test.zig +320 -0
- package/src/zig/tests/text-buffer-selection_test.zig +1035 -0
- package/src/zig/tests/text-buffer-selection_viewport_test.zig +358 -0
- package/src/zig/tests/text-buffer-view_test.zig +3649 -0
- package/src/zig/tests/text-buffer_test.zig +2191 -0
- package/src/zig/tests/unicode-width-map.zon +3909 -0
- package/src/zig/tests/utf8_no_zwj_test.zig +260 -0
- package/src/zig/tests/utf8_test.zig +4057 -0
- package/src/zig/tests/utf8_wcwidth_cursor_test.zig +267 -0
- package/src/zig/tests/utf8_wcwidth_test.zig +357 -0
- package/src/zig/tests/word-wrap-editing_test.zig +498 -0
- package/src/zig/tests/wrap-cache-perf_test.zig +113 -0
- package/src/zig/text-buffer-iterators.zig +499 -0
- package/src/zig/text-buffer-segment.zig +404 -0
- package/src/zig/text-buffer-view.zig +1371 -0
- package/src/zig/text-buffer.zig +1180 -0
- package/src/zig/utf8.zig +1948 -0
- package/src/zig/utils.zig +9 -0
- package/src/zig-structs.ts +261 -0
- package/src/zig.ts +3884 -0
- package/tsconfig.build.json +24 -0
- package/tsconfig.json +27 -0
- package/3d/SpriteResourceManager.d.ts +0 -74
- package/3d/SpriteUtils.d.ts +0 -13
- package/3d/TextureUtils.d.ts +0 -24
- package/3d/ThreeRenderable.d.ts +0 -40
- package/3d/WGPURenderer.d.ts +0 -61
- package/3d/animation/ExplodingSpriteEffect.d.ts +0 -71
- package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +0 -76
- package/3d/animation/SpriteAnimator.d.ts +0 -124
- package/3d/animation/SpriteParticleGenerator.d.ts +0 -62
- package/3d/canvas.d.ts +0 -44
- package/3d/index.d.ts +0 -12
- package/3d/physics/PlanckPhysicsAdapter.d.ts +0 -19
- package/3d/physics/RapierPhysicsAdapter.d.ts +0 -19
- package/3d/physics/physics-interface.d.ts +0 -27
- package/3d.d.ts +0 -2
- package/3d.js +0 -34041
- package/3d.js.map +0 -155
- package/LICENSE +0 -21
- package/NativeSpanFeed.d.ts +0 -41
- package/Renderable.d.ts +0 -334
- package/animation/Timeline.d.ts +0 -126
- package/ansi.d.ts +0 -13
- package/buffer.d.ts +0 -111
- package/console.d.ts +0 -144
- package/edit-buffer.d.ts +0 -98
- package/editor-view.d.ts +0 -73
- package/index-9vwc3fg6.js +0 -12260
- package/index-9vwc3fg6.js.map +0 -42
- package/index-dcj62y8t.js +0 -20614
- package/index-dcj62y8t.js.map +0 -67
- package/index-f7n39gpy.js +0 -411
- package/index-f7n39gpy.js.map +0 -10
- package/index.d.ts +0 -23
- package/index.js +0 -478
- package/index.js.map +0 -9
- package/lib/KeyHandler.d.ts +0 -61
- package/lib/RGBA.d.ts +0 -25
- package/lib/ascii.font.d.ts +0 -508
- package/lib/border.d.ts +0 -51
- package/lib/bunfs.d.ts +0 -7
- package/lib/clipboard.d.ts +0 -17
- package/lib/clock.d.ts +0 -15
- package/lib/data-paths.d.ts +0 -26
- package/lib/debounce.d.ts +0 -42
- package/lib/detect-links.d.ts +0 -6
- package/lib/env.d.ts +0 -42
- package/lib/extmarks-history.d.ts +0 -17
- package/lib/extmarks.d.ts +0 -89
- package/lib/hast-styled-text.d.ts +0 -17
- package/lib/index.d.ts +0 -21
- package/lib/keymapping.d.ts +0 -25
- package/lib/objects-in-viewport.d.ts +0 -24
- package/lib/output.capture.d.ts +0 -24
- package/lib/parse.keypress-kitty.d.ts +0 -2
- package/lib/parse.keypress.d.ts +0 -26
- package/lib/parse.mouse.d.ts +0 -30
- package/lib/paste.d.ts +0 -7
- package/lib/queue.d.ts +0 -15
- package/lib/renderable.validations.d.ts +0 -12
- package/lib/scroll-acceleration.d.ts +0 -43
- package/lib/selection.d.ts +0 -63
- package/lib/singleton.d.ts +0 -7
- package/lib/stdin-parser.d.ts +0 -87
- package/lib/styled-text.d.ts +0 -63
- package/lib/terminal-capability-detection.d.ts +0 -30
- package/lib/terminal-palette.d.ts +0 -50
- package/lib/tree-sitter/assets/update.d.ts +0 -11
- package/lib/tree-sitter/client.d.ts +0 -47
- package/lib/tree-sitter/default-parsers.d.ts +0 -2
- package/lib/tree-sitter/download-utils.d.ts +0 -21
- package/lib/tree-sitter/index.d.ts +0 -8
- package/lib/tree-sitter/parser.worker.d.ts +0 -1
- package/lib/tree-sitter/parsers-config.d.ts +0 -53
- package/lib/tree-sitter/resolve-ft.d.ts +0 -5
- package/lib/tree-sitter/types.d.ts +0 -82
- package/lib/tree-sitter-styled-text.d.ts +0 -14
- package/lib/validate-dir-name.d.ts +0 -1
- package/lib/yoga.options.d.ts +0 -32
- package/parser.worker.js +0 -899
- package/parser.worker.js.map +0 -12
- package/plugins/core-slot.d.ts +0 -72
- package/plugins/registry.d.ts +0 -42
- package/plugins/types.d.ts +0 -34
- package/post/effects.d.ts +0 -147
- package/post/filters.d.ts +0 -65
- package/post/matrices.d.ts +0 -20
- package/renderables/ASCIIFont.d.ts +0 -52
- package/renderables/Box.d.ts +0 -81
- package/renderables/Code.d.ts +0 -78
- package/renderables/Diff.d.ts +0 -142
- package/renderables/EditBufferRenderable.d.ts +0 -237
- package/renderables/FrameBuffer.d.ts +0 -16
- package/renderables/Input.d.ts +0 -67
- package/renderables/LineNumberRenderable.d.ts +0 -78
- package/renderables/Markdown.d.ts +0 -185
- package/renderables/ScrollBar.d.ts +0 -77
- package/renderables/ScrollBox.d.ts +0 -124
- package/renderables/Select.d.ts +0 -115
- package/renderables/Slider.d.ts +0 -47
- package/renderables/TabSelect.d.ts +0 -96
- package/renderables/Text.d.ts +0 -36
- package/renderables/TextBufferRenderable.d.ts +0 -105
- package/renderables/TextNode.d.ts +0 -91
- package/renderables/TextTable.d.ts +0 -140
- package/renderables/Textarea.d.ts +0 -63
- package/renderables/TimeToFirstDraw.d.ts +0 -24
- package/renderables/__tests__/renderable-test-utils.d.ts +0 -12
- package/renderables/composition/VRenderable.d.ts +0 -16
- package/renderables/composition/constructs.d.ts +0 -35
- package/renderables/composition/vnode.d.ts +0 -46
- package/renderables/index.d.ts +0 -23
- package/renderables/markdown-parser.d.ts +0 -10
- package/renderer.d.ts +0 -419
- package/runtime-plugin-support.d.ts +0 -3
- package/runtime-plugin-support.js +0 -29
- package/runtime-plugin-support.js.map +0 -10
- package/runtime-plugin.d.ts +0 -16
- package/runtime-plugin.js +0 -16
- package/runtime-plugin.js.map +0 -9
- package/syntax-style.d.ts +0 -54
- package/testing/manual-clock.d.ts +0 -17
- package/testing/mock-keys.d.ts +0 -81
- package/testing/mock-mouse.d.ts +0 -38
- package/testing/mock-tree-sitter-client.d.ts +0 -23
- package/testing/spy.d.ts +0 -7
- package/testing/test-recorder.d.ts +0 -61
- package/testing/test-renderer.d.ts +0 -23
- package/testing.d.ts +0 -6
- package/testing.js +0 -697
- package/testing.js.map +0 -15
- package/text-buffer-view.d.ts +0 -42
- package/text-buffer.d.ts +0 -67
- package/types.d.ts +0 -139
- package/utils.d.ts +0 -14
- package/zig-structs.d.ts +0 -155
- package/zig.d.ts +0 -353
- /package/{assets → src/lib/tree-sitter/assets}/javascript/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/javascript/tree-sitter-javascript.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/injections.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/tree-sitter-markdown.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/typescript/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/typescript/tree-sitter-typescript.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/zig/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/zig/tree-sitter-zig.wasm +0 -0
|
@@ -0,0 +1,4057 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const testing = std.testing;
|
|
3
|
+
const utf8 = @import("../utf8.zig");
|
|
4
|
+
|
|
5
|
+
// ============================================================================
|
|
6
|
+
// ASCII-ONLY DETECTION TESTS
|
|
7
|
+
// ============================================================================
|
|
8
|
+
|
|
9
|
+
test "isAsciiOnly: empty string" {
|
|
10
|
+
// Empty string is not ASCII-only by convention
|
|
11
|
+
try testing.expect(!utf8.isAsciiOnly(""));
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
test "isAsciiOnly: simple ASCII" {
|
|
15
|
+
try testing.expect(utf8.isAsciiOnly("Hello, World!"));
|
|
16
|
+
try testing.expect(utf8.isAsciiOnly("The quick brown fox"));
|
|
17
|
+
try testing.expect(utf8.isAsciiOnly("0123456789"));
|
|
18
|
+
try testing.expect(utf8.isAsciiOnly("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"));
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
test "isAsciiOnly: control chars rejected" {
|
|
22
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\tWorld"));
|
|
23
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\nWorld"));
|
|
24
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\rWorld"));
|
|
25
|
+
try testing.expect(!utf8.isAsciiOnly("\x00"));
|
|
26
|
+
try testing.expect(!utf8.isAsciiOnly("\x1F"));
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
test "isAsciiOnly: extended ASCII rejected" {
|
|
30
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\x7FWorld"));
|
|
31
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\x80World"));
|
|
32
|
+
try testing.expect(!utf8.isAsciiOnly("Hello\xFFWorld"));
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
test "isAsciiOnly: Unicode rejected" {
|
|
36
|
+
try testing.expect(!utf8.isAsciiOnly("Hello 👋"));
|
|
37
|
+
try testing.expect(!utf8.isAsciiOnly("Hello 世界"));
|
|
38
|
+
try testing.expect(!utf8.isAsciiOnly("café"));
|
|
39
|
+
try testing.expect(!utf8.isAsciiOnly("Привет"));
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
test "isAsciiOnly: space character accepted" {
|
|
43
|
+
try testing.expect(utf8.isAsciiOnly(" "));
|
|
44
|
+
try testing.expect(utf8.isAsciiOnly(" "));
|
|
45
|
+
try testing.expect(utf8.isAsciiOnly("Hello World"));
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
test "isAsciiOnly: all printable ASCII chars" {
    // Every byte from 0x20 (space) through 0x7E ('~') in one string.
    const printable: []const u8 = " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~";
    try testing.expectEqual(true, utf8.isAsciiOnly(printable));
}
|
|
52
|
+
|
|
53
|
+
test "isAsciiOnly: SIMD boundary tests" {
    // Input lengths chosen around multiples of 16 bytes.
    const accepted = [_][]const u8{
        "0123456789abcdef", // 16 bytes
        "0123456789abcde", // 15 bytes
        "0123456789abcdefg", // 17 bytes
        "0123456789abcdef0123456789abcdef", // 32 bytes
        "0123456789abcdef0123456789abcdefX", // 33 bytes
    };
    for (accepted) |text| {
        try testing.expect(utf8.isAsciiOnly(text));
    }
}
|
|
60
|
+
|
|
61
|
+
test "isAsciiOnly: non-ASCII at different positions" {
    // A single NUL byte placed at various offsets must always be detected.
    const rejected = [_][]const u8{
        "Hello\x00World", // offset 5
        "\x00bcdefghijklmnop", // offset 0 of a 16-byte input
        "0123456789abcde\x00", // offset 15 (last byte of 16)
        "0123456789abcdef\x00", // offset 16 (17th byte)
        "0123456789abcdef0123456789\x00bcdef", // offset 26
        "0123456789abcdef01234\x00", // offset 21 (final byte)
    };
    for (rejected) |text| {
        try testing.expect(!utf8.isAsciiOnly(text));
    }
}
|
|
69
|
+
|
|
70
|
+
test "isAsciiOnly: large ASCII text" {
    const alloc = testing.allocator;
    const text = try alloc.alloc(u8, 10000);
    defer alloc.free(text);

    // Cycle through the 95 byte values 32..126.
    var idx: usize = 0;
    while (idx < text.len) : (idx += 1) {
        text[idx] = 32 + @as(u8, @intCast(idx % 95));
    }
    try testing.expect(utf8.isAsciiOnly(text));

    // One high-bit byte in the middle must flip the result.
    text[5000] = 0x80;
    try testing.expect(!utf8.isAsciiOnly(text));
}
|
|
84
|
+
|
|
85
|
+
// ============================================================================
|
|
86
|
+
// LINE BREAK TESTS
|
|
87
|
+
// ============================================================================
|
|
88
|
+
|
|
89
|
+
/// One table-driven case for `testLineBreaks`.
const LineBreakTestCase = struct {
    /// Human-readable label for the case.
    name: []const u8,
    /// Bytes handed to `utf8.findLineBreaks`.
    input: []const u8,
    /// Expected `pos` of every reported break, in order.
    expected: []const usize,
};
|
|
94
|
+
|
|
95
|
+
/// Golden cases for line-break detection. A CRLF pair is a single break
/// recorded at the index of its `\n`; a lone CR or LF is recorded at its own index.
const line_break_golden_tests = [_]LineBreakTestCase{
    .{ .name = "empty string", .input = "", .expected = &[_]usize{} },
    .{ .name = "only LF", .input = "a\nb", .expected = &[_]usize{1} },
    .{ .name = "only CR", .input = "a\rb", .expected = &[_]usize{1} },
    // CRLF recorded at \n index
    .{ .name = "CRLF", .input = "a\r\nb", .expected = &[_]usize{2} },
    .{ .name = "ending with CR", .input = "a\r", .expected = &[_]usize{1} },
    .{ .name = "ending with LF", .input = "a\n", .expected = &[_]usize{1} },
    .{ .name = "ending with CRLF", .input = "a\r\n", .expected = &[_]usize{2} },
    .{ .name = "consecutive LF", .input = "\n\n", .expected = &[_]usize{ 0, 1 } },
    .{ .name = "consecutive CRLF", .input = "\r\n\r\n", .expected = &[_]usize{ 1, 3 } },
    .{ .name = "mixed breaks", .input = "\n\r\n\r", .expected = &[_]usize{ 0, 2, 3 } },
    .{ .name = "CR LF separate", .input = "\r\r\n", .expected = &[_]usize{ 0, 2 } },
    .{ .name = "very long line no breaks", .input = "a" ** 1000, .expected = &[_]usize{} },
    .{ .name = "multiple LF", .input = "line1\nline2\nline3\n", .expected = &[_]usize{ 5, 11, 17 } },
    .{ .name = "multiple CRLF", .input = "line1\r\nline2\r\nline3\r\n", .expected = &[_]usize{ 6, 13, 20 } },
    .{ .name = "mixed line endings", .input = "unix\nmac\rwin\r\n", .expected = &[_]usize{ 4, 8, 13 } },
};
|
|
172
|
+
|
|
173
|
+
/// Runs `utf8.findLineBreaks` on `test_case.input` and checks that the reported
/// break positions equal `test_case.expected`, element by element.
///
/// `allocator` backs the temporary `utf8.LineBreakResult`, which is released
/// before returning. Returns the first failed expectation or any error from
/// `findLineBreaks`.
fn testLineBreaks(test_case: LineBreakTestCase, allocator: std.mem.Allocator) !void {
    // Table-driven callers loop over many cases; name the failing one so the
    // report is actionable.
    errdefer std.debug.print("line break case failed: \"{s}\"\n", .{test_case.name});

    var result = utf8.LineBreakResult.init(allocator);
    defer result.deinit();

    try utf8.findLineBreaks(test_case.input, &result);

    // Lengths are checked first, so the paired iteration below is safe.
    try testing.expectEqual(test_case.expected.len, result.breaks.items.len);
    for (test_case.expected, result.breaks.items) |want, got| {
        try testing.expectEqual(want, got.pos);
    }
}
|
|
185
|
+
|
|
186
|
+
test "line breaks: golden tests" {
    // Run every table entry through the shared checker.
    const alloc = testing.allocator;
    for (line_break_golden_tests) |case| try testLineBreaks(case, alloc);
}
|
|
191
|
+
|
|
192
|
+
test "line breaks: CRLF at SIMD16 edge (15-16)" {
    // 32 filler bytes with a CRLF pair straddling the 16-byte boundary.
    var bytes = [_]u8{'x'} ** 32;
    bytes[15] = '\r';
    bytes[16] = '\n';

    const case = LineBreakTestCase{
        .name = "CRLF@15-16",
        .input = &bytes,
        .expected = &[_]usize{16}, // CRLF recorded at \n index
    };
    try testLineBreaks(case, testing.allocator);
}
|
|
206
|
+
|
|
207
|
+
test "line breaks: multiple breaks around SIMD16 boundary" {
    var bytes = [_]u8{'x'} ** 32;
    // LF, then a CRLF pair across the boundary, then another LF.
    bytes[14] = '\n';
    bytes[15] = '\r';
    bytes[16] = '\n';
    bytes[17] = '\n';

    const case = LineBreakTestCase{
        .name = "multi@boundary",
        .input = &bytes,
        .expected = &[_]usize{ 14, 16, 17 }, // 15-16 is CRLF
    };
    try testLineBreaks(case, testing.allocator);
}
|
|
223
|
+
|
|
224
|
+
test "line breaks: multibyte adjacent to LF" {
    // 'é' occupies bytes 0-1, so the LF is at byte 2.
    const case = LineBreakTestCase{
        .name = "é\\n",
        .input = "é\n",
        .expected = &[_]usize{2},
    };
    try testLineBreaks(case, testing.allocator);
}
|
|
234
|
+
|
|
235
|
+
test "line breaks: multibyte adjacent to CRLF" {
    // '漢' occupies bytes 0-2, CR is byte 3, LF (where the break is recorded) is byte 4.
    const case = LineBreakTestCase{
        .name = "漢\\r\\n",
        .input = "漢\r\n",
        .expected = &[_]usize{4},
    };
    try testLineBreaks(case, testing.allocator);
}
|
|
245
|
+
|
|
246
|
+
test "line breaks: multibyte at SIMD boundary without breaks" {
    const text = "Test世界Test";

    // Copy the text into a zeroed 32-byte buffer and scan only the copied prefix.
    var storage = [_]u8{0} ** 32;
    @memcpy(storage[0..text.len], text);

    const case = LineBreakTestCase{
        .name = "unicode@boundary",
        .input = storage[0..text.len],
        .expected = &[_]usize{},
    };
    try testLineBreaks(case, testing.allocator);
}
|
|
261
|
+
|
|
262
|
+
test "line breaks: realistic text" {
    // Prose mixing LF, CRLF and bare CR endings, plus multi-byte UTF-8.
    const prose =
        "The quick brown fox jumps over the lazy dog.\n" ++
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" ++
        "Windows uses CRLF line endings.\r\n" ++
        "Unix uses LF line endings.\n" ++
        "Classic Mac used CR line endings.\r" ++
        "UTF-8 text: 世界 こんにちは\n" ++
        "Multiple\n\nEmpty\n\n\nLines\n" ++
        "Mixed\r\nendings\nhere\r";

    var found = utf8.LineBreakResult.init(testing.allocator);
    defer found.deinit();
    try utf8.findLineBreaks(prose, &found);

    // Smoke check only: at least one break must be reported.
    try testing.expect(found.breaks.items.len != 0);
}
|
|
281
|
+
|
|
282
|
+
test "line breaks: random small buffers" {
    // Fixed seed keeps the generated buffers reproducible.
    var prng = std.Random.DefaultPrng.init(42);
    const rng = prng.random();
    const alloc = testing.allocator;

    for (0..50) |_| {
        const len = 16 + rng.uintLessThan(usize, 1024);
        const bytes = try alloc.alloc(u8, len);
        defer alloc.free(bytes);

        // Roughly 5% LF, 5% CR, the rest lowercase letters.
        for (bytes) |*byte| {
            const roll = rng.uintLessThan(u8, 100);
            const ch: u8 = switch (roll) {
                0...4 => '\n',
                5...9 => '\r',
                else => 'a' + rng.uintLessThan(u8, 26),
            };
            byte.* = ch;
        }

        // No result assertions: the scan only has to complete without error.
        var found = utf8.LineBreakResult.init(alloc);
        defer found.deinit();
        try utf8.findLineBreaks(bytes, &found);
    }
}
|
|
308
|
+
|
|
309
|
+
// ============================================================================
|
|
310
|
+
// TAB STOP TESTS
|
|
311
|
+
// ============================================================================
|
|
312
|
+
|
|
313
|
+
/// One table-driven case for `testTabStops`.
const TabStopTestCase = struct {
    /// Human-readable label for the case.
    name: []const u8,
    /// Bytes handed to `utf8.findTabStops`.
    input: []const u8,
    /// Expected entries of the result's `positions` list, in order.
    expected: []const usize,
};
|
|
318
|
+
|
|
319
|
+
/// Golden cases for tab detection: each expected value is the byte index of a `\t`.
const tab_stop_golden_tests = [_]TabStopTestCase{
    .{ .name = "empty string", .input = "", .expected = &[_]usize{} },
    .{ .name = "no tabs", .input = "hello world", .expected = &[_]usize{} },
    .{ .name = "single tab", .input = "a\tb", .expected = &[_]usize{1} },
    .{ .name = "multiple tabs", .input = "a\tb\tc", .expected = &[_]usize{ 1, 3 } },
    .{ .name = "tab at start", .input = "\tabc", .expected = &[_]usize{0} },
    .{ .name = "tab at end", .input = "abc\t", .expected = &[_]usize{3} },
    .{ .name = "consecutive tabs", .input = "a\t\tb", .expected = &[_]usize{ 1, 2 } },
    .{ .name = "only tabs", .input = "\t\t\t", .expected = &[_]usize{ 0, 1, 2 } },
    .{ .name = "tabs mixed with spaces", .input = "a \tb \tc", .expected = &[_]usize{ 2, 5 } },
    .{ .name = "tab with newline", .input = "a\tb\nc\td", .expected = &[_]usize{ 1, 5 } },
    .{ .name = "many tabs", .input = "\ta\tb\tc\td\te\tf\t", .expected = &[_]usize{ 0, 2, 4, 6, 8, 10, 12 } },
};
|
|
376
|
+
|
|
377
|
+
/// Runs `utf8.findTabStops` on `test_case.input` and checks that the reported
/// positions equal `test_case.expected`, element by element.
///
/// `allocator` backs the temporary `utf8.TabStopResult`, which is released
/// before returning. Returns the first failed expectation or any error from
/// `findTabStops`.
fn testTabStops(test_case: TabStopTestCase, allocator: std.mem.Allocator) !void {
    // Table-driven callers loop over many cases; name the failing one so the
    // report is actionable.
    errdefer std.debug.print("tab stop case failed: \"{s}\"\n", .{test_case.name});

    var result = utf8.TabStopResult.init(allocator);
    defer result.deinit();

    try utf8.findTabStops(test_case.input, &result);

    // Lengths are checked first, so the paired iteration below is safe.
    try testing.expectEqual(test_case.expected.len, result.positions.items.len);
    for (test_case.expected, result.positions.items) |want, got| {
        try testing.expectEqual(want, got);
    }
}
|
|
389
|
+
|
|
390
|
+
test "tab stops: golden tests" {
    // Run every table entry through the shared checker.
    const alloc = testing.allocator;
    for (tab_stop_golden_tests) |case| try testTabStops(case, alloc);
}
|
|
395
|
+
|
|
396
|
+
test "tab stops: tab at SIMD16 edge (15)" {
    // Tab in the last byte of the first 16-byte span.
    var bytes = [_]u8{'x'} ** 32;
    bytes[15] = '\t';
    bytes[16] = 'y';

    const case = TabStopTestCase{
        .name = "tab@15",
        .input = &bytes,
        .expected = &[_]usize{15},
    };
    try testTabStops(case, testing.allocator);
}
|
|
410
|
+
|
|
411
|
+
test "tab stops: tab at SIMD16 edge (16)" {
    // Tab in the first byte of the second 16-byte span.
    var bytes = [_]u8{'x'} ** 32;
    bytes[16] = '\t';
    bytes[17] = 'y';

    const case = TabStopTestCase{
        .name = "tab@16",
        .input = &bytes,
        .expected = &[_]usize{16},
    };
    try testTabStops(case, testing.allocator);
}
|
|
425
|
+
|
|
426
|
+
test "tab stops: multiple tabs around SIMD16 boundary" {
    // Four consecutive tabs, two on each side of byte 16.
    const tab_at = [_]usize{ 14, 15, 16, 17 };

    var bytes = [_]u8{'x'} ** 32;
    for (tab_at) |pos| bytes[pos] = '\t';

    const case = TabStopTestCase{
        .name = "tabs@boundary",
        .input = &bytes,
        .expected = &tab_at,
    };
    try testTabStops(case, testing.allocator);
}
|
|
442
|
+
|
|
443
|
+
test "tab stops: tabs in all SIMD lanes" {
    // Sixteen tabs: every byte of a 16-byte input is a hit.
    const tabs = [_]u8{'\t'} ** 16;

    const case = TabStopTestCase{
        .name = "all_tabs",
        .input = &tabs,
        .expected = &[_]usize{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
    };
    try testTabStops(case, testing.allocator);
}
|
|
457
|
+
|
|
458
|
+
test "tab stops: multibyte adjacent to tab" {
    const case = TabStopTestCase{
        .name = "é\\t",
        .input = "é\ttest", // é is 2 bytes: 0xC3 0xA9
        .expected = &[_]usize{2}, // tab at byte index 2
    };
    try testTabStops(case, testing.allocator);
}
|
|
468
|
+
|
|
469
|
+
test "tab stops: CJK adjacent to tab" {
    const case = TabStopTestCase{
        .name = "漢\\t",
        .input = "漢\ttest", // 漢 is 3 bytes: 0xE6 0xBC 0xA2
        .expected = &[_]usize{3}, // tab at byte index 3
    };
    try testTabStops(case, testing.allocator);
}
|
|
479
|
+
|
|
480
|
+
test "tab stops: emoji adjacent to tab" {
    const case = TabStopTestCase{
        .name = "emoji\\t",
        .input = "👋\twave", // 👋 is 4 bytes
        .expected = &[_]usize{4}, // tab at byte index 4
    };
    try testTabStops(case, testing.allocator);
}
|
|
490
|
+
|
|
491
|
+
test "tab stops: multibyte at SIMD boundary without tabs" {
    const text = "Test世界Test";

    // Copy the text into a zeroed 32-byte buffer and scan only the copied prefix.
    var storage = [_]u8{0} ** 32;
    @memcpy(storage[0..text.len], text);

    const case = TabStopTestCase{
        .name = "unicode@boundary",
        .input = storage[0..text.len],
        .expected = &[_]usize{}, // no tabs
    };
    try testTabStops(case, testing.allocator);
}
|
|
506
|
+
|
|
507
|
+
test "tab stops: realistic code text" {
    // Tab-indented snippet: 1 + 1 + 2 + 1 + 1 tabs across the indented lines.
    const snippet =
        "function test() {\n" ++
        "\tconst x = 10;\n" ++
        "\tif (x > 5) {\n" ++
        "\t\treturn true;\n" ++
        "\t}\n" ++
        "\treturn false;\n" ++
        "}\n";

    var found = utf8.TabStopResult.init(testing.allocator);
    defer found.deinit();
    try utf8.findTabStops(snippet, &found);

    // Six tabs in total, counting the double tab on the nested return.
    const tab_count: usize = 6;
    try testing.expectEqual(tab_count, found.positions.items.len);
}
|
|
525
|
+
|
|
526
|
+
test "tab stops: TSV data" {
    // Column separators follow "name" (4), "age" (8) and "city" (13).
    const case = TabStopTestCase{
        .name = "tsv",
        .input = "name\tage\tcity\tcountry",
        .expected = &[_]usize{ 4, 8, 13 },
    };
    try testTabStops(case, testing.allocator);
}
|
|
536
|
+
|
|
537
|
+
test "tab stops: random small buffers" {
    // Fixed seed keeps the generated buffers reproducible.
    var prng = std.Random.DefaultPrng.init(42);
    const rng = prng.random();
    const alloc = testing.allocator;

    for (0..50) |_| {
        const len = 16 + rng.uintLessThan(usize, 1024);
        const bytes = try alloc.alloc(u8, len);
        defer alloc.free(bytes);

        // Roughly 10% tabs, the rest lowercase letters.
        for (bytes) |*byte| {
            const roll = rng.uintLessThan(u8, 100);
            const ch: u8 = if (roll >= 10) 'a' + rng.uintLessThan(u8, 26) else '\t';
            byte.* = ch;
        }

        // No result assertions: the scan only has to complete without error.
        var found = utf8.TabStopResult.init(alloc);
        defer found.deinit();
        try utf8.findTabStops(bytes, &found);
    }
}
|
|
561
|
+
|
|
562
|
+
test "tab stops: large buffer with periodic tabs" {
    const alloc = testing.allocator;
    const bytes = try alloc.alloc(u8, 10000);
    defer alloc.free(bytes);

    // A tab every 50 bytes, letters elsewhere; count the tabs as they are written.
    var tabs_written: usize = 0;
    var idx: usize = 0;
    while (idx < bytes.len) : (idx += 1) {
        if (idx % 50 != 0) {
            bytes[idx] = 'a' + @as(u8, @intCast(idx % 26));
            continue;
        }
        bytes[idx] = '\t';
        tabs_written += 1;
    }

    var found = utf8.TabStopResult.init(alloc);
    defer found.deinit();
    try utf8.findTabStops(bytes, &found);

    try testing.expectEqual(tabs_written, found.positions.items.len);
}
|
|
583
|
+
|
|
584
|
+
test "tab stops: exactly 16 bytes with tab" {
    const case = TabStopTestCase{
        .name = "16bytes_with_tab",
        .input = "0123456789abcd\tx", // exactly 16 bytes, tab at index 14
        .expected = &[_]usize{14},
    };
    try testTabStops(case, testing.allocator);
}
|
|
594
|
+
|
|
595
|
+
test "tab stops: exactly 16 bytes no tab" {
    const case = TabStopTestCase{
        .name = "16bytes_no_tab",
        .input = "0123456789abcdef", // exactly 16 bytes, no tab
        .expected = &[_]usize{},
    };
    try testTabStops(case, testing.allocator);
}
|
|
605
|
+
|
|
606
|
+
test "tab stops: 17 bytes with tab at 16" {
    const case = TabStopTestCase{
        .name = "tab@16",
        .input = "0123456789abcdef\t", // 17 bytes, tab at index 16
        .expected = &[_]usize{16},
    };
    try testTabStops(case, testing.allocator);
}
|
|
616
|
+
|
|
617
|
+
test "tab stops: result reuse" {
    var stops = utf8.TabStopResult.init(testing.allocator);
    defer stops.deinit();

    // First scan: two tabs.
    try utf8.findTabStops("a\tb\tc", &stops);
    try testing.expectEqual(@as(usize, 2), stops.positions.items.len);

    // Second scan into the same result: positions from the first scan must not linger.
    try utf8.findTabStops("x\ty", &stops);
    const found = stops.positions.items;
    try testing.expectEqual(@as(usize, 1), found.len);
    try testing.expectEqual(@as(usize, 1), found[0]);
}
|
|
630
|
+
|
|
631
|
+
test "tab stops: mixed with other whitespace" {
    // Two spaces, tab, two spaces, tab, two spaces: tabs land on bytes 2 and 5.
    // The space runs are spelled with `**` so their length is explicit; with
    // single spaces the tabs would sit at 1 and 3 and contradict `expected`.
    const gap = " " ** 2;
    const case = TabStopTestCase{
        .name = "mixed_whitespace",
        .input = gap ++ "\t" ++ gap ++ "\t" ++ gap,
        .expected = &[_]usize{ 2, 5 },
    };
    try testTabStops(case, testing.allocator);
}
|
|
641
|
+
|
|
642
|
+
test "tab stops: makefile style" {
    // One target line followed by two tab-indented recipe lines.
    const recipe = "target:\n\t@echo Building\n\t@gcc -o out main.c\n";

    var found = utf8.TabStopResult.init(testing.allocator);
    defer found.deinit();
    try utf8.findTabStops(recipe, &found);

    // One tab per command line.
    const tab_count: usize = 2;
    try testing.expectEqual(tab_count, found.positions.items.len);
}
|
|
653
|
+
|
|
654
|
+
test "tab stops: tabs across multiple SIMD chunks" {
    const alloc = testing.allocator;
    const bytes = try alloc.alloc(u8, 64); // 4 SIMD chunks
    defer alloc.free(bytes);

    // A tab at the start of each 16-byte chunk, plus one in the final byte.
    const tab_at = [_]usize{ 0, 16, 32, 48, 63 };
    @memset(bytes, 'x');
    for (tab_at) |pos| bytes[pos] = '\t';

    const case = TabStopTestCase{
        .name = "multi_chunk",
        .input = bytes,
        .expected = &tab_at,
    };
    try testTabStops(case, alloc);
}
|
|
674
|
+
|
|
675
|
+
// ============================================================================
|
|
676
|
+
// WORD WRAP BREAK TESTS
|
|
677
|
+
// ============================================================================
|
|
678
|
+
|
|
679
|
+
/// One table-driven case for `testWrapBreaks`.
const WrapBreakTestCase = struct {
    /// Human-readable label for the case.
    name: []const u8,
    /// Bytes handed to `utf8.findWrapBreaks`.
    input: []const u8,
    /// Expected `byte_offset` of every reported break, in order.
    expected: []const usize,
};
|
|
684
|
+
|
|
685
|
+
/// Golden cases for wrap-break detection (run in `.unicode` mode by
/// `testWrapBreaks`). Each expected value is the byte offset of a break
/// character; for multi-byte break characters it is the offset of the first byte.
/// LF and CR are line breaks, not wrap breaks, so they yield nothing here.
const wrap_break_golden_tests = [_]WrapBreakTestCase{
    .{ .name = "empty string", .input = "", .expected = &[_]usize{} },
    .{ .name = "no breaks", .input = "abcdef", .expected = &[_]usize{} },
    .{ .name = "single space", .input = "a b", .expected = &[_]usize{1} },
    .{ .name = "multiple spaces", .input = "a b c", .expected = &[_]usize{ 1, 3 } },
    .{ .name = "tab character", .input = "a\tb", .expected = &[_]usize{1} },
    .{ .name = "newline", .input = "a\nb", .expected = &[_]usize{} },
    .{ .name = "carriage return", .input = "a\rb", .expected = &[_]usize{} },
    .{ .name = "dash", .input = "pre-post", .expected = &[_]usize{3} },
    .{ .name = "forward slash", .input = "path/to/file", .expected = &[_]usize{ 4, 7 } },
    .{ .name = "backslash", .input = "path\\to\\file", .expected = &[_]usize{ 4, 7 } },
    .{
        .name = "punctuation",
        .input = "Hello, world! How are you? Fine.",
        .expected = &[_]usize{ 5, 6, 12, 13, 17, 21, 25, 26, 31 },
    },
    .{ .name = "brackets", .input = "(a)[b]{c}", .expected = &[_]usize{ 0, 2, 3, 5, 6, 8 } },
    .{
        .name = "mixed breaks",
        .input = "Hello, world! -path/file.",
        .expected = &[_]usize{ 5, 6, 12, 13, 14, 19, 24 },
    },
    // Space runs are spelled with `**` so the count the expectation depends on
    // is explicit: two spaces between the letters, and three spaces alone.
    .{ .name = "consecutive spaces", .input = "a" ++ " " ** 2 ++ "b", .expected = &[_]usize{ 1, 2 } },
    .{ .name = "only spaces", .input = " " ** 3, .expected = &[_]usize{ 0, 1, 2 } },
    .{
        .name = "all break types",
        .input = " \t-/\\.,:;!?()[]{}",
        .expected = &[_]usize{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 },
    },
    .{ .name = "nbsp", .input = "a\u{00A0}b", .expected = &[_]usize{1} },
    .{ .name = "em space", .input = "a\u{2003}b", .expected = &[_]usize{1} },
    .{ .name = "ideo space", .input = "a\u{3000}b", .expected = &[_]usize{1} },
    .{ .name = "soft hyphen", .input = "pre\u{00AD}post", .expected = &[_]usize{3} },
    .{ .name = "unicode hyphen", .input = "pre\u{2010}post", .expected = &[_]usize{3} },
    .{ .name = "zero width space", .input = "a\u{200B}b", .expected = &[_]usize{1} },
};
|
|
797
|
+
|
|
798
|
+
/// Runs `utf8.findWrapBreaks` in `.unicode` mode on `test_case.input` and checks
/// that the reported `byte_offset` values equal `test_case.expected`, in order.
///
/// `allocator` backs the temporary `utf8.WrapBreakResult`, which is released
/// before returning. Returns the first failed expectation or any error from
/// `findWrapBreaks`.
fn testWrapBreaks(test_case: WrapBreakTestCase, allocator: std.mem.Allocator) !void {
    // Table-driven callers loop over many cases; name the failing one so the
    // report is actionable.
    errdefer std.debug.print("wrap break case failed: \"{s}\"\n", .{test_case.name});

    var result = utf8.WrapBreakResult.init(allocator);
    defer result.deinit();

    try utf8.findWrapBreaks(test_case.input, &result, .unicode);

    // Lengths are checked first, so the paired iteration below is safe.
    try testing.expectEqual(test_case.expected.len, result.breaks.items.len);
    for (test_case.expected, result.breaks.items) |want, got| {
        try testing.expectEqual(want, got.byte_offset);
    }
}
|
|
810
|
+
|
|
811
|
+
test "wrap breaks: golden tests" {
    // Run every table entry through the shared checker.
    const alloc = testing.allocator;
    for (wrap_break_golden_tests) |case| try testWrapBreaks(case, alloc);
}
|
|
816
|
+
|
|
817
|
+
test "wrap breaks: space at SIMD16 edge (15)" {
    // Space in the last byte of the first 16-byte span.
    var bytes = [_]u8{'x'} ** 32;
    bytes[15] = ' ';
    bytes[16] = 'y';

    const case = WrapBreakTestCase{
        .name = "space@15",
        .input = &bytes,
        .expected = &[_]usize{15},
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
831
|
+
|
|
832
|
+
test "wrap breaks: unicode NBSP at SIMD16 edge (15)" {
    var bytes = [_]u8{'x'} ** 32;
    // NBSP U+00A0 = 0xC2 0xA0, split across the 16-byte boundary.
    bytes[15] = 0xC2;
    bytes[16] = 0xA0;

    const case = WrapBreakTestCase{
        .name = "nbsp@15",
        .input = &bytes,
        .expected = &[_]usize{15}, // offset of the sequence's first byte
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
847
|
+
|
|
848
|
+
test "wrap breaks: multiple breaks around SIMD16 boundary" {
    var bytes = [_]u8{'x'} ** 32;
    // Four different break characters, two on each side of byte 16.
    bytes[14] = ' ';
    bytes[15] = '-';
    bytes[16] = '/';
    bytes[17] = '.';

    const case = WrapBreakTestCase{
        .name = "multi@boundary",
        .input = &bytes,
        .expected = &[_]usize{ 14, 15, 16, 17 },
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
864
|
+
|
|
865
|
+
test "wrap breaks: multibyte adjacent to space" {
    const case = WrapBreakTestCase{
        .name = "é space",
        .input = "é test", // é is 2 bytes: 0xC3 0xA9
        .expected = &[_]usize{2}, // space at byte index 2
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
875
|
+
|
|
876
|
+
test "wrap breaks: multibyte adjacent to dash" {
    const case = WrapBreakTestCase{
        .name = "漢-",
        .input = "漢-test", // 漢 is 3 bytes: 0xE6 0xBC 0xA2
        .expected = &[_]usize{3}, // dash at byte index 3
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
886
|
+
|
|
887
|
+
test "wrap breaks: multibyte at SIMD boundary with script transitions" {
    const text = "Test世界Test";

    // Copy the text into a zeroed 32-byte buffer and scan only the copied prefix.
    var storage = [_]u8{0} ** 32;
    @memcpy(storage[0..text.len], text);

    // Breaks are expected at the ASCII<->CJK transitions:
    //   - byte 3: the final 't' of the leading "Test"
    //   - byte 7: the first byte of '界', just before the trailing "Test"
    const case = WrapBreakTestCase{
        .name = "unicode@boundary",
        .input = storage[0..text.len],
        .expected = &[_]usize{ 3, 7 },
    };
    try testWrapBreaks(case, testing.allocator);
}
|
|
906
|
+
|
|
907
|
+
test "wrap breaks: realistic text" {
    // Prose exercising spaces, punctuation, path separators, brackets and dashes.
    const prose =
        "The quick brown fox jumps over the lazy dog.\n" ++
        "Lorem ipsum dolor sit amet, consectetur adipiscing elit.\n" ++
        "File paths: /usr/local/bin and C:\\Windows\\System32\n" ++
        "Punctuation test: Hello, world! How are you? I'm fine.\n" ++
        "Brackets test: (parentheses) [square] {curly}\n" ++
        "Dashes test: pre-dash post-dash multi-word-expression\n" ++
        "Mixed: Hello, /path/to-file.txt [done]!\n";

    var found = utf8.WrapBreakResult.init(testing.allocator);
    defer found.deinit();
    try utf8.findWrapBreaks(prose, &found, .unicode);

    // Smoke check only: at least one break must be reported.
    try testing.expect(found.breaks.items.len != 0);
}
|
|
925
|
+
|
|
926
|
+
test "wrap breaks: random small buffers" {
    // Fixed seed keeps the generated buffers reproducible.
    var prng = std.Random.DefaultPrng.init(42);
    const rng = prng.random();
    const alloc = testing.allocator;

    const break_chars = " \t-/\\.,:;!?()[]{}";

    for (0..50) |_| {
        const len = 16 + rng.uintLessThan(usize, 1024);
        const bytes = try alloc.alloc(u8, len);
        defer alloc.free(bytes);

        // Roughly 20% random break characters, the rest lowercase letters.
        for (bytes) |*byte| {
            const roll = rng.uintLessThan(u8, 100);
            const ch: u8 = if (roll >= 20)
                'a' + rng.uintLessThan(u8, 26)
            else
                break_chars[rng.uintLessThan(usize, break_chars.len)];
            byte.* = ch;
        }

        // No result assertions: the scan only has to complete without error.
        var found = utf8.WrapBreakResult.init(alloc);
        defer found.deinit();
        try utf8.findWrapBreaks(bytes, &found, .unicode);
    }
}
|
|
954
|
+
|
|
955
|
+
test "wrap breaks: large buffer" {
    const alloc = testing.allocator;
    const bytes = try alloc.alloc(u8, 10000);
    defer alloc.free(bytes);

    // A space every 50 bytes, a dash on the remaining multiples of 75, letters elsewhere.
    var idx: usize = 0;
    while (idx < bytes.len) : (idx += 1) {
        const ch: u8 = if (idx % 50 == 0)
            ' '
        else if (idx % 75 == 0)
            '-'
        else
            'a' + @as(u8, @intCast(idx % 26));
        bytes[idx] = ch;
    }

    var found = utf8.WrapBreakResult.init(alloc);
    defer found.deinit();
    try utf8.findWrapBreaks(bytes, &found, .unicode);

    // Smoke check only: at least one break must be reported.
    try testing.expect(found.breaks.items.len != 0);
}
|
|
977
|
+
|
|
978
|
+
test "wrap breaks: buffer exceeding 64KB" {
    const alloc = testing.allocator;
    const text = try alloc.alloc(u8, 100_000);
    defer alloc.free(text);
    @memset(text, 'a');

    // The lone space sits past 65535; a u16 offset would wrap to 4464 (70000 % 65536).
    const break_pos: usize = 70_000;
    text[break_pos] = ' ';

    var found = utf8.WrapBreakResult.init(alloc);
    defer found.deinit();
    try utf8.findWrapBreaks(text, &found, .unicode);

    // Exactly one break, reported at its true (untruncated) position.
    try testing.expectEqual(@as(usize, 1), found.breaks.items.len);
    const only = found.breaks.items[0];
    try testing.expectEqual(@as(u32, break_pos), only.byte_offset);
    try testing.expectEqual(@as(u32, break_pos), only.char_offset);
}
|
|
1000
|
+
|
|
1001
|
+
// ============================================================================
|
|
1002
|
+
// EDGE CASES AND INTEGRATION TESTS
|
|
1003
|
+
// ============================================================================
|
|
1004
|
+
|
|
1005
|
+
test "edge case: result reuse" {
|
|
1006
|
+
var line_result = utf8.LineBreakResult.init(testing.allocator);
|
|
1007
|
+
defer line_result.deinit();
|
|
1008
|
+
|
|
1009
|
+
// First use - line breaks
|
|
1010
|
+
try utf8.findLineBreaks("a\nb\nc", &line_result);
|
|
1011
|
+
try testing.expectEqual(@as(usize, 2), line_result.breaks.items.len);
|
|
1012
|
+
|
|
1013
|
+
// Second use - should reset automatically
|
|
1014
|
+
try utf8.findLineBreaks("x\ny", &line_result);
|
|
1015
|
+
try testing.expectEqual(@as(usize, 1), line_result.breaks.items.len);
|
|
1016
|
+
try testing.expectEqual(@as(usize, 1), line_result.breaks.items[0].pos);
|
|
1017
|
+
|
|
1018
|
+
// Third use - wrap breaks (different result type)
|
|
1019
|
+
var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1020
|
+
defer wrap_result.deinit();
|
|
1021
|
+
try utf8.findWrapBreaks("a b c", &wrap_result, .unicode);
|
|
1022
|
+
try testing.expectEqual(@as(usize, 2), wrap_result.breaks.items.len);
|
|
1023
|
+
}
|
|
1024
|
+
|
|
1025
|
+
test "edge case: empty input" {
|
|
1026
|
+
var line_result = utf8.LineBreakResult.init(testing.allocator);
|
|
1027
|
+
defer line_result.deinit();
|
|
1028
|
+
|
|
1029
|
+
try utf8.findLineBreaks("", &line_result);
|
|
1030
|
+
try testing.expectEqual(@as(usize, 0), line_result.breaks.items.len);
|
|
1031
|
+
|
|
1032
|
+
var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1033
|
+
defer wrap_result.deinit();
|
|
1034
|
+
try utf8.findWrapBreaks("", &wrap_result, .unicode);
|
|
1035
|
+
try testing.expectEqual(@as(usize, 0), wrap_result.breaks.items.len);
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
test "edge case: exactly 16 bytes" {
|
|
1039
|
+
var line_result = utf8.LineBreakResult.init(testing.allocator);
|
|
1040
|
+
defer line_result.deinit();
|
|
1041
|
+
|
|
1042
|
+
const input = "0123456789abcdef"; // exactly 16 bytes
|
|
1043
|
+
try utf8.findLineBreaks(input, &line_result);
|
|
1044
|
+
try testing.expectEqual(@as(usize, 0), line_result.breaks.items.len);
|
|
1045
|
+
|
|
1046
|
+
var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1047
|
+
defer wrap_result.deinit();
|
|
1048
|
+
try utf8.findWrapBreaks(input, &wrap_result, .unicode);
|
|
1049
|
+
try testing.expectEqual(@as(usize, 0), wrap_result.breaks.items.len);
|
|
1050
|
+
}
|
|
1051
|
+
|
|
1052
|
+
test "edge case: 17 bytes with break at 16" {
|
|
1053
|
+
var line_result = utf8.LineBreakResult.init(testing.allocator);
|
|
1054
|
+
defer line_result.deinit();
|
|
1055
|
+
|
|
1056
|
+
const input = "0123456789abcde\nx"; // break at position 15
|
|
1057
|
+
try utf8.findLineBreaks(input, &line_result);
|
|
1058
|
+
try testing.expectEqual(@as(usize, 1), line_result.breaks.items.len);
|
|
1059
|
+
try testing.expectEqual(@as(usize, 15), line_result.breaks.items[0].pos);
|
|
1060
|
+
|
|
1061
|
+
var wrap_result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1062
|
+
defer wrap_result.deinit();
|
|
1063
|
+
const input2 = "0123456789abcde x"; // space at position 15
|
|
1064
|
+
try utf8.findWrapBreaks(input2, &wrap_result, .unicode);
|
|
1065
|
+
try testing.expectEqual(@as(usize, 1), wrap_result.breaks.items.len);
|
|
1066
|
+
try testing.expectEqual(@as(u16, 15), wrap_result.breaks.items[0].byte_offset);
|
|
1067
|
+
try testing.expectEqual(@as(u16, 15), wrap_result.breaks.items[0].char_offset);
|
|
1068
|
+
}
|
|
1069
|
+
|
|
1070
|
+
// ============================================================================
|
|
1071
|
+
// GRAPHEME CLUSTER TESTS
|
|
1072
|
+
// ============================================================================
|
|
1073
|
+
|
|
1074
|
+
test "wrap breaks: emoji with ZWJ - char offset should count grapheme not codepoints" {
    // Woman (U+1F469) + ZERO WIDTH JOINER (U+200D) + rocket (U+1F680) forms a
    // single grapheme cluster. The sequence is written with escapes because the
    // joiner is invisible and is easily lost when the file is copied or
    // rendered; without it the byte offsets asserted below no longer hold.
    // Byte layout: "ab " (3) + 4 + 3 + 4 = 14, so the second space is at byte 14.
    const input = "ab \u{1F469}\u{200D}\u{1F680} cd";

    var result = utf8.WrapBreakResult.init(testing.allocator);
    defer result.deinit();
    try utf8.findWrapBreaks(input, &result, .unicode);

    try testing.expectEqual(@as(usize, 2), result.breaks.items.len);

    // First break: the space after "ab".
    try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
    try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);

    // Second break: the space after the emoji. The three code points of the
    // ZWJ sequence count as one character, so the offset is 4, not 6.
    try testing.expectEqual(@as(u16, 14), result.breaks.items[1].byte_offset);
    try testing.expectEqual(@as(u16, 4), result.breaks.items[1].char_offset);
}
|
|
1087
|
+
|
|
1088
|
+
test "wrap breaks: emoji with skin tone - char offset should count grapheme" {
|
|
1089
|
+
const input = "hi 👋🏿 bye";
|
|
1090
|
+
|
|
1091
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1092
|
+
defer result.deinit();
|
|
1093
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1094
|
+
|
|
1095
|
+
try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
|
|
1096
|
+
try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
|
|
1097
|
+
try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
|
|
1098
|
+
try testing.expectEqual(@as(u16, 11), result.breaks.items[1].byte_offset);
|
|
1099
|
+
try testing.expectEqual(@as(u16, 4), result.breaks.items[1].char_offset); // Should be 4, not 5
|
|
1100
|
+
}
|
|
1101
|
+
|
|
1102
|
+
test "wrap breaks: emoji with VS16 selector - char offset should count grapheme" {
|
|
1103
|
+
const input = "I ❤️ U";
|
|
1104
|
+
|
|
1105
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1106
|
+
defer result.deinit();
|
|
1107
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1108
|
+
|
|
1109
|
+
try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
|
|
1110
|
+
try testing.expectEqual(@as(u16, 1), result.breaks.items[0].byte_offset);
|
|
1111
|
+
try testing.expectEqual(@as(u16, 1), result.breaks.items[0].char_offset);
|
|
1112
|
+
try testing.expectEqual(@as(u16, 8), result.breaks.items[1].byte_offset);
|
|
1113
|
+
try testing.expectEqual(@as(u16, 3), result.breaks.items[1].char_offset); // Should be 3, not 4
|
|
1114
|
+
}
|
|
1115
|
+
|
|
1116
|
+
test "wrap breaks: combining diacritic - char offset should count grapheme" {
|
|
1117
|
+
const input = "cafe\u{0301} time";
|
|
1118
|
+
|
|
1119
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1120
|
+
defer result.deinit();
|
|
1121
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1122
|
+
|
|
1123
|
+
try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
|
|
1124
|
+
try testing.expectEqual(@as(u16, 6), result.breaks.items[0].byte_offset);
|
|
1125
|
+
try testing.expectEqual(@as(u16, 4), result.breaks.items[0].char_offset); // Should be 4, not 5
|
|
1126
|
+
}
|
|
1127
|
+
|
|
1128
|
+
test "wrap breaks: flag emoji - char offset should count grapheme" {
|
|
1129
|
+
const input = "USA🇺🇸 flag";
|
|
1130
|
+
|
|
1131
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1132
|
+
defer result.deinit();
|
|
1133
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1134
|
+
|
|
1135
|
+
try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
|
|
1136
|
+
try testing.expectEqual(@as(u16, 11), result.breaks.items[0].byte_offset);
|
|
1137
|
+
try testing.expectEqual(@as(u16, 4), result.breaks.items[0].char_offset); // 3(USA) + 1(flag) = 4
|
|
1138
|
+
}
|
|
1139
|
+
|
|
1140
|
+
test "wrap breaks: mixed graphemes and ASCII" {
|
|
1141
|
+
const input = "Hello 👋🏿 world 🇺🇸 test";
|
|
1142
|
+
|
|
1143
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1144
|
+
defer result.deinit();
|
|
1145
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1146
|
+
|
|
1147
|
+
try testing.expectEqual(@as(usize, 4), result.breaks.items.len);
|
|
1148
|
+
try testing.expectEqual(@as(u16, 5), result.breaks.items[0].byte_offset);
|
|
1149
|
+
try testing.expectEqual(@as(u16, 5), result.breaks.items[0].char_offset);
|
|
1150
|
+
try testing.expectEqual(@as(u16, 14), result.breaks.items[1].byte_offset);
|
|
1151
|
+
try testing.expectEqual(@as(u16, 7), result.breaks.items[1].char_offset); // 5 + 1 + 1(grapheme) = 7
|
|
1152
|
+
try testing.expectEqual(@as(u16, 20), result.breaks.items[2].byte_offset);
|
|
1153
|
+
try testing.expectEqual(@as(u16, 13), result.breaks.items[2].char_offset); // 7 + 1 + 5 = 13
|
|
1154
|
+
try testing.expectEqual(@as(u16, 29), result.breaks.items[3].byte_offset);
|
|
1155
|
+
try testing.expectEqual(@as(u16, 15), result.breaks.items[3].char_offset); // 13 + 1(space) + 1(flag grapheme, RI pair) = 15 (per uucode)
|
|
1156
|
+
}
|
|
1157
|
+
|
|
1158
|
+
test "wrap breaks: CJK characters keep break offsets" {
|
|
1159
|
+
// Ensure multibyte graphemes don't shift wrap break offsets.
|
|
1160
|
+
const input = "Hello 世界 test";
|
|
1161
|
+
|
|
1162
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1163
|
+
defer result.deinit();
|
|
1164
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1165
|
+
|
|
1166
|
+
// Should find 2 wrap breaks (2 spaces)
|
|
1167
|
+
try testing.expectEqual(@as(usize, 2), result.breaks.items.len);
|
|
1168
|
+
|
|
1169
|
+
// First break: space after "Hello"
|
|
1170
|
+
try testing.expectEqual(@as(u16, 5), result.breaks.items[0].byte_offset);
|
|
1171
|
+
try testing.expectEqual(@as(u16, 5), result.breaks.items[0].char_offset);
|
|
1172
|
+
|
|
1173
|
+
// Second break: space after "世界"
|
|
1174
|
+
// Byte: "Hello " = 6 bytes, "世" = 3 bytes, "界" = 3 bytes, total = 12
|
|
1175
|
+
try testing.expectEqual(@as(u16, 12), result.breaks.items[1].byte_offset);
|
|
1176
|
+
try testing.expectEqual(@as(u16, 8), result.breaks.items[1].char_offset); // 6 graphemes(Hello space) + 2 graphemes(世界) = 8
|
|
1177
|
+
}
|
|
1178
|
+
|
|
1179
|
+
test "wrap breaks: CJK to ASCII script transition" {
|
|
1180
|
+
const input = "日本語abc";
|
|
1181
|
+
|
|
1182
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1183
|
+
defer result.deinit();
|
|
1184
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1185
|
+
|
|
1186
|
+
try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
|
|
1187
|
+
try testing.expectEqual(@as(u16, 6), result.breaks.items[0].byte_offset);
|
|
1188
|
+
try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
test "wrap breaks: ASCII to CJK script transition" {
|
|
1192
|
+
const input = "abc日本語";
|
|
1193
|
+
|
|
1194
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1195
|
+
defer result.deinit();
|
|
1196
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1197
|
+
|
|
1198
|
+
try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
|
|
1199
|
+
try testing.expectEqual(@as(u16, 2), result.breaks.items[0].byte_offset);
|
|
1200
|
+
try testing.expectEqual(@as(u16, 2), result.breaks.items[0].char_offset);
|
|
1201
|
+
}
|
|
1202
|
+
|
|
1203
|
+
test "wrap breaks: CJK punctuation before ASCII" {
|
|
1204
|
+
const input = "日本語。abc";
|
|
1205
|
+
|
|
1206
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1207
|
+
defer result.deinit();
|
|
1208
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1209
|
+
|
|
1210
|
+
try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
|
|
1211
|
+
try testing.expectEqual(@as(u16, 9), result.breaks.items[0].byte_offset);
|
|
1212
|
+
try testing.expectEqual(@as(u16, 3), result.breaks.items[0].char_offset);
|
|
1213
|
+
}
|
|
1214
|
+
|
|
1215
|
+
test "wrap breaks: compat ideograph to ASCII script transition" {
    // U+2F800 lies in the CJK Compatibility Ideographs Supplement and is
    // canonically equivalent to the unified ideograph U+4E3D. It is written as
    // an escape so that an editor or pipeline applying Unicode normalization
    // cannot silently replace it with the unified form, which would leave this
    // test no longer covering a compatibility ideograph.
    const input = "\u{2F800}abc";

    var result = utf8.WrapBreakResult.init(testing.allocator);
    defer result.deinit();
    try utf8.findWrapBreaks(input, &result, .unicode);

    // Exactly one break, reported at the ideograph that opens the string.
    try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
    try testing.expectEqual(@as(u16, 0), result.breaks.items[0].byte_offset);
    try testing.expectEqual(@as(u16, 0), result.breaks.items[0].char_offset);
}
|
|
1226
|
+
|
|
1227
|
+
test "wrap breaks: extension I ideograph to ASCII script transition" {
    // U+2EBF0 is the first code point of CJK Unified Ideographs Extension I
    // (U+2EBF0..U+2EE5D), used here as a representative of that block. It is
    // written as an escape because the raw character is dropped by tools that
    // cannot render it, which leaves a plain "abc" with no break opportunity
    // and contradicts the assertions below.
    const input = "\u{2EBF0}abc";

    var result = utf8.WrapBreakResult.init(testing.allocator);
    defer result.deinit();
    try utf8.findWrapBreaks(input, &result, .unicode);

    // Exactly one break, reported at the ideograph that opens the string.
    try testing.expectEqual(@as(usize, 1), result.breaks.items.len);
    try testing.expectEqual(@as(u16, 0), result.breaks.items[0].byte_offset);
    try testing.expectEqual(@as(u16, 0), result.breaks.items[0].char_offset);
}
|
|
1238
|
+
|
|
1239
|
+
test "wrap breaks: emoji and CJK mixed offsets" {
|
|
1240
|
+
const input = "🌟 Unicode test: こんにちは世界 Hello World";
|
|
1241
|
+
|
|
1242
|
+
var result = utf8.WrapBreakResult.init(testing.allocator);
|
|
1243
|
+
defer result.deinit();
|
|
1244
|
+
try utf8.findWrapBreaks(input, &result, .unicode);
|
|
1245
|
+
|
|
1246
|
+
// Find the space before "Hello"
|
|
1247
|
+
var space_before_hello: ?utf8.WrapBreak = null;
|
|
1248
|
+
for (result.breaks.items) |brk| {
|
|
1249
|
+
if (brk.byte_offset == 40) {
|
|
1250
|
+
space_before_hello = brk;
|
|
1251
|
+
break;
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
try testing.expect(space_before_hello != null);
|
|
1256
|
+
try testing.expectEqual(@as(u16, 40), space_before_hello.?.byte_offset);
|
|
1257
|
+
try testing.expectEqual(@as(u16, 23), space_before_hello.?.char_offset); // Graphemes before this space
|
|
1258
|
+
|
|
1259
|
+
// Find the space after "Hello"
|
|
1260
|
+
var space_after_hello: ?utf8.WrapBreak = null;
|
|
1261
|
+
for (result.breaks.items) |brk| {
|
|
1262
|
+
if (brk.byte_offset == 46) {
|
|
1263
|
+
space_after_hello = brk;
|
|
1264
|
+
break;
|
|
1265
|
+
}
|
|
1266
|
+
}
|
|
1267
|
+
|
|
1268
|
+
try testing.expect(space_after_hello != null);
|
|
1269
|
+
try testing.expectEqual(@as(u16, 46), space_after_hello.?.byte_offset);
|
|
1270
|
+
try testing.expectEqual(@as(u16, 29), space_after_hello.?.char_offset);
|
|
1271
|
+
}
|
|
1272
|
+
|
|
1273
|
+
// ============================================================================
|
|
1274
|
+
// WRAP BY WIDTH TESTS
|
|
1275
|
+
// ============================================================================
|
|
1276
|
+
|
|
1277
|
+
test "wrap by width: empty string" {
|
|
1278
|
+
const result = utf8.findWrapPosByWidth("", 10, 4, false, .unicode);
|
|
1279
|
+
try testing.expectEqual(@as(u32, 0), result.byte_offset);
|
|
1280
|
+
try testing.expectEqual(@as(u32, 0), result.grapheme_count);
|
|
1281
|
+
try testing.expectEqual(@as(u32, 0), result.columns_used);
|
|
1282
|
+
}
|
|
1283
|
+
|
|
1284
|
+
test "wrap by width: simple ASCII no wrap" {
|
|
1285
|
+
const result = utf8.findWrapPosByWidth("hello", 10, 4, true, .unicode);
|
|
1286
|
+
try testing.expectEqual(@as(u32, 5), result.byte_offset);
|
|
1287
|
+
try testing.expectEqual(@as(u32, 5), result.grapheme_count);
|
|
1288
|
+
try testing.expectEqual(@as(u32, 5), result.columns_used);
|
|
1289
|
+
}
|
|
1290
|
+
|
|
1291
|
+
test "wrap by width: ASCII wrap exactly at limit" {
|
|
1292
|
+
const result = utf8.findWrapPosByWidth("hello", 5, 4, true, .unicode);
|
|
1293
|
+
try testing.expectEqual(@as(u32, 5), result.byte_offset);
|
|
1294
|
+
try testing.expectEqual(@as(u32, 5), result.grapheme_count);
|
|
1295
|
+
try testing.expectEqual(@as(u32, 5), result.columns_used);
|
|
1296
|
+
}
|
|
1297
|
+
|
|
1298
|
+
test "wrap by width: ASCII wrap before limit" {
|
|
1299
|
+
const result = utf8.findWrapPosByWidth("hello world", 7, 4, true, .unicode);
|
|
1300
|
+
try testing.expectEqual(@as(u32, 7), result.byte_offset);
|
|
1301
|
+
try testing.expectEqual(@as(u32, 7), result.grapheme_count);
|
|
1302
|
+
try testing.expectEqual(@as(u32, 7), result.columns_used);
|
|
1303
|
+
}
|
|
1304
|
+
|
|
1305
|
+
test "wrap by width: East Asian wide char" {
|
|
1306
|
+
const result = utf8.findWrapPosByWidth("世界", 3, 4, false, .unicode);
|
|
1307
|
+
try testing.expectEqual(@as(u32, 3), result.byte_offset); // After first char
|
|
1308
|
+
try testing.expectEqual(@as(u32, 1), result.grapheme_count);
|
|
1309
|
+
try testing.expectEqual(@as(u32, 2), result.columns_used);
|
|
1310
|
+
}
|
|
1311
|
+
|
|
1312
|
+
test "wrap by width: combining mark" {
|
|
1313
|
+
const result = utf8.findWrapPosByWidth("e\u{0301}test", 3, 4, false, .unicode);
|
|
1314
|
+
try testing.expectEqual(@as(u32, 5), result.byte_offset); // After "é" (3 bytes) + "te" (2 bytes)
|
|
1315
|
+
try testing.expectEqual(@as(u32, 3), result.grapheme_count);
|
|
1316
|
+
try testing.expectEqual(@as(u32, 3), result.columns_used);
|
|
1317
|
+
}
|
|
1318
|
+
|
|
1319
|
+
test "wrap by width: tab handling" {
|
|
1320
|
+
const result = utf8.findWrapPosByWidth("a\tb", 5, 4, false, .unicode);
|
|
1321
|
+
try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "a\t"
|
|
1322
|
+
try testing.expectEqual(@as(u32, 2), result.grapheme_count); // 'a' + tab
|
|
1323
|
+
try testing.expectEqual(@as(u32, 5), result.columns_used); // 'a' (1) + tab (4) = 5
|
|
1324
|
+
}
|
|
1325
|
+
|
|
1326
|
+
/// Smoke-checks `utf8.findWrapPosByWidth` for one input/width combination.
///
/// `input` is the text to measure, `max_columns` the column budget,
/// `tab_width` the width passed through for tab characters and `isASCIIOnly`
/// the ASCII hint forwarded to the implementation. The width method is always
/// `.unicode`.
///
/// Returns an error if the reported wrap position lies outside `input`.
fn testWrapByWidthMethodsMatch(input: []const u8, max_columns: u32, tab_width: u8, isASCIIOnly: bool) !void {
    const result = utf8.findWrapPosByWidth(input, max_columns, tab_width, isASCIIOnly, .unicode);

    // Only the SIMD16 implementation exists in utf8.zig, so there is no second
    // method to compare against. Rather than discarding the result, check the
    // one invariant that must hold for any input: the wrap position can never
    // point past the end of the buffer that was scanned.
    try testing.expect(result.byte_offset <= input.len);
}
|
|
1331
|
+
|
|
1332
|
+
test "wrap by width: consistency - realistic text" {
|
|
1333
|
+
const sample_text =
|
|
1334
|
+
"The quick brown fox jumps over the lazy dog. " ++
|
|
1335
|
+
"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " ++
|
|
1336
|
+
"File paths: /usr/local/bin and C:\\Windows\\System32. " ++
|
|
1337
|
+
"Punctuation test: Hello, world! How are you? I'm fine.";
|
|
1338
|
+
|
|
1339
|
+
const widths = [_]u32{ 10, 20, 40, 80, 120 };
|
|
1340
|
+
for (widths) |w| {
|
|
1341
|
+
try testWrapByWidthMethodsMatch(sample_text, w, 4, true);
|
|
1342
|
+
}
|
|
1343
|
+
}
|
|
1344
|
+
|
|
1345
|
+
test "wrap by width: consistency - Unicode text" {
|
|
1346
|
+
const unicode_text = "世界 こんにちは test 你好 CJK-mixed";
|
|
1347
|
+
|
|
1348
|
+
const widths = [_]u32{ 5, 10, 15, 20, 30 };
|
|
1349
|
+
for (widths) |w| {
|
|
1350
|
+
try testWrapByWidthMethodsMatch(unicode_text, w, 4, false);
|
|
1351
|
+
}
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
test "wrap by width: consistency - edge cases" {
|
|
1355
|
+
const edge_cases = [_]struct { text: []const u8, ascii: bool }{
|
|
1356
|
+
.{ .text = "", .ascii = false },
|
|
1357
|
+
.{ .text = " ", .ascii = true },
|
|
1358
|
+
.{ .text = "a", .ascii = true },
|
|
1359
|
+
.{ .text = "abc", .ascii = true },
|
|
1360
|
+
.{ .text = " ", .ascii = true },
|
|
1361
|
+
.{ .text = "a b c d e", .ascii = true },
|
|
1362
|
+
.{ .text = "no-spaces-here", .ascii = true },
|
|
1363
|
+
.{ .text = "/usr/local/bin", .ascii = true },
|
|
1364
|
+
.{ .text = "世界", .ascii = false },
|
|
1365
|
+
.{ .text = "\t\t\t", .ascii = false },
|
|
1366
|
+
};
|
|
1367
|
+
|
|
1368
|
+
for (edge_cases) |input| {
|
|
1369
|
+
const widths = [_]u32{ 1, 5, 10, 20 };
|
|
1370
|
+
for (widths) |w| {
|
|
1371
|
+
try testWrapByWidthMethodsMatch(input.text, w, 4, input.ascii);
|
|
1372
|
+
}
|
|
1373
|
+
}
|
|
1374
|
+
}
|
|
1375
|
+
|
|
1376
|
+
test "wrap by width: property - random ASCII buffers" {
|
|
1377
|
+
var prng = std.Random.DefaultPrng.init(42);
|
|
1378
|
+
const random = prng.random();
|
|
1379
|
+
|
|
1380
|
+
var i: usize = 0;
|
|
1381
|
+
while (i < 50) : (i += 1) {
|
|
1382
|
+
const size = 16 + random.uintLessThan(usize, 256);
|
|
1383
|
+
const buf = try testing.allocator.alloc(u8, size);
|
|
1384
|
+
defer testing.allocator.free(buf);
|
|
1385
|
+
|
|
1386
|
+
for (buf) |*b| {
|
|
1387
|
+
b.* = 'a' + random.uintLessThan(u8, 26);
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
const width = 10 + random.uintLessThan(u32, 70);
|
|
1391
|
+
try testWrapByWidthMethodsMatch(buf, width, 4, true);
|
|
1392
|
+
}
|
|
1393
|
+
}
|
|
1394
|
+
|
|
1395
|
+
test "wrap by width: boundary - SIMD16 chunk boundary" {
|
|
1396
|
+
var buf: [32]u8 = undefined;
|
|
1397
|
+
@memset(&buf, 'x');
|
|
1398
|
+
try testWrapByWidthMethodsMatch(&buf, 20, 4, true);
|
|
1399
|
+
try testWrapByWidthMethodsMatch(&buf, 10, 4, true);
|
|
1400
|
+
}
|
|
1401
|
+
|
|
1402
|
+
test "wrap by width: boundary - Unicode at SIMD boundary" {
|
|
1403
|
+
var buf: [32]u8 = undefined;
|
|
1404
|
+
@memset(&buf, 'a');
|
|
1405
|
+
const cjk = "世";
|
|
1406
|
+
@memcpy(buf[14..17], cjk);
|
|
1407
|
+
try testWrapByWidthMethodsMatch(buf[0..20], 20, 4, false);
|
|
1408
|
+
}
|
|
1409
|
+
|
|
1410
|
+
test "wrap by width: wide emoji exactly at column boundary" {
|
|
1411
|
+
const input = "Hello 🌍 World";
|
|
1412
|
+
|
|
1413
|
+
const result7 = utf8.findWrapPosByWidth(input, 7, 8, false, .unicode);
|
|
1414
|
+
try testing.expectEqual(@as(u32, 6), result7.byte_offset);
|
|
1415
|
+
try testing.expectEqual(@as(u32, 6), result7.columns_used);
|
|
1416
|
+
|
|
1417
|
+
const result8 = utf8.findWrapPosByWidth(input, 8, 8, false, .unicode);
|
|
1418
|
+
try testing.expectEqual(@as(u32, 10), result8.byte_offset);
|
|
1419
|
+
try testing.expectEqual(@as(u32, 8), result8.columns_used);
|
|
1420
|
+
|
|
1421
|
+
const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
|
|
1422
|
+
try testing.expectEqual(@as(u32, 6), result6.byte_offset);
|
|
1423
|
+
try testing.expectEqual(@as(u32, 6), result6.columns_used);
|
|
1424
|
+
}
|
|
1425
|
+
|
|
1426
|
+
test "wrap by width: wide emoji at start" {
|
|
1427
|
+
const input = "🌍 World";
|
|
1428
|
+
|
|
1429
|
+
const result1 = utf8.findWrapPosByWidth(input, 1, 8, false, .unicode);
|
|
1430
|
+
try testing.expectEqual(@as(u32, 0), result1.byte_offset);
|
|
1431
|
+
try testing.expectEqual(@as(u32, 0), result1.columns_used);
|
|
1432
|
+
|
|
1433
|
+
const result2 = utf8.findWrapPosByWidth(input, 2, 8, false, .unicode);
|
|
1434
|
+
try testing.expectEqual(@as(u32, 4), result2.byte_offset);
|
|
1435
|
+
try testing.expectEqual(@as(u32, 2), result2.columns_used);
|
|
1436
|
+
|
|
1437
|
+
const result3 = utf8.findWrapPosByWidth(input, 3, 8, false, .unicode);
|
|
1438
|
+
try testing.expectEqual(@as(u32, 5), result3.byte_offset);
|
|
1439
|
+
try testing.expectEqual(@as(u32, 3), result3.columns_used);
|
|
1440
|
+
}
|
|
1441
|
+
|
|
1442
|
+
test "wrap by width: multiple wide characters" {
|
|
1443
|
+
const input = "AB🌍CD🌎EF";
|
|
1444
|
+
|
|
1445
|
+
const result5 = utf8.findWrapPosByWidth(input, 5, 8, false, .unicode);
|
|
1446
|
+
try testing.expectEqual(@as(u32, 7), result5.byte_offset);
|
|
1447
|
+
try testing.expectEqual(@as(u32, 5), result5.columns_used);
|
|
1448
|
+
|
|
1449
|
+
const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
|
|
1450
|
+
try testing.expectEqual(@as(u32, 8), result6.byte_offset);
|
|
1451
|
+
try testing.expectEqual(@as(u32, 6), result6.columns_used);
|
|
1452
|
+
}
|
|
1453
|
+
|
|
1454
|
+
test "wrap by width: CJK wide characters at boundary" {
|
|
1455
|
+
const input = "hello世界test";
|
|
1456
|
+
|
|
1457
|
+
const result6 = utf8.findWrapPosByWidth(input, 6, 8, false, .unicode);
|
|
1458
|
+
try testing.expectEqual(@as(u32, 5), result6.byte_offset);
|
|
1459
|
+
try testing.expectEqual(@as(u32, 5), result6.columns_used);
|
|
1460
|
+
|
|
1461
|
+
const result7 = utf8.findWrapPosByWidth(input, 7, 8, false, .unicode);
|
|
1462
|
+
try testing.expectEqual(@as(u32, 8), result7.byte_offset);
|
|
1463
|
+
try testing.expectEqual(@as(u32, 7), result7.columns_used);
|
|
1464
|
+
}
|
|
1465
|
+
|
|
1466
|
+
// ============================================================================
|
|
1467
|
+
// FIND POS BY WIDTH TESTS (for selection - includes graphemes that start before limit)
|
|
1468
|
+
// ============================================================================
|
|
1469
|
+
|
|
1470
|
+
test "find pos by width: wide emoji at boundary - INCLUDES grapheme" {
|
|
1471
|
+
const input = "Hello 🌍 World";
|
|
1472
|
+
// Layout: H(0) e(1) l(2) l(3) o(4) space(5) 🌍(6-7) space(8) W(9)...
|
|
1473
|
+
|
|
1474
|
+
// include_start_before=true (selection end): include graphemes that START before max_columns
|
|
1475
|
+
const result7 = utf8.findPosByWidth(input, 7, 8, false, true, .unicode);
|
|
1476
|
+
try testing.expectEqual(@as(u32, 10), result7.byte_offset); // After emoji (snapped forward)
|
|
1477
|
+
try testing.expectEqual(@as(u32, 8), result7.columns_used);
|
|
1478
|
+
|
|
1479
|
+
const result8 = utf8.findPosByWidth(input, 8, 8, false, true, .unicode);
|
|
1480
|
+
try testing.expectEqual(@as(u32, 10), result8.byte_offset);
|
|
1481
|
+
try testing.expectEqual(@as(u32, 8), result8.columns_used);
|
|
1482
|
+
|
|
1483
|
+
const result6 = utf8.findPosByWidth(input, 6, 8, false, true, .unicode);
|
|
1484
|
+
try testing.expectEqual(@as(u32, 6), result6.byte_offset);
|
|
1485
|
+
try testing.expectEqual(@as(u32, 6), result6.columns_used);
|
|
1486
|
+
|
|
1487
|
+
// include_start_before=false (selection start): exclude graphemes that cross max_columns
|
|
1488
|
+
const start7 = utf8.findPosByWidth(input, 7, 8, false, false, .unicode);
|
|
1489
|
+
try testing.expectEqual(@as(u32, 6), start7.byte_offset); // Before emoji (snapped backward)
|
|
1490
|
+
try testing.expectEqual(@as(u32, 6), start7.columns_used);
|
|
1491
|
+
}
|
|
1492
|
+
|
|
1493
|
+
test "find pos by width: start at second cell of width=2 grapheme snaps backward" {
|
|
1494
|
+
const input = "AB🌍CD";
|
|
1495
|
+
const result = utf8.findPosByWidth(input, 3, 8, false, false, .unicode);
|
|
1496
|
+
try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "AB", before emoji
|
|
1497
|
+
try testing.expectEqual(@as(u32, 2), result.columns_used);
|
|
1498
|
+
}
|
|
1499
|
+
|
|
1500
|
+
test "find pos by width: end at first cell of width=2 grapheme snaps forward" {
|
|
1501
|
+
const input = "AB🌍CD";
|
|
1502
|
+
const result = utf8.findPosByWidth(input, 2, 8, false, true, .unicode);
|
|
1503
|
+
try testing.expectEqual(@as(u32, 2), result.byte_offset); // After "AB" (emoji starts at 2, which is NOT > 2, but hasn't been consumed yet)
|
|
1504
|
+
try testing.expectEqual(@as(u32, 2), result.columns_used);
|
|
1505
|
+
|
|
1506
|
+
const result3 = utf8.findPosByWidth(input, 3, 8, false, true, .unicode);
|
|
1507
|
+
try testing.expectEqual(@as(u32, 6), result3.byte_offset); // After "AB🌍"
|
|
1508
|
+
try testing.expectEqual(@as(u32, 4), result3.columns_used);
|
|
1509
|
+
}
|
|
1510
|
+
|
|
1511
|
+
test "find pos by width: selection boundaries with multiple wide chars" {
|
|
1512
|
+
const input = "A🌍B🌎C";
|
|
1513
|
+
const start2 = utf8.findPosByWidth(input, 2, 8, false, false, .unicode);
|
|
1514
|
+
try testing.expectEqual(@as(u32, 1), start2.byte_offset); // After "A", before first emoji
|
|
1515
|
+
try testing.expectEqual(@as(u32, 1), start2.columns_used);
|
|
1516
|
+
|
|
1517
|
+
const end5 = utf8.findPosByWidth(input, 5, 8, false, true, .unicode);
|
|
1518
|
+
try testing.expectEqual(@as(u32, 10), end5.byte_offset); // After "A🌍B🌎"
|
|
1519
|
+
try testing.expectEqual(@as(u32, 6), end5.columns_used);
|
|
1520
|
+
}
|
|
1521
|
+
|
|
1522
|
+
test "find pos by width: empty string" {
|
|
1523
|
+
const result = utf8.findPosByWidth("", 10, 4, false, true, .unicode);
|
|
1524
|
+
try testing.expectEqual(@as(u32, 0), result.byte_offset);
|
|
1525
|
+
try testing.expectEqual(@as(u32, 0), result.grapheme_count);
|
|
1526
|
+
try testing.expectEqual(@as(u32, 0), result.columns_used);
|
|
1527
|
+
}
|
|
1528
|
+
|
|
1529
|
+
test "find pos by width: simple ASCII no limit" {
|
|
1530
|
+
const result = utf8.findPosByWidth("hello", 10, 4, true, true, .unicode);
|
|
1531
|
+
try testing.expectEqual(@as(u32, 5), result.byte_offset);
|
|
1532
|
+
try testing.expectEqual(@as(u32, 5), result.grapheme_count);
|
|
1533
|
+
try testing.expectEqual(@as(u32, 5), result.columns_used);
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
test "find pos by width: ASCII exactly at limit" {
|
|
1537
|
+
const result = utf8.findPosByWidth("hello", 5, 4, true, true, .unicode);
|
|
1538
|
+
try testing.expectEqual(@as(u32, 5), result.byte_offset);
|
|
1539
|
+
try testing.expectEqual(@as(u32, 5), result.grapheme_count);
|
|
1540
|
+
try testing.expectEqual(@as(u32, 5), result.columns_used);
|
|
1541
|
+
}
|
|
1542
|
+
|
|
1543
|
+
test "find pos by width: wide emoji at start" {
|
|
1544
|
+
const input = "🌍 World";
|
|
1545
|
+
|
|
1546
|
+
const result1 = utf8.findPosByWidth(input, 1, 8, false, true, .unicode);
|
|
1547
|
+
try testing.expectEqual(@as(u32, 4), result1.byte_offset);
|
|
1548
|
+
try testing.expectEqual(@as(u32, 2), result1.columns_used);
|
|
1549
|
+
|
|
1550
|
+
const result2 = utf8.findPosByWidth(input, 2, 8, false, true, .unicode);
|
|
1551
|
+
try testing.expectEqual(@as(u32, 4), result2.byte_offset);
|
|
1552
|
+
try testing.expectEqual(@as(u32, 2), result2.columns_used);
|
|
1553
|
+
|
|
1554
|
+
const result3 = utf8.findPosByWidth(input, 3, 8, false, true, .unicode);
|
|
1555
|
+
try testing.expectEqual(@as(u32, 5), result3.byte_offset);
|
|
1556
|
+
try testing.expectEqual(@as(u32, 3), result3.columns_used);
|
|
1557
|
+
}
|
|
1558
|
+
|
|
1559
|
+
test "find pos by width: multiple wide characters" {
|
|
1560
|
+
const input = "AB🌍CD🌎EF";
|
|
1561
|
+
|
|
1562
|
+
const result5 = utf8.findPosByWidth(input, 5, 8, false, true, .unicode);
|
|
1563
|
+
try testing.expectEqual(@as(u32, 7), result5.byte_offset);
|
|
1564
|
+
try testing.expectEqual(@as(u32, 5), result5.columns_used);
|
|
1565
|
+
|
|
1566
|
+
const result7 = utf8.findPosByWidth(input, 7, 8, false, true, .unicode);
|
|
1567
|
+
try testing.expectEqual(@as(u32, 12), result7.byte_offset);
|
|
1568
|
+
try testing.expectEqual(@as(u32, 8), result7.columns_used);
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
test "find pos by width: CJK wide characters" {
|
|
1572
|
+
const input = "hello世界test";
|
|
1573
|
+
|
|
1574
|
+
const result6 = utf8.findPosByWidth(input, 6, 8, false, true, .unicode);
|
|
1575
|
+
try testing.expectEqual(@as(u32, 8), result6.byte_offset);
|
|
1576
|
+
try testing.expectEqual(@as(u32, 7), result6.columns_used);
|
|
1577
|
+
|
|
1578
|
+
const result8 = utf8.findPosByWidth(input, 8, 8, false, true, .unicode);
|
|
1579
|
+
try testing.expectEqual(@as(u32, 11), result8.byte_offset);
|
|
1580
|
+
try testing.expectEqual(@as(u32, 9), result8.columns_used);
|
|
1581
|
+
}
|
|
1582
|
+
|
|
1583
|
+
test "eastAsianWidth: verify all characters in test string have correct width" {
    // Code points that must be reported as two columns wide.
    const wide_code_points = [_]u21{
        // Hiragana from "こんにちは"
        0x3053, // こ
        0x3093, // ん
        0x306B, // に
        0x3061, // ち
        0x306F, // は
        // Kanji from "世界"
        0x4E16, // 世
        0x754C, // 界
        // Emoji
        0x1F31F, // 🌟
        0x1F680, // 🚀
        // Chinese from "你好"
        0x4F60, // 你
        0x597D, // 好
        // Korean from "안녕하세요"
        0xC548, // 안
        0xB155, // 녕
        0xD558, // 하
        0xC138, // 세
        0xC694, // 요
    };
    // `inline for` keeps each argument comptime-known, as the literals were.
    inline for (wide_code_points) |code_point| {
        try testing.expectEqual(@as(u32, 2), utf8.eastAsianWidth(code_point));
    }

    // Plain ASCII must be reported as one column wide.
    const narrow_code_points = [_]u21{ 'H', 'e', ' ', ':' };
    inline for (narrow_code_points) |code_point| {
        try testing.expectEqual(@as(u32, 1), utf8.eastAsianWidth(code_point));
    }
}
|
|
1618
|
+
|
|
1619
|
+
test "calculateTextWidth: verify CJK string widths character by character" {
    // `flag` is passed through as the third argument; it is `true` only for
    // the pure-ASCII inputs (presumably an ASCII-only hint — TODO confirm).
    const Case = struct { text: []const u8, flag: bool, width: u32 };
    const cases = [_]Case{
        // Individual CJK characters
        .{ .text = "こ", .flag = false, .width = 2 },
        .{ .text = "ん", .flag = false, .width = 2 },
        .{ .text = "に", .flag = false, .width = 2 },
        .{ .text = "ち", .flag = false, .width = 2 },
        .{ .text = "は", .flag = false, .width = 2 },
        .{ .text = "世", .flag = false, .width = 2 },
        .{ .text = "界", .flag = false, .width = 2 },
        // Cumulative widths
        .{ .text = "こん", .flag = false, .width = 4 },
        .{ .text = "こんに", .flag = false, .width = 6 },
        .{ .text = "こんにちは世界", .flag = false, .width = 14 },
        // Mixed ASCII and CJK
        .{ .text = "Hello", .flag = true, .width = 5 },
        .{ .text = "Hello ", .flag = true, .width = 6 },
        .{ .text = "Hello 世", .flag = false, .width = 8 },
        .{ .text = "Hello 世界", .flag = false, .width = 10 },
    };

    inline for (cases) |case| {
        try testing.expectEqual(case.width, utf8.calculateTextWidth(case.text, 8, case.flag, .unicode));
    }
}
|
|
1640
|
+
|
|
1641
|
+
test "calculateTextWidth: step by step for emoji CJK test string" {
    // Growing prefixes of one sentence paired with the expected total width.
    const Step = struct { text: []const u8, width: u32 };
    const steps = [_]Step{
        // Emoji and ASCII lead-in
        .{ .text = "🌟", .width = 2 },
        .{ .text = "🌟 ", .width = 3 },
        .{ .text = "🌟 Unicode", .width = 10 },
        .{ .text = "🌟 Unicode ", .width = 11 },
        .{ .text = "🌟 Unicode test", .width = 15 },
        .{ .text = "🌟 Unicode test:", .width = 16 },
        .{ .text = "🌟 Unicode test: ", .width = 17 },
        // CJK section: every character adds two columns
        .{ .text = "🌟 Unicode test: こ", .width = 19 },
        .{ .text = "🌟 Unicode test: こん", .width = 21 },
        .{ .text = "🌟 Unicode test: こんに", .width = 23 },
        .{ .text = "🌟 Unicode test: こんにち", .width = 25 },
        .{ .text = "🌟 Unicode test: こんにちは", .width = 27 },
        .{ .text = "🌟 Unicode test: こんにちは世", .width = 29 },
        .{ .text = "🌟 Unicode test: こんにちは世界", .width = 31 },
        .{ .text = "🌟 Unicode test: こんにちは世界 ", .width = 32 },
        // English section
        .{ .text = "🌟 Unicode test: こんにちは世界 H", .width = 33 },
        .{ .text = "🌟 Unicode test: こんにちは世界 Hello", .width = 37 },
        .{ .text = "🌟 Unicode test: こんにちは世界 Hello ", .width = 38 },
        .{ .text = "🌟 Unicode test: こんにちは世界 Hello World", .width = 43 },
    };

    inline for (steps) |step| {
        try testing.expectEqual(step.width, utf8.calculateTextWidth(step.text, 8, false, .unicode));
    }
}
|
|
1667
|
+
|
|
1668
|
+
test "find pos by width: CJK characters with English - verify column calculation" {
    // Mixed emoji / ASCII / CJK input shared by both halves of the test.
    const input = "🌟 Unicode test: こんにちは世界 Hello World 你好世界";

    // Part 1: width of byte prefixes at key positions.
    // Byte 40 is the end of "世界"; the following entries step through
    // " Hello World " one boundary at a time.
    const Prefix = struct { end: usize, width: u32 };
    const prefixes = [_]Prefix{
        .{ .end = 40, .width = 31 }, // before the space preceding "Hello"
        .{ .end = 41, .width = 32 }, // including that space
        .{ .end = 46, .width = 37 }, // up to the end of "Hello"
        .{ .end = 47, .width = 38 }, // including the space after "Hello"
        .{ .end = 52, .width = 43 }, // up to the end of "World"
        .{ .end = 53, .width = 44 }, // including the space after "World"
    };
    inline for (prefixes) |prefix| {
        try testing.expectEqual(prefix.width, utf8.calculateTextWidth(input[0..prefix.end], 8, false, .unicode));
    }

    // Part 2: findPosByWidth must return positions consistent with part 1.
    const Probe = struct { limit: u32, byte_offset: u32, columns: u32 };
    const probes = [_]Probe{
        .{ .limit = 35, .byte_offset = 44, .columns = 35 },
        .{ .limit = 36, .byte_offset = 45, .columns = 36 },
        .{ .limit = 37, .byte_offset = 46, .columns = 37 },
        .{ .limit = 42, .byte_offset = 51, .columns = 42 },
    };
    inline for (probes) |probe| {
        const found = utf8.findPosByWidth(input, probe.limit, 8, false, false, .unicode);
        try testing.expectEqual(probe.byte_offset, found.byte_offset);
        try testing.expectEqual(probe.columns, found.columns_used);
    }
}
|
|
1708
|
+
|
|
1709
|
+
test "find pos by width: combining mark" {
    // 'e' + U+0301 is one grapheme of 3 bytes and 1 column; "te" adds
    // 2 bytes, 2 graphemes and 2 columns, reaching the 3-column limit.
    const pos = utf8.findPosByWidth("e\u{0301}test", 3, 4, false, true, .unicode);

    try testing.expectEqual(@as(u32, 5), pos.byte_offset);
    try testing.expectEqual(@as(u32, 3), pos.grapheme_count);
    try testing.expectEqual(@as(u32, 3), pos.columns_used);
}
|
|
1715
|
+
|
|
1716
|
+
test "find pos by width: tab handling" {
    // Tab width argument is 4: 'a' (1 column) + tab (4 columns) = 5 columns.
    const pos = utf8.findPosByWidth("a\tb", 5, 4, false, true, .unicode);

    try testing.expectEqual(@as(u32, 2), pos.byte_offset); // just past "a\t"
    try testing.expectEqual(@as(u32, 2), pos.grapheme_count); // 'a' and the tab
    try testing.expectEqual(@as(u32, 5), pos.columns_used); // 1 + 4
}
|
|
1722
|
+
|
|
1723
|
+
// ============================================================================
|
|
1724
|
+
// SPLIT CHUNK AT WEIGHT TESTS (include_start_before=false)
|
|
1725
|
+
// Tests for the exact behavior needed by splitChunkAtWeight in edit-buffer.zig
|
|
1726
|
+
// ============================================================================
|
|
1727
|
+
|
|
1728
|
+
test "split at weight: ASCII simple split" {
    const text = "hello world";

    // A 5-column budget ends right after "hello": 5 bytes, 5 columns.
    const split = utf8.findPosByWidth(text, 5, 8, true, false, .unicode);
    try testing.expectEqual(@as(u32, 5), split.byte_offset);
    try testing.expectEqual(@as(u32, 5), split.columns_used);
}
|
|
1736
|
+
|
|
1737
|
+
test "split at weight: ASCII split in middle" {
    const text = "abcdefghij";

    // A 3-column budget ends right after "abc".
    const split = utf8.findPosByWidth(text, 3, 8, true, false, .unicode);
    try testing.expectEqual(@as(u32, 3), split.byte_offset);
    try testing.expectEqual(@as(u32, 3), split.columns_used);
}
|
|
1745
|
+
|
|
1746
|
+
test "split at weight: wide char at boundary - exclude when starting after" {
    // A(1) B(1) 🌍(2) C(1) D(1)
    const text = "AB🌍CD";

    // Budget 2: exactly "AB"; the emoji is not included.
    const at_two = utf8.findPosByWidth(text, 2, 8, false, false, .unicode);
    try testing.expectEqual(@as(u32, 2), at_two.byte_offset);
    try testing.expectEqual(@as(u32, 2), at_two.columns_used);

    // Budget 3: the 2-column emoji would overshoot, so the split is still
    // expected after "AB".
    const at_three = utf8.findPosByWidth(text, 3, 8, false, false, .unicode);
    try testing.expectEqual(@as(u32, 2), at_three.byte_offset);
    try testing.expectEqual(@as(u32, 2), at_three.columns_used);
}
|
|
1758
|
+
|
|
1759
|
+
test "split at weight: CJK characters" {
    // h(1) e(1) l(1) l(1) o(1) 世(2) 界(2) t(1) e(1) s(1) t(1)
    const text = "hello世界test";

    const Probe = struct { limit: u32, byte_offset: u32, columns: u32 };
    const probes = [_]Probe{
        // Exactly after "hello".
        .{ .limit = 5, .byte_offset = 5, .columns = 5 },
        // 世 would overshoot a 6-column budget, so still after "hello".
        .{ .limit = 6, .byte_offset = 5, .columns = 5 },
        // Both ideographs fit: after "hello世界".
        .{ .limit = 9, .byte_offset = 11, .columns = 9 },
    };

    inline for (probes) |probe| {
        const split = utf8.findPosByWidth(text, probe.limit, 8, false, false, .unicode);
        try testing.expectEqual(probe.byte_offset, split.byte_offset);
        try testing.expectEqual(probe.columns, split.columns_used);
    }
}
|
|
1776
|
+
|
|
1777
|
+
test "split at weight: combining marks" {
    // c(1) a(1) f(1) é(1) t(1) e(1) s(1) t(1); é is 'e' + U+0301.
    const text = "cafe\u{0301}test";

    // A 4-column budget must keep the combining accent attached to its 'e':
    // "cafe" is 4 bytes and U+0301 is 2 bytes, so the split is at byte 6.
    const split = utf8.findPosByWidth(text, 4, 8, false, false, .unicode);
    try testing.expectEqual(@as(u32, 6), split.byte_offset);
    try testing.expectEqual(@as(u32, 4), split.columns_used);
}
|
|
1785
|
+
|
|
1786
|
+
test "split at weight: emoji with skin tone" {
    // H(1) i(1) 👋🏿(wide) B(1) y(1) e(1)
    const text = "Hi👋🏿Bye";

    // Budget 2: the split is expected right after "Hi", before the emoji.
    const at_two = utf8.findPosByWidth(text, 2, 8, false, false, .unicode);
    try testing.expectEqual(@as(u32, 2), at_two.byte_offset);
    try testing.expectEqual(@as(u32, 2), at_two.columns_used);

    // Budget 5: the exact stopping point is deliberately not pinned here;
    // only require that the result is at least past "Hi".
    const at_five = utf8.findPosByWidth(text, 5, 8, false, false, .unicode);
    try testing.expect(at_five.byte_offset >= 2);
    try testing.expect(at_five.columns_used >= 2);
}
|
|
1801
|
+
|
|
1802
|
+
test "split at weight: zero width at start" {
    const text = "hello";

    // A zero-column budget consumes nothing.
    const split = utf8.findPosByWidth(text, 0, 8, true, false, .unicode);
    try testing.expectEqual(@as(u32, 0), split.byte_offset);
    try testing.expectEqual(@as(u32, 0), split.columns_used);
}
|
|
1810
|
+
|
|
1811
|
+
test "split at weight: beyond end" {
    const text = "hello"; // 5 bytes, 5 columns

    // A budget larger than the text returns the whole string.
    const split = utf8.findPosByWidth(text, 10, 8, true, false, .unicode);
    try testing.expectEqual(@as(u32, 5), split.byte_offset);
    try testing.expectEqual(@as(u32, 5), split.columns_used);
}
|
|
1819
|
+
|
|
1820
|
+
test "split at weight: tab character" {
    // a(1) tab(4, fixed) b(1) c(1) = 7 columns in total.
    const text = "a\tbc";

    // Budget 4: 'a' plus the 4-column tab would need 5 columns, so the
    // split is expected before the tab.
    const split = utf8.findPosByWidth(text, 4, 4, false, false, .unicode);
    try testing.expectEqual(@as(u32, 1), split.byte_offset); // after "a"
    try testing.expectEqual(@as(u32, 1), split.columns_used); // only 'a'
}
|
|
1828
|
+
|
|
1829
|
+
test "split at weight: complex mixed content" {
    // A(1) 🌍(2) B(1) 世(2) C(1) = 7 columns; bytes: 1 + 4 + 1 + 3 + 1.
    const text = "A🌍B世C";

    // Expected byte offset for each column budget.
    const Probe = struct { limit: u32, byte_offset: u32 };
    const probes = [_]Probe{
        .{ .limit = 1, .byte_offset = 1 }, // after "A"
        .{ .limit = 2, .byte_offset = 1 }, // emoji does not fit: after "A"
        .{ .limit = 3, .byte_offset = 5 }, // after "A🌍"
        .{ .limit = 4, .byte_offset = 6 }, // after "A🌍B"
        .{ .limit = 5, .byte_offset = 6 }, // 世 does not fit: after "A🌍B"
    };

    inline for (probes) |probe| {
        const split = utf8.findPosByWidth(text, probe.limit, 8, false, false, .unicode);
        try testing.expectEqual(probe.byte_offset, split.byte_offset);
    }
}
|
|
1846
|
+
|
|
1847
|
+
// ============================================================================
|
|
1848
|
+
// GET WIDTH AT TESTS
|
|
1849
|
+
// ============================================================================
|
|
1850
|
+
|
|
1851
|
+
test "getWidthAt: empty string" {
    // Nothing to measure in an empty input: width 0.
    try testing.expectEqual(@as(u32, 0), utf8.getWidthAt("", 0, 8, .unicode));
}
|
|
1855
|
+
|
|
1856
|
+
test "getWidthAt: out of bounds" {
    // Offset 10 is past the end of the 5-byte input: width 0.
    try testing.expectEqual(@as(u32, 0), utf8.getWidthAt("hello", 10, 8, .unicode));
}
|
|
1860
|
+
|
|
1861
|
+
test "getWidthAt: simple ASCII" {
    const text = "hello";

    // Byte offsets of 'h', 'e' and 'o'; each is one column wide.
    const offsets = [_]usize{ 0, 1, 4 };
    inline for (offsets) |offset| {
        try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, offset, 8, .unicode));
    }
}
|
|
1867
|
+
|
|
1868
|
+
test "getWidthAt: tab character" {
    const text = "a\tb";

    const width_a = utf8.getWidthAt(text, 0, 4, .unicode);
    const width_tab = utf8.getWidthAt(text, 1, 4, .unicode);
    const width_b = utf8.getWidthAt(text, 2, 4, .unicode);

    try testing.expectEqual(@as(u32, 1), width_a);
    try testing.expectEqual(@as(u32, 4), width_tab); // tab has the fixed width 4
    try testing.expectEqual(@as(u32, 1), width_b);
}
|
|
1874
|
+
|
|
1875
|
+
test "getWidthAt: tab at different columns" {
    const text = "\t";

    // The tab has a fixed width that no longer depends on the current
    // column, so the call takes no column argument and all five checks are
    // identical (they appear to be left over from a column-dependent version).
    for (0..5) |_| {
        try testing.expectEqual(@as(u32, 4), utf8.getWidthAt(text, 0, 4, .unicode));
    }
}
|
|
1884
|
+
|
|
1885
|
+
test "getWidthAt: CJK wide character" {
    const text = "世界";

    // Each ideograph is 3 bytes long and two columns wide.
    const offsets = [_]usize{ 0, 3 }; // '世', '界'
    inline for (offsets) |offset| {
        try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, offset, 8, .unicode));
    }
}
|
|
1890
|
+
|
|
1891
|
+
test "getWidthAt: emoji single width" {
    // A single emoji with no modifiers; the expected width is 2 columns.
    const emoji_width = utf8.getWidthAt("🌍", 0, 8, .unicode);
    try testing.expectEqual(@as(u32, 2), emoji_width);
}
|
|
1895
|
+
|
|
1896
|
+
test "getWidthAt: combining mark grapheme" {
    // The final 'e' (byte 3) is followed by the combining acute U+0301.
    const text = "cafe\u{0301}";

    // Base letter (1 column) + combining mark (0 columns) = 1 column.
    try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 3, 8, .unicode));
}
|
|
1901
|
+
|
|
1902
|
+
test "getWidthAt: emoji with skin tone" {
    // Waving hand followed by the dark skin tone modifier.
    const text = "👋🏿";

    // Base + modifier form one grapheme cluster that is two columns wide.
    try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode));
}
|
|
1907
|
+
|
|
1908
|
+
test "getWidthAt: emoji with ZWJ" {
    // Woman astronaut: woman + ZERO WIDTH JOINER + rocket.
    // The joiner is written as an explicit escape because it is invisible
    // and easily lost when the file is copied or normalized; without it the
    // literal is two separate emoji and no longer exercises the ZWJ path.
    const text = "👩\u{200D}🚀";

    // The whole sequence is one grapheme cluster, two columns wide.
    const width = utf8.getWidthAt(text, 0, 8, .unicode);
    try testing.expectEqual(@as(u32, 2), width);
}
|
|
1913
|
+
|
|
1914
|
+
test "getWidthAt: flag emoji" {
    // US flag: a pair of regional indicator symbols.
    const text = "🇺🇸";

    // The pair is measured as one grapheme cluster, two columns wide.
    try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode));
}
|
|
1919
|
+
|
|
1920
|
+
test "getWidthAt: mixed ASCII and CJK" {
    // "Hello" occupies bytes 0-4; 世 starts at byte 5 and 界 at byte 8.
    const text = "Hello世界";

    const Probe = struct { offset: usize, width: u32 };
    const probes = [_]Probe{
        .{ .offset = 0, .width = 1 }, // 'H'
        .{ .offset = 1, .width = 1 }, // 'e'
        .{ .offset = 5, .width = 2 }, // '世'
        .{ .offset = 8, .width = 2 }, // '界'
    };

    inline for (probes) |probe| {
        try testing.expectEqual(probe.width, utf8.getWidthAt(text, probe.offset, 8, .unicode));
    }
}
|
|
1927
|
+
|
|
1928
|
+
test "getWidthAt: emoji with VS16 selector" {
    // HEAVY BLACK HEART (U+2764) + VARIATION SELECTOR-16 (U+FE0F).
    // Both code points are spelled as escapes so that the invisible
    // selector cannot be dropped silently by an editor or a copy/paste;
    // the test is meaningless without it.
    const text = "\u{2764}\u{FE0F}";

    // Base + selector form one grapheme cluster, two columns wide.
    const width = utf8.getWidthAt(text, 0, 8, .unicode);
    try testing.expectEqual(@as(u32, 2), width);
}
|
|
1933
|
+
|
|
1934
|
+
test "getWidthAt: hiragana" {
    const text = "こんにちは";

    // Each kana is 3 bytes long and two columns wide.
    const offsets = [_]usize{ 0, 3 }; // 'こ', 'ん'
    inline for (offsets) |offset| {
        try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, offset, 8, .unicode));
    }
}
|
|
1939
|
+
|
|
1940
|
+
test "getWidthAt: katakana" {
    const text = "カタカナ";

    // Each kana is 3 bytes long and two columns wide.
    const offsets = [_]usize{ 0, 3 }; // 'カ', 'タ'
    inline for (offsets) |offset| {
        try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, offset, 8, .unicode));
    }
}
|
|
1945
|
+
|
|
1946
|
+
test "getWidthAt: fullwidth forms" {
    // FULLWIDTH LATIN CAPITAL LETTER A, B, C (U+FF21..U+FF23), 3 bytes each.
    // Written as escapes: compatibility normalization folds these characters
    // to plain ASCII "ABC", which is 1 column wide and only 3 bytes long, so
    // the assertions below (width 2, second character at byte 3) could not
    // hold for an ASCII literal.
    const text = "\u{FF21}\u{FF22}\u{FF23}";

    try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 0, 8, .unicode)); // fullwidth 'A'
    try testing.expectEqual(@as(u32, 2), utf8.getWidthAt(text, 3, 8, .unicode)); // fullwidth 'B'
}
|
|
1951
|
+
|
|
1952
|
+
test "getWidthAt: zero width at start of string" {
    // 'a' (1 byte) + combining acute U+0301 (2 bytes) + "bc".
    const text = "a\u{0301}bc";

    const width_accented_a = utf8.getWidthAt(text, 0, 8, .unicode);
    const width_b = utf8.getWidthAt(text, 3, 8, .unicode);

    try testing.expectEqual(@as(u32, 1), width_accented_a); // base + mark = 1
    try testing.expectEqual(@as(u32, 1), width_b);
}
|
|
1957
|
+
|
|
1958
|
+
test "getWidthAt: control characters" {
    const text = "a\x00b";

    // The NUL byte in the middle is expected to take no columns.
    const Probe = struct { offset: usize, width: u32 };
    const probes = [_]Probe{
        .{ .offset = 0, .width = 1 }, // 'a'
        .{ .offset = 1, .width = 0 }, // NUL
        .{ .offset = 2, .width = 1 }, // 'b'
    };

    inline for (probes) |probe| {
        try testing.expectEqual(probe.width, utf8.getWidthAt(text, probe.offset, 8, .unicode));
    }
}
|
|
1964
|
+
|
|
1965
|
+
test "getWidthAt: multiple combining marks" {
    // 'e' + combining acute (U+0301) + combining circumflex (U+0302).
    const text = "e\u{0301}\u{0302}";

    // Both marks belong to the same grapheme as the base letter: 1 column.
    try testing.expectEqual(@as(u32, 1), utf8.getWidthAt(text, 0, 8, .unicode));
}
|
|
1970
|
+
|
|
1971
|
+
test "getWidthAt: at exact end boundary" {
    const text = "hello";

    // Offset 5 equals the length of the text (one past the last byte).
    try testing.expectEqual(@as(u32, 0), utf8.getWidthAt(text, 5, 8, .unicode));
}
|
|
1976
|
+
|
|
1977
|
+
test "getWidthAt: realistic mixed content" {
    // Byte layout: "Hello " 0-5, 世 6-8, 界 9-11, '!' 12, ' ' 13, 👋 14-17.
    const text = "Hello 世界! 👋";

    const Probe = struct { offset: usize, width: u32 };
    const probes = [_]Probe{
        .{ .offset = 0, .width = 1 }, // 'H'
        .{ .offset = 5, .width = 1 }, // ' '
        .{ .offset = 6, .width = 2 }, // '世'
        .{ .offset = 9, .width = 2 }, // '界'
        .{ .offset = 12, .width = 1 }, // '!'
        .{ .offset = 13, .width = 1 }, // ' '
        .{ .offset = 14, .width = 2 }, // emoji
    };

    inline for (probes) |probe| {
        try testing.expectEqual(probe.width, utf8.getWidthAt(text, probe.offset, 8, .unicode));
    }
}
|
|
1987
|
+
|
|
1988
|
+
test "getWidthAt: grapheme at SIMD boundary" {
    // 32 bytes of 'x' with a 3-byte ideograph written over bytes 14..17, so
    // the character sits across byte 16 (presumably the vector chunk
    // boundary the test name refers to — TODO confirm).
    var bytes = [_]u8{'x'} ** 32;
    @memcpy(bytes[14..17], "世");

    const before = utf8.getWidthAt(&bytes, 13, 8, .unicode);
    const ideograph = utf8.getWidthAt(&bytes, 14, 8, .unicode);
    const after = utf8.getWidthAt(&bytes, 17, 8, .unicode);

    try testing.expectEqual(@as(u32, 1), before); // 'x'
    try testing.expectEqual(@as(u32, 2), ideograph); // '世'
    try testing.expectEqual(@as(u32, 1), after); // 'x'
}
|
|
1998
|
+
|
|
1999
|
+
test "getWidthAt: incomplete UTF-8 at end" {
    // 0xC3 is the lead byte of a 2-byte sequence with no continuation byte.
    const text = "abc\xC3";

    const width_a = utf8.getWidthAt(text, 0, 8, .unicode);
    const width_truncated = utf8.getWidthAt(text, 3, 8, .unicode);

    try testing.expectEqual(@as(u32, 1), width_a);
    // The truncated sequence is expected to be reported as width 1.
    try testing.expectEqual(@as(u32, 1), width_truncated);
}
|
|
2004
|
+
|
|
2005
|
+
test "getWidthAt: random positions in realistic text" {
    // The fox emoji occupies bytes 16-19; the trailing 犬 starts at byte 41.
    const text = "The quick brown 🦊 jumps over the lazy 犬";

    const Probe = struct { offset: usize, width: u32 };
    const probes = [_]Probe{
        .{ .offset = 0, .width = 1 }, // 'T'
        .{ .offset = 10, .width = 1 }, // 'b'
        .{ .offset = 16, .width = 2 }, // fox emoji
        .{ .offset = 41, .width = 2 }, // '犬' (dog)
    };

    inline for (probes) |probe| {
        try testing.expectEqual(probe.width, utf8.getWidthAt(text, probe.offset, 8, .unicode));
    }
}
|
|
2013
|
+
|
|
2014
|
+
// ============================================================================
|
|
2015
|
+
// GET PREV GRAPHEME START TESTS
|
|
2016
|
+
// ============================================================================
|
|
2017
|
+
|
|
2018
|
+
test "getPrevGraphemeStart: at start" {
    // Nothing precedes offset 0, so no previous grapheme is reported.
    const prev = utf8.getPrevGraphemeStart("hello", 0, 8, .unicode);
    if (prev != null) return error.TestUnexpectedResult;
}
|
|
2023
|
+
|
|
2024
|
+
test "getPrevGraphemeStart: empty string" {
    // An empty input has no graphemes at all.
    const prev = utf8.getPrevGraphemeStart("", 0, 8, .unicode);
    if (prev != null) return error.TestUnexpectedResult;
}
|
|
2028
|
+
|
|
2029
|
+
test "getPrevGraphemeStart: out of bounds" {
    // Offset 100 is far past the end of the 5-byte input: expect null.
    const prev = utf8.getPrevGraphemeStart("hello", 100, 8, .unicode);
    if (prev != null) return error.TestUnexpectedResult;
}
|
|
2034
|
+
|
|
2035
|
+
test "getPrevGraphemeStart: simple ASCII" {
    const text = "hello";

    // In pure ASCII the previous grapheme always starts one byte earlier
    // and is one column wide.
    const Probe = struct { from: usize, start: usize };
    const probes = [_]Probe{
        .{ .from = 1, .start = 0 },
        .{ .from = 2, .start = 1 },
        .{ .from = 5, .start = 4 }, // from the end of the text
    };

    inline for (probes) |probe| {
        const prev = utf8.getPrevGraphemeStart(text, probe.from, 8, .unicode) orelse
            return error.TestUnexpectedResult;
        try testing.expectEqual(probe.start, prev.start_offset);
        try testing.expectEqual(@as(u32, 1), prev.width);
    }
}
|
|
2053
|
+
|
|
2054
|
+
test "getPrevGraphemeStart: CJK wide character" {
    // 'a' at byte 0, 世 at bytes 1-3, 界 at bytes 4-6.
    const text = "a世界";

    const Probe = struct { from: usize, start: usize, width: u32 };
    const probes = [_]Probe{
        .{ .from = 1, .start = 0, .width = 1 }, // back over 'a'
        .{ .from = 4, .start = 1, .width = 2 }, // back over '世'
        .{ .from = 7, .start = 4, .width = 2 }, // back over '界'
    };

    inline for (probes) |probe| {
        const prev = utf8.getPrevGraphemeStart(text, probe.from, 8, .unicode) orelse
            return error.TestUnexpectedResult;
        try testing.expectEqual(probe.start, prev.start_offset);
        try testing.expectEqual(probe.width, prev.width);
    }
}
|
|
2072
|
+
|
|
2073
|
+
test "getPrevGraphemeStart: combining mark" {
    // "caf" + 'e' (byte 3) + combining acute U+0301 (bytes 4-5).
    const text = "cafe\u{0301}";

    // From the end (byte 6) the previous grapheme is the accented 'e',
    // starting at its base letter, not at the combining mark.
    const prev = utf8.getPrevGraphemeStart(text, 6, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 3), prev.start_offset);
    try testing.expectEqual(@as(u32, 1), prev.width);
}
|
|
2081
|
+
|
|
2082
|
+
test "getPrevGraphemeStart: emoji with skin tone" {
    // "Hi" followed by waving hand + dark skin tone modifier.
    const text = "Hi👋🏿";

    // From byte 2 the previous grapheme is the plain 'i'.
    const before_emoji = utf8.getPrevGraphemeStart(text, 2, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 1), before_emoji.start_offset);
    try testing.expectEqual(@as(u32, 1), before_emoji.width);

    // From the end, the emoji and its modifier are stepped over as a unit.
    const whole_emoji = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 2), whole_emoji.start_offset);
}
|
|
2094
|
+
|
|
2095
|
+
test "getPrevGraphemeStart: emoji with ZWJ" {
    // 'a' + woman astronaut (woman + ZERO WIDTH JOINER + rocket).
    // The joiner is an explicit escape because it is invisible and easily
    // lost in transit; without it the rocket is a separate grapheme and the
    // second check below would have to expect the rocket's offset instead.
    const text = "a👩\u{200D}🚀";

    // From byte 1 the previous grapheme is the plain 'a'.
    const r1 = utf8.getPrevGraphemeStart(text, 1, 8, .unicode);
    try testing.expect(r1 != null);
    try testing.expectEqual(@as(usize, 0), r1.?.start_offset);
    try testing.expectEqual(@as(u32, 1), r1.?.width);

    // From the end, the whole ZWJ sequence is stepped over as one cluster.
    const r_end = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode);
    try testing.expect(r_end != null);
    try testing.expectEqual(@as(usize, 1), r_end.?.start_offset);
}
|
|
2107
|
+
|
|
2108
|
+
test "getPrevGraphemeStart: flag emoji" {
    // ASCII "US" followed by the flag (two regional indicator symbols).
    const text = "US🇺🇸";

    // From the end, both indicators are stepped over together, landing on
    // the first byte after "US".
    const flag = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 2), flag.start_offset);
}
|
|
2115
|
+
|
|
2116
|
+
test "getPrevGraphemeStart: tab handling" {
    const text = "a\tb";

    // From byte 2 the previous grapheme is the tab at byte 1 (only the
    // start offset is checked for the tab).
    const tab = utf8.getPrevGraphemeStart(text, 2, 4, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 1), tab.start_offset);

    // From byte 1 the previous grapheme is 'a'.
    const letter = utf8.getPrevGraphemeStart(text, 1, 4, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 0), letter.start_offset);
    try testing.expectEqual(@as(u32, 1), letter.width);
}
|
|
2128
|
+
|
|
2129
|
+
test "getPrevGraphemeStart: mixed content" {
    // "Hi" at bytes 0-1, 世 at 2-4, 界 at 5-7, '!' at 8.
    const text = "Hi世界!";

    // Back over 'i' (only the start offset is checked here).
    const over_i = utf8.getPrevGraphemeStart(text, 2, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 1), over_i.start_offset);

    // Back over '世'.
    const over_first = utf8.getPrevGraphemeStart(text, 5, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 2), over_first.start_offset);
    try testing.expectEqual(@as(u32, 2), over_first.width);

    // Back over '界'.
    const over_second = utf8.getPrevGraphemeStart(text, 8, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 5), over_second.start_offset);
    try testing.expectEqual(@as(u32, 2), over_second.width);
}
|
|
2146
|
+
|
|
2147
|
+
// 'e' followed by two combining marks must be stepped over as one grapheme.
test "getPrevGraphemeStart: multiple combining marks" {
    const text = "e\u{0301}\u{0302}x"; // e + acute + circumflex + x
    const x_offset = text.len - 1;

    // From the end: the trailing 'x' is its own grapheme.
    const at_x = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, x_offset), at_x.start_offset);

    // From just before 'x': the whole e + marks cluster starts at byte 0.
    const at_e = utf8.getPrevGraphemeStart(text, x_offset, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 0), at_e.start_offset);
    try testing.expectEqual(@as(u32, 1), at_e.width);
}
|
|
2159
|
+
|
|
2160
|
+
// Five 3-byte hiragana: the last one starts at byte 12 and is two columns wide.
test "getPrevGraphemeStart: hiragana" {
    const text = "こんにちは";

    const last = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 12), last.start_offset);
    try testing.expectEqual(@as(u32, 2), last.width);
}
|
|
2168
|
+
|
|
2169
|
+
// ASCII, CJK, punctuation and an emoji in one string: the emoji starts at
// byte 14 and the space before it at byte 13.
test "getPrevGraphemeStart: realistic scenario" {
    const text = "Hello 世界! 👋";

    // From the end: the waving hand.
    const emoji = utf8.getPrevGraphemeStart(text, text.len, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 14), emoji.start_offset);

    // From the emoji's start: the single-column space in front of it.
    const space = utf8.getPrevGraphemeStart(text, 14, 8, .unicode) orelse
        return error.TestUnexpectedResult;
    try testing.expectEqual(@as(usize, 13), space.start_offset);
    try testing.expectEqual(@as(u32, 1), space.width);
}
|
|
2181
|
+
|
|
2182
|
+
// Walk backwards through three adjacent 3-byte, 2-column characters.
test "getPrevGraphemeStart: consecutive wide chars" {
    const text = "世界中";

    // Each row: offset to step back from, and the expected grapheme start.
    const steps = [_]struct { from: usize, start: usize }{
        .{ .from = 9, .start = 6 },
        .{ .from = 6, .start = 3 },
        .{ .from = 3, .start = 0 },
    };

    for (steps) |step| {
        const prev = utf8.getPrevGraphemeStart(text, step.from, 8, .unicode) orelse
            return error.TestUnexpectedResult;
        try testing.expectEqual(step.start, prev.start_offset);
        try testing.expectEqual(@as(u32, 2), prev.width);
    }
}
|
|
2200
|
+
|
|
2201
|
+
// ============================================================================
|
|
2202
|
+
// CALCULATE TEXT WIDTH TESTS (static tab width)
|
|
2203
|
+
// ============================================================================
|
|
2204
|
+
|
|
2205
|
+
// An empty input has zero display width.
test "calculateTextWidth: empty string" {
    const expected: u32 = 0;
    try testing.expectEqual(expected, utf8.calculateTextWidth("", 4, false, .unicode));
}
|
|
2209
|
+
|
|
2210
|
+
// Five ASCII letters occupy five columns (called with the third argument true).
test "calculateTextWidth: simple ASCII" {
    const expected: u32 = 5;
    try testing.expectEqual(expected, utf8.calculateTextWidth("hello", 4, true, .unicode));
}
|
|
2214
|
+
|
|
2215
|
+
// A lone tab with tab width 4 is four columns wide.
test "calculateTextWidth: single tab" {
    const expected: u32 = 4;
    try testing.expectEqual(expected, utf8.calculateTextWidth("\t", 4, false, .unicode));
}
|
|
2219
|
+
|
|
2220
|
+
// A single tab is exactly as wide as the configured tab width (2, 4, 8).
test "calculateTextWidth: tab with different widths" {
    const tab_widths = [_]u8{ 2, 4, 8 };
    for (tab_widths) |tab_width| {
        const measured = utf8.calculateTextWidth("\t", tab_width, false, .unicode);
        try testing.expectEqual(@as(u32, tab_width), measured);
    }
}
|
|
2225
|
+
|
|
2226
|
+
// Three tabs at tab width 4: 3 * 4 = 12 columns.
test "calculateTextWidth: multiple tabs" {
    const expected: u32 = 12;
    try testing.expectEqual(expected, utf8.calculateTextWidth("\t\t\t", 4, false, .unicode));
}
|
|
2230
|
+
|
|
2231
|
+
// a(1) + tab(4) + b(1) = 6 columns.
test "calculateTextWidth: text with tabs" {
    const expected: u32 = 6;
    try testing.expectEqual(expected, utf8.calculateTextWidth("a\tb", 4, false, .unicode));
}
|
|
2235
|
+
|
|
2236
|
+
// a(1) + tab(2) + tab(2) + b(1) = 6 columns at tab width 2.
test "calculateTextWidth: multiple tabs between text" {
    const expected: u32 = 6;
    try testing.expectEqual(expected, utf8.calculateTextWidth("a\t\tb", 2, false, .unicode));
}
|
|
2240
|
+
|
|
2241
|
+
// tab(4) + a(1) + b(1) + c(1) = 7 columns.
test "calculateTextWidth: tab at start" {
    const expected: u32 = 7;
    try testing.expectEqual(expected, utf8.calculateTextWidth("\tabc", 4, false, .unicode));
}
|
|
2245
|
+
|
|
2246
|
+
// a(1) + b(1) + c(1) + tab(4) = 7 columns.
test "calculateTextWidth: tab at end" {
    const expected: u32 = 7;
    try testing.expectEqual(expected, utf8.calculateTextWidth("abc\t", 4, false, .unicode));
}
|
|
2250
|
+
|
|
2251
|
+
// 世(2) + tab(4) + 界(2) = 8 columns.
test "calculateTextWidth: CJK with tabs" {
    const expected: u32 = 8;
    try testing.expectEqual(expected, utf8.calculateTextWidth("世\t界", 4, false, .unicode));
}
|
|
2255
|
+
|
|
2256
|
+
// emoji(2) + tab(4) = 6 columns.
test "calculateTextWidth: emoji with tab" {
    const expected: u32 = 6;
    try testing.expectEqual(expected, utf8.calculateTextWidth("🌍\t", 4, false, .unicode));
}
|
|
2260
|
+
|
|
2261
|
+
// hello(5) + tab(4) + 世(2) + 界(2) = 13 columns.
test "calculateTextWidth: mixed ASCII and Unicode with tabs" {
    const expected: u32 = 13;
    try testing.expectEqual(expected, utf8.calculateTextWidth("hello\t世界", 4, false, .unicode));
}
|
|
2265
|
+
|
|
2266
|
+
// A three-line, tab-indented snippet measured at tab width 2.
test "calculateTextWidth: realistic code with tabs" {
    const snippet = "\tif (x > 5) {\n\t\treturn true;\n\t}";

    // Newlines contribute 0, so the total is:
    //   line 1: tab(2) + "if (x > 5) {"(12)          = 14
    //   line 2: tab(2) + tab(2) + "return true;"(12) = 16
    //   line 3: tab(2) + "}"(1)                      =  3
    //                                          total = 33
    const expected: u32 = 33;
    try testing.expectEqual(expected, utf8.calculateTextWidth(snippet, 2, false, .unicode));
}
|
|
2273
|
+
|
|
2274
|
+
// Five ASCII spaces occupy five columns.
test "calculateTextWidth: only spaces" {
    // Build the run with `**` so the number of spaces is explicit in the source
    // and survives tools that collapse consecutive whitespace; a literal that
    // shrinks to a single space would make the expectation of 5 wrong.
    const spaces = " " ** 5;
    const result = utf8.calculateTextWidth(spaces, 4, true, .unicode);
    try testing.expectEqual(@as(u32, 5), result);
}
|
|
2278
|
+
|
|
2279
|
+
// Two spaces, tab, two spaces, tab, two spaces at tab width 4:
// 2 + 4 + 2 + 4 + 2 = 14 columns.
test "calculateTextWidth: tabs and spaces mixed" {
    // Spaces are written as \x20 escapes so each pair stays two characters even
    // if the source passes through whitespace-collapsing tooling; with single
    // spaces the width would be 11, not 14.
    const text = "\x20\x20\t\x20\x20\t\x20\x20";
    const result = utf8.calculateTextWidth(text, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 14), result);
}
|
|
2283
|
+
|
|
2284
|
+
// NUL (0x00) and US (0x1F) between letters add no width; only a, b, c count.
test "calculateTextWidth: control characters" {
    const expected: u32 = 3;
    try testing.expectEqual(expected, utf8.calculateTextWidth("a\x00b\x1Fc", 4, false, .unicode));
}
|
|
2288
|
+
|
|
2289
|
+
// c(1) + a(1) + f(1) + e(1) + combining acute(0) = 4 columns.
test "calculateTextWidth: combining marks" {
    const expected: u32 = 4;
    try testing.expectEqual(expected, utf8.calculateTextWidth("cafe\u{0301}", 4, false, .unicode));
}
|
|
2293
|
+
|
|
2294
|
+
// The scroll emoji is two columns wide. Renamed from "scroll book and writing
// emojis width 2": only the scroll is asserted here, and the book and writing
// hand emojis have their own test in this file.
test "calculateTextWidth: scroll emoji width 2" {
    try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("📜", 4, false, .unicode));
}
|
|
2297
|
+
|
|
2298
|
+
// The Devanagari word below is expected to measure four columns.
test "calculateTextWidth: Devanagari नमस्ते width 4" {
    const expected: u32 = 4;
    try testing.expectEqual(expected, utf8.calculateTextWidth("नमस्ते", 4, false, .unicode));
}
|
|
2302
|
+
|
|
2303
|
+
// ============================================================================
|
|
2304
|
+
// UNICODE WARNING SIGNS WIDTH TESTS
|
|
2305
|
+
// ============================================================================
|
|
2306
|
+
|
|
2307
|
+
// The warning sign is expected to take two columns.
test "calculateTextWidth: U+26A0 warning sign should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("⚠", 4, false, .unicode));
}
|
|
2311
|
+
|
|
2312
|
+
// The exclamation question mark is expected to take two columns.
test "calculateTextWidth: U+2049 exclamation question mark should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("⁉", 4, false, .unicode));
}
|
|
2316
|
+
|
|
2317
|
+
// The double exclamation mark is expected to take two columns.
test "calculateTextWidth: U+203C double exclamation mark should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("‼", 4, false, .unicode));
}
|
|
2321
|
+
|
|
2322
|
+
// The rescue worker helmet is expected to take two columns.
test "calculateTextWidth: U+26D1 rescue worker helmet should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("⛑", 4, false, .unicode));
}
|
|
2326
|
+
|
|
2327
|
+
// The radioactive sign is expected to take two columns.
test "calculateTextWidth: U+2622 radioactive sign should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("☢", 4, false, .unicode));
}
|
|
2331
|
+
|
|
2332
|
+
// The biohazard sign is expected to take two columns.
test "calculateTextWidth: U+2623 biohazard sign should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("☣", 4, false, .unicode));
}
|
|
2336
|
+
|
|
2337
|
+
// The atom symbol is expected to take two columns.
test "calculateTextWidth: U+269B atom symbol should be width 2" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("⚛", 4, false, .unicode));
}
|
|
2341
|
+
|
|
2342
|
+
// ============================================================================
|
|
2343
|
+
// GRAPHEME INFO TESTS (for caching multi-byte graphemes and tabs)
|
|
2344
|
+
// ============================================================================
|
|
2345
|
+
|
|
2346
|
+
// Empty input produces no grapheme entries.
test "findGraphemeInfo: empty string" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    try utf8.findGraphemeInfo("", 4, false, .unicode, gpa, &graphemes);

    const expected_count: usize = 0;
    try testing.expectEqual(expected_count, graphemes.items.len);
}
|
|
2353
|
+
|
|
2354
|
+
// Plain ASCII without tabs (third argument true) yields no entries.
test "findGraphemeInfo: ASCII-only returns empty" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    try utf8.findGraphemeInfo("hello world", 4, true, .unicode, gpa, &graphemes);

    const expected_count: usize = 0;
    try testing.expectEqual(expected_count, graphemes.items.len);
}
|
|
2361
|
+
|
|
2362
|
+
// ASCII text containing one tab yields exactly one entry, describing the tab.
test "findGraphemeInfo: ASCII with tab" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    try utf8.findGraphemeInfo("hello\tworld", 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 1), graphemes.items.len);

    // The tab follows five 1-column letters: byte 5, column 5, 1 byte, 4 wide.
    const tab = graphemes.items[0];
    try testing.expectEqual(@as(u32, 5), tab.byte_offset);
    try testing.expectEqual(@as(u8, 1), tab.byte_len);
    try testing.expectEqual(@as(u8, 4), tab.width);
    try testing.expectEqual(@as(u32, 5), tab.col_offset);
}
|
|
2375
|
+
|
|
2376
|
+
// "a\tb\tc" yields one entry per tab; the second tab's column accounts for the
// first tab's width.
test "findGraphemeInfo: multiple tabs" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    try utf8.findGraphemeInfo("a\tb\tc", 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 2), graphemes.items.len);

    // First tab: byte 1, column 1.
    const first = graphemes.items[0];
    try testing.expectEqual(@as(u32, 1), first.byte_offset);
    try testing.expectEqual(@as(u8, 1), first.byte_len);
    try testing.expectEqual(@as(u8, 4), first.width);
    try testing.expectEqual(@as(u32, 1), first.col_offset);

    // Second tab: byte 3, column 6 (a = 1, tab = 4, b = 1).
    const second = graphemes.items[1];
    try testing.expectEqual(@as(u32, 3), second.byte_offset);
    try testing.expectEqual(@as(u8, 1), second.byte_len);
    try testing.expectEqual(@as(u8, 4), second.width);
    try testing.expectEqual(@as(u32, 6), second.col_offset);
}
|
|
2397
|
+
|
|
2398
|
+
// ASCII prefix followed by two CJK characters: one entry per CJK character.
test "findGraphemeInfo: CJK characters" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "hello世界";
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 2), graphemes.items.len);

    // 世: byte 5, column 5, 3 bytes, 2 columns.
    const first = graphemes.items[0];
    try testing.expectEqual(@as(u32, 5), first.byte_offset);
    try testing.expectEqual(@as(u8, 3), first.byte_len);
    try testing.expectEqual(@as(u8, 2), first.width);
    try testing.expectEqual(@as(u32, 5), first.col_offset);

    // 界: byte 8, column 7 (5 letters + the 2-column 世).
    const second = graphemes.items[1];
    try testing.expectEqual(@as(u32, 8), second.byte_offset);
    try testing.expectEqual(@as(u8, 3), second.byte_len);
    try testing.expectEqual(@as(u8, 2), second.width);
    try testing.expectEqual(@as(u32, 7), second.col_offset);
}
|
|
2420
|
+
|
|
2421
|
+
// A waving hand plus skin-tone modifier is reported as one 8-byte, 2-column
// cluster.
test "findGraphemeInfo: emoji with skin tone" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "Hi👋🏿Bye"; // Hi + wave + dark skin tone + Bye
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 1), graphemes.items.len);

    const cluster = graphemes.items[0];
    try testing.expectEqual(@as(u32, 2), cluster.byte_offset);
    try testing.expectEqual(@as(u8, 8), cluster.byte_len); // 4 + 4 bytes
    try testing.expectEqual(@as(u8, 2), cluster.width);
    try testing.expectEqual(@as(u32, 2), cluster.col_offset);
}
|
|
2436
|
+
|
|
2437
|
+
// A woman + ZWJ + rocket sequence between two ASCII letters is reported as a
// single 2-column cluster starting at byte 1, column 1.
test "findGraphemeInfo: emoji with ZWJ" {
    var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer result.deinit(testing.allocator);

    // The joiner (U+200D) is invisible, so the sequence is spelled with escapes:
    // if it is dropped the text becomes two separate emoji and this test no
    // longer exercises ZWJ handling.
    const text = "a\u{1F469}\u{200D}\u{1F680}b"; // a + woman astronaut + b
    try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);

    // Should have one entry for the emoji cluster
    try testing.expectEqual(@as(usize, 1), result.items.len);

    try testing.expectEqual(@as(u32, 1), result.items[0].byte_offset);
    try testing.expectEqual(@as(u8, 2), result.items[0].width);
    try testing.expectEqual(@as(u32, 1), result.items[0].col_offset);
}
|
|
2451
|
+
|
|
2452
|
+
// 'e' plus a combining acute accent is reported as one 3-byte, 1-column entry.
test "findGraphemeInfo: combining mark" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "cafe\u{0301}"; // café with combining acute
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 1), graphemes.items.len);

    const accented_e = graphemes.items[0];
    try testing.expectEqual(@as(u32, 3), accented_e.byte_offset); // 'e' position
    try testing.expectEqual(@as(u8, 3), accented_e.byte_len); // e (1 byte) + combining (2 bytes)
    try testing.expectEqual(@as(u8, 1), accented_e.width);
    try testing.expectEqual(@as(u32, 3), accented_e.col_offset);
}
|
|
2467
|
+
|
|
2468
|
+
// Two regional indicators forming a flag are reported as one 8-byte, 2-column
// entry.
test "findGraphemeInfo: flag emoji" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "US🇺🇸"; // US + flag
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 1), graphemes.items.len);

    const flag = graphemes.items[0];
    try testing.expectEqual(@as(u32, 2), flag.byte_offset);
    try testing.expectEqual(@as(u8, 8), flag.byte_len); // Two 4-byte chars
    try testing.expectEqual(@as(u8, 2), flag.width);
    try testing.expectEqual(@as(u32, 2), flag.col_offset);
}
|
|
2483
|
+
|
|
2484
|
+
// "Hi" + tab + two CJK characters + "!": entries for the tab and each CJK
// character, with column offsets reflecting the tab's width of 4.
test "findGraphemeInfo: mixed content" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "Hi\t世界!"; // Hi + tab + CJK + !
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 3), graphemes.items.len);

    // Tab: byte 2, column 2.
    const tab = graphemes.items[0];
    try testing.expectEqual(@as(u32, 2), tab.byte_offset);
    try testing.expectEqual(@as(u8, 1), tab.byte_len);
    try testing.expectEqual(@as(u8, 4), tab.width);
    try testing.expectEqual(@as(u32, 2), tab.col_offset);

    // 世: byte 3, column 6.
    const first_cjk = graphemes.items[1];
    try testing.expectEqual(@as(u32, 3), first_cjk.byte_offset);
    try testing.expectEqual(@as(u8, 3), first_cjk.byte_len);
    try testing.expectEqual(@as(u8, 2), first_cjk.width);
    try testing.expectEqual(@as(u32, 6), first_cjk.col_offset);

    // 界: byte 6, column 8.
    const second_cjk = graphemes.items[2];
    try testing.expectEqual(@as(u32, 6), second_cjk.byte_offset);
    try testing.expectEqual(@as(u8, 3), second_cjk.byte_len);
    try testing.expectEqual(@as(u8, 2), second_cjk.width);
    try testing.expectEqual(@as(u32, 8), second_cjk.col_offset);
}
|
|
2512
|
+
|
|
2513
|
+
// ASCII letters only (third argument false): nothing is recorded.
test "findGraphemeInfo: only ASCII letters no cache" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    try utf8.findGraphemeInfo("abcdefghij", 4, false, .unicode, gpa, &graphemes);

    // No tabs or multi-byte graphemes, so the list stays empty.
    const expected_count: usize = 0;
    try testing.expectEqual(expected_count, graphemes.items.len);
}
|
|
2522
|
+
|
|
2523
|
+
// A heart followed by VS16 (emoji presentation) is reported as one 2-column
// cluster at byte 2, column 2.
test "findGraphemeInfo: emoji with VS16" {
    var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer result.deinit(testing.allocator);

    // U+FE0F is invisible in most editors; writing it as an escape keeps the
    // selector from being silently lost and documents exactly what is tested.
    const text = "I \u{2764}\u{FE0F} U"; // I + space + heart + VS16 + space + U
    try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);

    // Should have one entry for the emoji cluster
    try testing.expectEqual(@as(usize, 1), result.items.len);

    try testing.expectEqual(@as(u32, 2), result.items[0].byte_offset);
    try testing.expectEqual(@as(u8, 2), result.items[0].width);
    try testing.expectEqual(@as(u32, 2), result.items[0].col_offset);
}
|
|
2537
|
+
|
|
2538
|
+
// A small code snippet with one tab and a two-character CJK identifier yields
// three entries: the tab, 世 and 界.
test "findGraphemeInfo: realistic text" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const snippet = "function test() {\n\tconst 世界 = 10;\n}";
    try utf8.findGraphemeInfo(snippet, 4, false, .unicode, gpa, &graphemes);

    const expected_count: usize = 3;
    try testing.expectEqual(expected_count, graphemes.items.len);
}
|
|
2548
|
+
|
|
2549
|
+
// Five hiragana yield five entries; the first is at byte 0, 3 bytes long and
// 2 columns wide.
test "findGraphemeInfo: hiragana" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    const text = "こんにちは";
    try utf8.findGraphemeInfo(text, 4, false, .unicode, gpa, &graphemes);
    try testing.expectEqual(@as(usize, 5), graphemes.items.len);

    // Only the first character's fields are checked.
    const first = graphemes.items[0];
    try testing.expectEqual(@as(u32, 0), first.byte_offset);
    try testing.expectEqual(@as(u8, 3), first.byte_len);
    try testing.expectEqual(@as(u8, 2), first.width);
}
|
|
2564
|
+
|
|
2565
|
+
// A 3-byte CJK character placed at bytes 14..16 of a 32-byte buffer, so it
// straddles the 16-byte mark, must still be found with the right size.
test "findGraphemeInfo: at SIMD boundary" {
    const gpa = testing.allocator;
    var graphemes: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer graphemes.deinit(gpa);

    // Fill with ASCII 'x', then overwrite bytes 14..16 with the CJK character.
    var buf: [32]u8 = undefined;
    @memset(&buf, 'x');
    const cjk = "世";
    @memcpy(buf[14..17], cjk);

    try utf8.findGraphemeInfo(&buf, 4, false, .unicode, gpa, &graphemes);

    // Locate the entry that starts at byte 14; failing to find one is a test
    // failure.
    const hit = for (graphemes.items) |g| {
        if (g.byte_offset == 14) break g;
    } else return error.TestUnexpectedResult;

    try testing.expectEqual(@as(u8, 3), hit.byte_len);
    try testing.expectEqual(@as(u8, 2), hit.width);
}
|
|
2589
|
+
|
|
2590
|
+
// The open book and the writing hand (with VS16) are each two columns wide.
test "calculateTextWidth: book and writing hand emojis width 2" {
    try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth("📖", 4, false, .unicode));

    // Writing hand is U+270D followed by the invisible emoji-presentation
    // selector U+FE0F; the escape form keeps the selector from being lost.
    const writing_hand = "\u{270D}\u{FE0F}";
    try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(writing_hand, 4, false, .unicode));
}
|
|
2594
|
+
|
|
2595
|
+
// Expected column widths for two Devanagari words.
test "calculateTextWidth: Devanagari script" {
    const five_cols: u32 = 5;
    try testing.expectEqual(five_cols, utf8.calculateTextWidth("देवनागरी", 4, false, .unicode));

    const three_cols: u32 = 3;
    try testing.expectEqual(three_cols, utf8.calculateTextWidth("प्रथम", 4, false, .unicode));
}
|
|
2600
|
+
|
|
2601
|
+
// The plain check mark symbol is a single column wide.
test "calculateTextWidth: checkmark symbol" {
    const expected: u32 = 1;
    try testing.expectEqual(expected, utf8.calculateTextWidth("✓", 4, false, .unicode));
}
|
|
2605
|
+
|
|
2606
|
+
// 👋🏿 (wave + skin-tone modifier) is a single grapheme with width 2.
test "calculateTextWidth: emoji with skin tone" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("👋🏿", 4, false, .unicode));
}
|
|
2610
|
+
|
|
2611
|
+
// Woman + ZWJ + rocket (woman astronaut) is a single grapheme with width 2.
test "calculateTextWidth: emoji with ZWJ" {
    // U+200D is invisible; written as an escape so the joiner cannot be dropped,
    // which would turn the input into two separate 2-column emoji.
    const woman_astronaut = "\u{1F469}\u{200D}\u{1F680}";
    const result = utf8.calculateTextWidth(woman_astronaut, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), result);
}
|
|
2615
|
+
|
|
2616
|
+
// Heart + VS16 (emoji presentation selector) is a single grapheme with width 2.
test "calculateTextWidth: emoji with VS16 selector" {
    // U+FE0F is invisible; the escape form makes the selector's presence
    // explicit and protects it from being stripped.
    const heart_emoji = "\u{2764}\u{FE0F}";
    const result = utf8.calculateTextWidth(heart_emoji, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), result);
}
|
|
2620
|
+
|
|
2621
|
+
// 🇺🇸 (two regional indicators) is a single grapheme with width 2.
test "calculateTextWidth: flag emoji" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("🇺🇸", 4, false, .unicode));
}
|
|
2625
|
+
|
|
2626
|
+
// こ(2) + ん(2) + tab(4) + に(2) + ち(2) + は(2) = 14 columns.
test "calculateTextWidth: hiragana with tab" {
    const expected: u32 = 14;
    try testing.expectEqual(expected, utf8.calculateTextWidth("こん\tにちは", 4, false, .unicode));
}
|
|
2630
|
+
|
|
2631
|
+
// Fullwidth A(2) + fullwidth B(2) + tab(4) + fullwidth C(2) = 10 columns.
test "calculateTextWidth: fullwidth forms with tab" {
    // Fullwidth Latin capitals U+FF21..U+FF23, written as escapes: they look
    // like ASCII "ABC" and are folded to ASCII by compatibility normalization,
    // which would make the measured width 7 instead of the expected 10.
    const text = "\u{FF21}\u{FF22}\t\u{FF23}";
    const result = utf8.calculateTextWidth(text, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 10), result);
}
|
|
2635
|
+
|
|
2636
|
+
// The same ASCII text must measure identically whether the third argument is
// true or false.
test "calculateTextWidth: ASCII fast path consistency" {
    const sample = "hello world";

    const with_flag = utf8.calculateTextWidth(sample, 4, true, .unicode);
    const without_flag = utf8.calculateTextWidth(sample, 4, false, .unicode);

    try testing.expectEqual(with_flag, without_flag);
}
|
|
2642
|
+
|
|
2643
|
+
// 1000 bytes where every tenth byte (index 0, 10, 20, ...) is a tab and the
// rest are 'a'; the width must equal a running total computed alongside.
test "calculateTextWidth: large text with many tabs" {
    const len = 1000;
    const text = try testing.allocator.alloc(u8, len);
    defer testing.allocator.free(text);

    // Fill the buffer and accumulate the expected width: 4 per tab, 1 per 'a'.
    var want: u32 = 0;
    for (text, 0..) |*slot, idx| {
        const is_tab = idx % 10 == 0;
        slot.* = if (is_tab) @as(u8, '\t') else @as(u8, 'a');
        want += if (is_tab) @as(u32, 4) else @as(u32, 1);
    }

    const got = utf8.calculateTextWidth(text, 4, false, .unicode);
    try testing.expectEqual(want, got);
}
|
|
2662
|
+
|
|
2663
|
+
// Table of inputs with hand-computed widths for a given tab width.
test "calculateTextWidth: comparison with manual calculation" {
    const Case = struct {
        text: []const u8,
        tab_width: u8,
        expected: u32,
    };

    const cases = [_]Case{
        .{ .text = "\t", .tab_width = 2, .expected = 2 },
        .{ .text = "\t\t", .tab_width = 2, .expected = 4 },
        .{ .text = "a\t", .tab_width = 2, .expected = 3 },
        .{ .text = "\ta", .tab_width = 2, .expected = 3 },
        .{ .text = "a\tb", .tab_width = 2, .expected = 4 },
        .{ .text = "ab\tcd", .tab_width = 4, .expected = 8 },
        .{ .text = "\t\tx", .tab_width = 2, .expected = 5 },
        .{ .text = "世\t界", .tab_width = 2, .expected = 6 },
    };

    for (cases) |case| {
        const measured = utf8.calculateTextWidth(case.text, case.tab_width, false, .unicode);
        try testing.expectEqual(case.expected, measured);
    }
}
|
|
2684
|
+
|
|
2685
|
+
// ============================================================================
|
|
2686
|
+
// LINE WIDTH WITH GRAPHEMES TESTS
|
|
2687
|
+
// Testing that calculateTextWidth returns correct Unicode display widths
|
|
2688
|
+
// ============================================================================
|
|
2689
|
+
|
|
2690
|
+
// The check mark button emoji ✅ (U+2705) is two columns wide.
test "calculateTextWidth: checkmark grapheme ✅" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("✅", 4, false, .unicode));
}
|
|
2700
|
+
|
|
2701
|
+
// The Devanagari word below is expected to measure four columns.
test "calculateTextWidth: Sanskrit text with combining marks" {
    const expected: u32 = 4;
    try testing.expectEqual(expected, utf8.calculateTextWidth("संस्कृति", 4, false, .unicode));
}
|
|
2705
|
+
|
|
2706
|
+
// D(1) + o(1) + n(1) + e(1) + space(1) + ✅(2) = 7 columns.
test "calculateTextWidth: checkmark in text" {
    const expected: u32 = 7;
    try testing.expectEqual(expected, utf8.calculateTextWidth("Done ✅", 4, false, .unicode));
}
|
|
2716
|
+
|
|
2717
|
+
// Table of common emoji, each expected to be two columns wide.
test "calculateTextWidth: various emoji graphemes" {
    const test_cases = [_]struct {
        text: []const u8,
        name: []const u8,
        expected_width: u32,
    }{
        .{ .text = "✅", .name = "checkmark U+2705", .expected_width = 2 },
        // Escapes make the invisible VS16 (U+FE0F) explicit.
        .{ .text = "\u{2764}\u{FE0F}", .name = "red heart U+2764+FE0F", .expected_width = 2 },
        .{ .text = "🎉", .name = "party popper U+1F389", .expected_width = 2 },
        .{ .text = "🔥", .name = "fire U+1F525", .expected_width = 2 },
        .{ .text = "💯", .name = "hundred points U+1F4AF", .expected_width = 2 },
        .{ .text = "🚀", .name = "rocket U+1F680", .expected_width = 2 },
        .{ .text = "⭐", .name = "star U+2B50", .expected_width = 2 },
        .{ .text = "👍", .name = "thumbs up U+1F44D", .expected_width = 2 },
    };

    for (test_cases) |tc| {
        // On failure, report which row broke; expectEqual alone only prints the
        // two numbers, which are identical across rows.
        errdefer std.debug.print("failing case: {s}\n", .{tc.name});

        const width = utf8.calculateTextWidth(tc.text, 4, false, .unicode);
        try testing.expectEqual(tc.expected_width, width);
    }
}
|
|
2738
|
+
|
|
2739
|
+
// A ZWJ sequence must be measured as one grapheme, not per code point.
test "calculateTextWidth: complex graphemes with ZWJ" {
    // Woman astronaut: woman (U+1F469) + ZWJ (U+200D) + rocket (U+1F680).
    // Spelled with escapes because the joiner is invisible and easily lost.
    const woman_astronaut = "\u{1F469}\u{200D}\u{1F680}";

    const width = utf8.calculateTextWidth(woman_astronaut, 4, false, .unicode);

    // Should return 2 for the combined grapheme rather than the sum of the
    // individual code points' widths.
    try testing.expectEqual(@as(u32, 2), width);
}
|
|
2748
|
+
|
|
2749
|
+
// US flag 🇺🇸 (two regional indicator symbols) measures two columns as one
// grapheme.
test "calculateTextWidth: flag emoji grapheme" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("🇺🇸", 4, false, .unicode));
}
|
|
2758
|
+
|
|
2759
|
+
// Waving hand with dark skin tone 👋🏿 measures 2 as a combined grapheme, not 4
// as the two code points would individually.
test "calculateTextWidth: skin tone modifier grapheme" {
    const expected: u32 = 2;
    try testing.expectEqual(expected, utf8.calculateTextWidth("👋🏿", 4, false, .unicode));
}
|
|
2768
|
+
// ============================================================================
|
|
2769
|
+
// COMPREHENSIVE UNICODE GRAPHEME TESTS FOR calculateTextWidth
|
|
2770
|
+
// Testing various emoji, ZWJ sequences, Indic scripts, and Unicode edge cases
|
|
2771
|
+
// ============================================================================
|
|
2772
|
+
|
|
2773
|
+
// ----------------------------------------------------------------------------
|
|
2774
|
+
// Emoji Presentation Tests
|
|
2775
|
+
// ----------------------------------------------------------------------------
|
|
2776
|
+
|
|
2777
|
+
test "calculateTextWidth: emoji presentation with VS15 (text)" {
|
|
2778
|
+
// U+2764 (heart) + U+FE0E (VS15 - text presentation)
|
|
2779
|
+
const heart_text = "❤\u{FE0E}";
|
|
2780
|
+
const width = utf8.calculateTextWidth(heart_text, 4, false, .unicode);
|
|
2781
|
+
// With text presentation selector, should still be counted as grapheme width 2
|
|
2782
|
+
try testing.expectEqual(@as(u32, 2), width);
|
|
2783
|
+
}
|
|
2784
|
+
|
|
2785
|
+
test "calculateTextWidth: emoji presentation with VS16 (emoji)" {
|
|
2786
|
+
// U+2764 (heart) + U+FE0F (VS16 - emoji presentation) - already tested as ❤️
|
|
2787
|
+
const heart_emoji = "❤️";
|
|
2788
|
+
const width = utf8.calculateTextWidth(heart_emoji, 4, false, .unicode);
|
|
2789
|
+
try testing.expectEqual(@as(u32, 2), width);
|
|
2790
|
+
}
|
|
2791
|
+
|
|
2792
|
+
test "calculateTextWidth: keycap sequences" {
|
|
2793
|
+
// Digit + U+FE0F + U+20E3 (combining enclosing keycap)
|
|
2794
|
+
const keycap_1 = "1️⃣"; // U+0031 U+FE0F U+20E3
|
|
2795
|
+
const keycap_hash = "#️⃣"; // U+0023 U+FE0F U+20E3
|
|
2796
|
+
|
|
2797
|
+
// Keycap: base char (1) + VS16 (changes to emoji presentation, width 2) + combining keycap (0) = 2 total width
|
|
2798
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(keycap_1, 4, false, .unicode));
|
|
2799
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(keycap_hash, 4, false, .unicode));
|
|
2800
|
+
}
|
|
2801
|
+
|
|
2802
|
+
// ----------------------------------------------------------------------------
|
|
2803
|
+
// Complex ZWJ Sequences
|
|
2804
|
+
// ----------------------------------------------------------------------------
|
|
2805
|
+
|
|
2806
|
+
test "calculateTextWidth: family ZWJ sequences" {
|
|
2807
|
+
// Family: man, woman, girl, boy (4 people)
|
|
2808
|
+
const family = "👨👩👧👦"; // man + ZWJ + woman + ZWJ + girl + ZWJ + boy
|
|
2809
|
+
const width = utf8.calculateTextWidth(family, 4, false, .unicode);
|
|
2810
|
+
// Should be counted as single grapheme with width 2
|
|
2811
|
+
try testing.expectEqual(@as(u32, 2), width);
|
|
2812
|
+
}
|
|
2813
|
+
|
|
2814
|
+
test "calculateTextWidth: profession ZWJ sequences" {
|
|
2815
|
+
// Woman health worker: woman + ZWJ + health worker
|
|
2816
|
+
const health_worker = "👩⚕️";
|
|
2817
|
+
const firefighter = "👨🚒";
|
|
2818
|
+
const teacher = "👩🏫";
|
|
2819
|
+
|
|
2820
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(health_worker, 4, false, .unicode));
|
|
2821
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(firefighter, 4, false, .unicode));
|
|
2822
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(teacher, 4, false, .unicode));
|
|
2823
|
+
}
|
|
2824
|
+
|
|
2825
|
+
test "calculateTextWidth: couple ZWJ sequences" {
|
|
2826
|
+
// Kiss: person + ZWJ + heart + ZWJ + person
|
|
2827
|
+
const kiss = "💏"; // Single codepoint
|
|
2828
|
+
const couple_with_heart = "👩❤️👨"; // woman + ZWJ + heart + VS16 + ZWJ + man
|
|
2829
|
+
|
|
2830
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(kiss, 4, false, .unicode));
|
|
2831
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(couple_with_heart, 4, false, .unicode));
|
|
2832
|
+
}
|
|
2833
|
+
|
|
2834
|
+
// ----------------------------------------------------------------------------
|
|
2835
|
+
// Skin Tone Modifiers (Fitzpatrick scale)
|
|
2836
|
+
// ----------------------------------------------------------------------------
|
|
2837
|
+
|
|
2838
|
+
test "calculateTextWidth: all skin tone modifiers" {
|
|
2839
|
+
// Fitzpatrick Type-1-2 (light skin tone) U+1F3FB
|
|
2840
|
+
const wave_light = "👋🏻";
|
|
2841
|
+
// Fitzpatrick Type-3 (medium-light skin tone) U+1F3FC
|
|
2842
|
+
const wave_medium_light = "👋🏼";
|
|
2843
|
+
// Fitzpatrick Type-4 (medium skin tone) U+1F3FD
|
|
2844
|
+
const wave_medium = "👋🏽";
|
|
2845
|
+
// Fitzpatrick Type-5 (medium-dark skin tone) U+1F3FE
|
|
2846
|
+
const wave_medium_dark = "👋🏾";
|
|
2847
|
+
// Fitzpatrick Type-6 (dark skin tone) U+1F3FF
|
|
2848
|
+
const wave_dark = "👋🏿";
|
|
2849
|
+
|
|
2850
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_light, 4, false, .unicode));
|
|
2851
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium_light, 4, false, .unicode));
|
|
2852
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium, 4, false, .unicode));
|
|
2853
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_medium_dark, 4, false, .unicode));
|
|
2854
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(wave_dark, 4, false, .unicode));
|
|
2855
|
+
}
|
|
2856
|
+
|
|
2857
|
+
test "calculateTextWidth: skin tone with ZWJ" {
|
|
2858
|
+
// Family with skin tones: man(dark) + ZWJ + woman(light) + ZWJ + child
|
|
2859
|
+
const family_skin_tones = "👨🏿👩🏻👶";
|
|
2860
|
+
const width = utf8.calculateTextWidth(family_skin_tones, 4, false, .unicode);
|
|
2861
|
+
try testing.expectEqual(@as(u32, 2), width);
|
|
2862
|
+
}
|
|
2863
|
+
|
|
2864
|
+
// ----------------------------------------------------------------------------
|
|
2865
|
+
// Regional Indicator Symbols (Flags)
|
|
2866
|
+
// ----------------------------------------------------------------------------
|
|
2867
|
+
|
|
2868
|
+
test "calculateTextWidth: various flag emojis" {
|
|
2869
|
+
const flag_us = "🇺🇸"; // U+1F1FA U+1F1F8
|
|
2870
|
+
const flag_uk = "🇬🇧"; // U+1F1EC U+1F1E7
|
|
2871
|
+
const flag_jp = "🇯🇵"; // U+1F1EF U+1F1F5
|
|
2872
|
+
const flag_de = "🇩🇪"; // U+1F1E9 U+1F1EA
|
|
2873
|
+
const flag_fr = "🇫🇷"; // U+1F1EB U+1F1F7
|
|
2874
|
+
|
|
2875
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_us, 4, false, .unicode));
|
|
2876
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_uk, 4, false, .unicode));
|
|
2877
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_jp, 4, false, .unicode));
|
|
2878
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_de, 4, false, .unicode));
|
|
2879
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(flag_fr, 4, false, .unicode));
|
|
2880
|
+
}
|
|
2881
|
+
|
|
2882
|
+
test "calculateTextWidth: multiple flags in text" {
|
|
2883
|
+
const text = "Flags: 🇺🇸 🇬🇧 🇯🇵";
|
|
2884
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
2885
|
+
// "Flags: " (7) + 🇺🇸 (2) + " " (1) + 🇬🇧 (2) + " " (1) + 🇯🇵 (2) = 15
|
|
2886
|
+
try testing.expectEqual(@as(u32, 15), width);
|
|
2887
|
+
}
|
|
2888
|
+
|
|
2889
|
+
// ----------------------------------------------------------------------------
|
|
2890
|
+
// Devanagari and Indic Scripts
|
|
2891
|
+
// ----------------------------------------------------------------------------
|
|
2892
|
+
|
|
2893
|
+
test "calculateTextWidth: Devanagari basic characters" {
|
|
2894
|
+
// Devanagari script (Hindi, Sanskrit, etc.)
|
|
2895
|
+
const namaste = "नमस्ते"; // na-ma-s-te with virama
|
|
2896
|
+
const width = utf8.calculateTextWidth(namaste, 4, false, .unicode);
|
|
2897
|
+
// Devanagari characters are typically width 1 each
|
|
2898
|
+
// 6 code points forming 4 legacy grapheme clusters: न म स् ते (the virama combines with स); segmenters that link conjuncts (UAX #29 rule GB9c) would report 3 instead
|
|
2899
|
+
try testing.expect(width > 0); // Exact width depends on grapheme clustering, so only a non-zero result is asserted
|
|
2900
|
+
}
|
|
2901
|
+
|
|
2902
|
+
test "calculateTextWidth: Devanagari with combining marks" {
|
|
2903
|
+
// Devanagari vowel signs and nukta
|
|
2904
|
+
const ka = "क"; // Base character
|
|
2905
|
+
const ki = "कि"; // क + vowel sign i (U+093F)
|
|
2906
|
+
const kii = "की"; // क + vowel sign ii (U+0940)
|
|
2907
|
+
|
|
2908
|
+
try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(ka, 4, false, .unicode));
|
|
2909
|
+
// With combining vowel signs, should still be 1 grapheme
|
|
2910
|
+
try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(ki, 4, false, .unicode));
|
|
2911
|
+
try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(kii, 4, false, .unicode));
|
|
2912
|
+
}
|
|
2913
|
+
|
|
2914
|
+
test "calculateTextWidth: Devanagari conjuncts" {
|
|
2915
|
+
// Conjunct consonants with virama
|
|
2916
|
+
const kta = "क्त"; // क + virama + त (kta)
|
|
2917
|
+
const jna = "ज्ञ"; // ज + virama + ञ (jna)
|
|
2918
|
+
const ksha = "क्ष"; // क + virama + ZWJ + ष (kṣa with explicit ZWJ)
|
|
2919
|
+
|
|
2920
|
+
// These form single grapheme clusters but width = number of base consonants
|
|
2921
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(kta, 4, false, .unicode));
|
|
2922
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(jna, 4, false, .unicode));
|
|
2923
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(ksha, 4, false, .unicode));
|
|
2924
|
+
}
|
|
2925
|
+
|
|
2926
|
+
test "calculateTextWidth: Bengali script" {
|
|
2927
|
+
// Bengali/Bangla script
|
|
2928
|
+
const bangla = "বাংলা"; // Bangla
|
|
2929
|
+
const width = utf8.calculateTextWidth(bangla, 4, false, .unicode);
|
|
2930
|
+
try testing.expect(width > 0);
|
|
2931
|
+
}
|
|
2932
|
+
|
|
2933
|
+
test "calculateTextWidth: Tamil script" {
|
|
2934
|
+
// Tamil script (no conjuncts, simpler than Devanagari)
|
|
2935
|
+
const tamil = "தமிழ்"; // Tamil
|
|
2936
|
+
const width = utf8.calculateTextWidth(tamil, 4, false, .unicode);
|
|
2937
|
+
try testing.expect(width > 0);
|
|
2938
|
+
}
|
|
2939
|
+
|
|
2940
|
+
test "calculateTextWidth: Telugu script" {
|
|
2941
|
+
// Telugu script
|
|
2942
|
+
const telugu = "తెలుగు"; // Telugu
|
|
2943
|
+
const width = utf8.calculateTextWidth(telugu, 4, false, .unicode);
|
|
2944
|
+
try testing.expect(width > 0);
|
|
2945
|
+
}
|
|
2946
|
+
|
|
2947
|
+
// ----------------------------------------------------------------------------
|
|
2948
|
+
// Arabic and RTL Scripts
|
|
2949
|
+
// ----------------------------------------------------------------------------
|
|
2950
|
+
|
|
2951
|
+
test "calculateTextWidth: Arabic basic text" {
|
|
2952
|
+
// Arabic text (RTL, but width calculation is the same)
|
|
2953
|
+
const arabic = "مرحبا"; // Marhaba (hello)
|
|
2954
|
+
const width = utf8.calculateTextWidth(arabic, 4, false, .unicode);
|
|
2955
|
+
// Arabic characters are width 1 each
|
|
2956
|
+
try testing.expect(width >= 5);
|
|
2957
|
+
}
|
|
2958
|
+
|
|
2959
|
+
test "calculateTextWidth: Arabic with diacritics" {
|
|
2960
|
+
// Arabic with harakat (diacritical marks)
|
|
2961
|
+
const with_diacritics = "مَرْحَبًا"; // Marhaba with vowel marks
|
|
2962
|
+
const width = utf8.calculateTextWidth(with_diacritics, 4, false, .unicode);
|
|
2963
|
+
// Combining marks should not add to width
|
|
2964
|
+
try testing.expect(width >= 5);
|
|
2965
|
+
}
|
|
2966
|
+
|
|
2967
|
+
test "calculateTextWidth: Hebrew text" {
|
|
2968
|
+
// Hebrew text (RTL)
|
|
2969
|
+
const hebrew = "שלום"; // Shalom
|
|
2970
|
+
const width = utf8.calculateTextWidth(hebrew, 4, false, .unicode);
|
|
2971
|
+
try testing.expect(width >= 4);
|
|
2972
|
+
}
|
|
2973
|
+
|
|
2974
|
+
// ----------------------------------------------------------------------------
|
|
2975
|
+
// East Asian Scripts (CJK)
|
|
2976
|
+
// ----------------------------------------------------------------------------
|
|
2977
|
+
|
|
2978
|
+
test "calculateTextWidth: Chinese traditional characters" {
|
|
2979
|
+
const traditional = "繁體中文"; // Traditional Chinese
|
|
2980
|
+
const width = utf8.calculateTextWidth(traditional, 4, false, .unicode);
|
|
2981
|
+
// Each CJK character is width 2
|
|
2982
|
+
try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
|
|
2983
|
+
}
|
|
2984
|
+
|
|
2985
|
+
test "calculateTextWidth: Chinese simplified characters" {
|
|
2986
|
+
const simplified = "简体中文"; // Simplified Chinese
|
|
2987
|
+
const width = utf8.calculateTextWidth(simplified, 4, false, .unicode);
|
|
2988
|
+
try testing.expectEqual(@as(u32, 8), width); // 4 chars * 2 = 8
|
|
2989
|
+
}
|
|
2990
|
+
|
|
2991
|
+
test "calculateTextWidth: Japanese mixed scripts" {
|
|
2992
|
+
// Hiragana + Kanji + Katakana
|
|
2993
|
+
const mixed = "ひらがな漢字カタカナ"; // hiragana, kanji, katakana
|
|
2994
|
+
const width = utf8.calculateTextWidth(mixed, 4, false, .unicode);
|
|
2995
|
+
// All are width 2: 4 hiragana + 2 kanji + 4 katakana = 10 chars * 2 = 20
|
|
2996
|
+
try testing.expectEqual(@as(u32, 20), width);
|
|
2997
|
+
}
|
|
2998
|
+
|
|
2999
|
+
test "calculateTextWidth: Korean Hangul syllables" {
|
|
3000
|
+
const korean = "한글"; // Hangul (Korean)
|
|
3001
|
+
const width = utf8.calculateTextWidth(korean, 4, false, .unicode);
|
|
3002
|
+
// Hangul syllables are width 2
|
|
3003
|
+
try testing.expectEqual(@as(u32, 4), width); // 2 chars * 2 = 4
|
|
3004
|
+
}
|
|
3005
|
+
|
|
3006
|
+
test "calculateTextWidth: CJK with ASCII" {
|
|
3007
|
+
const mixed = "Hello世界World"; // ASCII + CJK + ASCII
|
|
3008
|
+
const width = utf8.calculateTextWidth(mixed, 4, false, .unicode);
|
|
3009
|
+
// "Hello" (5) + "世界" (4) + "World" (5) = 14
|
|
3010
|
+
try testing.expectEqual(@as(u32, 14), width);
|
|
3011
|
+
}
|
|
3012
|
+
|
|
3013
|
+
// ----------------------------------------------------------------------------
|
|
3014
|
+
// Combining Marks and Diacritics
|
|
3015
|
+
// ----------------------------------------------------------------------------
|
|
3016
|
+
|
|
3017
|
+
test "calculateTextWidth: multiple combining marks on one base" {
|
|
3018
|
+
// Base + multiple combining marks
|
|
3019
|
+
const multiple = "e\u{0301}\u{0302}\u{0304}"; // e + acute + circumflex + macron
|
|
3020
|
+
const width = utf8.calculateTextWidth(multiple, 4, false, .unicode);
|
|
3021
|
+
try testing.expectEqual(@as(u32, 1), width);
|
|
3022
|
+
}
|
|
3023
|
+
|
|
3024
|
+
test "calculateTextWidth: combining enclosing marks" {
|
|
3025
|
+
// Combining enclosing circle backslash U+20E0
|
|
3026
|
+
const enclosed = "a\u{20E0}";
|
|
3027
|
+
const width = utf8.calculateTextWidth(enclosed, 4, false, .unicode);
|
|
3028
|
+
try testing.expectEqual(@as(u32, 1), width);
|
|
3029
|
+
}
|
|
3030
|
+
|
|
3031
|
+
test "calculateTextWidth: Vietnamese with multiple diacritics" {
|
|
3032
|
+
// Vietnamese uses Latin with complex diacritics
|
|
3033
|
+
const vietnamese = "Tiếng Việt"; // Vietnamese language
|
|
3034
|
+
const width = utf8.calculateTextWidth(vietnamese, 4, false, .unicode);
|
|
3035
|
+
// Each base character with combining marks = 1 width
|
|
3036
|
+
// "Tiếng" (5) + " " (1) + "Việt" (4) = 10
|
|
3037
|
+
try testing.expectEqual(@as(u32, 10), width);
|
|
3038
|
+
}
|
|
3039
|
+
|
|
3040
|
+
// ----------------------------------------------------------------------------
|
|
3041
|
+
// Zero-Width Characters
|
|
3042
|
+
// ----------------------------------------------------------------------------
|
|
3043
|
+
|
|
3044
|
+
test "calculateTextWidth: zero width joiner (ZWJ)" {
|
|
3045
|
+
// ZWJ by itself (shouldn't happen, but test it) - it's a format char with width 0
|
|
3046
|
+
const zwj = "\u{200D}";
|
|
3047
|
+
const width = utf8.calculateTextWidth(zwj, 4, false, .unicode);
|
|
3048
|
+
try testing.expectEqual(@as(u32, 0), width); // Width of ZWJ is 0 (Cf category)
|
|
3049
|
+
}
|
|
3050
|
+
|
|
3051
|
+
test "calculateTextWidth: zero width non-joiner (ZWNJ)" {
|
|
3052
|
+
// ZWNJ U+200C
|
|
3053
|
+
const zwnj = "ab\u{200C}cd";
|
|
3054
|
+
const width = utf8.calculateTextWidth(zwnj, 4, false, .unicode);
|
|
3055
|
+
// ZWNJ has width 0, so should be 4 (a, b, c, d)
|
|
3056
|
+
try testing.expectEqual(@as(u32, 4), width);
|
|
3057
|
+
}
|
|
3058
|
+
|
|
3059
|
+
test "calculateTextWidth: zero width space" {
|
|
3060
|
+
// ZWSP U+200B is Cf (format) category with width 0
|
|
3061
|
+
const zwsp = "a\u{200B}b\u{200B}c";
|
|
3062
|
+
const width = utf8.calculateTextWidth(zwsp, 4, false, .unicode);
|
|
3063
|
+
// a(1) + ZWSP(0) + b(1) + ZWSP(0) + c(1) = 3
|
|
3064
|
+
try testing.expectEqual(@as(u32, 3), width);
|
|
3065
|
+
}
|
|
3066
|
+
|
|
3067
|
+
test "calculateTextWidth: word joiner" {
|
|
3068
|
+
// Word joiner U+2060 is Cf (format) category with width 0
|
|
3069
|
+
const word_joiner = "word\u{2060}joiner";
|
|
3070
|
+
const width = utf8.calculateTextWidth(word_joiner, 4, false, .unicode);
|
|
3071
|
+
// word(4) + word_joiner(0) + joiner(6) = 10
|
|
3072
|
+
try testing.expectEqual(@as(u32, 10), width);
|
|
3073
|
+
}
|
|
3074
|
+
|
|
3075
|
+
// ----------------------------------------------------------------------------
|
|
3076
|
+
// Special Unicode Spaces
|
|
3077
|
+
// ----------------------------------------------------------------------------
|
|
3078
|
+
|
|
3079
|
+
test "calculateTextWidth: various Unicode spaces" {
|
|
3080
|
+
// En space U+2002
|
|
3081
|
+
const en_space = "a\u{2002}b";
|
|
3082
|
+
// Em space U+2003
|
|
3083
|
+
const em_space = "a\u{2003}b";
|
|
3084
|
+
// Thin space U+2009
|
|
3085
|
+
const thin_space = "a\u{2009}b";
|
|
3086
|
+
// Hair space U+200A
|
|
3087
|
+
const hair_space = "a\u{200A}b";
|
|
3088
|
+
// Ideographic space U+3000 (CJK)
|
|
3089
|
+
const ideo_space = "a\u{3000}b";
|
|
3090
|
+
|
|
3091
|
+
// En, em, thin and hair spaces are each expected to take 1 column: a(1) + space(1) + b(1) = 3
|
|
3092
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(en_space, 4, false, .unicode));
|
|
3093
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(em_space, 4, false, .unicode));
|
|
3094
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(thin_space, 4, false, .unicode));
|
|
3095
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(hair_space, 4, false, .unicode));
|
|
3096
|
+
// Ideographic space is the exception, width 2 (fullwidth): a(1) + U+3000(2) + b(1) = 4
|
|
3097
|
+
try testing.expectEqual(@as(u32, 4), utf8.calculateTextWidth(ideo_space, 4, false, .unicode));
|
|
3098
|
+
}
|
|
3099
|
+
|
|
3100
|
+
test "calculateTextWidth: non-breaking spaces" {
|
|
3101
|
+
// NBSP U+00A0
|
|
3102
|
+
const nbsp = "a\u{00A0}b";
|
|
3103
|
+
// Narrow NBSP U+202F
|
|
3104
|
+
const narrow_nbsp = "a\u{202F}b";
|
|
3105
|
+
|
|
3106
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(nbsp, 4, false, .unicode));
|
|
3107
|
+
try testing.expectEqual(@as(u32, 3), utf8.calculateTextWidth(narrow_nbsp, 4, false, .unicode));
|
|
3108
|
+
}
|
|
3109
|
+
|
|
3110
|
+
// ----------------------------------------------------------------------------
|
|
3111
|
+
// Emoji Modifiers and Tags
|
|
3112
|
+
// ----------------------------------------------------------------------------
|
|
3113
|
+
|
|
3114
|
+
test "calculateTextWidth: emoji with multiple modifiers" {
|
|
3115
|
+
// Rainbow flag (black flag + rainbow)
|
|
3116
|
+
const rainbow_flag = "🏴🌈"; // U+1F3F4 U+200D U+1F308
|
|
3117
|
+
const width = utf8.calculateTextWidth(rainbow_flag, 4, false, .unicode);
|
|
3118
|
+
try testing.expectEqual(@as(u32, 2), width);
|
|
3119
|
+
}
|
|
3120
|
+
|
|
3121
|
+
test "calculateTextWidth: emoji tag sequences (subdivision flags)" {
|
|
3122
|
+
// England flag: 🏴 (black flag + tag chars + cancel tag)
|
|
3123
|
+
// This is complex to type, so we'll test a simpler version
|
|
3124
|
+
const black_flag = "🏴"; // Just the base flag
|
|
3125
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(black_flag, 4, false, .unicode));
|
|
3126
|
+
}
|
|
3127
|
+
|
|
3128
|
+
test "calculateTextWidth: hair style variations" {
|
|
3129
|
+
// Person: red hair, curly hair, white hair, bald
|
|
3130
|
+
const red_hair = "👩🦰";
|
|
3131
|
+
const curly_hair = "👨🦱";
|
|
3132
|
+
const white_hair = "👩🦳";
|
|
3133
|
+
const bald = "👨🦲";
|
|
3134
|
+
|
|
3135
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(red_hair, 4, false, .unicode));
|
|
3136
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(curly_hair, 4, false, .unicode));
|
|
3137
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(white_hair, 4, false, .unicode));
|
|
3138
|
+
try testing.expectEqual(@as(u32, 2), utf8.calculateTextWidth(bald, 4, false, .unicode));
|
|
3139
|
+
}
|
|
3140
|
+
|
|
3141
|
+
// ----------------------------------------------------------------------------
|
|
3142
|
+
// Mixed Content and Real-world Scenarios
|
|
3143
|
+
// ----------------------------------------------------------------------------
|
|
3144
|
+
|
|
3145
|
+
test "calculateTextWidth: multilingual sentence" {
|
|
3146
|
+
// Mix of Latin, CJK, Arabic, Emoji
|
|
3147
|
+
const text = "Hello 世界! مرحبا 👋";
|
|
3148
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3149
|
+
// "Hello " (6) + "世界" (4) + "! " (2) + "مرحبا" (5) + " " (1) + "👋" (2) = 20
|
|
3150
|
+
try testing.expect(width >= 18); // Allow some flexibility for combining marks
|
|
3151
|
+
}
|
|
3152
|
+
|
|
3153
|
+
test "calculateTextWidth: code with emoji comments" {
|
|
3154
|
+
const code = "const x = 42; // ✅ works";
|
|
3155
|
+
const width = utf8.calculateTextWidth(code, 4, false, .unicode);
|
|
3156
|
+
// Most chars are width 1, checkmark is width 2
|
|
3157
|
+
// "const x = 42; // " (17) + "✅" (2) + " works" (6) = 25
|
|
3158
|
+
try testing.expectEqual(@as(u32, 25), width);
|
|
3159
|
+
}
|
|
3160
|
+
|
|
3161
|
+
test "calculateTextWidth: emoji sentence" {
|
|
3162
|
+
const text = "I ❤️ 🍕 and 🍣!";
|
|
3163
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3164
|
+
// "I " (2) + "❤️" (2) + " " (1) + "🍕" (2) + " and " (5) + "🍣" (2) + "!" (1) = 15
|
|
3165
|
+
try testing.expectEqual(@as(u32, 15), width);
|
|
3166
|
+
}
|
|
3167
|
+
|
|
3168
|
+
test "calculateTextWidth: social media style text" {
|
|
3169
|
+
const text = "#OpenTUI 🚀 is #awesome 💯!";
|
|
3170
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3171
|
+
// "#OpenTUI " (9) + "🚀" (2) + " is #awesome " (13) + "💯" (2) + "!" (1) = 27
|
|
3172
|
+
try testing.expectEqual(@as(u32, 27), width);
|
|
3173
|
+
}
|
|
3174
|
+
|
|
3175
|
+
// ----------------------------------------------------------------------------
|
|
3176
|
+
// Edge Cases and Boundaries
|
|
3177
|
+
// ----------------------------------------------------------------------------
|
|
3178
|
+
|
|
3179
|
+
test "calculateTextWidth: surrogate pair edge cases" {
|
|
3180
|
+
// Supplementary-plane letters (surrogate pairs in UTF-16, 4-byte sequences in UTF-8)
|
|
3181
|
+
const fraktur = "𝕳𝖊𝖑𝖑𝖔"; // Mathematical bold Fraktur letters (U+1D573 etc.)
|
|
3182
|
+
const width = utf8.calculateTextWidth(fraktur, 4, false, .unicode);
|
|
3183
|
+
// These are letters, not emoji; each one is expected to be width 1
|
|
3184
|
+
try testing.expectEqual(@as(u32, 5), width);
|
|
3185
|
+
}
|
|
3186
|
+
|
|
3187
|
+
test "calculateTextWidth: long grapheme cluster chain" {
|
|
3188
|
+
// Build one base letter followed by a long run of combining marks
|
|
3189
|
+
var buf: std.ArrayListUnmanaged(u8) = .{};
|
|
3190
|
+
defer buf.deinit(testing.allocator);
|
|
3191
|
+
|
|
3192
|
+
try buf.appendSlice(testing.allocator, "e");
|
|
3193
|
+
// Append the combining mark ten times, counting down
|
|
3194
|
+
var remaining: usize = 10;
|
|
3195
|
+
while (remaining > 0) : (remaining -= 1) {
|
|
3196
|
+
try buf.appendSlice(testing.allocator, "\u{0301}"); // Combining acute accent
|
|
3197
|
+
}
|
|
3198
|
+
|
|
3199
|
+
const measured = utf8.calculateTextWidth(buf.items, 4, false, .unicode);
|
|
3200
|
+
// The whole chain is expected to measure as a single width-1 grapheme
|
|
3201
|
+
try testing.expectEqual(@as(u32, 1), measured);
|
|
3202
|
+
}
|
|
3203
|
+
|
|
3204
|
+
test "calculateTextWidth: all emoji skin tones in sequence" {
|
|
3205
|
+
const text = "👋🏻👋🏼👋🏽👋🏾👋🏿";
|
|
3206
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3207
|
+
// 5 emoji with skin tones, each is 1 grapheme with width 2
|
|
3208
|
+
try testing.expectEqual(@as(u32, 10), width); // 5 * 2 = 10
|
|
3209
|
+
}
|
|
3210
|
+
|
|
3211
|
+
test "calculateTextWidth: emoji zodiac signs" {
|
|
3212
|
+
const zodiac = "♈♉♊♋♌♍♎♏♐♑♒♓"; // All 12 zodiac signs
|
|
3213
|
+
const width = utf8.calculateTextWidth(zodiac, 4, false, .unicode);
|
|
3214
|
+
// Each zodiac symbol is width 2
|
|
3215
|
+
try testing.expectEqual(@as(u32, 24), width); // 12 * 2 = 24
|
|
3216
|
+
}
|
|
3217
|
+
|
|
3218
|
+
test "calculateTextWidth: mathematical symbols" {
|
|
3219
|
+
// Mathematical operators and symbols
|
|
3220
|
+
const math = "∀∃∈∉∋∑∏∫∂∇≠≤≥"; // Various math symbols
|
|
3221
|
+
const width = utf8.calculateTextWidth(math, 4, false, .unicode);
|
|
3222
|
+
// Most math symbols are width 1
|
|
3223
|
+
try testing.expect(width >= 13);
|
|
3224
|
+
}
|
|
3225
|
+
|
|
3226
|
+
test "calculateTextWidth: box drawing characters" {
|
|
3227
|
+
// Box drawing characters (width 1)
|
|
3228
|
+
const box = "┌─┐│└─┘"; // Simple box
|
|
3229
|
+
const width = utf8.calculateTextWidth(box, 4, false, .unicode);
|
|
3230
|
+
try testing.expectEqual(@as(u32, 7), width);
|
|
3231
|
+
}
|
|
3232
|
+
|
|
3233
|
+
test "calculateTextWidth: braille patterns" {
|
|
3234
|
+
// Braille patterns U+2800-U+28FF
|
|
3235
|
+
const braille = "⠀⠁⠂⠃⠄⠅⠆⠇"; // Some braille patterns
|
|
3236
|
+
const width = utf8.calculateTextWidth(braille, 4, false, .unicode);
|
|
3237
|
+
// Braille patterns are width 1
|
|
3238
|
+
try testing.expectEqual(@as(u32, 8), width);
|
|
3239
|
+
}
|
|
3240
|
+
|
|
3241
|
+
test "calculateTextWidth: musical symbols" {
|
|
3242
|
+
// Musical notation symbols
|
|
3243
|
+
const music = "𝄞𝄢𝅘𝅥𝅮"; // Treble clef, bass clef, notes (U+1D11E etc)
|
|
3244
|
+
const width = utf8.calculateTextWidth(music, 4, false, .unicode);
|
|
3245
|
+
// Musical symbols are typically width 1, but encoding might be issue - just verify no crash
|
|
3246
|
+
try testing.expect(width >= 0); // Accept any non-negative width
|
|
3247
|
+
}
|
|
3248
|
+
|
|
3249
|
+
test "calculateTextWidth: weather and nature emoji" {
|
|
3250
|
+
const weather = "☀️🌤️⛅🌦️🌧️⛈️"; // Sun, clouds, rain
|
|
3251
|
+
const width = utf8.calculateTextWidth(weather, 4, false, .unicode);
|
|
3252
|
+
// Each emoji is width 2
|
|
3253
|
+
try testing.expectEqual(@as(u32, 12), width); // 6 * 2 = 12
|
|
3254
|
+
}
|
|
3255
|
+
|
|
3256
|
+
test "calculateTextWidth: food emoji collection" {
|
|
3257
|
+
const food = "🍎🍌🍇🍓🥕🥦🍞🧀"; // Various food items
|
|
3258
|
+
const width = utf8.calculateTextWidth(food, 4, false, .unicode);
|
|
3259
|
+
// 8 emoji * 2 = 16
|
|
3260
|
+
try testing.expectEqual(@as(u32, 16), width);
|
|
3261
|
+
}
|
|
3262
|
+
|
|
3263
|
+
test "calculateTextWidth: animal emoji" {
|
|
3264
|
+
const animals = "🐶🐱🐭🐹🐰🦊🐻🐼"; // Various animals
|
|
3265
|
+
const width = utf8.calculateTextWidth(animals, 4, false, .unicode);
|
|
3266
|
+
try testing.expectEqual(@as(u32, 16), width); // 8 * 2 = 16
|
|
3267
|
+
}
|
|
3268
|
+
|
|
3269
|
+
test "calculateTextWidth: realistic chat message" {
|
|
3270
|
+
const message = "Hey! 👋 Can you review my PR? 🙏 It fixes the bug 🐛 we discussed earlier. Thanks! 😊";
|
|
3271
|
+
const width = utf8.calculateTextWidth(message, 4, false, .unicode);
|
|
3272
|
+
// Long string with multiple emoji - only a loose lower bound is asserted, not an exact width
|
|
// NOTE(review): by hand count this is 77 ASCII columns + 4 emoji; at width 2 each that would be 85 - confirm before tightening
|
3273
|
+
try testing.expect(width > 70);
|
|
3274
|
+
}
|
|
3275
|
+
|
|
3276
|
+
test "calculateTextWidth: empty string with tabs" {
|
|
3277
|
+
// Despite the test name, the input contains no tab characters: it checks that an
// empty string measures 0 when the second argument (presumably the tab width - confirm) is 4 and 8
const text = "";
|
|
3278
|
+
try testing.expectEqual(@as(u32, 0), utf8.calculateTextWidth(text, 4, false, .unicode));
|
|
3279
|
+
try testing.expectEqual(@as(u32, 0), utf8.calculateTextWidth(text, 8, false, .unicode));
|
|
3280
|
+
}
|
|
3281
|
+
|
|
3282
|
+
test "calculateTextWidth: only combining marks (invalid but should not crash)" {
|
|
3283
|
+
const text = "\u{0301}\u{0302}\u{0303}"; // Just combining marks, no base
|
|
3284
|
+
const width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3285
|
+
// Should handle gracefully - each combining mark might be width 0
// NOTE(review): if calculateTextWidth returns an unsigned integer (other tests compare it against u32),
// `width >= 0` is always true, so this only checks that the call completes - confirm
|
|
3286
|
+
try testing.expect(width >= 0);
|
|
3287
|
+
}
|
|
3288
|
+
|
|
3289
|
+
test "calculateTextWidth: emoji collection - celestial and symbols" {
|
|
3290
|
+
const celestial = "🌟🔮✨";
|
|
3291
|
+
const width = utf8.calculateTextWidth(celestial, 4, false, .unicode);
|
|
3292
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3293
|
+
}
|
|
3294
|
+
|
|
3295
|
+
test "calculateTextWidth: emoji collection - religious and gestures" {
|
|
3296
|
+
const religious = "🙏";
|
|
3297
|
+
const width = utf8.calculateTextWidth(religious, 4, false, .unicode);
|
|
3298
|
+
try testing.expectEqual(@as(u32, 2), width); // 1 emoji * 2 = 2
|
|
3299
|
+
}
|
|
3300
|
+
|
|
3301
|
+
test "calculateTextWidth: emoji collection - ZWJ sequences astronauts" {
|
|
3302
|
+
const astronauts = "🧑🚀👨🚀👩🚀";
|
|
3303
|
+
const width = utf8.calculateTextWidth(astronauts, 4, false, .unicode);
|
|
3304
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
|
|
3305
|
+
}
|
|
3306
|
+
|
|
3307
|
+
test "calculateTextWidth: emoji collection - rainbow and magical creatures" {
|
|
3308
|
+
const magical = "🌈🦄🧚♀️";
|
|
3309
|
+
const width = utf8.calculateTextWidth(magical, 4, false, .unicode);
|
|
3310
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
|
|
3311
|
+
}
|
|
3312
|
+
|
|
3313
|
+
test "calculateTextWidth: emoji collection - books and writing" {
|
|
3314
|
+
const writing = "📜📖✍️";
|
|
3315
|
+
const width = utf8.calculateTextWidth(writing, 4, false, .unicode);
|
|
3316
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3317
|
+
}
|
|
3318
|
+
|
|
3319
|
+
test "calculateTextWidth: emoji collection - Japanese culture" {
|
|
3320
|
+
const japanese = "🏯🎋🌸";
|
|
3321
|
+
const width = utf8.calculateTextWidth(japanese, 4, false, .unicode);
|
|
3322
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3323
|
+
}
|
|
3324
|
+
|
|
3325
|
+
test "calculateTextWidth: emoji collection - traditional Japanese items" {
|
|
3326
|
+
const traditional = "📯🎴🎎";
|
|
3327
|
+
const width = utf8.calculateTextWidth(traditional, 4, false, .unicode);
|
|
3328
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3329
|
+
}
|
|
3330
|
+
|
|
3331
|
+
test "calculateTextWidth: emoji collection - hearts and peace" {
|
|
3332
|
+
const peace = "💝🕊️☮️";
|
|
3333
|
+
const width = utf8.calculateTextWidth(peace, 4, false, .unicode);
|
|
3334
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3335
|
+
}
|
|
3336
|
+
|
|
3337
|
+
test "calculateTextWidth: emoji collection - meditation and nature" {
|
|
3338
|
+
const meditation = "🧘♂️🌳";
|
|
3339
|
+
const width = utf8.calculateTextWidth(meditation, 4, false, .unicode);
|
|
3340
|
+
try testing.expectEqual(@as(u32, 4), width); // 2 graphemes * 2 = 4
|
|
3341
|
+
}
|
|
3342
|
+
|
|
3343
|
+
test "calculateTextWidth: emoji collection - food and drink" {
|
|
3344
|
+
const food = "🍵🥟";
|
|
3345
|
+
const width = utf8.calculateTextWidth(food, 4, false, .unicode);
|
|
3346
|
+
try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
|
|
3347
|
+
}
|
|
3348
|
+
|
|
3349
|
+
test "calculateTextWidth: emoji collection - exotic animals" {
|
|
3350
|
+
const animals = "🦥🦦🦧🦨🦩🦚🦜🦝🦞🦟";
|
|
3351
|
+
const width = utf8.calculateTextWidth(animals, 4, false, .unicode);
|
|
3352
|
+
try testing.expectEqual(@as(u32, 20), width); // 10 emoji * 2 = 20
|
|
3353
|
+
}
|
|
3354
|
+
|
|
3355
|
+
test "calculateTextWidth: emoji collection - communication" {
|
|
3356
|
+
const communication = "🤫🗣️💬";
|
|
3357
|
+
const width = utf8.calculateTextWidth(communication, 4, false, .unicode);
|
|
3358
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3359
|
+
}
|
|
3360
|
+
|
|
3361
|
+
test "calculateTextWidth: emoji collection - water and nature" {
|
|
3362
|
+
const nature = "🌊📝🎭";
|
|
3363
|
+
const width = utf8.calculateTextWidth(nature, 4, false, .unicode);
|
|
3364
|
+
try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
|
|
3365
|
+
}
|
|
3366
|
+
|
|
3367
|
+
test "calculateTextWidth: emoji collection - landscape" {
    // National park (U+1F3DE) with VS16, then 🌊 and 💧. The invisible
    // variation selector is written as an escape so it cannot be lost.
    const landscape = "\u{1F3DE}\u{FE0F}🌊💧";
    const width = utf8.calculateTextWidth(landscape, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
}
|
|
3372
|
+
|
|
3373
|
+
test "calculateTextWidth: emoji collection - circus and art" {
    // Juggler (U+1F939) + ZWJ + male sign (U+2642) + VS16 forms one grapheme,
    // followed by 🎪 and 🎨. The joiner and variation selector are invisible,
    // so they are written as escapes to keep the sequence intact.
    const circus = "\u{1F939}\u{200D}\u{2642}\u{FE0F}🎪🎨";
    const width = utf8.calculateTextWidth(circus, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
}
|
|
3378
|
+
|
|
3379
|
+
test "calculateTextWidth: emoji collection - shopping and food items" {
    // 🏪🛒💰, hot pepper (U+1F336) with VS16, 🧄🧅. The invisible variation
    // selector is written as an escape so it cannot be silently dropped.
    const shopping = "🏪🛒💰\u{1F336}\u{FE0F}🧄🧅";
    const width = utf8.calculateTextWidth(shopping, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 12), width); // 6 emoji * 2 = 12
}
|
|
3384
|
+
|
|
3385
|
+
test "calculateTextWidth: emoji collection - textiles and art" {
    // 🧵👘🎨 followed by framed picture (U+1F5BC) with VS16. The invisible
    // variation selector is written as an escape so it cannot be lost.
    const textiles = "🧵👘🎨\u{1F5BC}\u{FE0F}";
    const width = utf8.calculateTextWidth(textiles, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
}
|
|
3390
|
+
|
|
3391
|
+
test "calculateTextWidth: emoji collection - prehistoric creatures" {
    // Four emoji, two columns each.
    const expected_columns: u32 = 8;
    const actual_columns = utf8.calculateTextWidth("🦖🦕🐉🐲", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3396
|
+
|
|
3397
|
+
test "calculateTextWidth: emoji collection - hand gestures" {
    // Three emoji, two columns each.
    const columns = utf8.calculateTextWidth("🤝🤲👐", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
|
|
3402
|
+
|
|
3403
|
+
test "calculateTextWidth: emoji collection - lanterns and lights" {
    // 🏮🎆🎇, candle (U+1F56F) with VS16, 💡. The invisible variation selector
    // is written as an escape so it cannot be silently dropped.
    const lanterns = "🏮🎆🎇\u{1F56F}\u{FE0F}💡";
    const width = utf8.calculateTextWidth(lanterns, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 10), width); // 5 emoji * 2 = 10
}
|
|
3408
|
+
|
|
3409
|
+
test "calculateTextWidth: emoji collection - dancers" {
    // Three emoji, two columns each.
    const expected_columns: u32 = 6;
    const actual_columns = utf8.calculateTextWidth("💃🕺🩰", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3414
|
+
|
|
3415
|
+
test "calculateTextWidth: emoji collection - musical instruments" {
    // Six emoji, two columns each.
    const columns = utf8.calculateTextWidth("🎻🎺🎷🎸🪕🪘", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 12), columns);
}
|
|
3420
|
+
|
|
3421
|
+
test "calculateTextWidth: emoji collection - bells and shrine" {
    // 🔔 followed by shinto shrine (U+26E9) with VS16. The invisible
    // variation selector is written as an escape so it cannot be lost.
    const bells = "🔔\u{26E9}\u{FE0F}";
    const width = utf8.calculateTextWidth(bells, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
}
|
|
3426
|
+
|
|
3427
|
+
test "calculateTextWidth: emoji collection - shocked and amazed" {
    // Dizzy face (U+1F635) + ZWJ + dizzy symbol (U+1F4AB) forms one grapheme,
    // followed by 🤯 and ✨. The invisible joiner is written as an escape;
    // without it the string is four emoji, not three graphemes.
    const shocked = "\u{1F635}\u{200D}\u{1F4AB}🤯✨";
    const width = utf8.calculateTextWidth(shocked, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 graphemes * 2 = 6
}
|
|
3432
|
+
|
|
3433
|
+
test "calculateTextWidth: emoji collection - sweets and bubble tea" {
    // Four emoji, two columns each.
    const expected_columns: u32 = 8;
    const actual_columns = utf8.calculateTextWidth("🧋🍬🍭🧁", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3438
|
+
|
|
3439
|
+
test "calculateTextWidth: emoji collection - machinery and robots" {
    // Gear (U+2699) with VS16, then 🤖🦾🦿. The invisible variation selector
    // is written as an escape so it cannot be silently dropped.
    const machinery = "\u{2699}\u{FE0F}🤖🦾🦿";
    const width = utf8.calculateTextWidth(machinery, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 8), width); // 4 emoji * 2 = 8
}
|
|
3444
|
+
|
|
3445
|
+
test "calculateTextWidth: emoji collection - vehicles" {
    // Five emoji, two columns each.
    const columns = utf8.calculateTextWidth("🚗🚕🚙🚌🚎", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 10), columns);
}
|
|
3450
|
+
|
|
3451
|
+
test "calculateTextWidth: emoji collection - space travel" {
    // 🚀🛸 followed by satellite (U+1F6F0) with VS16. The invisible variation
    // selector is written as an escape so it cannot be lost.
    const space = "🚀🛸\u{1F6F0}\u{FE0F}";
    const width = utf8.calculateTextWidth(space, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
}
|
|
3456
|
+
|
|
3457
|
+
test "calculateTextWidth: emoji collection - technology" {
    // 🐍💻 followed by keyboard (U+2328) with VS16. The variation selector is
    // what gives the keyboard emoji presentation, so it is written as an
    // explicit escape rather than left as an invisible character.
    const tech = "🐍💻\u{2328}\u{FE0F}";
    const width = utf8.calculateTextWidth(tech, 4, false, .unicode);
    // 🐍(2) + 💻(2) + keyboard+VS16(2) = 6
    try testing.expectEqual(@as(u32, 6), width);
}
|
|
3463
|
+
|
|
3464
|
+
test "calculateTextWidth: emoji collection - education and brain" {
    // Three emoji, two columns each.
    const expected_columns: u32 = 6;
    const actual_columns = utf8.calculateTextWidth("🧠📚🎓", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3469
|
+
|
|
3470
|
+
test "calculateTextWidth: emoji collection - professional ZWJ sequences" {
    // Four person+ZWJ+object sequences: man/woman office worker (briefcase,
    // U+1F4BC) and man/woman scientist (microscope, U+1F52C). The joiners are
    // invisible, so every code point is written as an escape; without U+200D
    // the string would be eight separate emoji instead of four graphemes.
    const professionals =
        "\u{1F468}\u{200D}\u{1F4BC}" ++
        "\u{1F469}\u{200D}\u{1F4BC}" ++
        "\u{1F468}\u{200D}\u{1F52C}" ++
        "\u{1F469}\u{200D}\u{1F52C}";
    const width = utf8.calculateTextWidth(professionals, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 8), width); // 4 graphemes * 2 = 8
}
|
|
3475
|
+
|
|
3476
|
+
test "calculateTextWidth: emoji collection - earth globes" {
    // Three emoji, two columns each.
    const columns = utf8.calculateTextWidth("🌍🌎🌏", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
|
|
3481
|
+
|
|
3482
|
+
test "calculateTextWidth: emoji collection - family ZWJ sequence" {
    // Man, woman, girl, boy joined by ZWJ (U+200D) into a single grapheme.
    // The joiners are invisible, so the sequence is written with escapes;
    // without them the string is four separate emoji and the width is not 2.
    const family = "\u{1F468}\u{200D}\u{1F469}\u{200D}\u{1F467}\u{200D}\u{1F466}";
    const width = utf8.calculateTextWidth(family, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), width); // 1 grapheme * 2 = 2
}
|
|
3487
|
+
|
|
3488
|
+
test "calculateTextWidth: emoji collection - elderly people" {
    // Two emoji, two columns each.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("👴👵", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3493
|
+
|
|
3494
|
+
test "calculateTextWidth: emoji collection - sunrise and sunset" {
    // Three emoji, two columns each.
    const columns = utf8.calculateTextWidth("🌅🌄🌠", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
|
|
3499
|
+
|
|
3500
|
+
test "calculateTextWidth: emoji collection - mountains" {
    // Snow-capped mountain (U+1F3D4) and mountain (U+26F0), each with VS16,
    // followed by 🗻. The invisible variation selectors are written as
    // escapes so they cannot be silently dropped.
    const mountains = "\u{1F3D4}\u{FE0F}\u{26F0}\u{FE0F}🗻";
    const width = utf8.calculateTextWidth(mountains, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
}
|
|
3505
|
+
|
|
3506
|
+
test "calculateTextWidth: emoji collection - thoughts and dreams" {
    // Three emoji, two columns each.
    const expected_columns: u32 = 6;
    const actual_columns = utf8.calculateTextWidth("💭💤🌌", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3511
|
+
|
|
3512
|
+
test "calculateTextWidth: emoji collection - campfire" {
    // 🔥 followed by camping (U+1F3D5) with VS16. The invisible variation
    // selector is written as an escape so it cannot be lost.
    const campfire = "🔥\u{1F3D5}\u{FE0F}";
    const width = utf8.calculateTextWidth(campfire, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), width); // 2 emoji * 2 = 4
}
|
|
3517
|
+
|
|
3518
|
+
test "calculateTextWidth: emoji collection - cooking" {
    // Three emoji, two columns each.
    const columns = utf8.calculateTextWidth("🍛🍲🥘", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
|
|
3523
|
+
|
|
3524
|
+
test "calculateTextWidth: emoji collection - love hearts" {
    // Heavy black heart (U+2764) with VS16, then 💕 and 💖. The invisible
    // variation selector is written as an escape so it cannot be lost.
    const hearts = "\u{2764}\u{FE0F}💕💖";
    const width = utf8.calculateTextWidth(hearts, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
}
|
|
3529
|
+
|
|
3530
|
+
test "calculateTextWidth: emoji collection - media" {
    // 📸, film frames (U+1F39E) with VS16, 📹. The invisible variation
    // selector is written as an escape so it cannot be silently dropped.
    const media = "📸\u{1F39E}\u{FE0F}📹";
    const width = utf8.calculateTextWidth(media, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), width); // 3 emoji * 2 = 6
}
|
|
3535
|
+
|
|
3536
|
+
test "calculateTextWidth: emoji collection - global and handshake" {
    // Three emoji, two columns each.
    const expected_columns: u32 = 6;
    const actual_columns = utf8.calculateTextWidth("🌐🤝🌈", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3541
|
+
|
|
3542
|
+
test "calculateTextWidth: emoji collection - special symbols" {
    // Six emoji, two columns each.
    const columns = utf8.calculateTextWidth("🦩🧿🪬🫀🫁🧠", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 12), columns);
}
|
|
3547
|
+
|
|
3548
|
+
test "calculateTextWidth: emoji collection - strength" {
    // Three emoji, two columns each.
    const expected_columns: u32 = 6;
    const actual_columns = utf8.calculateTextWidth("💪✊🙌", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3553
|
+
|
|
3554
|
+
test "calculateTextWidth: emoji collection - entertainment" {
    // Six emoji, two columns each.
    const columns = utf8.calculateTextWidth("🎬🎭🎪✨🌟⭐", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 12), columns);
}
|
|
3559
|
+
|
|
3560
|
+
// ============================================================================
|
|
3561
|
+
// DEVANAGARI SCRIPT WIDTH TESTS
|
|
3562
|
+
// ============================================================================
|
|
3563
|
+
|
|
3564
|
+
test "calculateTextWidth: Devanagari - Sanskrit word" {
    // The word for culture/civilization: four base consonants (SA, SA, KA, TA),
    // each one column wide; the attached combining marks add no width.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("संस्कृति", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3571
|
+
|
|
3572
|
+
test "calculateTextWidth: Devanagari - namaste" {
    // Four base consonants (NA, MA, SA, TA) at one column each.
    const columns = utf8.calculateTextWidth("नमस्ते", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), columns);
}
|
|
3578
|
+
|
|
3579
|
+
test "calculateTextWidth: Devanagari - Om symbol" {
    // The single Om sign is expected to take one column.
    const expected_columns: u32 = 1;
    const actual_columns = utf8.calculateTextWidth("ॐ", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3584
|
+
|
|
3585
|
+
test "calculateTextWidth: Devanagari - mixed with ASCII" {
    // "Hello " (6) + the four-consonant Devanagari word (4) + " World" (6) = 16
    const columns = utf8.calculateTextWidth("Hello नमस्ते World", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 16), columns);
}
|
|
3591
|
+
|
|
3592
|
+
// ============================================================================
|
|
3593
|
+
// CJK SCRIPT WIDTH TESTS
|
|
3594
|
+
// ============================================================================
|
|
3595
|
+
|
|
3596
|
+
test "calculateTextWidth: Chinese characters - kanji" {
    // Two characters, two columns each.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("漢字", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3601
|
+
|
|
3602
|
+
test "calculateTextWidth: Hiragana" {
    // Four characters, two columns each.
    const columns = utf8.calculateTextWidth("ひらがな", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 8), columns);
}
|
|
3607
|
+
|
|
3608
|
+
test "calculateTextWidth: Katakana" {
    // Four characters, two columns each.
    const expected_columns: u32 = 8;
    const actual_columns = utf8.calculateTextWidth("カタカナ", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3613
|
+
|
|
3614
|
+
test "calculateTextWidth: Korean Hangul" {
    // Two syllable blocks, two columns each.
    const columns = utf8.calculateTextWidth("한글", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), columns);
}
|
|
3619
|
+
|
|
3620
|
+
test "calculateTextWidth: Korean words - love and peace" {
    // Two syllables (2 + 2), one space (1), two more syllables (2 + 2) = 9
    const expected_columns: u32 = 9;
    const actual_columns = utf8.calculateTextWidth("사랑 평화", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3626
|
+
|
|
3627
|
+
// ============================================================================
|
|
3628
|
+
// TIBETAN SCRIPT WIDTH TESTS
|
|
3629
|
+
// ============================================================================
|
|
3630
|
+
|
|
3631
|
+
test "calculateTextWidth: Tibetan script" {
    // Tibetan stacks subjoined letters (width 0) under base letters (width 1),
    // so only a plausible range is checked rather than an exact value.
    const columns = utf8.calculateTextWidth("རྒྱ་མཚོ", 4, false, .unicode);
    try testing.expect(3 <= columns and columns <= 6);
}
|
|
3638
|
+
|
|
3639
|
+
// ============================================================================
|
|
3640
|
+
// OTHER INDIC SCRIPTS WIDTH TESTS
|
|
3641
|
+
// ============================================================================
|
|
3642
|
+
|
|
3643
|
+
test "calculateTextWidth: Gujarati script" {
    // Four base letters at one column each; the three dependent vowel signs
    // contribute no width.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("ગુજરાતી", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3649
|
+
|
|
3650
|
+
test "calculateTextWidth: Tamil script word" {
    // Three base letters at one column each; the vowel sign and the final
    // virama contribute no width.
    const columns = utf8.calculateTextWidth("தமிழ்", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), columns);
}
|
|
3656
|
+
|
|
3657
|
+
test "calculateTextWidth: Punjabi script word" {
    // Three base letters at one column each; the three attached signs are
    // zero-width.
    const expected_columns: u32 = 3;
    const actual_columns = utf8.calculateTextWidth("ਪੰਜਾਬੀ", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3663
|
+
|
|
3664
|
+
test "calculateTextWidth: Telugu script word" {
    // Three base letters at one column each; each carries a zero-width
    // vowel sign.
    const columns = utf8.calculateTextWidth("తెలుగు", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), columns);
}
|
|
3670
|
+
|
|
3671
|
+
test "calculateTextWidth: Bengali script word" {
    // Two base letters at one column each; the vowel signs and anusvara
    // contribute no width.
    const expected_columns: u32 = 2;
    const actual_columns = utf8.calculateTextWidth("বাংলা", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3677
|
+
|
|
3678
|
+
test "calculateTextWidth: Kannada script" {
    // Four base letters at one column each; the virama between the two NA
    // letters is zero-width.
    const columns = utf8.calculateTextWidth("ಕನ್ನಡ", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 4), columns);
}
|
|
3684
|
+
|
|
3685
|
+
test "calculateTextWidth: Malayalam script" {
    // Base letters count one column, vowel signs none; only a range is
    // asserted here.
    const columns = utf8.calculateTextWidth("മലയാളം", 4, false, .unicode);
    try testing.expect(4 <= columns and columns <= 5);
}
|
|
3691
|
+
|
|
3692
|
+
test "calculateTextWidth: Oriya script" {
    // Three base letters at one column each; the nukta and the vowel sign
    // attached to the middle letter are zero-width.
    const expected_columns: u32 = 3;
    const actual_columns = utf8.calculateTextWidth("ଓଡ଼ିଆ", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3698
|
+
|
|
3699
|
+
// ============================================================================
|
|
3700
|
+
// THAI AND LAO SCRIPT WIDTH TESTS
|
|
3701
|
+
// ============================================================================
|
|
3702
|
+
|
|
3703
|
+
test "calculateTextWidth: Thai script" {
    // Thai base characters are one column wide and combining vowels/tones are
    // zero-width; only a range is asserted here.
    const columns = utf8.calculateTextWidth("ภาษา", 4, false, .unicode);
    try testing.expect(3 <= columns and columns <= 4);
}
|
|
3709
|
+
|
|
3710
|
+
test "calculateTextWidth: Thai numerals" {
    // Three Thai digits, one column each.
    const expected_columns: u32 = 3;
    const actual_columns = utf8.calculateTextWidth("๑๐๐", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3715
|
+
|
|
3716
|
+
test "calculateTextWidth: Lao script" {
    // Lao mixes base letters with zero-width vowels and tone marks, so only a
    // plausible range is asserted.
    const columns = utf8.calculateTextWidth("ໂຫຍ່າກເຈົ້າ", 4, false, .unicode);
    try testing.expect(5 <= columns and columns <= 10);
}
|
|
3722
|
+
|
|
3723
|
+
// ============================================================================
|
|
3724
|
+
// ARABIC AND OTHER SCRIPTS WIDTH TESTS
|
|
3725
|
+
// ============================================================================
|
|
3726
|
+
|
|
3727
|
+
test "calculateTextWidth: Arabic character" {
    // A single Arabic letter is expected to take one column.
    const expected_columns: u32 = 1;
    const actual_columns = utf8.calculateTextWidth("ا", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3732
|
+
|
|
3733
|
+
test "calculateTextWidth: Sinhala script" {
    // Sinhala letters count one column, vowel signs none; only a range is
    // asserted here.
    const columns = utf8.calculateTextWidth("ආහාර", 4, false, .unicode);
    try testing.expect(3 <= columns and columns <= 4);
}
|
|
3739
|
+
|
|
3740
|
+
test "calculateTextWidth: Chinese text" {
    // Two characters, two columns each.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("中文", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3745
|
+
|
|
3746
|
+
test "calculateTextWidth: Hangul Jamo" {
    // A single jamo letter is expected to take two columns.
    const columns = utf8.calculateTextWidth("ㄱ", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), columns);
}
|
|
3751
|
+
|
|
3752
|
+
// ============================================================================
|
|
3753
|
+
// MIXED SCRIPT COMPREHENSIVE TESTS
|
|
3754
|
+
// ============================================================================
|
|
3755
|
+
|
|
3756
|
+
test "calculateTextWidth: realistic multilingual sentence" {
    // "Hello " (6) + two CJK characters (4) + "! " (2) + the Devanagari word (4)
    // + " " (1) + one emoji (2) = 19
    const expected_columns: u32 = 19;
    const actual_columns = utf8.calculateTextWidth("Hello 世界! नमस्ते 🙏", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3762
|
+
|
|
3763
|
+
test "calculateTextWidth: all ending words from text" {
    // Several scripts concatenated without separators.
    // TODO: assert an exact width instead of a lower bound.
    const columns = utf8.calculateTextWidth("समाप्त끝จบముగింపుಅಂತ್ಯઅંત", 4, false, .unicode);
    try testing.expect(columns > 10);
}
|
|
3769
|
+
|
|
3770
|
+
test "calculateTextWidth: complex text with emojis and multiple scripts" {
    const complex = "The 🌟 journey: संस्कृति meets 漢字 🎋";
    const width = utf8.calculateTextWidth(complex, 4, false, .unicode);
    // Exact expectation, summed from widths pinned by the single-script tests
    // in this file:
    // "The "(4) + 🌟(2) + " journey: "(10) + the Devanagari word(4)
    // + " meets "(7) + 漢字(4) + " "(1) + 🎋(2) = 34
    try testing.expectEqual(@as(u32, 34), width);
}
|
|
3776
|
+
|
|
3777
|
+
test "calculateTextWidth: validate against unicode-width-map.zon" {
    // Checks every "U+XXXX" entry of the embedded width map against the width
    // calculateTextWidth reports for that single code point.
    const zon_content = @embedFile("unicode-width-map.zon");

    // Use arena allocator to avoid memory leaks from ZON parser string allocations
    var arena = std.heap.ArenaAllocator.init(testing.allocator);
    defer arena.deinit();
    const allocator = arena.allocator();

    // The parser is handed a null-terminated copy of the embedded file.
    const zon_with_null = try allocator.dupeZ(u8, zon_content);

    const WidthEntry = struct {
        codepoint: []const u8,
        width: i32,
    };

    const width_entries = try std.zon.parse.fromSlice(
        []const WidthEntry,
        allocator,
        zon_with_null,
        null,
        .{},
    );

    var checked: usize = 0;
    var failures: usize = 0;

    for (width_entries) |entry| {
        const codepoint_str = entry.codepoint;
        const expected_width = entry.width;

        // Parse "U+XXXX" from codepoint string; entries in any other shape are skipped.
        if (codepoint_str.len < 3 or !std.mem.startsWith(u8, codepoint_str, "U+")) {
            continue;
        }
        const hex_str = codepoint_str[2..];
        const code_point = std.fmt.parseInt(u21, hex_str, 16) catch continue;

        // Values that cannot be UTF-8 encoded are skipped as well.
        var buf: [4]u8 = undefined;
        const len = std.unicode.utf8Encode(code_point, &buf) catch continue;
        const str = buf[0..len];

        const actual_width = utf8.calculateTextWidth(str, 4, false, .unicode);
        checked += 1;

        if (actual_width == expected_width) {
            continue;
        }

        // Report each mismatch so a failing run names the offending code points.
        failures += 1;
        std.debug.print(
            "width mismatch for {s}: expected {d}, got {d}\n",
            .{ codepoint_str, expected_width, actual_width },
        );
    }

    // Guard against a vacuous pass when every entry was skipped.
    try testing.expect(checked > 0);
    try testing.expectEqual(@as(usize, 0), failures);
}
|
|
3831
|
+
|
|
3832
|
+
test "findGraphemeInfo: comprehensive multilingual text" {
|
|
3833
|
+
const text =
|
|
3834
|
+
\\# The Celestial Journey of संस्कृति 🌟🔮✨
|
|
3835
|
+
\\In the beginning, there was नमस्ते 🙏 and the ancient wisdom of the ॐ symbol echoing through dimensions. The travelers 🧑🚀👨🚀👩🚀 embarked on their quest through the cosmos, guided by the mysterious རྒྱ་མཚོ and the luminous 🌈🦄🧚♀️ beings of light. They encountered the great देवनागरी scribes who wrote in flowing अक्षर characters, documenting everything in their sacred texts 📜📖✍️.
|
|
3836
|
+
\\## Chapter प्रथम: The Eastern Gardens 🏯🎋🌸
|
|
3837
|
+
\\The journey led them to the mystical lands where 漢字 (kanji) danced with ひらがな and カタカナ across ancient scrolls 📯🎴🎎. In the gardens of Seoul, they found 한글 inscriptions speaking of 사랑 (love) and 평화 (peace) 💝🕊️☮️. The monks meditated under the bodhi tree 🧘♂️🌳, contemplating the nature of धर्म while drinking matcha 🍵 and eating 餃子 dumplings 🥟.
|
|
3838
|
+
\\Strange creatures emerged from the mist: 🦥🦦🦧🦨🦩🦚🦜🦝🦞🦟. They spoke in riddles about the प्राचीन (ancient) ways and the नवीन (new) paths forward. "भविष्य में क्या है?" they asked, while the ໂຫຍ່າກເຈົ້າ whispered secrets in Lao script 🤫🗣️💬.
|
|
3839
|
+
\\## The संगम (Confluence) of Scripts 🌊📝🎭
|
|
3840
|
+
\\At the great confluence, they witnessed the merger of བོད་ཡིག (Tibetan), ગુજરાતી (Gujarati), and தமிழ் (Tamil) scripts flowing together like rivers 🏞️🌊💧. The scholars debated about ਪੰਜਾਬੀ philosophy while juggling 🤹♂️🎪🎨 colorful orbs that represented different తెలుగు concepts.
|
|
3841
|
+
\\The marketplace buzzed with activity 🏪🛒💰: merchants sold বাংলা spices 🌶️🧄🧅, ಕನ್ನಡ silks 🧵👘, and മലയാളം handicrafts 🎨🖼️. Children played with toys shaped like 🦖🦕🐉🐲 while their parents bargained using ancient ଓଡ଼ିଆ numerals and gestures 🤝🤲👐.
|
|
3842
|
+
\\## The Festival of ๑๐๐ Lanterns 🏮🎆🎇
|
|
3843
|
+
\\During the grand festival, they lit exactly ๑๐๐ (100 in Thai numerals) lanterns 🏮🕯️💡 that floated into the night sky like ascending ความหวัง (hopes). The celebration featured dancers 💃🕺🩰 performing classical moves from भरतनाट्यम tradition, their मुद्रा hand gestures telling stories of प्रेम and वीरता.
|
|
3844
|
+
\\Musicians played unusual instruments: the 🎻🎺🎷🎸🪕🪘 ensemble created harmonies that resonated with the वेद chants and མཆོད་རྟེན bells 🔔⛩️. The audience sat mesmerized 😵💫🤯✨, some sipping on bubble tea 🧋 while others enjoyed मिठाई sweets 🍬🍭🧁.
|
|
3845
|
+
\\## The འཕྲུལ་དེབ (Machine) Age Arrives ⚙️🤖🦾
|
|
3846
|
+
\\As modernity crept in, the ancient འཁོར་ལོ (wheel) gave way to 🚗🚕🚙🚌🚎 vehicles and eventually to 🚀🛸🛰️ spacecraft. The યુવાન (youth) learned to code in Python 🐍💻⌨️, but still honored their గురువు (teachers) who taught them the old ways of ज्ञान acquisition 🧠📚🎓.
|
|
3847
|
+
\\The সমাজ (society) transformed: robots 🤖🦾🦿 worked alongside humans 👨💼👩💼👨🔬👩🔬, and AI learned to read སྐད (languages) from across the planet 🌍🌎🌏. Yet somehow, the essence of मानवता remained intact, preserved in the கவிதை (poetry) and the ກາບແກ້ວ stories passed down through generations 👴👵👨👩👧👦.
|
|
3848
|
+
\\## The Final ಅಧ್ಯಾಯ (Chapter) 🌅🌄🌠
|
|
3849
|
+
\\As the sun set over the പർവ്വതങ്ങൾ (mountains) 🏔️⛰️🗻, our travelers realized that every script, every symbol—from ا to ㄱ to অ to अ—represented not just sounds, but entire civilizations' worth of विचार (thoughts) and ಕನಸು (dreams) 💭💤🌌.
|
|
3850
|
+
\\They gathered around the final campfire 🔥🏕️, sharing stories in ภาษา (languages) both ancient and new. Someone brought out a guitar 🎸 and started singing in ગીત form, while others prepared ආහාර (food) 🍛🍲🥘 seasoned with love ❤️💕💖 and memories 📸🎞️📹.
|
|
3851
|
+
\\And so they learned that whether written in দেবনাগরী, 中文, 한글, or ไทย, the human experience transcends boundaries 🌐🤝🌈. The weird emojis 🦩🧿🪬🫀🫁🧠 and complex scripts were all part of the same beautiful བསྟན་པ (teaching): that diversity is our greatest strength 💪✊🙌.
|
|
3852
|
+
\\The end. समाप्त. 끝. จบ. முடிவு. ముగింపు. সমাপ্তি. ഒടുക്കം. ಅಂತ್ಯ. અંત. 🎬🎭🎪✨🌟⭐
|
|
3853
|
+
\\
|
|
3854
|
+
;
|
|
3855
|
+
|
|
3856
|
+
const expected_width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3857
|
+
|
|
3858
|
+
var result: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
|
|
3859
|
+
defer result.deinit(testing.allocator);
|
|
3860
|
+
|
|
3861
|
+
try utf8.findGraphemeInfo(text, 4, false, .unicode, testing.allocator, &result);
|
|
3862
|
+
try testing.expect(result.items.len > 0);
|
|
3863
|
+
|
|
3864
|
+
var prev_end_byte: usize = 0;
|
|
3865
|
+
|
|
3866
|
+
for (result.items) |g| {
|
|
3867
|
+
try testing.expect(g.byte_offset >= prev_end_byte);
|
|
3868
|
+
|
|
3869
|
+
const text_before = text[0..g.byte_offset];
|
|
3870
|
+
const expected_col = utf8.calculateTextWidth(text_before, 4, false, .unicode);
|
|
3871
|
+
|
|
3872
|
+
try testing.expectEqual(expected_col, g.col_offset);
|
|
3873
|
+
|
|
3874
|
+
prev_end_byte = g.byte_offset + g.byte_len;
|
|
3875
|
+
}
|
|
3876
|
+
|
|
3877
|
+
const final_computed_width = utf8.calculateTextWidth(text, 4, false, .unicode);
|
|
3878
|
+
try testing.expectEqual(expected_width, final_computed_width);
|
|
3879
|
+
}
|
|
3880
|
+
|
|
3881
|
+
// ============================================================================
|
|
3882
|
+
// THAI DIACRITICS AND COMBINING MARKS TESTS
|
|
3883
|
+
// ============================================================================
|
|
3884
|
+
|
|
3885
|
+
test "Thai: base consonants have width 1" {
    // 36 base consonants in a row, one column each.
    const expected_columns: u32 = 36;
    const actual_columns = utf8.calculateTextWidth("กขคงจฉชซญฎฏฐดตถทธนบปผฝพฟภมยรลวศษสหอฮ", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3890
|
+
|
|
3891
|
+
test "Thai: spacing vowels have width 1" {
    // Six spacing vowels, one column each.
    const columns = utf8.calculateTextWidth("าะแโใไ", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 6), columns);
}
|
|
3896
|
+
|
|
3897
|
+
test "Thai: combining vowels above have width 0" {
    // The bare consonant first, then the same consonant carrying sara i,
    // sara ii, sara ue, sara uee and mai han akat. Every sample must still
    // occupy exactly one column.
    const samples = [_][]const u8{ "ก", "กิ", "กี", "กึ", "กื", "กั" };
    for (samples) |sample| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(sample, 4, false, .unicode));
    }
}
|
|
3912
|
+
|
|
3913
|
+
test "Thai: combining vowels below have width 0" {
    // Consonant with sara u, then with sara uu; both stay one column wide.
    const samples = [_][]const u8{ "กุ", "กู" };
    for (samples) |sample| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(sample, 4, false, .unicode));
    }
}
|
|
3920
|
+
|
|
3921
|
+
test "Thai: tone marks have width 0" {
    // Consonant with mai ek, mai tho, mai tri and mai chattawa in turn; each
    // combination stays one column wide.
    const samples = [_][]const u8{ "ก่", "ก้", "ก๊", "ก๋" };
    for (samples) |sample| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(sample, 4, false, .unicode));
    }
}
|
|
3932
|
+
|
|
3933
|
+
test "Thai: other diacritics have width 0" {
    // Consonant with maitaikhu, thanthakhat and nikhahit in turn; each
    // combination stays one column wide.
    const samples = [_][]const u8{ "ก็", "ก์", "กํ" };
    for (samples) |sample| {
        try testing.expectEqual(@as(u32, 1), utf8.calculateTextWidth(sample, 4, false, .unicode));
    }
}
|
|
3942
|
+
|
|
3943
|
+
test "Thai: combined vowel and tone mark" {
    // One consonant stacked with both a vowel and a tone mark: one column.
    const stacked_columns = utf8.calculateTextWidth("กี่", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 1), stacked_columns);

    // Consonant + combining vowel, then a second consonant: two columns.
    const word_columns = utf8.calculateTextWidth("คือ", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), word_columns);
}
|
|
3950
|
+
|
|
3951
|
+
test "Thai: word 'ภาษาไทย' (Thai language)" {
    // The word is expected to take seven columns.
    const expected_columns: u32 = 7;
    const actual_columns = utf8.calculateTextWidth("ภาษาไทย", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3955
|
+
|
|
3956
|
+
test "Thai: word 'อย่าง' with tone mark" {
    // The word is expected to take four columns; its tone mark adds none.
    const expected_columns: u32 = 4;
    const actual_columns = utf8.calculateTextWidth("อย่าง", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3960
|
+
|
|
3961
|
+
test "Thai: word 'อธิบาย' with vowel above" {
    // The word is expected to take five columns; the vowel above adds none.
    const expected_columns: u32 = 5;
    const actual_columns = utf8.calculateTextWidth("อธิบาย", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3965
|
+
|
|
3966
|
+
test "Thai: full sentence with spaces" {
    // Three space-separated Thai phrases; the whole sentence is expected to
    // take 32 columns.
    const expected_columns: u32 = 32;
    const actual_columns = utf8.calculateTextWidth("ภาษาไทย คืออะไร อธิบายมาอย่างละเอียด", 4, false, .unicode);
    try testing.expectEqual(expected_columns, actual_columns);
}
|
|
3970
|
+
|
|
3971
|
+
test "Thai: wrap by width respects combining marks" {
    const word = "คือ";

    // With a one-column budget the wrap position is expected at byte 6,
    // i.e. after the consonant and its combining vowel, not between them.
    {
        const fit_one = utf8.findWrapPosByWidth(word, 1, 4, false, .unicode);
        try testing.expectEqual(@as(u32, 6), fit_one.byte_offset);
        try testing.expectEqual(@as(u32, 1), fit_one.columns_used);
    }

    // With a two-column budget the whole word (9 bytes) fits.
    {
        const fit_two = utf8.findWrapPosByWidth(word, 2, 4, false, .unicode);
        try testing.expectEqual(@as(u32, 9), fit_two.byte_offset);
        try testing.expectEqual(@as(u32, 2), fit_two.columns_used);
    }
}
|
|
3982
|
+
|
|
3983
|
+
test "Thai: wrap by width with tone marks" {
    const word = "ก่อน";

    // A budget of two columns must be consumed exactly.
    const two_col_budget = utf8.findWrapPosByWidth(word, 2, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 2), two_col_budget.columns_used);

    // A budget of three columns must be consumed exactly as well.
    const three_col_budget = utf8.findWrapPosByWidth(word, 3, 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), three_col_budget.columns_used);
}
|
|
3992
|
+
|
|
3993
|
+
test "Thai: grapheme info for combining marks" {
    // Consonant + vowel sign + tone mark.
    const stacked = "กี่";

    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);
    try utf8.findGraphemeInfo(stacked, 4, false, .unicode, testing.allocator, &clusters);

    // Exactly one entry is expected, and it must be one column wide.
    const entries = clusters.items;
    try testing.expectEqual(@as(usize, 1), entries.len);
    try testing.expectEqual(@as(u8, 1), entries[0].width);
}
|
|
4004
|
+
|
|
4005
|
+
test "Thai: grapheme info for word with combining marks" {
    const word = "คือ";

    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);
    try utf8.findGraphemeInfo(word, 4, false, .unicode, testing.allocator, &clusters);

    // Two entries are expected, each one column wide.
    const entries = clusters.items;
    try testing.expectEqual(@as(usize, 2), entries.len);
    try testing.expectEqual(@as(u8, 1), entries[0].width);
    try testing.expectEqual(@as(u8, 1), entries[1].width);
}
|
|
4017
|
+
|
|
4018
|
+
test "Thai: mixed Thai and ASCII" {
    // ASCII words surrounding a Thai word; the line is expected to measure
    // 19 columns in total.
    const mixed_cols = utf8.calculateTextWidth("Hello ภาษาไทย World", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 19), mixed_cols);
}
|
|
4022
|
+
|
|
4023
|
+
test "Thai: mixed Thai and emoji" {
    // Thai text on both sides of a flag emoji; the line is expected to
    // measure 11 columns in total.
    const expected_cols: u32 = 11;
    const mixed_cols = utf8.calculateTextWidth("ภาษา 🇹🇭 ไทย", 4, false, .unicode);
    try testing.expectEqual(expected_cols, mixed_cols);
}
|
|
4027
|
+
|
|
4028
|
+
test "Thai: คำว่า width should be 3" {
    // Regression-style check: this word must measure exactly 3 columns.
    const measured = utf8.calculateTextWidth("คำว่า", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 3), measured);
}
|
|
4032
|
+
|
|
4033
|
+
test "Thai: ว่ width should be 1" {
    // A consonant with a tone mark on top must measure exactly 1 column.
    const measured = utf8.calculateTextWidth("ว่", 4, false, .unicode);
    try testing.expectEqual(@as(u32, 1), measured);
}
|
|
4037
|
+
|
|
4038
|
+
test "Thai: ว่ wcwidth vs unicode mode comparison" {
    const sample = "ว่";
    const one_column: u32 = 1;

    // Measure the same text with both width methods before asserting anything.
    const via_wcwidth = utf8.calculateTextWidth(sample, 4, false, .wcwidth);
    const via_unicode = utf8.calculateTextWidth(sample, 4, false, .unicode);

    // Both methods must report a single column.
    try testing.expectEqual(one_column, via_wcwidth);
    try testing.expectEqual(one_column, via_unicode);
}
|
|
4046
|
+
|
|
4047
|
+
test "Thai: ว่ is a single grapheme cluster" {
    const sample = "ว่";

    var clusters: std.ArrayListUnmanaged(utf8.GraphemeInfo) = .{};
    defer clusters.deinit(testing.allocator);
    try utf8.findGraphemeInfo(sample, 4, false, .unicode, testing.allocator, &clusters);

    // The consonant and its tone mark must be reported as one entry that is
    // one column wide.
    const entries = clusters.items;
    try testing.expectEqual(@as(usize, 1), entries.len);
    try testing.expectEqual(@as(u8, 1), entries[0].width);
}
|