@fairyhunter13/opentui-core 0.1.112 → 0.1.114
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dev/keypress-debug-renderer.ts +148 -0
- package/dev/keypress-debug.ts +43 -0
- package/dev/print-env-vars.ts +32 -0
- package/dev/test-tmux-graphics-334.sh +68 -0
- package/dev/thai-debug-test.ts +68 -0
- package/docs/development.md +144 -0
- package/package.json +63 -51
- package/scripts/build.ts +400 -0
- package/scripts/publish.ts +60 -0
- package/src/3d/SpriteResourceManager.ts +286 -0
- package/src/3d/SpriteUtils.ts +70 -0
- package/src/3d/TextureUtils.ts +196 -0
- package/src/3d/ThreeRenderable.ts +197 -0
- package/src/3d/WGPURenderer.ts +294 -0
- package/src/3d/animation/ExplodingSpriteEffect.ts +513 -0
- package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +429 -0
- package/src/3d/animation/SpriteAnimator.ts +633 -0
- package/src/3d/animation/SpriteParticleGenerator.ts +435 -0
- package/src/3d/canvas.ts +464 -0
- package/src/3d/index.ts +12 -0
- package/src/3d/physics/PlanckPhysicsAdapter.ts +72 -0
- package/src/3d/physics/RapierPhysicsAdapter.ts +66 -0
- package/src/3d/physics/physics-interface.ts +31 -0
- package/src/3d/shaders/supersampling.wgsl +201 -0
- package/src/3d.ts +3 -0
- package/src/NativeSpanFeed.ts +300 -0
- package/src/Renderable.ts +1704 -0
- package/src/__snapshots__/buffer.test.ts.snap +28 -0
- package/src/animation/Timeline.test.ts +2709 -0
- package/src/animation/Timeline.ts +598 -0
- package/src/ansi.ts +18 -0
- package/src/benchmark/attenuation-benchmark.ts +81 -0
- package/src/benchmark/colormatrix-benchmark.ts +128 -0
- package/src/benchmark/gain-benchmark.ts +80 -0
- package/src/benchmark/latest-all-bench-run.json +707 -0
- package/src/benchmark/latest-async-bench-run.json +336 -0
- package/src/benchmark/latest-default-bench-run.json +657 -0
- package/src/benchmark/latest-large-bench-run.json +707 -0
- package/src/benchmark/latest-quick-bench-run.json +207 -0
- package/src/benchmark/markdown-benchmark.ts +1796 -0
- package/src/benchmark/native-span-feed-async-benchmark.ts +355 -0
- package/src/benchmark/native-span-feed-benchmark.md +56 -0
- package/src/benchmark/native-span-feed-benchmark.ts +596 -0
- package/src/benchmark/native-span-feed-compare.ts +280 -0
- package/src/benchmark/renderer-benchmark.ts +754 -0
- package/src/benchmark/text-table-benchmark.ts +948 -0
- package/src/buffer.test.ts +291 -0
- package/src/buffer.ts +554 -0
- package/src/console.test.ts +612 -0
- package/src/console.ts +1254 -0
- package/src/edit-buffer.test.ts +1769 -0
- package/src/edit-buffer.ts +411 -0
- package/src/editor-view.test.ts +1032 -0
- package/src/editor-view.ts +284 -0
- package/src/examples/ascii-font-selection-demo.ts +245 -0
- package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
- package/src/examples/assets/concrete.png +0 -0
- package/src/examples/assets/crate.png +0 -0
- package/src/examples/assets/crate_emissive.png +0 -0
- package/src/examples/assets/forrest_background.png +0 -0
- package/src/examples/assets/hast-example.json +1018 -0
- package/src/examples/assets/heart.png +0 -0
- package/src/examples/assets/main_char_heavy_attack.png +0 -0
- package/src/examples/assets/main_char_idle.png +0 -0
- package/src/examples/assets/main_char_jump_end.png +0 -0
- package/src/examples/assets/main_char_jump_landing.png +0 -0
- package/src/examples/assets/main_char_jump_start.png +0 -0
- package/src/examples/assets/main_char_run_loop.png +0 -0
- package/src/examples/assets/roughness_map.jpg +0 -0
- package/src/examples/build.ts +115 -0
- package/src/examples/code-demo.ts +924 -0
- package/src/examples/console-demo.ts +358 -0
- package/src/examples/core-plugin-slots-demo.ts +759 -0
- package/src/examples/diff-demo.ts +701 -0
- package/src/examples/draggable-three-demo.ts +259 -0
- package/src/examples/editor-demo.ts +322 -0
- package/src/examples/extmarks-demo.ts +196 -0
- package/src/examples/focus-restore-demo.ts +310 -0
- package/src/examples/fonts.ts +245 -0
- package/src/examples/fractal-shader-demo.ts +268 -0
- package/src/examples/framebuffer-demo.ts +674 -0
- package/src/examples/full-unicode-demo.ts +241 -0
- package/src/examples/golden-star-demo.ts +933 -0
- package/src/examples/grayscale-buffer-demo.ts +249 -0
- package/src/examples/hast-syntax-highlighting-demo.ts +129 -0
- package/src/examples/index.ts +926 -0
- package/src/examples/input-demo.ts +377 -0
- package/src/examples/input-select-layout-demo.ts +425 -0
- package/src/examples/install.sh +143 -0
- package/src/examples/keypress-debug-demo.ts +452 -0
- package/src/examples/lib/HexList.ts +122 -0
- package/src/examples/lib/PaletteGrid.ts +125 -0
- package/src/examples/lib/standalone-keys.ts +25 -0
- package/src/examples/lib/tab-controller.ts +243 -0
- package/src/examples/lights-phong-demo.ts +290 -0
- package/src/examples/link-demo.ts +220 -0
- package/src/examples/live-state-demo.ts +480 -0
- package/src/examples/markdown-demo.ts +725 -0
- package/src/examples/mouse-interaction-demo.ts +428 -0
- package/src/examples/nested-zindex-demo.ts +357 -0
- package/src/examples/opacity-example.ts +235 -0
- package/src/examples/opentui-demo.ts +1057 -0
- package/src/examples/physx-planck-2d-demo.ts +623 -0
- package/src/examples/physx-rapier-2d-demo.ts +655 -0
- package/src/examples/relative-positioning-demo.ts +323 -0
- package/src/examples/scroll-example.ts +214 -0
- package/src/examples/scrollbox-mouse-test.ts +112 -0
- package/src/examples/scrollbox-overlay-hit-test.ts +206 -0
- package/src/examples/select-demo.ts +237 -0
- package/src/examples/shader-cube-demo.ts +1015 -0
- package/src/examples/simple-layout-example.ts +591 -0
- package/src/examples/slider-demo.ts +617 -0
- package/src/examples/split-mode-demo.ts +453 -0
- package/src/examples/sprite-animation-demo.ts +443 -0
- package/src/examples/sprite-particle-generator-demo.ts +486 -0
- package/src/examples/static-sprite-demo.ts +193 -0
- package/src/examples/sticky-scroll-example.ts +308 -0
- package/src/examples/styled-text-demo.ts +282 -0
- package/src/examples/tab-select-demo.ts +219 -0
- package/src/examples/terminal-title.ts +29 -0
- package/src/examples/terminal.ts +305 -0
- package/src/examples/text-node-demo.ts +416 -0
- package/src/examples/text-selection-demo.ts +377 -0
- package/src/examples/text-table-demo.ts +503 -0
- package/src/examples/text-truncation-demo.ts +481 -0
- package/src/examples/text-wrap.ts +757 -0
- package/src/examples/texture-loading-demo.ts +259 -0
- package/src/examples/timeline-example.ts +670 -0
- package/src/examples/transparency-demo.ts +400 -0
- package/src/examples/vnode-composition-demo.ts +404 -0
- package/src/examples/wide-grapheme-overlay-demo.ts +280 -0
- package/src/index.ts +24 -0
- package/src/lib/KeyHandler.integration.test.ts +292 -0
- package/src/lib/KeyHandler.stopPropagation.test.ts +289 -0
- package/src/lib/KeyHandler.test.ts +662 -0
- package/src/lib/KeyHandler.ts +222 -0
- package/src/lib/RGBA.test.ts +984 -0
- package/src/lib/RGBA.ts +204 -0
- package/src/lib/ascii.font.ts +330 -0
- package/src/lib/border.test.ts +83 -0
- package/src/lib/border.ts +170 -0
- package/src/lib/bunfs.test.ts +27 -0
- package/src/lib/bunfs.ts +18 -0
- package/src/lib/clipboard.test.ts +41 -0
- package/src/lib/clipboard.ts +47 -0
- package/src/lib/clock.ts +35 -0
- package/src/lib/data-paths.test.ts +133 -0
- package/src/lib/data-paths.ts +109 -0
- package/src/lib/debounce.ts +106 -0
- package/src/lib/detect-links.test.ts +98 -0
- package/src/lib/detect-links.ts +56 -0
- package/src/lib/env.test.ts +228 -0
- package/src/lib/env.ts +209 -0
- package/src/lib/extmarks-history.ts +51 -0
- package/src/lib/extmarks-multiwidth.test.ts +322 -0
- package/src/lib/extmarks.test.ts +3457 -0
- package/src/lib/extmarks.ts +843 -0
- package/src/lib/fonts/block.json +405 -0
- package/src/lib/fonts/grid.json +265 -0
- package/src/lib/fonts/huge.json +741 -0
- package/src/lib/fonts/pallet.json +314 -0
- package/src/lib/fonts/shade.json +591 -0
- package/src/lib/fonts/slick.json +321 -0
- package/src/lib/fonts/tiny.json +69 -0
- package/src/lib/hast-styled-text.ts +59 -0
- package/src/lib/index.ts +21 -0
- package/src/lib/keymapping.test.ts +317 -0
- package/src/lib/keymapping.ts +115 -0
- package/src/lib/objects-in-viewport.test.ts +787 -0
- package/src/lib/objects-in-viewport.ts +153 -0
- package/src/lib/output.capture.ts +58 -0
- package/src/lib/parse.keypress-kitty.protocol.test.ts +340 -0
- package/src/lib/parse.keypress-kitty.test.ts +663 -0
- package/src/lib/parse.keypress-kitty.ts +439 -0
- package/src/lib/parse.keypress.test.ts +1849 -0
- package/src/lib/parse.keypress.ts +397 -0
- package/src/lib/parse.mouse.test.ts +552 -0
- package/src/lib/parse.mouse.ts +232 -0
- package/src/lib/paste.ts +16 -0
- package/src/lib/queue.ts +65 -0
- package/src/lib/renderable.validations.test.ts +87 -0
- package/src/lib/renderable.validations.ts +83 -0
- package/src/lib/scroll-acceleration.ts +98 -0
- package/src/lib/selection.ts +240 -0
- package/src/lib/singleton.ts +28 -0
- package/src/lib/stdin-parser.test.ts +2290 -0
- package/src/lib/stdin-parser.ts +1810 -0
- package/src/lib/styled-text.ts +178 -0
- package/src/lib/terminal-capability-detection.test.ts +202 -0
- package/src/lib/terminal-capability-detection.ts +79 -0
- package/src/lib/terminal-palette.test.ts +878 -0
- package/src/lib/terminal-palette.ts +383 -0
- package/src/lib/tree-sitter/assets/README.md +118 -0
- package/src/lib/tree-sitter/assets/update.ts +334 -0
- package/src/lib/tree-sitter/assets.d.ts +9 -0
- package/src/lib/tree-sitter/cache.test.ts +273 -0
- package/src/lib/tree-sitter/client.test.ts +1165 -0
- package/src/lib/tree-sitter/client.ts +607 -0
- package/src/lib/tree-sitter/default-parsers.ts +86 -0
- package/src/lib/tree-sitter/download-utils.ts +148 -0
- package/src/lib/tree-sitter/index.ts +28 -0
- package/src/lib/tree-sitter/parser.worker.ts +1042 -0
- package/src/lib/tree-sitter/parsers-config.ts +81 -0
- package/src/lib/tree-sitter/resolve-ft.test.ts +55 -0
- package/src/lib/tree-sitter/resolve-ft.ts +189 -0
- package/src/lib/tree-sitter/types.ts +82 -0
- package/src/lib/tree-sitter-styled-text.test.ts +1253 -0
- package/src/lib/tree-sitter-styled-text.ts +306 -0
- package/src/lib/validate-dir-name.ts +55 -0
- package/src/lib/yoga.options.test.ts +628 -0
- package/src/lib/yoga.options.ts +346 -0
- package/src/plugins/core-slot.ts +579 -0
- package/src/plugins/registry.ts +402 -0
- package/src/plugins/types.ts +46 -0
- package/src/post/effects.ts +930 -0
- package/src/post/filters.ts +489 -0
- package/src/post/matrices.ts +288 -0
- package/src/renderables/ASCIIFont.ts +219 -0
- package/src/renderables/Box.test.ts +205 -0
- package/src/renderables/Box.ts +326 -0
- package/src/renderables/Code.test.ts +2062 -0
- package/src/renderables/Code.ts +357 -0
- package/src/renderables/Diff.regression.test.ts +226 -0
- package/src/renderables/Diff.test.ts +3101 -0
- package/src/renderables/Diff.ts +1211 -0
- package/src/renderables/EditBufferRenderable.test.ts +288 -0
- package/src/renderables/EditBufferRenderable.ts +1166 -0
- package/src/renderables/FrameBuffer.ts +47 -0
- package/src/renderables/Input.test.ts +1228 -0
- package/src/renderables/Input.ts +247 -0
- package/src/renderables/LineNumberRenderable.ts +724 -0
- package/src/renderables/Markdown.ts +1393 -0
- package/src/renderables/ScrollBar.ts +422 -0
- package/src/renderables/ScrollBox.ts +883 -0
- package/src/renderables/Select.test.ts +1033 -0
- package/src/renderables/Select.ts +524 -0
- package/src/renderables/Slider.test.ts +456 -0
- package/src/renderables/Slider.ts +342 -0
- package/src/renderables/TabSelect.test.ts +197 -0
- package/src/renderables/TabSelect.ts +455 -0
- package/src/renderables/Text.selection-buffer.test.ts +123 -0
- package/src/renderables/Text.test.ts +2660 -0
- package/src/renderables/Text.ts +147 -0
- package/src/renderables/TextBufferRenderable.ts +518 -0
- package/src/renderables/TextNode.test.ts +1058 -0
- package/src/renderables/TextNode.ts +325 -0
- package/src/renderables/TextTable.test.ts +1421 -0
- package/src/renderables/TextTable.ts +1344 -0
- package/src/renderables/Textarea.ts +430 -0
- package/src/renderables/TimeToFirstDraw.ts +89 -0
- package/src/renderables/__snapshots__/Code.test.ts.snap +13 -0
- package/src/renderables/__snapshots__/Diff.test.ts.snap +785 -0
- package/src/renderables/__snapshots__/Text.test.ts.snap +421 -0
- package/src/renderables/__snapshots__/TextTable.test.ts.snap +215 -0
- package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +144 -0
- package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +816 -0
- package/src/renderables/__tests__/LineNumberRenderable.test.ts +1865 -0
- package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +85 -0
- package/src/renderables/__tests__/Markdown.code-colors.test.ts +242 -0
- package/src/renderables/__tests__/Markdown.test.ts +2518 -0
- package/src/renderables/__tests__/MultiRenderable.selection.test.ts +87 -0
- package/src/renderables/__tests__/Textarea.buffer.test.ts +682 -0
- package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +675 -0
- package/src/renderables/__tests__/Textarea.editing.test.ts +2041 -0
- package/src/renderables/__tests__/Textarea.error-handling.test.ts +35 -0
- package/src/renderables/__tests__/Textarea.events.test.ts +738 -0
- package/src/renderables/__tests__/Textarea.highlights.test.ts +590 -0
- package/src/renderables/__tests__/Textarea.keybinding.test.ts +3149 -0
- package/src/renderables/__tests__/Textarea.paste.test.ts +357 -0
- package/src/renderables/__tests__/Textarea.rendering.test.ts +1866 -0
- package/src/renderables/__tests__/Textarea.scroll.test.ts +733 -0
- package/src/renderables/__tests__/Textarea.selection.test.ts +1590 -0
- package/src/renderables/__tests__/Textarea.stress.test.ts +670 -0
- package/src/renderables/__tests__/Textarea.undo-redo.test.ts +383 -0
- package/src/renderables/__tests__/Textarea.visual-lines.test.ts +310 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +221 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +89 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +457 -0
- package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +158 -0
- package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +387 -0
- package/src/renderables/__tests__/markdown-parser.test.ts +217 -0
- package/src/renderables/__tests__/renderable-test-utils.ts +60 -0
- package/src/renderables/composition/README.md +8 -0
- package/src/renderables/composition/VRenderable.ts +32 -0
- package/src/renderables/composition/constructs.ts +127 -0
- package/src/renderables/composition/vnode.ts +289 -0
- package/src/renderables/index.ts +23 -0
- package/src/renderables/markdown-parser.ts +66 -0
- package/src/renderer.ts +2681 -0
- package/src/runtime-plugin-support.ts +39 -0
- package/src/runtime-plugin.ts +615 -0
- package/src/syntax-style.test.ts +841 -0
- package/src/syntax-style.ts +257 -0
- package/src/testing/README.md +210 -0
- package/src/testing/capture-spans.test.ts +194 -0
- package/src/testing/integration.test.ts +276 -0
- package/src/testing/manual-clock.ts +117 -0
- package/src/testing/mock-keys.test.ts +1378 -0
- package/src/testing/mock-keys.ts +457 -0
- package/src/testing/mock-mouse.test.ts +218 -0
- package/src/testing/mock-mouse.ts +247 -0
- package/src/testing/mock-tree-sitter-client.ts +73 -0
- package/src/testing/spy.ts +13 -0
- package/src/testing/test-recorder.test.ts +415 -0
- package/src/testing/test-recorder.ts +145 -0
- package/src/testing/test-renderer.ts +132 -0
- package/src/testing.ts +7 -0
- package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +481 -0
- package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +19 -0
- package/src/tests/__snapshots__/scrollbox.test.ts.snap +29 -0
- package/src/tests/absolute-positioning.snapshot.test.ts +638 -0
- package/src/tests/allocator-stats.test.ts +38 -0
- package/src/tests/destroy-during-render.test.ts +200 -0
- package/src/tests/destroy-on-exit.fixture.ts +36 -0
- package/src/tests/destroy-on-exit.test.ts +41 -0
- package/src/tests/hover-cursor.test.ts +98 -0
- package/src/tests/native-span-feed-async.test.ts +173 -0
- package/src/tests/native-span-feed-close.test.ts +120 -0
- package/src/tests/native-span-feed-coverage.test.ts +227 -0
- package/src/tests/native-span-feed-edge-cases.test.ts +352 -0
- package/src/tests/native-span-feed-use-after-free.test.ts +45 -0
- package/src/tests/opacity.test.ts +123 -0
- package/src/tests/renderable.snapshot.test.ts +524 -0
- package/src/tests/renderable.test.ts +1281 -0
- package/src/tests/renderer.clock.test.ts +158 -0
- package/src/tests/renderer.console-startup.test.ts +185 -0
- package/src/tests/renderer.control.test.ts +425 -0
- package/src/tests/renderer.core-slot-binding.test.ts +952 -0
- package/src/tests/renderer.cursor.test.ts +26 -0
- package/src/tests/renderer.destroy-during-render.test.ts +147 -0
- package/src/tests/renderer.focus-restore.test.ts +257 -0
- package/src/tests/renderer.focus.test.ts +294 -0
- package/src/tests/renderer.idle.test.ts +219 -0
- package/src/tests/renderer.input.test.ts +2237 -0
- package/src/tests/renderer.kitty-flags.test.ts +195 -0
- package/src/tests/renderer.mouse.test.ts +1274 -0
- package/src/tests/renderer.palette.test.ts +629 -0
- package/src/tests/renderer.selection.test.ts +49 -0
- package/src/tests/renderer.slot-registry.test.ts +684 -0
- package/src/tests/renderer.useMouse.test.ts +47 -0
- package/src/tests/runtime-plugin-node-modules-cycle.fixture.ts +76 -0
- package/src/tests/runtime-plugin-node-modules-mjs.fixture.ts +43 -0
- package/src/tests/runtime-plugin-node-modules-no-bare-rewrite.fixture.ts +67 -0
- package/src/tests/runtime-plugin-node-modules-package-type-cache.fixture.ts +72 -0
- package/src/tests/runtime-plugin-node-modules-runtime-specifier.fixture.ts +44 -0
- package/src/tests/runtime-plugin-node-modules-scoped-package-bare-rewrite.fixture.ts +85 -0
- package/src/tests/runtime-plugin-path-alias.fixture.ts +43 -0
- package/src/tests/runtime-plugin-resolve-roots.fixture.ts +65 -0
- package/src/tests/runtime-plugin-support.fixture.ts +11 -0
- package/src/tests/runtime-plugin-support.test.ts +19 -0
- package/src/tests/runtime-plugin-windows-file-url.fixture.ts +30 -0
- package/src/tests/runtime-plugin.fixture.ts +40 -0
- package/src/tests/runtime-plugin.test.ts +354 -0
- package/src/tests/scrollbox-culling-bug.test.ts +114 -0
- package/src/tests/scrollbox-hitgrid-resize.test.ts +136 -0
- package/src/tests/scrollbox-hitgrid.test.ts +909 -0
- package/src/tests/scrollbox.test.ts +1530 -0
- package/src/tests/wrap-resize-perf.test.ts +276 -0
- package/src/tests/yoga-setters.test.ts +921 -0
- package/src/text-buffer-view.test.ts +705 -0
- package/src/text-buffer-view.ts +189 -0
- package/src/text-buffer.test.ts +347 -0
- package/src/text-buffer.ts +250 -0
- package/src/types.ts +161 -0
- package/src/utils.ts +88 -0
- package/src/zig/ansi.zig +268 -0
- package/src/zig/bench/README.md +50 -0
- package/src/zig/bench/buffer-draw-text-buffer_bench.zig +887 -0
- package/src/zig/bench/edit-buffer_bench.zig +476 -0
- package/src/zig/bench/native-span-feed_bench.zig +100 -0
- package/src/zig/bench/rope-markers_bench.zig +713 -0
- package/src/zig/bench/rope_bench.zig +514 -0
- package/src/zig/bench/styled-text_bench.zig +470 -0
- package/src/zig/bench/text-buffer-coords_bench.zig +362 -0
- package/src/zig/bench/text-buffer-view_bench.zig +459 -0
- package/src/zig/bench/text-chunk-graphemes_bench.zig +273 -0
- package/src/zig/bench/utf8_bench.zig +799 -0
- package/src/zig/bench-utils.zig +431 -0
- package/src/zig/bench.zig +217 -0
- package/src/zig/buffer-methods.zig +211 -0
- package/src/zig/buffer.zig +2281 -0
- package/src/zig/build.zig +289 -0
- package/src/zig/build.zig.zon +16 -0
- package/src/zig/edit-buffer.zig +825 -0
- package/src/zig/editor-view.zig +802 -0
- package/src/zig/event-bus.zig +13 -0
- package/src/zig/event-emitter.zig +65 -0
- package/src/zig/file-logger.zig +92 -0
- package/src/zig/grapheme.zig +599 -0
- package/src/zig/lib.zig +1854 -0
- package/src/zig/link.zig +333 -0
- package/src/zig/logger.zig +43 -0
- package/src/zig/mem-registry.zig +125 -0
- package/src/zig/native-span-feed-bench-lib.zig +7 -0
- package/src/zig/native-span-feed.zig +708 -0
- package/src/zig/renderer.zig +1393 -0
- package/src/zig/rope.zig +1220 -0
- package/src/zig/syntax-style.zig +161 -0
- package/src/zig/terminal.zig +987 -0
- package/src/zig/test.zig +72 -0
- package/src/zig/tests/README.md +18 -0
- package/src/zig/tests/buffer-methods_test.zig +1109 -0
- package/src/zig/tests/buffer_test.zig +2557 -0
- package/src/zig/tests/edit-buffer-history_test.zig +271 -0
- package/src/zig/tests/edit-buffer_test.zig +1689 -0
- package/src/zig/tests/editor-view_test.zig +3299 -0
- package/src/zig/tests/event-emitter_test.zig +249 -0
- package/src/zig/tests/grapheme_test.zig +1304 -0
- package/src/zig/tests/link_test.zig +190 -0
- package/src/zig/tests/mem-registry_test.zig +473 -0
- package/src/zig/tests/memory_leak_regression_test.zig +159 -0
- package/src/zig/tests/native-span-feed_test.zig +1264 -0
- package/src/zig/tests/renderer_test.zig +1017 -0
- package/src/zig/tests/rope-nested_test.zig +712 -0
- package/src/zig/tests/rope_fuzz_test.zig +238 -0
- package/src/zig/tests/rope_test.zig +2362 -0
- package/src/zig/tests/segment-merge.test.zig +148 -0
- package/src/zig/tests/syntax-style_test.zig +557 -0
- package/src/zig/tests/terminal_test.zig +754 -0
- package/src/zig/tests/text-buffer-drawing_test.zig +3237 -0
- package/src/zig/tests/text-buffer-highlights_test.zig +666 -0
- package/src/zig/tests/text-buffer-iterators_test.zig +776 -0
- package/src/zig/tests/text-buffer-segment_test.zig +320 -0
- package/src/zig/tests/text-buffer-selection_test.zig +1035 -0
- package/src/zig/tests/text-buffer-selection_viewport_test.zig +358 -0
- package/src/zig/tests/text-buffer-view_test.zig +3649 -0
- package/src/zig/tests/text-buffer_test.zig +2191 -0
- package/src/zig/tests/unicode-width-map.zon +3909 -0
- package/src/zig/tests/utf8_no_zwj_test.zig +260 -0
- package/src/zig/tests/utf8_test.zig +4057 -0
- package/src/zig/tests/utf8_wcwidth_cursor_test.zig +267 -0
- package/src/zig/tests/utf8_wcwidth_test.zig +357 -0
- package/src/zig/tests/word-wrap-editing_test.zig +498 -0
- package/src/zig/tests/wrap-cache-perf_test.zig +113 -0
- package/src/zig/text-buffer-iterators.zig +499 -0
- package/src/zig/text-buffer-segment.zig +404 -0
- package/src/zig/text-buffer-view.zig +1371 -0
- package/src/zig/text-buffer.zig +1180 -0
- package/src/zig/utf8.zig +1948 -0
- package/src/zig/utils.zig +9 -0
- package/src/zig-structs.ts +261 -0
- package/src/zig.ts +3884 -0
- package/tsconfig.build.json +24 -0
- package/tsconfig.json +27 -0
- package/3d/SpriteResourceManager.d.ts +0 -74
- package/3d/SpriteUtils.d.ts +0 -13
- package/3d/TextureUtils.d.ts +0 -24
- package/3d/ThreeRenderable.d.ts +0 -40
- package/3d/WGPURenderer.d.ts +0 -61
- package/3d/animation/ExplodingSpriteEffect.d.ts +0 -71
- package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +0 -76
- package/3d/animation/SpriteAnimator.d.ts +0 -124
- package/3d/animation/SpriteParticleGenerator.d.ts +0 -62
- package/3d/canvas.d.ts +0 -44
- package/3d/index.d.ts +0 -12
- package/3d/physics/PlanckPhysicsAdapter.d.ts +0 -19
- package/3d/physics/RapierPhysicsAdapter.d.ts +0 -19
- package/3d/physics/physics-interface.d.ts +0 -27
- package/3d.d.ts +0 -2
- package/3d.js +0 -34041
- package/3d.js.map +0 -155
- package/LICENSE +0 -21
- package/NativeSpanFeed.d.ts +0 -41
- package/Renderable.d.ts +0 -334
- package/animation/Timeline.d.ts +0 -126
- package/ansi.d.ts +0 -13
- package/buffer.d.ts +0 -111
- package/console.d.ts +0 -144
- package/edit-buffer.d.ts +0 -98
- package/editor-view.d.ts +0 -73
- package/index-8fks7yv1.js +0 -411
- package/index-8fks7yv1.js.map +0 -10
- package/index-egy5e2rs.js +0 -12267
- package/index-egy5e2rs.js.map +0 -42
- package/index-tse8gzh0.js +0 -20614
- package/index-tse8gzh0.js.map +0 -67
- package/index.d.ts +0 -23
- package/index.js +0 -478
- package/index.js.map +0 -9
- package/lib/KeyHandler.d.ts +0 -61
- package/lib/RGBA.d.ts +0 -25
- package/lib/ascii.font.d.ts +0 -508
- package/lib/border.d.ts +0 -51
- package/lib/bunfs.d.ts +0 -7
- package/lib/clipboard.d.ts +0 -17
- package/lib/clock.d.ts +0 -15
- package/lib/data-paths.d.ts +0 -26
- package/lib/debounce.d.ts +0 -42
- package/lib/detect-links.d.ts +0 -6
- package/lib/env.d.ts +0 -42
- package/lib/extmarks-history.d.ts +0 -17
- package/lib/extmarks.d.ts +0 -89
- package/lib/hast-styled-text.d.ts +0 -17
- package/lib/index.d.ts +0 -21
- package/lib/keymapping.d.ts +0 -25
- package/lib/objects-in-viewport.d.ts +0 -24
- package/lib/output.capture.d.ts +0 -24
- package/lib/parse.keypress-kitty.d.ts +0 -2
- package/lib/parse.keypress.d.ts +0 -26
- package/lib/parse.mouse.d.ts +0 -30
- package/lib/paste.d.ts +0 -7
- package/lib/queue.d.ts +0 -15
- package/lib/renderable.validations.d.ts +0 -12
- package/lib/scroll-acceleration.d.ts +0 -43
- package/lib/selection.d.ts +0 -63
- package/lib/singleton.d.ts +0 -7
- package/lib/stdin-parser.d.ts +0 -87
- package/lib/styled-text.d.ts +0 -63
- package/lib/terminal-capability-detection.d.ts +0 -30
- package/lib/terminal-palette.d.ts +0 -50
- package/lib/tree-sitter/assets/update.d.ts +0 -11
- package/lib/tree-sitter/client.d.ts +0 -47
- package/lib/tree-sitter/default-parsers.d.ts +0 -2
- package/lib/tree-sitter/download-utils.d.ts +0 -21
- package/lib/tree-sitter/index.d.ts +0 -8
- package/lib/tree-sitter/parser.worker.d.ts +0 -1
- package/lib/tree-sitter/parsers-config.d.ts +0 -53
- package/lib/tree-sitter/resolve-ft.d.ts +0 -5
- package/lib/tree-sitter/types.d.ts +0 -82
- package/lib/tree-sitter-styled-text.d.ts +0 -14
- package/lib/validate-dir-name.d.ts +0 -1
- package/lib/yoga.options.d.ts +0 -32
- package/parser.worker.js +0 -899
- package/parser.worker.js.map +0 -12
- package/plugins/core-slot.d.ts +0 -72
- package/plugins/registry.d.ts +0 -42
- package/plugins/types.d.ts +0 -34
- package/post/effects.d.ts +0 -147
- package/post/filters.d.ts +0 -65
- package/post/matrices.d.ts +0 -20
- package/renderables/ASCIIFont.d.ts +0 -52
- package/renderables/Box.d.ts +0 -81
- package/renderables/Code.d.ts +0 -78
- package/renderables/Diff.d.ts +0 -142
- package/renderables/EditBufferRenderable.d.ts +0 -237
- package/renderables/FrameBuffer.d.ts +0 -16
- package/renderables/Input.d.ts +0 -67
- package/renderables/LineNumberRenderable.d.ts +0 -78
- package/renderables/Markdown.d.ts +0 -185
- package/renderables/ScrollBar.d.ts +0 -77
- package/renderables/ScrollBox.d.ts +0 -124
- package/renderables/Select.d.ts +0 -115
- package/renderables/Slider.d.ts +0 -47
- package/renderables/TabSelect.d.ts +0 -96
- package/renderables/Text.d.ts +0 -36
- package/renderables/TextBufferRenderable.d.ts +0 -105
- package/renderables/TextNode.d.ts +0 -91
- package/renderables/TextTable.d.ts +0 -140
- package/renderables/Textarea.d.ts +0 -63
- package/renderables/TimeToFirstDraw.d.ts +0 -24
- package/renderables/__tests__/renderable-test-utils.d.ts +0 -12
- package/renderables/composition/VRenderable.d.ts +0 -16
- package/renderables/composition/constructs.d.ts +0 -35
- package/renderables/composition/vnode.d.ts +0 -46
- package/renderables/index.d.ts +0 -23
- package/renderables/markdown-parser.d.ts +0 -10
- package/renderer.d.ts +0 -419
- package/runtime-plugin-support.d.ts +0 -3
- package/runtime-plugin-support.js +0 -29
- package/runtime-plugin-support.js.map +0 -10
- package/runtime-plugin.d.ts +0 -16
- package/runtime-plugin.js +0 -16
- package/runtime-plugin.js.map +0 -9
- package/syntax-style.d.ts +0 -54
- package/testing/manual-clock.d.ts +0 -17
- package/testing/mock-keys.d.ts +0 -81
- package/testing/mock-mouse.d.ts +0 -38
- package/testing/mock-tree-sitter-client.d.ts +0 -23
- package/testing/spy.d.ts +0 -7
- package/testing/test-recorder.d.ts +0 -61
- package/testing/test-renderer.d.ts +0 -23
- package/testing.d.ts +0 -6
- package/testing.js +0 -697
- package/testing.js.map +0 -15
- package/text-buffer-view.d.ts +0 -42
- package/text-buffer.d.ts +0 -67
- package/types.d.ts +0 -139
- package/utils.d.ts +0 -14
- package/zig-structs.d.ts +0 -155
- package/zig.d.ts +0 -353
- /package/{assets → src/lib/tree-sitter/assets}/javascript/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/javascript/tree-sitter-javascript.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/injections.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown/tree-sitter-markdown.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/typescript/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/typescript/tree-sitter-typescript.wasm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/zig/highlights.scm +0 -0
- /package/{assets → src/lib/tree-sitter/assets}/zig/tree-sitter-zig.wasm +0 -0
package/src/zig/utf8.zig
ADDED
|
@@ -0,0 +1,1948 @@
|
|
|
1
|
+
const std = @import("std");
|
|
2
|
+
const uucode = @import("uucode");
|
|
3
|
+
|
|
4
|
+
/// Selects the strategy used to compute the display width of a grapheme.
///
/// NOTE(review): the variant descriptions below are inferred from the names
/// only; confirm against the width-calculation code that switches on this enum.
pub const WidthMethod = enum {
    /// Presumably wcwidth-style width calculation.
    wcwidth,
    /// Presumably Unicode grapheme-aware width calculation.
    unicode,
    /// Presumably a mode that does not treat ZWJ (zero-width joiner)
    /// sequences as a single unit.
    no_zwj,
};
|
|
10
|
+
|
|
11
|
+
/// Report whether `text` is non-empty and consists solely of printable ASCII
/// bytes, i.e. every byte lies in the inclusive range [32, 126].
///
/// An empty slice yields `false`. Input is scanned 16 bytes at a time using
/// `@Vector` comparisons; whatever is left after the last full 16-byte chunk
/// is checked byte by byte.
pub fn isAsciiOnly(text: []const u8) bool {
    if (text.len == 0) return false;

    const lanes = 16;
    const Chunk = @Vector(lanes, u8);
    const lowest: Chunk = @splat(32);
    const highest: Chunk = @splat(126);

    // Length of the prefix that is covered by whole 16-byte chunks.
    const vector_end = text.len - (text.len % lanes);

    var offset: usize = 0;
    while (offset < vector_end) : (offset += lanes) {
        const bytes: Chunk = text[offset..][0..lanes].*;

        // Every lane must sit within both bounds; reject the input as soon
        // as a chunk contains a byte outside [32, 126].
        const all_high_enough = @reduce(.And, bytes >= lowest);
        const all_low_enough = @reduce(.And, bytes <= highest);
        if (!(all_high_enough and all_low_enough)) return false;
    }

    // Scalar pass over the tail that did not fill a whole chunk.
    for (text[vector_end..]) |byte| {
        switch (byte) {
            32...126 => {},
            else => return false,
        }
    }

    return true;
}
|
|
50
|
+
|
|
51
|
+
/// The terminator sequence that produced a line break.
pub const LineBreakKind = enum {
    LF, // \n (Unix/Linux)
    CR, // \r (Old Mac)
    CRLF, // \r\n (Windows)
};
|
|
56
|
+
|
|
57
|
+
/// A single line break found in a byte buffer.
pub const LineBreak = struct {
    // Byte index of the break. findLineBreaks() stores the index of the
    // '\n' for LF and CRLF breaks, and the index of the '\r' for a lone CR.
    pos: usize,
    // Which terminator sequence this break represents.
    kind: LineBreakKind,
};
|
|
61
|
+
|
|
62
|
+
/// Growable list of line breaks together with the allocator that owns it.
pub const LineBreakResult = struct {
    const Self = @This();

    breaks: std.ArrayListUnmanaged(LineBreak),
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until a break is appended.
    pub fn init(allocator: std.mem.Allocator) Self {
        return .{ .breaks = .{}, .allocator = allocator };
    }

    /// Release the list's backing memory.
    pub fn deinit(self: *Self) void {
        self.breaks.deinit(self.allocator);
    }

    /// Drop all recorded breaks but keep the allocated capacity for reuse.
    pub fn reset(self: *Self) void {
        self.breaks.clearRetainingCapacity();
    }
};
|
|
81
|
+
|
|
82
|
+
/// Growable list of tab byte positions together with the allocator that owns it.
pub const TabStopResult = struct {
    const Self = @This();

    positions: std.ArrayListUnmanaged(usize),
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until a position is appended.
    pub fn init(allocator: std.mem.Allocator) Self {
        return .{ .positions = .{}, .allocator = allocator };
    }

    /// Release the list's backing memory.
    pub fn deinit(self: *Self) void {
        self.positions.deinit(self.allocator);
    }

    /// Drop all recorded positions but keep the allocated capacity for reuse.
    pub fn reset(self: *Self) void {
        self.positions.clearRetainingCapacity();
    }
};
|
|
101
|
+
|
|
102
|
+
/// A soft-wrap opportunity located by findWrapBreaks().
pub const WrapBreak = struct {
    // byte_offset points at the grapheme that creates this break opportunity.
    // For whitespace and punctuation, this is the delimiter grapheme.
    // For CJK<->ASCII transitions, this is the last grapheme in the previous run.
    byte_offset: u32,

    // char_offset is grapheme-count based, not a display column.
    // Callers convert it to columns with charOffsetToColumn().
    char_offset: u32,
};
|
|
112
|
+
|
|
113
|
+
/// Growable list of wrap breaks together with the allocator that owns it.
pub const WrapBreakResult = struct {
    const Self = @This();

    breaks: std.ArrayListUnmanaged(WrapBreak),
    allocator: std.mem.Allocator,

    /// Create an empty result; nothing is allocated until a break is appended.
    pub fn init(allocator: std.mem.Allocator) Self {
        return .{ .breaks = .{}, .allocator = allocator };
    }

    /// Release the list's backing memory.
    pub fn deinit(self: *Self) void {
        self.breaks.deinit(self.allocator);
    }

    /// Drop all recorded breaks but keep the allocated capacity for reuse.
    pub fn reset(self: *Self) void {
        self.breaks.clearRetainingCapacity();
    }
};
|
|
132
|
+
|
|
133
|
+
// Returns true when the ASCII byte `b` is a wrap break point.
// CR and LF are deliberately not in the set.
inline fn isAsciiWrapBreak(b: u8) bool {
    switch (b) {
        // Whitespace (no CR/LF in inputs)
        ' ',
        '\t',
        // Dash and slashes
        '-',
        '/',
        '\\',
        // Punctuation
        '.',
        ',',
        ';',
        ':',
        '!',
        '?',
        // Brackets
        '(',
        ')',
        '[',
        ']',
        '{',
        '}',
        => return true,
        else => return false,
    }
}
|
|
144
|
+
|
|
145
|
+
// Decode the UTF-8 sequence that starts at text[pos] without validating it.
// The input is assumed to be well-formed UTF-8: the lead byte alone selects
// the sequence length and continuation bytes are only masked, never checked.
// Returns (codepoint, length). When the slice ends before the sequence is
// complete, the result is U+FFFD with length 1.
pub inline fn decodeUtf8Unchecked(text: []const u8, pos: usize) struct { cp: u21, len: u3 } {
    const lead = text[pos];
    if (lead < 0x80) return .{ .cp = @intCast(lead), .len = 1 };

    // Bytes available from pos to the end of the slice (at least 1 here).
    const avail = text.len - pos;

    if (avail < 2) return .{ .cp = 0xFFFD, .len = 1 };
    const c1: u32 = text[pos + 1] & 0x3F;

    // 110xxxxx: two-byte sequence
    if ((lead & 0xE0) == 0xC0) {
        const hi: u32 = lead & 0x1F;
        return .{ .cp = @intCast((hi << 6) | c1), .len = 2 };
    }

    if (avail < 3) return .{ .cp = 0xFFFD, .len = 1 };
    const c2: u32 = text[pos + 2] & 0x3F;

    // 1110xxxx: three-byte sequence
    if ((lead & 0xF0) == 0xE0) {
        const hi: u32 = lead & 0x0F;
        return .{ .cp = @intCast((hi << 12) | (c1 << 6) | c2), .len = 3 };
    }

    if (avail < 4) return .{ .cp = 0xFFFD, .len = 1 };
    const c3: u32 = text[pos + 3] & 0x3F;

    // Every other lead byte is decoded as a four-byte sequence.
    const hi: u32 = lead & 0x07;
    return .{ .cp = @intCast((hi << 18) | (c1 << 12) | (c2 << 6) | c3), .len = 4 };
}
|
|
172
|
+
|
|
173
|
+
// Returns true when the (non-ASCII) codepoint is treated as a wrap break
// point. Entries are listed in ascending codepoint order.
inline fn isUnicodeWrapBreak(cp: u21) bool {
    switch (cp) {
        0x00A0, // NBSP
        0x00AD, // SOFT HYPHEN
        0x1680, // OGHAM SPACE MARK
        0x2000...0x200B, // En quad..Hair space, then ZERO WIDTH SPACE
        0x2010, // HYPHEN
        0x202F, // NARROW NO-BREAK SPACE
        0x205F, // MEDIUM MATHEMATICAL SPACE
        0x3000...0x3002, // IDEOGRAPHIC SPACE, COMMA, FULL STOP
        0xFF01, // FULLWIDTH EXCLAMATION MARK
        0xFF1F, // FULLWIDTH QUESTION MARK
        => return true,
        else => return false,
    }
}
|
|
193
|
+
|
|
194
|
+
// WordClass keeps word-boundary behavior predictable in mixed-script text.
// We split between ASCII word runs and CJK word runs, and we keep each
// CJK run grouped as one unit.
const WordClass = enum {
    // ASCII letters, digits and '_' (see isAsciiWordByte).
    ascii_word,
    // Han, kana and Hangul codepoints (see isCjkWordCodepoint).
    cjk_word,
    // Everything that is neither of the above.
    other,
};
|
|
202
|
+
|
|
203
|
+
// True for the bytes that make up an ASCII word: letters, digits and '_'.
inline fn isAsciiWordByte(b: u8) bool {
    return switch (b) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}
|
|
209
|
+
|
|
210
|
+
// True when `cp` falls in one of the Han, kana or Hangul ranges that are
// classified as CJK word characters.
inline fn isCjkWordCodepoint(cp: u21) bool {
    return switch (cp) {
        // Han ideographs
        0x3400...0x4DBF,
        0x4E00...0x9FFF,
        0xF900...0xFAFF,
        0x20000...0x2A6DF,
        0x2A700...0x2B73F,
        0x2B740...0x2B81F,
        0x2B820...0x2CEAF,
        0x2CEB0...0x2EBEF,
        0x2EBF0...0x2EE5D,
        0x2F800...0x2FA1F,
        // Hiragana + Katakana
        0x3040...0x309F,
        0x30A0...0x30FF,
        0x31F0...0x31FF,
        0xFF66...0xFF9D,
        // Hangul
        0x1100...0x11FF,
        0x3130...0x318F,
        0xA960...0xA97F,
        0xAC00...0xD7AF,
        0xD7B0...0xD7FF,
        => true,
        else => false,
    };
}
|
|
235
|
+
|
|
236
|
+
// Map a codepoint to its WordClass. ASCII codepoints are only ever
// .ascii_word or .other; non-ASCII codepoints are only ever .cjk_word or .other.
inline fn classifyWordClass(cp: u21) WordClass {
    const is_ascii = cp <= 0x7F;
    // `and` short-circuits, so the narrowing cast only runs for cp <= 0x7F.
    if (is_ascii and isAsciiWordByte(@intCast(cp))) return .ascii_word;
    if (!is_ascii and isCjkWordCodepoint(cp)) return .cjk_word;
    return .other;
}
|
|
243
|
+
|
|
244
|
+
/// True when `cp` belongs to an ASCII word run or a CJK word run.
pub inline fn isWordCodepoint(cp: u21) bool {
    return switch (classifyWordClass(cp)) {
        .ascii_word, .cjk_word => true,
        .other => false,
    };
}
|
|
247
|
+
|
|
248
|
+
// True when two adjacent graphemes switch between a CJK word run and an
// ASCII word run, in either direction.
inline fn isCjkAsciiTransition(prev_class: WordClass, curr_class: WordClass) bool {
    return switch (prev_class) {
        .cjk_word => curr_class == .ascii_word,
        .ascii_word => curr_class == .cjk_word,
        .other => false,
    };
}
|
|
252
|
+
|
|
253
|
+
// Nothing needed here - using uucode.grapheme.isBreak directly
|
|
254
|
+
|
|
255
|
+
/// Collect soft-wrap opportunities in `text` into `result.breaks`.
///
/// A break is recorded for every ASCII delimiter accepted by
/// isAsciiWrapBreak(), every codepoint accepted by isUnicodeWrapBreak(), and
/// at the last grapheme before a CJK<->ASCII word-class transition.
/// `char_offset` values count grapheme clusters, not columns.
///
/// The text is consumed in 16-byte windows: an all-ASCII window takes a SIMD
/// fast path, a window containing any byte >= 0x80 is walked codepoint by
/// codepoint, and the remaining < 16 bytes are walked the same way.
///
/// `width_method` is accepted but not used. Errors come only from appending
/// to `result.breaks`.
pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method: WidthMethod) !void {
    // This function clears previous results and writes fresh break points.
    // Callers should treat `result.breaks` as replaced after the call.
    _ = width_method; // Currently unused, but kept for API consistency
    result.reset();
    const vector_len = 16;

    var pos: usize = 0;
    // Number of grapheme clusters started so far.
    var char_offset: u32 = 0;
    var prev_cp: ?u21 = null; // Track previous codepoint for grapheme detection
    var break_state: uucode.grapheme.BreakState = .default;
    // We keep track of the current grapheme so we can add a break at
    // CJK<->ASCII transitions. The break is emitted at the previous grapheme,
    // so callers that add grapheme width land exactly at the run boundary.
    var have_current_grapheme = false;
    var current_grapheme_byte_offset: u32 = 0;
    var current_grapheme_char_offset: u32 = 0;
    var current_grapheme_class: WordClass = .other;

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        // Fast path: all ASCII
        if (!@reduce(.Or, is_non_ascii)) {
            // Only the first byte can form a CJK<->ASCII transition with the
            // grapheme that preceded this window; the rest of the window is ASCII.
            const first_class = classifyWordClass(text[pos]);
            if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, first_class)) {
                try result.breaks.append(result.allocator, .{
                    .byte_offset = current_grapheme_byte_offset,
                    .char_offset = current_grapheme_char_offset,
                });
            }

            // Use SIMD to find break characters
            // (the same set of bytes that isAsciiWrapBreak() accepts)
            var match_mask: @Vector(vector_len, bool) = @splat(false);

            // Check whitespace
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(' ')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('\t')));

            // Check dashes and slashes
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('-')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('/')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('\\')));

            // Check punctuation
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('.')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(',')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(';')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(':')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('!')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('?')));

            // Check brackets
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('(')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(')')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('[')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat(']')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('{')));
            match_mask = match_mask | (chunk == @as(@Vector(vector_len, u8), @splat('}')));

            // Convert boolean mask to integer bitmask for faster iteration
            // (bit i is set when byte i of the window matched)
            var bitmask: u16 = 0;
            inline for (0..vector_len) |i| {
                if (match_mask[i]) {
                    bitmask |= @as(u16, 1) << @intCast(i);
                }
            }

            // Use bit manipulation to extract positions
            // Lowest set bit first, so breaks are appended in ascending order.
            // Each byte in the window is counted as one grapheme, so the lane
            // index is added directly to char_offset.
            while (bitmask != 0) {
                const bit_pos = @ctz(bitmask);
                try result.breaks.append(result.allocator, .{
                    .byte_offset = @intCast(pos + bit_pos),
                    .char_offset = char_offset + @as(u32, @intCast(bit_pos)),
                });
                bitmask &= bitmask - 1; // clear the lowest set bit
            }

            pos += vector_len;
            const block_start_char_offset = char_offset;
            char_offset += vector_len;
            prev_cp = text[pos - 1]; // Last ASCII char
            break_state = .default;
            // The window's last byte becomes the "current grapheme" for the
            // transition check performed on whatever follows.
            have_current_grapheme = true;
            current_grapheme_byte_offset = @intCast(pos - 1);
            current_grapheme_char_offset = block_start_char_offset + (vector_len - 1);
            current_grapheme_class = classifyWordClass(text[pos - 1]);
            continue;
        }

        // Slow path: mixed ASCII/non-ASCII - need grapheme-aware counting
        var i: usize = 0;
        while (i < vector_len) {
            const b0 = text[pos + i];
            if (b0 < 0x80) {
                const curr_cp: u21 = b0;

                // Check if this starts a new grapheme cluster
                // Skip invalid/replacement codepoints or codepoints that might be outside the grapheme table range
                const is_break = if (curr_cp == 0xFFFD or curr_cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
                    if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
                    break :blk uucode.grapheme.isBreak(p, curr_cp, &break_state);
                } else true;

                if (is_break) {
                    // A new grapheme starts here: emit a transition break at the
                    // previous grapheme if the word class flips, then track this one.
                    const curr_class = classifyWordClass(curr_cp);
                    if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
                        try result.breaks.append(result.allocator, .{
                            .byte_offset = current_grapheme_byte_offset,
                            .char_offset = current_grapheme_char_offset,
                        });
                    }
                    have_current_grapheme = true;
                    current_grapheme_byte_offset = @intCast(pos + i);
                    current_grapheme_char_offset = char_offset;
                    current_grapheme_class = curr_class;
                }

                if (isAsciiWrapBreak(b0)) {
                    try result.breaks.append(result.allocator, .{
                        .byte_offset = @intCast(pos + i),
                        .char_offset = char_offset,
                    });
                }
                i += 1;
                // char_offset advances only when a new grapheme started.
                if (is_break) {
                    char_offset += 1;
                }
                prev_cp = curr_cp;
            } else {
                const dec = decodeUtf8Unchecked(text, pos + i);
                if (pos + i + dec.len > text.len) break;
                // Do not consume a sequence that straddles the window; the
                // `pos += i` below restarts the next window at this sequence.
                if (pos + i + dec.len > pos + vector_len) break;

                // Check if this starts a new grapheme cluster
                // Skip invalid/replacement codepoints or codepoints that might be outside the grapheme table range
                const is_break = if (dec.cp == 0xFFFD or dec.cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
                    if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
                    break :blk uucode.grapheme.isBreak(p, dec.cp, &break_state);
                } else true;

                if (is_break) {
                    const curr_class = classifyWordClass(dec.cp);
                    if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
                        try result.breaks.append(result.allocator, .{
                            .byte_offset = current_grapheme_byte_offset,
                            .char_offset = current_grapheme_char_offset,
                        });
                    }
                    have_current_grapheme = true;
                    current_grapheme_byte_offset = @intCast(pos + i);
                    current_grapheme_char_offset = char_offset;
                    current_grapheme_class = curr_class;
                }

                if (isUnicodeWrapBreak(dec.cp)) {
                    try result.breaks.append(result.allocator, .{
                        .byte_offset = @intCast(pos + i),
                        .char_offset = char_offset,
                    });
                }
                i += dec.len;
                if (is_break) {
                    char_offset += 1;
                }
                prev_cp = dec.cp;
            }
        }
        // Advance by the bytes actually consumed (may be fewer than vector_len).
        pos += i;
    }

    // Tail
    // Fewer than vector_len bytes remain; same per-codepoint logic as the
    // slow path, indexed by absolute position.
    var i: usize = pos;
    while (i < text.len) {
        const b0 = text[i];
        if (b0 < 0x80) {
            const curr_cp: u21 = b0;
            const is_break = if (prev_cp) |p| blk: {
                if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
                break :blk uucode.grapheme.isBreak(p, curr_cp, &break_state);
            } else true;

            if (is_break) {
                const curr_class = classifyWordClass(curr_cp);
                if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
                    try result.breaks.append(result.allocator, .{
                        .byte_offset = current_grapheme_byte_offset,
                        .char_offset = current_grapheme_char_offset,
                    });
                }
                have_current_grapheme = true;
                current_grapheme_byte_offset = @intCast(i);
                current_grapheme_char_offset = char_offset;
                current_grapheme_class = curr_class;
            }

            if (isAsciiWrapBreak(b0)) {
                try result.breaks.append(result.allocator, .{
                    .byte_offset = @intCast(i),
                    .char_offset = char_offset,
                });
            }
            i += 1;
            if (is_break) {
                char_offset += 1;
            }
            prev_cp = curr_cp;
        } else {
            const dec = decodeUtf8Unchecked(text, i);
            if (i + dec.len > text.len) break;

            const is_break = if (dec.cp == 0xFFFD or dec.cp > 0x10FFFF) true else if (prev_cp) |p| blk: {
                if (p == 0xFFFD or p > 0x10FFFF) break :blk true;
                break :blk uucode.grapheme.isBreak(p, dec.cp, &break_state);
            } else true;

            if (is_break) {
                const curr_class = classifyWordClass(dec.cp);
                if (have_current_grapheme and isCjkAsciiTransition(current_grapheme_class, curr_class)) {
                    try result.breaks.append(result.allocator, .{
                        .byte_offset = current_grapheme_byte_offset,
                        .char_offset = current_grapheme_char_offset,
                    });
                }
                have_current_grapheme = true;
                current_grapheme_byte_offset = @intCast(i);
                current_grapheme_char_offset = char_offset;
                current_grapheme_class = curr_class;
            }

            if (isUnicodeWrapBreak(dec.cp)) {
                try result.breaks.append(result.allocator, .{
                    .byte_offset = @intCast(i),
                    .char_offset = char_offset,
                });
            }
            i += dec.len;
            if (is_break) {
                char_offset += 1;
            }
            prev_cp = dec.cp;
        }
    }
}
|
|
501
|
+
|
|
502
|
+
/// Record the byte index of every '\t' in `text` into `result.positions`,
/// in ascending order. Previous contents of `result` are discarded first.
/// Errors come only from appending to the list.
pub fn findTabStops(text: []const u8, result: *TabStopResult) !void {
    result.reset();

    const lanes = 16;
    const Block = @Vector(lanes, u8);
    const tab_lanes: Block = @splat('\t');

    var base: usize = 0;

    // 16-byte blocks: one vector compare decides whether the block needs a
    // byte-by-byte pass at all.
    while (text.len - base >= lanes) : (base += lanes) {
        const block: Block = text[base..][0..lanes].*;
        if (!@reduce(.Or, block == tab_lanes)) continue;

        const window: []const u8 = text[base .. base + lanes];
        for (window, base..) |byte, index| {
            if (byte == '\t') try result.positions.append(result.allocator, index);
        }
    }

    // Fewer than 16 bytes remain.
    for (text[base..], base..) |byte, index| {
        if (byte == '\t') try result.positions.append(result.allocator, index);
    }
}
|
|
532
|
+
|
|
533
|
+
/// Record every line break in `text` into `result.breaks`, in ascending order.
/// Previous contents of `result` are discarded first.
///
/// A "\r\n" pair produces one CRLF entry positioned at the '\n'; a lone '\n'
/// produces an LF entry and a lone '\r' a CR entry, each at its own index.
/// Errors come only from appending to the list.
pub fn findLineBreaks(text: []const u8, result: *LineBreakResult) !void {
    result.reset();

    const lanes = 16; // 16-byte vectors (SSE2/NEON compatible)
    const Block = @Vector(lanes, u8);
    const lf_lanes: Block = @splat('\n');
    const cr_lanes: Block = @splat('\r');

    var cursor: usize = 0;
    // True when the previously processed block ended with '\r'. A '\n' that
    // opens the next block was then already reported as CRLF.
    var carry_cr = false;

    // Whole 16-byte blocks.
    while (cursor + lanes <= text.len) {
        const block: Block = text[cursor..][0..lanes].*;
        const has_terminator = @reduce(.Or, block == lf_lanes) or @reduce(.Or, block == cr_lanes);
        if (!has_terminator) {
            carry_cr = false;
            cursor += lanes;
            continue;
        }

        var lane: usize = 0;
        while (lane < lanes) : (lane += 1) {
            const at = cursor + lane;
            switch (text[at]) {
                '\n' => {
                    // Second half of a CRLF that was split across blocks.
                    if (lane == 0 and carry_cr) {
                        carry_cr = false;
                        continue;
                    }
                    const after_cr = at > 0 and text[at - 1] == '\r';
                    const kind: LineBreakKind = if (after_cr) .CRLF else .LF;
                    try result.breaks.append(result.allocator, .{ .pos = at, .kind = kind });
                },
                '\r' => {
                    const before_lf = at + 1 < text.len and text[at + 1] == '\n';
                    if (before_lf) {
                        try result.breaks.append(result.allocator, .{ .pos = at + 1, .kind = .CRLF });
                        lane += 1; // step over the '\n' just consumed
                    } else {
                        try result.breaks.append(result.allocator, .{ .pos = at, .kind = .CR });
                    }
                },
                else => {},
            }
        }

        carry_cr = (text[cursor + lanes - 1] == '\r');
        cursor += lanes;
    }

    // Remaining bytes, one at a time.
    while (cursor < text.len) : (cursor += 1) {
        switch (text[cursor]) {
            '\n' => {
                const after_cr = cursor > 0 and text[cursor - 1] == '\r';
                // Already reported as CRLF when the last block ended in '\r'.
                if (after_cr and carry_cr) {
                    carry_cr = false;
                    continue;
                }
                const kind: LineBreakKind = if (after_cr) .CRLF else .LF;
                try result.breaks.append(result.allocator, .{ .pos = cursor, .kind = kind });
            },
            '\r' => {
                if (cursor + 1 < text.len and text[cursor + 1] == '\n') {
                    try result.breaks.append(result.allocator, .{ .pos = cursor + 1, .kind = .CRLF });
                    cursor += 1; // step over the '\n' just consumed
                } else {
                    try result.breaks.append(result.allocator, .{ .pos = cursor, .kind = .CR });
                }
            },
            else => {},
        }
        carry_cr = false;
    }
}
|
|
610
|
+
|
|
611
|
+
/// Result triple of a wrap-by-width computation.
/// NOTE(review): the producer is outside this view; the field meanings below
/// are read off the names and should be confirmed against it.
pub const WrapByWidthResult = struct {
    // presumably the byte offset into the text where the result lands
    byte_offset: u32,
    // presumably the number of grapheme clusters covered
    grapheme_count: u32,
    // presumably the number of display columns consumed
    columns_used: u32,
};
|
|
616
|
+
|
|
617
|
+
/// Result triple of a position-by-width computation. Has the same fields as
/// WrapByWidthResult.
/// NOTE(review): the producer is outside this view; the field meanings below
/// are read off the names and should be confirmed against it.
pub const PosByWidthResult = struct {
    // presumably the byte offset into the text where the result lands
    byte_offset: u32,
    // presumably the number of grapheme clusters covered
    grapheme_count: u32,
    // presumably the number of display columns consumed
    columns_used: u32,
};
|
|
622
|
+
|
|
623
|
+
/// Display width of a single codepoint in columns.
/// Codepoints above U+10FFFF, and codepoints for which eawToWidth() reports a
/// non-positive width, yield 0.
pub inline fn eastAsianWidth(cp: u21) u32 {
    if (cp > 0x10FFFF) return 0;

    const columns = eawToWidth(cp, uucode.get(.east_asian_width, cp));
    if (columns <= 0) return 0;
    return @intCast(columns);
}
|
|
629
|
+
|
|
630
|
+
/// Calculate width from east asian width property and Unicode properties.
/// Returns -1 for control characters (they don't contribute to width),
/// 0 for NUL, combining marks and a fixed set of zero-width codepoints,
/// 2 for wide/fullwidth codepoints and a fixed override table, and 1 otherwise.
inline fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
    if (cp == 0) return 0;

    // Control ranges: below 0x20, and 0x7F..0x9F.
    if (cp < 32 or (cp >= 0x7F and cp < 0xA0)) return -1;

    // Combining marks occupy no column of their own.
    switch (uucode.get(.general_category, cp)) {
        .mark_nonspacing, .mark_spacing_combining, .mark_enclosing => return 0,
        else => {},
    }

    // Hard-coded zero-width codepoints.
    switch (cp) {
        0x034F,
        0x180B...0x180D,
        0x200B...0x200D,
        0x2060,
        0xFE00...0xFE0F,
        0xFEFF,
        0xE0100...0xE01EF,
        => return 0,
        else => {},
    }

    if (eaw == .fullwidth or eaw == .wide) return 2;

    // Codepoints forced to two columns regardless of their east-asian-width
    // property. Anything not listed is one column.
    return switch (cp) {
        0x1F000...0x1F02B,
        0x1F030...0x1F093,
        0x1F0A0...0x1F0AE,
        0x1F0B1...0x1F0BF,
        0x1F0C1...0x1F0CF,
        0x1F0D1...0x1F0F5,

        0x231A,
        0x231B,
        0x2329,
        0x232A,
        0x23E9...0x23EC,
        0x23F0,
        0x23F3,
        0x25FD...0x25FE,

        0x2614...0x2615,
        0x2622,
        0x2623,
        0x2630...0x2637,
        0x2648...0x2653,
        0x267F,
        0x2693,
        0x269B,
        0x26A0,
        0x26A1,
        0x26AA...0x26AB,
        0x26BD...0x26BE,
        0x26C4...0x26C5,
        0x26CE,
        0x26D1,
        0x26D4,
        0x26EA,
        0x26F2,
        0x26F3,
        0x26F5,
        0x26FA,
        0x26FD,

        0x203C,
        0x2049,
        0x2705,
        0x270A...0x270B,
        0x2728,
        0x274C,
        0x274E,
        0x2753...0x2755,
        0x2757,
        0x2760...0x2767,
        0x2795...0x2797,
        0x27B0,
        0x27BF,
        0x2B1B...0x2B1C,
        0x2B50,
        0x2B55,

        0x1F300...0x1F320,
        0x1F32D...0x1F335,
        0x1F337...0x1F37C,
        0x1F37E...0x1F393,
        0x1F3A0...0x1F3CA,
        0x1F3CF...0x1F3D3,
        0x1F3E0...0x1F3F0,
        0x1F3F4,
        0x1F3F8...0x1F3FF,
        0x1F400...0x1F43E,
        0x1F440,
        0x1F442...0x1F4FC,
        0x1F4FF...0x1F6C5,
        0x1F6CC,
        0x1F6D0...0x1F6D2,
        0x1F6D5...0x1F6D7,
        0x1F6DC...0x1F6DF,
        0x1F6EB...0x1F6EC,
        0x1F6F4...0x1F6FC,
        0x1F700...0x1F773,
        0x1F780...0x1F7D8,
        0x1F7E0...0x1F7EB,
        0x1F800...0x1F80B,
        0x1F810...0x1F847,
        0x1F850...0x1F859,
        0x1F860...0x1F887,
        0x1F890...0x1F8AD,
        0x1F8B0...0x1F8B1,
        0x1F90C...0x1F93A,
        0x1F93C...0x1F945,
        0x1F947...0x1FA53,
        0x1FA60...0x1FA6D,
        0x1FA70...0x1FA74,
        0x1FA78...0x1FA7C,
        0x1FA80...0x1FA86,
        0x1FA90...0x1FAAC,
        0x1FAB0...0x1FABA,
        0x1FAC0...0x1FAC5,
        0x1FAD0...0x1FAD9,
        0x1FAE0...0x1FAE7,
        0x1FAF0...0x1FAF8,
        => 2,

        else => 1,
    };
}
|
|
736
|
+
|
|
737
|
+
/// Display width in columns of a single byte, for ASCII-only fast paths.
/// A tab counts as `tab_width`, printable ASCII (32..126) as 1, and every
/// other byte as 0.
inline fn asciiCharWidth(byte: u8, tab_width: u8) u32 {
    return switch (byte) {
        '\t' => @as(u32, tab_width),
        32...126 => 1,
        else => 0,
    };
}
|
|
747
|
+
|
|
748
|
+
/// Calculate the display width of a character (byte or codepoint) in columns.
///
/// `byte` is the first byte of the character and selects the path:
/// - '\t' counts as `tab_width`
/// - printable ASCII (32..126) counts as 1
/// - a non-ASCII lead byte (>= 0x80) uses the width of `codepoint`
/// - any other byte (ASCII control, DEL) counts as 0
inline fn charWidth(byte: u8, codepoint: u21, tab_width: u8) u32 {
    if (byte == '\t') return tab_width;

    // Go through eastAsianWidth() so that codepoints above U+10FFFF (which
    // decodeUtf8Unchecked() can produce from malformed input) get width 0
    // instead of being passed to the uucode property lookup.
    if (byte >= 0x80) return eastAsianWidth(codepoint);

    return if (byte >= 32 and byte <= 126) 1 else 0;
}
|
|
761
|
+
|
|
762
|
+
/// Check if a codepoint is valid for grapheme break detection.
/// The replacement character U+FFFD and anything above U+10FFFF are rejected.
inline fn isValidCodepoint(cp: u21) bool {
    const rejected = (cp == 0xFFFD) or (cp > 0x10FFFF);
    return !rejected;
}
|
|
766
|
+
|
|
767
|
+
/// Check if there's a grapheme break between two codepoints
/// - wcwidth mode: use Unicode grapheme clustering for proper rendering
///   (ZWJ sequences and skin tone modifiers stay together); width is
///   calculated elsewhere using wcwidth (sum of codepoint widths)
/// - no_zwj mode: use grapheme breaks but treat ZWJ as a break (ignore joining)
/// - unicode mode: use standard grapheme cluster segmentation
///
/// A missing previous codepoint, or an invalid codepoint on either side
/// (see isValidCodepoint), always counts as a break.
inline fn isGraphemeBreak(prev_cp: ?u21, curr_cp: u21, break_state: *uucode.grapheme.BreakState, width_method: WidthMethod) bool {
    // These two checks apply to every width method; both answer "break", so
    // their relative order does not matter.
    if (!isValidCodepoint(curr_cp)) return true;
    const prev = prev_cp orelse return true;

    // In no_zwj mode a codepoint that follows ZWJ (U+200D) starts a new
    // grapheme. uucode.grapheme.isBreak would report "no break" here, so it
    // is bypassed and the break state is reset because the break is forced.
    // A ZWJ as the *current* codepoint is not special-cased: it stays with
    // the preceding grapheme.
    if (width_method == .no_zwj and prev == 0x200D) {
        break_state.* = .default;
        return true;
    }

    if (!isValidCodepoint(prev)) return true;
    return uucode.grapheme.isBreak(prev, curr_cp, break_state);
}
|
|
810
|
+
|
|
811
|
+
/// Accumulator for the display width of one grapheme cluster.
const GraphemeWidthState = struct {
    /// Width accumulated so far, in columns.
    width: u32 = 0,
    /// True once a codepoint with non-zero width has contributed.
    has_width: bool = false,
    /// True when the cluster began with a regional indicator (U+1F1E6..U+1F1FF).
    is_regional_indicator_pair: bool = false,
    /// True once VS16 (U+FE0F) has been seen in the cluster.
    has_vs16: bool = false,
    /// Set by a nonspacing mark; cleared when a following Devanagari
    /// consonant is processed.
    has_indic_virama: bool = false,
    width_method: WidthMethod,

    /// Start a cluster from its first codepoint and that codepoint's width.
    inline fn init(first_cp: u21, first_width: u32, width_method: WidthMethod) GraphemeWidthState {
        var fresh: GraphemeWidthState = .{ .width_method = width_method };
        fresh.width = first_width;
        fresh.has_width = first_width != 0;
        fresh.is_regional_indicator_pair = first_cp >= 0x1F1E6 and first_cp <= 0x1F1FF;
        return fresh;
    }

    /// Fold a further codepoint of the same cluster into the width.
    ///
    /// In wcwidth mode `cp_width` is not consulted: the width is looked up
    /// again from the East Asian Width property and summed (tmux-style).
    inline fn addCodepoint(self: *GraphemeWidthState, cp: u21, cp_width: u32) void {
        if (self.width_method == .wcwidth) {
            const w = eawToWidth(cp, uucode.get(.east_asian_width, cp));
            if (w > 0) {
                self.width += @intCast(w);
                self.has_width = true;
            }
            return;
        }

        // unicode / no_zwj: grapheme-aware width.

        // Variation Selector-16 requests emoji presentation: a narrow base
        // becomes two columns wide.
        if (cp == 0xFE0F) {
            self.has_vs16 = true;
            if (self.has_width and self.width == 1) self.width = 2;
            return;
        }

        // Any nonspacing mark (general category Mn) raises the virama flag
        // and contributes no width itself.
        if (uucode.get(.general_category, cp) == .mark_nonspacing) {
            self.has_indic_virama = true;
            return;
        }

        // Second half of a flag: regional indicators add their width.
        const is_regional_indicator = cp >= 0x1F1E6 and cp <= 0x1F1FF;
        if (self.is_regional_indicator_pair and is_regional_indicator) {
            self.width += cp_width;
            self.has_width = true;
            return;
        }

        // Zero-width codepoints change nothing beyond this point.
        if (cp_width == 0) return;

        // First codepoint with a width defines the cluster width.
        if (!self.has_width) {
            self.width = cp_width;
            self.has_width = true;
            return;
        }

        // Conjunct handling: after a flagged mark, a Devanagari consonant
        // widens the cluster, except RA (U+0930), which adds nothing.
        if (!self.has_indic_virama) return;
        const is_devanagari_consonant = (cp >= 0x0915 and cp <= 0x0939) or (cp >= 0x0958 and cp <= 0x095F);
        if (!is_devanagari_consonant) return;
        if (cp != 0x0930) self.width += cp_width;
        self.has_indic_virama = false;
    }
};
|
|
883
|
+
|
|
884
|
+
/// Scanner state shared by the width-limited searches: running totals for
/// the clusters already accepted plus the cluster currently being built.
const ClusterState = struct {
    /// Columns consumed by accepted clusters.
    columns_used: u32,
    /// Number of accepted clusters.
    grapheme_count: u32,
    /// Width of the cluster currently being built.
    cluster_width: u32,
    /// Byte offset at which the current cluster begins.
    cluster_start: usize,
    /// Previously processed codepoint, or null before the first one.
    prev_cp: ?u21,
    /// Segmenter state threaded through `isGraphemeBreak`.
    break_state: uucode.grapheme.BreakState,
    /// Width accumulator for the current cluster.
    width_state: GraphemeWidthState,
    width_method: WidthMethod,
    /// False until the current cluster has received its first codepoint.
    cluster_started: bool,

    /// Empty state positioned at the start of the text.
    fn init(width_method: WidthMethod) ClusterState {
        return .{
            .width_method = width_method,
            .prev_cp = null,
            .break_state = .default,
            .cluster_start = 0,
            .cluster_started = false,
            .cluster_width = 0,
            // Placeholder only; replaced when the first cluster starts.
            .width_state = GraphemeWidthState.init(0, 0, width_method),
            .columns_used = 0,
            .grapheme_count = 0,
        };
    }
};
|
|
910
|
+
|
|
911
|
+
/// Cluster-boundary step for wrapping by width (stops BEFORE the limit is exceeded).
///
/// On a break, the finished cluster is committed to `state` if it still fits
/// within `max_columns`, and a new cluster is opened at `new_cluster_start`.
/// Returns true, leaving `state` untouched, when the finished cluster
/// would not fit; otherwise returns false.
inline fn handleClusterForWrap(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
) bool {
    if (!is_break) return false;

    // Before the first codepoint there is no finished cluster to commit.
    if (state.prev_cp != null) {
        const total = state.columns_used + state.cluster_width;
        if (total > max_columns) return true; // Signal to stop
        state.columns_used = total;
        state.grapheme_count += 1;
    }

    state.cluster_start = new_cluster_start;
    state.cluster_width = 0;
    state.cluster_started = false;
    return false;
}
|
|
933
|
+
|
|
934
|
+
/// Cluster-boundary step for locating a column position (snaps to grapheme boundaries).
/// Returns true when the scan should stop; `state` is left untouched in that case.
///
/// Snapping behavior:
/// - include_start_before=true (selection end): accept clusters that START
///   before max_columns. With max_columns=3 a cluster occupying columns
///   [2-3] is accepted (2 < 3), so the position snaps forward over it.
/// - include_start_before=false (selection start): accept only clusters that
///   END at or before max_columns. With max_columns=3 a cluster occupying
///   columns [2-3] is rejected (it ends at 4 > 3), so the position snaps back.
///
/// `grapheme_count` is advanced only in the include_start_before=true case.
inline fn handleClusterForPos(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
    include_start_before: bool,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;

        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (rejected) return true; // Signal to stop (don't include this grapheme)

        state.columns_used = end_col;
        if (include_start_before) state.grapheme_count += 1;
    }

    state.cluster_start = new_cluster_start;
    state.cluster_width = 0;
    state.cluster_started = false;
    return false;
}
|
|
975
|
+
|
|
976
|
+
/// Find the wrap position for `text` within `max_columns`.
/// Thin dispatcher: picks the implementation that matches `width_method`.
pub fn findWrapPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    return switch (width_method) {
        .wcwidth => findWrapPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly),
        .unicode, .no_zwj => findWrapPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, width_method),
    };
}
|
|
989
|
+
|
|
990
|
+
/// Find the wrap position using grapheme cluster segmentation.
///
/// Scans `text` cluster by cluster and returns the byte offset, cluster count
/// and columns consumed by the longest prefix whose width does not exceed
/// `max_columns`. Empty text or a zero limit yields an all-zero result.
/// When `isASCIIOnly` is set the answer is computed directly from the lengths
/// (one column per byte; `tab_width` is not consulted on that path).
fn findWrapPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        if (max_columns < text.len) {
            return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
        }
        return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
    }

    const vector_len = 16;
    var pos: usize = 0;
    var state = ClusterState.init(width_method);

    // The block yields true as soon as a finished cluster no longer fits.
    var stop_at_cluster: bool = scan: {
        while (pos + vector_len <= text.len) {
            const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
            const high_bit: @Vector(vector_len, u8) = @splat(0x80);

            if (!@reduce(.Or, chunk >= high_bit)) {
                // Whole chunk is ASCII: no decoding needed.
                for (0..vector_len) |lane| {
                    const at = pos + lane;
                    const b = text[at];
                    const cp: u21 = b;

                    const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
                    if (handleClusterForWrap(&state, boundary, at, max_columns)) break :scan true;

                    const w = asciiCharWidth(b, tab_width);
                    if (state.cluster_started) {
                        state.width_state.addCodepoint(cp, w);
                        state.cluster_width = state.width_state.width;
                    } else {
                        state.width_state = GraphemeWidthState.init(cp, w, width_method);
                        state.cluster_width = w;
                        state.cluster_started = true;
                    }
                    state.prev_cp = cp;
                }
                pos += vector_len;
                continue;
            }

            // Mixed ASCII/non-ASCII chunk: walk it codepoint by codepoint.
            // `consumed` may run past the chunk when a sequence straddles it.
            var consumed: usize = 0;
            while (consumed < vector_len and pos + consumed < text.len) {
                const at = pos + consumed;
                const b0 = text[at];
                var cp: u21 = b0;
                var cp_len: usize = 1;
                if (b0 >= 0x80) {
                    const dec = decodeUtf8Unchecked(text, at);
                    cp = dec.cp;
                    cp_len = dec.len;
                }

                // Sequence runs past the end of the text.
                if (at + cp_len > text.len) break;

                const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
                if (handleClusterForWrap(&state, boundary, at, max_columns)) break :scan true;

                const w = charWidth(b0, cp, tab_width);
                if (state.cluster_started) {
                    state.width_state.addCodepoint(cp, w);
                    state.cluster_width = state.width_state.width;
                } else {
                    state.width_state = GraphemeWidthState.init(cp, w, width_method);
                    state.cluster_width = w;
                    state.cluster_started = true;
                }
                state.prev_cp = cp;
                consumed += cp_len;
            }
            pos += consumed; // Advance by how much we actually processed
        }

        // Tail: fewer than vector_len bytes remain.
        while (pos < text.len) {
            const b0 = text[pos];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, pos);
                cp = dec.cp;
                cp_len = dec.len;
            }

            const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
            if (handleClusterForWrap(&state, boundary, pos, max_columns)) break :scan true;

            const w = charWidth(b0, cp, tab_width);
            if (state.cluster_started) {
                state.width_state.addCodepoint(cp, w);
                state.cluster_width = state.width_state.width;
            } else {
                state.width_state = GraphemeWidthState.init(cp, w, width_method);
                state.cluster_width = w;
                state.cluster_started = true;
            }
            state.prev_cp = cp;
            pos += cp_len;
        }

        break :scan false;
    };

    // Final cluster: still pending because no break followed it.
    if (!stop_at_cluster and state.prev_cp != null and state.cluster_width > 0) {
        if (state.columns_used + state.cluster_width > max_columns) {
            stop_at_cluster = true;
        } else {
            state.columns_used += state.cluster_width;
            state.grapheme_count += 1;
        }
    }

    const end_offset = if (stop_at_cluster) state.cluster_start else text.len;
    return .{ .byte_offset = @intCast(end_offset), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
|
|
1113
|
+
|
|
1114
|
+
/// Find the wrap position with wcwidth-style, codepoint-by-codepoint widths.
///
/// Each codepoint is measured independently; the scan stops at the first
/// codepoint that would push the total past `max_columns`, or as soon as the
/// limit has been reached (so trailing zero-width codepoints are not
/// absorbed). The `grapheme_count` field of the result counts codepoints.
fn findWrapPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        if (max_columns < text.len) {
            return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
        }
        return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
    }

    var pos: usize = 0;
    var cols: u32 = 0;
    var count: u32 = 0;
    // Reported offset; stays at the end of the text unless the limit stops us.
    var stop_offset: usize = text.len;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // Sequence runs past the end of the text: give up on it.
        if (pos + cp_len > text.len) break;

        const w = charWidth(b0, cp, tab_width);

        // Stop once the limit is reached, or if this codepoint would exceed it.
        if (cols >= max_columns or cols + w > max_columns) {
            stop_offset = pos;
            break;
        }

        cols += w;
        count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(stop_offset), .grapheme_count = count, .columns_used = cols };
}
|
|
1170
|
+
|
|
1171
|
+
/// Find the byte position that corresponds to column `max_columns`.
/// Thin dispatcher: picks the implementation that matches `width_method`.
///
/// - include_start_before=true (selection end): characters that START before
///   max_columns are included, so a position inside a width-2 character
///   snaps forward past it.
/// - include_start_before=false (selection start): characters that would END
///   after max_columns are excluded, so a position inside a width-2
///   character snaps back to its start.
pub fn findPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    return switch (width_method) {
        .wcwidth => findPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly, include_start_before),
        .unicode, .no_zwj => findPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, include_start_before, width_method),
    };
}
|
|
1189
|
+
|
|
1190
|
+
/// Find the byte position for column `max_columns` using grapheme cluster segmentation.
///
/// Scans `text` cluster by cluster, applying the boundary rule selected by
/// `include_start_before`:
/// - true (selection end): a cluster is accepted when it STARTS before
///   `max_columns`;
/// - false (selection start): a cluster is accepted only when it ENDS at or
///   before `max_columns`.
///
/// The same rule is applied to the last cluster of the text. Previously the
/// last cluster was only tested with `columns_used >= max_columns`, so with
/// include_start_before=false a trailing wide cluster that crossed
/// `max_columns` was accepted even though the identical cluster followed by
/// more text would have been rejected.
///
/// Returns the byte offset reached, the columns consumed, and the number of
/// accepted clusters. `grapheme_count` is advanced only when
/// `include_start_before` is true. Empty text or a zero limit yields an
/// all-zero result. When `isASCIIOnly` is set the answer is computed directly
/// from the lengths (one column per byte; `tab_width` is not consulted).
fn findPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        if (max_columns < text.len) {
            return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
        }
        return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
    }

    const vector_len = 16;
    var pos: usize = 0;
    var state = ClusterState.init(width_method);

    // The block yields true as soon as a finished cluster is rejected.
    var stop_at_cluster: bool = scan: {
        while (pos + vector_len <= text.len) {
            const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
            const high_bit: @Vector(vector_len, u8) = @splat(0x80);

            if (!@reduce(.Or, chunk >= high_bit)) {
                // Whole chunk is ASCII: no decoding needed.
                for (0..vector_len) |lane| {
                    const at = pos + lane;
                    const b = text[at];
                    const cp: u21 = b;

                    const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
                    if (handleClusterForPos(&state, boundary, at, max_columns, include_start_before)) break :scan true;

                    const w = asciiCharWidth(b, tab_width);
                    if (state.cluster_started) {
                        state.width_state.addCodepoint(cp, w);
                        state.cluster_width = state.width_state.width;
                    } else {
                        state.width_state = GraphemeWidthState.init(cp, w, width_method);
                        state.cluster_width = w;
                        state.cluster_started = true;
                    }
                    state.prev_cp = cp;
                }
                pos += vector_len;
                continue;
            }

            // Mixed ASCII/non-ASCII chunk: walk it codepoint by codepoint.
            // `consumed` may run past the chunk when a sequence straddles it.
            var consumed: usize = 0;
            while (consumed < vector_len and pos + consumed < text.len) {
                const at = pos + consumed;
                const b0 = text[at];
                var cp: u21 = b0;
                var cp_len: usize = 1;
                if (b0 >= 0x80) {
                    const dec = decodeUtf8Unchecked(text, at);
                    cp = dec.cp;
                    cp_len = dec.len;
                }

                // Sequence runs past the end of the text.
                if (at + cp_len > text.len) break;

                const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
                if (handleClusterForPos(&state, boundary, at, max_columns, include_start_before)) break :scan true;

                const w = charWidth(b0, cp, tab_width);
                if (state.cluster_started) {
                    state.width_state.addCodepoint(cp, w);
                    state.cluster_width = state.width_state.width;
                } else {
                    state.width_state = GraphemeWidthState.init(cp, w, width_method);
                    state.cluster_width = w;
                    state.cluster_started = true;
                }
                state.prev_cp = cp;
                consumed += cp_len;
            }
            pos += consumed; // Advance by how much we actually processed
        }

        // Tail: fewer than vector_len bytes remain.
        while (pos < text.len) {
            const b0 = text[pos];
            var cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, pos);
                cp = dec.cp;
                cp_len = dec.len;
            }

            const boundary = isGraphemeBreak(state.prev_cp, cp, &state.break_state, state.width_method);
            if (handleClusterForPos(&state, boundary, pos, max_columns, include_start_before)) break :scan true;

            const w = charWidth(b0, cp, tab_width);
            if (state.cluster_started) {
                state.width_state.addCodepoint(cp, w);
                state.cluster_width = state.width_state.width;
            } else {
                state.width_state = GraphemeWidthState.init(cp, w, width_method);
                state.cluster_width = w;
                state.cluster_started = true;
            }
            state.prev_cp = cp;
            pos += cp_len;
        }

        break :scan false;
    };

    // Final cluster: still pending because no break followed it. Apply the
    // same acceptance rule handleClusterForPos uses for every other cluster.
    if (!stop_at_cluster and state.prev_cp != null and state.cluster_width > 0) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;
        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;

        if (rejected) {
            stop_at_cluster = true;
        } else {
            state.columns_used = end_col;
            if (include_start_before) state.grapheme_count += 1;
        }
    }

    const end_offset = if (stop_at_cluster) state.cluster_start else text.len;
    return .{ .byte_offset = @intCast(end_offset), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
|
|
1316
|
+
|
|
1317
|
+
/// Find the byte position for column `max_columns` with wcwidth-style,
/// codepoint-by-codepoint widths.
///
/// - include_start_before=true (selection end): stop at the first codepoint
///   that starts at or after `max_columns`.
/// - include_start_before=false (selection start): stop at the first
///   codepoint that would end after `max_columns`.
///
/// The `grapheme_count` field of the result counts codepoints.
fn findPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        if (max_columns < text.len) {
            return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
        }
        return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
    }

    var pos: usize = 0;
    var cols: u32 = 0;
    var count: u32 = 0;
    // Reported offset; stays at the end of the text unless a boundary stops us.
    var stop_offset: usize = text.len;

    while (pos < text.len) {
        const b0 = text[pos];
        var cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // Sequence runs past the end of the text: give up on it.
        if (pos + cp_len > text.len) break;

        const start_col = cols;
        const end_col = cols + charWidth(b0, cp, tab_width);

        const rejected = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (rejected) {
            stop_offset = pos;
            break;
        }

        cols = end_col;
        count += 1;
        pos += cp_len;
    }

    return .{ .byte_offset = @intCast(stop_offset), .grapheme_count = count, .columns_used = cols };
}
|
|
1379
|
+
|
|
1380
|
+
/// Display width of the character or cluster that begins at `byte_offset`.
/// Thin dispatcher: picks the implementation that matches `width_method`.
pub fn getWidthAt(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .wcwidth => getWidthAtWCWidth(text, byte_offset, tab_width),
        .unicode, .no_zwj => getWidthAtUnicode(text, byte_offset, tab_width, width_method),
    };
}
|
|
1387
|
+
|
|
1388
|
+
/// Width of the grapheme cluster that begins at `byte_offset`.
///
/// Returns 0 when `byte_offset` is at or past the end of `text`, and 1 when
/// the first sequence runs past the end of the text. Otherwise codepoints are
/// consumed until `isGraphemeBreak` reports a boundary (or the text or a
/// truncated sequence ends the scan) and the accumulated width is returned.
fn getWidthAtUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    if (byte_offset >= text.len) return 0;

    const lead = text[byte_offset];
    var first_cp: u21 = lead;
    var first_len: usize = 1;
    if (lead >= 0x80) {
        const dec = decodeUtf8Unchecked(text, byte_offset);
        if (byte_offset + dec.len > text.len) return 1;
        first_cp = dec.cp;
        first_len = dec.len;
    }

    var cluster = GraphemeWidthState.init(first_cp, charWidth(lead, first_cp, tab_width), width_method);
    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: ?u21 = first_cp;
    var pos = byte_offset + first_len;

    while (pos < text.len) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp = dec.cp;
            cp_len = dec.len;
        }

        // Sequence runs past the end of the text.
        if (pos + cp_len > text.len) break;

        // The next cluster begins here; the current one is complete.
        if (isGraphemeBreak(prev_cp, cp, &break_state, width_method)) break;

        cluster.addCodepoint(cp, charWidth(b, cp, tab_width));
        prev_cp = cp;
        pos += cp_len;
    }

    return cluster.width;
}
|
|
1428
|
+
|
|
1429
|
+
/// Width of the single codepoint that begins at `byte_offset`.
/// wcwidth mode measures every codepoint on its own, so no cluster scan is
/// needed. Returns 0 past the end of `text` and 1 for a sequence that runs
/// past the end of the text.
fn getWidthAtWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) u32 {
    if (byte_offset >= text.len) return 0;

    const lead = text[byte_offset];
    if (lead < 0x80) return charWidth(lead, lead, tab_width);

    const dec = decodeUtf8Unchecked(text, byte_offset);
    if (byte_offset + dec.len > text.len) return 1;
    return charWidth(lead, dec.cp, tab_width);
}
|
|
1445
|
+
|
|
1446
|
+
/// Result of a backwards lookup from a byte offset.
pub const PrevGraphemeResult = struct {
    /// Byte offset in the text at which the located character/cluster begins.
    start_offset: usize,
    /// Display width, in columns, measured at `start_offset`.
    width: u32,
};
|
|
1450
|
+
|
|
1451
|
+
/// Locate the character/cluster that precedes `byte_offset`.
/// Thin dispatcher: picks the implementation that matches `width_method`.
/// Returns null when nothing is found.
pub fn getPrevGraphemeStart(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    return switch (width_method) {
        .wcwidth => getPrevGraphemeStartWCWidth(text, byte_offset, tab_width),
        .unicode, .no_zwj => getPrevGraphemeStartUnicode(text, byte_offset, tab_width, width_method),
    };
}
|
|
1458
|
+
|
|
1459
|
+
/// Locate the last codepoint with non-zero width that starts before
/// `byte_offset`, scanning forward from the beginning of `text`.
///
/// Returns null when `byte_offset` is 0 or past the end of `text`, or when
/// no codepoint before it has a width. Zero-width codepoints are skipped, so
/// they never become the reported position.
fn getPrevGraphemeStartWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) ?PrevGraphemeResult {
    // An empty text is covered as well: any non-zero offset is out of range.
    if (byte_offset == 0 or byte_offset > text.len) return null;

    var found: ?PrevGraphemeResult = null;
    var pos: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            // A sequence that runs past the end is measured as U+FFFD.
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
        }

        const w = charWidth(b, cp, tab_width);
        if (w > 0) {
            found = .{ .start_offset = pos, .width = w };
        }
        pos += cp_len;
    }

    return found;
}
|
|
1488
|
+
|
|
1489
|
+
/// Locate the grapheme cluster that precedes `byte_offset`.
///
/// Scans forward from the beginning of `text`, remembering the start of the
/// most recent cluster that begins before `byte_offset`, and reports that
/// start together with the width measured there by `getWidthAt`.
///
/// Boundaries are decided by `isGraphemeBreak`, the same predicate the width
/// functions use, so the reported start and width always describe the same
/// cluster. In particular, in no_zwj mode the codepoint following a ZWJ
/// begins its own cluster. (Previously the segmenter was called directly,
/// which ignored the no_zwj rule and could pair the start of a whole ZWJ
/// sequence with the width of only its first part.)
///
/// Invalid codepoints (see `isValidCodepoint`) are skipped: they neither
/// start a cluster nor become the "previous" codepoint. A sequence that runs
/// past the end of the text is treated as U+FFFD and therefore skipped too.
///
/// Returns null when `byte_offset` is 0, past the end of `text`, or the text
/// is empty.
fn getPrevGraphemeStartUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    var break_state: uucode.grapheme.BreakState = .default;
    var prev_cp: ?u21 = null;
    // Always < byte_offset: it is only ever assigned positions visited by
    // the loop below, and byte_offset is at least 1 here.
    var grapheme_start: usize = 0;
    var pos: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        var cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            cp_len = dec.len;
            cp = if (pos + dec.len > text.len) 0xFFFD else dec.cp;
        }

        if (isValidCodepoint(cp)) {
            if (isGraphemeBreak(prev_cp, cp, &break_state, width_method)) {
                grapheme_start = pos;
            }
            prev_cp = cp;
        }

        pos += cp_len;
    }

    return .{
        .start_offset = grapheme_start,
        .width = getWidthAt(text, grapheme_start, tab_width, width_method),
    };
}
|
|
1540
|
+
|
|
1541
|
+
/// Calculate the display width of text - proxy function that dispatches based on width_method.
///
/// `.wcwidth` is handled by `calculateTextWidthWCWidth`; `.unicode` and `.no_zwj`
/// are handled by `calculateTextWidthUnicode`, which also receives `width_method`.
pub fn calculateTextWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .wcwidth => calculateTextWidthWCWidth(text, tab_width, isASCIIOnly),
        .unicode, .no_zwj => calculateTextWidthUnicode(text, tab_width, isASCIIOnly, width_method),
    };
}
|
|
1548
|
+
|
|
1549
|
+
/// Calculate text width using Unicode grapheme cluster segmentation.
///
/// Empty text has width 0. When `isASCIIOnly` is set the byte length is returned
/// directly. Otherwise the text is split into clusters with `isGraphemeBreak`, each
/// cluster's width is tracked by a `GraphemeWidthState`, and the cluster widths are
/// summed.
fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: one column per byte.
    if (isASCIIOnly) return @intCast(text.len);

    var width_sum: u32 = 0;
    var cursor: usize = 0;
    var last_cp: ?u21 = null;
    var seg_state: uucode.grapheme.BreakState = .default;
    // Initialised on the first break, before any read (guarded by `last_cp != null`).
    var open_cluster: GraphemeWidthState = undefined;

    while (cursor < text.len) {
        const lead_byte = text[cursor];
        var scalar: u21 = lead_byte;
        var step: usize = 1;

        if (lead_byte >= 0x80) {
            const decoded = decodeUtf8Unchecked(text, cursor);
            step = decoded.len;
            // A sequence that claims more bytes than remain is treated as U+FFFD.
            scalar = if (cursor + step > text.len) 0xFFFD else decoded.cp;
        }

        const starts_cluster = isGraphemeBreak(last_cp, scalar, &seg_state, width_method);
        const glyph_width = charWidth(lead_byte, scalar, tab_width);

        if (starts_cluster) {
            // Close the previous cluster (if any) before opening a new one.
            if (last_cp != null) width_sum += open_cluster.width;
            open_cluster = GraphemeWidthState.init(scalar, glyph_width, width_method);
        } else {
            open_cluster.addCodepoint(scalar, glyph_width);
        }

        last_cp = scalar;
        cursor += step;
    }

    // Account for the cluster still open when the text ended.
    if (last_cp != null) width_sum += open_cluster.width;

    return width_sum;
}
|
|
1597
|
+
|
|
1598
|
+
/// Calculate text width using wcwidth-style codepoint-by-codepoint processing.
///
/// Empty text has width 0. When `isASCIIOnly` is set the byte length is returned
/// directly. Otherwise the result is the sum of `charWidth` over every codepoint.
fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path: one column per byte.
    if (isASCIIOnly) return @intCast(text.len);

    var width_sum: u32 = 0;
    var cursor: usize = 0;

    while (cursor < text.len) {
        const lead_byte = text[cursor];
        var scalar: u21 = lead_byte;
        var step: usize = 1;

        if (lead_byte >= 0x80) {
            const decoded = decodeUtf8Unchecked(text, cursor);
            step = decoded.len;
            // A sequence that claims more bytes than remain is treated as U+FFFD.
            scalar = if (cursor + step > text.len) 0xFFFD else decoded.cp;
        }

        width_sum += charWidth(lead_byte, scalar, tab_width);
        cursor += step;
    }

    return width_sum;
}
|
|
1628
|
+
|
|
1629
|
+
/// Grapheme cluster information for caching.
///
/// One entry describes a single cluster reported by the `findGraphemeInfo*`
/// scanners in this file.
pub const GraphemeInfo = struct {
    // NOTE(review): the scanners narrow into the two u8 fields with `@intCast`;
    // a cluster spanning more than 255 bytes (or columns) would not fit —
    // confirm callers can never produce one.

    /// Byte index of the cluster's first byte in the scanned text.
    byte_offset: u32,
    /// Number of bytes covered by the cluster.
    byte_len: u8,
    /// Display width recorded for the cluster.
    width: u8,
    /// Running column at which the cluster begins.
    col_offset: u32,
};
|
|
1636
|
+
|
|
1637
|
+
/// Container holding a growable list of `GraphemeInfo` entries.
pub const GraphemeInfoResult = struct {
    const Self = @This();

    // NOTE(review): this wrapper uses the allocator-storing list API
    // (`init(allocator)` / `deinit()` with no allocator argument), whereas the
    // findGraphemeInfo* functions in this file take `std.ArrayListUnmanaged` —
    // confirm it still matches the toolchain's `std.ArrayList`.
    graphemes: std.ArrayList(GraphemeInfo),

    /// Create an empty result whose list allocates from `allocator`.
    pub fn init(allocator: std.mem.Allocator) Self {
        const list = std.ArrayList(GraphemeInfo).init(allocator);
        return .{ .graphemes = list };
    }

    /// Release the list's storage.
    pub fn deinit(self: *Self) void {
        self.graphemes.deinit();
    }

    /// Drop all entries but keep the allocated capacity for reuse.
    pub fn reset(self: *Self) void {
        self.graphemes.clearRetainingCapacity();
    }
};
|
|
1654
|
+
|
|
1655
|
+
/// Find all grapheme clusters in text and return info for multi-byte graphemes and tabs.
///
/// Proxy function: `.wcwidth` is forwarded to `findGraphemeInfoWCWidth`, while
/// `.unicode` and `.no_zwj` are forwarded to `findGraphemeInfoUnicode` together
/// with `width_method`. Entries are appended to `result` using `allocator`; any
/// error from the selected implementation is propagated.
pub fn findGraphemeInfo(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    switch (width_method) {
        .wcwidth => return findGraphemeInfoWCWidth(text, tab_width, isASCIIOnly, allocator, result),
        .unicode, .no_zwj => return findGraphemeInfoUnicode(text, tab_width, isASCIIOnly, width_method, allocator, result),
    }
}
|
|
1670
|
+
|
|
1671
|
+
/// Find all grapheme clusters using Unicode grapheme cluster segmentation.
/// This version treats grapheme clusters as single units for width calculation.
///
/// Appends one `GraphemeInfo` to `result` for every cluster that contains a
/// multi-byte codepoint or starts with a tab, provided its accumulated width is
/// greater than zero (or `width_method` is `.wcwidth`). Clusters made only of
/// single-byte, non-tab codepoints are not emitted but still advance the column.
///
/// Returns immediately when `text` is empty, or when `isASCIIOnly` is set and
/// `width_method` is not `.wcwidth`. Errors from `result.append` are propagated.
///
/// The text is consumed in 16-byte chunks: a chunk with no byte >= 0x80 takes the
/// ASCII path (`asciiCharWidth`), any other chunk is decoded codepoint by
/// codepoint (`charWidth`), and the remaining bytes are handled by the tail loop.
/// A sequence whose decoded length runs past the end of `text` stops the scan;
/// those bytes are counted in the final cluster's byte length.
fn findGraphemeInfoUnicode(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    // The `.wcwidth` exceptions below are kept from the original even though the
    // `findGraphemeInfo` dispatcher sends `.wcwidth` to a different function.
    if (isASCIIOnly and width_method != .wcwidth) return;
    if (text.len == 0) return;

    const vector_len = 16;
    var scan = UnicodeClusterScan{
        .width_method = width_method,
        .allocator = allocator,
        .result = result,
    };
    var pos: usize = 0;

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        // Fast path: all ASCII, every byte is a one-byte codepoint.
        if (!@reduce(.Or, is_non_ascii)) {
            for (text[pos..][0..vector_len], 0..) |b, i| {
                try scan.feed(pos + i, b, b, 1, asciiCharWidth(b, tab_width));
            }
            pos += vector_len;
            continue;
        }

        // Slow path: mixed ASCII/non-ASCII. A codepoint may straddle the chunk
        // end, so `i` can finish past `vector_len`.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const b0 = text[pos + i];
            var curr_cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, pos + i);
                curr_cp = dec.cp;
                cp_len = dec.len;
            }

            if (pos + i + cp_len > text.len) break;

            try scan.feed(pos + i, b0, curr_cp, cp_len, charWidth(b0, curr_cp, tab_width));
            i += cp_len;
        }
        pos += i;
    }

    // Tail processing: fewer than `vector_len` bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        if (pos + cp_len > text.len) break;

        try scan.feed(pos, b0, curr_cp, cp_len, charWidth(b0, curr_cp, tab_width));
        pos += cp_len;
    }

    // Commit the cluster still open at the end of the text.
    try scan.finish(text.len);
}

/// Private scan state for `findGraphemeInfoUnicode`: tracks the currently open
/// cluster and appends finished clusters to the caller's list.
const UnicodeClusterScan = struct {
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),

    /// Running column: sum of the widths of all closed clusters.
    col: u32 = 0,
    prev_cp: ?u21 = null,
    break_state: uucode.grapheme.BreakState = .default,

    cluster_start: usize = 0,
    cluster_start_col: u32 = 0,
    /// Initialised when the first cluster opens; never read before that
    /// (all reads are guarded by `prev_cp != null`).
    cluster_width_state: GraphemeWidthState = undefined,
    cluster_is_multibyte: bool = false,
    cluster_is_tab: bool = false,

    /// Consume the codepoint `cp` that starts at byte `at`: either extend the
    /// open cluster or close it and open a new one at `at`.
    fn feed(self: *UnicodeClusterScan, at: usize, lead_byte: u8, cp: u21, cp_len: usize, cp_width: anytype) !void {
        if (isGraphemeBreak(self.prev_cp, cp, &self.break_state, self.width_method)) {
            if (self.prev_cp != null) {
                try self.emit(at);
                // The column advances for every closed cluster, emitted or not.
                self.col += self.cluster_width_state.width;
            }

            self.cluster_start = at;
            self.cluster_start_col = self.col;
            self.cluster_is_tab = (lead_byte == '\t');
            self.cluster_is_multibyte = (cp_len != 1);
            self.cluster_width_state = GraphemeWidthState.init(cp, cp_width, self.width_method);
        } else {
            self.cluster_is_multibyte = self.cluster_is_multibyte or (cp_len != 1);
            self.cluster_width_state.addCodepoint(cp, cp_width);
        }

        self.prev_cp = cp;
    }

    /// Append the open cluster, ending at byte `end`, if it qualifies for output.
    fn emit(self: *UnicodeClusterScan, end: usize) !void {
        if (!(self.cluster_is_multibyte or self.cluster_is_tab)) return;
        if (!(self.cluster_width_state.width > 0 or self.width_method == .wcwidth)) return;

        // NOTE(review): byte_len and width are narrowed to u8 here; a cluster
        // larger than 255 bytes/columns would not fit — confirm inputs are bounded.
        try self.result.append(self.allocator, GraphemeInfo{
            .byte_offset = @intCast(self.cluster_start),
            .byte_len = @intCast(end - self.cluster_start),
            .width = @intCast(self.cluster_width_state.width),
            .col_offset = self.cluster_start_col,
        });
    }

    /// Commit the cluster left open once the whole text has been consumed.
    fn finish(self: *UnicodeClusterScan, text_len: usize) !void {
        if (self.prev_cp != null) try self.emit(text_len);
    }
};
|
|
1852
|
+
|
|
1853
|
+
/// Find all grapheme clusters using wcwidth-style processing.
///
/// Cluster boundaries are whatever `isGraphemeBreak` reports when called with
/// `.wcwidth`; each cluster's width is accumulated by a `GraphemeWidthState`
/// initialised with `.wcwidth`. A `GraphemeInfo` is appended to `result` for every
/// cluster that contains a multi-byte codepoint or starts with a tab; other
/// clusters only advance the running column.
///
/// Returns immediately when `isASCIIOnly` is set or `text` is empty. A sequence
/// whose decoded length runs past the end of `text` stops the scan; those bytes
/// are counted in the final cluster's byte length. Errors from `result.append`
/// are propagated.
fn findGraphemeInfoWCWidth(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    if (isASCIIOnly or text.len == 0) return;

    var cursor: usize = 0;
    var column: u32 = 0;
    var last_cp: ?u21 = null;
    var seg_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated.
    var have_open = false;
    var open_start: usize = 0;
    var open_col: u32 = 0;
    var open_width: GraphemeWidthState = undefined; // set when the first cluster opens
    var open_multibyte = false;
    var open_tab = false;

    while (cursor < text.len) {
        const lead_byte = text[cursor];
        var scalar: u21 = lead_byte;
        var step: usize = 1;

        if (lead_byte >= 0x80) {
            const decoded = decodeUtf8Unchecked(text, cursor);
            step = decoded.len;
            // Truncated trailing sequence: stop scanning here.
            if (cursor + step > text.len) break;
            scalar = decoded.cp;
        }

        const starts_cluster = isGraphemeBreak(last_cp, scalar, &seg_state, .wcwidth);
        const glyph_width = charWidth(lead_byte, scalar, tab_width);

        if (!starts_cluster) {
            // Continuing cluster
            open_multibyte = open_multibyte or (step != 1);
            open_width.addCodepoint(scalar, glyph_width);
        } else {
            if (have_open) {
                if (open_multibyte or open_tab) {
                    try result.append(allocator, GraphemeInfo{
                        .byte_offset = @intCast(open_start),
                        .byte_len = @intCast(cursor - open_start),
                        .width = @intCast(open_width.width),
                        .col_offset = open_col,
                    });
                }
                // The column advances by the cluster width whether or not it was emitted.
                column += open_width.width;
            }

            // Start a new cluster
            have_open = true;
            open_start = cursor;
            open_col = column;
            open_tab = (lead_byte == '\t');
            open_multibyte = (step != 1);
            open_width = GraphemeWidthState.init(scalar, glyph_width, .wcwidth);
        }

        last_cp = scalar;
        cursor += step;
    }

    // Commit final cluster; it extends to the end of the text.
    if (have_open and (open_multibyte or open_tab)) {
        try result.append(allocator, GraphemeInfo{
            .byte_offset = @intCast(open_start),
            .byte_len = @intCast(text.len - open_start),
            .width = @intCast(open_width.width),
            .col_offset = open_col,
        });
    }
}
|