@fairyhunter13/opentui-core 0.1.90 → 0.1.92

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (571) hide show
  1. package/3d/SpriteResourceManager.d.ts +74 -0
  2. package/3d/SpriteUtils.d.ts +13 -0
  3. package/3d/TextureUtils.d.ts +24 -0
  4. package/3d/ThreeRenderable.d.ts +40 -0
  5. package/3d/WGPURenderer.d.ts +61 -0
  6. package/3d/animation/ExplodingSpriteEffect.d.ts +71 -0
  7. package/3d/animation/PhysicsExplodingSpriteEffect.d.ts +76 -0
  8. package/3d/animation/SpriteAnimator.d.ts +124 -0
  9. package/3d/animation/SpriteParticleGenerator.d.ts +62 -0
  10. package/3d/canvas.d.ts +44 -0
  11. package/3d/index.d.ts +12 -0
  12. package/3d/physics/PlanckPhysicsAdapter.d.ts +19 -0
  13. package/3d/physics/RapierPhysicsAdapter.d.ts +19 -0
  14. package/3d/physics/physics-interface.d.ts +27 -0
  15. package/3d.d.ts +2 -0
  16. package/3d.js +34042 -0
  17. package/3d.js.map +155 -0
  18. package/LICENSE +21 -0
  19. package/NativeSpanFeed.d.ts +41 -0
  20. package/README.md +2 -2
  21. package/Renderable.d.ts +334 -0
  22. package/animation/Timeline.d.ts +126 -0
  23. package/ansi.d.ts +13 -0
  24. package/buffer.d.ts +107 -0
  25. package/console.d.ts +143 -0
  26. package/edit-buffer.d.ts +98 -0
  27. package/editor-view.d.ts +73 -0
  28. package/index-e6ec7apq.js +18415 -0
  29. package/index-e6ec7apq.js.map +64 -0
  30. package/index-h066zmrb.js +12619 -0
  31. package/index-h066zmrb.js.map +43 -0
  32. package/index-ynzawt3n.js +113 -0
  33. package/index-ynzawt3n.js.map +10 -0
  34. package/index.d.ts +21 -0
  35. package/index.js +430 -0
  36. package/index.js.map +9 -0
  37. package/lib/KeyHandler.d.ts +61 -0
  38. package/lib/RGBA.d.ts +25 -0
  39. package/lib/ascii.font.d.ts +508 -0
  40. package/lib/border.d.ts +49 -0
  41. package/lib/bunfs.d.ts +7 -0
  42. package/lib/clipboard.d.ts +17 -0
  43. package/lib/clock.d.ts +15 -0
  44. package/lib/data-paths.d.ts +26 -0
  45. package/lib/debounce.d.ts +42 -0
  46. package/lib/detect-links.d.ts +6 -0
  47. package/lib/env.d.ts +42 -0
  48. package/lib/extmarks-history.d.ts +17 -0
  49. package/lib/extmarks.d.ts +89 -0
  50. package/lib/hast-styled-text.d.ts +17 -0
  51. package/lib/index.d.ts +21 -0
  52. package/lib/keymapping.d.ts +25 -0
  53. package/lib/objects-in-viewport.d.ts +24 -0
  54. package/lib/output.capture.d.ts +24 -0
  55. package/lib/parse.keypress-kitty.d.ts +2 -0
  56. package/lib/parse.keypress.d.ts +26 -0
  57. package/lib/parse.mouse.d.ts +30 -0
  58. package/lib/paste.d.ts +7 -0
  59. package/lib/queue.d.ts +15 -0
  60. package/lib/renderable.validations.d.ts +12 -0
  61. package/lib/scroll-acceleration.d.ts +43 -0
  62. package/lib/selection.d.ts +63 -0
  63. package/lib/singleton.d.ts +7 -0
  64. package/lib/stdin-parser.d.ts +76 -0
  65. package/lib/styled-text.d.ts +63 -0
  66. package/lib/terminal-capability-detection.d.ts +30 -0
  67. package/lib/terminal-palette.d.ts +50 -0
  68. package/lib/tree-sitter/assets/update.d.ts +11 -0
  69. package/lib/tree-sitter/client.d.ts +47 -0
  70. package/lib/tree-sitter/default-parsers.d.ts +2 -0
  71. package/lib/tree-sitter/download-utils.d.ts +21 -0
  72. package/lib/tree-sitter/index.d.ts +8 -0
  73. package/lib/tree-sitter/parser.worker.d.ts +1 -0
  74. package/lib/tree-sitter/parsers-config.d.ts +38 -0
  75. package/lib/tree-sitter/resolve-ft.d.ts +2 -0
  76. package/lib/tree-sitter/types.d.ts +81 -0
  77. package/lib/tree-sitter-styled-text.d.ts +14 -0
  78. package/lib/validate-dir-name.d.ts +1 -0
  79. package/lib/yoga.options.d.ts +32 -0
  80. package/package.json +51 -63
  81. package/parser.worker.js +869 -0
  82. package/parser.worker.js.map +12 -0
  83. package/plugins/core-slot.d.ts +72 -0
  84. package/plugins/registry.d.ts +38 -0
  85. package/plugins/types.d.ts +34 -0
  86. package/post/filters.d.ts +105 -0
  87. package/renderables/ASCIIFont.d.ts +52 -0
  88. package/renderables/Box.d.ts +72 -0
  89. package/renderables/Code.d.ts +78 -0
  90. package/renderables/Diff.d.ts +142 -0
  91. package/renderables/EditBufferRenderable.d.ts +162 -0
  92. package/renderables/FrameBuffer.d.ts +16 -0
  93. package/renderables/Input.d.ts +67 -0
  94. package/renderables/LineNumberRenderable.d.ts +74 -0
  95. package/renderables/Markdown.d.ts +173 -0
  96. package/renderables/ScrollBar.d.ts +77 -0
  97. package/renderables/ScrollBox.d.ts +124 -0
  98. package/renderables/Select.d.ts +115 -0
  99. package/renderables/Slider.d.ts +44 -0
  100. package/renderables/TabSelect.d.ts +96 -0
  101. package/renderables/Text.d.ts +36 -0
  102. package/renderables/TextBufferRenderable.d.ts +105 -0
  103. package/renderables/TextNode.d.ts +91 -0
  104. package/renderables/TextTable.d.ts +140 -0
  105. package/renderables/Textarea.d.ts +114 -0
  106. package/renderables/TimeToFirstDraw.d.ts +24 -0
  107. package/renderables/__tests__/renderable-test-utils.d.ts +12 -0
  108. package/renderables/composition/VRenderable.d.ts +16 -0
  109. package/renderables/composition/constructs.d.ts +35 -0
  110. package/renderables/composition/vnode.d.ts +46 -0
  111. package/renderables/index.d.ts +22 -0
  112. package/renderables/markdown-parser.d.ts +10 -0
  113. package/renderer.d.ts +388 -0
  114. package/runtime-plugin-support.d.ts +3 -0
  115. package/runtime-plugin-support.js +29 -0
  116. package/runtime-plugin-support.js.map +10 -0
  117. package/runtime-plugin.d.ts +11 -0
  118. package/runtime-plugin.js +16 -0
  119. package/runtime-plugin.js.map +9 -0
  120. package/syntax-style.d.ts +54 -0
  121. package/testing/manual-clock.d.ts +16 -0
  122. package/testing/mock-keys.d.ts +81 -0
  123. package/testing/mock-mouse.d.ts +38 -0
  124. package/testing/mock-tree-sitter-client.d.ts +23 -0
  125. package/testing/spy.d.ts +7 -0
  126. package/testing/test-recorder.d.ts +61 -0
  127. package/testing/test-renderer.d.ts +23 -0
  128. package/testing.d.ts +6 -0
  129. package/testing.js +675 -0
  130. package/testing.js.map +15 -0
  131. package/text-buffer-view.d.ts +42 -0
  132. package/text-buffer.d.ts +67 -0
  133. package/types.d.ts +131 -0
  134. package/utils.d.ts +14 -0
  135. package/zig-structs.d.ts +155 -0
  136. package/zig.d.ts +351 -0
  137. package/dev/keypress-debug-renderer.ts +0 -148
  138. package/dev/keypress-debug.ts +0 -43
  139. package/dev/print-env-vars.ts +0 -32
  140. package/dev/test-tmux-graphics-334.sh +0 -68
  141. package/dev/thai-debug-test.ts +0 -68
  142. package/docs/development.md +0 -141
  143. package/docs/env-vars.md +0 -140
  144. package/docs/getting-started.md +0 -353
  145. package/docs/renderables-vs-constructs.md +0 -159
  146. package/docs/tree-sitter.md +0 -311
  147. package/scripts/build.ts +0 -400
  148. package/scripts/publish.ts +0 -60
  149. package/src/3d/SpriteResourceManager.ts +0 -286
  150. package/src/3d/SpriteUtils.ts +0 -71
  151. package/src/3d/TextureUtils.ts +0 -196
  152. package/src/3d/ThreeRenderable.ts +0 -197
  153. package/src/3d/WGPURenderer.ts +0 -294
  154. package/src/3d/animation/ExplodingSpriteEffect.ts +0 -513
  155. package/src/3d/animation/PhysicsExplodingSpriteEffect.ts +0 -429
  156. package/src/3d/animation/SpriteAnimator.ts +0 -633
  157. package/src/3d/animation/SpriteParticleGenerator.ts +0 -435
  158. package/src/3d/canvas.ts +0 -464
  159. package/src/3d/index.ts +0 -12
  160. package/src/3d/physics/PlanckPhysicsAdapter.ts +0 -72
  161. package/src/3d/physics/RapierPhysicsAdapter.ts +0 -66
  162. package/src/3d/physics/physics-interface.ts +0 -31
  163. package/src/3d/shaders/supersampling.wgsl +0 -201
  164. package/src/3d.ts +0 -3
  165. package/src/NativeSpanFeed.ts +0 -300
  166. package/src/Renderable.ts +0 -1698
  167. package/src/__snapshots__/buffer.test.ts.snap +0 -28
  168. package/src/animation/Timeline.test.ts +0 -2709
  169. package/src/animation/Timeline.ts +0 -598
  170. package/src/ansi.ts +0 -18
  171. package/src/benchmark/latest-all-bench-run.json +0 -707
  172. package/src/benchmark/latest-async-bench-run.json +0 -336
  173. package/src/benchmark/latest-default-bench-run.json +0 -657
  174. package/src/benchmark/latest-large-bench-run.json +0 -707
  175. package/src/benchmark/latest-quick-bench-run.json +0 -207
  176. package/src/benchmark/markdown-benchmark.ts +0 -1804
  177. package/src/benchmark/native-span-feed-async-benchmark.ts +0 -355
  178. package/src/benchmark/native-span-feed-benchmark.md +0 -56
  179. package/src/benchmark/native-span-feed-benchmark.ts +0 -596
  180. package/src/benchmark/native-span-feed-compare.ts +0 -280
  181. package/src/benchmark/renderer-benchmark.ts +0 -754
  182. package/src/benchmark/text-table-benchmark.ts +0 -947
  183. package/src/buffer.test.ts +0 -291
  184. package/src/buffer.ts +0 -519
  185. package/src/console.test.ts +0 -612
  186. package/src/console.ts +0 -1255
  187. package/src/edit-buffer.test.ts +0 -1769
  188. package/src/edit-buffer.ts +0 -411
  189. package/src/editor-view.test.ts +0 -1032
  190. package/src/editor-view.ts +0 -284
  191. package/src/examples/ascii-font-selection-demo.ts +0 -245
  192. package/src/examples/assets/Water_2_M_Normal.jpg +0 -0
  193. package/src/examples/assets/concrete.png +0 -0
  194. package/src/examples/assets/crate.png +0 -0
  195. package/src/examples/assets/crate_emissive.png +0 -0
  196. package/src/examples/assets/forrest_background.png +0 -0
  197. package/src/examples/assets/hast-example.json +0 -1018
  198. package/src/examples/assets/heart.png +0 -0
  199. package/src/examples/assets/main_char_heavy_attack.png +0 -0
  200. package/src/examples/assets/main_char_idle.png +0 -0
  201. package/src/examples/assets/main_char_jump_end.png +0 -0
  202. package/src/examples/assets/main_char_jump_landing.png +0 -0
  203. package/src/examples/assets/main_char_jump_start.png +0 -0
  204. package/src/examples/assets/main_char_run_loop.png +0 -0
  205. package/src/examples/assets/roughness_map.jpg +0 -0
  206. package/src/examples/build.ts +0 -115
  207. package/src/examples/code-demo.ts +0 -584
  208. package/src/examples/console-demo.ts +0 -358
  209. package/src/examples/core-plugin-slots-demo.ts +0 -759
  210. package/src/examples/diff-demo.ts +0 -699
  211. package/src/examples/draggable-three-demo.ts +0 -259
  212. package/src/examples/editor-demo.ts +0 -322
  213. package/src/examples/extmarks-demo.ts +0 -204
  214. package/src/examples/focus-restore-demo.ts +0 -310
  215. package/src/examples/fonts.ts +0 -245
  216. package/src/examples/fractal-shader-demo.ts +0 -268
  217. package/src/examples/framebuffer-demo.ts +0 -674
  218. package/src/examples/full-unicode-demo.ts +0 -181
  219. package/src/examples/golden-star-demo.ts +0 -933
  220. package/src/examples/grayscale-buffer-demo.ts +0 -249
  221. package/src/examples/hast-syntax-highlighting-demo.ts +0 -129
  222. package/src/examples/index.ts +0 -925
  223. package/src/examples/input-demo.ts +0 -377
  224. package/src/examples/input-select-layout-demo.ts +0 -425
  225. package/src/examples/install.sh +0 -143
  226. package/src/examples/keypress-debug-demo.ts +0 -452
  227. package/src/examples/lib/HexList.ts +0 -122
  228. package/src/examples/lib/PaletteGrid.ts +0 -125
  229. package/src/examples/lib/standalone-keys.ts +0 -25
  230. package/src/examples/lib/tab-controller.ts +0 -243
  231. package/src/examples/lights-phong-demo.ts +0 -290
  232. package/src/examples/link-demo.ts +0 -220
  233. package/src/examples/live-state-demo.ts +0 -480
  234. package/src/examples/markdown-demo.ts +0 -620
  235. package/src/examples/mouse-interaction-demo.ts +0 -428
  236. package/src/examples/nested-zindex-demo.ts +0 -357
  237. package/src/examples/opacity-example.ts +0 -235
  238. package/src/examples/opentui-demo.ts +0 -1057
  239. package/src/examples/physx-planck-2d-demo.ts +0 -507
  240. package/src/examples/physx-rapier-2d-demo.ts +0 -526
  241. package/src/examples/relative-positioning-demo.ts +0 -323
  242. package/src/examples/scroll-example.ts +0 -214
  243. package/src/examples/scrollbox-mouse-test.ts +0 -112
  244. package/src/examples/scrollbox-overlay-hit-test.ts +0 -206
  245. package/src/examples/select-demo.ts +0 -237
  246. package/src/examples/shader-cube-demo.ts +0 -772
  247. package/src/examples/simple-layout-example.ts +0 -591
  248. package/src/examples/slider-demo.ts +0 -617
  249. package/src/examples/split-mode-demo.ts +0 -445
  250. package/src/examples/sprite-animation-demo.ts +0 -443
  251. package/src/examples/sprite-particle-generator-demo.ts +0 -486
  252. package/src/examples/static-sprite-demo.ts +0 -193
  253. package/src/examples/sticky-scroll-example.ts +0 -308
  254. package/src/examples/styled-text-demo.ts +0 -282
  255. package/src/examples/tab-select-demo.ts +0 -219
  256. package/src/examples/terminal-title.ts +0 -29
  257. package/src/examples/terminal.ts +0 -305
  258. package/src/examples/text-node-demo.ts +0 -416
  259. package/src/examples/text-selection-demo.ts +0 -377
  260. package/src/examples/text-table-demo.ts +0 -503
  261. package/src/examples/text-truncation-demo.ts +0 -481
  262. package/src/examples/text-wrap.ts +0 -757
  263. package/src/examples/texture-loading-demo.ts +0 -259
  264. package/src/examples/timeline-example.ts +0 -670
  265. package/src/examples/transparency-demo.ts +0 -241
  266. package/src/examples/vnode-composition-demo.ts +0 -404
  267. package/src/index.ts +0 -22
  268. package/src/lib/KeyHandler.integration.test.ts +0 -292
  269. package/src/lib/KeyHandler.stopPropagation.test.ts +0 -289
  270. package/src/lib/KeyHandler.test.ts +0 -662
  271. package/src/lib/KeyHandler.ts +0 -222
  272. package/src/lib/RGBA.test.ts +0 -984
  273. package/src/lib/RGBA.ts +0 -204
  274. package/src/lib/ascii.font.ts +0 -330
  275. package/src/lib/border.test.ts +0 -83
  276. package/src/lib/border.ts +0 -168
  277. package/src/lib/bunfs.test.ts +0 -27
  278. package/src/lib/bunfs.ts +0 -18
  279. package/src/lib/clipboard.test.ts +0 -41
  280. package/src/lib/clipboard.ts +0 -47
  281. package/src/lib/clock.ts +0 -31
  282. package/src/lib/data-paths.test.ts +0 -133
  283. package/src/lib/data-paths.ts +0 -109
  284. package/src/lib/debounce.ts +0 -106
  285. package/src/lib/detect-links.test.ts +0 -98
  286. package/src/lib/detect-links.ts +0 -56
  287. package/src/lib/env.test.ts +0 -228
  288. package/src/lib/env.ts +0 -209
  289. package/src/lib/extmarks-history.ts +0 -51
  290. package/src/lib/extmarks-multiwidth.test.ts +0 -322
  291. package/src/lib/extmarks.test.ts +0 -3457
  292. package/src/lib/extmarks.ts +0 -843
  293. package/src/lib/fonts/block.json +0 -405
  294. package/src/lib/fonts/grid.json +0 -265
  295. package/src/lib/fonts/huge.json +0 -741
  296. package/src/lib/fonts/pallet.json +0 -314
  297. package/src/lib/fonts/shade.json +0 -591
  298. package/src/lib/fonts/slick.json +0 -321
  299. package/src/lib/fonts/tiny.json +0 -69
  300. package/src/lib/hast-styled-text.ts +0 -59
  301. package/src/lib/index.ts +0 -21
  302. package/src/lib/keymapping.test.ts +0 -280
  303. package/src/lib/keymapping.ts +0 -87
  304. package/src/lib/objects-in-viewport.test.ts +0 -787
  305. package/src/lib/objects-in-viewport.ts +0 -153
  306. package/src/lib/output.capture.ts +0 -58
  307. package/src/lib/parse.keypress-kitty.protocol.test.ts +0 -340
  308. package/src/lib/parse.keypress-kitty.test.ts +0 -663
  309. package/src/lib/parse.keypress-kitty.ts +0 -439
  310. package/src/lib/parse.keypress.test.ts +0 -1849
  311. package/src/lib/parse.keypress.ts +0 -397
  312. package/src/lib/parse.mouse.test.ts +0 -552
  313. package/src/lib/parse.mouse.ts +0 -232
  314. package/src/lib/paste.ts +0 -16
  315. package/src/lib/queue.ts +0 -65
  316. package/src/lib/renderable.validations.test.ts +0 -87
  317. package/src/lib/renderable.validations.ts +0 -83
  318. package/src/lib/scroll-acceleration.ts +0 -98
  319. package/src/lib/selection.ts +0 -240
  320. package/src/lib/singleton.ts +0 -28
  321. package/src/lib/stdin-parser.test.ts +0 -1676
  322. package/src/lib/stdin-parser.ts +0 -1248
  323. package/src/lib/styled-text.ts +0 -178
  324. package/src/lib/terminal-capability-detection.test.ts +0 -202
  325. package/src/lib/terminal-capability-detection.ts +0 -79
  326. package/src/lib/terminal-palette.test.ts +0 -878
  327. package/src/lib/terminal-palette.ts +0 -383
  328. package/src/lib/tree-sitter/assets/README.md +0 -118
  329. package/src/lib/tree-sitter/assets/update.ts +0 -331
  330. package/src/lib/tree-sitter/assets.d.ts +0 -9
  331. package/src/lib/tree-sitter/cache.test.ts +0 -270
  332. package/src/lib/tree-sitter/client.test.ts +0 -1061
  333. package/src/lib/tree-sitter/client.ts +0 -615
  334. package/src/lib/tree-sitter/default-parsers.ts +0 -80
  335. package/src/lib/tree-sitter/download-utils.ts +0 -148
  336. package/src/lib/tree-sitter/index.ts +0 -28
  337. package/src/lib/tree-sitter/parser.worker.ts +0 -1001
  338. package/src/lib/tree-sitter/parsers-config.ts +0 -75
  339. package/src/lib/tree-sitter/resolve-ft.ts +0 -62
  340. package/src/lib/tree-sitter/types.ts +0 -81
  341. package/src/lib/tree-sitter-styled-text.test.ts +0 -1253
  342. package/src/lib/tree-sitter-styled-text.ts +0 -306
  343. package/src/lib/validate-dir-name.ts +0 -55
  344. package/src/lib/yoga.options.test.ts +0 -628
  345. package/src/lib/yoga.options.ts +0 -346
  346. package/src/plugins/core-slot.ts +0 -579
  347. package/src/plugins/registry.ts +0 -377
  348. package/src/plugins/types.ts +0 -46
  349. package/src/post/filters.ts +0 -888
  350. package/src/renderables/ASCIIFont.ts +0 -219
  351. package/src/renderables/Box.test.ts +0 -160
  352. package/src/renderables/Box.ts +0 -295
  353. package/src/renderables/Code.test.ts +0 -2062
  354. package/src/renderables/Code.ts +0 -357
  355. package/src/renderables/Diff.regression.test.ts +0 -226
  356. package/src/renderables/Diff.test.ts +0 -3027
  357. package/src/renderables/Diff.ts +0 -1209
  358. package/src/renderables/EditBufferRenderable.ts +0 -764
  359. package/src/renderables/FrameBuffer.ts +0 -47
  360. package/src/renderables/Input.test.ts +0 -1228
  361. package/src/renderables/Input.ts +0 -245
  362. package/src/renderables/LineNumberRenderable.ts +0 -675
  363. package/src/renderables/Markdown.ts +0 -1106
  364. package/src/renderables/ScrollBar.ts +0 -422
  365. package/src/renderables/ScrollBox.ts +0 -883
  366. package/src/renderables/Select.test.ts +0 -1010
  367. package/src/renderables/Select.ts +0 -523
  368. package/src/renderables/Slider.test.ts +0 -456
  369. package/src/renderables/Slider.ts +0 -347
  370. package/src/renderables/TabSelect.test.ts +0 -197
  371. package/src/renderables/TabSelect.ts +0 -455
  372. package/src/renderables/Text.selection-buffer.test.ts +0 -123
  373. package/src/renderables/Text.test.ts +0 -2660
  374. package/src/renderables/Text.ts +0 -147
  375. package/src/renderables/TextBufferRenderable.ts +0 -518
  376. package/src/renderables/TextNode.test.ts +0 -1058
  377. package/src/renderables/TextNode.ts +0 -325
  378. package/src/renderables/TextTable.test.ts +0 -1421
  379. package/src/renderables/TextTable.ts +0 -1344
  380. package/src/renderables/Textarea.ts +0 -732
  381. package/src/renderables/TimeToFirstDraw.ts +0 -89
  382. package/src/renderables/__snapshots__/Code.test.ts.snap +0 -13
  383. package/src/renderables/__snapshots__/Diff.test.ts.snap +0 -785
  384. package/src/renderables/__snapshots__/Text.test.ts.snap +0 -421
  385. package/src/renderables/__snapshots__/TextTable.test.ts.snap +0 -215
  386. package/src/renderables/__tests__/LineNumberRenderable.scrollbox-simple.test.ts +0 -144
  387. package/src/renderables/__tests__/LineNumberRenderable.scrollbox.test.ts +0 -816
  388. package/src/renderables/__tests__/LineNumberRenderable.test.ts +0 -1787
  389. package/src/renderables/__tests__/LineNumberRenderable.wrapping.test.ts +0 -85
  390. package/src/renderables/__tests__/Markdown.test.ts +0 -2287
  391. package/src/renderables/__tests__/MultiRenderable.selection.test.ts +0 -87
  392. package/src/renderables/__tests__/Textarea.buffer.test.ts +0 -682
  393. package/src/renderables/__tests__/Textarea.destroyed-events.test.ts +0 -675
  394. package/src/renderables/__tests__/Textarea.editing.test.ts +0 -2041
  395. package/src/renderables/__tests__/Textarea.error-handling.test.ts +0 -35
  396. package/src/renderables/__tests__/Textarea.events.test.ts +0 -738
  397. package/src/renderables/__tests__/Textarea.highlights.test.ts +0 -590
  398. package/src/renderables/__tests__/Textarea.keybinding.test.ts +0 -3149
  399. package/src/renderables/__tests__/Textarea.paste.test.ts +0 -357
  400. package/src/renderables/__tests__/Textarea.rendering.test.ts +0 -1864
  401. package/src/renderables/__tests__/Textarea.scroll.test.ts +0 -733
  402. package/src/renderables/__tests__/Textarea.selection.test.ts +0 -1590
  403. package/src/renderables/__tests__/Textarea.stress.test.ts +0 -670
  404. package/src/renderables/__tests__/Textarea.undo-redo.test.ts +0 -383
  405. package/src/renderables/__tests__/Textarea.visual-lines.test.ts +0 -310
  406. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.code.test.ts.snap +0 -221
  407. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox-simple.test.ts.snap +0 -89
  408. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.scrollbox.test.ts.snap +0 -457
  409. package/src/renderables/__tests__/__snapshots__/LineNumberRenderable.test.ts.snap +0 -158
  410. package/src/renderables/__tests__/__snapshots__/Textarea.rendering.test.ts.snap +0 -387
  411. package/src/renderables/__tests__/markdown-parser.test.ts +0 -217
  412. package/src/renderables/__tests__/renderable-test-utils.ts +0 -60
  413. package/src/renderables/composition/README.md +0 -8
  414. package/src/renderables/composition/VRenderable.ts +0 -32
  415. package/src/renderables/composition/constructs.ts +0 -127
  416. package/src/renderables/composition/vnode.ts +0 -289
  417. package/src/renderables/index.ts +0 -22
  418. package/src/renderables/markdown-parser.ts +0 -66
  419. package/src/renderer.ts +0 -2363
  420. package/src/runtime-plugin-support.ts +0 -39
  421. package/src/runtime-plugin.ts +0 -144
  422. package/src/syntax-style.test.ts +0 -841
  423. package/src/syntax-style.ts +0 -264
  424. package/src/testing/README.md +0 -210
  425. package/src/testing/capture-spans.test.ts +0 -194
  426. package/src/testing/integration.test.ts +0 -276
  427. package/src/testing/manual-clock.ts +0 -106
  428. package/src/testing/mock-keys.test.ts +0 -1356
  429. package/src/testing/mock-keys.ts +0 -449
  430. package/src/testing/mock-mouse.test.ts +0 -218
  431. package/src/testing/mock-mouse.ts +0 -247
  432. package/src/testing/mock-tree-sitter-client.ts +0 -73
  433. package/src/testing/spy.ts +0 -13
  434. package/src/testing/test-recorder.test.ts +0 -415
  435. package/src/testing/test-recorder.ts +0 -145
  436. package/src/testing/test-renderer.ts +0 -116
  437. package/src/testing.ts +0 -7
  438. package/src/tests/__snapshots__/absolute-positioning.snapshot.test.ts.snap +0 -481
  439. package/src/tests/__snapshots__/renderable.snapshot.test.ts.snap +0 -19
  440. package/src/tests/__snapshots__/scrollbox.test.ts.snap +0 -29
  441. package/src/tests/absolute-positioning.snapshot.test.ts +0 -638
  442. package/src/tests/allocator-stats.test.ts +0 -38
  443. package/src/tests/destroy-during-render.test.ts +0 -200
  444. package/src/tests/hover-cursor.test.ts +0 -98
  445. package/src/tests/native-span-feed-async.test.ts +0 -173
  446. package/src/tests/native-span-feed-close.test.ts +0 -120
  447. package/src/tests/native-span-feed-coverage.test.ts +0 -227
  448. package/src/tests/native-span-feed-edge-cases.test.ts +0 -352
  449. package/src/tests/native-span-feed-use-after-free.test.ts +0 -45
  450. package/src/tests/opacity.test.ts +0 -123
  451. package/src/tests/renderable.snapshot.test.ts +0 -524
  452. package/src/tests/renderable.test.ts +0 -1281
  453. package/src/tests/renderer.console-startup.test.ts +0 -65
  454. package/src/tests/renderer.control.test.ts +0 -364
  455. package/src/tests/renderer.core-slot-binding.test.ts +0 -952
  456. package/src/tests/renderer.cursor.test.ts +0 -26
  457. package/src/tests/renderer.destroy-during-render.test.ts +0 -110
  458. package/src/tests/renderer.focus-restore.test.ts +0 -228
  459. package/src/tests/renderer.focus.test.ts +0 -251
  460. package/src/tests/renderer.idle.test.ts +0 -219
  461. package/src/tests/renderer.input.test.ts +0 -2145
  462. package/src/tests/renderer.kitty-flags.test.ts +0 -195
  463. package/src/tests/renderer.mouse.test.ts +0 -1269
  464. package/src/tests/renderer.palette.test.ts +0 -629
  465. package/src/tests/renderer.selection.test.ts +0 -49
  466. package/src/tests/renderer.slot-registry.test.ts +0 -649
  467. package/src/tests/renderer.useMouse.test.ts +0 -50
  468. package/src/tests/runtime-plugin-support.fixture.ts +0 -11
  469. package/src/tests/runtime-plugin-support.test.ts +0 -28
  470. package/src/tests/runtime-plugin.fixture.ts +0 -40
  471. package/src/tests/runtime-plugin.test.ts +0 -190
  472. package/src/tests/scrollbox-culling-bug.test.ts +0 -114
  473. package/src/tests/scrollbox-hitgrid-resize.test.ts +0 -136
  474. package/src/tests/scrollbox-hitgrid.test.ts +0 -909
  475. package/src/tests/scrollbox.test.ts +0 -1530
  476. package/src/tests/wrap-resize-perf.test.ts +0 -229
  477. package/src/tests/yoga-setters.test.ts +0 -921
  478. package/src/text-buffer-view.test.ts +0 -705
  479. package/src/text-buffer-view.ts +0 -189
  480. package/src/text-buffer.test.ts +0 -347
  481. package/src/text-buffer.ts +0 -250
  482. package/src/types.ts +0 -152
  483. package/src/utils.ts +0 -88
  484. package/src/zig/ansi.zig +0 -268
  485. package/src/zig/bench/README.md +0 -50
  486. package/src/zig/bench/buffer-draw-text-buffer_bench.zig +0 -887
  487. package/src/zig/bench/edit-buffer_bench.zig +0 -476
  488. package/src/zig/bench/native-span-feed_bench.zig +0 -100
  489. package/src/zig/bench/rope-markers_bench.zig +0 -713
  490. package/src/zig/bench/rope_bench.zig +0 -514
  491. package/src/zig/bench/styled-text_bench.zig +0 -470
  492. package/src/zig/bench/text-buffer-coords_bench.zig +0 -362
  493. package/src/zig/bench/text-buffer-view_bench.zig +0 -459
  494. package/src/zig/bench/text-chunk-graphemes_bench.zig +0 -273
  495. package/src/zig/bench/utf8_bench.zig +0 -799
  496. package/src/zig/bench-utils.zig +0 -431
  497. package/src/zig/bench.zig +0 -217
  498. package/src/zig/buffer.zig +0 -2223
  499. package/src/zig/build.zig +0 -289
  500. package/src/zig/build.zig.zon +0 -16
  501. package/src/zig/edit-buffer.zig +0 -825
  502. package/src/zig/editor-view.zig +0 -802
  503. package/src/zig/event-bus.zig +0 -13
  504. package/src/zig/event-emitter.zig +0 -65
  505. package/src/zig/file-logger.zig +0 -92
  506. package/src/zig/grapheme.zig +0 -599
  507. package/src/zig/lib.zig +0 -1834
  508. package/src/zig/link.zig +0 -333
  509. package/src/zig/logger.zig +0 -43
  510. package/src/zig/mem-registry.zig +0 -125
  511. package/src/zig/native-span-feed-bench-lib.zig +0 -7
  512. package/src/zig/native-span-feed.zig +0 -708
  513. package/src/zig/renderer.zig +0 -1386
  514. package/src/zig/rope.zig +0 -1220
  515. package/src/zig/syntax-style.zig +0 -161
  516. package/src/zig/terminal.zig +0 -975
  517. package/src/zig/test.zig +0 -70
  518. package/src/zig/tests/README.md +0 -18
  519. package/src/zig/tests/buffer_test.zig +0 -2526
  520. package/src/zig/tests/edit-buffer-history_test.zig +0 -271
  521. package/src/zig/tests/edit-buffer_test.zig +0 -1689
  522. package/src/zig/tests/editor-view_test.zig +0 -3299
  523. package/src/zig/tests/event-emitter_test.zig +0 -249
  524. package/src/zig/tests/grapheme_test.zig +0 -1304
  525. package/src/zig/tests/link_test.zig +0 -190
  526. package/src/zig/tests/mem-registry_test.zig +0 -473
  527. package/src/zig/tests/memory_leak_regression_test.zig +0 -159
  528. package/src/zig/tests/native-span-feed_test.zig +0 -1264
  529. package/src/zig/tests/renderer_test.zig +0 -1010
  530. package/src/zig/tests/rope-nested_test.zig +0 -712
  531. package/src/zig/tests/rope_fuzz_test.zig +0 -238
  532. package/src/zig/tests/rope_test.zig +0 -2362
  533. package/src/zig/tests/segment-merge.test.zig +0 -148
  534. package/src/zig/tests/syntax-style_test.zig +0 -557
  535. package/src/zig/tests/terminal_test.zig +0 -719
  536. package/src/zig/tests/text-buffer-drawing_test.zig +0 -3237
  537. package/src/zig/tests/text-buffer-highlights_test.zig +0 -666
  538. package/src/zig/tests/text-buffer-iterators_test.zig +0 -776
  539. package/src/zig/tests/text-buffer-segment_test.zig +0 -320
  540. package/src/zig/tests/text-buffer-selection_test.zig +0 -1035
  541. package/src/zig/tests/text-buffer-selection_viewport_test.zig +0 -358
  542. package/src/zig/tests/text-buffer-view_test.zig +0 -3649
  543. package/src/zig/tests/text-buffer_test.zig +0 -2191
  544. package/src/zig/tests/unicode-width-map.zon +0 -3909
  545. package/src/zig/tests/utf8_no_zwj_test.zig +0 -260
  546. package/src/zig/tests/utf8_test.zig +0 -4057
  547. package/src/zig/tests/utf8_wcwidth_cursor_test.zig +0 -267
  548. package/src/zig/tests/utf8_wcwidth_test.zig +0 -357
  549. package/src/zig/tests/word-wrap-editing_test.zig +0 -498
  550. package/src/zig/tests/wrap-cache-perf_test.zig +0 -113
  551. package/src/zig/text-buffer-iterators.zig +0 -499
  552. package/src/zig/text-buffer-segment.zig +0 -404
  553. package/src/zig/text-buffer-view.zig +0 -1371
  554. package/src/zig/text-buffer.zig +0 -1180
  555. package/src/zig/utf8.zig +0 -1948
  556. package/src/zig/utils.zig +0 -9
  557. package/src/zig-structs.ts +0 -261
  558. package/src/zig.ts +0 -3843
  559. package/tsconfig.build.json +0 -22
  560. package/tsconfig.json +0 -28
  561. /package/{src/lib/tree-sitter/assets → assets}/javascript/highlights.scm +0 -0
  562. /package/{src/lib/tree-sitter/assets → assets}/javascript/tree-sitter-javascript.wasm +0 -0
  563. /package/{src/lib/tree-sitter/assets → assets}/markdown/highlights.scm +0 -0
  564. /package/{src/lib/tree-sitter/assets → assets}/markdown/injections.scm +0 -0
  565. /package/{src/lib/tree-sitter/assets → assets}/markdown/tree-sitter-markdown.wasm +0 -0
  566. /package/{src/lib/tree-sitter/assets → assets}/markdown_inline/highlights.scm +0 -0
  567. /package/{src/lib/tree-sitter/assets → assets}/markdown_inline/tree-sitter-markdown_inline.wasm +0 -0
  568. /package/{src/lib/tree-sitter/assets → assets}/typescript/highlights.scm +0 -0
  569. /package/{src/lib/tree-sitter/assets → assets}/typescript/tree-sitter-typescript.wasm +0 -0
  570. /package/{src/lib/tree-sitter/assets → assets}/zig/highlights.scm +0 -0
  571. /package/{src/lib/tree-sitter/assets → assets}/zig/tree-sitter-zig.wasm +0 -0
package/src/zig/utf8.zig DELETED
@@ -1,1948 +0,0 @@
1
- const std = @import("std");
2
- const uucode = @import("uucode");
3
-
4
/// Strategy used to turn a grapheme cluster into a number of display columns.
pub const WidthMethod = enum {
    /// Add up the widths of the individual codepoints in the cluster.
    wcwidth,
    /// Measure the cluster as a whole using Unicode grapheme segmentation.
    unicode,
    /// Like `unicode`, except a codepoint following ZERO WIDTH JOINER
    /// (U+200D) always begins a new cluster.
    no_zwj,
};
10
-
11
/// Reports whether `text` is non-empty and made up exclusively of printable
/// ASCII bytes (32..126). Tabs, line terminators, DEL and any byte >= 0x80
/// yield `false`; so does an empty slice.
///
/// Complete 16-byte chunks are validated with vector min/max reductions, the
/// remaining bytes one at a time.
pub fn isAsciiOnly(text: []const u8) bool {
    if (text.len == 0) return false;

    const lanes = 16;
    const Chunk = @Vector(lanes, u8);

    var offset: usize = 0;
    while (text.len - offset >= lanes) : (offset += lanes) {
        const chunk: Chunk = text[offset..][0..lanes].*;
        // A chunk is acceptable only if its smallest and largest bytes are
        // both inside the printable range.
        if (@reduce(.Min, chunk) < 32) return false;
        if (@reduce(.Max, chunk) > 126) return false;
    }

    // Fewer than 16 bytes are left; check them individually.
    for (text[offset..]) |byte| {
        if (byte < 32 or byte > 126) return false;
    }

    return true;
}
50
-
51
/// The byte sequence that terminated a line.
pub const LineBreakKind = enum {
    /// A single `\n` (Unix/Linux).
    LF,
    /// A single `\r` (classic Mac OS).
    CR,
    /// The pair `\r\n` (Windows).
    CRLF,
};
56
-
57
/// One line terminator located by `findLineBreaks`.
pub const LineBreak = struct {
    /// Byte index of the terminator. `findLineBreaks` stores the index of
    /// the `\n` for a CRLF pair and the index of the single byte otherwise.
    pos: usize,
    /// Which terminator sequence was found.
    kind: LineBreakKind,
};
61
-
62
/// Reusable output buffer for `findLineBreaks`. The list grows through the
/// allocator handed to `init`.
pub const LineBreakResult = struct {
    breaks: std.ArrayListUnmanaged(LineBreak),
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Creates an empty result; nothing is allocated until a break is appended.
    pub fn init(allocator: std.mem.Allocator) LineBreakResult {
        return Self{ .breaks = .{}, .allocator = allocator };
    }

    /// Frees the memory held by `breaks`.
    pub fn deinit(self: *LineBreakResult) void {
        self.breaks.deinit(self.allocator);
    }

    /// Forgets all recorded breaks but keeps the allocated capacity.
    pub fn reset(self: *LineBreakResult) void {
        self.breaks.clearRetainingCapacity();
    }
};
81
-
82
/// Reusable output buffer for `findTabStops`: the byte index of every tab
/// character that was found.
pub const TabStopResult = struct {
    positions: std.ArrayListUnmanaged(usize),
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Creates an empty result; nothing is allocated until a position is appended.
    pub fn init(allocator: std.mem.Allocator) TabStopResult {
        return Self{ .positions = .{}, .allocator = allocator };
    }

    /// Frees the memory held by `positions`.
    pub fn deinit(self: *TabStopResult) void {
        self.positions.deinit(self.allocator);
    }

    /// Forgets all recorded positions but keeps the allocated capacity.
    pub fn reset(self: *TabStopResult) void {
        self.positions.clearRetainingCapacity();
    }
};
101
-
102
/// A position at which a line may be wrapped.
pub const WrapBreak = struct {
    /// Byte offset of the grapheme that creates the opportunity: the
    /// delimiter itself for whitespace and punctuation, or the last grapheme
    /// of the preceding run for a CJK<->ASCII transition.
    byte_offset: u32,

    /// Index of that grapheme counted in graphemes, not display columns.
    /// Callers convert it to columns with charOffsetToColumn().
    char_offset: u32,
};
112
-
113
/// Reusable output buffer for `findWrapBreaks`. The list grows through the
/// allocator handed to `init`.
pub const WrapBreakResult = struct {
    breaks: std.ArrayListUnmanaged(WrapBreak),
    allocator: std.mem.Allocator,

    const Self = @This();

    /// Creates an empty result; nothing is allocated until a break is appended.
    pub fn init(allocator: std.mem.Allocator) WrapBreakResult {
        return Self{ .breaks = .{}, .allocator = allocator };
    }

    /// Frees the memory held by `breaks`.
    pub fn deinit(self: *WrapBreakResult) void {
        self.breaks.deinit(self.allocator);
    }

    /// Forgets all recorded breaks but keeps the allocated capacity.
    pub fn reset(self: *WrapBreakResult) void {
        self.breaks.clearRetainingCapacity();
    }
};
132
-
133
// Reports whether the ASCII byte `b` is a wrap-break delimiter: space, tab,
// dash, slash, backslash, . , ; : ! ? and the bracket characters ( ) [ ] { }.
// CR and LF are deliberately not part of the set.
inline fn isAsciiWrapBreak(b: u8) bool {
    // The membership table is built once at compile time.
    const table = comptime blk: {
        var t = [_]bool{false} ** 256;
        for (" \t-/\\.,;:!?()[]{}") |delimiter| t[delimiter] = true;
        break :blk t;
    };
    return table[b];
}
144
-
145
// Decodes the codepoint that starts at `text[pos]` without validating it.
// The lead byte alone selects the sequence length: 0xxxxxxx -> 1 byte,
// 110xxxxx -> 2 bytes, 1110xxxx -> 3 bytes, everything else is decoded as a
// 4-byte sequence. Continuation bytes are masked, never checked.
// When the text ends before the next required byte, U+FFFD with length 1
// is returned instead.
pub inline fn decodeUtf8Unchecked(text: []const u8, pos: usize) struct { cp: u21, len: u3 } {
    const lead = text[pos];
    if (lead < 0x80) return .{ .cp = lead, .len = 1 };

    // Bytes that follow the lead byte.
    const rest = text[pos + 1 ..];

    if (rest.len < 1) return .{ .cp = 0xFFFD, .len = 1 };
    const c1: u21 = rest[0] & 0x3F;
    if ((lead & 0xE0) == 0xC0) {
        return .{ .cp = (@as(u21, lead & 0x1F) << 6) | c1, .len = 2 };
    }

    if (rest.len < 2) return .{ .cp = 0xFFFD, .len = 1 };
    const c2: u21 = rest[1] & 0x3F;
    if ((lead & 0xF0) == 0xE0) {
        return .{ .cp = (@as(u21, lead & 0x0F) << 12) | (c1 << 6) | c2, .len = 3 };
    }

    if (rest.len < 3) return .{ .cp = 0xFFFD, .len = 1 };
    const c3: u21 = rest[2] & 0x3F;
    // Three lead bits plus 18 continuation bits always fit in a u21, although
    // the value may exceed U+10FFFF for malformed lead bytes.
    return .{ .cp = (@as(u21, lead & 0x07) << 18) | (c1 << 12) | (c2 << 6) | c3, .len = 4 };
}
172
-
173
// Reports whether the non-ASCII codepoint `cp` is treated as a wrap-break
// delimiter: Unicode spaces, a few hyphens and CJK/fullwidth punctuation.
// NOTE(review): the no-break spaces U+00A0 and U+202F are in the set, so
// lines may wrap at them - confirm that this is intended.
inline fn isUnicodeWrapBreak(cp: u21) bool {
    return switch (cp) {
        0x00A0, // NBSP
        0x00AD, // SOFT HYPHEN
        0x1680, // OGHAM SPACE MARK
        0x2000...0x200B, // EN QUAD..HAIR SPACE, ZERO WIDTH SPACE
        0x2010, // HYPHEN
        0x202F, // NARROW NO-BREAK SPACE
        0x205F, // MEDIUM MATHEMATICAL SPACE
        0x3000...0x3002, // IDEOGRAPHIC SPACE, COMMA, FULL STOP
        0xFF01, // FULLWIDTH EXCLAMATION MARK
        0xFF1F, // FULLWIDTH QUESTION MARK
        => true,
        else => false,
    };
}
193
-
194
// Coarse script classification used for word boundaries in mixed-script
// text. ASCII word runs and CJK word runs are told apart so that a boundary
// can be placed between them, while each CJK run stays grouped as one unit.
const WordClass = enum {
    // ASCII letters, digits and underscore.
    ascii_word,
    // Han, kana and Hangul codepoints.
    cjk_word,
    // Everything else.
    other,
};
202
-
203
// Reports whether `b` is an ASCII letter, an ASCII digit or an underscore.
inline fn isAsciiWordByte(b: u8) bool {
    return switch (b) {
        'a'...'z', 'A'...'Z', '0'...'9', '_' => true,
        else => false,
    };
}
209
-
210
// Reports whether `cp` falls in one of the Han, kana or Hangul ranges that
// count as CJK word characters.
inline fn isCjkWordCodepoint(cp: u21) bool {
    return switch (cp) {
        // Han ideographs
        0x3400...0x4DBF,
        0x4E00...0x9FFF,
        0xF900...0xFAFF,
        0x20000...0x2A6DF,
        0x2A700...0x2B73F,
        0x2B740...0x2B81F,
        0x2B820...0x2CEAF,
        0x2CEB0...0x2EBEF,
        0x2EBF0...0x2EE5D,
        0x2F800...0x2FA1F,
        // Hiragana + Katakana
        0x3040...0x309F,
        0x30A0...0x30FF,
        0x31F0...0x31FF,
        0xFF66...0xFF9D,
        // Hangul
        0x1100...0x11FF,
        0x3130...0x318F,
        0xA960...0xA97F,
        0xAC00...0xD7AF,
        0xD7B0...0xD7FF,
        => true,
        else => false,
    };
}
235
-
236
// Maps a codepoint to its WordClass: ASCII word bytes become `.ascii_word`,
// codepoints accepted by isCjkWordCodepoint become `.cjk_word`, and all
// remaining codepoints become `.other`.
inline fn classifyWordClass(cp: u21) WordClass {
    if (cp > 0x7F) {
        return if (isCjkWordCodepoint(cp)) .cjk_word else .other;
    }
    return if (isAsciiWordByte(@intCast(cp))) .ascii_word else .other;
}
243
-
244
/// Reports whether `cp` is classified as a word character, i.e. its
/// WordClass is `.ascii_word` or `.cjk_word`.
pub inline fn isWordCodepoint(cp: u21) bool {
    return switch (classifyWordClass(cp)) {
        .ascii_word, .cjk_word => true,
        .other => false,
    };
}
247
-
248
// Reports whether two adjacent graphemes form a boundary between a CJK word
// run and an ASCII word run, in either direction.
inline fn isCjkAsciiTransition(prev_class: WordClass, curr_class: WordClass) bool {
    return switch (prev_class) {
        .cjk_word => curr_class == .ascii_word,
        .ascii_word => curr_class == .cjk_word,
        .other => false,
    };
}
252
-
253
- // Nothing needed here - using uucode.grapheme.isBreak directly
254
-
255
// Bookkeeping shared by every code path of findWrapBreaks.
const WrapScan = struct {
    // Destination list for the break opportunities.
    out: *WrapBreakResult,
    // Number of graphemes started so far (the next char_offset to assign).
    grapheme_index: u32 = 0,
    // Previously visited codepoint; null before the first one.
    last_cp: ?u21 = null,
    // State carried between uucode.grapheme.isBreak calls.
    segmenter: uucode.grapheme.BreakState = .default,
    // The most recently started grapheme. A CJK<->ASCII transition break is
    // emitted at this grapheme, so callers that add its width land exactly
    // on the run boundary.
    seen_grapheme: bool = false,
    anchor_byte: u32 = 0,
    anchor_index: u32 = 0,
    anchor_class: WordClass = .other,

    // Appends one break opportunity.
    fn emit(self: *WrapScan, byte_offset: u32, char_offset: u32) !void {
        try self.out.breaks.append(self.out.allocator, .{
            .byte_offset = byte_offset,
            .char_offset = char_offset,
        });
    }

    // Emits a break at the anchor grapheme when a grapheme of `next_class`
    // following it switches between a CJK and an ASCII word run.
    fn emitTransitionBefore(self: *WrapScan, next_class: WordClass) !void {
        if (self.seen_grapheme and isCjkAsciiTransition(self.anchor_class, next_class)) {
            try self.emit(self.anchor_byte, self.anchor_index);
        }
    }

    // Processes the codepoint `cp` located at byte offset `at`.
    // `is_delimiter` states whether the codepoint itself is a break opportunity.
    fn visit(self: *WrapScan, cp: u21, at: usize, is_delimiter: bool) !void {
        // U+FFFD and values beyond U+10FFFF are never passed to the grapheme
        // segmenter; they always count as a cluster boundary.
        const starts_cluster = blk: {
            if (cp == 0xFFFD or cp > 0x10FFFF) break :blk true;
            const before = self.last_cp orelse break :blk true;
            if (before == 0xFFFD or before > 0x10FFFF) break :blk true;
            break :blk uucode.grapheme.isBreak(before, cp, &self.segmenter);
        };

        if (starts_cluster) {
            const class = classifyWordClass(cp);
            try self.emitTransitionBefore(class);
            self.seen_grapheme = true;
            self.anchor_byte = @intCast(at);
            self.anchor_index = self.grapheme_index;
            self.anchor_class = class;
        }

        if (is_delimiter) {
            try self.emit(@intCast(at), self.grapheme_index);
        }

        if (starts_cluster) self.grapheme_index += 1;
        self.last_cp = cp;
    }

    // Visits the codepoints in text[from..until] one by one and returns the
    // offset where scanning stopped. It stops early when a multi-byte
    // sequence would extend past `until` or past the end of the text.
    fn walk(self: *WrapScan, text: []const u8, from: usize, until: usize) !usize {
        var at = from;
        while (at < until) {
            const lead = text[at];
            if (lead < 0x80) {
                try self.visit(lead, at, isAsciiWrapBreak(lead));
                at += 1;
                continue;
            }

            const dec = decodeUtf8Unchecked(text, at);
            const next = at + dec.len;
            if (next > text.len or next > until) break;
            try self.visit(dec.cp, at, isUnicodeWrapBreak(dec.cp));
            at = next;
        }
        return at;
    }
};

/// Collects every wrap-break opportunity of `text` into `result`.
///
/// `result` is cleared first, so afterwards `result.breaks` holds only the
/// breaks of this call. A break is recorded for each ASCII delimiter
/// (see isAsciiWrapBreak), each Unicode delimiter (see isUnicodeWrapBreak)
/// and, for every CJK<->ASCII word transition, at the last grapheme of the
/// preceding run. `width_method` is accepted for API consistency and is
/// currently ignored.
///
/// The text is consumed in 16-byte chunks: all-ASCII chunks take a vector
/// fast path, chunks containing non-ASCII bytes and the final partial chunk
/// are walked codepoint by codepoint. Fails only if appending to the result
/// list fails.
pub fn findWrapBreaks(text: []const u8, result: *WrapBreakResult, width_method: WidthMethod) !void {
    _ = width_method;
    result.reset();

    const lanes = 16;
    const Chunk = @Vector(lanes, u8);
    const high_bit: Chunk = @splat(0x80);
    // Same delimiter set as isAsciiWrapBreak.
    const delimiters = " \t-/\\.,;:!?()[]{}";

    var scan = WrapScan{ .out = result };
    var pos: usize = 0;

    while (pos + lanes <= text.len) {
        const chunk: Chunk = text[pos..][0..lanes].*;

        // Mixed chunk: grapheme-aware walk. It may stop short of the chunk
        // end when a multi-byte sequence straddles it; the next chunk then
        // starts at that sequence.
        if (@reduce(.Or, chunk >= high_bit)) {
            pos = try scan.walk(text, pos, pos + lanes);
            continue;
        }

        // All-ASCII chunk. Only its first byte can follow a CJK grapheme.
        try scan.emitTransitionBefore(classifyWordClass(text[pos]));

        // Mark every lane that holds a delimiter byte.
        var hits: @Vector(lanes, bool) = @splat(false);
        inline for (delimiters) |delimiter| {
            hits = hits | (chunk == @as(Chunk, @splat(delimiter)));
        }

        // Pack the lane flags into an integer and walk its set bits from
        // lowest to highest, so breaks are emitted in text order.
        var pending: u16 = 0;
        inline for (0..lanes) |lane| {
            if (hits[lane]) pending |= @as(u16, 1) << @intCast(lane);
        }
        while (pending != 0) : (pending &= pending - 1) {
            const lane = @ctz(pending);
            try scan.emit(@intCast(pos + lane), scan.grapheme_index + @as(u32, @intCast(lane)));
        }

        // Every byte of the chunk is counted as one grapheme; its last byte
        // becomes the anchor for the next transition check.
        const last = pos + lanes - 1;
        scan.last_cp = text[last];
        scan.segmenter = .default;
        scan.seen_grapheme = true;
        scan.anchor_byte = @intCast(last);
        scan.anchor_index = scan.grapheme_index + (lanes - 1);
        scan.anchor_class = classifyWordClass(text[last]);
        scan.grapheme_index += lanes;
        pos += lanes;
    }

    // Tail: fewer than 16 bytes remain.
    _ = try scan.walk(text, pos, text.len);
}
501
-
502
/// Records the byte index of every tab character of `text` in
/// `result.positions`, in ascending order. `result` is cleared first.
/// Chunks of 16 bytes without any tab are skipped with one vector
/// comparison. Fails only if appending to the result list fails.
pub fn findTabStops(text: []const u8, result: *TabStopResult) !void {
    result.reset();

    const lanes = 16;
    const Chunk = @Vector(lanes, u8);
    const tabs: Chunk = @splat('\t');

    var base: usize = 0;
    while (text.len - base >= lanes) : (base += lanes) {
        const window = text[base..][0..lanes];
        const chunk: Chunk = window.*;
        if (!@reduce(.Or, chunk == tabs)) continue;

        // At least one tab in this chunk: locate each of them.
        for (window, base..) |byte, index| {
            if (byte == '\t') try result.positions.append(result.allocator, index);
        }
    }

    // Fewer than 16 bytes remain.
    for (text[base..], base..) |byte, index| {
        if (byte == '\t') try result.positions.append(result.allocator, index);
    }
}
532
-
533
/// Records every line terminator of `text` in `result.breaks`, in text
/// order. `result` is cleared first.
///
/// - `\r\n` produces one `.CRLF` entry whose `pos` is the index of the `\n`.
/// - a `\r` that is not followed by `\n` produces a `.CR` entry at its index.
/// - a `\n` that is not preceded by `\r` produces an `.LF` entry at its index.
///
/// Stretches of 16 bytes without `\n` or `\r` are skipped with vector
/// comparisons. Fails only if appending to the result list fails.
pub fn findLineBreaks(text: []const u8, result: *LineBreakResult) !void {
    result.reset();

    const lanes = 16; // 16-byte vectors (SSE2/NEON compatible)
    const Chunk = @Vector(lanes, u8);
    const line_feeds: Chunk = @splat('\n');
    const returns: Chunk = @splat('\r');

    var at: usize = 0;
    while (at < text.len) {
        // Scalar scanning runs to the end of the current chunk, or to the
        // end of the text once less than a full chunk is left.
        var stop = text.len;
        if (text.len - at >= lanes) {
            const chunk: Chunk = text[at..][0..lanes].*;
            const has_terminator = @reduce(.Or, chunk == line_feeds) or @reduce(.Or, chunk == returns);
            if (!has_terminator) {
                at += lanes;
                continue;
            }
            stop = at + lanes;
        }

        while (at < stop) {
            switch (text[at]) {
                '\n' => {
                    // A `\n` that follows `\r` is consumed by the `\r` case,
                    // so reaching this case means a lone line feed.
                    try result.breaks.append(result.allocator, .{ .pos = at, .kind = .LF });
                    at += 1;
                },
                '\r' => {
                    if (at + 1 < text.len and text[at + 1] == '\n') {
                        // The pair is reported once, at the `\n`; stepping
                        // over both bytes may move `at` past `stop`.
                        try result.breaks.append(result.allocator, .{ .pos = at + 1, .kind = .CRLF });
                        at += 2;
                    } else {
                        try result.breaks.append(result.allocator, .{ .pos = at, .kind = .CR });
                        at += 1;
                    }
                },
                else => at += 1,
            }
        }
    }
}
610
-
611
/// A byte offset together with the grapheme and column counts that go with it.
/// NOTE(review): presumably returned by the wrap-by-width search whose
/// definition lies outside this excerpt - confirm there.
pub const WrapByWidthResult = struct {
    /// Offset into the text, in bytes.
    byte_offset: u32,
    /// Count of grapheme clusters.
    grapheme_count: u32,
    /// Count of display columns.
    columns_used: u32,
};
616
-
617
/// A byte offset together with the grapheme and column counts that go with it.
/// NOTE(review): presumably returned by the position-by-width search whose
/// definition lies outside this excerpt - confirm there.
pub const PosByWidthResult = struct {
    /// Offset into the text, in bytes.
    byte_offset: u32,
    /// Count of grapheme clusters.
    grapheme_count: u32,
    /// Count of display columns.
    columns_used: u32,
};
622
-
623
/// Display width of a single codepoint in columns.
/// Values above U+10FFFF are rejected before any table lookup and report 0;
/// codepoints for which eawToWidth reports -1 or 0 also report 0.
pub inline fn eastAsianWidth(cp: u21) u32 {
    if (cp > 0x10FFFF) return 0;

    const columns = eawToWidth(cp, uucode.get(.east_asian_width, cp));
    if (columns <= 0) return 0;
    return @intCast(columns);
}
629
-
630
/// Derives the column width of `cp` from its East Asian Width property
/// `eaw` plus a set of explicit codepoint tables.
///
/// Returns -1 for C0/C1 control characters and DEL, 0 for NUL, combining
/// marks and the listed zero-width codepoints, 2 for wide/fullwidth
/// codepoints and the listed symbol/emoji ranges, and 1 for everything else.
/// The checks are applied in exactly that order.
inline fn eawToWidth(cp: u21, eaw: uucode.types.EastAsianWidth) i16 {
    // NUL and control characters.
    switch (cp) {
        0 => return 0,
        1...31, 0x7F...0x9F => return -1,
        else => {},
    }

    // Combining marks never advance the cursor.
    switch (uucode.get(.general_category, cp)) {
        .mark_nonspacing, .mark_spacing_combining, .mark_enclosing => return 0,
        else => {},
    }

    // Zero-width formatting characters and variation selectors.
    switch (cp) {
        0x034F,
        0x180B...0x180D,
        0x200B...0x200D,
        0x2060,
        0xFE00...0xFE0F,
        0xFEFF,
        0xE0100...0xE01EF,
        => return 0,
        else => {},
    }

    switch (eaw) {
        .fullwidth, .wide => return 2,
        else => {},
    }

    // Symbols and emoji that are forced to two columns regardless of `eaw`.
    return switch (cp) {
        0x1F000...0x1F02B,
        0x1F030...0x1F093,
        0x1F0A0...0x1F0AE,
        0x1F0B1...0x1F0BF,
        0x1F0C1...0x1F0CF,
        0x1F0D1...0x1F0F5,

        0x231A...0x231B,
        0x2329...0x232A,
        0x23E9...0x23EC,
        0x23F0,
        0x23F3,
        0x25FD...0x25FE,

        0x2614...0x2615,
        0x2622...0x2623,
        0x2630...0x2637,
        0x2648...0x2653,
        0x267F,
        0x2693,
        0x269B,
        0x26A0...0x26A1,
        0x26AA...0x26AB,
        0x26BD...0x26BE,
        0x26C4...0x26C5,
        0x26CE,
        0x26D1,
        0x26D4,
        0x26EA,
        0x26F2...0x26F3,
        0x26F5,
        0x26FA,
        0x26FD,

        0x203C,
        0x2049,
        0x2705,
        0x270A...0x270B,
        0x2728,
        0x274C,
        0x274E,
        0x2753...0x2755,
        0x2757,
        0x2760...0x2767,
        0x2795...0x2797,
        0x27B0,
        0x27BF,
        0x2B1B...0x2B1C,
        0x2B50,
        0x2B55,

        0x1F300...0x1F320,
        0x1F32D...0x1F335,
        0x1F337...0x1F37C,
        0x1F37E...0x1F393,
        0x1F3A0...0x1F3CA,
        0x1F3CF...0x1F3D3,
        0x1F3E0...0x1F3F0,
        0x1F3F4,
        0x1F3F8...0x1F3FF,
        0x1F400...0x1F43E,
        0x1F440,
        0x1F442...0x1F4FC,
        0x1F4FF...0x1F6C5,
        0x1F6CC,
        0x1F6D0...0x1F6D2,
        0x1F6D5...0x1F6D7,
        0x1F6DC...0x1F6DF,
        0x1F6EB...0x1F6EC,
        0x1F6F4...0x1F6FC,
        0x1F700...0x1F773,
        0x1F780...0x1F7D8,
        0x1F7E0...0x1F7EB,
        0x1F800...0x1F80B,
        0x1F810...0x1F847,
        0x1F850...0x1F859,
        0x1F860...0x1F887,
        0x1F890...0x1F8AD,
        0x1F8B0...0x1F8B1,
        0x1F90C...0x1F93A,
        0x1F93C...0x1F945,
        0x1F947...0x1FA53,
        0x1FA60...0x1FA6D,
        0x1FA70...0x1FA74,
        0x1FA78...0x1FA7C,
        0x1FA80...0x1FA86,
        0x1FA90...0x1FAAC,
        0x1FAB0...0x1FABA,
        0x1FAC0...0x1FAC5,
        0x1FAD0...0x1FAD9,
        0x1FAE0...0x1FAE7,
        0x1FAF0...0x1FAF8,
        => 2,
        else => 1,
    };
}
736
-
737
/// Column width of a single byte, for the ASCII-only fast paths:
/// a tab occupies `tab_width` columns, a printable ASCII byte (32..126)
/// one column, and any other byte none.
inline fn asciiCharWidth(byte: u8, tab_width: u8) u32 {
    return switch (byte) {
        '\t' => tab_width,
        32...126 => 1,
        else => 0,
    };
}
747
-
748
/// Column width of one character, given its first byte and its decoded
/// codepoint.
///
/// - a tab occupies `tab_width` columns;
/// - a printable ASCII byte (32..126) occupies one column;
/// - for a non-ASCII lead byte (>= 0x80) the width of `codepoint` is taken
///   from eastAsianWidth;
/// - every other byte (ASCII control characters, DEL) occupies no column.
///
/// The non-ASCII case goes through eastAsianWidth rather than querying the
/// Unicode tables directly. decodeUtf8Unchecked can yield values above
/// U+10FFFF for malformed lead bytes, and eastAsianWidth rejects those
/// (width 0) before any table lookup happens.
inline fn charWidth(byte: u8, codepoint: u21, tab_width: u8) u32 {
    if (byte == '\t') return tab_width;
    if (byte >= 0x80) return eastAsianWidth(codepoint);
    return if (byte >= 32 and byte <= 126) 1 else 0;
}
761
-
762
/// Reports whether `cp` may be passed to grapheme break detection: it must
/// not be the replacement character U+FFFD and must not exceed U+10FFFF.
inline fn isValidCodepoint(cp: u21) bool {
    const is_replacement = (cp == 0xFFFD);
    const out_of_range = (cp > 0x10FFFF);
    return !(is_replacement or out_of_range);
}
766
-
767
/// Decides whether a grapheme cluster boundary lies between `prev_cp` and
/// `curr_cp`.
///
/// - With no previous codepoint the answer is always `true`.
/// - If either codepoint fails isValidCodepoint the answer is `true` and the
///   segmenter is not consulted.
/// - `.no_zwj`: a codepoint that follows ZERO WIDTH JOINER (U+200D) always
///   starts a new cluster, and `break_state` is reset to `.default`. The
///   joiner itself stays attached to the cluster before it.
/// - Otherwise (`.unicode`, `.wcwidth`, and the remaining `.no_zwj` cases)
///   the result comes from uucode.grapheme.isBreak, which may update
///   `break_state`. `.wcwidth` differs only in how width is summed later,
///   not in where clusters break.
inline fn isGraphemeBreak(prev_cp: ?u21, curr_cp: u21, break_state: *uucode.grapheme.BreakState, width_method: WidthMethod) bool {
    const before = prev_cp orelse return true;
    if (!isValidCodepoint(curr_cp)) return true;

    if (width_method == .no_zwj and before == 0x200D) {
        // The segmenter would report "no break" after a joiner, so it is
        // bypassed and its state restarted for the forced boundary.
        break_state.* = .default;
        return true;
    }

    if (!isValidCodepoint(before)) return true;
    return uucode.grapheme.isBreak(before, curr_cp, break_state);
}
810
-
811
/// Accumulates the display width of one grapheme cluster, one codepoint at
/// a time.
const GraphemeWidthState = struct {
    /// Width accumulated so far, in columns.
    width: u32 = 0,
    /// Whether any codepoint has contributed a width yet.
    has_width: bool = false,
    /// Set when the cluster began with a regional indicator (U+1F1E6..U+1F1FF).
    is_regional_indicator_pair: bool = false,
    /// Set once VARIATION SELECTOR-16 (U+FE0F) has been seen.
    has_vs16: bool = false,
    /// Set after a nonspacing mark; lets a following Devanagari base
    /// consonant add its width.
    has_indic_virama: bool = false,
    width_method: WidthMethod,

    /// Starts a cluster from its first codepoint and that codepoint's width.
    inline fn init(first_cp: u21, first_width: u32, width_method: WidthMethod) GraphemeWidthState {
        const starts_with_ri = first_cp >= 0x1F1E6 and first_cp <= 0x1F1FF;
        return GraphemeWidthState{
            .width = first_width,
            .has_width = first_width > 0,
            .is_regional_indicator_pair = starts_with_ri,
            .has_vs16 = false,
            .has_indic_virama = false,
            .width_method = width_method,
        };
    }

    /// Folds one more codepoint of the same cluster into the state.
    /// `cp_width` is the codepoint's own width; in `.wcwidth` mode it is
    /// ignored and the width is looked up again via eawToWidth.
    inline fn addCodepoint(self: *GraphemeWidthState, cp: u21, cp_width: u32) void {
        // wcwidth mode: the cluster is as wide as the sum of its codepoints.
        if (self.width_method == .wcwidth) {
            const columns = eawToWidth(cp, uucode.get(.east_asian_width, cp));
            if (columns > 0) {
                self.width += @intCast(columns);
                self.has_width = true;
            }
            return;
        }

        // unicode and no_zwj modes: cluster-aware width.

        // VS16 requests emoji presentation and widens a one-column base.
        if (cp == 0xFE0F) {
            self.has_vs16 = true;
            if (self.has_width and self.width == 1) self.width = 2;
            return;
        }

        // Any nonspacing mark is remembered as a potential virama.
        if (uucode.get(.general_category, cp) == .mark_nonspacing) {
            self.has_indic_virama = true;
            return;
        }

        // A further regional indicator adds its width to the flag cluster.
        const is_regional_indicator = cp >= 0x1F1E6 and cp <= 0x1F1FF;
        if (self.is_regional_indicator_pair and is_regional_indicator) {
            self.width += cp_width;
            self.has_width = true;
            return;
        }

        if (cp_width == 0) return;

        // The first codepoint with a width defines the cluster width.
        if (!self.has_width) {
            self.width = cp_width;
            self.has_width = true;
            return;
        }

        // A Devanagari base consonant after a mark forms a conjunct and adds
        // its width - except RA (U+0930), which only clears the flag.
        const is_devanagari_base = (cp >= 0x0915 and cp <= 0x0939) or (cp >= 0x0958 and cp <= 0x095F);
        if (self.has_indic_virama and is_devanagari_base) {
            if (cp != 0x0930) self.width += cp_width;
            self.has_indic_virama = false;
        }
    }
};
883
-
884
/// Running state shared by the grapheme-aware scanning loops
/// (`findWrapPosByWidthUnicode` / `findPosByWidthUnicode`).
const ClusterState = struct {
    /// Columns consumed by clusters that have been committed so far.
    columns_used: u32,
    /// Number of committed clusters.
    grapheme_count: u32,
    /// Width of the cluster currently being accumulated.
    cluster_width: u32,
    /// Byte offset at which the current cluster begins.
    cluster_start: usize,
    /// Previously processed codepoint, or null before the first one.
    prev_cp: ?u21,
    /// State threaded through the grapheme break detection calls.
    break_state: uucode.grapheme.BreakState,
    /// Width accumulator for the current cluster.
    width_state: GraphemeWidthState,
    width_method: WidthMethod,
    /// False until the first codepoint of the current cluster has been seen.
    cluster_started: bool,

    /// Build an empty state; `width_state` holds a zero-width placeholder until
    /// the first cluster starts.
    fn init(width_method: WidthMethod) ClusterState {
        return ClusterState{
            .columns_used = 0,
            .grapheme_count = 0,
            .cluster_width = 0,
            .cluster_start = 0,
            .prev_cp = null,
            .break_state = .default,
            .width_state = GraphemeWidthState.init(0, 0, width_method),
            .width_method = width_method,
            .cluster_started = false,
        };
    }
};
910
-
911
/// Commit the pending cluster at a grapheme boundary while wrapping by width.
///
/// When `is_break` is false nothing happens. At a boundary, the pending cluster
/// (if any codepoint has been seen) is committed only when it still fits within
/// `max_columns`; otherwise the function returns true BEFORE the limit is
/// exceeded and leaves the state untouched so the caller can report
/// `cluster_start`. After a successful commit the state is reset to start a new
/// cluster at `new_cluster_start`.
///
/// Returns true if the caller should stop (limit exceeded).
inline fn handleClusterForWrap(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const end_col = state.columns_used + state.cluster_width;
        if (end_col > max_columns) return true; // Signal to stop
        state.columns_used = end_col;
        state.grapheme_count += 1;
    }

    // Begin a fresh cluster at the boundary.
    state.cluster_width = 0;
    state.cluster_start = new_cluster_start;
    state.cluster_started = false;
    return false;
}
933
-
934
/// Commit the pending cluster at a grapheme boundary while locating a column
/// position (the result snaps to grapheme boundaries).
///
/// Snapping behavior:
/// - include_start_before=true (selection end): a grapheme is included when it
///   STARTS before max_columns, so a wide grapheme straddling max_columns is
///   kept whole (snap forward). Scanning stops at the first grapheme whose
///   start column is >= max_columns.
/// - include_start_before=false (selection start): a grapheme is included only
///   when it ENDS at or before max_columns, so a wide grapheme straddling
///   max_columns is dropped (snap backward).
///
/// Note that `grapheme_count` is only advanced in the include_start_before=true
/// mode.
///
/// Returns true if the caller should stop; the state is left untouched in that
/// case so `cluster_start` still points at the excluded grapheme.
inline fn handleClusterForPos(
    state: *ClusterState,
    is_break: bool,
    new_cluster_start: usize,
    max_columns: u32,
    include_start_before: bool,
) bool {
    if (!is_break) return false;

    if (state.prev_cp != null) {
        const start_col = state.columns_used;
        const end_col = start_col + state.cluster_width;

        const should_stop = if (include_start_before)
            start_col >= max_columns
        else
            end_col > max_columns;
        if (should_stop) return true;

        state.columns_used = end_col;
        if (include_start_before) {
            state.grapheme_count += 1;
        }
    }

    // Begin a fresh cluster at the boundary.
    state.cluster_width = 0;
    state.cluster_start = new_cluster_start;
    state.cluster_started = false;
    return false;
}
975
-
976
/// Find the wrap position for `text` within `max_columns` display columns.
/// Dispatches on `width_method`: grapheme-cluster aware scanning for
/// `.unicode` / `.no_zwj`, per-codepoint scanning for `.wcwidth`.
pub fn findWrapPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    return switch (width_method) {
        .unicode, .no_zwj => findWrapPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, width_method),
        .wcwidth => findWrapPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly),
    };
}
989
-
990
/// Find the wrap position using Unicode grapheme cluster segmentation.
///
/// Scans `text` and commits whole grapheme clusters while they fit within
/// `max_columns`. The result reports the byte offset of the first cluster that
/// does not fit (or `text.len` when everything fits) together with the number
/// of committed clusters and the columns they occupy.
///
/// When `isASCIIOnly` is set, every byte is counted as exactly one column and
/// no scanning takes place.
fn findWrapPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path: one byte == one grapheme == one column
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    const vector_len = 16;
    const ascii_limit: @Vector(vector_len, u8) = @splat(0x80);
    var state = ClusterState.init(width_method);
    var pos: usize = 0;

    // Process 16-byte chunks; a chunk without any high bit set is pure ASCII.
    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;

        if (!@reduce(.Or, chunk >= ascii_limit)) {
            // All ASCII: every byte is a complete codepoint
            for (0..vector_len) |lane| {
                const at = pos + lane;
                const b = text[at];
                const curr_cp: u21 = b;

                const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);
                if (handleClusterForWrap(&state, is_break, at, max_columns)) {
                    return .{
                        .byte_offset = @intCast(state.cluster_start),
                        .grapheme_count = state.grapheme_count,
                        .columns_used = state.columns_used,
                    };
                }

                const cp_width = asciiCharWidth(b, tab_width);
                if (state.cluster_started) {
                    state.width_state.addCodepoint(curr_cp, cp_width);
                    state.cluster_width = state.width_state.width;
                } else {
                    state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                    state.cluster_width = cp_width;
                    state.cluster_started = true;
                }
                state.prev_cp = curr_cp;
            }
            pos += vector_len;
            continue;
        }

        // Mixed ASCII/non-ASCII: walk the chunk one codepoint at a time
        var consumed: usize = 0;
        while (consumed < vector_len and pos + consumed < text.len) {
            const at = pos + consumed;
            const b0 = text[at];
            var curr_cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                curr_cp = dec.cp;
                cp_len = dec.len;
            }

            // Sequence would run past the end of the text; leave it to the tail loop.
            if (at + cp_len > text.len) break;

            const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);
            if (handleClusterForWrap(&state, is_break, at, max_columns)) {
                return .{
                    .byte_offset = @intCast(state.cluster_start),
                    .grapheme_count = state.grapheme_count,
                    .columns_used = state.columns_used,
                };
            }

            const cp_width = charWidth(b0, curr_cp, tab_width);
            if (state.cluster_started) {
                state.width_state.addCodepoint(curr_cp, cp_width);
                state.cluster_width = state.width_state.width;
            } else {
                state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                state.cluster_width = cp_width;
                state.cluster_started = true;
            }
            state.prev_cp = curr_cp;
            consumed += cp_len;
        }
        pos += consumed; // Advance by how much we actually processed
    }

    // Tail: fewer than vector_len bytes remain
    while (pos < text.len) {
        const b0 = text[pos];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);
        if (handleClusterForWrap(&state, is_break, pos, max_columns)) {
            return .{
                .byte_offset = @intCast(state.cluster_start),
                .grapheme_count = state.grapheme_count,
                .columns_used = state.columns_used,
            };
        }

        const cp_width = charWidth(b0, curr_cp, tab_width);
        if (state.cluster_started) {
            state.width_state.addCodepoint(curr_cp, cp_width);
            state.cluster_width = state.width_state.width;
        } else {
            state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
            state.cluster_width = cp_width;
            state.cluster_started = true;
        }
        state.prev_cp = curr_cp;
        pos += cp_len;
    }

    // Final cluster: no trailing boundary was seen, so commit it here if it fits.
    if (state.prev_cp != null and state.cluster_width > 0) {
        const end_col = state.columns_used + state.cluster_width;
        if (end_col > max_columns) {
            return .{
                .byte_offset = @intCast(state.cluster_start),
                .grapheme_count = state.grapheme_count,
                .columns_used = state.columns_used,
            };
        }
        state.columns_used = end_col;
        state.grapheme_count += 1;
    }

    return .{
        .byte_offset = @intCast(text.len),
        .grapheme_count = state.grapheme_count,
        .columns_used = state.columns_used,
    };
}
1113
-
1114
/// Find the wrap position using wcwidth-style processing, where every codepoint
/// is measured independently.
///
/// The `grapheme_count` field of the result holds the number of codepoints
/// consumed. Scanning stops at the first codepoint reached after the limit is
/// already full (so trailing zero-width codepoints are not consumed) or whose
/// width would push the total past `max_columns`.
///
/// When `isASCIIOnly` is set, every byte is counted as exactly one column.
fn findWrapPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
) WrapByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    // Unicode path - process each codepoint independently
    var offset: usize = 0;
    var columns: u32 = 0;
    var codepoints: u32 = 0;

    while (offset < text.len) {
        const b0 = text[offset];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, offset);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence that would run past the end of the text ends the scan.
        if (offset + cp_len > text.len) break;

        const cp_width = charWidth(b0, curr_cp, tab_width);

        // Stop when the limit is already reached (do not keep appending
        // zero-width codepoints) or when this codepoint would exceed it.
        if (columns >= max_columns or columns + cp_width > max_columns) {
            return .{
                .byte_offset = @intCast(offset),
                .grapheme_count = codepoints,
                .columns_used = columns,
            };
        }

        columns += cp_width;
        codepoints += 1;
        offset += cp_len;
    }

    return .{
        .byte_offset = @intCast(text.len),
        .grapheme_count = codepoints,
        .columns_used = columns,
    };
}
1170
-
1171
/// Find the byte position that corresponds to `max_columns` display columns.
/// Dispatches on `width_method`.
///
/// - include_start_before=true (selection end): units that START before
///   max_columns are included, so a width-2 unit straddling max_columns is kept
///   whole (snap forward).
/// - include_start_before=false (selection start): units that END after
///   max_columns are excluded, so a width-2 unit straddling max_columns is
///   dropped (snap backward).
pub fn findPosByWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    return switch (width_method) {
        .unicode, .no_zwj => findPosByWidthUnicode(text, max_columns, tab_width, isASCIIOnly, include_start_before, width_method),
        .wcwidth => findPosByWidthWCWidth(text, max_columns, tab_width, isASCIIOnly, include_start_before),
    };
}
1189
-
1190
/// Find the position for a column width using Unicode grapheme cluster
/// segmentation.
///
/// Clusters are committed through `handleClusterForPos`, so the result always
/// lands on a grapheme boundary:
/// - include_start_before=true: clusters that start before `max_columns` are
///   included (snap forward).
/// - include_start_before=false: clusters that end after `max_columns` are
///   excluded (snap backward).
///
/// The last cluster of the text has no trailing boundary and is committed after
/// the loops using the same rule as `handleClusterForPos`. `grapheme_count` is
/// only advanced in the include_start_before=true mode, matching
/// `handleClusterForPos`.
///
/// When `isASCIIOnly` is set, every byte is counted as exactly one column.
fn findPosByWidthUnicode(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
    width_method: WidthMethod,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        if (max_columns >= text.len) {
            return .{ .byte_offset = @intCast(text.len), .grapheme_count = @intCast(text.len), .columns_used = @intCast(text.len) };
        } else {
            return .{ .byte_offset = max_columns, .grapheme_count = max_columns, .columns_used = max_columns };
        }
    }

    const vector_len = 16;
    var pos: usize = 0;
    var state = ClusterState.init(width_method);

    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        if (!@reduce(.Or, is_non_ascii)) {
            // All ASCII
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const b = text[pos + i];
                const curr_cp: u21 = b;
                const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);

                if (handleClusterForPos(&state, is_break, pos + i, max_columns, include_start_before)) {
                    return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
                }

                const cp_width = asciiCharWidth(b, tab_width);
                if (!state.cluster_started) {
                    state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                    state.cluster_width = cp_width;
                    state.cluster_started = true;
                } else {
                    state.width_state.addCodepoint(curr_cp, cp_width);
                    state.cluster_width = state.width_state.width;
                }
                state.prev_cp = curr_cp;
            }
            pos += vector_len;
            continue;
        }

        // Mixed ASCII/non-ASCII - process rest of chunk
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const b0 = text[pos + i];
            const curr_cp: u21 = if (b0 < 0x80) b0 else decodeUtf8Unchecked(text, pos + i).cp;
            const cp_len: usize = if (b0 < 0x80) 1 else decodeUtf8Unchecked(text, pos + i).len;

            // Sequence would run past the end of the text; leave it to the tail loop.
            if (pos + i + cp_len > text.len) break;

            const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);

            if (handleClusterForPos(&state, is_break, pos + i, max_columns, include_start_before)) {
                return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
            }

            const cp_width = charWidth(b0, curr_cp, tab_width);
            if (!state.cluster_started) {
                state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                state.cluster_width = cp_width;
                state.cluster_started = true;
            } else {
                state.width_state.addCodepoint(curr_cp, cp_width);
                state.cluster_width = state.width_state.width;
            }
            state.prev_cp = curr_cp;
            i += cp_len;
        }
        pos += i; // Advance by how much we actually processed
    }

    // Tail
    while (pos < text.len) {
        const b0 = text[pos];
        const curr_cp: u21 = if (b0 < 0x80) b0 else decodeUtf8Unchecked(text, pos).cp;
        const cp_len: usize = if (b0 < 0x80) 1 else decodeUtf8Unchecked(text, pos).len;

        const is_break = isGraphemeBreak(state.prev_cp, curr_cp, &state.break_state, state.width_method);

        if (handleClusterForPos(&state, is_break, pos, max_columns, include_start_before)) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }

        const cp_width = charWidth(b0, curr_cp, tab_width);
        if (!state.cluster_started) {
            state.width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
            state.cluster_width = cp_width;
            state.cluster_started = true;
        } else {
            state.width_state.addCodepoint(curr_cp, cp_width);
            state.cluster_width = state.width_state.width;
        }
        state.prev_cp = curr_cp;
        pos += cp_len;
    }

    // Final cluster: apply the same inclusion rule that handleClusterForPos
    // applies at every other boundary. Previously only the start column was
    // checked here, so with include_start_before=false a trailing wide grapheme
    // that crossed max_columns was included even though the same grapheme is
    // excluded when more text follows it.
    if (state.prev_cp != null and state.cluster_width > 0) {
        const cluster_start_col = state.columns_used;
        const cluster_end_col = state.columns_used + state.cluster_width;

        const should_stop = if (include_start_before)
            cluster_start_col >= max_columns
        else
            cluster_end_col > max_columns;
        if (should_stop) {
            return .{ .byte_offset = @intCast(state.cluster_start), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
        }

        state.columns_used = cluster_end_col;
        if (include_start_before) {
            state.grapheme_count += 1;
        }
    }

    return .{ .byte_offset = @intCast(text.len), .grapheme_count = state.grapheme_count, .columns_used = state.columns_used };
}
1316
-
1317
/// Find the position for a column width using wcwidth-style processing, where
/// every codepoint is measured independently.
///
/// - include_start_before=true (selection end): a codepoint is included when it
///   STARTS before max_columns.
/// - include_start_before=false (selection start): a codepoint is included only
///   when it ENDS at or before max_columns.
///
/// The `grapheme_count` field of the result holds the number of codepoints
/// consumed. When `isASCIIOnly` is set, every byte counts as exactly one column.
fn findPosByWidthWCWidth(
    text: []const u8,
    max_columns: u32,
    tab_width: u8,
    isASCIIOnly: bool,
    include_start_before: bool,
) PosByWidthResult {
    if (text.len == 0 or max_columns == 0) {
        return .{ .byte_offset = 0, .grapheme_count = 0, .columns_used = 0 };
    }

    // ASCII-only fast path
    if (isASCIIOnly) {
        const n: u32 = if (max_columns >= text.len) @intCast(text.len) else max_columns;
        return .{ .byte_offset = n, .grapheme_count = n, .columns_used = n };
    }

    // Unicode path - process each codepoint independently
    var offset: usize = 0;
    var columns: u32 = 0;
    var codepoints: u32 = 0;

    while (offset < text.len) {
        const b0 = text[offset];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, offset);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        // A sequence that would run past the end of the text ends the scan.
        if (offset + cp_len > text.len) break;

        const cp_width = charWidth(b0, curr_cp, tab_width);
        const end_col = columns + cp_width;

        // Boundary rule: compare the start column for a selection end, the end
        // column for a selection start.
        const should_stop = if (include_start_before)
            columns >= max_columns
        else
            end_col > max_columns;
        if (should_stop) {
            return .{
                .byte_offset = @intCast(offset),
                .grapheme_count = codepoints,
                .columns_used = columns,
            };
        }

        columns = end_col;
        codepoints += 1;
        offset += cp_len;
    }

    return .{
        .byte_offset = @intCast(text.len),
        .grapheme_count = codepoints,
        .columns_used = columns,
    };
}
1379
-
1380
/// Display width of the unit that starts at `byte_offset`: a whole grapheme
/// cluster for `.unicode` / `.no_zwj`, a single codepoint for `.wcwidth`.
pub fn getWidthAt(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .unicode, .no_zwj => getWidthAtUnicode(text, byte_offset, tab_width, width_method),
        .wcwidth => getWidthAtWCWidth(text, byte_offset, tab_width),
    };
}
1387
-
1388
/// Width of the grapheme cluster that begins at `byte_offset`.
///
/// Returns 0 when `byte_offset` is at or past the end of `text`, and 1 when the
/// first sequence would extend past the end of `text`. Otherwise codepoints are
/// folded into the cluster until a grapheme break, a sequence that would run
/// past the end, or the end of the text is reached.
fn getWidthAtUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    var first_cp: u21 = b0;
    var first_len: usize = 1;
    if (b0 >= 0x80) {
        const dec = decodeUtf8Unchecked(text, byte_offset);
        if (byte_offset + dec.len > text.len) return 1;
        first_cp = dec.cp;
        first_len = dec.len;
    }

    var cluster = GraphemeWidthState.init(first_cp, charWidth(b0, first_cp, tab_width), width_method);
    var break_state: uucode.grapheme.BreakState = .default;
    var last_cp: ?u21 = first_cp;
    var cursor = byte_offset + first_len;

    while (cursor < text.len) {
        const b = text[cursor];
        var curr_cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, cursor);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        if (cursor + cp_len > text.len) break;

        // The cluster ends at the first grapheme boundary.
        if (isGraphemeBreak(last_cp, curr_cp, &break_state, width_method)) break;

        cluster.addCodepoint(curr_cp, charWidth(b, curr_cp, tab_width));

        last_cp = curr_cp;
        cursor += cp_len;
    }

    return cluster.width;
}
1428
-
1429
/// Width of the single codepoint at `byte_offset` (wcwidth mode treats every
/// codepoint independently).
///
/// Returns 0 when `byte_offset` is at or past the end of `text`, and 1 when the
/// sequence would extend past the end of `text`.
fn getWidthAtWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) u32 {
    if (byte_offset >= text.len) return 0;

    const b0 = text[byte_offset];
    if (b0 < 0x80) return charWidth(b0, b0, tab_width);

    const dec = decodeUtf8Unchecked(text, byte_offset);
    if (byte_offset + dec.len > text.len) return 1;
    return charWidth(b0, dec.cp, tab_width);
}
1445
-
1446
/// Result of `getPrevGraphemeStart`: where the previous unit begins and how
/// wide it is.
pub const PrevGraphemeResult = struct {
    /// Byte offset in the text at which the previous grapheme (or, in wcwidth
    /// mode, the previous non-zero-width codepoint) starts.
    start_offset: usize,
    /// Display width reported for that unit.
    width: u32,
};
1450
-
1451
/// Locate the unit that precedes `byte_offset`, dispatching on `width_method`.
/// Returns null when there is nothing before `byte_offset` or the offset lies
/// beyond the end of `text`.
pub fn getPrevGraphemeStart(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    return switch (width_method) {
        .unicode, .no_zwj => getPrevGraphemeStartUnicode(text, byte_offset, tab_width, width_method),
        .wcwidth => getPrevGraphemeStartWCWidth(text, byte_offset, tab_width),
    };
}
1458
-
1459
/// Previous-unit lookup for wcwidth mode.
///
/// Walks the text from the beginning up to `byte_offset` and remembers the last
/// codepoint with a non-zero width. Returns null when `byte_offset` is 0, the
/// text is empty, `byte_offset` is beyond the end of the text, or no codepoint
/// with width precedes `byte_offset`.
fn getPrevGraphemeStartWCWidth(text: []const u8, byte_offset: usize, tab_width: u8) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    var found: ?PrevGraphemeResult = null;
    var cursor: usize = 0;

    while (cursor < byte_offset) {
        const b = text[cursor];
        var curr_cp: u21 = b;
        var cp_len: usize = 1;
        if (b >= 0x80) {
            const dec = decodeUtf8Unchecked(text, cursor);
            // A sequence that would run past the end is measured as U+FFFD.
            curr_cp = if (cursor + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        const cp_width = charWidth(b, curr_cp, tab_width);
        if (cp_width > 0) {
            found = PrevGraphemeResult{
                .start_offset = cursor,
                .width = cp_width,
            };
        }
        cursor += cp_len;
    }

    return found;
}
1488
-
1489
/// Get previous grapheme start using Unicode grapheme cluster segmentation.
///
/// Walks the text from the beginning up to `byte_offset`, tracking the start of
/// the most recent grapheme cluster, and returns that start together with the
/// cluster width reported by `getWidthAt`. Codepoints rejected by
/// `isValidCodepoint` are skipped and neither start a cluster nor update the
/// break state.
///
/// Returns null when `byte_offset` is 0, the text is empty, or `byte_offset`
/// lies beyond the end of the text.
fn getPrevGraphemeStartUnicode(text: []const u8, byte_offset: usize, tab_width: u8, width_method: WidthMethod) ?PrevGraphemeResult {
    if (byte_offset == 0 or text.len == 0) return null;
    if (byte_offset > text.len) return null;

    // For unicode/no_zwj modes, use grapheme cluster detection
    var break_state: uucode.grapheme.BreakState = .default;
    var pos: usize = 0;
    var prev_cp: ?u21 = null;
    // Only ever assigned a `pos` value taken inside the loop, so it is always
    // strictly less than byte_offset (which is >= 1 past the guards above).
    var prev_grapheme_start: usize = 0;

    while (pos < byte_offset) {
        const b = text[pos];
        const curr_cp: u21 = if (b < 0x80) b else blk: {
            const dec = decodeUtf8Unchecked(text, pos);
            if (pos + dec.len > text.len) break :blk 0xFFFD;
            break :blk dec.cp;
        };

        const cp_len: usize = if (b < 0x80) 1 else decodeUtf8Unchecked(text, pos).len;

        if (isValidCodepoint(curr_cp)) {
            const is_break = if (prev_cp) |p| blk: {
                if (!isValidCodepoint(p)) break :blk true;
                break :blk uucode.grapheme.isBreak(p, curr_cp, &break_state);
            } else true;

            if (is_break) {
                prev_grapheme_start = pos;
            }

            prev_cp = curr_cp;
        }

        pos += cp_len;
    }

    return .{
        .start_offset = prev_grapheme_start,
        .width = getWidthAt(text, prev_grapheme_start, tab_width, width_method),
    };
}
1540
-
1541
/// Total display width of `text`, dispatching on `width_method`: grapheme
/// cluster aware for `.unicode` / `.no_zwj`, per-codepoint sum for `.wcwidth`.
pub fn calculateTextWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    return switch (width_method) {
        .unicode, .no_zwj => calculateTextWidthUnicode(text, tab_width, isASCIIOnly, width_method),
        .wcwidth => calculateTextWidthWCWidth(text, tab_width, isASCIIOnly),
    };
}
1548
-
1549
/// Total display width of `text`, measured one grapheme cluster at a time.
///
/// Returns 0 for empty text. When `isASCIIOnly` is set the byte length is
/// returned directly. Otherwise each cluster's width is accumulated by a
/// `GraphemeWidthState`; a sequence that would run past the end of the text is
/// measured as U+FFFD.
fn calculateTextWidthUnicode(text: []const u8, tab_width: u8, isASCIIOnly: bool, width_method: WidthMethod) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path
    if (isASCIIOnly) return @intCast(text.len);

    var total: u32 = 0;
    var cursor: usize = 0;
    var last_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;
    // Assigned when the first codepoint opens a cluster, before any read.
    var cluster: GraphemeWidthState = undefined;

    while (cursor < text.len) {
        const b0 = text[cursor];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, cursor);
            curr_cp = if (cursor + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        const opens_cluster = isGraphemeBreak(last_cp, curr_cp, &break_state, width_method);
        const cp_width = charWidth(b0, curr_cp, tab_width);

        if (opens_cluster) {
            // Flush the finished cluster before starting the next one.
            if (last_cp != null) total += cluster.width;
            cluster = GraphemeWidthState.init(curr_cp, cp_width, width_method);
        } else {
            cluster.addCodepoint(curr_cp, cp_width);
        }

        last_cp = curr_cp;
        cursor += cp_len;
    }

    // Flush the trailing cluster.
    if (last_cp != null) total += cluster.width;

    return total;
}
1597
-
1598
/// Total display width of `text` in wcwidth mode: the sum of the widths of all
/// codepoints, each measured independently.
///
/// Returns 0 for empty text. When `isASCIIOnly` is set the byte length is
/// returned directly. A sequence that would run past the end of the text is
/// measured as U+FFFD.
fn calculateTextWidthWCWidth(text: []const u8, tab_width: u8, isASCIIOnly: bool) u32 {
    if (text.len == 0) return 0;

    // ASCII-only fast path
    if (isASCIIOnly) return @intCast(text.len);

    // Unicode path - sum width of all codepoints
    var total: u32 = 0;
    var cursor: usize = 0;

    while (cursor < text.len) {
        const b0 = text[cursor];
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, cursor);
            curr_cp = if (cursor + dec.len > text.len) 0xFFFD else dec.cp;
            cp_len = dec.len;
        }

        total += charWidth(b0, curr_cp, tab_width);
        cursor += cp_len;
    }

    return total;
}
1628
-
1629
/// Grapheme cluster information for caching
pub const GraphemeInfo = struct {
    /// Byte offset of the cluster's first byte within the scanned text.
    byte_offset: u32,
    /// Length of the cluster in bytes.
    byte_len: u8,
    /// Display width of the cluster in columns.
    width: u8,
    /// Column at which the cluster starts.
    col_offset: u32,
};
1636
-
1637
/// Owning container for a list of `GraphemeInfo` entries.
pub const GraphemeInfoResult = struct {
    const List = std.ArrayList(GraphemeInfo);

    graphemes: List,

    /// Create an empty result whose list allocates from `allocator`.
    pub fn init(allocator: std.mem.Allocator) GraphemeInfoResult {
        return GraphemeInfoResult{ .graphemes = List.init(allocator) };
    }

    /// Release the memory held by the list.
    pub fn deinit(self: *GraphemeInfoResult) void {
        self.graphemes.deinit();
    }

    /// Drop all entries but keep the allocated capacity for reuse.
    pub fn reset(self: *GraphemeInfoResult) void {
        self.graphemes.clearRetainingCapacity();
    }
};
1654
-
1655
/// Collect grapheme information for `text` into `result`, covering multi-byte
/// graphemes and tabs. Dispatches to the implementation that matches
/// `width_method`; any error from the chosen implementation is propagated.
pub fn findGraphemeInfo(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    return switch (width_method) {
        .unicode, .no_zwj => findGraphemeInfoUnicode(text, tab_width, isASCIIOnly, width_method, allocator, result),
        .wcwidth => findGraphemeInfoWCWidth(text, tab_width, isASCIIOnly, allocator, result),
    };
}
1670
-
1671
/// Appends one finished cluster to `result` if it has to be reported.
///
/// A cluster is reported only when `needs_entry` is set (the caller passes
/// "cluster contains a multi-byte codepoint or is a tab") and its accumulated
/// width is non-zero; zero-width clusters are still reported when
/// `width_method` is `.wcwidth`.
///
/// `start`/`end` are byte offsets into the scanned text (`end` exclusive) and
/// `start_col` is the column at which the cluster begins.
fn appendUnicodeClusterIfReportable(
    start: usize,
    end: usize,
    start_col: u32,
    width_state: GraphemeWidthState,
    needs_entry: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    if (!needs_entry) return;
    if (!(width_state.width > 0 or width_method == .wcwidth)) return;

    try result.append(allocator, GraphemeInfo{
        .byte_offset = @intCast(start),
        .byte_len = @intCast(end - start),
        .width = @intCast(width_state.width),
        .col_offset = start_col,
    });
}

/// Find all grapheme clusters using Unicode grapheme cluster segmentation.
/// This version treats grapheme clusters as single units for width calculation.
///
/// Only "special" clusters are appended to `result`: those that contain at
/// least one multi-byte codepoint or that are a tab. Plain single-byte
/// clusters are not appended, but their width still advances the running
/// column so that `col_offset` of later entries stays correct.
///
/// Parameters:
/// - `text`: UTF-8 bytes to scan (decoded with `decodeUtf8Unchecked`).
/// - `tab_width`: forwarded to `asciiCharWidth` / `charWidth`.
/// - `isASCIIOnly`: caller's hint that `text` has no byte >= 0x80.
/// - `width_method`: forwarded to `isGraphemeBreak` and `GraphemeWidthState.init`.
/// - `allocator`, `result`: destination list and the allocator used to grow it.
///
/// Errors: whatever `result.append` returns (allocation failure).
fn findGraphemeInfoUnicode(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    width_method: WidthMethod,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    // ASCII-only input is skipped entirely unless the width method is
    // `.wcwidth`.
    // NOTE(review): the dispatcher visible in this file routes `.wcwidth` to
    // findGraphemeInfoWCWidth, so the `.wcwidth` checks in this function only
    // matter for other callers, if any exist - confirm before removing them.
    if (isASCIIOnly and width_method != .wcwidth) return;
    if (text.len == 0) return;

    const vector_len = 16;
    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated. `cluster_width_state`
    // is only read once `prev_cp` is non-null, i.e. after it was initialised.
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;

    // Main loop: consume the text in 16-byte windows.
    while (pos + vector_len <= text.len) {
        const chunk: @Vector(vector_len, u8) = text[pos..][0..vector_len].*;
        const ascii_threshold: @Vector(vector_len, u8) = @splat(0x80);
        const is_non_ascii = chunk >= ascii_threshold;

        // Fast path: the whole window is ASCII, so every codepoint is one byte.
        if (!@reduce(.Or, is_non_ascii)) {
            var i: usize = 0;
            while (i < vector_len) : (i += 1) {
                const b = text[pos + i];
                const curr_cp: u21 = b;
                const is_break = isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method);
                const cp_width = asciiCharWidth(b, tab_width);

                if (is_break) {
                    if (prev_cp != null) {
                        try appendUnicodeClusterIfReportable(
                            cluster_start,
                            pos + i,
                            cluster_start_col,
                            cluster_width_state,
                            cluster_is_multibyte or cluster_is_tab,
                            width_method,
                            allocator,
                            result,
                        );
                        // The column advances whether or not an entry was emitted.
                        col += cluster_width_state.width;
                    }

                    cluster_start = pos + i;
                    cluster_start_col = col;
                    cluster_is_tab = (b == '\t');
                    cluster_is_multibyte = false;
                    cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
                } else {
                    // Continuing cluster (shouldn't happen for ASCII, but handle it)
                    cluster_width_state.addCodepoint(curr_cp, cp_width);
                }

                prev_cp = curr_cp;
            }
            pos += vector_len;
            continue;
        }

        // Slow path: mixed ASCII/non-ASCII. A codepoint may extend past the
        // window; `i` then exceeds `vector_len` and `pos += i` keeps the scan
        // aligned on a codepoint boundary.
        var i: usize = 0;
        while (i < vector_len and pos + i < text.len) {
            const at = pos + i;
            const b0 = text[at];

            // Decode once and reuse both the codepoint and its byte length.
            var curr_cp: u21 = b0;
            var cp_len: usize = 1;
            if (b0 >= 0x80) {
                const dec = decodeUtf8Unchecked(text, at);
                curr_cp = dec.cp;
                cp_len = dec.len;
            }

            // Sequence claims more bytes than remain: stop scanning here.
            if (at + cp_len > text.len) break;

            const is_break = isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method);
            const cp_width = charWidth(b0, curr_cp, tab_width);

            if (is_break) {
                if (prev_cp != null) {
                    try appendUnicodeClusterIfReportable(
                        cluster_start,
                        at,
                        cluster_start_col,
                        cluster_width_state,
                        cluster_is_multibyte or cluster_is_tab,
                        width_method,
                        allocator,
                        result,
                    );
                    col += cluster_width_state.width;
                }

                cluster_start = at;
                cluster_start_col = col;
                cluster_is_tab = (b0 == '\t');
                cluster_is_multibyte = (cp_len != 1);
                cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
            } else {
                cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
                cluster_width_state.addCodepoint(curr_cp, cp_width);
            }

            prev_cp = curr_cp;
            i += cp_len;
        }
        pos += i;
    }

    // Tail processing: fewer than `vector_len` bytes remain.
    while (pos < text.len) {
        const b0 = text[pos];

        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        if (pos + cp_len > text.len) break;

        const is_break = isGraphemeBreak(prev_cp, curr_cp, &break_state, width_method);
        const cp_width = charWidth(b0, curr_cp, tab_width);

        if (is_break) {
            if (prev_cp != null) {
                try appendUnicodeClusterIfReportable(
                    cluster_start,
                    pos,
                    cluster_start_col,
                    cluster_width_state,
                    cluster_is_multibyte or cluster_is_tab,
                    width_method,
                    allocator,
                    result,
                );
                col += cluster_width_state.width;
            }

            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, width_method);
        } else {
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit the last open cluster. It extends to the end of `text`, so any
    // trailing bytes skipped by the truncation `break` above are included.
    if (prev_cp != null) {
        try appendUnicodeClusterIfReportable(
            cluster_start,
            text.len,
            cluster_start_col,
            cluster_width_state,
            cluster_is_multibyte or cluster_is_tab,
            width_method,
            allocator,
            result,
        );
    }
}
1852
-
1853
/// Find all grapheme clusters for the wcwidth (tmux-style) width method.
///
/// Cluster boundaries are whatever `isGraphemeBreak` reports when called with
/// `.wcwidth`; cluster widths are accumulated through `GraphemeWidthState`
/// initialised with `.wcwidth`.
/// NOTE(review): earlier comments in this function disagreed on whether that
/// mode splits every codepoint or keeps Unicode cluster boundaries - the
/// answer lives in `isGraphemeBreak`, confirm there.
///
/// Only clusters that contain a multi-byte codepoint or that are a tab are
/// appended to `result`. Unlike the Unicode variant there is no width check,
/// so such clusters are appended even when their width is zero. Other
/// clusters only advance the running column.
///
/// Parameters:
/// - `text`: UTF-8 bytes to scan (decoded with `decodeUtf8Unchecked`).
/// - `tab_width`: forwarded to `charWidth`.
/// - `isASCIIOnly`: caller's hint; when set the function returns immediately.
/// - `allocator`, `result`: destination list and the allocator used to grow it.
///
/// Errors: whatever `result.append` returns (allocation failure).
fn findGraphemeInfoWCWidth(
    text: []const u8,
    tab_width: u8,
    isASCIIOnly: bool,
    allocator: std.mem.Allocator,
    result: *std.ArrayListUnmanaged(GraphemeInfo),
) !void {
    if (isASCIIOnly) return;
    if (text.len == 0) return;

    var pos: usize = 0;
    var col: u32 = 0;
    var prev_cp: ?u21 = null;
    var break_state: uucode.grapheme.BreakState = .default;

    // State of the cluster currently being accumulated. `cluster_width_state`
    // is only read after `cluster_started` has been set.
    var cluster_start: usize = 0;
    var cluster_start_col: u32 = 0;
    var cluster_width_state: GraphemeWidthState = undefined;
    var cluster_is_multibyte: bool = false;
    var cluster_is_tab: bool = false;
    var cluster_started = false;

    while (pos < text.len) {
        const b0 = text[pos];

        // Decode once and reuse both the codepoint and its byte length.
        var curr_cp: u21 = b0;
        var cp_len: usize = 1;
        if (b0 >= 0x80) {
            const dec = decodeUtf8Unchecked(text, pos);
            curr_cp = dec.cp;
            cp_len = dec.len;
        }

        // Sequence claims more bytes than remain: stop scanning. The leftover
        // bytes end up inside the final cluster committed below.
        if (pos + cp_len > text.len) break;

        const is_break = isGraphemeBreak(prev_cp, curr_cp, &break_state, .wcwidth);
        const cp_width = charWidth(b0, curr_cp, tab_width);

        if (is_break) {
            if (cluster_started) {
                if (cluster_is_multibyte or cluster_is_tab) {
                    try result.append(allocator, GraphemeInfo{
                        .byte_offset = @intCast(cluster_start),
                        .byte_len = @intCast(pos - cluster_start),
                        .width = @intCast(cluster_width_state.width),
                        .col_offset = cluster_start_col,
                    });
                }
                // The column advances whether or not an entry was emitted.
                col += cluster_width_state.width;
            }

            // Start a new cluster
            cluster_start = pos;
            cluster_start_col = col;
            cluster_is_tab = (b0 == '\t');
            cluster_is_multibyte = (cp_len != 1);
            cluster_width_state = GraphemeWidthState.init(curr_cp, cp_width, .wcwidth);
            cluster_started = true;
        } else {
            // Continuing cluster
            cluster_is_multibyte = cluster_is_multibyte or (cp_len != 1);
            cluster_width_state.addCodepoint(curr_cp, cp_width);
        }

        prev_cp = curr_cp;
        pos += cp_len;
    }

    // Commit final cluster; it always extends to the end of `text`.
    if (cluster_started and (cluster_is_multibyte or cluster_is_tab)) {
        try result.append(allocator, GraphemeInfo{
            .byte_offset = @intCast(cluster_start),
            .byte_len = @intCast(text.len - cluster_start),
            .width = @intCast(cluster_width_state.width),
            .col_offset = cluster_start_col,
        });
    }
}