@fugood/llama.node 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320)
  1. package/CMakeLists.txt +5 -2
  2. package/bin/darwin/arm64/llama-node.node +0 -0
  3. package/bin/darwin/x64/llama-node.node +0 -0
  4. package/bin/linux/arm64/llama-node.node +0 -0
  5. package/bin/linux/x64/llama-node.node +0 -0
  6. package/bin/linux-vulkan/arm64/llama-node.node +0 -0
  7. package/bin/linux-vulkan/x64/llama-node.node +0 -0
  8. package/bin/win32/arm64/llama-node.node +0 -0
  9. package/bin/win32/arm64/node.lib +0 -0
  10. package/bin/win32/x64/llama-node.node +0 -0
  11. package/bin/win32/x64/node.lib +0 -0
  12. package/bin/win32-vulkan/arm64/llama-node.node +0 -0
  13. package/bin/win32-vulkan/arm64/node.lib +0 -0
  14. package/bin/win32-vulkan/x64/llama-node.node +0 -0
  15. package/bin/win32-vulkan/x64/node.lib +0 -0
  16. package/lib/binding.ts +8 -1
  17. package/package.json +1 -1
  18. package/patches/llama.patch +12 -12
  19. package/src/DetokenizeWorker.cpp +1 -1
  20. package/src/LlamaContext.cpp +33 -1
  21. package/src/LlamaContext.h +1 -0
  22. package/src/LoadSessionWorker.cpp +1 -0
  23. package/src/llama.cpp/.github/workflows/bench.yml +310 -0
  24. package/src/llama.cpp/.github/workflows/build.yml +1315 -0
  25. package/src/llama.cpp/.github/workflows/close-issue.yml +23 -0
  26. package/src/llama.cpp/.github/workflows/docker.yml +116 -0
  27. package/src/llama.cpp/.github/workflows/editorconfig.yml +27 -0
  28. package/src/llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  29. package/src/llama.cpp/.github/workflows/labeler.yml +17 -0
  30. package/src/llama.cpp/.github/workflows/nix-ci-aarch64.yml +65 -0
  31. package/src/llama.cpp/.github/workflows/nix-ci.yml +72 -0
  32. package/src/llama.cpp/.github/workflows/nix-flake-update.yml +22 -0
  33. package/src/llama.cpp/.github/workflows/nix-publish-flake.yml +36 -0
  34. package/src/llama.cpp/.github/workflows/python-check-requirements.yml +35 -0
  35. package/src/llama.cpp/.github/workflows/python-lint.yml +23 -0
  36. package/src/llama.cpp/.github/workflows/python-type-check.yml +38 -0
  37. package/src/llama.cpp/.github/workflows/server.yml +183 -0
  38. package/src/llama.cpp/CMakeLists.txt +91 -1245
  39. package/src/llama.cpp/cmake/arm64-windows-llvm.cmake +1 -1
  40. package/src/llama.cpp/cmake/build-info.cmake +58 -0
  41. package/src/llama.cpp/cmake/git-vars.cmake +22 -0
  42. package/src/llama.cpp/common/CMakeLists.txt +4 -3
  43. package/src/llama.cpp/common/build-info.cpp.in +4 -0
  44. package/src/llama.cpp/common/common.cpp +1116 -877
  45. package/src/llama.cpp/common/common.h +191 -77
  46. package/src/llama.cpp/common/grammar-parser.cpp +118 -31
  47. package/src/llama.cpp/common/json-schema-to-grammar.cpp +346 -65
  48. package/src/llama.cpp/common/log.h +1 -1
  49. package/src/llama.cpp/common/ngram-cache.h +10 -3
  50. package/src/llama.cpp/common/sampling.cpp +19 -10
  51. package/src/llama.cpp/docs/build.md +353 -0
  52. package/src/llama.cpp/examples/CMakeLists.txt +22 -22
  53. package/src/llama.cpp/examples/baby-llama/CMakeLists.txt +1 -1
  54. package/src/llama.cpp/examples/baby-llama/baby-llama.cpp +6 -6
  55. package/src/llama.cpp/examples/batched/CMakeLists.txt +1 -1
  56. package/src/llama.cpp/examples/batched/batched.cpp +52 -55
  57. package/src/llama.cpp/examples/batched-bench/CMakeLists.txt +1 -1
  58. package/src/llama.cpp/examples/batched-bench/batched-bench.cpp +20 -72
  59. package/src/llama.cpp/examples/benchmark/CMakeLists.txt +1 -1
  60. package/src/llama.cpp/examples/chat-13B.bat +57 -0
  61. package/src/llama.cpp/examples/convert-llama2c-to-ggml/CMakeLists.txt +1 -1
  62. package/src/llama.cpp/examples/{finetune → cvector-generator}/CMakeLists.txt +2 -2
  63. package/src/llama.cpp/examples/cvector-generator/completions.txt +582 -0
  64. package/src/llama.cpp/examples/cvector-generator/cvector-generator.cpp +503 -0
  65. package/src/llama.cpp/examples/cvector-generator/mean.hpp +48 -0
  66. package/src/llama.cpp/examples/cvector-generator/negative.txt +4 -0
  67. package/src/llama.cpp/examples/cvector-generator/pca.hpp +325 -0
  68. package/src/llama.cpp/examples/cvector-generator/positive.txt +4 -0
  69. package/src/llama.cpp/examples/deprecation-warning/deprecation-warning.cpp +35 -0
  70. package/src/llama.cpp/examples/embedding/CMakeLists.txt +1 -1
  71. package/src/llama.cpp/examples/embedding/embedding.cpp +94 -46
  72. package/src/llama.cpp/examples/eval-callback/CMakeLists.txt +2 -2
  73. package/src/llama.cpp/examples/eval-callback/eval-callback.cpp +4 -6
  74. package/src/llama.cpp/examples/export-lora/CMakeLists.txt +1 -1
  75. package/src/llama.cpp/examples/export-lora/export-lora.cpp +344 -386
  76. package/src/llama.cpp/examples/gbnf-validator/CMakeLists.txt +2 -2
  77. package/src/llama.cpp/examples/gbnf-validator/gbnf-validator.cpp +30 -25
  78. package/src/llama.cpp/examples/gguf/CMakeLists.txt +1 -1
  79. package/src/llama.cpp/examples/gguf/gguf.cpp +5 -0
  80. package/src/llama.cpp/examples/gguf-hash/CMakeLists.txt +15 -0
  81. package/src/llama.cpp/examples/gguf-hash/deps/rotate-bits/rotate-bits.h +46 -0
  82. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.c +295 -0
  83. package/src/llama.cpp/examples/gguf-hash/deps/sha1/sha1.h +52 -0
  84. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.c +221 -0
  85. package/src/llama.cpp/examples/gguf-hash/deps/sha256/sha256.h +24 -0
  86. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.c +42 -0
  87. package/src/llama.cpp/examples/gguf-hash/deps/xxhash/xxhash.h +7093 -0
  88. package/src/llama.cpp/examples/gguf-hash/gguf-hash.cpp +693 -0
  89. package/src/llama.cpp/examples/gguf-split/CMakeLists.txt +1 -1
  90. package/src/llama.cpp/examples/gguf-split/gguf-split.cpp +3 -3
  91. package/src/llama.cpp/examples/gritlm/CMakeLists.txt +1 -1
  92. package/src/llama.cpp/examples/gritlm/gritlm.cpp +6 -2
  93. package/src/llama.cpp/examples/imatrix/CMakeLists.txt +1 -1
  94. package/src/llama.cpp/examples/imatrix/imatrix.cpp +137 -176
  95. package/src/llama.cpp/examples/infill/CMakeLists.txt +1 -1
  96. package/src/llama.cpp/examples/infill/infill.cpp +38 -153
  97. package/src/llama.cpp/examples/llama-bench/llama-bench.cpp +175 -94
  98. package/src/llama.cpp/examples/llama.android/app/build.gradle.kts +65 -0
  99. package/src/llama.cpp/examples/llama.android/build.gradle.kts +6 -0
  100. package/src/llama.cpp/examples/llama.android/llama/build.gradle.kts +68 -0
  101. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/CMakeLists.txt +11 -7
  102. package/src/llama.cpp/examples/llama.android/llama/src/main/cpp/llama-android.cpp +2 -2
  103. package/src/llama.cpp/examples/llama.android/settings.gradle.kts +18 -0
  104. package/src/llama.cpp/examples/llava/CMakeLists.txt +6 -5
  105. package/src/llama.cpp/examples/llava/android/build_64.sh +8 -0
  106. package/src/llama.cpp/examples/llava/clip.cpp +23 -14
  107. package/src/llama.cpp/examples/llava/llava-cli.cpp +8 -6
  108. package/src/llama.cpp/examples/llava/requirements.txt +3 -2
  109. package/src/llama.cpp/examples/lookahead/CMakeLists.txt +1 -1
  110. package/src/llama.cpp/examples/lookahead/lookahead.cpp +2 -1
  111. package/src/llama.cpp/examples/lookup/CMakeLists.txt +4 -4
  112. package/src/llama.cpp/examples/lookup/lookup-create.cpp +2 -0
  113. package/src/llama.cpp/examples/lookup/lookup-merge.cpp +4 -4
  114. package/src/llama.cpp/examples/lookup/lookup-stats.cpp +2 -2
  115. package/src/llama.cpp/examples/lookup/lookup.cpp +1 -1
  116. package/src/llama.cpp/examples/main/CMakeLists.txt +1 -1
  117. package/src/llama.cpp/examples/main/main.cpp +98 -75
  118. package/src/llama.cpp/examples/main-cmake-pkg/CMakeLists.txt +4 -5
  119. package/src/llama.cpp/examples/parallel/CMakeLists.txt +1 -1
  120. package/src/llama.cpp/examples/parallel/parallel.cpp +2 -1
  121. package/src/llama.cpp/examples/passkey/CMakeLists.txt +1 -1
  122. package/src/llama.cpp/examples/passkey/passkey.cpp +23 -43
  123. package/src/llama.cpp/examples/perplexity/CMakeLists.txt +1 -1
  124. package/src/llama.cpp/examples/perplexity/perplexity.cpp +13 -10
  125. package/src/llama.cpp/examples/quantize/CMakeLists.txt +1 -1
  126. package/src/llama.cpp/examples/quantize/quantize.cpp +37 -34
  127. package/src/llama.cpp/examples/quantize-stats/CMakeLists.txt +1 -1
  128. package/src/llama.cpp/examples/quantize-stats/quantize-stats.cpp +1 -1
  129. package/src/llama.cpp/examples/retrieval/CMakeLists.txt +1 -1
  130. package/src/llama.cpp/examples/retrieval/retrieval.cpp +26 -77
  131. package/src/llama.cpp/examples/save-load-state/CMakeLists.txt +1 -1
  132. package/src/llama.cpp/examples/save-load-state/save-load-state.cpp +14 -7
  133. package/src/llama.cpp/examples/server/CMakeLists.txt +26 -2
  134. package/src/llama.cpp/examples/server/server.cpp +274 -671
  135. package/src/llama.cpp/examples/server/tests/requirements.txt +2 -2
  136. package/src/llama.cpp/examples/server/utils.hpp +28 -29
  137. package/src/llama.cpp/examples/simple/CMakeLists.txt +1 -1
  138. package/src/llama.cpp/examples/simple/simple.cpp +21 -29
  139. package/src/llama.cpp/examples/speculative/CMakeLists.txt +1 -1
  140. package/src/llama.cpp/examples/speculative/speculative.cpp +2 -1
  141. package/src/llama.cpp/examples/sycl/CMakeLists.txt +1 -1
  142. package/src/llama.cpp/examples/sycl/build.sh +23 -0
  143. package/src/llama.cpp/examples/sycl/run-llama2.sh +36 -0
  144. package/src/llama.cpp/examples/sycl/win-build-sycl.bat +33 -0
  145. package/src/llama.cpp/examples/sycl/win-run-llama2.bat +9 -0
  146. package/src/llama.cpp/examples/tokenize/CMakeLists.txt +1 -1
  147. package/src/llama.cpp/examples/tokenize/tokenize.cpp +16 -2
  148. package/src/llama.cpp/ggml/CMakeLists.txt +253 -0
  149. package/src/llama.cpp/{cmake → ggml/cmake}/FindSIMD.cmake +6 -6
  150. package/src/llama.cpp/{ggml-backend.h → ggml/include/ggml-backend.h} +22 -17
  151. package/src/llama.cpp/ggml/include/ggml-blas.h +23 -0
  152. package/src/llama.cpp/ggml/include/ggml-cann.h +125 -0
  153. package/src/llama.cpp/{ggml-cuda.h → ggml/include/ggml-cuda.h} +3 -0
  154. package/src/llama.cpp/{ggml-metal.h → ggml/include/ggml-metal.h} +1 -2
  155. package/src/llama.cpp/{ggml-sycl.h → ggml/include/ggml-sycl.h} +3 -10
  156. package/src/llama.cpp/{ggml.h → ggml/include/ggml.h} +80 -85
  157. package/src/llama.cpp/ggml/src/CMakeLists.txt +1329 -0
  158. package/src/llama.cpp/ggml/src/ggml-aarch64.c +2193 -0
  159. package/src/llama.cpp/ggml/src/ggml-aarch64.h +39 -0
  160. package/src/llama.cpp/{ggml-alloc.c → ggml/src/ggml-alloc.c} +100 -49
  161. package/src/llama.cpp/{ggml-backend-impl.h → ggml/src/ggml-backend-impl.h} +20 -8
  162. package/src/llama.cpp/{ggml-backend.c → ggml/src/ggml-backend.c} +307 -167
  163. package/src/llama.cpp/ggml/src/ggml-blas.cpp +367 -0
  164. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.cpp +198 -0
  165. package/src/llama.cpp/ggml/src/ggml-cann/acl_tensor.h +230 -0
  166. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.cpp +2944 -0
  167. package/src/llama.cpp/ggml/src/ggml-cann/aclnn_ops.h +592 -0
  168. package/src/llama.cpp/ggml/src/ggml-cann/common.h +282 -0
  169. package/src/llama.cpp/ggml/src/ggml-cann/kernels/CMakeLists.txt +32 -0
  170. package/src/llama.cpp/ggml/src/ggml-cann/kernels/ascendc_kernels.h +17 -0
  171. package/src/llama.cpp/ggml/src/ggml-cann/kernels/dup.cpp +223 -0
  172. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f16.cpp +186 -0
  173. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_f32.cpp +180 -0
  174. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp +193 -0
  175. package/src/llama.cpp/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp +191 -0
  176. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp +208 -0
  177. package/src/llama.cpp/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp +206 -0
  178. package/src/llama.cpp/ggml/src/ggml-cann.cpp +2023 -0
  179. package/src/llama.cpp/{ggml-common.h → ggml/src/ggml-common.h} +41 -7
  180. package/src/llama.cpp/{ggml-impl.h → ggml/src/ggml-impl.h} +113 -9
  181. package/src/llama.cpp/{ggml-kompute.cpp → ggml/src/ggml-kompute.cpp} +33 -18
  182. package/src/llama.cpp/{ggml-quants.c → ggml/src/ggml-quants.c} +1460 -940
  183. package/src/llama.cpp/{ggml-quants.h → ggml/src/ggml-quants.h} +19 -20
  184. package/src/llama.cpp/{ggml-rpc.cpp → ggml/src/ggml-rpc.cpp} +95 -72
  185. package/src/llama.cpp/ggml/src/ggml-sycl/backend.hpp +27 -0
  186. package/src/llama.cpp/ggml/src/ggml-sycl/common.cpp +53 -0
  187. package/src/llama.cpp/ggml/src/ggml-sycl/common.hpp +355 -0
  188. package/src/llama.cpp/ggml/src/ggml-sycl/concat.cpp +195 -0
  189. package/src/llama.cpp/ggml/src/ggml-sycl/concat.hpp +21 -0
  190. package/src/llama.cpp/ggml/src/ggml-sycl/convert.cpp +547 -0
  191. package/src/llama.cpp/ggml/src/ggml-sycl/convert.hpp +27 -0
  192. package/src/llama.cpp/ggml/src/ggml-sycl/dequantize.hpp +698 -0
  193. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.cpp +1023 -0
  194. package/src/llama.cpp/ggml/src/ggml-sycl/dmmv.hpp +27 -0
  195. package/src/llama.cpp/ggml/src/ggml-sycl/dpct/helper.hpp +3011 -0
  196. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.cpp +3031 -0
  197. package/src/llama.cpp/ggml/src/ggml-sycl/mmq.hpp +33 -0
  198. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.cpp +1027 -0
  199. package/src/llama.cpp/ggml/src/ggml-sycl/mmvq.hpp +27 -0
  200. package/src/llama.cpp/ggml/src/ggml-sycl/norm.cpp +374 -0
  201. package/src/llama.cpp/ggml/src/ggml-sycl/norm.hpp +35 -0
  202. package/src/llama.cpp/ggml/src/ggml-sycl/presets.hpp +66 -0
  203. package/src/llama.cpp/ggml/src/ggml-sycl/rope.cpp +275 -0
  204. package/src/llama.cpp/ggml/src/ggml-sycl/rope.hpp +22 -0
  205. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.cpp +251 -0
  206. package/src/llama.cpp/ggml/src/ggml-sycl/softmax.hpp +24 -0
  207. package/src/llama.cpp/ggml/src/ggml-sycl/vecdotq.hpp +1140 -0
  208. package/src/llama.cpp/ggml/src/ggml-sycl.cpp +5314 -0
  209. package/src/llama.cpp/{ggml-vulkan.cpp → ggml/src/ggml-vulkan.cpp} +1781 -1868
  210. package/src/llama.cpp/{ggml.c → ggml/src/ggml.c} +1245 -2087
  211. package/src/llama.cpp/{sgemm.cpp → ggml/src/llamafile/sgemm.cpp} +21 -24
  212. package/src/llama.cpp/{sgemm.h → ggml/src/llamafile/sgemm.h} +1 -1
  213. package/src/llama.cpp/ggml/src/vulkan-shaders/CMakeLists.txt +5 -0
  214. package/src/llama.cpp/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp +552 -0
  215. package/src/llama.cpp/{llama.h → include/llama.h} +175 -100
  216. package/src/llama.cpp/models/.editorconfig +1 -0
  217. package/src/llama.cpp/models/ggml-vocab-aquila.gguf +0 -0
  218. package/src/llama.cpp/models/ggml-vocab-baichuan.gguf +0 -0
  219. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf +0 -0
  220. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.inp +112 -0
  221. package/src/llama.cpp/models/ggml-vocab-bert-bge.gguf.out +46 -0
  222. package/src/llama.cpp/models/ggml-vocab-command-r.gguf +0 -0
  223. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.inp +112 -0
  224. package/src/llama.cpp/models/ggml-vocab-command-r.gguf.out +46 -0
  225. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf +0 -0
  226. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.inp +112 -0
  227. package/src/llama.cpp/models/ggml-vocab-deepseek-coder.gguf.out +46 -0
  228. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf +0 -0
  229. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.inp +112 -0
  230. package/src/llama.cpp/models/ggml-vocab-deepseek-llm.gguf.out +46 -0
  231. package/src/llama.cpp/models/ggml-vocab-falcon.gguf +0 -0
  232. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.inp +112 -0
  233. package/src/llama.cpp/models/ggml-vocab-falcon.gguf.out +46 -0
  234. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf +0 -0
  235. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.inp +112 -0
  236. package/src/llama.cpp/models/ggml-vocab-gpt-2.gguf.out +46 -0
  237. package/src/llama.cpp/models/ggml-vocab-gpt-neox.gguf +0 -0
  238. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf +0 -0
  239. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.inp +112 -0
  240. package/src/llama.cpp/models/ggml-vocab-llama-bpe.gguf.out +46 -0
  241. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf +0 -0
  242. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.inp +112 -0
  243. package/src/llama.cpp/models/ggml-vocab-llama-spm.gguf.out +46 -0
  244. package/src/llama.cpp/models/ggml-vocab-mpt.gguf +0 -0
  245. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.inp +112 -0
  246. package/src/llama.cpp/models/ggml-vocab-mpt.gguf.out +46 -0
  247. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf +0 -0
  248. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.inp +112 -0
  249. package/src/llama.cpp/models/ggml-vocab-phi-3.gguf.out +46 -0
  250. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf +0 -0
  251. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.inp +112 -0
  252. package/src/llama.cpp/models/ggml-vocab-qwen2.gguf.out +46 -0
  253. package/src/llama.cpp/models/ggml-vocab-refact.gguf +0 -0
  254. package/src/llama.cpp/models/ggml-vocab-refact.gguf.inp +112 -0
  255. package/src/llama.cpp/models/ggml-vocab-refact.gguf.out +46 -0
  256. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf +0 -0
  257. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.inp +112 -0
  258. package/src/llama.cpp/models/ggml-vocab-starcoder.gguf.out +46 -0
  259. package/src/llama.cpp/pocs/vdot/CMakeLists.txt +2 -2
  260. package/src/llama.cpp/requirements/requirements-all.txt +12 -0
  261. package/src/llama.cpp/requirements/requirements-compare-llama-bench.txt +2 -0
  262. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf.txt +3 -0
  263. package/src/llama.cpp/requirements/requirements-convert_hf_to_gguf_update.txt +3 -0
  264. package/src/llama.cpp/requirements/{requirements-convert.txt → requirements-convert_legacy_llama.txt} +1 -1
  265. package/src/llama.cpp/requirements/requirements-convert_llama_ggml_to_gguf.txt +1 -0
  266. package/src/llama.cpp/requirements/requirements-convert_lora_to_gguf.txt +2 -0
  267. package/src/llama.cpp/requirements/requirements-pydantic.txt +3 -0
  268. package/src/llama.cpp/requirements/requirements-test-tokenizer-random.txt +1 -0
  269. package/src/llama.cpp/requirements.txt +5 -4
  270. package/src/llama.cpp/scripts/build-info.sh +30 -0
  271. package/src/llama.cpp/scripts/install-oneapi.bat +19 -0
  272. package/src/llama.cpp/src/CMakeLists.txt +33 -0
  273. package/src/llama.cpp/src/llama-grammar.cpp +539 -0
  274. package/src/llama.cpp/src/llama-grammar.h +39 -0
  275. package/src/llama.cpp/src/llama-impl.h +26 -0
  276. package/src/llama.cpp/src/llama-sampling.cpp +635 -0
  277. package/src/llama.cpp/src/llama-sampling.h +56 -0
  278. package/src/llama.cpp/src/llama-vocab.cpp +1721 -0
  279. package/src/llama.cpp/src/llama-vocab.h +130 -0
  280. package/src/llama.cpp/{llama.cpp → src/llama.cpp} +5979 -5260
  281. package/src/llama.cpp/{unicode-data.cpp → src/unicode-data.cpp} +851 -802
  282. package/src/llama.cpp/{unicode.cpp → src/unicode.cpp} +52 -30
  283. package/src/llama.cpp/{unicode.h → src/unicode.h} +5 -1
  284. package/src/llama.cpp/tests/CMakeLists.txt +19 -20
  285. package/src/llama.cpp/tests/test-backend-ops.cpp +245 -67
  286. package/src/llama.cpp/tests/test-chat-template.cpp +57 -3
  287. package/src/llama.cpp/tests/test-double-float.cpp +2 -2
  288. package/src/llama.cpp/tests/test-grad0.cpp +2 -2
  289. package/src/llama.cpp/tests/test-grammar-integration.cpp +978 -31
  290. package/src/llama.cpp/tests/test-grammar-parser.cpp +423 -158
  291. package/src/llama.cpp/tests/test-json-schema-to-grammar.cpp +508 -135
  292. package/src/llama.cpp/tests/test-llama-grammar.cpp +15 -9
  293. package/src/llama.cpp/tests/test-quantize-fns.cpp +1 -1
  294. package/src/llama.cpp/tests/test-quantize-perf.cpp +1 -1
  295. package/src/llama.cpp/tests/test-rope.cpp +3 -4
  296. package/src/llama.cpp/tests/test-sampling.cpp +5 -5
  297. package/src/llama.cpp/tests/test-tokenizer-0.cpp +6 -6
  298. package/src/llama.cpp/tests/test-tokenizer-1-bpe.cpp +20 -15
  299. package/src/llama.cpp/tests/test-tokenizer-1-spm.cpp +22 -11
  300. package/bin/darwin/arm64/default.metallib +0 -0
  301. package/bin/darwin/x64/default.metallib +0 -0
  302. package/src/llama.cpp/examples/beam-search/CMakeLists.txt +0 -5
  303. package/src/llama.cpp/examples/beam-search/beam-search.cpp +0 -188
  304. package/src/llama.cpp/examples/finetune/finetune.cpp +0 -1862
  305. package/src/llama.cpp/examples/llama.android/llama/CMakeLists.txt +0 -55
  306. package/src/llama.cpp/examples/train-text-from-scratch/CMakeLists.txt +0 -5
  307. package/src/llama.cpp/examples/train-text-from-scratch/train-text-from-scratch.cpp +0 -1253
  308. package/src/llama.cpp/ggml-opencl.cpp +0 -2305
  309. package/src/llama.cpp/ggml-opencl.h +0 -36
  310. package/src/llama.cpp/ggml-sycl.cpp +0 -17340
  311. package/src/llama.cpp/ggml-vulkan-shaders.hpp +0 -81211
  312. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf-update.txt +0 -2
  313. package/src/llama.cpp/requirements/requirements-convert-hf-to-gguf.txt +0 -2
  314. package/src/llama.cpp/requirements/requirements-convert-llama-ggml-to-gguf.txt +0 -1
  315. package/src/llama.cpp/scripts/gen-build-info-cpp.cmake +0 -24
  316. /package/src/llama.cpp/{ggml-alloc.h → ggml/include/ggml-alloc.h} +0 -0
  317. /package/src/llama.cpp/{ggml-kompute.h → ggml/include/ggml-kompute.h} +0 -0
  318. /package/src/llama.cpp/{ggml-rpc.h → ggml/include/ggml-rpc.h} +0 -0
  319. /package/src/llama.cpp/{ggml-vulkan.h → ggml/include/ggml-vulkan.h} +0 -0
  320. /package/src/llama.cpp/{unicode-data.h → src/unicode-data.h} +0 -0
@@ -7,11 +7,14 @@
7
7
  #include "ggml.h"
8
8
  #include "llama.h"
9
9
  #include "grammar-parser.h"
10
+ #include "json-schema-to-grammar.h"
10
11
  #include "unicode.h"
11
12
  #include <cassert>
12
13
  #include <string>
13
14
  #include <vector>
14
15
 
16
+ using json = nlohmann::ordered_json;
17
+
15
18
  static llama_grammar* build_grammar(const std::string & grammar_str) {
16
19
  auto parsed_grammar = grammar_parser::parse(grammar_str.c_str());
17
20
 
@@ -31,31 +34,36 @@ static llama_grammar* build_grammar(const std::string & grammar_str) {
31
34
  static bool test_build_grammar_fails(const std::string & grammar_str) {
32
35
  fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
33
36
  bool grammar_fails = false;
34
- try {
35
- build_grammar(grammar_str);
37
+ llama_grammar * grammar = build_grammar(grammar_str);
38
+ if (grammar != nullptr) {
36
39
  fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
37
- } catch (const std::exception & err) {
40
+ } else {
38
41
  grammar_fails = true;
39
42
  fprintf(stdout, " ✅︎\n");
40
43
  }
41
44
  return grammar_fails;
42
45
  }
43
46
 
44
- static bool match_string(const std::string & input, llama_grammar* grammar) {
47
+ static bool match_string(const std::string & input, llama_grammar * grammar) {
45
48
  auto decoded = decode_utf8(input, {});
46
49
 
47
50
  const auto & code_points = decoded.first;
48
51
 
52
+ const llama_grammar_rules & rules = llama_grammar_get_rules (grammar);
53
+ llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
54
+
49
55
  for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
50
- auto prev_stacks = grammar->stacks;
51
- llama_grammar_accept(grammar->rules, prev_stacks, *it, grammar->stacks);
52
- if (grammar->stacks.empty()) {
56
+ const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
57
+
58
+ llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
59
+
60
+ if (cur_stacks.empty()) {
53
61
  // no stacks means that the grammar failed to match at this point
54
62
  return false;
55
63
  }
56
64
  }
57
65
 
58
- for (const auto & stack : grammar->stacks) {
66
+ for (const auto & stack : cur_stacks) {
59
67
  if (stack.empty()) {
60
68
  // An empty stack means that the grammar has been completed
61
69
  return true;
@@ -65,14 +73,16 @@ static bool match_string(const std::string & input, llama_grammar* grammar) {
65
73
  return false;
66
74
  }
67
75
 
68
- static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
69
- fprintf(stderr, "⚫ Testing %s. Grammar: %s\n", test_desc.c_str(), grammar_str.c_str());
76
+ static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
77
+ fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
70
78
  fflush(stderr);
71
79
 
72
80
  auto grammar = build_grammar(grammar_str);
73
81
 
74
82
  // Save the original grammar stacks so that we can reset after every new string we want to test
75
- auto original_stacks = grammar->stacks;
83
+ const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
84
+
85
+ llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
76
86
 
77
87
  fprintf(stderr, " 🔵 Valid strings:\n");
78
88
 
@@ -85,6 +95,23 @@ static void test_grammar(const std::string & test_desc, const std::string & gram
85
95
 
86
96
  if (!matched) {
87
97
  fprintf(stderr, "❌ (failed to match)\n");
98
+
99
+ // DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
100
+ // DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
101
+ FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
102
+ if (grammar_file) {
103
+ fprintf(grammar_file, "%s", grammar_str.c_str());
104
+ fclose(grammar_file);
105
+ }
106
+
107
+ // DEBUG: Write the test string to test-grammar-integration.string.txt
108
+ FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
109
+ if (string_file) {
110
+ fprintf(string_file, "%s", test_string.c_str());
111
+ fclose(string_file);
112
+ }
113
+
114
+ fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
88
115
  } else {
89
116
  fprintf(stdout, "✅︎\n");
90
117
  }
@@ -92,7 +119,7 @@ static void test_grammar(const std::string & test_desc, const std::string & gram
92
119
  assert(matched);
93
120
 
94
121
  // Reset the grammar stacks
95
- grammar->stacks = original_stacks;
122
+ cur_stacks = original_stacks;
96
123
  }
97
124
 
98
125
  fprintf(stderr, " 🟠 Invalid strings:\n");
@@ -112,14 +139,264 @@ static void test_grammar(const std::string & test_desc, const std::string & gram
112
139
  assert(!matched);
113
140
 
114
141
  // Reset the grammar stacks
115
- grammar->stacks = original_stacks;
142
+ cur_stacks = original_stacks;
116
143
  }
117
144
 
118
145
  // Clean up allocated memory
119
146
  llama_grammar_free(grammar);
120
147
  }
148
+ static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
149
+ test(test_desc + ". Grammar: " + grammar_str, grammar_str, passing_strings, failing_strings);
150
+ }
151
+ static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
152
+ test(test_desc + ". Schema: " + schema_str, json_schema_to_grammar(json::parse(schema_str)), passing_strings, failing_strings);
153
+ }
121
154
 
122
155
  static void test_simple_grammar() {
156
+ test_schema(
157
+ "min 0",
158
+ R"""({
159
+ "type": "integer",
160
+ "minimum": 0
161
+ })""",
162
+ // Passing strings
163
+ {
164
+ "0",
165
+ "10",
166
+ "12",
167
+ "10000",
168
+ },
169
+ // Failing strings
170
+ {
171
+ "-1",
172
+ "-10",
173
+ "-10000",
174
+ "-100000000000000000000000000000000",
175
+ "100000000000000000000000000000000",
176
+ "00",
177
+ "01",
178
+ "-0",
179
+ }
180
+ );
181
+ test_schema(
182
+ "min 2",
183
+ // Schema
184
+ R"""({
185
+ "type": "integer",
186
+ "minimum": 2
187
+ })""",
188
+ // Passing strings
189
+ {
190
+ "2",
191
+ "3",
192
+ "4",
193
+ "10",
194
+ "20",
195
+ "1234567890000000",
196
+ },
197
+ // Failing strings
198
+ {
199
+ "0",
200
+ "1",
201
+ "-1",
202
+ "-100",
203
+ "0",
204
+ "1",
205
+ "01",
206
+ "02",
207
+ "12345678900000000",
208
+ }
209
+ );
210
+ test_schema(
211
+ "min 456",
212
+ R"""({
213
+ "type": "integer",
214
+ "minimum": 456
215
+ })""",
216
+ // Passing strings
217
+ {
218
+ "456",
219
+ "4560",
220
+ "457",
221
+ "460",
222
+ "500",
223
+ },
224
+ // Failing strings
225
+ {
226
+ "455",
227
+ "356",
228
+ "50",
229
+ "050",
230
+ "-1",
231
+ "-456",
232
+ }
233
+ );
234
+ test_schema(
235
+ "min -123",
236
+ R"""({
237
+ "type": "integer",
238
+ "minimum": -123
239
+ })""",
240
+ // Passing strings
241
+ {
242
+ "-123",
243
+ "-122",
244
+ "-11",
245
+ "-1",
246
+ "0",
247
+ "1",
248
+ "123",
249
+ "1234",
250
+ "2345",
251
+ },
252
+ // Failing strings
253
+ {
254
+ "-1234",
255
+ "-124",
256
+ }
257
+ );
258
+
259
+ test_schema(
260
+ "max 9999",
261
+ // Schema
262
+ R"""({
263
+ "type": "integer",
264
+ "maximum": 9999
265
+ })""",
266
+ // Passing strings
267
+ {
268
+ "-99999",
269
+ "0",
270
+ "9999",
271
+ },
272
+ // Failing strings
273
+ {
274
+ "10000",
275
+ "99991",
276
+ }
277
+ );
278
+ test_schema(
279
+ "max -9999",
280
+ // Schema
281
+ R"""({
282
+ "type": "integer",
283
+ "maximum": -9999
284
+ })""",
285
+ // Passing strings
286
+ {
287
+ "-10000",
288
+ "-9999",
289
+ },
290
+ // Failing strings
291
+ {
292
+ "-9998",
293
+ "0",
294
+ "9999",
295
+ }
296
+ );
297
+ test_schema(
298
+ "min 5 max 30",
299
+ // Schema
300
+ R"""({
301
+ "type": "integer",
302
+ "minimum": 5,
303
+ "maximum": 30
304
+ })""",
305
+ // Passing strings
306
+ {
307
+ "5",
308
+ "10",
309
+ "30",
310
+ },
311
+ // Failing strings
312
+ {
313
+ "05",
314
+ "4",
315
+ "-1",
316
+ "31",
317
+ "123",
318
+ "0123",
319
+ }
320
+ );
321
+ test_schema(
322
+ "min -1 max 1",
323
+ R"""({
324
+ "type": "integer",
325
+ "minimum": -1,
326
+ "maximum": 1
327
+ })""",
328
+ // Passing strings
329
+ {
330
+ "-1",
331
+ "0",
332
+ "1",
333
+ },
334
+ // Failing strings
335
+ {
336
+ "-11",
337
+ "-10",
338
+ "-2",
339
+ "2",
340
+ "10",
341
+ "11",
342
+ }
343
+ );
344
+ test_schema(
345
+ "min -123 max 42",
346
+ R"""({
347
+ "type": "integer",
348
+ "minimum": -123,
349
+ "maximum": 42
350
+ })""",
351
+ // Passing strings
352
+ {
353
+ "-123",
354
+ "-122",
355
+ "-13",
356
+ "-11",
357
+ "-2",
358
+ "-1",
359
+ "0",
360
+ "1",
361
+ "5",
362
+ "10",
363
+ "39",
364
+ "40",
365
+ "42",
366
+ },
367
+ // Failing strings
368
+ {
369
+ "-0123",
370
+ "-124",
371
+ "-1123",
372
+ "-200",
373
+ "43",
374
+ "123",
375
+ "0123",
376
+ }
377
+ );
378
+ test_schema(
379
+ "exclusive min / max",
380
+ // Schema
381
+ R"""({
382
+ "type": "integer",
383
+ "exclusiveMinimum": 0,
384
+ "exclusiveMaximum": 10000
385
+ })""",
386
+ // Passing strings
387
+ {
388
+ "1",
389
+ "9999",
390
+ },
391
+ // Failing strings
392
+ {
393
+ "0",
394
+ "01",
395
+ "10000",
396
+ "99999",
397
+ }
398
+ );
399
+
123
400
  // Test case for a simple grammar
124
401
  test_grammar(
125
402
  "simple grammar",
@@ -205,6 +482,33 @@ static void test_complex_grammar() {
205
482
  );
206
483
  }
207
484
 
485
+ static void test_special_chars() {
486
+ // A collection of tests to exercise special characters such as "." (the cases below treat each "." as exactly one character)
487
+ test_grammar(
488
+ "special characters",
489
+ // Grammar
490
+ R"""(
491
+ root ::= ... "abc" ...
492
+ )""",
493
+ // Passing strings
494
+ {
495
+ "abcabcabc",
496
+ "aaaabcccc",
497
+ // NOTE: Also ensures that multi-byte characters still count as a single character
498
+ "🔵🟠✅abc❌🟠🔵"
499
+ },
500
+ // Failing strings (too few or too many characters around "abc")
501
+ {
502
+ "aaabcccc",
503
+ "aaaaabcccc",
504
+ "aaaabccc",
505
+ "aaaabccccc",
506
+ "🔵🟠✅❌abc❌✅🟠🔵", // comma required: adjacent literals would otherwise merge into a single test string
507
+ "🔵🟠abc🟠🔵"
508
+ }
509
+ );
510
+ }
511
+
208
512
  static void test_quantifiers() {
209
513
  // A collection of tests to exercise * + and ? quantifiers
210
514
 
@@ -292,15 +596,92 @@ static void test_quantifiers() {
292
596
  "catyyy",
293
597
  }
294
598
  );
599
+ test_grammar(
600
+ "simple exact repetition",
601
+ // Grammar
602
+ R"""(
603
+ root ::= [ab]{4}
604
+ )""",
605
+ // Passing strings
606
+ {
607
+ "aaaa",
608
+ "bbbb",
609
+ "abab",
610
+ },
611
+ // Failing strings
612
+ {
613
+ "a",
614
+ "b",
615
+ "aaaaa",
616
+ }
617
+ );
618
+ test_grammar(
619
+ "simple min repetition",
620
+ // Grammar
621
+ R"""(
622
+ root ::= [ab]{4,}
623
+ )""",
624
+ // Passing strings
625
+ {
626
+ "aaaa",
627
+ "aaaaab",
628
+ "bbbb",
629
+ "ababab",
630
+ },
631
+ // Failing strings
632
+ {
633
+ "",
634
+ "aba",
635
+ }
636
+ );
637
+ test_grammar(
638
+ "simple max repetition",
639
+ // Grammar
640
+ R"""(
641
+ root ::= [ab]{0,4}
642
+ )""",
643
+ // Passing strings
644
+ {
645
+ "",
646
+ "a",
647
+ "aa",
648
+ "aaa",
649
+ "aaab",
650
+ },
651
+ // Failing strings
652
+ {
653
+ "aaaaa",
654
+ }
655
+ );
656
+ test_grammar(
657
+ "min / max repetition",
658
+ // Grammar
659
+ R"""(
660
+ root ::= ("0x" [A-F0-9]{2} " "?){3,5}
661
+ )""",
662
+ // Passing strings
663
+ {
664
+ "0xFF 0x12 0xAB",
665
+ "0xFF 0x12 0xAB 0x00 0x00",
666
+ },
667
+ // Failing strings
668
+ {
669
+ "",
670
+ "0xFF",
671
+ "0xFF 0x12",
672
+ "0xFF 0x12 0xAB 0x00 0x00 0x00",
673
+ }
674
+ );
295
675
  }
296
676
 
297
677
  static void test_failure_missing_root() {
298
678
  fprintf(stderr, "⚫ Testing missing root node:\n");
299
679
  // Test case for a grammar that is missing a root rule
300
- const std::string grammar_str = R"""(rot ::= expr
301
- expr ::= term ("+" term)*
302
- term ::= number
303
- number ::= [0-9]+)""";
680
+ const std::string grammar_str = R"""(
681
+ rot ::= expr
682
+ expr ::= term ("+" term)*
683
+ term ::= number
684
+ number ::= [0-9]+)""";
304
685
 
305
686
  grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());
306
687
 
@@ -317,10 +698,10 @@ static void test_failure_missing_reference() {
317
698
 
318
699
  // Test case for a grammar that is missing a referenced rule
319
700
  const std::string grammar_str =
320
- R"""(root ::= expr
321
- expr ::= term ("+" term)*
322
- term ::= numero
323
- number ::= [0-9]+)""";
701
+ R"""(root ::= expr
702
+ expr ::= term ("+" term)*
703
+ term ::= numero
704
+ number ::= [0-9]+)""";
324
705
 
325
706
  fprintf(stderr, " Expected error: ");
326
707
 
@@ -342,37 +723,603 @@ static void test_failure_left_recursion() {
342
723
 
343
724
  // Test more complicated left recursion detection
344
725
  const std::string medium_str = R"""(
345
- root ::= asdf
346
- asdf ::= "a" | asdf "a"
347
- )""";
726
+ root ::= asdf
727
+ asdf ::= "a" | asdf "a"
728
+ )""";
348
729
  assert(test_build_grammar_fails(medium_str));
349
730
 
350
731
  // Test even more complicated left recursion detection
351
732
  const std::string hard_str = R"""(
352
- root ::= asdf
353
- asdf ::= "a" | foo "b"
354
- foo ::= "c" | asdf "d" | "e")""";
733
+ root ::= asdf
734
+ asdf ::= "a" | foo "b"
735
+ foo ::= "c" | asdf "d" | "e")""";
355
736
  assert(test_build_grammar_fails(hard_str));
356
737
 
357
738
  // Test yet even more complicated left recursion detection
358
739
  const std::string hardest_str = R"""(
359
- root ::= asdf
360
- asdf ::= "a" | foo "b"
361
- foo ::= "c" | empty asdf "d" | "e"
362
- empty ::= "blah" | )""";
740
+ root ::= asdf
741
+ asdf ::= "a" | foo "b"
742
+ foo ::= "c" | empty asdf "d" | "e"
743
+ empty ::= "blah" | )""";
363
744
  assert(test_build_grammar_fails(hardest_str));
364
745
 
365
746
  fprintf(stderr, " ✅︎ Passed\n");
366
747
  }
367
748
 
749
+ static void test_json_schema() {
750
+ // Note that this is similar to the regular grammar tests,
751
+ // but we convert each json schema to a grammar before parsing.
752
+ // Otherwise, this test structure is the same.
753
+
754
+ test_schema(
755
+ "empty schema (object)",
756
+ // Schema
757
+ R"""(
758
+ {}
759
+ )""",
760
+ // Passing strings
761
+ {
762
+ R"""({})""",
763
+ R"""({"foo": "bar"})""",
764
+ },
765
+ // Failing strings
766
+ {
767
+ "",
768
+ "[]",
769
+ "null",
770
+ R"""("")""",
771
+ "true",
772
+ }
773
+ );
774
+
775
+ test_schema(
776
+ "exotic formats (list)",
777
+ // Schema
778
+ R"""({
779
+ "items": [
780
+ { "format": "date" },
781
+ { "format": "uuid" },
782
+ { "format": "time" },
783
+ { "format": "date-time" }
784
+ ]
785
+ })""",
786
+ // Passing strings
787
+ {
788
+ // "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
789
+ // "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
790
+ R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
791
+ //R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
792
+ //R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
793
+ },
794
+ // Failing strings
795
+ {
796
+ R"""(["foo", "bar"])""",
797
+ R"""(["12345678-1234-1234-1234-1234567890ab"])""",
798
+ }
799
+ );
800
+
801
+ test_schema(
802
+ "string",
803
+ // Schema
804
+ R"""({
805
+ "type": "string"
806
+ })""",
807
+ // Passing strings
808
+ {
809
+ R"""("foo")""",
810
+ R"""("bar")""",
811
+ R"""("")""",
812
+ },
813
+ // Failing strings
814
+ {
815
+ R"""({})""",
816
+ R"""("foo": "bar")""",
817
+ }
818
+ );
819
+
820
+ test_schema(
821
+ "string w/ min length 1",
822
+ // Schema
823
+ R"""({
824
+ "type": "string",
825
+ "minLength": 1
826
+ })""",
827
+ // Passing strings
828
+ {
829
+ R"""("foo")""",
830
+ R"""("bar")""",
831
+ },
832
+ // Failing strings
833
+ {
834
+ R"""("")""",
835
+ R"""({})""",
836
+ R"""("foo": "bar")""",
837
+ }
838
+ );
839
+
840
+ test_schema(
841
+ "string w/ min length 3",
842
+ // Schema
843
+ R"""({
844
+ "type": "string",
845
+ "minLength": 3
846
+ })""",
847
+ // Passing strings
848
+ {
849
+ R"""("foo")""",
850
+ R"""("bar")""",
851
+ R"""("foobar")""",
852
+ },
853
+ // Failing strings
854
+ {
855
+ R"""("")""",
856
+ R"""("f")""",
857
+ R"""("fo")""",
858
+ }
859
+ );
860
+
861
+ test_schema(
862
+ "string w/ max length",
863
+ // Schema
864
+ R"""({
865
+ "type": "string",
866
+ "maxLength": 3
867
+ })""",
868
+ // Passing strings
869
+ {
870
+ R"""("foo")""",
871
+ R"""("bar")""",
872
+ R"""("")""",
873
+ R"""("f")""",
874
+ R"""("fo")""",
875
+ },
876
+ // Failing strings
877
+ {
878
+ R"""("foobar")""",
879
+ }
880
+ );
881
+
882
+ test_schema(
883
+ "string w/ min & max length",
884
+ // Schema
885
+ R"""({
886
+ "type": "string",
887
+ "minLength": 1,
888
+ "maxLength": 4
889
+ })""",
890
+ // Passing strings
891
+ {
892
+ R"""("foo")""",
893
+ R"""("bar")""",
894
+ R"""("f")""",
895
+ R"""("barf")""",
896
+ },
897
+ // Failing strings
898
+ {
899
+ R"""("")""",
900
+ R"""("barfo")""",
901
+ R"""("foobar")""",
902
+ }
903
+ );
904
+
905
+ test_schema(
906
+ "boolean",
907
+ // Schema
908
+ R"""({
909
+ "type": "boolean"
910
+ })""",
911
+ // Passing strings
912
+ {
913
+ "true",
914
+ "false",
915
+ },
916
+ // Failing strings
917
+ {
918
+ R"""("")""",
919
+ R"""("true")""",
920
+ R"""(True)""",
921
+ R"""(FALSE)""",
922
+ }
923
+ );
924
+
925
+ test_schema(
926
+ "integer",
927
+ // Schema
928
+ R"""({
929
+ "type": "integer"
930
+ })""",
931
+ // Passing strings
932
+ {
933
+ R"""(0)""",
934
+ R"""(12345)""",
935
+ R"""(1234567890123456)""",
936
+ },
937
+ // Failing strings
938
+ {
939
+ R"""()""",
940
+ R"""(01)""",
941
+ R"""(007)""",
942
+ R"""(12345678901234567 )""",
943
+ }
944
+ );
945
+
946
+ test_schema(
947
+ "string const",
948
+ // Schema
949
+ R"""({
950
+ "const": "foo"
951
+ })""",
952
+ // Passing strings
953
+ {
954
+ R"""("foo")""",
955
+ },
956
+ // Failing strings
957
+ {
958
+ R"""(foo)""",
959
+ R"""("bar")""",
960
+ }
961
+ );
962
+
963
+ test_schema(
964
+ "non-string const",
965
+ // Schema
966
+ R"""({
967
+ "const": true
968
+ })""",
969
+ // Passing strings
970
+ {
971
+ R"""(true)""",
972
+ },
973
+ // Failing strings
974
+ {
975
+ R"""()""",
976
+ R"""(foo)""",
977
+ R"""("true")""",
978
+ }
979
+ );
980
+
981
+ test_schema(
982
+ "enum",
983
+ // Schema (enum mixing strings, null, a number and an array)
984
+ R"""({
985
+ "enum": ["red", "amber", "green", null, 42, ["foo"]]
986
+ })""",
987
+ // Passing strings
988
+ {
989
+ R"""("red")""",
990
+ R"""(null)""",
991
+ R"""(42)""",
992
+ R"""(["foo"])""",
993
+ },
994
+ // Failing strings
995
+ {
996
+ R"""()""",
997
+ R"""(420)""",
998
+ R"""(true)""",
999
+ R"""(foo)""",
1000
+ }
1001
+ );
1002
+
1003
+ test_schema(
1004
+ "simple pattern",
1005
+ // Schema
1006
+ R"""({
1007
+ "pattern": "^[a-zA-Z0-9_-]*$"
1008
+ })""",
1009
+ // Passing strings
1010
+ {
1011
+ R"""("")""",
1012
+ R"""("He_llo-12")""",
1013
+ },
1014
+ // Failing strings
1015
+ {
1016
+ R"""("!")""",
1017
+ R"""("Hello World")""",
1018
+ }
1019
+ );
1020
+
1021
+ test_schema(
1022
+ "pattern with escapes",
1023
+ // Schema
1024
+ R"""({
1025
+ "pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
1026
+ })""",
1027
+ // Passing strings
1028
+ {
1029
+ R"""("a^$.[]()|{}*+?b")""",
1030
+ },
1031
+ // Failing strings
1032
+ {
1033
+ R"""("ab")""",
1034
+ }
1035
+ );
1036
+
1037
+ test_schema(
1038
+ "array of strings or null",
1039
+ // Schema
1040
+ R"""(
1041
+ {
1042
+ "type": ["array", "null"],
1043
+ "items": { "type": "string" }
1044
+ }
1045
+ )""",
1046
+ // Passing strings
1047
+ {
1048
+ "null",
1049
+ "[]",
1050
+ "[\"123\"]",
1051
+ "[\"foo\", \"bar\"]",
1052
+ },
1053
+ // Failing strings
1054
+ {
1055
+ "",
1056
+ "[123]",
1057
+ "\"foo\"",
1058
+ "[\"foo\", 42]",
1059
+ }
1060
+ );
1061
+
1062
+ test_schema(
1063
+ "min+max items",
1064
+ // Schema
1065
+ R"""({
1066
+ "items": {
1067
+ "type": ["number", "integer"]
1068
+ },
1069
+ "minItems": 3,
1070
+ "maxItems": 5
1071
+ })""",
1072
+ // Passing strings
1073
+ {
1074
+ R"""([1, 2, 3])""",
1075
+ R"""([1, 2, 3, 4])""",
1076
+ R"""([1, 2, 3, 4, 5])""",
1077
+ },
1078
+ // Failing strings
1079
+ {
1080
+ R"""([1, 2])""",
1081
+ R"""([1, 2, 3, 4, 5, 6])""",
1082
+ R"""(1)""",
1083
+ }
1084
+ );
1085
+
1086
+ // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1087
+ test_schema(
1088
+ "object properties",
1089
+ // Schema
1090
+ R"""({
1091
+ "type": "object",
1092
+ "properties": {
1093
+ "number": { "type": "number" },
1094
+ "street_name": { "type": "string" },
1095
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1096
+ }
1097
+ })""",
1098
+ // Passing strings
1099
+ {
1100
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1101
+ // "By default, leaving out properties is valid"
1102
+ R"""({ "street_name": "Pennsylvania" })""",
1103
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1104
+ // "By extension, even an empty object is valid"
1105
+ R"""({})""",
1106
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1107
+ },
1108
+ // Failing strings
1109
+ {
1110
+ // Change datatype from number to string
1111
+ R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1112
+ // Reorder properties
1113
+ R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
1114
+ // Change datatype from number to string (same input as the first failing case; no properties are reordered here)
1115
+ R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1116
+ // Additional properties default to false for generation, even though the spec says true.
1117
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1118
+
1119
+ }
1120
+ );
1121
+
1122
+ test_schema(
1123
+ "additional properties can't override other properties",
1124
+ R"""({
1125
+ "properties": {
1126
+ "a": {"type": "integer"},
1127
+ "b": {"type": "integer"}
1128
+ },
1129
+ "additionalProperties": true
1130
+ })""",
1131
+ // Passing strings
1132
+ {
1133
+ R"""({"a": 42})""",
1134
+ R"""({"c": ""})""",
1135
+ R"""({"a": 42, "c": ""})""",
1136
+ R"""({"a_": ""})""",
1137
+ },
1138
+ // Failing strings
1139
+ {
1140
+ R"""()""",
1141
+ R"""({"a": ""})""",
1142
+ R"""({"a": "", "b": ""})""",
1143
+ }
1144
+ );
1145
+
1146
+ // Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
1147
+ test_schema(
1148
+ "object properties, additionalProperties: true",
1149
+ // Schema
1150
+ R"""({
1151
+ "type": "object",
1152
+ "properties": {
1153
+ "number": { "type": "number" },
1154
+ "street_name": { "type": "string" },
1155
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1156
+ },
1157
+ "additionalProperties": true
1158
+ })""",
1159
+ // Passing strings
1160
+ {
1161
+ // "By extension, even an empty object is valid"
1162
+ R"""({})""",
1163
+ R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
1164
+ // "By default, leaving out properties is valid"
1165
+ R"""({ "street_name": "Pennsylvania" })""",
1166
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1167
+ // "By default, providing additional properties is valid"
1168
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
1169
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1170
+ },
1171
+ // Failing strings
1172
+ {
1173
+ // Change datatype from number to string
1174
+ R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1175
+ // Reorder properties
1176
+ R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
1177
+ }
1178
+ );
1179
+
1180
+ // Additional properties: false
1181
+ test_schema(
1182
+ "object properties, additionalProperties: false",
1183
+ // Schema (no "required" list: every property is optional)
1184
+ R"""({
1185
+ "type": "object",
1186
+ "properties": {
1187
+ "number": { "type": "number" },
1188
+ "street_name": { "type": "string" },
1189
+ "street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
1190
+ },
1191
+ "additionalProperties": false
1192
+ })""",
1193
+ // Passing strings
1194
+ {
1195
+ R"""({ "street_name": "Pennsylvania" })""",
1196
+ R"""({ "number": 1600, "street_type":"Avenue"})""",
1197
+ R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
1198
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
1199
+ // Spaces are permitted around enum values
1200
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
1201
+ },
1202
+ // Failing strings
1203
+ {
1204
+ // Reorder properties
1205
+ R"""({ "street_type": "Avenue", "number": 1600 })""",
1206
+ // Add "direction"
1207
+ R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
1208
+ }
1209
+ );
1210
+
1211
+ test_schema(
1212
+ "required + optional props each in original order",
1213
+ // Schema
1214
+ R"""({
1215
+ "properties": {
1216
+ "b": {"type": "string"},
1217
+ "a": {"type": "string"},
1218
+ "d": {"type": "string"},
1219
+ "c": {"type": "string"}
1220
+ },
1221
+ "required": ["a", "b"],
1222
+ "additionalProperties": false
1223
+ })""",
1224
+ // Passing strings
1225
+ {
1226
+ R"""({"b": "foo", "a": "bar"})""",
1227
+ R"""({"b":"foo","a":"bar","d":"qux"})""",
1228
+ R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
1229
+ },
1230
+ // Failing strings
1231
+ {
1232
+ R"""({"a": "foo", "b": "bar"})""",
1233
+ R"""({"b": "bar"})""",
1234
+ R"""({"a": "foo", "c": "baz"})""",
1235
+ R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
1236
+ }
1237
+ );
1238
+
1239
+ // NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
1240
+ test_schema(
1241
+ "required props",
1242
+ // Schema
1243
+ R"""({
1244
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
1245
+ "$id": "https://example.com/product.schema.json",
1246
+ "title": "Product",
1247
+ "description": "A product from Acme's catalog",
1248
+ "type": "object",
1249
+ "properties": {
1250
+ "productId": {
1251
+ "description": "The unique identifier for a product",
1252
+ "type": "integer"
1253
+ },
1254
+ "productName": {
1255
+ "description": "Name of the product",
1256
+ "type": "string"
1257
+ },
1258
+ "price": {
1259
+ "description": "The price of the product",
1260
+ "type": "number",
1261
+ "exclusiveMinimum": 0
1262
+ },
1263
+ "tags": {
1264
+ "description": "Tags for the product",
1265
+ "type": "array",
1266
+ "items": {
1267
+ "type": "string"
1268
+ },
1269
+ "minItems": 1,
1270
+ "uniqueItems": true
1271
+ },
1272
+ "dimensions": {
1273
+ "type": "object",
1274
+ "properties": {
1275
+ "length": {
1276
+ "type": "number"
1277
+ },
1278
+ "width": {
1279
+ "type": "number"
1280
+ },
1281
+ "height": {
1282
+ "type": "number"
1283
+ }
1284
+ },
1285
+ "required": [ "length", "width", "height" ]
1286
+ }
1287
+ },
1288
+ "required": [ "productId", "productName", "price" ]
1289
+ })""",
1290
+ // Passing strings
1291
+ {
1292
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
1293
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
1294
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
1295
+ },
1296
+ // Failing strings
1297
+ {
1298
+ R"""({})""", // Missing all required properties
1299
+ R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
1300
+ // TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
1301
+ // Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
1302
+ // R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
1303
+ R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
1304
+ R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
1305
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
1306
+ R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
1307
+ // TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
1308
+ // R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
1309
+ }
1310
+ );
1311
+ }
1312
+
368
1313
  int main() { // Entry point: runs each grammar integration test group in the order listed below
369
1314
  fprintf(stdout, "Running grammar integration tests...\n");
370
1315
  test_simple_grammar();
371
1316
  test_complex_grammar();
1317
+ test_special_chars(); // special characters such as "."
372
1318
  test_quantifiers(); // * + ? quantifiers and {m,n} repetitions
373
1319
  test_failure_missing_root(); // grammar that is missing a root rule
374
1320
  test_failure_missing_reference(); // grammar that is missing a referenced rule
375
1321
  test_failure_left_recursion(); // left-recursion detection
1322
+ test_json_schema(); // JSON schemas converted to grammars before parsing
376
1323
  fprintf(stdout, "All tests passed.\n"); // NOTE(review): presumably only reached when no test group aborted earlier — confirm against the helpers
377
1324
  return 0;
378
1325
  }