node-llama-cpp 3.0.0-beta.2 → 3.0.0-beta.20

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (639):
  1. package/README.md +14 -11
  2. package/dist/ChatWrapper.d.ts +2 -15
  3. package/dist/ChatWrapper.js +28 -33
  4. package/dist/ChatWrapper.js.map +1 -1
  5. package/dist/apiDocsOverrides.d.ts +1 -0
  6. package/dist/apiDocsOverrides.js +5 -0
  7. package/dist/apiDocsOverrides.js.map +1 -0
  8. package/dist/{utils/getBin.d.ts → bindings/AddonTypes.d.ts} +54 -7
  9. package/dist/bindings/AddonTypes.js +2 -0
  10. package/dist/bindings/AddonTypes.js.map +1 -0
  11. package/dist/bindings/Llama.d.ts +47 -0
  12. package/dist/bindings/Llama.js +343 -0
  13. package/dist/bindings/Llama.js.map +1 -0
  14. package/dist/bindings/consts.d.ts +2 -0
  15. package/dist/bindings/consts.js +11 -0
  16. package/dist/bindings/consts.js.map +1 -0
  17. package/dist/bindings/getLlama.d.ts +145 -0
  18. package/dist/bindings/getLlama.js +389 -0
  19. package/dist/bindings/getLlama.js.map +1 -0
  20. package/dist/bindings/types.d.ts +55 -0
  21. package/dist/bindings/types.js +77 -0
  22. package/dist/bindings/types.js.map +1 -0
  23. package/dist/bindings/utils/MemoryOrchestrator.d.ts +21 -0
  24. package/dist/bindings/utils/MemoryOrchestrator.js +49 -0
  25. package/dist/bindings/utils/MemoryOrchestrator.js.map +1 -0
  26. package/dist/bindings/utils/NoBinaryFoundError.d.ts +2 -0
  27. package/dist/bindings/utils/NoBinaryFoundError.js +7 -0
  28. package/dist/bindings/utils/NoBinaryFoundError.js.map +1 -0
  29. package/dist/bindings/utils/asyncEvery.d.ts +5 -0
  30. package/dist/bindings/utils/asyncEvery.js +15 -0
  31. package/dist/bindings/utils/asyncEvery.js.map +1 -0
  32. package/dist/bindings/utils/asyncSome.d.ts +5 -0
  33. package/dist/bindings/utils/asyncSome.js +27 -0
  34. package/dist/bindings/utils/asyncSome.js.map +1 -0
  35. package/dist/{utils → bindings/utils}/binariesGithubRelease.js +1 -1
  36. package/dist/bindings/utils/binariesGithubRelease.js.map +1 -0
  37. package/dist/bindings/utils/clearAllLocalBuilds.d.ts +1 -0
  38. package/dist/bindings/utils/clearAllLocalBuilds.js +47 -0
  39. package/dist/bindings/utils/clearAllLocalBuilds.js.map +1 -0
  40. package/dist/bindings/utils/cloneLlamaCppRepo.d.ts +11 -0
  41. package/dist/bindings/utils/cloneLlamaCppRepo.js +166 -0
  42. package/dist/bindings/utils/cloneLlamaCppRepo.js.map +1 -0
  43. package/dist/bindings/utils/compileLLamaCpp.d.ts +15 -0
  44. package/dist/bindings/utils/compileLLamaCpp.js +221 -0
  45. package/dist/bindings/utils/compileLLamaCpp.js.map +1 -0
  46. package/dist/bindings/utils/detectAvailableComputeLayers.d.ts +14 -0
  47. package/dist/bindings/utils/detectAvailableComputeLayers.js +304 -0
  48. package/dist/bindings/utils/detectAvailableComputeLayers.js.map +1 -0
  49. package/dist/bindings/utils/detectGlibc.d.ts +4 -0
  50. package/dist/bindings/utils/detectGlibc.js +46 -0
  51. package/dist/bindings/utils/detectGlibc.js.map +1 -0
  52. package/dist/bindings/utils/getBestComputeLayersAvailable.d.ts +9 -0
  53. package/dist/bindings/utils/getBestComputeLayersAvailable.js +29 -0
  54. package/dist/bindings/utils/getBestComputeLayersAvailable.js.map +1 -0
  55. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.d.ts +5 -0
  56. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js +93 -0
  57. package/dist/bindings/utils/getBuildFolderNameForBuildOptions.js.map +1 -0
  58. package/dist/bindings/utils/getCanUsePrebuiltBinaries.d.ts +1 -0
  59. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js +8 -0
  60. package/dist/bindings/utils/getCanUsePrebuiltBinaries.js.map +1 -0
  61. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.d.ts +2 -0
  62. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js +21 -0
  63. package/dist/bindings/utils/getExampleUsageCodeOfGetLlama.js.map +1 -0
  64. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +11 -0
  65. package/dist/bindings/utils/getGpuTypesToUseForOption.js +30 -0
  66. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -0
  67. package/dist/bindings/utils/getLinuxDistroInfo.d.ts +9 -0
  68. package/dist/bindings/utils/getLinuxDistroInfo.js +46 -0
  69. package/dist/bindings/utils/getLinuxDistroInfo.js.map +1 -0
  70. package/dist/bindings/utils/getLlamaWithoutBackend.d.ts +5 -0
  71. package/dist/bindings/utils/getLlamaWithoutBackend.js +27 -0
  72. package/dist/bindings/utils/getLlamaWithoutBackend.js.map +1 -0
  73. package/dist/bindings/utils/getPlatform.d.ts +2 -0
  74. package/dist/bindings/utils/getPlatform.js +15 -0
  75. package/dist/bindings/utils/getPlatform.js.map +1 -0
  76. package/dist/bindings/utils/getPlatformInfo.d.ts +5 -0
  77. package/dist/bindings/utils/getPlatformInfo.js +28 -0
  78. package/dist/bindings/utils/getPlatformInfo.js.map +1 -0
  79. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.d.ts +3 -0
  80. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js +27 -0
  81. package/dist/bindings/utils/hasBuildingFromSourceDependenciesInstalled.js.map +1 -0
  82. package/dist/bindings/utils/hasFileInPath.d.ts +2 -0
  83. package/dist/bindings/utils/hasFileInPath.js +34 -0
  84. package/dist/bindings/utils/hasFileInPath.js.map +1 -0
  85. package/dist/bindings/utils/lastBuildInfo.d.ts +6 -0
  86. package/dist/bindings/utils/lastBuildInfo.js +17 -0
  87. package/dist/bindings/utils/lastBuildInfo.js.map +1 -0
  88. package/dist/bindings/utils/logBinaryUsageExampleToConsole.d.ts +2 -0
  89. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js +22 -0
  90. package/dist/bindings/utils/logBinaryUsageExampleToConsole.js.map +1 -0
  91. package/dist/bindings/utils/logDistroInstallInstruction.d.ts +14 -0
  92. package/dist/bindings/utils/logDistroInstallInstruction.js +48 -0
  93. package/dist/bindings/utils/logDistroInstallInstruction.js.map +1 -0
  94. package/dist/bindings/utils/resolveCustomCmakeOptions.d.ts +1 -0
  95. package/dist/bindings/utils/resolveCustomCmakeOptions.js +45 -0
  96. package/dist/bindings/utils/resolveCustomCmakeOptions.js.map +1 -0
  97. package/dist/bindings/utils/testBindingBinary.d.ts +1 -0
  98. package/dist/bindings/utils/testBindingBinary.js +98 -0
  99. package/dist/bindings/utils/testBindingBinary.js.map +1 -0
  100. package/dist/bindings/utils/testCmakeBinary.d.ts +5 -0
  101. package/dist/bindings/utils/testCmakeBinary.js +32 -0
  102. package/dist/bindings/utils/testCmakeBinary.js.map +1 -0
  103. package/dist/chatWrappers/AlpacaChatWrapper.d.ts +2 -1
  104. package/dist/chatWrappers/AlpacaChatWrapper.js +9 -2
  105. package/dist/chatWrappers/AlpacaChatWrapper.js.map +1 -1
  106. package/dist/chatWrappers/ChatMLChatWrapper.d.ts +5 -0
  107. package/dist/chatWrappers/ChatMLChatWrapper.js +13 -11
  108. package/dist/chatWrappers/ChatMLChatWrapper.js.map +1 -1
  109. package/dist/chatWrappers/FalconChatWrapper.d.ts +2 -1
  110. package/dist/chatWrappers/FalconChatWrapper.js +28 -11
  111. package/dist/chatWrappers/FalconChatWrapper.js.map +1 -1
  112. package/dist/chatWrappers/FunctionaryChatWrapper.js +86 -73
  113. package/dist/chatWrappers/FunctionaryChatWrapper.js.map +1 -1
  114. package/dist/chatWrappers/{LlamaChatWrapper.d.ts → GemmaChatWrapper.d.ts} +6 -1
  115. package/dist/chatWrappers/GemmaChatWrapper.js +88 -0
  116. package/dist/chatWrappers/GemmaChatWrapper.js.map +1 -0
  117. package/dist/chatWrappers/GeneralChatWrapper.d.ts +2 -1
  118. package/dist/chatWrappers/GeneralChatWrapper.js +35 -12
  119. package/dist/chatWrappers/GeneralChatWrapper.js.map +1 -1
  120. package/dist/chatWrappers/Llama2ChatWrapper.d.ts +20 -0
  121. package/dist/chatWrappers/{LlamaChatWrapper.js → Llama2ChatWrapper.js} +29 -11
  122. package/dist/chatWrappers/Llama2ChatWrapper.js.map +1 -0
  123. package/dist/chatWrappers/Llama3ChatWrapper.d.ts +31 -0
  124. package/dist/chatWrappers/Llama3ChatWrapper.js +129 -0
  125. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -0
  126. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +73 -0
  127. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js +359 -0
  128. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -0
  129. package/dist/chatWrappers/generic/TemplateChatWrapper.d.ts +64 -0
  130. package/dist/chatWrappers/generic/TemplateChatWrapper.js +200 -0
  131. package/dist/chatWrappers/generic/TemplateChatWrapper.js.map +1 -0
  132. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.d.ts +33 -0
  133. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js +45 -0
  134. package/dist/chatWrappers/generic/utils/chatHistoryFunctionCallMessageTemplate.js.map +1 -0
  135. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.d.ts +42 -0
  136. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js +82 -0
  137. package/dist/chatWrappers/utils/ChatModelFunctionsDocumentationGenerator.js.map +1 -0
  138. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.d.ts +4 -0
  139. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js +206 -0
  140. package/dist/chatWrappers/utils/isJinjaTemplateEquivalentToSpecializedChatWrapper.js.map +1 -0
  141. package/dist/chatWrappers/utils/resolveChatWrapper.d.ts +69 -0
  142. package/dist/chatWrappers/utils/resolveChatWrapper.js +214 -0
  143. package/dist/chatWrappers/utils/resolveChatWrapper.js.map +1 -0
  144. package/dist/cli/cli.js +21 -7
  145. package/dist/cli/cli.js.map +1 -1
  146. package/dist/cli/commands/BuildCommand.d.ts +6 -4
  147. package/dist/cli/commands/BuildCommand.js +103 -41
  148. package/dist/cli/commands/BuildCommand.js.map +1 -1
  149. package/dist/cli/commands/ChatCommand.d.ts +18 -6
  150. package/dist/cli/commands/ChatCommand.js +298 -142
  151. package/dist/cli/commands/ChatCommand.js.map +1 -1
  152. package/dist/cli/commands/ClearCommand.d.ts +1 -1
  153. package/dist/cli/commands/ClearCommand.js +11 -12
  154. package/dist/cli/commands/ClearCommand.js.map +1 -1
  155. package/dist/cli/commands/CompleteCommand.d.ts +29 -0
  156. package/dist/cli/commands/CompleteCommand.js +365 -0
  157. package/dist/cli/commands/CompleteCommand.js.map +1 -0
  158. package/dist/cli/commands/DebugCommand.d.ts +7 -0
  159. package/dist/cli/commands/DebugCommand.js +54 -0
  160. package/dist/cli/commands/DebugCommand.js.map +1 -0
  161. package/dist/cli/commands/DownloadCommand.d.ts +6 -4
  162. package/dist/cli/commands/DownloadCommand.js +120 -69
  163. package/dist/cli/commands/DownloadCommand.js.map +1 -1
  164. package/dist/cli/commands/InfillCommand.d.ts +31 -0
  165. package/dist/cli/commands/InfillCommand.js +401 -0
  166. package/dist/cli/commands/InfillCommand.js.map +1 -0
  167. package/dist/cli/commands/InitCommand.d.ts +11 -0
  168. package/dist/cli/commands/InitCommand.js +195 -0
  169. package/dist/cli/commands/InitCommand.js.map +1 -0
  170. package/dist/cli/commands/OnPostInstallCommand.js +9 -10
  171. package/dist/cli/commands/OnPostInstallCommand.js.map +1 -1
  172. package/dist/cli/commands/PullCommand.d.ts +12 -0
  173. package/dist/cli/commands/PullCommand.js +117 -0
  174. package/dist/cli/commands/PullCommand.js.map +1 -0
  175. package/dist/cli/commands/inspect/InspectCommand.d.ts +4 -0
  176. package/dist/cli/commands/inspect/InspectCommand.js +19 -0
  177. package/dist/cli/commands/inspect/InspectCommand.js.map +1 -0
  178. package/dist/cli/commands/inspect/commands/InspectGgufCommand.d.ts +12 -0
  179. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js +136 -0
  180. package/dist/cli/commands/inspect/commands/InspectGgufCommand.js.map +1 -0
  181. package/dist/cli/commands/inspect/commands/InspectGpuCommand.d.ts +4 -0
  182. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js +138 -0
  183. package/dist/cli/commands/inspect/commands/InspectGpuCommand.js.map +1 -0
  184. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +17 -0
  185. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +613 -0
  186. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -0
  187. package/dist/cli/projectTemplates.d.ts +7 -0
  188. package/dist/cli/projectTemplates.js +10 -0
  189. package/dist/cli/projectTemplates.js.map +1 -0
  190. package/dist/cli/recommendedModels.d.ts +2 -0
  191. package/dist/cli/recommendedModels.js +342 -0
  192. package/dist/cli/recommendedModels.js.map +1 -0
  193. package/dist/cli/startCreateCli.d.ts +2 -0
  194. package/dist/cli/startCreateCli.js +26 -0
  195. package/dist/cli/startCreateCli.js.map +1 -0
  196. package/dist/cli/utils/ConsoleInteraction.d.ts +23 -0
  197. package/dist/cli/utils/ConsoleInteraction.js +122 -0
  198. package/dist/cli/utils/ConsoleInteraction.js.map +1 -0
  199. package/dist/cli/utils/ConsoleTable.d.ts +23 -0
  200. package/dist/cli/utils/ConsoleTable.js +86 -0
  201. package/dist/cli/utils/ConsoleTable.js.map +1 -0
  202. package/dist/cli/utils/basicChooseFromListConsoleInteraction.d.ts +13 -0
  203. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js +111 -0
  204. package/dist/cli/utils/basicChooseFromListConsoleInteraction.js.map +1 -0
  205. package/dist/cli/utils/consolePromptQuestion.d.ts +6 -0
  206. package/dist/cli/utils/consolePromptQuestion.js +82 -0
  207. package/dist/cli/utils/consolePromptQuestion.js.map +1 -0
  208. package/dist/cli/utils/getReadablePath.d.ts +1 -0
  209. package/dist/cli/utils/getReadablePath.js +14 -0
  210. package/dist/cli/utils/getReadablePath.js.map +1 -0
  211. package/dist/cli/utils/interactivelyAskForModel.d.ts +7 -0
  212. package/dist/cli/utils/interactivelyAskForModel.js +451 -0
  213. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -0
  214. package/dist/cli/utils/logUsedGpuTypeOption.d.ts +2 -0
  215. package/dist/cli/utils/logUsedGpuTypeOption.js +9 -0
  216. package/dist/cli/utils/logUsedGpuTypeOption.js.map +1 -0
  217. package/dist/cli/utils/printCommonInfoLines.d.ts +9 -0
  218. package/dist/cli/utils/printCommonInfoLines.js +71 -0
  219. package/dist/cli/utils/printCommonInfoLines.js.map +1 -0
  220. package/dist/cli/utils/printInfoLine.d.ts +12 -0
  221. package/dist/cli/utils/printInfoLine.js +54 -0
  222. package/dist/cli/utils/printInfoLine.js.map +1 -0
  223. package/dist/cli/utils/projectTemplates.d.ts +19 -0
  224. package/dist/cli/utils/projectTemplates.js +47 -0
  225. package/dist/cli/utils/projectTemplates.js.map +1 -0
  226. package/dist/cli/utils/resolveCommandGgufPath.d.ts +4 -0
  227. package/dist/cli/utils/resolveCommandGgufPath.js +71 -0
  228. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -0
  229. package/dist/cli/utils/resolveHeaderFlag.d.ts +1 -0
  230. package/dist/cli/utils/resolveHeaderFlag.js +21 -0
  231. package/dist/cli/utils/resolveHeaderFlag.js.map +1 -0
  232. package/dist/cli/utils/resolveModelRecommendationFileOptions.d.ts +19 -0
  233. package/dist/cli/utils/resolveModelRecommendationFileOptions.js +7 -0
  234. package/dist/cli/utils/resolveModelRecommendationFileOptions.js.map +1 -0
  235. package/dist/cli/utils/splitAnsiToLines.d.ts +1 -0
  236. package/dist/cli/utils/splitAnsiToLines.js +32 -0
  237. package/dist/cli/utils/splitAnsiToLines.js.map +1 -0
  238. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.d.ts +2 -0
  239. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js +23 -0
  240. package/dist/cli/utils/withCliCommandDescriptionDocsUrl.js.map +1 -0
  241. package/dist/commands.d.ts +1 -0
  242. package/dist/commands.js +3 -0
  243. package/dist/commands.js.map +1 -1
  244. package/dist/config.d.ts +38 -5
  245. package/dist/config.js +61 -16
  246. package/dist/config.js.map +1 -1
  247. package/dist/consts.d.ts +3 -0
  248. package/dist/consts.js +10 -0
  249. package/dist/consts.js.map +1 -0
  250. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.d.ts +37 -35
  251. package/dist/{llamaEvaluator → evaluator}/LlamaChat/LlamaChat.js +298 -221
  252. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -0
  253. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.d.ts +2 -1
  254. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/FunctionCallGrammar.js +5 -3
  255. package/dist/evaluator/LlamaChat/utils/FunctionCallGrammar.js.map +1 -0
  256. package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js +18 -0
  257. package/dist/evaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +1 -0
  258. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.d.ts +40 -3
  259. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/LlamaChatSession.js +28 -7
  260. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -0
  261. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.d.ts +3 -0
  262. package/dist/{llamaEvaluator → evaluator}/LlamaChatSession/utils/defineChatSessionFunction.js +3 -0
  263. package/dist/evaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +1 -0
  264. package/dist/evaluator/LlamaCompletion.d.ts +155 -0
  265. package/dist/evaluator/LlamaCompletion.js +405 -0
  266. package/dist/evaluator/LlamaCompletion.js.map +1 -0
  267. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.d.ts +41 -20
  268. package/dist/{llamaEvaluator → evaluator}/LlamaContext/LlamaContext.js +271 -81
  269. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -0
  270. package/dist/evaluator/LlamaContext/types.d.ts +140 -0
  271. package/dist/evaluator/LlamaContext/types.js.map +1 -0
  272. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/firstInFirstOutStrategy.js.map +1 -0
  273. package/dist/evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies/maximumParallelismStrategy.js.map +1 -0
  274. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.d.ts +2 -0
  275. package/dist/{llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js → evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js} +4 -4
  276. package/dist/evaluator/LlamaContext/utils/resolveBatchItemsPrioritizationStrategy.js.map +1 -0
  277. package/dist/evaluator/LlamaEmbeddingContext.d.ts +51 -0
  278. package/dist/evaluator/LlamaEmbeddingContext.js +73 -0
  279. package/dist/evaluator/LlamaEmbeddingContext.js.map +1 -0
  280. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.d.ts +8 -5
  281. package/dist/{llamaEvaluator → evaluator}/LlamaGrammar.js +13 -10
  282. package/dist/evaluator/LlamaGrammar.js.map +1 -0
  283. package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.js +4 -4
  284. package/dist/evaluator/LlamaGrammarEvaluationState.js.map +1 -0
  285. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.d.ts +2 -1
  286. package/dist/{llamaEvaluator → evaluator}/LlamaJsonSchemaGrammar.js +3 -3
  287. package/dist/evaluator/LlamaJsonSchemaGrammar.js.map +1 -0
  288. package/dist/evaluator/LlamaModel.d.ts +230 -0
  289. package/dist/evaluator/LlamaModel.js +597 -0
  290. package/dist/evaluator/LlamaModel.js.map +1 -0
  291. package/dist/evaluator/TokenBias.d.ts +22 -0
  292. package/dist/evaluator/TokenBias.js +33 -0
  293. package/dist/evaluator/TokenBias.js.map +1 -0
  294. package/dist/evaluator/TokenMeter.d.ts +54 -0
  295. package/dist/evaluator/TokenMeter.js +86 -0
  296. package/dist/evaluator/TokenMeter.js.map +1 -0
  297. package/dist/gguf/consts.d.ts +3 -0
  298. package/dist/gguf/consts.js +8 -0
  299. package/dist/gguf/consts.js.map +1 -0
  300. package/dist/gguf/errors/InvalidGgufMagicError.d.ts +3 -0
  301. package/dist/gguf/errors/InvalidGgufMagicError.js +6 -0
  302. package/dist/gguf/errors/InvalidGgufMagicError.js.map +1 -0
  303. package/dist/gguf/errors/UnsupportedGgufValueTypeError.d.ts +4 -0
  304. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js +9 -0
  305. package/dist/gguf/errors/UnsupportedGgufValueTypeError.js.map +1 -0
  306. package/dist/gguf/fileReaders/GgufFileReader.d.ts +33 -0
  307. package/dist/gguf/fileReaders/GgufFileReader.js +76 -0
  308. package/dist/gguf/fileReaders/GgufFileReader.js.map +1 -0
  309. package/dist/gguf/fileReaders/GgufFsFileReader.d.ts +17 -0
  310. package/dist/gguf/fileReaders/GgufFsFileReader.js +45 -0
  311. package/dist/gguf/fileReaders/GgufFsFileReader.js.map +1 -0
  312. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.d.ts +22 -0
  313. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js +63 -0
  314. package/dist/gguf/fileReaders/GgufNetworkFetchFileReader.js.map +1 -0
  315. package/dist/gguf/insights/GgufInsights.d.ts +42 -0
  316. package/dist/gguf/insights/GgufInsights.js +361 -0
  317. package/dist/gguf/insights/GgufInsights.js.map +1 -0
  318. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +87 -0
  319. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +136 -0
  320. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -0
  321. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +18 -0
  322. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +76 -0
  323. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -0
  324. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +14 -0
  325. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +177 -0
  326. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -0
  327. package/dist/gguf/insights/utils/scoreLevels.d.ts +5 -0
  328. package/dist/gguf/insights/utils/scoreLevels.js +16 -0
  329. package/dist/gguf/insights/utils/scoreLevels.js.map +1 -0
  330. package/dist/gguf/parser/GgufV2Parser.d.ts +19 -0
  331. package/dist/gguf/parser/GgufV2Parser.js +115 -0
  332. package/dist/gguf/parser/GgufV2Parser.js.map +1 -0
  333. package/dist/gguf/parser/GgufV3Parser.d.ts +3 -0
  334. package/dist/gguf/parser/GgufV3Parser.js +4 -0
  335. package/dist/gguf/parser/GgufV3Parser.js.map +1 -0
  336. package/dist/gguf/parser/parseGguf.d.ts +8 -0
  337. package/dist/gguf/parser/parseGguf.js +63 -0
  338. package/dist/gguf/parser/parseGguf.js.map +1 -0
  339. package/dist/gguf/readGgufFileInfo.d.ts +33 -0
  340. package/dist/gguf/readGgufFileInfo.js +66 -0
  341. package/dist/gguf/readGgufFileInfo.js.map +1 -0
  342. package/dist/gguf/types/GgufFileInfoTypes.d.ts +84 -0
  343. package/dist/gguf/types/GgufFileInfoTypes.js +18 -0
  344. package/dist/gguf/types/GgufFileInfoTypes.js.map +1 -0
  345. package/dist/gguf/types/GgufMetadataTypes.d.ts +334 -0
  346. package/dist/gguf/types/GgufMetadataTypes.js +86 -0
  347. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -0
  348. package/dist/gguf/types/GgufTensorInfoTypes.d.ts +37 -0
  349. package/dist/gguf/types/GgufTensorInfoTypes.js +33 -0
  350. package/dist/gguf/types/GgufTensorInfoTypes.js.map +1 -0
  351. package/dist/gguf/utils/GgufReadOffset.d.ts +6 -0
  352. package/dist/gguf/utils/GgufReadOffset.js +18 -0
  353. package/dist/gguf/utils/GgufReadOffset.js.map +1 -0
  354. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.d.ts +5 -0
  355. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js +38 -0
  356. package/dist/gguf/utils/convertMetadataKeyValueRecordToNestedObject.js.map +1 -0
  357. package/dist/gguf/utils/getGgufFileTypeName.d.ts +4 -0
  358. package/dist/gguf/utils/getGgufFileTypeName.js +13 -0
  359. package/dist/gguf/utils/getGgufFileTypeName.js.map +1 -0
  360. package/dist/gguf/utils/getGgufMetadataArchitectureData.d.ts +3 -0
  361. package/dist/gguf/utils/getGgufMetadataArchitectureData.js +4 -0
  362. package/dist/gguf/utils/getGgufMetadataArchitectureData.js.map +1 -0
  363. package/dist/gguf/utils/normalizeGgufDownloadUrl.d.ts +1 -0
  364. package/dist/gguf/utils/normalizeGgufDownloadUrl.js +16 -0
  365. package/dist/gguf/utils/normalizeGgufDownloadUrl.js.map +1 -0
  366. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.d.ts +2 -0
  367. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js +39 -0
  368. package/dist/gguf/utils/resolveBinarySplitGgufPartUrls.js.map +1 -0
  369. package/dist/gguf/utils/resolveSplitGgufParts.d.ts +7 -0
  370. package/dist/gguf/utils/resolveSplitGgufParts.js +55 -0
  371. package/dist/gguf/utils/resolveSplitGgufParts.js.map +1 -0
  372. package/dist/index.d.ts +37 -17
  373. package/dist/index.js +33 -14
  374. package/dist/index.js.map +1 -1
  375. package/dist/state.d.ts +4 -0
  376. package/dist/state.js +14 -0
  377. package/dist/state.js.map +1 -1
  378. package/dist/types.d.ts +53 -2
  379. package/dist/types.js.map +1 -1
  380. package/dist/utils/DisposeGuard.d.ts +13 -0
  381. package/dist/utils/DisposeGuard.js +120 -0
  382. package/dist/utils/DisposeGuard.js.map +1 -0
  383. package/dist/utils/InsufficientMemoryError.d.ts +3 -0
  384. package/dist/utils/InsufficientMemoryError.js +6 -0
  385. package/dist/utils/InsufficientMemoryError.js.map +1 -0
  386. package/dist/utils/LlamaText.d.ts +50 -25
  387. package/dist/utils/LlamaText.js +367 -155
  388. package/dist/utils/LlamaText.js.map +1 -1
  389. package/dist/utils/StopGenerationDetector.d.ts +1 -1
  390. package/dist/utils/StopGenerationDetector.js +23 -18
  391. package/dist/utils/StopGenerationDetector.js.map +1 -1
  392. package/dist/utils/TokenStreamRegulator.d.ts +8 -4
  393. package/dist/utils/TokenStreamRegulator.js +78 -8
  394. package/dist/utils/TokenStreamRegulator.js.map +1 -1
  395. package/dist/utils/UnsupportedError.d.ts +2 -0
  396. package/dist/utils/UnsupportedError.js +7 -0
  397. package/dist/utils/UnsupportedError.js.map +1 -0
  398. package/dist/utils/cmake.js +38 -20
  399. package/dist/utils/cmake.js.map +1 -1
  400. package/dist/utils/createModelDownloader.d.ts +102 -0
  401. package/dist/utils/createModelDownloader.js +226 -0
  402. package/dist/utils/createModelDownloader.js.map +1 -0
  403. package/dist/utils/findBestOption.d.ts +4 -0
  404. package/dist/utils/findBestOption.js +15 -0
  405. package/dist/utils/findBestOption.js.map +1 -0
  406. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js +18 -8
  407. package/dist/utils/findCharacterRemovalCountToFitChatHistoryInContext.js.map +1 -1
  408. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.d.ts +5 -0
  409. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js +11 -0
  410. package/dist/utils/gbnfJson/getGbnfGrammarForGbnfJsonSchema.js.map +1 -0
  411. package/dist/utils/gbnfJson/terminals/GbnfArray.d.ts +3 -1
  412. package/dist/utils/gbnfJson/terminals/GbnfArray.js +10 -5
  413. package/dist/utils/gbnfJson/terminals/GbnfArray.js.map +1 -1
  414. package/dist/utils/gbnfJson/terminals/GbnfBoolean.d.ts +1 -1
  415. package/dist/utils/gbnfJson/terminals/GbnfBoolean.js.map +1 -1
  416. package/dist/utils/gbnfJson/terminals/GbnfBooleanValue.js.map +1 -1
  417. package/dist/utils/gbnfJson/terminals/GbnfGrammar.js.map +1 -1
  418. package/dist/utils/gbnfJson/terminals/GbnfNull.d.ts +1 -1
  419. package/dist/utils/gbnfJson/terminals/GbnfNull.js.map +1 -1
  420. package/dist/utils/gbnfJson/terminals/GbnfNumber.d.ts +1 -1
  421. package/dist/utils/gbnfJson/terminals/GbnfNumber.js.map +1 -1
  422. package/dist/utils/gbnfJson/terminals/GbnfNumberValue.js.map +1 -1
  423. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.d.ts +3 -1
  424. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js +9 -4
  425. package/dist/utils/gbnfJson/terminals/GbnfObjectMap.js.map +1 -1
  426. package/dist/utils/gbnfJson/terminals/GbnfOr.js.map +1 -1
  427. package/dist/utils/gbnfJson/terminals/GbnfRepetition.d.ts +9 -0
  428. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js +37 -0
  429. package/dist/utils/gbnfJson/terminals/GbnfRepetition.js.map +1 -0
  430. package/dist/utils/gbnfJson/terminals/GbnfString.d.ts +1 -1
  431. package/dist/utils/gbnfJson/terminals/GbnfString.js +23 -5
  432. package/dist/utils/gbnfJson/terminals/GbnfString.js.map +1 -1
  433. package/dist/utils/gbnfJson/terminals/GbnfStringValue.js.map +1 -1
  434. package/dist/utils/gbnfJson/terminals/GbnfVerbatimText.js.map +1 -1
  435. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.d.ts +7 -4
  436. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js +37 -9
  437. package/dist/utils/gbnfJson/terminals/GbnfWhitespace.js.map +1 -1
  438. package/dist/utils/gbnfJson/terminals/gbnfConsts.d.ts +5 -4
  439. package/dist/utils/gbnfJson/terminals/gbnfConsts.js +14 -3
  440. package/dist/utils/gbnfJson/terminals/gbnfConsts.js.map +1 -1
  441. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.d.ts +10 -0
  442. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js +15 -0
  443. package/dist/utils/gbnfJson/utils/GbnfJsonScopeState.js.map +1 -0
  444. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.d.ts +2 -1
  445. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js +6 -5
  446. package/dist/utils/gbnfJson/utils/getGbnfJsonTerminalForGbnfJsonSchema.js.map +1 -1
  447. package/dist/utils/getBuildDefaults.d.ts +1 -2
  448. package/dist/utils/getBuildDefaults.js +2 -3
  449. package/dist/utils/getBuildDefaults.js.map +1 -1
  450. package/dist/utils/getConsoleLogPrefix.d.ts +1 -0
  451. package/dist/utils/getConsoleLogPrefix.js +10 -0
  452. package/dist/utils/getConsoleLogPrefix.js.map +1 -0
  453. package/dist/utils/getGrammarsFolder.d.ts +2 -1
  454. package/dist/utils/getGrammarsFolder.js +8 -7
  455. package/dist/utils/getGrammarsFolder.js.map +1 -1
  456. package/dist/utils/getModuleVersion.d.ts +1 -0
  457. package/dist/utils/getModuleVersion.js +13 -0
  458. package/dist/utils/getModuleVersion.js.map +1 -0
  459. package/dist/utils/getQueuedTokensBeforeStopTrigger.d.ts +6 -0
  460. package/dist/utils/getQueuedTokensBeforeStopTrigger.js +22 -0
  461. package/dist/utils/getQueuedTokensBeforeStopTrigger.js.map +1 -0
  462. package/dist/utils/getReadableContextSize.d.ts +1 -0
  463. package/dist/utils/getReadableContextSize.js +7 -0
  464. package/dist/utils/getReadableContextSize.js.map +1 -0
  465. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js +15 -11
  466. package/dist/utils/getTypeScriptTypeStringForGbnfJsonSchema.js.map +1 -1
  467. package/dist/utils/gitReleaseBundles.js +73 -5
  468. package/dist/utils/gitReleaseBundles.js.map +1 -1
  469. package/dist/utils/hashString.d.ts +1 -0
  470. package/dist/utils/hashString.js +8 -0
  471. package/dist/utils/hashString.js.map +1 -0
  472. package/dist/utils/isLockfileActive.d.ts +4 -0
  473. package/dist/utils/isLockfileActive.js +12 -0
  474. package/dist/utils/isLockfileActive.js.map +1 -0
  475. package/dist/utils/isToken.d.ts +2 -0
  476. package/dist/utils/isToken.js +4 -0
  477. package/dist/utils/isToken.js.map +1 -0
  478. package/dist/utils/isUrl.d.ts +1 -0
  479. package/dist/utils/isUrl.js +15 -0
  480. package/dist/utils/isUrl.js.map +1 -0
  481. package/dist/utils/mergeUnionTypes.d.ts +10 -0
  482. package/dist/utils/mergeUnionTypes.js +2 -0
  483. package/dist/utils/mergeUnionTypes.js.map +1 -0
  484. package/dist/utils/parseModelFileName.d.ts +1 -0
  485. package/dist/utils/parseModelFileName.js +6 -1
  486. package/dist/utils/parseModelFileName.js.map +1 -1
  487. package/dist/utils/parseTextTemplate.d.ts +66 -0
  488. package/dist/utils/parseTextTemplate.js +116 -0
  489. package/dist/utils/parseTextTemplate.js.map +1 -0
  490. package/dist/utils/prettyPrintObject.d.ts +10 -0
  491. package/dist/utils/prettyPrintObject.js +84 -0
  492. package/dist/utils/prettyPrintObject.js.map +1 -0
  493. package/dist/utils/removeNullFields.d.ts +2 -1
  494. package/dist/utils/removeNullFields.js +8 -0
  495. package/dist/utils/removeNullFields.js.map +1 -1
  496. package/dist/utils/resolveGithubRelease.d.ts +2 -0
  497. package/dist/utils/resolveGithubRelease.js +36 -0
  498. package/dist/utils/resolveGithubRelease.js.map +1 -0
  499. package/dist/utils/runtime.d.ts +4 -0
  500. package/dist/utils/runtime.js +8 -0
  501. package/dist/utils/runtime.js.map +1 -0
  502. package/dist/utils/spawnCommand.d.ts +11 -1
  503. package/dist/utils/spawnCommand.js +56 -6
  504. package/dist/utils/spawnCommand.js.map +1 -1
  505. package/dist/utils/tokenizeInput.d.ts +3 -0
  506. package/dist/utils/tokenizeInput.js +12 -0
  507. package/dist/utils/tokenizeInput.js.map +1 -0
  508. package/dist/utils/utilTypes.d.ts +3 -0
  509. package/dist/utils/utilTypes.js +2 -0
  510. package/dist/utils/utilTypes.js.map +1 -0
  511. package/dist/utils/waitForLockfileRelease.d.ts +5 -0
  512. package/dist/utils/waitForLockfileRelease.js +20 -0
  513. package/dist/utils/waitForLockfileRelease.js.map +1 -0
  514. package/dist/utils/withLockfile.d.ts +7 -0
  515. package/dist/utils/withLockfile.js +44 -0
  516. package/dist/utils/withLockfile.js.map +1 -0
  517. package/dist/utils/withOra.d.ts +2 -0
  518. package/dist/utils/withOra.js +22 -6
  519. package/dist/utils/withOra.js.map +1 -1
  520. package/dist/utils/withProgressLog.d.ts +23 -0
  521. package/dist/utils/withProgressLog.js +211 -0
  522. package/dist/utils/withProgressLog.js.map +1 -0
  523. package/dist/utils/withStatusLogs.d.ts +2 -1
  524. package/dist/utils/withStatusLogs.js +12 -9
  525. package/dist/utils/withStatusLogs.js.map +1 -1
  526. package/llama/.clang-format +1 -2
  527. package/llama/CMakeLists.txt +115 -4
  528. package/llama/addon.cpp +1318 -99
  529. package/llama/binariesGithubRelease.json +1 -1
  530. package/llama/gitRelease.bundle +0 -0
  531. package/llama/gpuInfo/cuda-gpu-info.cu +120 -0
  532. package/llama/gpuInfo/cuda-gpu-info.h +10 -0
  533. package/llama/gpuInfo/metal-gpu-info.h +8 -0
  534. package/llama/gpuInfo/metal-gpu-info.mm +30 -0
  535. package/llama/gpuInfo/vulkan-gpu-info.cpp +83 -0
  536. package/llama/gpuInfo/vulkan-gpu-info.h +9 -0
  537. package/llama/grammars/README.md +11 -1
  538. package/llama/grammars/json.gbnf +1 -1
  539. package/llama/grammars/json_arr.gbnf +1 -1
  540. package/llama/llama.cpp.info.json +4 -0
  541. package/llama/toolchains/win32.host-x64.target-arm64.cmake +41 -0
  542. package/llamaBins/linux-arm64/_nlcBuildMetadata.json +1 -0
  543. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  544. package/llamaBins/linux-armv7l/_nlcBuildMetadata.json +1 -0
  545. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  546. package/llamaBins/linux-x64/_nlcBuildMetadata.json +1 -0
  547. package/llamaBins/linux-x64/llama-addon.node +0 -0
  548. package/llamaBins/linux-x64-cuda/_nlcBuildMetadata.json +1 -0
  549. package/llamaBins/linux-x64-cuda/llama-addon.node +0 -0
  550. package/llamaBins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -0
  551. package/llamaBins/linux-x64-vulkan/llama-addon.node +0 -0
  552. package/llamaBins/mac-arm64-metal/_nlcBuildMetadata.json +1 -0
  553. package/llamaBins/mac-arm64-metal/default.metallib +0 -0
  554. package/llamaBins/mac-arm64-metal/llama-addon.node +0 -0
  555. package/llamaBins/mac-x64/_nlcBuildMetadata.json +1 -0
  556. package/llamaBins/mac-x64/llama-addon.node +0 -0
  557. package/llamaBins/win-arm64/_nlcBuildMetadata.json +1 -0
  558. package/llamaBins/win-arm64/llama-addon.exp +0 -0
  559. package/llamaBins/win-arm64/llama-addon.lib +0 -0
  560. package/llamaBins/win-arm64/llama-addon.node +0 -0
  561. package/llamaBins/win-x64/_nlcBuildMetadata.json +1 -0
  562. package/llamaBins/win-x64/llama-addon.exp +0 -0
  563. package/llamaBins/win-x64/llama-addon.lib +0 -0
  564. package/llamaBins/win-x64/llama-addon.node +0 -0
  565. package/llamaBins/win-x64-cuda/_nlcBuildMetadata.json +1 -0
  566. package/llamaBins/win-x64-cuda/llama-addon.exp +0 -0
  567. package/llamaBins/win-x64-cuda/llama-addon.lib +0 -0
  568. package/llamaBins/win-x64-cuda/llama-addon.node +0 -0
  569. package/llamaBins/win-x64-vulkan/_nlcBuildMetadata.json +1 -0
  570. package/llamaBins/win-x64-vulkan/llama-addon.exp +0 -0
  571. package/llamaBins/win-x64-vulkan/llama-addon.lib +0 -0
  572. package/llamaBins/win-x64-vulkan/llama-addon.node +0 -0
  573. package/package.json +61 -34
  574. package/templates/package.json +10 -0
  575. package/dist/AbortError.d.ts +0 -2
  576. package/dist/AbortError.js +0 -7
  577. package/dist/AbortError.js.map +0 -1
  578. package/dist/chatWrappers/LlamaChatWrapper.js.map +0 -1
  579. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +0 -13
  580. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +0 -55
  581. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +0 -1
  582. package/dist/llamaEvaluator/LlamaBins.d.ts +0 -18
  583. package/dist/llamaEvaluator/LlamaBins.js +0 -5
  584. package/dist/llamaEvaluator/LlamaBins.js.map +0 -1
  585. package/dist/llamaEvaluator/LlamaChat/LlamaChat.js.map +0 -1
  586. package/dist/llamaEvaluator/LlamaChat/utils/FunctionCallGrammar.js.map +0 -1
  587. package/dist/llamaEvaluator/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js.map +0 -1
  588. package/dist/llamaEvaluator/LlamaChatSession/LlamaChatSession.js.map +0 -1
  589. package/dist/llamaEvaluator/LlamaChatSession/utils/defineChatSessionFunction.js.map +0 -1
  590. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +0 -1
  591. package/dist/llamaEvaluator/LlamaContext/types.d.ts +0 -86
  592. package/dist/llamaEvaluator/LlamaContext/types.js.map +0 -1
  593. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +0 -1
  594. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +0 -1
  595. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +0 -2
  596. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +0 -1
  597. package/dist/llamaEvaluator/LlamaGrammar.js.map +0 -1
  598. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +0 -1
  599. package/dist/llamaEvaluator/LlamaJsonSchemaGrammar.js.map +0 -1
  600. package/dist/llamaEvaluator/LlamaModel.d.ts +0 -119
  601. package/dist/llamaEvaluator/LlamaModel.js +0 -322
  602. package/dist/llamaEvaluator/LlamaModel.js.map +0 -1
  603. package/dist/utils/binariesGithubRelease.js.map +0 -1
  604. package/dist/utils/clearLlamaBuild.d.ts +0 -1
  605. package/dist/utils/clearLlamaBuild.js +0 -12
  606. package/dist/utils/clearLlamaBuild.js.map +0 -1
  607. package/dist/utils/cloneLlamaCppRepo.d.ts +0 -2
  608. package/dist/utils/cloneLlamaCppRepo.js +0 -102
  609. package/dist/utils/cloneLlamaCppRepo.js.map +0 -1
  610. package/dist/utils/compileLLamaCpp.d.ts +0 -8
  611. package/dist/utils/compileLLamaCpp.js +0 -132
  612. package/dist/utils/compileLLamaCpp.js.map +0 -1
  613. package/dist/utils/getBin.js +0 -78
  614. package/dist/utils/getBin.js.map +0 -1
  615. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.d.ts +0 -2
  616. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js +0 -9
  617. package/dist/utils/getGbnfGrammarForGbnfJsonSchema.js.map +0 -1
  618. package/dist/utils/getReleaseInfo.d.ts +0 -7
  619. package/dist/utils/getReleaseInfo.js +0 -30
  620. package/dist/utils/getReleaseInfo.js.map +0 -1
  621. package/dist/utils/parseModelTypeDescription.d.ts +0 -6
  622. package/dist/utils/parseModelTypeDescription.js +0 -9
  623. package/dist/utils/parseModelTypeDescription.js.map +0 -1
  624. package/dist/utils/resolveChatWrapper.d.ts +0 -4
  625. package/dist/utils/resolveChatWrapper.js +0 -16
  626. package/dist/utils/resolveChatWrapper.js.map +0 -1
  627. package/dist/utils/usedBinFlag.d.ts +0 -6
  628. package/dist/utils/usedBinFlag.js +0 -15
  629. package/dist/utils/usedBinFlag.js.map +0 -1
  630. package/llama/usedBin.json +0 -3
  631. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  632. /package/dist/{utils → bindings/utils}/binariesGithubRelease.d.ts +0 -0
  633. /package/dist/{llamaEvaluator → evaluator}/LlamaChat/utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.d.ts +0 -0
  634. /package/dist/{llamaEvaluator → evaluator}/LlamaContext/types.js +0 -0
  635. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.d.ts +0 -0
  636. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/firstInFirstOutStrategy.js +0 -0
  637. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.d.ts +0 -0
  638. /package/dist/{llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies → evaluator/LlamaContext/utils/batchItemsPrioritizationStrategies}/maximumParallelismStrategy.js +0 -0
  639. /package/dist/{llamaEvaluator → evaluator}/LlamaGrammarEvaluationState.d.ts +0 -0
@@ -1,10 +1,13 @@
1
1
  import { DisposeAggregator, DisposedError, EventRelay } from "lifecycle-utils";
2
- import { resolveChatWrapper } from "../../utils/resolveChatWrapper.js";
3
2
  import { removeNullFields } from "../../utils/removeNullFields.js";
4
3
  import { LlamaGrammarEvaluationState } from "../LlamaGrammarEvaluationState.js";
5
- import { AbortError } from "../../AbortError.js";
6
4
  import { StopGenerationDetector } from "../../utils/StopGenerationDetector.js";
7
5
  import { TokenStreamRegulator } from "../../utils/TokenStreamRegulator.js";
6
+ import { UNKNOWN_UNICODE_CHAR } from "../../consts.js";
7
+ import { getQueuedTokensBeforeStopTrigger } from "../../utils/getQueuedTokensBeforeStopTrigger.js";
8
+ import { resolveChatWrapper } from "../../chatWrappers/utils/resolveChatWrapper.js";
9
+ import { GeneralChatWrapper } from "../../chatWrappers/GeneralChatWrapper.js";
10
+ import { getConsoleLogPrefix } from "../../utils/getConsoleLogPrefix.js";
8
11
  import { eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy } from "./utils/contextShiftStrategies/eraseFirstResponseAndKeepFirstSystemChatContextShiftStrategy.js";
9
12
  import { FunctionCallGrammar, LlamaFunctionCallValidationError } from "./utils/FunctionCallGrammar.js";
10
13
  const defaultContextShiftOptions = {
@@ -12,7 +15,6 @@ const defaultContextShiftOptions = {
12
15
  strategy: "eraseFirstResponseAndKeepFirstSystem",
13
16
  lastEvaluationMetadata: null
14
17
  };
15
- const UNKNOWN_UNICODE_CHAR = "\ufffd";
16
18
  export class LlamaChat {
17
19
  /** @internal */ _chatWrapper;
18
20
  /** @internal */ _disposeAggregator = new DisposeAggregator();
@@ -30,7 +32,14 @@ export class LlamaChat {
30
32
  this.dispose();
31
33
  }));
32
34
  this._disposeAggregator.add(this.onDispose.dispatchEvent);
33
- this._chatWrapper = resolveChatWrapper(chatWrapper, contextSequence.model);
35
+ this._chatWrapper = chatWrapper === "auto"
36
+ ? (resolveChatWrapper({
37
+ bosString: contextSequence.model.tokens.bosString,
38
+ filename: contextSequence.model.filename,
39
+ fileInfo: contextSequence.model.fileInfo,
40
+ tokenizer: contextSequence.model.tokenizer
41
+ }) ?? new GeneralChatWrapper())
42
+ : chatWrapper;
34
43
  }
35
44
  dispose({ disposeSequence = this._autoDisposeSequence } = {}) {
36
45
  if (this._sequence == null)
@@ -63,12 +72,12 @@ export class LlamaChat {
63
72
  get model() {
64
73
  return this.sequence.model;
65
74
  }
66
- async generateResponse(history, { onToken, signal, maxTokens, temperature, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
75
+ async generateResponse(history, { onToken, signal, stopOnAbortSignal = false, maxTokens, temperature, minP, topK, topP, grammar, trimWhitespaceSuffix = false, repeatPenalty = {}, tokenBias, evaluationPriority = 5, functions, documentFunctionParams, contextShift = defaultContextShiftOptions, customStopTriggers, lastEvaluationContextWindow: { history: lastEvaluationContextWindowHistory, minimumOverlapPercentageToPreventContextShift = 0.5 } = {} } = {}) {
67
76
  const functionsEnabled = (functions != null && Object.keys(functions).length > 0);
68
77
  if (grammar != null && functionsEnabled)
69
78
  throw new Error("Using both grammar and functions is not supported yet");
70
79
  if (signal?.aborted)
71
- throw new AbortError();
80
+ throw signal.reason;
72
81
  if (this._sequence == null)
73
82
  throw new DisposedError();
74
83
  let resolvedHistory = this._sequence.isLoadedToMemory
@@ -81,7 +90,6 @@ export class LlamaChat {
81
90
  });
82
91
  const model = this._sequence.model;
83
92
  const context = this._sequence.context;
84
- const eosToken = model.tokens.eos;
85
93
  const resolvedContextShift = {
86
94
  ...defaultContextShiftOptions,
87
95
  ...removeNullFields(contextShift)
@@ -99,7 +107,7 @@ export class LlamaChat {
99
107
  ? new LlamaGrammarEvaluationState({ grammar })
100
108
  : undefined;
101
109
  let functionsGrammar = functionsEnabled
102
- ? new FunctionCallGrammar(functions, this._chatWrapper, false)
110
+ ? new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false)
103
111
  : undefined;
104
112
  let functionsEvaluationState = (functionsEnabled && functionsGrammar != null)
105
113
  ? new LlamaGrammarEvaluationState({
@@ -108,6 +116,7 @@ export class LlamaChat {
108
116
  : undefined;
109
117
  const streamRegulator = new TokenStreamRegulator();
110
118
  const stopGenerationDetector = new StopGenerationDetector();
119
+ const customStopGenerationTriggersDetector = new StopGenerationDetector();
111
120
  const functionSyntaxStartDetector = new StopGenerationDetector();
112
121
  const functionSyntaxEndDetector = new StopGenerationDetector();
113
122
  const disengageInitiallyEngagedFunctionMode = new StopGenerationDetector();
@@ -121,8 +130,8 @@ export class LlamaChat {
121
130
  let lastContextWindowHistory = resolvedHistory;
122
131
  let lastHistoryCompressionMetadata = resolvedContextShift.lastEvaluationMetadata;
123
132
  const ensureNotAborted = () => {
124
- if (signal?.aborted)
125
- throw new AbortError();
133
+ if (signal?.aborted && (!stopOnAbortSignal || res.length === 0))
134
+ throw signal.reason;
126
135
  if (this._sequence == null)
127
136
  throw new DisposedError();
128
137
  };
@@ -132,7 +141,7 @@ export class LlamaChat {
132
141
  let punishTokens = res.slice(-repeatPenaltyLastTokens);
133
142
  if (punishTokensFilter != null)
134
143
  punishTokens = punishTokensFilter(punishTokens);
135
- if (!penalizeNewLine) {
144
+ if (penalizeNewLine == null || !penalizeNewLine) {
136
145
  const nlToken = model.tokens.nl;
137
146
  if (nlToken != null)
138
147
  punishTokens = punishTokens.filter(token => token !== nlToken);
@@ -173,7 +182,7 @@ export class LlamaChat {
173
182
  ignoredStartTextTokens = mostExhaustiveTriggeredStop.stopTrigger
174
183
  .map((stopTrigger) => {
175
184
  if (typeof stopTrigger === "string")
176
- return model.tokenize(stopTrigger);
185
+ return model.tokenize(stopTrigger, false, "trimLeadingSpace");
177
186
  else
178
187
  return [stopTrigger];
179
188
  })
@@ -181,7 +190,7 @@ export class LlamaChat {
181
190
  const newPendingTokens = mostExhaustiveTriggeredStop.remainingGenerations
182
191
  .map((generation) => {
183
192
  if (typeof generation === "string")
184
- return model.tokenize(generation);
193
+ return model.tokenize(generation, false, "trimLeadingSpace");
185
194
  else
186
195
  return generation;
187
196
  })
@@ -192,8 +201,11 @@ export class LlamaChat {
192
201
  }
193
202
  }
194
203
  };
204
+ if (customStopTriggers != null)
205
+ StopGenerationDetector.resolveStopTriggers(customStopTriggers, model.tokenizer)
206
+ .map((stopTrigger) => customStopGenerationTriggersDetector.addStopTrigger(stopTrigger));
195
207
  if (grammar != null)
196
- StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenize)
208
+ StopGenerationDetector.resolveStopTriggers(grammar.stopGenerationTriggers, model.tokenizer)
197
209
  .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
198
210
  if (functions != null && Object.keys(functions).length > 0)
199
211
  functionSyntaxStartDetector.addStopTrigger([this._chatWrapper.settings.functions.call.prefix]);
@@ -206,7 +218,7 @@ export class LlamaChat {
206
218
  resolvedHistory: getResolvedHistoryWithCurrentModelResponse(),
207
219
  resolvedContextShift,
208
220
  lastHistoryCompressionMetadata,
209
- pendingTokensCount: pendingTokens.length + queuedChunkTokens.length,
221
+ pendingTokensCount: ignoredStartTextTokens.length + pendingTokens.length + queuedChunkTokens.length,
210
222
  isFirstEvaluation,
211
223
  chatWrapper: this._chatWrapper,
212
224
  lastEvaluationContextWindowHistory,
@@ -218,15 +230,15 @@ export class LlamaChat {
218
230
  });
219
231
  ensureNotAborted();
220
232
  if (generatedTokens === 0) {
221
- StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenize)
233
+ StopGenerationDetector.resolveStopTriggers(ignoreStartText, model.tokenizer)
222
234
  .map((stopTrigger) => ignoreStartTextDetector.addStopTrigger(stopTrigger));
223
235
  if (functionsEnabled) {
224
236
  initiallyEngagedFunctionMode = functionCallInitiallyEngaged;
225
- StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenize)
237
+ StopGenerationDetector.resolveStopTriggers(disengageInitiallyEngagedFunctionCall, model.tokenizer)
226
238
  .map((stopTrigger) => disengageInitiallyEngagedFunctionMode.addStopTrigger(stopTrigger));
227
239
  if (initiallyEngagedFunctionMode) {
228
240
  inFunctionEvaluationMode = true;
229
- functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, true);
241
+ functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, true);
230
242
  functionsEvaluationState = new LlamaGrammarEvaluationState({
231
243
  grammar: functionsGrammar
232
244
  });
@@ -239,10 +251,10 @@ export class LlamaChat {
239
251
  lastContextWindowHistory = contextWindowHistory;
240
252
  const contextWindowLastModelResponse = getLastTextModelResponseFromChatHistory(contextWindowHistory);
241
253
  const contextWindowsRes = [];
242
- StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenize)
254
+ StopGenerationDetector.resolveStopTriggers(stopGenerationTriggers, model.tokenizer)
243
255
  .map((stopTrigger) => stopGenerationDetector.addStopTrigger(stopTrigger));
244
256
  if (functionsGrammar != null)
245
- StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenize)
257
+ StopGenerationDetector.resolveStopTriggers(functionsGrammar.stopGenerationTriggers, model.tokenizer)
246
258
  .map((stopTrigger) => functionSyntaxEndDetector.addStopTrigger(stopTrigger));
247
259
  let { firstDifferentIndex } = this._sequence.compareContextTokens(tokens);
248
260
  // we need to decode at least one token to generate a response
@@ -257,7 +269,7 @@ export class LlamaChat {
257
269
  ensureNotAborted();
258
270
  }
259
271
  const evaluationIterator = this._sequence.evaluate(tokens, removeNullFields({
260
- temperature, topK, topP,
272
+ temperature, minP, topK, topP,
261
273
  grammarEvaluationState: () => {
262
274
  if (inFunctionEvaluationMode)
263
275
  return functionsEvaluationState;
@@ -269,194 +281,279 @@ export class LlamaChat {
269
281
  frequencyPenalty,
270
282
  presencePenalty
271
283
  },
284
+ tokenBias,
272
285
  evaluationPriority,
273
- yieldEosToken: true
286
+ yieldEogToken: true
274
287
  }));
275
- for await (const token of evaluationIterator) {
276
- ensureNotAborted();
277
- generatedTokens++;
278
- const tokens = [token];
279
- const text = model.detokenize([token]);
280
- const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
281
- if (initiallyEngagedFunctionMode)
282
- disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
283
- if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
284
- locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
285
- }
286
- else {
287
- while (locksToReleaseOnValidGeneration.length > 0)
288
- locksToReleaseOnValidGeneration.shift().dispose();
289
- }
290
- functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
291
- if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
292
- initiallyEngagedFunctionMode = false;
293
- let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
294
- if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
295
- const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
296
- try {
297
- const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
298
- enableInternalBuiltinFunctions: true,
299
- initialFunctionCallEngaged: true
300
- });
301
- const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
302
- if (internalBuiltinFunctions[functionName] != null) {
303
- shouldStopFunctionEvaluationMode = true;
288
+ try {
289
+ let currentIteration = await evaluationIterator.next();
290
+ while (currentIteration.done !== true) {
291
+ const token = currentIteration.value;
292
+ let replacementToken = undefined;
293
+ ensureNotAborted();
294
+ generatedTokens++;
295
+ const tokens = [token];
296
+ const text = model.detokenize([token]);
297
+ const queuedTokenRelease = streamRegulator.addChunk({ tokens, text });
298
+ if (initiallyEngagedFunctionMode)
299
+ disengageInitiallyEngagedFunctionMode.recordGeneration({ text, tokens, startNewChecks: generatedTokens === 1 });
300
+ if (text === UNKNOWN_UNICODE_CHAR || ((grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) && text.trim() === "")) {
301
+ locksToReleaseOnValidGeneration.push(queuedTokenRelease.createTextIndexLock(0));
302
+ }
303
+ else {
304
+ while (locksToReleaseOnValidGeneration.length > 0)
305
+ locksToReleaseOnValidGeneration.shift().dispose();
306
+ }
307
+ functionSyntaxStartDetector.recordGeneration({ text, tokens, queuedTokenRelease });
308
+ if (initiallyEngagedFunctionMode && disengageInitiallyEngagedFunctionMode.hasTriggeredStops) {
309
+ initiallyEngagedFunctionMode = false;
310
+ let shouldStopFunctionEvaluationMode = !functionSyntaxStartDetector.hasTriggeredStops;
311
+ if (!shouldStopFunctionEvaluationMode && functionsEnabled && functionsGrammar != null) {
312
+ const functionCallText = model.detokenize([...functionCallTokens, ...tokens]);
313
+ try {
314
+ const functionName = functionsGrammar.parseFunctionNameFromPartialCall(functionCallText, {
315
+ enableInternalBuiltinFunctions: true,
316
+ initialFunctionCallEngaged: true
317
+ });
318
+ const internalBuiltinFunctions = this._chatWrapper.getInternalBuiltinFunctions({ initialFunctionCallEngaged: true });
319
+ if (internalBuiltinFunctions[functionName] != null) {
320
+ shouldStopFunctionEvaluationMode = true;
321
+ }
322
+ }
323
+ catch (err) {
324
+ if (!(err instanceof LlamaFunctionCallValidationError))
325
+ throw err;
304
326
  }
305
327
  }
306
- catch (err) {
307
- if (!(err instanceof LlamaFunctionCallValidationError))
308
- throw err;
328
+ if (shouldStopFunctionEvaluationMode) {
329
+ inFunctionEvaluationMode = false;
330
+ functionsGrammar = new FunctionCallGrammar(model._llama, functions, this._chatWrapper, false);
331
+ functionsEvaluationState = new LlamaGrammarEvaluationState({
332
+ grammar: functionsGrammar
333
+ });
334
+ functionCallTokens.length = 0;
335
+ while (functionCallTokenSyntaxLocks.length > 0)
336
+ functionCallTokenSyntaxLocks.shift().dispose();
337
+ functionSyntaxStartDetector.clearInProgressStops();
338
+ functionSyntaxStartDetector.clearTriggeredStops();
339
+ functionSyntaxEndDetector.clearInProgressStops();
340
+ functionSyntaxEndDetector.clearTriggeredStops();
309
341
  }
310
342
  }
311
- if (shouldStopFunctionEvaluationMode) {
312
- inFunctionEvaluationMode = false;
313
- functionsGrammar = new FunctionCallGrammar(functions, this._chatWrapper, false);
314
- functionsEvaluationState = new LlamaGrammarEvaluationState({
315
- grammar: functionsGrammar
316
- });
317
- functionCallTokens.length = 0;
318
- while (functionCallTokenSyntaxLocks.length > 0)
319
- functionCallTokenSyntaxLocks.shift().dispose();
320
- functionSyntaxStartDetector.clearInProgressStops();
321
- functionSyntaxStartDetector.clearTriggeredStops();
322
- functionSyntaxEndDetector.clearInProgressStops();
323
- functionSyntaxEndDetector.clearTriggeredStops();
343
+ if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
344
+ functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
345
+ inFunctionEvaluationMode = true;
346
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
347
+ stopGenerationDetector.clearTriggeredStops();
348
+ stopGenerationDetector.clearInProgressStops();
349
+ customStopGenerationTriggersDetector.clearTriggeredStops();
350
+ customStopGenerationTriggersDetector.clearInProgressStops();
351
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
352
+ const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
353
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
354
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
355
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
356
+ const [firstRemainingGenerationAfterStop] = triggeredStops
357
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
358
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
359
+ .flat(1);
360
+ const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
361
+ ? ""
362
+ : typeof firstRemainingGenerationAfterStop === "string"
363
+ ? firstRemainingGenerationAfterStop
364
+ : model.detokenize(firstRemainingGenerationAfterStop);
365
+ functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix, false, "trimLeadingSpace"));
366
+ for (const functionCallToken of functionCallTokens)
367
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
368
+ // these tokens have to be verified that they match the function calling syntax grammar before they can be accepted,
369
+ // or the context state should be modified to not include the incompatible tokens
370
+ const remainingTextTokens = model.tokenize(remainingTextAfterStop, false, "trimLeadingSpace");
371
+ let unfitTokens = [];
372
+ for (let i = 0; i < remainingTextTokens.length; i++) {
373
+ const remainingToken = remainingTextTokens[i];
374
+ const canBeNextToken = context._canBeNextTokenForGrammarEvaluationState(functionsEvaluationState, remainingToken);
375
+ if (!canBeNextToken) {
376
+ unfitTokens = remainingTextTokens.slice(i);
377
+ break;
378
+ }
379
+ context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, remainingToken);
380
+ functionCallTokens.push(remainingToken);
381
+ }
382
+ if (unfitTokens.length > 0) {
383
+ const unfitTokensText = model.detokenize(unfitTokens); // the current token text must end with it
384
+ const currentTokenText = queuedTokenRelease.text;
385
+ let replacementTokens;
386
+ if (!currentTokenText.endsWith(unfitTokensText)) {
387
+ console.warn(getConsoleLogPrefix() + "The current token text does not end with the unfit function call syntax tokens text");
388
+ replacementTokens = remainingTextTokens.slice(0, -unfitTokens.length);
389
+ }
390
+ else {
391
+ const newCurrentTokensText = currentTokenText.slice(0, -unfitTokensText.length);
392
+ replacementTokens = model.tokenize(newCurrentTokensText, false, "trimLeadingSpace");
393
+ }
394
+ if (replacementTokens.length > 0) {
395
+ replacementToken = replacementTokens[0];
396
+ queuedTokenRelease.modifyTokensAndText(replacementTokens, model.detokenize([replacementToken]));
397
+ }
398
+ }
324
399
  }
325
- }
326
- if (!inFunctionEvaluationMode && functionsEnabled && functionsGrammar != null &&
327
- functionSyntaxStartDetector.hasTriggeredStops && functionsEvaluationState != null) {
328
- inFunctionEvaluationMode = true;
329
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
330
- stopGenerationDetector.clearTriggeredStops();
331
- stopGenerationDetector.clearInProgressStops();
332
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
333
- const triggeredStops = functionSyntaxStartDetector.getTriggeredStops();
334
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
335
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
336
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
337
- const [firstRemainingGenerationAfterStop] = triggeredStops
338
- .map((stopTrigger) => stopTrigger.remainingGenerations)
339
- .filter((remainingGenerations) => remainingGenerations.length > 0)
340
- .flat(1);
341
- const remainingTextAfterStop = (firstRemainingGenerationAfterStop == null || firstRemainingGenerationAfterStop.length === 0)
342
- ? ""
343
- : typeof firstRemainingGenerationAfterStop === "string"
344
- ? firstRemainingGenerationAfterStop
345
- : model.detokenize(firstRemainingGenerationAfterStop);
346
- functionCallTokens.push(...model.tokenize(this._chatWrapper.settings.functions.call.prefix + remainingTextAfterStop));
347
- for (const functionCallToken of functionCallTokens)
348
- context._acceptTokenOnGrammarEvaluationState(functionsEvaluationState, functionCallToken);
349
- }
350
- else if (inFunctionEvaluationMode) {
351
- functionCallTokens.push(...tokens);
352
- functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
353
- functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
354
- }
355
- if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
356
- const functionCallText = model.detokenize(functionCallTokens);
357
- const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
358
- let modelResponse = model.detokenize(res);
359
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
360
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
361
- modelResponse = modelResponse.trimEnd();
362
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
400
+ else if (inFunctionEvaluationMode) {
401
+ functionCallTokens.push(...tokens);
402
+ functionCallTokenSyntaxLocks.push(queuedTokenRelease.createTextIndexLock(0));
403
+ functionSyntaxEndDetector.recordGeneration({ text, tokens, queuedTokenRelease });
363
404
  }
364
- return {
365
- response: modelResponse,
366
- lastEvaluation: {
367
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
368
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
369
- contextShiftMetadata: lastHistoryCompressionMetadata
370
- },
371
- // prevent infinite TS type instantiation
372
- functionCall: functionCall,
373
- metadata: {
374
- stopReason: "functionCall"
405
+ if (inFunctionEvaluationMode && functionSyntaxEndDetector.hasTriggeredStops && functionsGrammar != null) {
406
+ const functionCallText = model.detokenize(functionCallTokens);
407
+ const functionCall = functionsGrammar.parseFunctionCall(functionCallText);
408
+ let modelResponse = model.detokenize(res);
409
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
410
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
411
+ modelResponse = modelResponse.trimEnd();
412
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
375
413
  }
376
- };
377
- }
378
- if (!inFunctionEvaluationMode)
379
- stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
380
- pendingTokens.push(...streamRegulator.popFreeChunkTokens());
381
- removeFoundStartIgnoreTextsFromPendingTokens();
382
- if (stopGenerationDetector.hasTriggeredStops || token === eosToken) {
383
- const triggeredStops = stopGenerationDetector.getTriggeredStops();
384
- const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk();
385
- const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenize);
386
- pendingTokens.push(...queuedTokensBeforeStopTrigger);
387
- const [firstRemainingGenerationAfterStop] = triggeredStops
388
- .map((stopTrigger) => stopTrigger.remainingGenerations)
389
- .filter((remainingGenerations) => remainingGenerations.length > 0)
390
- .flat(1);
391
- removeFoundStartIgnoreTextsFromPendingTokens();
392
- if (pendingTokens.length > 0)
393
- onToken?.(pendingTokens.slice());
394
- res.push(...pendingTokens);
395
- contextWindowsRes.push(...pendingTokens);
396
- pendingTokens.length = 0;
397
- let modelResponse = model.detokenize(res);
398
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
399
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
400
- modelResponse = modelResponse.trimEnd();
401
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
414
+ return {
415
+ response: modelResponse,
416
+ lastEvaluation: {
417
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
418
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
419
+ contextShiftMetadata: lastHistoryCompressionMetadata
420
+ },
421
+ // prevent infinite TS type instantiation
422
+ functionCall: functionCall,
423
+ metadata: {
424
+ stopReason: "functionCall"
425
+ }
426
+ };
402
427
  }
403
- return {
404
- response: modelResponse,
405
- lastEvaluation: {
406
- contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
407
- cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
408
- contextShiftMetadata: lastHistoryCompressionMetadata
409
- },
410
- metadata: {
411
- remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
412
- stopReason: token === eosToken
413
- ? "eosToken"
414
- : "stopGenerationTrigger"
415
- }
416
- };
417
- }
418
- const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
419
- if (res.length === 0) {
420
- ignoreStartTextDetector.clearInProgressStops();
421
- ignoreStartTextDetector.clearTriggeredStops();
422
- ignoreStartTextDetector.recordGeneration({
423
- text: model.detokenize(pendingTokens),
424
- tokens: pendingTokens
425
- });
426
- }
427
- if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
428
+ if (!inFunctionEvaluationMode) {
429
+ stopGenerationDetector.recordGeneration({ text, tokens, queuedTokenRelease });
430
+ customStopGenerationTriggersDetector.recordGeneration({ text, tokens, queuedTokenRelease });
431
+ }
432
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
428
433
  removeFoundStartIgnoreTextsFromPendingTokens();
429
- if (pendingTokens.length > 0) {
430
- onToken?.(pendingTokens.slice());
434
+ if (stopGenerationDetector.hasTriggeredStops || customStopGenerationTriggersDetector.hasTriggeredStops ||
435
+ model.isEogToken(token)) {
436
+ stopGenerationDetector.clearInProgressStops();
437
+ customStopGenerationTriggersDetector.clearInProgressStops();
438
+ pendingTokens.push(...streamRegulator.popFreeChunkTokens());
439
+ const triggeredStops = stopGenerationDetector.hasTriggeredStops
440
+ ? stopGenerationDetector.getTriggeredStops()
441
+ : customStopGenerationTriggersDetector.getTriggeredStops();
442
+ const partiallyFreeTokens = streamRegulator.getPartiallyFreeChunk(model.tokenizer);
443
+ const queuedTokensBeforeStopTrigger = getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, model.tokenizer);
444
+ pendingTokens.push(...queuedTokensBeforeStopTrigger);
445
+ const [firstRemainingGenerationAfterStop] = triggeredStops
446
+ .map((stopTrigger) => stopTrigger.remainingGenerations)
447
+ .filter((remainingGenerations) => remainingGenerations.length > 0)
448
+ .flat(1);
449
+ removeFoundStartIgnoreTextsFromPendingTokens();
450
+ if (pendingTokens.length > 0)
451
+ onToken?.(pendingTokens.slice());
431
452
  res.push(...pendingTokens);
432
453
  contextWindowsRes.push(...pendingTokens);
433
454
  pendingTokens.length = 0;
434
- }
435
- }
436
- if (maxTokensTriggered) {
437
- let modelResponse = model.detokenize(res);
438
- let contextWindowModelResponse = model.detokenize(contextWindowsRes);
439
- if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
440
- modelResponse = modelResponse.trimEnd();
441
- contextWindowModelResponse = contextWindowModelResponse.trimEnd();
442
- }
443
- return {
444
- response: modelResponse,
445
- lastEvaluation: {
455
+ let modelResponse = model.detokenize(res);
456
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
457
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
458
+ modelResponse = modelResponse.trimEnd();
459
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
460
+ }
461
+ const lastEvaluation = {
446
462
  contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
447
463
  cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
448
464
  contextShiftMetadata: lastHistoryCompressionMetadata
449
- },
450
- metadata: {
451
- stopReason: "maxTokens"
465
+ };
466
+ const isEogToken = model.isEogToken(token);
467
+ if (isEogToken || stopGenerationDetector.hasTriggeredStops) {
468
+ return {
469
+ response: modelResponse,
470
+ lastEvaluation,
471
+ metadata: {
472
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
473
+ stopReason: isEogToken
474
+ ? "eogToken"
475
+ : "stopGenerationTrigger"
476
+ }
477
+ };
452
478
  }
453
- };
454
- }
455
- if (this._sequence.nextTokenIndex >= context.contextSize) {
456
- shouldContextShift = true;
457
- break;
479
+ return {
480
+ response: modelResponse,
481
+ lastEvaluation,
482
+ metadata: {
483
+ remainingGenerationAfterStop: firstRemainingGenerationAfterStop,
484
+ stopReason: "customStopTrigger",
485
+ customStopTrigger: triggeredStops[0].stopTrigger
486
+ }
487
+ };
488
+ }
489
+ const maxTokensTriggered = maxTokens != null && maxTokens > 0 && generatedTokens >= maxTokens;
490
+ if (res.length === 0) {
491
+ ignoreStartTextDetector.clearInProgressStops();
492
+ ignoreStartTextDetector.clearTriggeredStops();
493
+ ignoreStartTextDetector.recordGeneration({
494
+ text: model.detokenize(pendingTokens),
495
+ tokens: pendingTokens
496
+ });
497
+ }
498
+ if (pendingTokens.length > 0 && (maxTokensTriggered || !ignoreStartTextDetector.hasInProgressStops)) {
499
+ removeFoundStartIgnoreTextsFromPendingTokens();
500
+ if (pendingTokens.length > 0) {
501
+ onToken?.(pendingTokens.slice());
502
+ res.push(...pendingTokens);
503
+ contextWindowsRes.push(...pendingTokens);
504
+ pendingTokens.length = 0;
505
+ }
506
+ }
507
+ if (maxTokensTriggered) {
508
+ let modelResponse = model.detokenize(res);
509
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
510
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
511
+ modelResponse = modelResponse.trimEnd();
512
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
513
+ }
514
+ return {
515
+ response: modelResponse,
516
+ lastEvaluation: {
517
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
518
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
519
+ contextShiftMetadata: lastHistoryCompressionMetadata
520
+ },
521
+ metadata: {
522
+ stopReason: "maxTokens"
523
+ }
524
+ };
525
+ }
526
+ if (this._sequence.nextTokenIndex >= context.contextSize - 1) {
527
+ shouldContextShift = true;
528
+ break;
529
+ }
530
+ if (signal?.aborted && stopOnAbortSignal) {
531
+ if (res.length === 0)
532
+ throw signal.reason;
533
+ let modelResponse = model.detokenize(res);
534
+ let contextWindowModelResponse = model.detokenize(contextWindowsRes);
535
+ if (grammar?.trimWhitespaceSuffix || trimWhitespaceSuffix) {
536
+ modelResponse = modelResponse.trimEnd();
537
+ contextWindowModelResponse = contextWindowModelResponse.trimEnd();
538
+ }
539
+ return {
540
+ response: modelResponse,
541
+ lastEvaluation: {
542
+ contextWindow: setLastModelTextResponseInChatHistory(lastContextWindowHistory, contextWindowLastModelResponse + contextWindowModelResponse),
543
+ cleanHistory: setLastModelTextResponseInChatHistory(resolvedHistory, lastModelResponse + modelResponse),
544
+ contextShiftMetadata: lastHistoryCompressionMetadata
545
+ },
546
+ metadata: {
547
+ stopReason: "abort"
548
+ }
549
+ };
550
+ }
551
+ currentIteration = await evaluationIterator.next(replacementToken);
458
552
  }
459
553
  }
554
+ finally {
555
+ await evaluationIterator.return();
556
+ }
460
557
  isFirstEvaluation = false;
461
558
  if (shouldContextShift)
462
559
  continue;
@@ -530,33 +627,13 @@ async function compressHistoryToFitContextSize({ history, contextShiftSize, cont
530
627
  lastShiftMetadata: contextShiftLastEvaluationMetadata
531
628
  });
532
629
  if (!checkIfHistoryFitsContext(chatHistory))
533
- throw new Error("The default context shift strategy did not return a history that fits the context size");
630
+ throw new Error("The default context shift strategy did not return a history that fits the context size. " +
631
+ "This may happen due to the system prompt being too long");
534
632
  return {
535
633
  compressedHistory: chatHistory,
536
634
  metadata
537
635
  };
538
636
  }
539
- function getQueuedTokensBeforeStopTrigger(triggeredStops, partiallyFreeTokens, tokenizer) {
540
- if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length === 0)
541
- return [];
542
- else if (partiallyFreeTokens.tokens.length !== 0 && partiallyFreeTokens.text.length === 0)
543
- return partiallyFreeTokens.tokens;
544
- else if (partiallyFreeTokens.tokens.length === 0 && partiallyFreeTokens.text.length !== 0)
545
- return tokenizer(partiallyFreeTokens.text);
546
- const triggerThatStartsWithStringIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] === "string");
547
- const triggerThatStartsWithTokenIndex = triggeredStops.findIndex((trigger) => trigger.stopTrigger.length > 0 && typeof trigger.stopTrigger[0] !== "string");
548
- if (triggerThatStartsWithTokenIndex > 0 && triggerThatStartsWithStringIndex < 0)
549
- return partiallyFreeTokens.tokens;
550
- else if (triggerThatStartsWithStringIndex > 0 && triggerThatStartsWithTokenIndex < 0)
551
- return tokenizer(partiallyFreeTokens.text);
552
- const stringTokens = tokenizer(partiallyFreeTokens.text);
553
- if (stringTokens.length === partiallyFreeTokens.tokens.length &&
554
- stringTokens.every((value, index) => value === partiallyFreeTokens.tokens[index]))
555
- return stringTokens;
556
- else if (triggerThatStartsWithStringIndex < triggerThatStartsWithTokenIndex)
557
- return stringTokens;
558
- return partiallyFreeTokens.tokens;
559
- }
560
637
  function getLastTextModelResponseFromChatHistory(chatHistory) {
561
638
  if (chatHistory.length === 0 || chatHistory[chatHistory.length - 1].type !== "model")
562
639
  return "";
@@ -604,7 +681,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
604
681
  availableFunctions: functions,
605
682
  documentFunctionParams
606
683
  });
607
- const tokens = contextText.tokenize(model.tokenize);
684
+ const tokens = contextText.tokenize(model.tokenizer);
608
685
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize) {
609
686
  const { firstDifferentIndex } = sequence.compareContextTokens(tokens);
610
687
  const existingEvaluationPercentage = firstDifferentIndex / tokens.length;
@@ -630,11 +707,11 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
630
707
  : resolvedContextShift.size;
631
708
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
632
709
  history: resolvedHistory,
633
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
710
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
634
711
  contextShiftStrategy: resolvedContextShift.strategy,
635
712
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
636
713
  contextSize: context.contextSize,
637
- tokenizer: model.tokenize,
714
+ tokenizer: model.tokenizer,
638
715
  chatWrapper: chatWrapper,
639
716
  functions,
640
717
  documentFunctionParams
@@ -646,7 +723,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
646
723
  return {
647
724
  history: compressedHistory,
648
725
  stopGenerationTriggers,
649
- tokens: contextText.tokenize(model.tokenize),
726
+ tokens: contextText.tokenize(model.tokenizer),
650
727
  newResolvedHistory: resolvedHistory,
651
728
  newHistoryCompressionMetadata: metadata,
652
729
  ignoreStartText: ignoreStartText ?? [],
@@ -659,7 +736,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
659
736
  availableFunctions: functions,
660
737
  documentFunctionParams
661
738
  });
662
- const tokens = contextText.tokenize(model.tokenize);
739
+ const tokens = contextText.tokenize(model.tokenizer);
663
740
  if (tokens.length + pendingTokensCount + minFreeContextTokens < context.contextSize)
664
741
  return {
665
742
  history: resolvedHistory,
@@ -672,16 +749,16 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
672
749
  disengageInitiallyEngagedFunctionCall: functionCall?.disengageInitiallyEngaged ?? []
673
750
  };
674
751
  }
675
- const contextShiftSize = resolvedContextShift.size instanceof Function
752
+ const contextShiftSize = Math.min(context.contextSize, Math.max(1, Math.floor(resolvedContextShift.size instanceof Function
676
753
  ? await resolvedContextShift.size(sequence)
677
- : resolvedContextShift.size;
754
+ : resolvedContextShift.size)));
678
755
  const { compressedHistory, metadata } = await compressHistoryToFitContextSize({
679
756
  history: resolvedHistory,
680
- contextShiftSize: Math.max(contextShiftSize, minFreeContextTokens) + pendingTokensCount,
757
+ contextShiftSize: Math.max(minFreeContextTokens, Math.min(contextShiftSize, context.contextSize - pendingTokensCount)) + pendingTokensCount,
681
758
  contextShiftStrategy: resolvedContextShift.strategy,
682
759
  contextShiftLastEvaluationMetadata: resolvedContextShift.lastEvaluationMetadata,
683
760
  contextSize: context.contextSize,
684
- tokenizer: model.tokenize,
761
+ tokenizer: model.tokenizer,
685
762
  chatWrapper: chatWrapper,
686
763
  functions,
687
764
  documentFunctionParams
@@ -693,7 +770,7 @@ async function getContextWindow({ resolvedHistory, resolvedContextShift, lastHis
693
770
  return {
694
771
  history: compressedHistory,
695
772
  stopGenerationTriggers,
696
- tokens: contextText.tokenize(model.tokenize),
773
+ tokens: contextText.tokenize(model.tokenizer),
697
774
  newResolvedHistory: resolvedHistory,
698
775
  newHistoryCompressionMetadata: metadata,
699
776
  ignoreStartText: ignoreStartText ?? [],