node-llama-cpp 3.15.1 → 3.16.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. package/dist/bindings/AddonTypes.d.ts +8 -1
  2. package/dist/bindings/getLlama.d.ts +1 -1
  3. package/dist/bindings/getLlama.js +1 -1
  4. package/dist/bindings/getLlama.js.map +1 -1
  5. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js +67 -8
  6. package/dist/chatWrappers/generic/utils/extractFunctionCallSettingsFromJinjaTemplate.js.map +1 -1
  7. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js +2 -1
  8. package/dist/chatWrappers/generic/utils/extractSegmentSettingsFromTokenizerAndChatTemplate.js.map +1 -1
  9. package/dist/cli/commands/ChatCommand.d.ts +6 -0
  10. package/dist/cli/commands/ChatCommand.js +66 -3
  11. package/dist/cli/commands/ChatCommand.js.map +1 -1
  12. package/dist/cli/commands/CompleteCommand.d.ts +6 -0
  13. package/dist/cli/commands/CompleteCommand.js +66 -4
  14. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  15. package/dist/cli/commands/InfillCommand.d.ts +6 -0
  16. package/dist/cli/commands/InfillCommand.js +66 -4
  17. package/dist/cli/commands/InfillCommand.js.map +1 -1
  18. package/dist/cli/utils/parseXtcArg.d.ts +5 -0
  19. package/dist/cli/utils/parseXtcArg.js +16 -0
  20. package/dist/cli/utils/parseXtcArg.js.map +1 -0
  21. package/dist/evaluator/LlamaChat/LlamaChat.d.ts +36 -1
  22. package/dist/evaluator/LlamaChat/LlamaChat.js +29 -10
  23. package/dist/evaluator/LlamaChat/LlamaChat.js.map +1 -1
  24. package/dist/evaluator/LlamaChatSession/LlamaChatSession.d.ts +83 -2
  25. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js +11 -5
  26. package/dist/evaluator/LlamaChatSession/LlamaChatSession.js.map +1 -1
  27. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.d.ts +2 -0
  28. package/dist/evaluator/LlamaChatSession/utils/LlamaChatSessionPromptCompletionEngine.js.map +1 -1
  29. package/dist/evaluator/LlamaCompletion.d.ts +36 -3
  30. package/dist/evaluator/LlamaCompletion.js +7 -4
  31. package/dist/evaluator/LlamaCompletion.js.map +1 -1
  32. package/dist/evaluator/LlamaContext/LlamaContext.js +67 -35
  33. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  34. package/dist/evaluator/LlamaContext/LlamaSampler.js +8 -0
  35. package/dist/evaluator/LlamaContext/LlamaSampler.js.map +1 -1
  36. package/dist/evaluator/LlamaContext/tokenPredictors/DraftSequenceTokenPredictor.d.ts +1 -1
  37. package/dist/evaluator/LlamaContext/types.d.ts +113 -0
  38. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +2 -2
  39. package/dist/evaluator/LlamaModel/LlamaModel.js +1 -1
  40. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  41. package/dist/gguf/insights/GgufInsights.js +4 -0
  42. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  43. package/dist/gguf/types/GgufMetadataTypes.d.ts +5 -0
  44. package/dist/gguf/types/GgufMetadataTypes.js +5 -0
  45. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  46. package/dist/tsconfig.tsbuildinfo +1 -1
  47. package/dist/types.d.ts +51 -0
  48. package/dist/types.js.map +1 -1
  49. package/dist/utils/cmake.js +6 -3
  50. package/dist/utils/cmake.js.map +1 -1
  51. package/llama/addon/AddonContext.cpp +19 -5
  52. package/llama/addon/AddonContext.h +1 -1
  53. package/llama/addon/AddonSampler.cpp +158 -0
  54. package/llama/addon/AddonSampler.h +13 -1
  55. package/llama/addon/globals/getGpuInfo.cpp +1 -1
  56. package/llama/binariesGithubRelease.json +1 -1
  57. package/llama/gitRelease.bundle +0 -0
  58. package/llama/gpuInfo/vulkan-gpu-info.cpp +12 -5
  59. package/llama/llama.cpp.info.json +1 -1
  60. package/package.json +63 -62
  61. package/templates/packed/electron-typescript-react.json +1 -1
  62. package/templates/packed/node-typescript.json +1 -1
package/dist/types.d.ts CHANGED
@@ -72,6 +72,11 @@ export type ChatWrapperSettings = {
72
72
  readonly parallelism?: {
73
73
  readonly call: {
74
74
  readonly sectionPrefix: string | LlamaText;
75
+ /**
76
+ * Alternate section prefixes that can be used to detect a function call section,
77
+ * but won't be used to construct the context when building it from scratch.
78
+ */
79
+ readonly sectionPrefixAlternateMatches?: Array<string | LlamaText>;
75
80
  readonly betweenCalls?: string | LlamaText;
76
81
  readonly sectionSuffix?: string | LlamaText;
77
82
  };
@@ -354,3 +359,49 @@ export type LLamaContextualRepeatPenalty = {
354
359
  */
355
360
  presencePenalty?: number;
356
361
  };
362
+ export type LLamaContextualDryRepeatPenalty = {
363
+ /**
364
+ * A number between `0` and `1` representing the strength of the DRY (Don't Repeat Yourself) effect.
365
+ *
366
+ * Setting this to `0` will disable the DRY penalty completely.
367
+ *
368
+ * The recommended value is `0.8`.
369
+ */
370
+ strength: number;
371
+ /**
372
+ * The base value for the exponential penalty calculation.
373
+ *
374
+ * A higher value will lead to more aggressive penalization of repetitions.
375
+ *
376
+ * Defaults to `1.75`.
377
+ */
378
+ base?: number;
379
+ /**
380
+ * The maximum sequence length (in tokens) that will be allowed to be repeated without being penalized.
381
+ *
382
+ * Repetitions shorter than or equal to this length will not be penalized,
383
+ * allowing for natural repetitions of short phrases and common words.
384
+ *
385
+ * Defaults to `2`.
386
+ */
387
+ allowedLength?: number;
388
+ /**
389
+ * Number of recent tokens generated by the model to consider for sequence repetition matching.
390
+ *
391
+ * When set to `null`, the entire context sequence history will be considered for repetition matching.
392
+ * Setting it to `0` will disable the DRY (Don't Repeat Yourself) penalty.
393
+ *
394
+ * Defaults to `null`.
395
+ */
396
+ lastTokens?: number | null;
397
+ /**
398
+ * Text sequences that will be considered as breakers for the repeated sequences.
399
+ * These will never be penalized for being repeated, and are used to mark the boundaries of the repeated sequences.
400
+ *
401
+ * For example, setting this to `["\n", "*"]` will allow the model to make as many lists as it wants,
402
+ * without being penalized for repeating the list item marker (like `*`).
403
+ *
404
+ * Defaults to `["\n", ":", '"', "*"]`.
405
+ */
406
+ sequenceBreakers?: string[];
407
+ };
package/dist/types.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AAoVA,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,EAAE,SAAS,CAAoD,CAAC;AACzG,KAAM,IAAsF,CAAC;AA8B7F,MAAM,UAAU,+BAA+B,CAAC,IAAuD;IACnG,IAAI,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QACxC,OAAO,KAAK,CAAC;IAEjB,OAAO,IAAI,CAAC,IAAI,KAAK,cAAc,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,IAAuD;IAC9F,IAAI,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QACxC,OAAO,KAAK,CAAC;IAEjB,OAAO,IAAI,CAAC,IAAI,KAAK,SAAS,CAAC;AACnC,CAAC"}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../src/types.ts"],"names":[],"mappings":"AA2VA,MAAM,CAAC,MAAM,eAAe,GAAG,CAAC,SAAS,EAAE,SAAS,CAAoD,CAAC;AACzG,KAAM,IAAsF,CAAC;AA8B7F,MAAM,UAAU,+BAA+B,CAAC,IAAuD;IACnG,IAAI,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QACxC,OAAO,KAAK,CAAC;IAEjB,OAAO,IAAI,CAAC,IAAI,KAAK,cAAc,CAAC;AACxC,CAAC;AAED,MAAM,UAAU,0BAA0B,CAAC,IAAuD;IAC9F,IAAI,IAAI,IAAI,IAAI,IAAI,OAAO,IAAI,KAAK,QAAQ;QACxC,OAAO,KAAK,CAAC;IAEjB,OAAO,IAAI,CAAC,IAAI,KAAK,SAAS,CAAC;AACnC,CAAC"}
@@ -35,9 +35,12 @@ export async function getCmakePath() {
35
35
  }
36
36
  catch (err) { }
37
37
  try {
38
- let resolvedPath = await which("cmake", {
39
- path: path.join(llamaDirectory, "xpack", "xpacks", ".bin")
40
- });
38
+ let resolvedPath = (await which("cmake", {
39
+ path: path.join(llamaDirectory, "xpack", "xpacks", ".bin"),
40
+ nothrow: true
41
+ })) || (await which("cmake", {
42
+ path: path.join(llamaDirectory, "xpack", "xpacks", "@xpack-dev-tools", "cmake", ".content", "bin")
43
+ }));
41
44
  if (resolvedPath.toLowerCase().endsWith(".cmd"))
42
45
  resolvedPath = (await getBinFromWindowCmd(resolvedPath, "cmake.exe")) ?? "";
43
46
  else if (resolvedPath.toLowerCase().endsWith(".ps1")) {
@@ -1 +1 @@
1
- {"version":3,"file":"cmake.js","sourceRoot":"","sources":["../../src/utils/cmake.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,MAAM,EAAC,MAAM,SAAS,CAAC;AAC/B,OAAO,EACH,2BAA2B,EAAE,2BAA2B,EAAE,cAAc,EAAE,yBAAyB,EAAE,yBAAyB,EAC9H,cAAc,EAAE,UAAU,EAC7B,MAAM,cAAc,CAAC;AACtB,OAAO,EAAC,2BAA2B,EAAC,MAAM,kDAAkD,CAAC;AAC7F,OAAO,EAAC,WAAW,EAAC,MAAM,kCAAkC,CAAC;AAC7D,OAAO,EAAC,kCAAkC,EAAC,MAAM,uCAAuC,CAAC;AACzF,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAC/C,OAAO,cAAc,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAG/C,MAAM,CAAC,KAAK,UAAU,eAAe;IACjC,IAAI,CAAC;QACD,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1C,OAAO,YAAY,KAAK,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY;IAC9B,IAAI,CAAC;QACD,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YACtC,OAAO,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,IAAI,YAAY,KAAK,EAAE,IAAI,YAAY,IAAI,IAAI;YAC3C,OAAO,YAAY,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC;QACD,MAAM,aAAa,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAChD,IAAI,aAAa,IAAI,IAAI;YACrB,OAAO,aAAa,CAAC;IAC7B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC;QACD,IAAI,YAAY,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YACpC,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC;SAC7D,CAAC,CAAC;QAEH,IAAI,YAAY,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC3C,YAAY,GAAG,CAAC,MAAM,mBAAmB,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;aAC3E,IAAI,YAAY,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACnD,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;YAEnE,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;gBAChC,YAAY,GAAG,CAAC,MAAM,mBAAmB,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;QACnF,CAAC;QAED,IAAI,YAAY,KAAK,EAAE;YACnB,OAAO,YAAY,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,qBAA
8B,KAAK;IAC3E,IAAI,CAAC;QACD,MAAM,YAAY,EAAE,CAAC;QACrB,OAAO;IACX,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC,kBAAkB;QACnB,MAAM,aAAa,CAAC,EAAC,YAAY,EAAE,kBAAkB,EAAC,CAAC,CAAC;SACvD,CAAC;QACF,IAAI,CAAC;YACD,MAAM,cAAc,CAAC;gBACjB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC;gBACxC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,kBAAkB,CAAC;gBACvC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;aAC/C,EAAE,KAAK,IAAI,EAAE;gBACV,MAAM,aAAa,CAAC,EAAC,YAAY,EAAE,kBAAkB,EAAC,CAAC,CAAC;YAC5D,CAAC,CAAC,CAAC;QACP,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,2BAA2B,CAAC,sBAAsB,EAAE;gBACtD,aAAa,EAAE,EAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,EAAC;gBAC/C,aAAa,EAAE,EAAC,IAAI,EAAE,CAAC,OAAO,CAAC,EAAC;aACnC,CAAC,CAAC;YACH,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe;IACjC,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC,CAAC;AACzD,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB;IACrC,IAAI,CAAC;QACD,MAAM,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;AACpB,CAAC;AAED,KAAK,UAAU,iBAAiB;IAC5B,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC;IAE/B,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;QACrB,MAAM,EAAC,cAAc,EAAC,GAAG,MAAM,kCAAkC,EAAE,CAAC;QAEpE,MAAM,mBAAmB,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,CAC5D,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,kBAAkB,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,CAAC,CAClH,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CACjC,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE;YACxC,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;gBAC9B,OAAO,SAAS,CAAC;YAErB,OAAO,IAAI,CAAC;QAChB,CAAC,CAAC,CACL,CAAC;aACG,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC;QAE9C,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,EAAC,
YAAY,GAAG,IAAI,KAA8B,EAAE;IAC7E,MAAM,YAAY,CAAC;QACf,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC;KAC1D,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,MAAM,GAAsB;YAC9B,GAAG,OAAO,CAAC,GAAG;YACd,mBAAmB,EAAE,2BAA2B;YAChD,mBAAmB,EAAE,2BAA2B;SACnD,CAAC;QAEF,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,UAAU,EAAE,EAAE,SAAS,EAAE,+BAA+B,EAAE,WAAW,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;QAEvK,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;QAC3C,MAAM,mBAAmB,EAAE,CAAC;IAChC,CAAC,CAAC,CAAC;AACP,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,WAAmB,EAAE,OAAe;IACnE,MAAM,WAAW,GAAW,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAEvD,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,OAAO,CAAC;QACvC,OAAO,IAAI,CAAC;IAEhB,MAAM,kBAAkB,GAAG,oBAAoB,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IACrE,MAAM,qBAAqB,GAAG,WAAW,CAAC,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE/E,IAAI,qBAAqB,KAAK,GAAG,IAAI,qBAAqB,KAAK,GAAG;QAC9D,OAAO,IAAI,CAAC;IAEhB,MAAM,oBAAoB,GAAG,WAAW,CAAC,WAAW,CAAC,qBAAqB,EAAE,kBAAkB,CAAC,CAAC;IAEhG,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,oBAAoB,GAAG,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAEjG,IAAI,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,OAAO,IAAI,CAAC;IAEhB,OAAO,OAAO,CAAC;AACnB,CAAC"}
1
+ {"version":3,"file":"cmake.js","sourceRoot":"","sources":["../../src/utils/cmake.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,UAAU,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,KAAK,MAAM,OAAO,CAAC;AAC1B,OAAO,EAAC,MAAM,EAAC,MAAM,SAAS,CAAC;AAC/B,OAAO,EACH,2BAA2B,EAAE,2BAA2B,EAAE,cAAc,EAAE,yBAAyB,EAAE,yBAAyB,EAC9H,cAAc,EAAE,UAAU,EAC7B,MAAM,cAAc,CAAC;AACtB,OAAO,EAAC,2BAA2B,EAAC,MAAM,kDAAkD,CAAC;AAC7F,OAAO,EAAC,WAAW,EAAC,MAAM,kCAAkC,CAAC;AAC7D,OAAO,EAAC,kCAAkC,EAAC,MAAM,uCAAuC,CAAC;AACzF,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAC/C,OAAO,cAAc,MAAM,qBAAqB,CAAC;AACjD,OAAO,EAAC,YAAY,EAAC,MAAM,mBAAmB,CAAC;AAG/C,MAAM,CAAC,KAAK,UAAU,eAAe;IACjC,IAAI,CAAC;QACD,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,OAAO,CAAC,CAAC;QAC1C,OAAO,YAAY,KAAK,EAAE,CAAC;IAC/B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACX,OAAO,KAAK,CAAC;IACjB,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,YAAY;IAC9B,IAAI,CAAC;QACD,MAAM,YAAY,GAAG,MAAM,KAAK,CAAC,OAAO,EAAE;YACtC,OAAO,EAAE,IAAI;SAChB,CAAC,CAAC;QAEH,IAAI,YAAY,KAAK,EAAE,IAAI,YAAY,IAAI,IAAI;YAC3C,OAAO,YAAY,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC;QACD,MAAM,aAAa,GAAG,MAAM,iBAAiB,EAAE,CAAC;QAChD,IAAI,aAAa,IAAI,IAAI;YACrB,OAAO,aAAa,CAAC;IAC7B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC;QACD,IAAI,YAAY,GAAG,CACf,MAAM,KAAK,CAAC,OAAO,EAAE;YACjB,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,CAAC;YAC1D,OAAO,EAAE,IAAI;SAChB,CAAC,CACL,IAAI,CACD,MAAM,KAAK,CAAC,OAAO,EAAE;YACjB,IAAI,EAAE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,OAAO,EAAE,QAAQ,EAAE,kBAAkB,EAAE,OAAO,EAAE,UAAU,EAAE,KAAK,CAAC;SACrG,CAAC,CACL,CAAC;QAEF,IAAI,YAAY,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC;YAC3C,YAAY,GAAG,CAAC,MAAM,mBAAmB,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;aAC3E,IAAI,YAAY,CAAC,WAAW,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;YACnD,MAAM,WAAW,GAAG,YAAY,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,MAAM,CAAC,GAAG,MAAM,CAAC;YAEnE,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,WAAW,CAAC;gBAChC,YAAY,GAAG,CAAC,MAAM,mBAAmB,CAAC,WAAW,EAAE,WAAW,CAAC,CAAC,IAAI,EAAE,CAAC;QACnF,CA
AC;QAED,IAAI,YAAY,KAAK,EAAE;YACnB,OAAO,YAAY,CAAC;IAC5B,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;AACvC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,qBAAqB,CAAC,qBAA8B,KAAK;IAC3E,IAAI,CAAC;QACD,MAAM,YAAY,EAAE,CAAC;QACrB,OAAO;IACX,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;IAEhB,IAAI,CAAC,kBAAkB;QACnB,MAAM,aAAa,CAAC,EAAC,YAAY,EAAE,kBAAkB,EAAC,CAAC,CAAC;SACvD,CAAC;QACF,IAAI,CAAC;YACD,MAAM,cAAc,CAAC;gBACjB,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,mBAAmB,CAAC;gBACxC,OAAO,EAAE,KAAK,CAAC,IAAI,CAAC,kBAAkB,CAAC;gBACvC,IAAI,EAAE,KAAK,CAAC,IAAI,CAAC,0BAA0B,CAAC;aAC/C,EAAE,KAAK,IAAI,EAAE;gBACV,MAAM,aAAa,CAAC,EAAC,YAAY,EAAE,kBAAkB,EAAC,CAAC,CAAC;YAC5D,CAAC,CAAC,CAAC;QACP,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACX,MAAM,2BAA2B,CAAC,sBAAsB,EAAE;gBACtD,aAAa,EAAE,EAAC,GAAG,EAAE,CAAC,OAAO,CAAC,EAAE,GAAG,EAAE,CAAC,OAAO,CAAC,EAAC;gBAC/C,aAAa,EAAE,EAAC,IAAI,EAAE,CAAC,OAAO,CAAC,EAAC;aACnC,CAAC,CAAC;YACH,MAAM,GAAG,CAAC;QACd,CAAC;IACL,CAAC;AACL,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe;IACjC,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;IAC3C,MAAM,EAAE,CAAC,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,CAAC,CAAC;AACzD,CAAC;AAED;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,mBAAmB;IACrC,IAAI,CAAC;QACD,MAAM,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,MAAM,CAAC,yBAAyB,EAAE,KAAK,CAAC,CAAC;QAC/C,MAAM,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,QAAQ,CAAC,EAAE,KAAK,CAAC,CAAC;IAC7D,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC,CAAA,CAAC;AACpB,CAAC;AAED,KAAK,UAAU,iBAAiB;IAC5B,MAAM,QAAQ,GAAG,WAAW,EAAE,CAAC;IAE/B,IAAI,QAAQ,KAAK,KAAK,EAAE,CAAC;QACrB,MAAM,EAAC,cAAc,EAAC,GAAG,MAAM,kCAAkC,EAAE,CAAC;QAEpE,MAAM,mBAAmB,GAAG,cAAc,CAAC,GAAG,CAAC,CAAC,WAAW,EAAE,EAAE,CAAC,CAC5D,IAAI,CAAC,IAAI,CAAC,WAAW,EAAE,SAAS,EAAE,KAAK,EAAE,kBAAkB,EAAE,WAAW,EAAE,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,WAAW,CAAC,CAClH,CAAC,CAAC;QAEH,MAAM,UAAU,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CACjC,mBAAmB,CAAC,GAAG,CAAC,KAAK,EAAE,SAAS,EAAE,EAAE;YACxC,IAAI,MAAM,EAAE,CAAC,UAAU,CAAC,SAAS,CAAC;gBAC9B,OAAO,SAAS,CAAC;YAErB,OAAO,IAAI,CAAC;QAChB,CAA
C,CAAC,CACL,CAAC;aACG,MAAM,CAAC,CAAC,SAAS,EAAE,EAAE,CAAC,SAAS,IAAI,IAAI,CAAC,CAAC;QAE9C,OAAO,UAAU,CAAC,CAAC,CAAC,CAAC;IACzB,CAAC;IAED,OAAO,SAAS,CAAC;AACrB,CAAC;AAED,KAAK,UAAU,aAAa,CAAC,EAAC,YAAY,GAAG,IAAI,KAA8B,EAAE;IAC7E,MAAM,YAAY,CAAC;QACf,YAAY,EAAE,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,cAAc,CAAC;KAC1D,EAAE,KAAK,IAAI,EAAE;QACV,MAAM,MAAM,GAAsB;YAC9B,GAAG,OAAO,CAAC,GAAG;YACd,mBAAmB,EAAE,2BAA2B;YAChD,mBAAmB,EAAE,2BAA2B;SACnD,CAAC;QAEF,MAAM,YAAY,CAAC,KAAK,EAAE,CAAC,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE,OAAO,UAAU,EAAE,EAAE,SAAS,EAAE,+BAA+B,EAAE,WAAW,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,YAAY,CAAC,CAAC;QAEvK,MAAM,EAAE,CAAC,MAAM,CAAC,yBAAyB,CAAC,CAAC;QAC3C,MAAM,mBAAmB,EAAE,CAAC;IAChC,CAAC,CAAC,CAAC;AACP,CAAC;AAED,KAAK,UAAU,mBAAmB,CAAC,WAAmB,EAAE,OAAe;IACnE,MAAM,WAAW,GAAW,MAAM,EAAE,CAAC,QAAQ,CAAC,WAAW,EAAE,MAAM,CAAC,CAAC;IACnE,MAAM,oBAAoB,GAAG,WAAW,CAAC,WAAW,EAAE,CAAC;IAEvD,IAAI,CAAC,oBAAoB,CAAC,QAAQ,CAAC,OAAO,CAAC;QACvC,OAAO,IAAI,CAAC;IAEhB,MAAM,kBAAkB,GAAG,oBAAoB,CAAC,WAAW,CAAC,OAAO,CAAC,CAAC;IACrE,MAAM,qBAAqB,GAAG,WAAW,CAAC,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAE/E,IAAI,qBAAqB,KAAK,GAAG,IAAI,qBAAqB,KAAK,GAAG;QAC9D,OAAO,IAAI,CAAC;IAEhB,MAAM,oBAAoB,GAAG,WAAW,CAAC,WAAW,CAAC,qBAAqB,EAAE,kBAAkB,CAAC,CAAC;IAEhG,MAAM,OAAO,GAAG,WAAW,CAAC,KAAK,CAAC,oBAAoB,GAAG,CAAC,EAAE,kBAAkB,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IAEjG,IAAI,CAAC,MAAM,EAAE,CAAC,UAAU,CAAC,OAAO,CAAC;QAC7B,OAAO,IAAI,CAAC;IAEhB,OAAO,OAAO,CAAC;AACnB,CAAC"}
@@ -942,11 +942,25 @@ Napi::Value AddonContext::EnsureDraftContextIsCompatibleForSpeculative(const Nap
942
942
  return info.Env().Undefined();
943
943
  }
944
944
 
945
- Napi::Value AddonContext::SetLora(const Napi::CallbackInfo& info) {
946
- AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(info[0].As<Napi::Object>());
947
- float scale = info[1].As<Napi::Number>().FloatValue();
945
+ Napi::Value AddonContext::SetLoras(const Napi::CallbackInfo& info) {
946
+ Napi::Array loraArray = info[0].As<Napi::Array>();
947
+ Napi::Array scaleArray = info[1].As<Napi::Array>();
948
948
 
949
- llama_set_adapter_lora(ctx, lora->lora_adapter, scale);
949
+ std::vector<llama_adapter_lora *> loras;
950
+ std::vector<float> scales;
951
+
952
+ loras.reserve(loraArray.Length());
953
+ scales.reserve(scaleArray.Length());
954
+
955
+ for (size_t i = 0; i < loraArray.Length() && i < scaleArray.Length(); i++) {
956
+ AddonModelLora* lora = Napi::ObjectWrap<AddonModelLora>::Unwrap(loraArray.Get(i).As<Napi::Object>());
957
+ float scale = scaleArray.Get(i).As<Napi::Number>().FloatValue();
958
+
959
+ loras.push_back(lora->lora_adapter);
960
+ scales.push_back(scale);
961
+ }
962
+
963
+ llama_set_adapters_lora(ctx, loras.data(), loras.size(), scales.data());
950
964
 
951
965
  return info.Env().Undefined();
952
966
  }
@@ -977,7 +991,7 @@ void AddonContext::init(Napi::Object exports) {
977
991
  InstanceMethod("ensureDraftContextIsCompatibleForSpeculative", &AddonContext::EnsureDraftContextIsCompatibleForSpeculative),
978
992
  InstanceMethod("saveSequenceStateToFile", &AddonContext::SaveSequenceStateToFile),
979
993
  InstanceMethod("loadSequenceStateFromFile", &AddonContext::LoadSequenceStateFromFile),
980
- InstanceMethod("setLora", &AddonContext::SetLora),
994
+ InstanceMethod("setLoras", &AddonContext::SetLoras),
981
995
  InstanceMethod("dispose", &AddonContext::Dispose),
982
996
  }
983
997
  )
@@ -52,7 +52,7 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
52
52
  Napi::Value PrintTimings(const Napi::CallbackInfo& info);
53
53
  Napi::Value EnsureDraftContextIsCompatibleForSpeculative(const Napi::CallbackInfo& info);
54
54
 
55
- Napi::Value SetLora(const Napi::CallbackInfo& info);
55
+ Napi::Value SetLoras(const Napi::CallbackInfo& info);
56
56
 
57
57
  static void init(Napi::Object exports);
58
58
  };
@@ -28,6 +28,11 @@ void AddonSampler::dispose() {
28
28
  model->Unref();
29
29
  freeChain();
30
30
 
31
+ if (xtcSampler != nullptr) {
32
+ llama_sampler_free(xtcSampler);
33
+ xtcSampler = nullptr;
34
+ }
35
+
31
36
  if (temperatureSampler != nullptr) {
32
37
  llama_sampler_free(temperatureSampler);
33
38
  temperatureSampler = nullptr;
@@ -63,6 +68,11 @@ void AddonSampler::dispose() {
63
68
  repeatPenaltySampler = nullptr;
64
69
  }
65
70
 
71
+ if (dryRepeatPenaltySampler != nullptr) {
72
+ llama_sampler_free(dryRepeatPenaltySampler);
73
+ dryRepeatPenaltySampler = nullptr;
74
+ }
75
+
66
76
  if (tokenBiasSampler != nullptr) {
67
77
  llama_sampler_free(tokenBiasSampler);
68
78
  tokenBiasSampler = nullptr;
@@ -108,11 +118,19 @@ void AddonSampler::rebuildChainIfNeeded() {
108
118
  llama_sampler_chain_add(chain, repeatPenaltySampler);
109
119
  }
110
120
 
121
+ if (dryRepeatPenaltySampler != nullptr) {
122
+ llama_sampler_chain_add(chain, dryRepeatPenaltySampler);
123
+ }
124
+
111
125
  if (grammarEvaluationState != nullptr) {
112
126
  llama_sampler_chain_add(chain, grammarEvaluationState->sampler);
113
127
  }
114
128
 
115
129
  if (greedySampler != nullptr) {
130
+ if (xtcSampler != nullptr) {
131
+ llama_sampler_chain_add(chain, xtcSampler);
132
+ }
133
+
116
134
  llama_sampler_chain_add(chain, greedySampler);
117
135
  } else {
118
136
  if (topKSampler != nullptr) {
@@ -127,6 +145,10 @@ void AddonSampler::rebuildChainIfNeeded() {
127
145
  llama_sampler_chain_add(chain, minPSampler);
128
146
  }
129
147
 
148
+ if (xtcSampler != nullptr) {
149
+ llama_sampler_chain_add(chain, xtcSampler);
150
+ }
151
+
130
152
  if (temperatureSampler != nullptr) {
131
153
  llama_sampler_chain_add(chain, temperatureSampler);
132
154
  }
@@ -143,6 +165,10 @@ void AddonSampler::acceptToken(llama_token token) {
143
165
  repeatPenalty_lastTokens.push_back(token);
144
166
  }
145
167
 
168
+ if (dryRepeatPenaltySampler != nullptr) {
169
+ llama_sampler_accept(dryRepeatPenaltySampler, token);
170
+ }
171
+
146
172
  if (grammarEvaluationState != nullptr && grammarEvaluationState->sampler != nullptr && !llama_vocab_is_eog(model->vocab, token)) {
147
173
  llama_sampler_accept(grammarEvaluationState->sampler, token);
148
174
  }
@@ -282,6 +308,33 @@ Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
282
308
  seedSampler = llama_sampler_init_dist(time(NULL));
283
309
  }
284
310
 
311
+ if (config.Has("xtcProbability") && config.Has("xtcThreshold")) {
312
+ auto xtcProbability = config.Get("xtcProbability").As<Napi::Number>().FloatValue();
313
+ auto xtcThreshold = config.Get("xtcThreshold").As<Napi::Number>().FloatValue();
314
+
315
+ if (xtcProbability != xtcSampler_probability || xtcThreshold != xtcSampler_threshold || xtcSampler == nullptr) {
316
+ xtcSampler_probability = xtcProbability;
317
+ xtcSampler_threshold = xtcThreshold;
318
+ freeChain();
319
+
320
+ if (xtcSampler != nullptr) {
321
+ llama_sampler_free(xtcSampler);
322
+ xtcSampler = nullptr;
323
+ }
324
+
325
+ xtcSampler = llama_sampler_init_xtc(
326
+ xtcSampler_probability,
327
+ xtcSampler_threshold,
328
+ 0,
329
+ seedSampler == nullptr ? LLAMA_DEFAULT_SEED : seedSampler_seed
330
+ );
331
+ }
332
+ } else if (xtcSampler != nullptr) {
333
+ freeChain();
334
+ llama_sampler_free(xtcSampler);
335
+ xtcSampler = nullptr;
336
+ }
337
+
285
338
  if (config.Has("repeatPenaltyTokens")) {
286
339
  Napi::Uint32Array repeat_penalty_tokens_uint32_array = config.Get("repeatPenaltyTokens").As<Napi::Uint32Array>();
287
340
  auto repeatPenalty = config.Has("repeatPenalty")
@@ -374,6 +427,111 @@ Napi::Value AddonSampler::ApplyConfig(const Napi::CallbackInfo& info) {
374
427
  repeatPenaltySampler = nullptr;
375
428
  }
376
429
 
430
+ if (config.Has("dryRepeatPenaltyStrength")) {
431
+ float strength = config.Get("dryRepeatPenaltyStrength").As<Napi::Number>().FloatValue();
432
+ float base = config.Has("dryRepeatPenaltyBase")
433
+ ? config.Get("dryRepeatPenaltyBase").As<Napi::Number>().FloatValue()
434
+ : 0;
435
+ int32_t allowedLength = config.Has("dryRepeatPenaltyAllowedLength")
436
+ ? config.Get("dryRepeatPenaltyAllowedLength").As<Napi::Number>().Int32Value()
437
+ : 2;
438
+ int32_t lastTokens = config.Has("dryRepeatPenaltyLastTokens")
439
+ ? config.Get("dryRepeatPenaltyLastTokens").As<Napi::Number>().Int32Value()
440
+ : -1;
441
+
442
+ bool sequenceBreaksIsTheSame = (
443
+ config.Has("dryRepeatPenaltySequenceBreakers") &&
444
+ config.Get("dryRepeatPenaltySequenceBreakers").IsBoolean() &&
445
+ config.Get("dryRepeatPenaltySequenceBreakers").As<Napi::Boolean>().Value() == false
446
+ ) ? true : false;
447
+
448
+ std::vector<std::string> sequenceBreakers;
449
+ if (config.Has("dryRepeatPenaltySequenceBreakers") && config.Get("dryRepeatPenaltySequenceBreakers").IsArray()) {
450
+ Napi::Array sequenceBreakersArray = config.Get("dryRepeatPenaltySequenceBreakers").As<Napi::Array>();
451
+ sequenceBreaksIsTheSame = dryRepeatPenalty_sequenceBreakers.size() == sequenceBreakersArray.Length();
452
+
453
+ sequenceBreakers.reserve(sequenceBreakersArray.Length());
454
+ for (size_t i = 0; i < sequenceBreakersArray.Length(); i++) {
455
+ std::string breaker = sequenceBreakersArray.Get(i).As<Napi::String>().Utf8Value();
456
+
457
+ if (sequenceBreaksIsTheSame && dryRepeatPenalty_sequenceBreakers[i] != breaker) {
458
+ sequenceBreaksIsTheSame = false;
459
+ }
460
+
461
+ sequenceBreakers.push_back(std::move(breaker));
462
+ }
463
+ }
464
+
465
+ auto enabled = base != 0 && lastTokens != 0;
466
+ bool shouldCreateSampler = false;
467
+
468
+ if (!enabled) {
469
+ if (dryRepeatPenaltySampler != nullptr) {
470
+ freeChain();
471
+ llama_sampler_free(dryRepeatPenaltySampler);
472
+ dryRepeatPenaltySampler = nullptr;
473
+ }
474
+ } else if (dryRepeatPenaltySampler == nullptr) {
475
+ freeChain();
476
+ shouldCreateSampler = true;
477
+ } else {
478
+ bool existingSamplerMatchesConfig = true;
479
+ existingSamplerMatchesConfig &= dryRepeatPenalty_strength == strength;
480
+ existingSamplerMatchesConfig &= dryRepeatPenalty_base == base;
481
+ existingSamplerMatchesConfig &= dryRepeatPenalty_allowedLength == allowedLength;
482
+ existingSamplerMatchesConfig &= dryRepeatPenalty_lastTokens == lastTokens;
483
+ existingSamplerMatchesConfig &= sequenceBreaksIsTheSame;
484
+
485
+ if (!existingSamplerMatchesConfig) {
486
+ freeChain();
487
+ llama_sampler_free(dryRepeatPenaltySampler);
488
+ dryRepeatPenaltySampler = nullptr;
489
+
490
+ shouldCreateSampler = true;
491
+ }
492
+ }
493
+
494
+ if (shouldCreateSampler) {
495
+ std::vector<const char *> cSequenceBreakers;
496
+
497
+ if (sequenceBreaksIsTheSame) {
498
+ cSequenceBreakers.reserve(dryRepeatPenalty_sequenceBreakers.size());
499
+ for (const auto & str : dryRepeatPenalty_sequenceBreakers) {
500
+ cSequenceBreakers.push_back(str.c_str());
501
+ }
502
+ } else {
503
+ cSequenceBreakers.reserve(sequenceBreakers.size());
504
+ for (const auto & str : sequenceBreakers) {
505
+ cSequenceBreakers.push_back(str.c_str());
506
+ }
507
+ }
508
+
509
+ dryRepeatPenaltySampler = llama_sampler_init_dry(
510
+ model->vocab,
511
+ llama_model_n_ctx_train(model->model),
512
+ strength,
513
+ base,
514
+ allowedLength,
515
+ lastTokens,
516
+ cSequenceBreakers.data(),
517
+ cSequenceBreakers.size()
518
+ );
519
+
520
+ dryRepeatPenalty_strength = strength;
521
+ dryRepeatPenalty_base = base;
522
+ dryRepeatPenalty_allowedLength = allowedLength;
523
+ dryRepeatPenalty_lastTokens = lastTokens;
524
+
525
+ if (!sequenceBreaksIsTheSame) {
526
+ dryRepeatPenalty_sequenceBreakers.swap(sequenceBreakers);
527
+ }
528
+ }
529
+ } else if (dryRepeatPenaltySampler != nullptr) {
530
+ freeChain();
531
+ llama_sampler_free(dryRepeatPenaltySampler);
532
+ dryRepeatPenaltySampler = nullptr;
533
+ }
534
+
377
535
  if (config.Has("tokenBiasKeys") && config.Has("tokenBiasValues")) {
378
536
  Napi::Uint32Array tokenBiasKeys = config.Get("tokenBiasKeys").As<Napi::Uint32Array>();
379
537
  Napi::Float32Array tokenBiasValues = config.Get("tokenBiasValues").As<Napi::Float32Array>();
@@ -25,7 +25,11 @@ class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
25
25
 
26
26
  llama_sampler * topPSampler = nullptr;
27
27
  float topPSampler_topP = 0.0f; // Top p sampling >=1.0 = disabled
28
-
28
+
29
+ llama_sampler * xtcSampler = nullptr;
30
+ float xtcSampler_probability = 0;
31
+ float xtcSampler_threshold = 0;
32
+
29
33
  llama_sampler * seedSampler = nullptr;
30
34
  uint32_t seedSampler_seed = 0;
31
35
 
@@ -36,6 +40,14 @@ class AddonSampler : public Napi::ObjectWrap<AddonSampler> {
36
40
  float repeatPenalty_presencePenalty = 0.00f; // 0.0 = disabled
37
41
  float repeatPenalty_frequencyPenalty = 0.00f; // 0.0 = disabled
38
42
 
43
+ llama_sampler * dryRepeatPenaltySampler = nullptr;
44
+ float dryRepeatPenalty_strength = 0.00f; // 0.00f = disabled
45
+ float dryRepeatPenalty_base = 0.00f;
46
+ int32_t dryRepeatPenalty_allowedLength = 1;
47
+ int32_t dryRepeatPenalty_lastTokens = 1;
48
+ bool dryRepeatPenalty_lastTokensAll = true;
49
+ std::vector<std::string> dryRepeatPenalty_sequenceBreakers;
50
+
39
51
  llama_sampler * tokenBiasSampler = nullptr;
40
52
  std::vector<llama_logit_bias> tokenBiasSampler_biases;
41
53
 
@@ -99,7 +99,7 @@ std::pair<ggml_backend_dev_t, std::string> getGpuDevice() {
99
99
  ggml_backend_dev_t device = ggml_backend_dev_get(i);
100
100
  const auto deviceName = std::string(ggml_backend_dev_name(device));
101
101
 
102
- if (deviceName == "Metal") {
102
+ if (std::string(deviceName).find("MTL") == 0 || deviceName == "Metal") {
103
103
  return std::pair<ggml_backend_dev_t, std::string>(device, "metal");
104
104
  } else if (std::string(deviceName).find("Vulkan") == 0) {
105
105
  return std::pair<ggml_backend_dev_t, std::string>(device, "vulkan");
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b7836"
2
+ "release": "b8095"
3
3
  }
Binary file
@@ -1,4 +1,5 @@
1
1
  #include <stddef.h>
2
+ #include <cstdint>
2
3
  #include <map>
3
4
  #include <vector>
4
5
 
@@ -8,6 +9,7 @@ constexpr std::uint32_t VK_VENDOR_ID_AMD = 0x1002;
8
9
  constexpr std::uint32_t VK_VENDOR_ID_APPLE = 0x106b;
9
10
  constexpr std::uint32_t VK_VENDOR_ID_INTEL = 0x8086;
10
11
  constexpr std::uint32_t VK_VENDOR_ID_NVIDIA = 0x10de;
12
+ constexpr std::uint32_t VK_VENDOR_ID_QUALCOMM = 0x5143;
11
13
 
12
14
  typedef void (*gpuInfoVulkanWarningLogCallback_t)(const char* message);
13
15
 
@@ -35,7 +37,7 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
35
37
  auto oldDevice = std::find_if(
36
38
  dedupedDevices.begin(),
37
39
  dedupedDevices.end(),
38
- [&newId](const vk::PhysicalDevice& oldDevice) {
40
+ [&newId, &newDriver](const vk::PhysicalDevice& oldDevice) {
39
41
  vk::PhysicalDeviceProperties2 oldProps;
40
42
  vk::PhysicalDeviceDriverProperties oldDriver;
41
43
  vk::PhysicalDeviceIDProperties oldId;
@@ -43,13 +45,14 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
43
45
  oldDriver.pNext = &oldId;
44
46
  oldDevice.getProperties2(&oldProps);
45
47
 
46
- bool equals = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
47
- equals = equals || (
48
+ bool sameUuid = std::equal(std::begin(oldId.deviceUUID), std::end(oldId.deviceUUID), std::begin(newId.deviceUUID));
49
+ sameUuid = sameUuid || (
48
50
  oldId.deviceLUIDValid && newId.deviceLUIDValid &&
49
51
  std::equal(std::begin(oldId.deviceLUID), std::end(oldId.deviceLUID), std::begin(newId.deviceLUID))
50
52
  );
53
+ bool bothMoltenVk = (newDriver.driverID == vk::DriverId::eMoltenvk && oldDriver.driverID == vk::DriverId::eMoltenvk);
51
54
 
52
- return equals;
55
+ return sameUuid && !bothMoltenVk;
53
56
  }
54
57
  );
55
58
 
@@ -83,8 +86,12 @@ static std::vector<vk::PhysicalDevice> dedupedDevices() {
83
86
  driverPriorities[vk::DriverId::eMesaNvk] = 2;
84
87
  #endif
85
88
  break;
89
+ case VK_VENDOR_ID_QUALCOMM:
90
+ driverPriorities[vk::DriverId::eQualcommProprietary] = 1;
91
+ driverPriorities[vk::DriverId::eMesaTurnip] = 2;
92
+ break;
86
93
  }
87
- driverPriorities[vk::DriverId::eMesaDozen] = 4;
94
+ driverPriorities[vk::DriverId::eMesaDozen] = 100;
88
95
 
89
96
  if (driverPriorities.count(oldDriver.driverID)) {
90
97
  oldPriority = driverPriorities[oldDriver.driverID];
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b7836",
2
+ "tag": "b8095",
3
3
  "llamaCppGithubRepo": "ggml-org/llama.cpp"
4
4
  }