node-llama-cpp 3.0.0-beta.36 → 3.0.0-beta.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (98) hide show
  1. package/bins/linux-arm64/_nlcBuildMetadata.json +1 -1
  2. package/bins/linux-arm64/libllama.so +0 -0
  3. package/bins/linux-arm64/llama-addon.node +0 -0
  4. package/bins/linux-armv7l/_nlcBuildMetadata.json +1 -1
  5. package/bins/linux-armv7l/libllama.so +0 -0
  6. package/bins/linux-armv7l/llama-addon.node +0 -0
  7. package/bins/linux-x64/_nlcBuildMetadata.json +1 -1
  8. package/bins/linux-x64/libllama.so +0 -0
  9. package/bins/linux-x64/llama-addon.node +0 -0
  10. package/bins/linux-x64-vulkan/_nlcBuildMetadata.json +1 -1
  11. package/bins/linux-x64-vulkan/libllama.so +0 -0
  12. package/bins/linux-x64-vulkan/llama-addon.node +0 -0
  13. package/bins/mac-arm64-metal/_nlcBuildMetadata.json +1 -1
  14. package/bins/mac-arm64-metal/ggml-common.h +7 -7
  15. package/bins/mac-arm64-metal/ggml-metal.metal +0 -1
  16. package/bins/mac-arm64-metal/libggml.dylib +0 -0
  17. package/bins/mac-arm64-metal/libllama.dylib +0 -0
  18. package/bins/mac-arm64-metal/llama-addon.node +0 -0
  19. package/bins/mac-x64/_nlcBuildMetadata.json +1 -1
  20. package/bins/mac-x64/libllama.dylib +0 -0
  21. package/bins/mac-x64/llama-addon.node +0 -0
  22. package/bins/win-arm64/_nlcBuildMetadata.json +1 -1
  23. package/bins/win-arm64/ggml.dll +0 -0
  24. package/bins/win-arm64/llama-addon.node +0 -0
  25. package/bins/win-arm64/llama.dll +0 -0
  26. package/bins/win-x64/_nlcBuildMetadata.json +1 -1
  27. package/bins/win-x64/ggml.dll +0 -0
  28. package/bins/win-x64/llama-addon.node +0 -0
  29. package/bins/win-x64/llama.dll +0 -0
  30. package/bins/win-x64-vulkan/_nlcBuildMetadata.json +1 -1
  31. package/bins/win-x64-vulkan/ggml.dll +0 -0
  32. package/bins/win-x64-vulkan/llama-addon.node +0 -0
  33. package/bins/win-x64-vulkan/llama.dll +0 -0
  34. package/dist/bindings/AddonTypes.d.ts +1 -0
  35. package/dist/bindings/Llama.d.ts +2 -2
  36. package/dist/bindings/Llama.js +4 -1
  37. package/dist/bindings/Llama.js.map +1 -1
  38. package/dist/bindings/getLlama.d.ts +5 -2
  39. package/dist/bindings/getLlama.js +2 -0
  40. package/dist/bindings/getLlama.js.map +1 -1
  41. package/dist/bindings/types.d.ts +1 -0
  42. package/dist/bindings/types.js.map +1 -1
  43. package/dist/bindings/utils/getGpuTypesToUseForOption.d.ts +2 -1
  44. package/dist/bindings/utils/getGpuTypesToUseForOption.js +13 -4
  45. package/dist/bindings/utils/getGpuTypesToUseForOption.js.map +1 -1
  46. package/dist/chatWrappers/Llama3ChatWrapper.js +2 -2
  47. package/dist/chatWrappers/Llama3ChatWrapper.js.map +1 -1
  48. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.d.ts +9 -0
  49. package/dist/chatWrappers/generic/JinjaTemplateChatWrapper.js.map +1 -1
  50. package/dist/cli/commands/ChatCommand.d.ts +1 -0
  51. package/dist/cli/commands/ChatCommand.js +15 -6
  52. package/dist/cli/commands/ChatCommand.js.map +1 -1
  53. package/dist/cli/commands/CompleteCommand.d.ts +1 -0
  54. package/dist/cli/commands/CompleteCommand.js +13 -4
  55. package/dist/cli/commands/CompleteCommand.js.map +1 -1
  56. package/dist/cli/commands/InfillCommand.d.ts +1 -0
  57. package/dist/cli/commands/InfillCommand.js +13 -4
  58. package/dist/cli/commands/InfillCommand.js.map +1 -1
  59. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.d.ts +1 -0
  60. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js +19 -6
  61. package/dist/cli/commands/inspect/commands/InspectMeasureCommand.js.map +1 -1
  62. package/dist/cli/utils/interactivelyAskForModel.d.ts +2 -1
  63. package/dist/cli/utils/interactivelyAskForModel.js +19 -9
  64. package/dist/cli/utils/interactivelyAskForModel.js.map +1 -1
  65. package/dist/cli/utils/printCommonInfoLines.js +4 -0
  66. package/dist/cli/utils/printCommonInfoLines.js.map +1 -1
  67. package/dist/cli/utils/resolveCommandGgufPath.d.ts +2 -1
  68. package/dist/cli/utils/resolveCommandGgufPath.js +3 -2
  69. package/dist/cli/utils/resolveCommandGgufPath.js.map +1 -1
  70. package/dist/evaluator/LlamaContext/LlamaContext.d.ts +1 -0
  71. package/dist/evaluator/LlamaContext/LlamaContext.js +15 -4
  72. package/dist/evaluator/LlamaContext/LlamaContext.js.map +1 -1
  73. package/dist/evaluator/LlamaContext/types.d.ts +14 -0
  74. package/dist/evaluator/LlamaModel/LlamaModel.d.ts +22 -0
  75. package/dist/evaluator/LlamaModel/LlamaModel.js +49 -4
  76. package/dist/evaluator/LlamaModel/LlamaModel.js.map +1 -1
  77. package/dist/gguf/insights/GgufInsights.d.ts +3 -1
  78. package/dist/gguf/insights/GgufInsights.js +18 -2
  79. package/dist/gguf/insights/GgufInsights.js.map +1 -1
  80. package/dist/gguf/insights/GgufInsightsConfigurationResolver.d.ts +6 -3
  81. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js +8 -5
  82. package/dist/gguf/insights/GgufInsightsConfigurationResolver.js.map +1 -1
  83. package/dist/gguf/insights/utils/resolveContextContextSizeOption.d.ts +2 -1
  84. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js +3 -1
  85. package/dist/gguf/insights/utils/resolveContextContextSizeOption.js.map +1 -1
  86. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.d.ts +2 -1
  87. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js +16 -10
  88. package/dist/gguf/insights/utils/resolveModelGpuLayersOption.js.map +1 -1
  89. package/dist/gguf/types/GgufMetadataTypes.d.ts +15 -10
  90. package/dist/gguf/types/GgufMetadataTypes.js +15 -2
  91. package/dist/gguf/types/GgufMetadataTypes.js.map +1 -1
  92. package/dist/index.d.ts +3 -3
  93. package/dist/index.js.map +1 -1
  94. package/llama/addon.cpp +13 -26
  95. package/llama/binariesGithubRelease.json +1 -1
  96. package/llama/gitRelease.bundle +0 -0
  97. package/llama/llama.cpp.info.json +1 -1
  98. package/package.json +3 -3
@@ -4,7 +4,7 @@ import { getDefaultContextBatchSize, getDefaultModelContextSize } from "../../..
4
4
  import { minAllowedContextSizeInCalculations } from "../../../config.js";
5
5
  import { scoreLevels } from "./scoreLevels.js";
6
6
  const fitContextExtraMemoryPaddingPercentage = 0.5;
7
- export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading }) {
7
+ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ignoreMemorySafetyChecks = false, getVramState, llamaVramPaddingSize, llamaGpu, llamaSupportsGpuOffloading, defaultContextFlashAttention }) {
8
8
  if (gpuLayers == null)
9
9
  gpuLayers = "auto";
10
10
  if (!llamaSupportsGpuOffloading)
@@ -19,7 +19,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
19
19
  const maxLayersRequirements = getVramRequiredForGpuLayers({
20
20
  gpuLayers: resolvedGpuLayers,
21
21
  ggufInsights,
22
- currentVram: vramState.free
22
+ currentVram: vramState.free,
23
+ defaultContextFlashAttention
23
24
  });
24
25
  if (maxLayersRequirements == null)
25
26
  throw new InsufficientMemoryError("Not enough VRAM to fit the model with the specified settings");
@@ -48,7 +49,8 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
48
49
  : undefined,
49
50
  maxGpuLayers: typeof gpuLayers === "object"
50
51
  ? gpuLayers.max
51
- : undefined
52
+ : undefined,
53
+ defaultContextFlashAttention
52
54
  });
53
55
  const hasGpuLayersRequirements = typeof gpuLayers === "object" &&
54
56
  (gpuLayers.min != null || gpuLayers.max != null || gpuLayers.fitContext?.contextSize != null);
@@ -58,7 +60,7 @@ export async function resolveModelGpuLayersOption(gpuLayers, { ggufInsights, ign
58
60
  }
59
61
  throw new Error(`Invalid gpuLayers value: ${gpuLayers}`);
60
62
  }
61
- function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers }) {
63
+ function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGpuLayers, maxGpuLayers, defaultContextFlashAttention }) {
62
64
  return findBestOption({
63
65
  *generator() {
64
66
  const minLayers = Math.floor(Math.max(0, minGpuLayers ?? 0));
@@ -74,7 +76,8 @@ function getBestGpuLayersForFreeVram({ ggufInsights, freeVram, fitContext, minGp
74
76
  gpuLayers: option.gpuLayers,
75
77
  ggufInsights,
76
78
  currentVram: freeVram,
77
- fitContext
79
+ fitContext,
80
+ defaultContextFlashAttention
78
81
  });
79
82
  if (layersRequirements == null)
80
83
  return null;
@@ -121,7 +124,7 @@ function scoreGpuLayersAndContextCombination({ gpuLayers, contextSize }, { total
121
124
  }
122
125
  return scoreGpuLayers() + scoreContextSize();
123
126
  }
124
- function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext }) {
127
+ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fitContext, defaultContextFlashAttention = false }) {
125
128
  const modelVram = ggufInsights.estimateModelResourceRequirements({ gpuLayers }).gpuVram;
126
129
  if (modelVram > currentVram)
127
130
  return null;
@@ -131,7 +134,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
131
134
  batchSize: getDefaultContextBatchSize({ contextSize: fitContext.contextSize, sequences: 1 }),
132
135
  modelGpuLayers: gpuLayers,
133
136
  sequences: 1,
134
- isEmbeddingContext: fitContext.embeddingContext ?? false
137
+ isEmbeddingContext: fitContext.embeddingContext ?? false,
138
+ flashAttention: defaultContextFlashAttention
135
139
  }).gpuVram;
136
140
  const totalVram = modelVram + contextVram;
137
141
  if (totalVram > currentVram)
@@ -146,7 +150,8 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
146
150
  gpuLayers,
147
151
  ggufInsights,
148
152
  vram: currentVram - modelVram,
149
- isEmbeddingContext: fitContext?.embeddingContext ?? false
153
+ isEmbeddingContext: fitContext?.embeddingContext ?? false,
154
+ flashAttention: defaultContextFlashAttention
150
155
  });
151
156
  if (maxContext == null || modelVram + maxContext.vram > currentVram)
152
157
  return null;
@@ -156,7 +161,7 @@ function getVramRequiredForGpuLayers({ gpuLayers, ggufInsights, currentVram, fit
156
161
  totalVram: modelVram + maxContext.vram
157
162
  };
158
163
  }
159
- function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext }) {
164
+ function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEmbeddingContext, flashAttention }) {
160
165
  const maxContextSize = getDefaultModelContextSize({ trainContextSize: ggufInsights.trainContextSize });
161
166
  for (let contextSize = maxContextSize; contextSize >= minAllowedContextSizeInCalculations; contextSize--) {
162
167
  const contextVram = ggufInsights.estimateContextResourceRequirements({
@@ -164,7 +169,8 @@ function findMaxPossibleContextSizeForVram({ gpuLayers, ggufInsights, vram, isEm
164
169
  batchSize: getDefaultContextBatchSize({ contextSize, sequences: 1 }),
165
170
  modelGpuLayers: gpuLayers,
166
171
  sequences: 1,
167
- isEmbeddingContext
172
+ isEmbeddingContext,
173
+ flashAttention
168
174
  }).gpuVram;
169
175
  if (contextVram <= vram)
170
176
  return {
@@ -1 +1 @@
1
- {"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAKvC;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;SAC9B,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;SAClB,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EAOf;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;aACb,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAGnD;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC,EAAC,SAAS,EAAC,CAAC,CAAC,OAAO,CAAC;IAEtF,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;SAC3D,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;KAC5D,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAE5F;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,KAAK,IAAI,WAAW,GAAG,cAAc,EAAE,WAAW,IAAI,mCAAmC,EAAE,WAAW,EAAE,EAAE,CAAC;QACvG,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW;YACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAClE,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB;SACrB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO;gBACH,WAAW;gBACX,IAAI,EAAE,WAAW;aACpB,CAAC;IACV,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
1
+ {"version":3,"file":"resolveModelGpuLayersOption.js","sourceRoot":"","sources":["../../../../src/gguf/insights/utils/resolveModelGpuLayersOption.ts"],"names":[],"mappings":"AAEA,OAAO,EAAC,uBAAuB,EAAC,MAAM,2CAA2C,CAAC;AAClF,OAAO,EAAC,cAAc,EAAC,MAAM,kCAAkC,CAAC;AAChE,OAAO,EAAC,0BAA0B,EAAE,0BAA0B,EAAC,MAAM,iDAAiD,CAAC;AACvH,OAAO,EAAC,mCAAmC,EAAC,MAAM,oBAAoB,CAAC;AACvE,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAG7C,MAAM,sCAAsC,GAAG,GAAG,CAAC;AAEnD,MAAM,CAAC,KAAK,UAAU,2BAA2B,CAAC,SAAyC,EAAE,EACzF,YAAY,EAAE,wBAAwB,GAAG,KAAK,EAAE,YAAY,EAAE,oBAAoB,EAClF,QAAQ,EAAE,0BAA0B,EAAE,4BAA4B,EAKrE;IACG,IAAI,SAAS,IAAI,IAAI;QACjB,SAAS,GAAG,MAAM,CAAC;IAEvB,IAAI,CAAC,0BAA0B;QAC3B,OAAO,CAAC,CAAC;IAEb,IAAI,SAAS,KAAK,KAAK,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QACvD,MAAM,iBAAiB,GAAG,OAAO,SAAS,KAAK,QAAQ;YACnD,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,SAAS,CAAC,CAAC;YAC5D,CAAC,CAAC,YAAY,CAAC,WAAW,CAAC;QAE/B,IAAI,wBAAwB;YACxB,OAAO,iBAAiB,CAAC;QAE7B,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,MAAM,qBAAqB,GAAG,2BAA2B,CAAC;YACtD,SAAS,EAAE,iBAAiB;YAC5B,YAAY;YACZ,WAAW,EAAE,SAAS,CAAC,IAAI;YAC3B,4BAA4B;SAC/B,CAAC,CAAC;QAEH,IAAI,qBAAqB,IAAI,IAAI;YAC7B,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,iBAAiB,CAAC;IAC7B,CAAC;SAAM,IAAI,SAAS,KAAK,MAAM,IAAI,OAAO,SAAS,KAAK,QAAQ,EAAE,CAAC;QAC/D,IAAI,QAAQ,KAAK,KAAK;YAClB,OAAO,CAAC,CAAC;QAEb,MAAM,SAAS,GAAG,MAAM,YAAY,EAAE,CAAC;QACvC,IAAI,SAAS,CAAC,KAAK,KAAK,CAAC;YACrB,OAAO,CAAC,CAAC;QAEb,IAAI,QAAQ,GAAG,SAAS,CAAC,IAAI,CAAC;QAC9B,IAAI,OAAO,SAAS,KAAK,QAAQ,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,EAAE,CAAC;YAC7E,QAAQ,IAAI,oBAAoB,GAAG,sCAAsC,CAAC;YAE1E,IAAI,QAAQ,GAAG,CAAC;gBACZ,QAAQ,GAAG,CAAC,CAAC;QACrB,CAAC;QAED,MAAM,mBAAmB,GAAG,2BAA2B,CAAC;YACpD,YAAY;YACZ,QAAQ;YACR,UAAU,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACrC,CAAC,CAAC,SAAS,CAAC,UAAU;gBACtB,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,YAAY,EAAE,OAAO,SAAS,KAAK,QAAQ;gBACvC,CAAC,CAAC,SAAS,CAAC,GAAG;gBACf,CAAC,CAAC,SAAS;YACf,4BAA4B;SAC/B,CAAC,CAAC;QAEH,MAAM,wBAAwB,GAAG,OAAO,SAAS,KAAK,QAAQ;YAC1D,CAAC,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,GAAG,IAAI,IAAI,IAAI,SAAS,CAAC,UAAU,EAAE,WAAW,IAAI,IAAI,CAAC,CAAC;QAElG,IAAI,CAAC,wBAAwB,IAAI,mBAAmB,IAAI,IAAI,IAAI,wBAAwB;YACpF,MAAM,IAAI,uBAAuB,CAAC,8DAA8D,CAAC,CAAC;QAEtG,OAAO,mBAAmB,IAAI,CAAC,CAAC;IACpC,CAAC;IAED,MAAM,IAAI,KAAK,CAAC,4BAA4B,SAAS,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,YAAY,EACZ,QAAQ,EACR,UAAU,EACV,YAAY,EACZ,YAAY,EACZ,4BAA4B,EAQ/B;IACG,OAAO,cAAc,CAAC;QAClB,CAAC,SAAS;YACN,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,EAAE,YAAY,IAAI,CAAC,CAAC,CAAC,CAAC;YAC7D,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,YAAY,CAAC,WAAW,EAAE,YAAY,IAAI,YAAY,CAAC,WAAW,CAAC,CAAC,CAAC;YAE3G,KAAK,IAAI,MAAM,GAAG,SAAS,EAAE,MAAM,IAAI,SAAS,EAAE,MAAM,EAAE,EAAE,CAAC;gBACzD,MAAM;oBACF,SAAS,EAAE,MAAM;iBACpB,CAAC;YACN,CAAC;QACL,CAAC;QACD,KAAK,CAAC,MAAM;YACR,MAAM,kBAAkB,GAAG,2BAA2B,CAAC;gBACnD,SAAS,EAAE,MAAM,CAAC,SAAS;gBAC3B,YAAY;gBACZ,WAAW,EAAE,QAAQ;gBACrB,UAAU;gBACV,4BAA4B;aAC/B,CAAC,CAAC;YAEH,IAAI,kBAAkB,IAAI,IAAI;gBAC1B,OAAO,IAAI,CAAC;YAEhB,OAAO,mCAAmC,CAAC,EAAC,SAAS,EAAE,MAAM,CAAC,SAAS,EAAE,WAAW,EAAE,kBAAkB,CAAC,WAAW,EAAC,EAAE;gBACnH,cAAc,EAAE,YAAY,CAAC,WAAW;gBACxC,gBAAgB,EAAE,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC;aAClG,CAAC,CAAC;QACP,CAAC;KACJ,CAAC,EAAE,SAAS,IAAI,IAAI,CAAC;AAC1B,CAAC;AAED,SAAS,mCAAmC,CAAC,EAAC,SAAS,EAAE,WAAW,EAA2C,EAAE,EAC7G,cAAc,EAAE,gBAAgB,EAGnC;IACG,SAAS,cAAc;QACnB,OAAO,WAAW,CAAC,SAAS,EAAE,CAAC;gBAC3B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,EAAE;aACb,EAAE;gBACC,KAAK,EAAE,cAAc;gBACrB,MAAM,EAAE,EAAE;gBACV,GAAG,EAAE,cAAc;aACtB,CAAC,CAAC,CAAC;IACR,CAAC;IAED,SAAS,gBAAgB;QACrB,MAAM,mBAAmB,GAAG,SAAS,GAAG,cAAc,CAAC;QAEvD,OAAO,WAAW,CAAC,WAAW,EAAE,CAAC;gBAC7B,KAAK,EAAE,CAAC;gBACR,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,CAAC;aACZ,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;aAC5C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,EAAE;aAC7C,EAAE;gBACC,KAAK,EAAE,IAAI;gBACX,MAAM,EAAE,mBAAmB,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBACzC,GAAG,EAAE,IAAI,CAAC,GAAG,CAAC,gBAAgB,EAAE,KAAK,CAAC;aACzC,CAAC,CAAC,CAAC;IACR,CAAC;IAED,OAAO,cAAc,EAAE,GAAG,gBAAgB,EAAE,CAAC;AACjD,CAAC;AAED,SAAS,2BAA2B,CAAC,EACjC,SAAS,EAAE,YAAY,EAAE,WAAW,EAAE,UAAU,EAAE,4BAA4B,GAAG,KAAK,EAIzF;IACG,MAAM,SAAS,GAAG,YAAY,CAAC,iCAAiC,CAAC,EAAC,SAAS,EAAC,CAAC,CAAC,OAAO,CAAC;IAEtF,IAAI,SAAS,GAAG,WAAW;QACvB,OAAO,IAAI,CAAC;IAEhB,IAAI,UAAU,IAAI,IAAI,IAAI,UAAU,CAAC,WAAW,IAAI,IAAI,EAAE,CAAC;QACvD,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,UAAU,CAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAC1F,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB,EAAE,UAAU,CAAC,gBAAgB,IAAI,KAAK;YACxD,cAAc,EAAE,4BAA4B;SAC/C,CAAC,CAAC,OAAO,CAAC;QAEX,MAAM,SAAS,GAAG,SAAS,GAAG,WAAW,CAAC;QAC1C,IAAI,SAAS,GAAG,WAAW;YACvB,OAAO,IAAI,CAAC;QAEhB,OAAO;YACH,WAAW,EAAE,UAAU,CAAC,WAAW;YACnC,WAAW;YACX,SAAS;SACZ,CAAC;IACN,CAAC;IAED,MAAM,UAAU,GAAG,iCAAiC,CAAC;QACjD,SAAS;QACT,YAAY;QACZ,IAAI,EAAE,WAAW,GAAG,SAAS;QAC7B,kBAAkB,EAAE,UAAU,EAAE,gBAAgB,IAAI,KAAK;QACzD,cAAc,EAAE,4BAA4B;KAC/C,CAAC,CAAC;IAEH,IAAI,UAAU,IAAI,IAAI,IAAI,SAAS,GAAG,UAAU,CAAC,IAAI,GAAG,WAAW;QAC/D,OAAO,IAAI,CAAC;IAEhB,OAAO;QACH,WAAW,EAAE,UAAU,CAAC,WAAW;QACnC,WAAW,EAAE,UAAU,CAAC,IAAI;QAC5B,SAAS,EAAE,SAAS,GAAG,UAAU,CAAC,IAAI;KACzC,CAAC;AACN,CAAC;AAED,SAAS,iCAAiC,CAAC,EAAC,SAAS,EAAE,YAAY,EAAE,IAAI,EAAE,kBAAkB,EAAE,cAAc,EAE5G;IACG,MAAM,cAAc,GAAG,0BAA0B,CAAC,EAAC,gBAAgB,EAAE,YAAY,CAAC,gBAAgB,EAAC,CAAC,CAAC;IAErG,KAAK,IAAI,WAAW,GAAG,cAAc,EAAE,WAAW,IAAI,mCAAmC,EAAE,WAAW,EAAE,EAAE,CAAC;QACvG,MAAM,WAAW,GAAG,YAAY,CAAC,mCAAmC,CAAC;YACjE,WAAW;YACX,SAAS,EAAE,0BAA0B,CAAC,EAAC,WAAW,EAAE,SAAS,EAAE,CAAC,EAAC,CAAC;YAClE,cAAc,EAAE,SAAS;YACzB,SAAS,EAAE,CAAC;YACZ,kBAAkB;YAClB,cAAc;SACjB,CAAC,CAAC,OAAO,CAAC;QAEX,IAAI,WAAW,IAAI,IAAI;YACnB,OAAO;gBACH,WAAW;gBACX,IAAI,EAAE,WAAW;aACpB,CAAC;IACV,CAAC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -1,31 +1,44 @@
1
1
  export declare const enum GgufArchitectureType {
2
2
  llama = "llama",
3
3
  falcon = "falcon",
4
+ grok = "grok",
4
5
  gpt2 = "gpt2",
5
6
  gptj = "gptj",
6
7
  gptneox = "gptneox",
7
8
  mpt = "mpt",
8
9
  baichuan = "baichuan",
9
10
  starcoder = "starcoder",
10
- persimmon = "persimmon",
11
11
  refact = "refact",
12
12
  bert = "bert",
13
13
  nomicBert = "nomic-bert",
14
+ jinaBertV2 = "jina-bert-v2",
14
15
  bloom = "bloom",
15
16
  stablelm = "stablelm",
16
17
  qwen = "qwen",
17
18
  qwen2 = "qwen2",
19
+ qwen2moe = "qwen2moe",
18
20
  phi2 = "phi2",
21
+ phi3 = "phi3",
19
22
  plamo = "plamo",
20
23
  codeshell = "codeshell",
21
24
  orion = "orion",
22
25
  internlm2 = "internlm2",
23
26
  minicpm = "minicpm",
24
27
  gemma = "gemma",
28
+ gemma2 = "gemma2",
25
29
  starcoder2 = "starcoder2",
26
30
  mamba = "mamba",
31
+ xverse = "xverse",
27
32
  commandR = "command-r",
28
- rwkv = "rwkv"
33
+ dbrx = "dbrx",
34
+ olmo = "olmo",
35
+ openelm = "openelm",
36
+ arctic = "arctic",
37
+ deepseek2 = "deepseek2",
38
+ bitnet = "bitnet",
39
+ t5 = "t5",
40
+ jais = "jais",
41
+ unknown = "(unknown)"
29
42
  }
30
43
  export type GgufMetadata<A extends GgufArchitectureType = GgufArchitectureType> = {
31
44
  readonly general: GgufMetadataGeneral<A>;
@@ -44,7 +57,6 @@ export type GgufMetadataLlmToType = {
44
57
  [GgufArchitectureType.bloom]: GgufMetadataBloom;
45
58
  [GgufArchitectureType.falcon]: GgufMetadataFalcon;
46
59
  [GgufArchitectureType.mamba]: GgufMetadataMamba;
47
- [GgufArchitectureType.rwkv]: GgufMetadataRWKV;
48
60
  };
49
61
  export declare enum GgufFileType {
50
62
  ALL_F32 = 0,
@@ -325,11 +337,4 @@ export type GgufMetadataMamba = {
325
337
  readonly layer_norm_rms_epsilon: number;
326
338
  };
327
339
  };
328
- export type GgufMetadataRWKV = {
329
- readonly architecture_version: 4 | number;
330
- readonly context_length: number;
331
- readonly block_count: number;
332
- readonly embedding_length: number;
333
- readonly feed_forward_length: number;
334
- };
335
340
  export declare function isGgufMetadataOfArchitectureType<A extends GgufArchitectureType>(metadata: GgufMetadata, type: A): metadata is GgufMetadata<A>;
@@ -2,31 +2,44 @@ export var GgufArchitectureType;
2
2
  (function (GgufArchitectureType) {
3
3
  GgufArchitectureType["llama"] = "llama";
4
4
  GgufArchitectureType["falcon"] = "falcon";
5
+ GgufArchitectureType["grok"] = "grok";
5
6
  GgufArchitectureType["gpt2"] = "gpt2";
6
7
  GgufArchitectureType["gptj"] = "gptj";
7
8
  GgufArchitectureType["gptneox"] = "gptneox";
8
9
  GgufArchitectureType["mpt"] = "mpt";
9
10
  GgufArchitectureType["baichuan"] = "baichuan";
10
11
  GgufArchitectureType["starcoder"] = "starcoder";
11
- GgufArchitectureType["persimmon"] = "persimmon";
12
12
  GgufArchitectureType["refact"] = "refact";
13
13
  GgufArchitectureType["bert"] = "bert";
14
14
  GgufArchitectureType["nomicBert"] = "nomic-bert";
15
+ GgufArchitectureType["jinaBertV2"] = "jina-bert-v2";
15
16
  GgufArchitectureType["bloom"] = "bloom";
16
17
  GgufArchitectureType["stablelm"] = "stablelm";
17
18
  GgufArchitectureType["qwen"] = "qwen";
18
19
  GgufArchitectureType["qwen2"] = "qwen2";
20
+ GgufArchitectureType["qwen2moe"] = "qwen2moe";
19
21
  GgufArchitectureType["phi2"] = "phi2";
22
+ GgufArchitectureType["phi3"] = "phi3";
20
23
  GgufArchitectureType["plamo"] = "plamo";
21
24
  GgufArchitectureType["codeshell"] = "codeshell";
22
25
  GgufArchitectureType["orion"] = "orion";
23
26
  GgufArchitectureType["internlm2"] = "internlm2";
24
27
  GgufArchitectureType["minicpm"] = "minicpm";
25
28
  GgufArchitectureType["gemma"] = "gemma";
29
+ GgufArchitectureType["gemma2"] = "gemma2";
26
30
  GgufArchitectureType["starcoder2"] = "starcoder2";
27
31
  GgufArchitectureType["mamba"] = "mamba";
32
+ GgufArchitectureType["xverse"] = "xverse";
28
33
  GgufArchitectureType["commandR"] = "command-r";
29
- GgufArchitectureType["rwkv"] = "rwkv";
34
+ GgufArchitectureType["dbrx"] = "dbrx";
35
+ GgufArchitectureType["olmo"] = "olmo";
36
+ GgufArchitectureType["openelm"] = "openelm";
37
+ GgufArchitectureType["arctic"] = "arctic";
38
+ GgufArchitectureType["deepseek2"] = "deepseek2";
39
+ GgufArchitectureType["bitnet"] = "bitnet";
40
+ GgufArchitectureType["t5"] = "t5";
41
+ GgufArchitectureType["jais"] = "jais";
42
+ GgufArchitectureType["unknown"] = "(unknown)";
30
43
  })(GgufArchitectureType || (GgufArchitectureType = {}));
31
44
  // source: `enum llama_ftype` in `llama.h` in the `llama.cpp` source code
32
45
  export var GgufFileType;
@@ -1 +1 @@
1
- {"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBA4BjB;AA5BD,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,8CAAsB,CAAA;IACtB,qCAAa,CAAA;AACjB,CAAC,EA5BiB,oBAAoB,KAApB,oBAAoB,QA4BrC;AA+BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAgCX;AAhCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;AACtB,CAAC,EAhCW,YAAY,KAAZ,YAAY,QAgCvB;AAyED,MAAM,CAAN,IAAkB,8BAQjB;AARD,WAAkB,8BAA8B;IAC5C,6FAAa,CAAA;IACb,uFAAU,CAAA;IACV,yFAAW,CAAA;IACX,yFAAW,CAAA;IACX,iGAAe,CAAA;IACf,uFAAU,CAAA;IACV,mFAAQ,CAAA;AACZ,CAAC,EARiB,8BAA8B,KAA9B,8BAA8B,QAQ/C;AAiCD,MAAM,CAAN,IAAkB,mCAKjB;AALD,WAAkB,mCAAmC;IACjD,4GAAgB,CAAA;IAChB,6FAAQ,CAAA;IACR,6FAAQ,CAAA;IACR,2FAAO,CAAA;AACX,CAAC,EALiB,mCAAmC,KAAnC,mCAAmC,QAKpD;AAuND,MAAM,UAAU,gCAAgC,CAC5C,QAAsB,EAAE,IAAO;IAE/B,OAAO,QAAQ,EAAE,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AACpD,CAAC"}
1
+ {"version":3,"file":"GgufMetadataTypes.js","sourceRoot":"","sources":["../../../src/gguf/types/GgufMetadataTypes.ts"],"names":[],"mappings":"AAAA,MAAM,CAAN,IAAkB,oBAyCjB;AAzCD,WAAkB,oBAAoB;IAClC,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,mCAAW,CAAA;IACX,6CAAqB,CAAA;IACrB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,qCAAa,CAAA;IACb,gDAAwB,CAAA;IACxB,mDAA2B,CAAA;IAC3B,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,6CAAqB,CAAA;IACrB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,uCAAe,CAAA;IACf,+CAAuB,CAAA;IACvB,2CAAmB,CAAA;IACnB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,iDAAyB,CAAA;IACzB,uCAAe,CAAA;IACf,yCAAiB,CAAA;IACjB,8CAAsB,CAAA;IACtB,qCAAa,CAAA;IACb,qCAAa,CAAA;IACb,2CAAmB,CAAA;IACnB,yCAAiB,CAAA;IACjB,+CAAuB,CAAA;IACvB,yCAAiB,CAAA;IACjB,iCAAS,CAAA;IACT,qCAAa,CAAA;IACb,6CAAqB,CAAA;AACzB,CAAC,EAzCiB,oBAAoB,KAApB,oBAAoB,QAyCrC;AA8BD,yEAAyE;AACzE,MAAM,CAAN,IAAY,YAgCX;AAhCD,WAAY,YAAY;IACpB,qDAAW,CAAA;IACX,2DAAc,CAAA;IACd,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,+EAAwB,CAAA;IACxB,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,6DAAe,CAAA;IACf,8DAAgB,CAAA;IAChB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,8DAAgB,CAAA;IAChB,oEAAmB,CAAA;IACnB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,kEAAkB,CAAA;IAClB,oEAAmB,CAAA;IACnB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;IAClB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,gEAAiB,CAAA;IACjB,kEAAkB,CAAA;AACtB,CAAC,EAhCW,YAAY,KAAZ,YAAY,QAgCvB;AAyED,MAAM,CAAN,IAAkB,8BAQjB;AARD,WAAkB,8BAA8B;IAC5C,6FAAa,CAAA;IACb,uFAAU,CAAA;IACV,yFAAW,CAAA;IACX,yFAAW,CAAA;IACX,iGAAe,CAAA;IACf,uFAAU,CAAA;IACV,mFAAQ,CAAA;AACZ,CAAC,EARiB,8BAA8B,KAA9B,8BAA8B,QAQ/C;AAiCD,MAAM,CAAN,IAAkB,mCAKjB;AALD,WAAkB,mCAAmC;IACjD,4GAAgB,CAAA;IAChB,6FAAQ,CAAA;IACR,6FAAQ,CAAA;IACR,2FAAO,CAAA;AACX,CAAC,EALiB,mCAAmC,KAAnC,mCAAmC,QAKpD;AA8MD,MAAM,UAAU,gCAAgC,CAC5C,QAAsB,EAAE,IAAO;IAE/B,OAAO,QAAQ,EAAE,OAAO,EAAE,YAAY,KAAK,IAAI,CAAC;AACpD,CAAC"}
package/dist/index.d.ts CHANGED
@@ -2,7 +2,7 @@ import { DisposedError } from "lifecycle-utils";
2
2
  import { Llama } from "./bindings/Llama.js";
3
3
  import { getLlama, type LlamaOptions, type LastBuildOptions } from "./bindings/getLlama.js";
4
4
  import { NoBinaryFoundError } from "./bindings/utils/NoBinaryFoundError.js";
5
- import { LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
5
+ import { type LlamaGpuType, LlamaLogLevel, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, LlamaVocabularyType } from "./bindings/types.js";
6
6
  import { LlamaModel, LlamaModelInfillTokens, type LlamaModelOptions, LlamaModelTokens } from "./evaluator/LlamaModel/LlamaModel.js";
7
7
  import { TokenAttributes } from "./evaluator/LlamaModel/utils/TokenAttributes.js";
8
8
  import { LlamaGrammar, type LlamaGrammarOptions } from "./evaluator/LlamaGrammar.js";
@@ -46,6 +46,6 @@ import { createModelDownloader, ModelDownloader, type ModelDownloaderOptions } f
46
46
  import { type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type LLamaContextualRepeatPenalty, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState } from "./types.js";
47
47
  import { type GbnfJsonArraySchema, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonObjectSchema, type GbnfJsonOneOfSchema, type GbnfJsonSchema, type GbnfJsonSchemaImmutableType, type GbnfJsonSchemaToType } from "./utils/gbnfJson/types.js";
48
48
  import { type GgufFileInfo } from "./gguf/types/GgufFileInfoTypes.js";
49
- import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, type GgufMetadataRWKV, isGgufMetadataOfArchitectureType } from "./gguf/types/GgufMetadataTypes.js";
49
+ import { type GgufMetadata, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, isGgufMetadataOfArchitectureType } from "./gguf/types/GgufMetadataTypes.js";
50
50
  import { GgmlType, type GgufTensorInfo } from "./gguf/types/GgufTensorInfoTypes.js";
51
- export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, type GgufMetadataRWKV, GgmlType, isGgufMetadataOfArchitectureType, GgufInsights, type GgufInsightsResourceRequirements, GgufInsightsConfigurationResolver, createModelDownloader, ModelDownloader, type ModelDownloaderOptions };
51
+ export { Llama, getLlama, type LlamaOptions, type LastBuildOptions, type LlamaGpuType, LlamaLogLevel, NoBinaryFoundError, LlamaModel, LlamaModelTokens, LlamaModelInfillTokens, TokenAttributes, type LlamaModelOptions, LlamaGrammar, type LlamaGrammarOptions, LlamaJsonSchemaGrammar, LlamaJsonSchemaValidationError, LlamaGrammarEvaluationState, type LlamaGrammarEvaluationStateOptions, LlamaContext, LlamaContextSequence, type LlamaContextOptions, type BatchingOptions, type CustomBatchingDispatchSchedule, type CustomBatchingPrioritizationStrategy, type BatchItem, type PrioritizedBatchItem, type ContextShiftOptions, type ContextTokensDeleteRange, type EvaluationPriority, type LlamaContextSequenceRepeatPenalty, TokenBias, LlamaEmbeddingContext, type LlamaEmbeddingContextOptions, type LlamaEmbedding, LlamaChatSession, defineChatSessionFunction, type LlamaChatSessionOptions, type LlamaChatSessionContextShiftOptions, type LLamaChatPromptOptions, type LLamaChatCompletePromptOptions, type LlamaChatSessionRepeatPenalty, type LLamaChatPreloadPromptOptions, LlamaChat, type LlamaChatOptions, type LLamaChatGenerateResponseOptions, type LLamaChatLoadAndCompleteUserMessageOptions, type LLamaChatContextShiftOptions, type LLamaContextualRepeatPenalty, type LlamaChatResponse, type LlamaChatResponseFunctionCall, type LlamaChatLoadAndCompleteUserResponse, LlamaChatSessionPromptCompletionEngine, type LLamaChatPromptCompletionEngineOptions, LlamaCompletion, type LlamaCompletionOptions, type LlamaCompletionGenerationOptions, type LlamaInfillGenerationOptions, type LlamaCompletionResponse, TokenMeter, type TokenMeterState, UnsupportedError, InsufficientMemoryError, DisposedError, ChatWrapper, type ChatWrapperSettings, type ChatWrapperGenerateContextStateOptions, type ChatWrapperGeneratedContextState, EmptyChatWrapper, Llama3ChatWrapper, Llama2ChatWrapper, GeneralChatWrapper, ChatMLChatWrapper, FalconChatWrapper, AlpacaChatWrapper, FunctionaryChatWrapper, GemmaChatWrapper, TemplateChatWrapper, type TemplateChatWrapperOptions, JinjaTemplateChatWrapper, type JinjaTemplateChatWrapperOptions, type JinjaTemplateChatWrapperOptionsConvertMessageFormat, type ChatHistoryFunctionCallMessageTemplate, resolveChatWrapper, type ResolveChatWrapperOptions, resolvableChatWrapperTypeNames, type ResolvableChatWrapperTypeName, specializedChatWrapperTypeNames, type SpecializedChatWrapperTypeName, templateChatWrapperTypeNames, type TemplateChatWrapperTypeName, ChatModelFunctionsDocumentationGenerator, LlamaText, SpecialTokensText, SpecialToken, isLlamaText, tokenizeText, type LlamaTextValue, type LlamaTextInputValue, type LlamaTextJSON, type LlamaTextJSONValue, type LlamaTextSpecialTokensTextJSON, type LlamaTextSpecialTokenJSON, type BuiltinSpecialTokenValue, appendUserMessageToChatHistory, getModuleVersion, type ChatHistoryItem, type ChatModelFunctionCall, type ChatModelFunctions, type ChatModelResponse, type ChatSessionModelFunction, type ChatSessionModelFunctions, type ChatSystemMessage, type ChatUserMessage, type Token, type Tokenizer, type Detokenizer, isChatModelResponseFunctionCall, type GbnfJsonSchema, type GbnfJsonSchemaToType, type GbnfJsonSchemaImmutableType, type GbnfJsonBasicSchema, type GbnfJsonConstSchema, type GbnfJsonEnumSchema, type GbnfJsonOneOfSchema, type GbnfJsonObjectSchema, type GbnfJsonArraySchema, LlamaVocabularyType, LlamaLogLevelGreaterThan, LlamaLogLevelGreaterThanOrEqual, readGgufFileInfo, type GgufFileInfo, type GgufMetadata, type GgufTensorInfo, type GgufMetadataLlmToType, GgufArchitectureType, GgufFileType, GgufMetadataTokenizerTokenType, GgufMetadataArchitecturePoolingType, type GgufMetadataGeneral, type GgufMetadataTokenizer, type GgufMetadataDefaultArchitectureType, type GgufMetadataLlmLLaMA, type GgufMetadataMPT, type GgufMetadataGPTNeoX, type GgufMetadataGPTJ, type GgufMetadataGPT2, type GgufMetadataBloom, type GgufMetadataFalcon, type GgufMetadataMamba, GgmlType, isGgufMetadataOfArchitectureType, GgufInsights, type GgufInsightsResourceRequirements, GgufInsightsConfigurationResolver, createModelDownloader, ModelDownloader, type ModelDownloaderOptions };
package/dist/index.js.map CHANGED
@@ -1 +1 @@
1
- {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,EAAC,aAAa,EAAE,wBAAwB,EAAE,+BAA+B,EAAE,mBAAmB,EAAC,MAAM,qBAAqB,CAAC;AAClI,OAAO,EAAC,UAAU,EAAE,sBAAsB,EAA0B,gBAAgB,EAAC,MAAM,sCAAsC,CAAC;AAClI,OAAO,EAAC,eAAe,EAAC,MAAM,iDAAiD,CAAC;AAChF,OAAO,EAAC,YAAY,EAA2B,MAAM,6BAA6B,CAAC;AACnF,OAAO,EAAC,sBAAsB,EAAC,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAC,8BAA8B,EAAC,MAAM,2DAA2D,CAAC;AACzG,OAAO,EAAC,2BAA2B,EAAqC,MAAM,4CAA4C,CAAC;AAC3H,OAAO,EAAC,YAAY,EAAE,oBAAoB,EAAC,MAAM,0CAA0C,CAAC;AAC5F,OAAO,EAAC,qBAAqB,EAAyD,MAAM,sCAAsC,CAAC;AAMnI,OAAO,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACnD,OAAO,EACH,gBAAgB,EAEnB,MAAM,kDAAkD,CAAC;AAC1D,OAAO,EAAC,yBAAyB,EAAC,MAAM,iEAAiE,CAAC;AAC1G,OAAO,EACH,SAAS,EAEZ,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACH,sCAAsC,EACzC,MAAM,8EAA8E,CAAC;AACtF,OAAO,EACH,eAAe,EAElB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAC,UAAU,EAAuB,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,kBAAkB,EAAC,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,sBAAsB,EAAC,MAAM,0CAA0C,CAAC;AAChF,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,mBAAmB,EAAkC,MAAM,+CAA+C,CAAC;AACnH,OAAO,EACH,wBAAwB,EAC3B,MAAM,oDAAoD,CAAC;AAE5D,OAAO,EACH,8BAA8B,EAAsC,+BAA+B,EAC9D,4BAA4B,EAAoC,kBAAkB,EAE1H,MAAM,4CAA4C,CAAC;AACpD,OAAO,EAAC,wCAAwC,EAAC,MAAM,kEAAkE,CAAC;AAC1H,OAAO,EACH,SAAS,EAAE,iBAAiB,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAGxE,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAC,8BAA8B,EAAC,MAAM,2CAA2C,CAAC;AACzF,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,gBAAgB,EAAC,MAAM,4BAA4B,CAAC;AAC5D,OAAO,EAAC,YAAY,EAAwC,MAAM,iCAAiC,CAAC;AACpG,OAAO,EAAC,iCAAiC,EAAC,MAAM,sDAAsD,CAAC;AACvG,OAAO,EAAC,qBAAqB,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AAErH,OAAO,EAG2C,+BAA+B,EAEhF,MAAM,YAAY,CAAC;AAMpB,OAAO,EAC4C,oBAAoB,EAAE,YAAY,EAAE,8BAA8B,EACjH,mCAAmC,EAE6D,gCAAgC,EACnI,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAC,QAAQ,EAAsB,MAAM,qCAAqC,CAAC;AAGlF,OAAO,EACH,KAAK,EACL,QAAQ,EAGR,aAAa,EACb,kBAAkB,EAClB,UAAU,EACV,gBAAgB,EAChB,sBAAsB,EACtB,eAAe,EAEf,YAAY,EAEZ,sBAAsB,EACtB,8BAA8B,EAC9B,2BAA2B,EAE3B,YAAY,EACZ,oBAAoB,EAWpB,SAAS,EACT,qBAAqB,EAGrB,gBAAgB,EAChB,yBAAyB,EAOzB,SAAS,EAST,sCAAsC,EAEtC,eAAe,EAKf,UAAU,EAEV,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,WAAW,EAIX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,iBAAiB,EACjB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EAEnB,wBAAwB,EAIxB,kBAAkB,EAElB,8BAA8B,EAE9B,+BAA+B,EAE/B,4BAA4B,EAE5B,wCAAwC,EACxC,SAAS,EACT,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,YAAY,EAQZ,8BAA8B,EAC9B,gBAAgB,EAYhB,+BAA+B,EAU/B,mBAAmB,EACnB,wBAAwB,EACxB,+BAA+B,EAC/B,gBAAgB,EAKhB,oBAAoB,EACpB,YAAY,EACZ,8BAA8B,EAC9B,mCAAmC,EAanC,QAAQ,EACR,gCAAgC,EAChC,YAAY,EAEZ,iCAAiC,EACjC,qBAAqB,EACrB,eAAe,EAElB,CAAC"}
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,aAAa,EAAC,MAAM,iBAAiB,CAAC;AAC9C,OAAO,EAAC,KAAK,EAAC,MAAM,qBAAqB,CAAC;AAC1C,OAAO,EAAC,QAAQ,EAA2C,MAAM,wBAAwB,CAAC;AAC1F,OAAO,EAAC,kBAAkB,EAAC,MAAM,wCAAwC,CAAC;AAC1E,OAAO,EACgB,aAAa,EAAE,wBAAwB,EAAE,+BAA+B,EAAE,mBAAmB,EACnH,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAC,UAAU,EAAE,sBAAsB,EAA0B,gBAAgB,EAAC,MAAM,sCAAsC,CAAC;AAClI,OAAO,EAAC,eAAe,EAAC,MAAM,iDAAiD,CAAC;AAChF,OAAO,EAAC,YAAY,EAA2B,MAAM,6BAA6B,CAAC;AACnF,OAAO,EAAC,sBAAsB,EAAC,MAAM,uCAAuC,CAAC;AAC7E,OAAO,EAAC,8BAA8B,EAAC,MAAM,2DAA2D,CAAC;AACzG,OAAO,EAAC,2BAA2B,EAAqC,MAAM,4CAA4C,CAAC;AAC3H,OAAO,EAAC,YAAY,EAAE,oBAAoB,EAAC,MAAM,0CAA0C,CAAC;AAC5F,OAAO,EAAC,qBAAqB,EAAyD,MAAM,sCAAsC,CAAC;AAMnI,OAAO,EAAC,SAAS,EAAC,MAAM,0BAA0B,CAAC;AACnD,OAAO,EACH,gBAAgB,EAEnB,MAAM,kDAAkD,CAAC;AAC1D,OAAO,EAAC,yBAAyB,EAAC,MAAM,iEAAiE,CAAC;AAC1G,OAAO,EACH,SAAS,EAEZ,MAAM,oCAAoC,CAAC;AAC5C,OAAO,EACH,sCAAsC,EACzC,MAAM,8EAA8E,CAAC;AACtF,OAAO,EACH,eAAe,EAElB,MAAM,gCAAgC,CAAC;AACxC,OAAO,EAAC,UAAU,EAAuB,MAAM,2BAA2B,CAAC;AAC3E,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,uBAAuB,EAAC,MAAM,oCAAoC,CAAC;AAC3E,OAAO,EAAC,WAAW,EAAC,MAAM,kBAAkB,CAAC;AAC7C,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,kBAAkB,EAAC,MAAM,sCAAsC,CAAC;AACxE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,iBAAiB,EAAC,MAAM,qCAAqC,CAAC;AACtE,OAAO,EAAC,sBAAsB,EAAC,MAAM,0CAA0C,CAAC;AAChF,OAAO,EAAC,gBAAgB,EAAC,MAAM,oCAAoC,CAAC;AACpE,OAAO,EAAC,mBAAmB,EAAkC,MAAM,+CAA+C,CAAC;AACnH,OAAO,EACH,wBAAwB,EAC3B,MAAM,oDAAoD,CAAC;AAE5D,OAAO,EACH,8BAA8B,EAAsC,+BAA+B,EAC9D,4BAA4B,EAAoC,kBAAkB,EAE1H,MAAM,4CAA4C,CAAC;AACpD,OAAO,EAAC,wCAAwC,EAAC,MAAM,kEAAkE,CAAC;AAC1H,OAAO,EACH,SAAS,EAAE,iBAAiB,EAAE,YAAY,EAAE,WAAW,EAAE,YAAY,EAGxE,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAC,8BAA8B,EAAC,MAAM,2CAA2C,CAAC;AACzF,OAAO,EAAC,gBAAgB,EAAC,MAAM,6BAA6B,CAAC;AAC7D,OAAO,EAAC,gBAAgB,EAAC,MAAM,4BAA4B,CAAC;AAC5D,OAAO,EAAC,YAAY,EAAwC,MAAM,iCAAiC,CAAC;AACpG,OAAO,EAAC,iCAAiC,EAAC,MAAM,sDAAsD,CAAC;AACvG,OAAO,EAAC,qBAAqB,EAAE,eAAe,EAA8B,MAAM,kCAAkC,CAAC;AAErH,OAAO,EAG2C,+BAA+B,EAEhF,MAAM,YAAY,CAAC;AAMpB,OAAO,EAC4C,oBAAoB,EAAE,YAAY,EAAE,8BAA8B,EACjH,mCAAmC,EAEsC,gCAAgC,EAC5G,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAC,QAAQ,EAAsB,MAAM,qCAAqC,CAAC;AAGlF,OAAO,EACH,KAAK,EACL,QAAQ,EAIR,aAAa,EACb,kBAAkB,EAClB,UAAU,EACV,gBAAgB,EAChB,sBAAsB,EACtB,eAAe,EAEf,YAAY,EAEZ,sBAAsB,EACtB,8BAA8B,EAC9B,2BAA2B,EAE3B,YAAY,EACZ,oBAAoB,EAWpB,SAAS,EACT,qBAAqB,EAGrB,gBAAgB,EAChB,yBAAyB,EAOzB,SAAS,EAST,sCAAsC,EAEtC,eAAe,EAKf,UAAU,EAEV,gBAAgB,EAChB,uBAAuB,EACvB,aAAa,EACb,WAAW,EAIX,gBAAgB,EAChB,iBAAiB,EACjB,iBAAiB,EACjB,kBAAkB,EAClB,iBAAiB,EACjB,iBAAiB,EACjB,iBAAiB,EACjB,sBAAsB,EACtB,gBAAgB,EAChB,mBAAmB,EAEnB,wBAAwB,EAIxB,kBAAkB,EAElB,8BAA8B,EAE9B,+BAA+B,EAE/B,4BAA4B,EAE5B,wCAAwC,EACxC,SAAS,EACT,iBAAiB,EACjB,YAAY,EACZ,WAAW,EACX,YAAY,EAQZ,8BAA8B,EAC9B,gBAAgB,EAYhB,+BAA+B,EAU/B,mBAAmB,EACnB,wBAAwB,EACxB,+BAA+B,EAC/B,gBAAgB,EAKhB,oBAAoB,EACpB,YAAY,EACZ,8BAA8B,EAC9B,mCAAmC,EAYnC,QAAQ,EACR,gCAAgC,EAChC,YAAY,EAEZ,iCAAiC,EACjC,qBAAqB,EACrB,eAAe,EAElB,CAAC"}
package/llama/addon.cpp CHANGED
@@ -108,20 +108,6 @@ static void adjustNapiExternalMemorySubtract(Napi::Env env, uint64_t size) {
108
108
  }
109
109
  }
110
110
 
111
- std::string addon_model_token_to_piece(const struct llama_model* model, llama_token token, bool specialTokens) {
112
- std::vector<char> result(8, 0);
113
- const int n_tokens = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
114
- if (n_tokens < 0) {
115
- result.resize(-n_tokens);
116
- int check = llama_token_to_piece(model, token, result.data(), result.size(), specialTokens);
117
- GGML_ASSERT(check == -n_tokens);
118
- } else {
119
- result.resize(n_tokens);
120
- }
121
-
122
- return std::string(result.data(), result.size());
123
- }
124
-
125
111
  #ifdef GPU_INFO_USE_CUDA
126
112
  void logCudaError(const char* message) {
127
113
  addonLlamaCppLogCallback(GGML_LOG_LEVEL_ERROR, (std::string("CUDA error: ") + std::string(message)).c_str(), nullptr);
@@ -395,21 +381,18 @@ class AddonModel : public Napi::ObjectWrap<AddonModel> {
395
381
  ? info[1].As<Napi::Boolean>().Value()
396
382
  : false;
397
383
 
398
- // Create a stringstream for accumulating the decoded string.
399
- std::stringstream ss;
400
-
401
- // Decode each token and accumulate the result.
402
- for (size_t i = 0; i < tokens.ElementLength(); i++) {
403
- const std::string piece = addon_model_token_to_piece(model, (llama_token)tokens[i], decodeSpecialTokens);
384
+ std::vector<char> result(8, 0);
385
+ const int n_length = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
404
386
 
405
- if (piece.empty()) {
406
- continue;
407
- }
408
-
409
- ss << piece;
387
+ if (n_length < 0) {
388
+ result.resize(-n_length);
389
+ int check = llama_detokenize(model, (llama_token*)tokens.Data(), tokens.ElementLength(), result.data(), result.size(), false, decodeSpecialTokens);
390
+ GGML_ASSERT(check == -n_length);
391
+ } else {
392
+ result.resize(n_length);
410
393
  }
411
394
 
412
- return Napi::String::New(info.Env(), ss.str());
395
+ return Napi::String::New(info.Env(), result.data(), result.size());
413
396
  }
414
397
 
415
398
  Napi::Value GetTrainContextSize(const Napi::CallbackInfo& info) {
@@ -987,6 +970,10 @@ class AddonContext : public Napi::ObjectWrap<AddonContext> {
987
970
  context_params.embeddings = options.Get("embeddings").As<Napi::Boolean>().Value();
988
971
  }
989
972
 
973
+ if (options.Has("flashAttention")) {
974
+ context_params.flash_attn = options.Get("flashAttention").As<Napi::Boolean>().Value();
975
+ }
976
+
990
977
  if (options.Has("threads")) {
991
978
  const auto n_threads = options.Get("threads").As<Napi::Number>().Uint32Value();
992
979
  const auto resolved_n_threads = n_threads == 0 ? std::thread::hardware_concurrency() : n_threads;
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b3267"
2
+ "release": "b3347"
3
3
  }
Binary file
@@ -1,4 +1,4 @@
1
1
  {
2
- "tag": "b3267",
2
+ "tag": "b3347",
3
3
  "llamaCppGithubRepo": "ggerganov/llama.cpp"
4
4
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-llama-cpp",
3
- "version": "3.0.0-beta.36",
3
+ "version": "3.0.0-beta.38",
4
4
  "description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
5
5
  "main": "./dist/index.js",
6
6
  "type": "module",
@@ -193,7 +193,7 @@
193
193
  }
194
194
  },
195
195
  "optionalDependencies": {
196
- "@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.36",
197
- "@node-llama-cpp/win-x64-cuda": "3.0.0-beta.36"
196
+ "@node-llama-cpp/linux-x64-cuda": "3.0.0-beta.38",
197
+ "@node-llama-cpp/win-x64-cuda": "3.0.0-beta.38"
198
198
  }
199
199
  }