node-llama-cpp 2.8.0 → 3.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/README.md +1 -1
  2. package/dist/chatWrappers/generateContextTextFromConversationHistory.d.ts +0 -8
  3. package/dist/chatWrappers/generateContextTextFromConversationHistory.js +0 -8
  4. package/dist/chatWrappers/generateContextTextFromConversationHistory.js.map +1 -1
  5. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.d.ts +13 -0
  6. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js +49 -0
  7. package/dist/chatWrappers/resolveChatWrapperBasedOnModel.js.map +1 -0
  8. package/dist/cli/cli.js +1 -1
  9. package/dist/cli/cli.js.map +1 -1
  10. package/dist/cli/commands/ChatCommand.js +20 -10
  11. package/dist/cli/commands/ChatCommand.js.map +1 -1
  12. package/dist/index.d.ts +6 -4
  13. package/dist/index.js +5 -4
  14. package/dist/index.js.map +1 -1
  15. package/dist/llamaEvaluator/LlamaBins.d.ts +19 -4
  16. package/dist/llamaEvaluator/LlamaBins.js +3 -3
  17. package/dist/llamaEvaluator/LlamaChatSession.d.ts +24 -23
  18. package/dist/llamaEvaluator/LlamaChatSession.js +90 -36
  19. package/dist/llamaEvaluator/LlamaChatSession.js.map +1 -1
  20. package/dist/llamaEvaluator/LlamaContext/LlamaContext.d.ts +112 -0
  21. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js +640 -0
  22. package/dist/llamaEvaluator/LlamaContext/LlamaContext.js.map +1 -0
  23. package/dist/llamaEvaluator/LlamaContext/types.d.ts +90 -0
  24. package/dist/llamaEvaluator/LlamaContext/types.js +2 -0
  25. package/dist/llamaEvaluator/LlamaContext/types.js.map +1 -0
  26. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.d.ts +5 -0
  27. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js +16 -0
  28. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/firstInFirstOutStrategy.js.map +1 -0
  29. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.d.ts +5 -0
  30. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js +42 -0
  31. package/dist/llamaEvaluator/LlamaContext/utils/batchItemsPrioritizingStrategies/maximumParallelismStrategy.js.map +1 -0
  32. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.d.ts +2 -0
  33. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js +13 -0
  34. package/dist/llamaEvaluator/LlamaContext/utils/resolveBatchItemsPrioritizingStrategy.js.map +1 -0
  35. package/dist/llamaEvaluator/LlamaGrammar.d.ts +5 -5
  36. package/dist/llamaEvaluator/LlamaGrammar.js +7 -7
  37. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.d.ts +6 -5
  38. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js +8 -7
  39. package/dist/llamaEvaluator/LlamaGrammarEvaluationState.js.map +1 -1
  40. package/dist/llamaEvaluator/LlamaModel.d.ts +93 -112
  41. package/dist/llamaEvaluator/LlamaModel.js +294 -59
  42. package/dist/llamaEvaluator/LlamaModel.js.map +1 -1
  43. package/dist/types.d.ts +3 -1
  44. package/dist/utils/ReplHistory.js +1 -1
  45. package/dist/utils/ReplHistory.js.map +1 -1
  46. package/dist/utils/getBin.d.ts +71 -39
  47. package/dist/utils/getBin.js.map +1 -1
  48. package/dist/utils/getReleaseInfo.d.ts +1 -1
  49. package/dist/utils/getReleaseInfo.js.map +1 -1
  50. package/dist/utils/parseModelFileName.d.ts +9 -0
  51. package/dist/utils/parseModelFileName.js +68 -0
  52. package/dist/utils/parseModelFileName.js.map +1 -0
  53. package/dist/utils/parseModelTypeDescription.d.ts +6 -0
  54. package/dist/utils/parseModelTypeDescription.js +9 -0
  55. package/dist/utils/parseModelTypeDescription.js.map +1 -0
  56. package/llama/.clang-format +10 -9
  57. package/llama/addon.cpp +689 -356
  58. package/llama/binariesGithubRelease.json +1 -1
  59. package/llama/gitRelease.bundle +0 -0
  60. package/llama/grammars/README.md +2 -2
  61. package/llamaBins/linux-arm64/llama-addon.node +0 -0
  62. package/llamaBins/linux-armv7l/llama-addon.node +0 -0
  63. package/llamaBins/linux-x64/llama-addon.node +0 -0
  64. package/llamaBins/mac-arm64/ggml-metal.metal +107 -1
  65. package/llamaBins/mac-arm64/llama-addon.node +0 -0
  66. package/llamaBins/mac-x64/ggml-metal.metal +107 -1
  67. package/llamaBins/mac-x64/llama-addon.node +0 -0
  68. package/llamaBins/win-x64/llama-addon.exp +0 -0
  69. package/llamaBins/win-x64/llama-addon.lib +0 -0
  70. package/llamaBins/win-x64/llama-addon.node +0 -0
  71. package/package.json +13 -7
  72. package/dist/chatWrappers/createChatWrapperByBos.d.ts +0 -2
  73. package/dist/chatWrappers/createChatWrapperByBos.js +0 -14
  74. package/dist/chatWrappers/createChatWrapperByBos.js.map +0 -1
  75. package/dist/llamaEvaluator/LlamaContext.d.ts +0 -100
  76. package/dist/llamaEvaluator/LlamaContext.js +0 -141
  77. package/dist/llamaEvaluator/LlamaContext.js.map +0 -1
  78. package/dist/utils/withLock.d.ts +0 -1
  79. package/dist/utils/withLock.js +0 -19
  80. package/dist/utils/withLock.js.map +0 -1
@@ -1,3 +1,3 @@
1
1
  {
2
- "release": "b1492"
2
+ "release": "b1567"
3
3
  }
Binary file
@@ -55,7 +55,7 @@ The order of symbols in a sequence matter. For example, in `"1. " move " " move
55
55
 
56
56
  Alternatives, denoted by `|`, give different sequences that are acceptable. For example, in `move ::= pawn | nonpawn | castle`, `move` can be a `pawn` move, a `nonpawn` move, or a `castle`.
57
57
 
58
- Parentheses `()` can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optptional symbols (below) to a sequence.
58
+ Parentheses `()` can be used to group sequences, which allows for embedding alternatives in a larger rule or applying repetition and optional symbols (below) to a sequence.
59
59
 
60
60
  ## Repetition and Optional Symbols
61
61
 
@@ -67,7 +67,7 @@ Parentheses `()` can be used to group sequences, which allows for embedding alte
67
67
 
68
68
  Comments can be specified with `#`:
69
69
  ```
70
- # defines optional whitspace
70
+ # defines optional whitespace
71
71
  ws ::= [ \t\n]+
72
72
  ```
73
73
 
@@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
792
792
  constant int64_t & ne0,
793
793
  constant int64_t & ne1,
794
794
  uint3 tgpig[[threadgroup_position_in_grid]],
795
- uint tiisg[[thread_index_in_simdgroup]]) {
795
+ uint tiisg[[thread_index_in_simdgroup]]) {
796
796
 
797
797
  const int64_t r0 = tgpig.x;
798
798
  const int64_t rb = tgpig.y*N_F32_F32;
@@ -844,6 +844,79 @@ kernel void kernel_mul_mv_f32_f32(
844
844
  }
845
845
  }
846
846
 
847
+ #define N_F16_F16 4
848
+
849
+ kernel void kernel_mul_mv_f16_f16(
850
+ device const char * src0,
851
+ device const char * src1,
852
+ device float * dst,
853
+ constant int64_t & ne00,
854
+ constant int64_t & ne01,
855
+ constant int64_t & ne02,
856
+ constant uint64_t & nb00,
857
+ constant uint64_t & nb01,
858
+ constant uint64_t & nb02,
859
+ constant int64_t & ne10,
860
+ constant int64_t & ne11,
861
+ constant int64_t & ne12,
862
+ constant uint64_t & nb10,
863
+ constant uint64_t & nb11,
864
+ constant uint64_t & nb12,
865
+ constant int64_t & ne0,
866
+ constant int64_t & ne1,
867
+ uint3 tgpig[[threadgroup_position_in_grid]],
868
+ uint tiisg[[thread_index_in_simdgroup]]) {
869
+
870
+ const int64_t r0 = tgpig.x;
871
+ const int64_t rb = tgpig.y*N_F16_F16;
872
+ const int64_t im = tgpig.z;
873
+
874
+ device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
875
+
876
+ if (ne00 < 128) {
877
+ for (int row = 0; row < N_F16_F16; ++row) {
878
+ int r1 = rb + row;
879
+ if (r1 >= ne11) {
880
+ break;
881
+ }
882
+
883
+ device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
884
+
885
+ float sumf = 0;
886
+ for (int i = tiisg; i < ne00; i += 32) {
887
+ sumf += (half) x[i] * (half) y[i];
888
+ }
889
+
890
+ float all_sum = simd_sum(sumf);
891
+ if (tiisg == 0) {
892
+ dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
893
+ }
894
+ }
895
+ } else {
896
+ device const half4 * x4 = (device const half4 *)x;
897
+ for (int row = 0; row < N_F16_F16; ++row) {
898
+ int r1 = rb + row;
899
+ if (r1 >= ne11) {
900
+ break;
901
+ }
902
+
903
+ device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
904
+ device const half4 * y4 = (device const half4 *) y;
905
+
906
+ float sumf = 0;
907
+ for (int i = tiisg; i < ne00/4; i += 32) {
908
+ for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
909
+ }
910
+
911
+ float all_sum = simd_sum(sumf);
912
+ if (tiisg == 0) {
913
+ for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
914
+ dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
915
+ }
916
+ }
917
+ }
918
+ }
919
+
847
920
  kernel void kernel_mul_mv_f16_f32_1row(
848
921
  device const char * src0,
849
922
  device const char * src1,
@@ -1229,6 +1302,39 @@ kernel void kernel_rope(
1229
1302
  template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
1230
1303
  template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
1231
1304
 
1305
+ kernel void kernel_im2col_f16(
1306
+ device const float * x,
1307
+ device half * dst,
1308
+ constant int32_t & ofs0,
1309
+ constant int32_t & ofs1,
1310
+ constant int32_t & IW,
1311
+ constant int32_t & IH,
1312
+ constant int32_t & CHW,
1313
+ constant int32_t & s0,
1314
+ constant int32_t & s1,
1315
+ constant int32_t & p0,
1316
+ constant int32_t & p1,
1317
+ constant int32_t & d0,
1318
+ constant int32_t & d1,
1319
+ uint3 tgpig[[threadgroup_position_in_grid]],
1320
+ uint3 tgpg[[threadgroups_per_grid]],
1321
+ uint3 tpitg[[thread_position_in_threadgroup]],
1322
+ uint3 ntg[[threads_per_threadgroup]]) {
1323
+ const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
1324
+ const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
1325
+
1326
+ const int32_t offset_dst =
1327
+ (tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
1328
+ (tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
1329
+
1330
+ if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
1331
+ dst[offset_dst] = 0.0f;
1332
+ } else {
1333
+ const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
1334
+ dst[offset_dst] = x[offset_src + iih * IW + iiw];
1335
+ }
1336
+ }
1337
+
1232
1338
  kernel void kernel_cpy_f16_f16(
1233
1339
  device const half * src0,
1234
1340
  device half * dst,
@@ -792,7 +792,7 @@ kernel void kernel_mul_mv_f32_f32(
792
792
  constant int64_t & ne0,
793
793
  constant int64_t & ne1,
794
794
  uint3 tgpig[[threadgroup_position_in_grid]],
795
- uint tiisg[[thread_index_in_simdgroup]]) {
795
+ uint tiisg[[thread_index_in_simdgroup]]) {
796
796
 
797
797
  const int64_t r0 = tgpig.x;
798
798
  const int64_t rb = tgpig.y*N_F32_F32;
@@ -844,6 +844,79 @@ kernel void kernel_mul_mv_f32_f32(
844
844
  }
845
845
  }
846
846
 
847
+ #define N_F16_F16 4
848
+
849
+ kernel void kernel_mul_mv_f16_f16(
850
+ device const char * src0,
851
+ device const char * src1,
852
+ device float * dst,
853
+ constant int64_t & ne00,
854
+ constant int64_t & ne01,
855
+ constant int64_t & ne02,
856
+ constant uint64_t & nb00,
857
+ constant uint64_t & nb01,
858
+ constant uint64_t & nb02,
859
+ constant int64_t & ne10,
860
+ constant int64_t & ne11,
861
+ constant int64_t & ne12,
862
+ constant uint64_t & nb10,
863
+ constant uint64_t & nb11,
864
+ constant uint64_t & nb12,
865
+ constant int64_t & ne0,
866
+ constant int64_t & ne1,
867
+ uint3 tgpig[[threadgroup_position_in_grid]],
868
+ uint tiisg[[thread_index_in_simdgroup]]) {
869
+
870
+ const int64_t r0 = tgpig.x;
871
+ const int64_t rb = tgpig.y*N_F16_F16;
872
+ const int64_t im = tgpig.z;
873
+
874
+ device const half * x = (device const half *) (src0 + r0*nb01 + im/(ne12/ne02)*nb02);
875
+
876
+ if (ne00 < 128) {
877
+ for (int row = 0; row < N_F16_F16; ++row) {
878
+ int r1 = rb + row;
879
+ if (r1 >= ne11) {
880
+ break;
881
+ }
882
+
883
+ device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
884
+
885
+ float sumf = 0;
886
+ for (int i = tiisg; i < ne00; i += 32) {
887
+ sumf += (half) x[i] * (half) y[i];
888
+ }
889
+
890
+ float all_sum = simd_sum(sumf);
891
+ if (tiisg == 0) {
892
+ dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
893
+ }
894
+ }
895
+ } else {
896
+ device const half4 * x4 = (device const half4 *)x;
897
+ for (int row = 0; row < N_F16_F16; ++row) {
898
+ int r1 = rb + row;
899
+ if (r1 >= ne11) {
900
+ break;
901
+ }
902
+
903
+ device const half * y = (device const half *) (src1 + r1*nb11 + im*nb12);
904
+ device const half4 * y4 = (device const half4 *) y;
905
+
906
+ float sumf = 0;
907
+ for (int i = tiisg; i < ne00/4; i += 32) {
908
+ for (int k = 0; k < 4; ++k) sumf += (half) x4[i][k] * y4[i][k];
909
+ }
910
+
911
+ float all_sum = simd_sum(sumf);
912
+ if (tiisg == 0) {
913
+ for (int i = 4*(ne00/4); i < ne00; ++i) all_sum += (half) x[i] * y[i];
914
+ dst[im*ne1*ne0 + r1*ne0 + r0] = all_sum;
915
+ }
916
+ }
917
+ }
918
+ }
919
+
847
920
  kernel void kernel_mul_mv_f16_f32_1row(
848
921
  device const char * src0,
849
922
  device const char * src1,
@@ -1229,6 +1302,39 @@ kernel void kernel_rope(
1229
1302
  template [[host_name("kernel_rope_f32")]] kernel rope_t kernel_rope<float>;
1230
1303
  template [[host_name("kernel_rope_f16")]] kernel rope_t kernel_rope<half>;
1231
1304
 
1305
+ kernel void kernel_im2col_f16(
1306
+ device const float * x,
1307
+ device half * dst,
1308
+ constant int32_t & ofs0,
1309
+ constant int32_t & ofs1,
1310
+ constant int32_t & IW,
1311
+ constant int32_t & IH,
1312
+ constant int32_t & CHW,
1313
+ constant int32_t & s0,
1314
+ constant int32_t & s1,
1315
+ constant int32_t & p0,
1316
+ constant int32_t & p1,
1317
+ constant int32_t & d0,
1318
+ constant int32_t & d1,
1319
+ uint3 tgpig[[threadgroup_position_in_grid]],
1320
+ uint3 tgpg[[threadgroups_per_grid]],
1321
+ uint3 tpitg[[thread_position_in_threadgroup]],
1322
+ uint3 ntg[[threads_per_threadgroup]]) {
1323
+ const int32_t iiw = tgpig[2] * s0 + tpitg[2] * d0 - p0;
1324
+ const int32_t iih = tgpig[1] * s1 + tpitg[1] * d1 - p1;
1325
+
1326
+ const int32_t offset_dst =
1327
+ (tpitg[0] * tgpg[1] * tgpg[2] + tgpig[1] * tgpg[2] + tgpig[2]) * CHW +
1328
+ (tgpig[0] * (ntg[1] * ntg[2]) + tpitg[1] * ntg[2] + tpitg[2]);
1329
+
1330
+ if (iih < 0 || iih >= IH || iiw < 0 || iiw >= IW) {
1331
+ dst[offset_dst] = 0.0f;
1332
+ } else {
1333
+ const int32_t offset_src = tpitg[0] * ofs0 + tgpig[0] * ofs1;
1334
+ dst[offset_dst] = x[offset_src + iih * IW + iiw];
1335
+ }
1336
+ }
1337
+
1232
1338
  kernel void kernel_cpy_f16_f16(
1233
1339
  device const half * src0,
1234
1340
  device half * dst,
Binary file
Binary file
Binary file
Binary file
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "node-llama-cpp",
3
- "version": "2.8.0",
3
+ "version": "3.0.0-beta.1",
4
4
  "description": "Run AI models locally on your machine with node.js bindings for llama.cpp. Force a JSON schema on the model output on the generation level",
5
5
  "main": "dist/index.js",
6
6
  "type": "module",
@@ -97,6 +97,10 @@
97
97
  "bugs": {
98
98
  "url": "https://github.com/withcatai/node-llama-cpp/issues"
99
99
  },
100
+ "funding": {
101
+ "type": "github",
102
+ "url": "https://github.com/sponsors/giladgd"
103
+ },
100
104
  "homepage": "https://withcatai.github.io/node-llama-cpp/",
101
105
  "devDependencies": {
102
106
  "@commitlint/cli": "^17.7.1",
@@ -114,17 +118,18 @@
114
118
  "@vitest/coverage-v8": "^0.34.6",
115
119
  "eslint": "^8.46.0",
116
120
  "eslint-plugin-import": "^2.28.0",
117
- "eslint-plugin-node": "github:giladgd/eslint-plugin-node#dev/giladgd/fixImportExtentionFixingInTypeScript",
121
+ "eslint-plugin-jsdoc": "^46.9.0",
122
+ "eslint-plugin-n": "^16.3.1",
118
123
  "husky": "^8.0.3",
119
124
  "rimraf": "^5.0.1",
120
- "semantic-release": "^21.0.7",
125
+ "semantic-release": "^22.0.8",
121
126
  "ts-node": "^10.9.1",
122
127
  "tslib": "^2.6.1",
123
- "typedoc": "^0.25.1",
124
- "typedoc-plugin-markdown": "^4.0.0-next.22",
125
- "typedoc-plugin-mdn-links": "^3.1.0",
128
+ "typedoc": "^0.25.3",
129
+ "typedoc-plugin-markdown": "4.0.0-next.30",
130
+ "typedoc-plugin-mdn-links": "^3.1.5",
126
131
  "typedoc-vitepress-theme": "^1.0.0-next.3",
127
- "typescript": "^5.1.6",
132
+ "typescript": "^5.2.2",
128
133
  "vitepress": "^1.0.0-rc.20",
129
134
  "vitest": "^0.34.6",
130
135
  "zx": "^7.2.3"
@@ -138,6 +143,7 @@
138
143
  "cross-spawn": "^7.0.3",
139
144
  "env-var": "^7.3.1",
140
145
  "fs-extra": "^11.1.1",
146
+ "lifecycle-utils": "^1.1.3",
141
147
  "log-symbols": "^5.1.0",
142
148
  "node-addon-api": "^7.0.0",
143
149
  "octokit": "^3.1.0",
@@ -1,2 +0,0 @@
1
- import { LlamaChatPromptWrapper } from "./LlamaChatPromptWrapper.js";
2
- export declare function getChatWrapperByBos(bos: string | undefined | null): typeof LlamaChatPromptWrapper | null;
@@ -1,14 +0,0 @@
1
- import { LlamaChatPromptWrapper } from "./LlamaChatPromptWrapper.js";
2
- import { ChatMLChatPromptWrapper } from "./ChatMLChatPromptWrapper.js";
3
- export function getChatWrapperByBos(bos) {
4
- if (bos === "" || bos == null)
5
- return null;
6
- if ("<s>[INST] <<SYS>>\n".startsWith(bos)) {
7
- return LlamaChatPromptWrapper;
8
- }
9
- else if ("<|im_start|>system\n".startsWith(bos)) {
10
- return ChatMLChatPromptWrapper;
11
- }
12
- return null;
13
- }
14
- //# sourceMappingURL=createChatWrapperByBos.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"createChatWrapperByBos.js","sourceRoot":"","sources":["../../src/chatWrappers/createChatWrapperByBos.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,sBAAsB,EAAC,MAAM,6BAA6B,CAAC;AACnE,OAAO,EAAC,uBAAuB,EAAC,MAAM,8BAA8B,CAAC;AAErE,MAAM,UAAU,mBAAmB,CAAC,GAA8B;IAC9D,IAAI,GAAG,KAAK,EAAE,IAAI,GAAG,IAAI,IAAI;QACzB,OAAO,IAAI,CAAC;IAEhB,IAAI,qBAAqB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QACvC,OAAO,sBAAsB,CAAC;KACjC;SAAM,IAAI,sBAAsB,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE;QAC/C,OAAO,uBAAuB,CAAC;KAClC;IAED,OAAO,IAAI,CAAC;AAChB,CAAC"}
@@ -1,100 +0,0 @@
1
- import { Token } from "../types.js";
2
- import { LlamaModel } from "./LlamaModel.js";
3
- import { LlamaGrammarEvaluationState } from "./LlamaGrammarEvaluationState.js";
4
- import { LlamaGrammar } from "./LlamaGrammar.js";
5
- export type LlamaContextOptions = {
6
- model: LlamaModel;
7
- prependBos?: boolean;
8
- /**
9
- * @deprecated use the `grammar` option on `LlamaChatSession`'s `prompt` function
10
- * or the `grammarEvaluationState` option on `LlamaContext`'s `evaluate` function instead
11
- * @hidden
12
- */
13
- grammar?: LlamaGrammar;
14
- /** If null, a random seed will be used */
15
- seed?: number | null;
16
- /** text context size */
17
- contextSize?: number;
18
- /** prompt processing batch size */
19
- batchSize?: number;
20
- /** use fp16 for KV cache */
21
- f16Kv?: boolean;
22
- /** the llama_eval() call computes all logits, not just the last one */
23
- logitsAll?: boolean;
24
- /** embedding mode only */
25
- embedding?: boolean;
26
- /** number of threads to use to evaluate tokens */
27
- threads?: number;
28
- };
29
- export type LlamaContextRepeatPenalty = {
30
- /** Tokens to lower the predication probability of to be the next predicted token */
31
- punishTokens: Uint32Array | (() => Uint32Array);
32
- /**
33
- * The relative amount to lower the probability of the tokens in `punishTokens` by
34
- * Defaults to `1.1`.
35
- * Set to `1` to disable.
36
- */
37
- penalty?: number;
38
- /**
39
- * For n time a token is in the `punishTokens` array, lower its probability by `n * frequencyPenalty`
40
- * Disabled by default (`0`).
41
- * Set to a value between `0` and `1` to enable.
42
- */
43
- frequencyPenalty?: number;
44
- /**
45
- * Lower the probability of all the tokens in the `punishTokens` array by `presencePenalty`
46
- * Disabled by default (`0`).
47
- * Set to a value between `0` and `1` to enable.
48
- */
49
- presencePenalty?: number;
50
- };
51
- export declare class LlamaContext {
52
- private readonly _model;
53
- private readonly _ctx;
54
- private readonly _prependBos;
55
- private _prependTokens;
56
- /**
57
- * @param {LlamaContextOptions} options
58
- */
59
- constructor({ model, prependBos, grammar, seed, contextSize, batchSize, f16Kv, logitsAll, embedding, threads }: LlamaContextOptions);
60
- encode(text: string): Uint32Array;
61
- decode(tokens: Uint32Array | Token[]): string;
62
- get prependBos(): boolean;
63
- /**
64
- * @returns {Token | null} The BOS (Beginning Of Sequence) token.
65
- */
66
- getBosToken(): Token | null;
67
- /**
68
- * @returns {Token | null} The EOS (End Of Sequence) token.
69
- */
70
- getEosToken(): Token | null;
71
- /**
72
- * @returns {Token | null} The NL (New Line) token.
73
- */
74
- getNlToken(): Token | null;
75
- /**
76
- * @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
77
- */
78
- getBosString(): string | null;
79
- /**
80
- * @returns {string | null} The EOS (End Of Sequence) token as a string.
81
- */
82
- getEosString(): string | null;
83
- /**
84
- * @returns {string | null} The NL (New Line) token as a string.
85
- */
86
- getNlString(): string | null;
87
- getContextSize(): number;
88
- /**
89
- * @param {Uint32Array} tokens
90
- * @param {object} options
91
- * @returns {AsyncGenerator<Token, void>}
92
- */
93
- evaluate(tokens: Uint32Array, { temperature, topK, topP, grammarEvaluationState, repeatPenalty }?: {
94
- temperature?: number;
95
- topK?: number;
96
- topP?: number;
97
- grammarEvaluationState?: LlamaGrammarEvaluationState;
98
- repeatPenalty?: LlamaContextRepeatPenalty;
99
- }): AsyncGenerator<Token, void>;
100
- }
@@ -1,141 +0,0 @@
1
- import { removeNullFields } from "../utils/removeNullFields.js";
2
- import { LLAMAContext } from "./LlamaBins.js";
3
- export class LlamaContext {
4
- _model;
5
- _ctx;
6
- _prependBos;
7
- _prependTokens;
8
- /** @internal */
9
- _chatGrammar;
10
- /**
11
- * @param {LlamaContextOptions} options
12
- */
13
- constructor({ model, prependBos = true, grammar, seed = model._contextOptions.seed, contextSize = model._contextOptions.contextSize, batchSize = model._contextOptions.batchSize, f16Kv = model._contextOptions.f16Kv, logitsAll = model._contextOptions.logitsAll, embedding = model._contextOptions.embedding, threads = model._contextOptions.threads }) {
14
- this._model = model;
15
- this._ctx = new LLAMAContext(model._model, removeNullFields({
16
- seed: seed != null ? Math.max(-1, seed) : undefined,
17
- contextSize,
18
- batchSize,
19
- f16Kv,
20
- logitsAll,
21
- embedding,
22
- threads
23
- }));
24
- this._prependBos = prependBos;
25
- this._prependTokens = [];
26
- this._chatGrammar = grammar;
27
- if (prependBos) {
28
- this._prependTokens.unshift(this._ctx.tokenBos());
29
- }
30
- }
31
- encode(text) {
32
- if (text === "")
33
- return new Uint32Array();
34
- return this._ctx.encode(text);
35
- }
36
- decode(tokens) {
37
- if (tokens.length === 0)
38
- return "";
39
- if (tokens instanceof Uint32Array)
40
- return this._ctx.decode(tokens);
41
- return this._ctx.decode(Uint32Array.from(tokens));
42
- }
43
- get prependBos() {
44
- return this._prependBos;
45
- }
46
- /**
47
- * @returns {Token | null} The BOS (Beginning Of Sequence) token.
48
- */
49
- getBosToken() {
50
- const bosToken = this._ctx.tokenBos();
51
- if (bosToken === -1)
52
- return null;
53
- return bosToken;
54
- }
55
- /**
56
- * @returns {Token | null} The EOS (End Of Sequence) token.
57
- */
58
- getEosToken() {
59
- const eosToken = this._ctx.tokenEos();
60
- if (eosToken === -1)
61
- return null;
62
- return eosToken;
63
- }
64
- /**
65
- * @returns {Token | null} The NL (New Line) token.
66
- */
67
- getNlToken() {
68
- const nlToken = this._ctx.tokenNl();
69
- if (nlToken === -1)
70
- return null;
71
- return nlToken;
72
- }
73
- /**
74
- * @returns {string | null} The BOS (Beginning Of Sequence) token as a string.
75
- */
76
- getBosString() {
77
- const bosToken = this.getBosToken();
78
- if (bosToken == null)
79
- return null;
80
- return this._ctx.getTokenString(bosToken);
81
- }
82
- /**
83
- * @returns {string | null} The EOS (End Of Sequence) token as a string.
84
- */
85
- getEosString() {
86
- const eosToken = this.getEosToken();
87
- if (eosToken == null)
88
- return null;
89
- return this._ctx.getTokenString(eosToken);
90
- }
91
- /**
92
- * @returns {string | null} The NL (New Line) token as a string.
93
- */
94
- getNlString() {
95
- const nlToken = this.getNlToken();
96
- if (nlToken == null)
97
- return null;
98
- return this._ctx.getTokenString(nlToken);
99
- }
100
- getContextSize() {
101
- return this._ctx.getContextSize();
102
- }
103
- /**
104
- * @param {Uint32Array} tokens
105
- * @param {object} options
106
- * @returns {AsyncGenerator<Token, void>}
107
- */
108
- async *evaluate(tokens, { temperature = this._model._evaluationOptions.temperature, topK = this._model._evaluationOptions.topK, topP = this._model._evaluationOptions.topP, grammarEvaluationState, repeatPenalty } = {}) {
109
- let evalTokens = tokens;
110
- if (this._prependTokens.length > 0) {
111
- const tokenArray = this._prependTokens.concat(Array.from(tokens));
112
- evalTokens = Uint32Array.from(tokenArray);
113
- this._prependTokens = [];
114
- }
115
- if (evalTokens.length === 0)
116
- return;
117
- // eslint-disable-next-line no-constant-condition
118
- while (true) {
119
- // Evaluate to get the next token.
120
- const nextToken = await this._ctx.eval(evalTokens, removeNullFields({
121
- temperature,
122
- topK,
123
- topP,
124
- repeatPenalty: repeatPenalty?.penalty,
125
- repeatPenaltyTokens: repeatPenalty?.punishTokens instanceof Function
126
- ? repeatPenalty.punishTokens()
127
- : repeatPenalty?.punishTokens,
128
- repeatPenaltyPresencePenalty: repeatPenalty?.presencePenalty,
129
- repeatPenaltyFrequencyPenalty: repeatPenalty?.frequencyPenalty,
130
- grammarEvaluationState: grammarEvaluationState?._state
131
- }));
132
- // the assistant finished answering
133
- if (nextToken === this._ctx.tokenEos())
134
- break;
135
- yield nextToken;
136
- // Create tokens for the next eval.
137
- evalTokens = Uint32Array.from([nextToken]);
138
- }
139
- }
140
- }
141
- //# sourceMappingURL=LlamaContext.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"LlamaContext.js","sourceRoot":"","sources":["../../src/llamaEvaluator/LlamaContext.ts"],"names":[],"mappings":"AAAA,OAAO,EAAC,gBAAgB,EAAC,MAAM,8BAA8B,CAAC;AAE9D,OAAO,EAAC,YAAY,EAAC,MAAM,gBAAgB,CAAC;AAiE5C,MAAM,OAAO,YAAY;IACJ,MAAM,CAAa;IACnB,IAAI,CAAe;IACnB,WAAW,CAAU;IAC9B,cAAc,CAAU;IAEhC,gBAAgB;IACA,YAAY,CAAgB;IAG5C;;OAEG;IACH,YAAmB,EACf,KAAK,EACL,UAAU,GAAG,IAAI,EACjB,OAAO,EACP,IAAI,GAAG,KAAK,CAAC,eAAe,CAAC,IAAI,EACjC,WAAW,GAAG,KAAK,CAAC,eAAe,CAAC,WAAW,EAC/C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,KAAK,GAAG,KAAK,CAAC,eAAe,CAAC,KAAK,EACnC,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,SAAS,GAAG,KAAK,CAAC,eAAe,CAAC,SAAS,EAC3C,OAAO,GAAG,KAAK,CAAC,eAAe,CAAC,OAAO,EACrB;QAClB,IAAI,CAAC,MAAM,GAAG,KAAK,CAAC;QACpB,IAAI,CAAC,IAAI,GAAG,IAAI,YAAY,CAAC,KAAK,CAAC,MAAM,EAAE,gBAAgB,CAAC;YACxD,IAAI,EAAE,IAAI,IAAI,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC,CAAC,SAAS;YACnD,WAAW;YACX,SAAS;YACT,KAAK;YACL,SAAS;YACT,SAAS;YACT,OAAO;SACV,CAAC,CAAC,CAAC;QACJ,IAAI,CAAC,WAAW,GAAG,UAAU,CAAC;QAC9B,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;QACzB,IAAI,CAAC,YAAY,GAAG,OAAO,CAAC;QAE5B,IAAI,UAAU,EAAE;YACZ,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC,CAAC;SACrD;IACL,CAAC;IAEM,MAAM,CAAC,IAAY;QACtB,IAAI,IAAI,KAAK,EAAE;YACX,OAAO,IAAI,WAAW,EAAE,CAAC;QAE7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IAClC,CAAC;IAEM,MAAM,CAAC,MAA6B;QACvC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC;YACnB,OAAO,EAAE,CAAC;QAEd,IAAI,MAAM,YAAY,WAAW;YAC7B,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QAEpC,OAAO,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;IACtD,CAAC;IAED,IAAW,UAAU;QACjB,OAAO,IAAI,CAAC,WAAW,CAAC;IAC5B,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE,CAAC;QAEtC,IAAI,QAAQ,KAAK,CAAC,CAAC;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED;;OAEG;IACI,UAAU;QACb,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,CAAC;QAEpC,IAAI,OAAO,KAAK,CAAC,CAAC;YACd,OAAO,IAAI,CAAC;QAEhB,OAAO,OAAO,CAAC;IACnB,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,YAAY;QACf,MAAM,QAAQ,GAAG,IAAI,CAAC,WAAW,EAAE,CAAC;QAEpC,IAAI,QAAQ,IAAI,IAAI;YAChB,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,QAAQ,CAAC,CAAC;IAC9C,CAAC;IAED;;OAEG;IACI,WAAW;QACd,MAAM,OAAO,GAAG,IAAI,CAAC,UAAU,EAAE,CAAC;QAElC,IAAI,OAAO,IAAI,IAAI;YACf,OAAO,IAAI,CAAC;QAEhB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,CAAC,OAAO,CAAC,CAAC;IAC7C,CAAC;IAEM,cAAc;QACjB,OAAO,IAAI,CAAC,IAAI,CAAC,cAAc,EAAE,CAAC;IACtC,CAAC;IAED;;;;OAIG;IACI,KAAK,CAAC,CAAC,QAAQ,CAAC,MAAmB,EAAE,EACxC,WAAW,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,WAAW,EACxD,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,IAAI,GAAG,IAAI,CAAC,MAAM,CAAC,kBAAkB,CAAC,IAAI,EAC1C,sBAAsB,EACtB,aAAa,KAIb,EAAE;QACF,IAAI,UAAU,GAAG,MAAM,CAAC;QAExB,IAAI,IAAI,CAAC,cAAc,CAAC,MAAM,GAAG,CAAC,EAAE;YAChC,MAAM,UAAU,GAAY,IAAI,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC;YAE3E,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;YAC1C,IAAI,CAAC,cAAc,GAAG,EAAE,CAAC;SAC5B;QAED,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;YACvB,OAAO;QAEX,iDAAiD;QACjD,OAAO,IAAI,EAAE;YACT,kCAAkC;YAClC,MAAM,SAAS,GAAU,MAAM,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,EAAE,gBAAgB,CAAC;gBACvE,WAAW;gBACX,IAAI;gBACJ,IAAI;gBACJ,aAAa,EAAE,aAAa,EAAE,OAAO;gBACrC,mBAAmB,EAAE,aAAa,EAAE,YAAY,YAAY,QAAQ;oBAChE,CAAC,CAAC,aAAa,CAAC,YAAY,EAAE;oBAC9B,CAAC,CAAC,aAAa,EAAE,YAAY;gBACjC,4BAA4B,EAAE,aAAa,EAAE,eAAe;gBAC5D,6BAA6B,EAAE,aAAa,EAAE,gBAAgB;gBAC9D,sBAAsB,EAAE,sBAAsB,EAAE,MAAM;aACzD,CAAC,CAAC,CAAC;YAEJ,mCAAmC;YACnC,IAAI,SAAS,KAAK,IAAI,CAAC,IAAI,CAAC,QAAQ,EAAE;gBAClC,MAAM;YAEV,MAAM,SAAS,CAAC;YAEhB,mCAAmC;YACnC,UAAU,GAAG,WAAW,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC;SAC9C;IACL,CAAC;CACJ"}
@@ -1 +0,0 @@
1
- export declare function withLock<ReturnType>(scope: any, key: string, callback: () => Promise<ReturnType>): Promise<ReturnType>;
@@ -1,19 +0,0 @@
1
- const locks = new Map();
2
- export async function withLock(scope, key, callback) {
3
- while (locks.get(scope)?.has(key)) {
4
- await locks.get(scope)?.get(key);
5
- }
6
- const promise = callback();
7
- if (!locks.has(scope))
8
- locks.set(scope, new Map());
9
- locks.get(scope).set(key, promise);
10
- try {
11
- return await promise;
12
- }
13
- finally {
14
- locks.get(scope)?.delete(key);
15
- if (locks.get(scope)?.size === 0)
16
- locks.delete(scope);
17
- }
18
- }
19
- //# sourceMappingURL=withLock.js.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"withLock.js","sourceRoot":"","sources":["../../src/utils/withLock.ts"],"names":[],"mappings":"AAAA,MAAM,KAAK,GAAG,IAAI,GAAG,EAAkC,CAAC;AAExD,MAAM,CAAC,KAAK,UAAU,QAAQ,CAAa,KAAU,EAAE,GAAW,EAAE,QAAmC;IACnG,OAAO,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,EAAE;QAC/B,MAAM,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,GAAG,CAAC,GAAG,CAAC,CAAC;KACpC;IAED,MAAM,OAAO,GAAG,QAAQ,EAAE,CAAC;IAE3B,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC;QACjB,KAAK,CAAC,GAAG,CAAC,KAAK,EAAE,IAAI,GAAG,EAAE,CAAC,CAAC;IAEhC,KAAK,CAAC,GAAG,CAAC,KAAK,CAAE,CAAC,GAAG,CAAC,GAAG,EAAE,OAAO,CAAC,CAAC;IAEpC,IAAI;QACA,OAAO,MAAM,OAAO,CAAC;KACxB;YAAS;QACN,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,MAAM,CAAC,GAAG,CAAC,CAAC;QAE9B,IAAI,KAAK,CAAC,GAAG,CAAC,KAAK,CAAC,EAAE,IAAI,KAAK,CAAC;YAC5B,KAAK,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;KAC3B;AACL,CAAC"}