langchain 0.0.176 → 0.0.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chat_models/bedrock.cjs +25 -4
- package/dist/chat_models/bedrock.d.ts +2 -1
- package/dist/chat_models/bedrock.js +25 -4
- package/dist/chat_models/llama_cpp.cjs +31 -79
- package/dist/chat_models/llama_cpp.d.ts +15 -58
- package/dist/chat_models/llama_cpp.js +32 -80
- package/dist/chat_models/openai.cjs +91 -6
- package/dist/chat_models/openai.d.ts +10 -0
- package/dist/chat_models/openai.js +91 -6
- package/dist/embeddings/hf.cjs +10 -1
- package/dist/embeddings/hf.d.ts +4 -2
- package/dist/embeddings/hf.js +10 -1
- package/dist/embeddings/llama_cpp.cjs +67 -0
- package/dist/embeddings/llama_cpp.d.ts +26 -0
- package/dist/embeddings/llama_cpp.js +63 -0
- package/dist/embeddings/ollama.cjs +7 -1
- package/dist/embeddings/ollama.js +7 -1
- package/dist/llms/bedrock.cjs +25 -3
- package/dist/llms/bedrock.d.ts +2 -1
- package/dist/llms/bedrock.js +25 -3
- package/dist/llms/hf.cjs +10 -1
- package/dist/llms/hf.d.ts +3 -0
- package/dist/llms/hf.js +10 -1
- package/dist/llms/llama_cpp.cjs +25 -65
- package/dist/llms/llama_cpp.d.ts +7 -43
- package/dist/llms/llama_cpp.js +25 -65
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/prompts/few_shot.cjs +162 -1
- package/dist/prompts/few_shot.d.ts +90 -2
- package/dist/prompts/few_shot.js +160 -0
- package/dist/prompts/index.cjs +2 -1
- package/dist/prompts/index.d.ts +1 -1
- package/dist/prompts/index.js +1 -1
- package/dist/retrievers/zep.cjs +26 -3
- package/dist/retrievers/zep.d.ts +11 -2
- package/dist/retrievers/zep.js +26 -3
- package/dist/util/bedrock.d.ts +2 -0
- package/dist/util/llama_cpp.cjs +34 -0
- package/dist/util/llama_cpp.d.ts +46 -0
- package/dist/util/llama_cpp.js +28 -0
- package/dist/util/openai-format-fndef.cjs +81 -0
- package/dist/util/openai-format-fndef.d.ts +44 -0
- package/dist/util/openai-format-fndef.js +77 -0
- package/dist/util/openapi.d.ts +2 -2
- package/dist/vectorstores/pinecone.cjs +5 -5
- package/dist/vectorstores/pinecone.d.ts +2 -2
- package/dist/vectorstores/pinecone.js +5 -5
- package/embeddings/llama_cpp.cjs +1 -0
- package/embeddings/llama_cpp.d.ts +1 -0
- package/embeddings/llama_cpp.js +1 -0
- package/package.json +13 -5

package/dist/chat_models/bedrock.cjs

@@ -133,6 +133,12 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
             writable: true,
             value: new eventstream_codec_1.EventStreamCodec(util_utf8_1.toUtf8, util_utf8_1.fromUtf8)
         });
+        Object.defineProperty(this, "streaming", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
         this.model = fields?.model ?? this.model;
         const allowedModels = ["ai21", "anthropic", "amazon"];
         if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -150,6 +156,7 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
         this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
         this.stopSequences = fields?.stopSequences;
         this.modelKwargs = fields?.modelKwargs;
+        this.streaming = fields?.streaming ?? this.streaming;
     }
     /** Call out to Bedrock service model.
         Arguments:
@@ -161,10 +168,23 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
         Example:
             response = model.call("Tell me a joke.")
     */
-    async _call(messages, options) {
+    async _call(messages, options, runManager) {
         const service = "bedrock-runtime";
         const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
         const provider = this.model.split(".")[0];
+        if (this.streaming) {
+            const stream = this._streamResponseChunks(messages, options, runManager);
+            let finalResult;
+            for await (const chunk of stream) {
+                if (finalResult === undefined) {
+                    finalResult = chunk;
+                }
+                else {
+                    finalResult = finalResult.concat(chunk);
+                }
+            }
+            return finalResult?.message.content ?? "";
+        }
         const response = await this._signedFetch(messages, options, {
             bedrockMethod: "invoke",
             endpointHost,
@@ -233,7 +253,6 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
                 event.headers[":content-type"].value !== "application/json") {
                 throw Error(`Failed to get event chunk: got ${chunk}`);
             }
-            // console.log(decoder.decode(event.body));
             const body = JSON.parse(decoder.decode(event.body));
             if (body.message) {
                 throw new Error(body.message);
@@ -245,7 +264,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
                     text,
                     message: new index_js_1.AIMessageChunk({ content: text }),
                 });
-
+                // eslint-disable-next-line no-void
+                void runManager?.handleLLMNewToken(text);
             }
         }
     }
@@ -256,7 +276,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
                 text,
                 message: new index_js_1.AIMessageChunk({ content: text }),
             });
-
+            // eslint-disable-next-line no-void
+            void runManager?.handleLLMNewToken(text);
         }
     }
     // eslint-disable-next-line @typescript-eslint/no-explicit-any

package/dist/chat_models/bedrock.d.ts

@@ -33,6 +33,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
     stopSequences?: string[];
     modelKwargs?: Record<string, unknown>;
     codec: EventStreamCodec;
+    streaming: boolean;
     get lc_secrets(): {
         [key: string]: string;
     } | undefined;
@@ -49,7 +50,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
         Example:
             response = model.call("Tell me a joke.")
     */
-    _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+    _call(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
     _signedFetch(messages: BaseMessage[], options: this["ParsedCallOptions"], fields: {
         bedrockMethod: "invoke" | "invoke-with-response-stream";
         endpointHost: string;

package/dist/chat_models/bedrock.js

@@ -128,6 +128,12 @@ export class ChatBedrock extends SimpleChatModel {
             writable: true,
             value: new EventStreamCodec(toUtf8, fromUtf8)
         });
+        Object.defineProperty(this, "streaming", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: false
+        });
         this.model = fields?.model ?? this.model;
         const allowedModels = ["ai21", "anthropic", "amazon"];
         if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -145,6 +151,7 @@ export class ChatBedrock extends SimpleChatModel {
         this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
         this.stopSequences = fields?.stopSequences;
         this.modelKwargs = fields?.modelKwargs;
+        this.streaming = fields?.streaming ?? this.streaming;
     }
     /** Call out to Bedrock service model.
         Arguments:
@@ -156,10 +163,23 @@ export class ChatBedrock extends SimpleChatModel {
         Example:
             response = model.call("Tell me a joke.")
     */
-    async _call(messages, options) {
+    async _call(messages, options, runManager) {
         const service = "bedrock-runtime";
         const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
         const provider = this.model.split(".")[0];
+        if (this.streaming) {
+            const stream = this._streamResponseChunks(messages, options, runManager);
+            let finalResult;
+            for await (const chunk of stream) {
+                if (finalResult === undefined) {
+                    finalResult = chunk;
+                }
+                else {
+                    finalResult = finalResult.concat(chunk);
+                }
+            }
+            return finalResult?.message.content ?? "";
+        }
         const response = await this._signedFetch(messages, options, {
             bedrockMethod: "invoke",
             endpointHost,
@@ -228,7 +248,6 @@ export class ChatBedrock extends SimpleChatModel {
                 event.headers[":content-type"].value !== "application/json") {
                 throw Error(`Failed to get event chunk: got ${chunk}`);
             }
-            // console.log(decoder.decode(event.body));
             const body = JSON.parse(decoder.decode(event.body));
             if (body.message) {
                 throw new Error(body.message);
@@ -240,7 +259,8 @@ export class ChatBedrock extends SimpleChatModel {
                     text,
                     message: new AIMessageChunk({ content: text }),
                 });
-
+                // eslint-disable-next-line no-void
+                void runManager?.handleLLMNewToken(text);
             }
         }
     }
@@ -251,7 +271,8 @@ export class ChatBedrock extends SimpleChatModel {
                 text,
                 message: new AIMessageChunk({ content: text }),
             });
-
+            // eslint-disable-next-line no-void
+            void runManager?.handleLLMNewToken(text);
         }
     }
     // eslint-disable-next-line @typescript-eslint/no-explicit-any
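
Taken together, the bedrock changes add an opt-in `streaming` constructor flag and thread an optional `runManager` through `_call`, so each decoded event-stream chunk is accumulated into the final message and surfaced via `handleLLMNewToken`. A minimal usage sketch (the model id, region, and callback body are illustrative assumptions, not part of the diff; AWS credentials are expected in the environment):

```ts
import { ChatBedrock } from "langchain/chat_models/bedrock";
import { HumanMessage } from "langchain/schema";

// Illustrative values: any supported Bedrock provider ("ai21",
// "anthropic", "amazon") and region should work the same way.
const chat = new ChatBedrock({
  model: "anthropic.claude-v2",
  region: "us-east-1",
  // New in 0.0.177: _call() consumes _streamResponseChunks() and
  // concatenates the chunks into a single response message.
  streaming: true,
  callbacks: [
    {
      // With streaming enabled, each decoded chunk now reaches the
      // run manager, so tokens arrive here as they are generated.
      handleLLMNewToken(token: string) {
        process.stdout.write(token);
      },
    },
  ],
});

const response = await chat.call([new HumanMessage("Tell me a joke.")]);
console.log(response.content);
```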

package/dist/chat_models/llama_cpp.cjs

@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
 exports.ChatLlamaCpp = void 0;
 const node_llama_cpp_1 = require("node-llama-cpp");
 const base_js_1 = require("./base.cjs");
+const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
 /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -15,73 +16,31 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
     }
     constructor(inputs) {
         super(inputs);
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "maxTokens", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "temperature", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topK", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topP", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "logitsAll", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "lowVram", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "seed", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMlock", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMmap", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "vocabOnly", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "modelPath", {
+        Object.defineProperty(this, "trimWhitespaceSuffix", {
             enumerable: true,
             configurable: true,
             writable: true,
@@ -105,47 +64,33 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
             writable: true,
             value: void 0
         });
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.modelPath = inputs.modelPath;
-        this.seed = inputs?.seed;
-        this.useMlock = inputs?.useMlock;
-        this.useMmap = inputs?.useMmap;
-        this.vocabOnly = inputs?.vocabOnly;
-        this._model = new node_llama_cpp_1.LlamaModel(inputs);
-        this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
+        this.maxTokens = inputs?.maxTokens;
+        this.temperature = inputs?.temperature;
+        this.topK = inputs?.topK;
+        this.topP = inputs?.topP;
+        this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+        this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+        this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
         this._session = null;
     }
     _llmType() {
         return "llama2_cpp";
     }
-    invocationParams() {
-        return {
-            batchSize: this.batchSize,
-            contextSize: this.contextSize,
-            embedding: this.embedding,
-            f16Kv: this.f16Kv,
-            gpuLayers: this.gpuLayers,
-            logitsAll: this.logitsAll,
-            lowVram: this.lowVram,
-            modelPath: this.modelPath,
-            seed: this.seed,
-            useMlock: this.useMlock,
-            useMmap: this.useMmap,
-            vocabOnly: this.vocabOnly,
-        };
-    }
     /** @ignore */
     _combineLLMOutput() {
         return {};
     }
+    invocationParams() {
+        return {
+            maxTokens: this.maxTokens,
+            temperature: this.temperature,
+            topK: this.topK,
+            topP: this.topP,
+            trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+        };
+    }
     /** @ignore */
-    async _call(messages,
+    async _call(messages, _options) {
         let prompt = "";
         if (messages.length > 1) {
             // We need to build a new _session
@@ -159,8 +104,15 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
             prompt = messages[0].content;
         }
         try {
+            const promptOptions = {
+                maxTokens: this?.maxTokens,
+                temperature: this?.temperature,
+                topK: this?.topK,
+                topP: this?.topP,
+                trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+            };
             // @ts-expect-error - TS2531: Object is possibly 'null'.
-            const completion = await this._session.prompt(prompt,
+            const completion = await this._session.prompt(prompt, promptOptions);
             return completion;
         }
         catch (e) {

package/dist/chat_models/llama_cpp.d.ts

@@ -1,42 +1,13 @@
 import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
 import { SimpleChatModel, BaseChatModelParams } from "./base.js";
+import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
 import { BaseLanguageModelCallOptions } from "../base_language/index.js";
 import type { BaseMessage } from "../schema/index.js";
 /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
-export interface LlamaCppInputs extends BaseChatModelParams {
-    /** Prompt processing batch size. */
-    batchSize?: number;
-    /** Text context size. */
-    contextSize?: number;
-    /** Embedding mode only. */
-    embedding?: boolean;
-    /** Use fp16 for KV cache. */
-    f16Kv?: boolean;
-    /** Number of layers to store in VRAM. */
-    gpuLayers?: number;
-    /** The llama_eval() call computes all logits, not just the last one. */
-    logitsAll?: boolean;
-    /** If true, reduce VRAM usage at the cost of performance. */
-    lowVram?: boolean;
-    /** Path to the model on the filesystem. */
-    modelPath: string;
-    /** If null, a random seed will be used. */
-    seed?: null | number;
-    /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
-    temperature?: number;
-    /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
-    topK?: number;
-    /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
-    topP?: number;
-    /** Force system to keep model in RAM. */
-    useMlock?: boolean;
-    /** Use mmap if possible. */
-    useMmap?: boolean;
-    /** Only load the vocabulary, no weights. */
-    vocabOnly?: boolean;
+export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseChatModelParams {
 }
 export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
     /** The maximum number of tokens the response should contain. */
@@ -53,42 +24,28 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
 export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
     CallOptions: LlamaCppCallOptions;
     static inputs: LlamaCppInputs;
-
-
-
-
-
-    logitsAll?: boolean;
-    lowVram?: boolean;
-    seed?: null | number;
-    useMlock?: boolean;
-    useMmap?: boolean;
-    vocabOnly?: boolean;
-    modelPath: string;
+    maxTokens?: number;
+    temperature?: number;
+    topK?: number;
+    topP?: number;
+    trimWhitespaceSuffix?: boolean;
     _model: LlamaModel;
     _context: LlamaContext;
     _session: LlamaChatSession | null;
     static lc_name(): string;
     constructor(inputs: LlamaCppInputs);
     _llmType(): string;
-    invocationParams(): {
-        batchSize: number | undefined;
-        contextSize: number | undefined;
-        embedding: boolean | undefined;
-        f16Kv: boolean | undefined;
-        gpuLayers: number | undefined;
-        logitsAll: boolean | undefined;
-        lowVram: boolean | undefined;
-        modelPath: string;
-        seed: number | null | undefined;
-        useMlock: boolean | undefined;
-        useMmap: boolean | undefined;
-        vocabOnly: boolean | undefined;
-    };
     /** @ignore */
     _combineLLMOutput(): {};
+    invocationParams(): {
+        maxTokens: number | undefined;
+        temperature: number | undefined;
+        topK: number | undefined;
+        topP: number | undefined;
+        trimWhitespaceSuffix: boolean | undefined;
+    };
     /** @ignore */
-    _call(messages: BaseMessage[],
+    _call(messages: BaseMessage[], _options: this["ParsedCallOptions"]): Promise<string>;
     protected _buildSession(messages: BaseMessage[]): string;
     protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
 }

package/dist/chat_models/llama_cpp.js

@@ -1,5 +1,6 @@
-import {
+import { LlamaChatSession, } from "node-llama-cpp";
 import { SimpleChatModel } from "./base.js";
+import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
 /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -12,73 +13,31 @@ export class ChatLlamaCpp extends SimpleChatModel {
     }
     constructor(inputs) {
         super(inputs);
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "maxTokens", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "temperature", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topK", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topP", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "logitsAll", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "lowVram", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "seed", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMlock", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMmap", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "vocabOnly", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "modelPath", {
+        Object.defineProperty(this, "trimWhitespaceSuffix", {
             enumerable: true,
             configurable: true,
             writable: true,
@@ -102,47 +61,33 @@ export class ChatLlamaCpp extends SimpleChatModel {
             writable: true,
             value: void 0
         });
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.modelPath = inputs.modelPath;
-        this.seed = inputs?.seed;
-        this.useMlock = inputs?.useMlock;
-        this.useMmap = inputs?.useMmap;
-        this.vocabOnly = inputs?.vocabOnly;
-        this._model = new LlamaModel(inputs);
-        this._context = new LlamaContext({ model: this._model });
+        this.maxTokens = inputs?.maxTokens;
+        this.temperature = inputs?.temperature;
+        this.topK = inputs?.topK;
+        this.topP = inputs?.topP;
+        this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+        this._model = createLlamaModel(inputs);
+        this._context = createLlamaContext(this._model, inputs);
         this._session = null;
     }
     _llmType() {
         return "llama2_cpp";
     }
-    invocationParams() {
-        return {
-            batchSize: this.batchSize,
-            contextSize: this.contextSize,
-            embedding: this.embedding,
-            f16Kv: this.f16Kv,
-            gpuLayers: this.gpuLayers,
-            logitsAll: this.logitsAll,
-            lowVram: this.lowVram,
-            modelPath: this.modelPath,
-            seed: this.seed,
-            useMlock: this.useMlock,
-            useMmap: this.useMmap,
-            vocabOnly: this.vocabOnly,
-        };
-    }
     /** @ignore */
     _combineLLMOutput() {
         return {};
     }
+    invocationParams() {
+        return {
+            maxTokens: this.maxTokens,
+            temperature: this.temperature,
+            topK: this.topK,
+            topP: this.topP,
+            trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+        };
+    }
     /** @ignore */
-    async _call(messages,
+    async _call(messages, _options) {
         let prompt = "";
         if (messages.length > 1) {
             // We need to build a new _session
@@ -156,8 +101,15 @@ export class ChatLlamaCpp extends SimpleChatModel {
             prompt = messages[0].content;
         }
         try {
+            const promptOptions = {
+                maxTokens: this?.maxTokens,
+                temperature: this?.temperature,
+                topK: this?.topK,
+                topP: this?.topP,
+                trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+            };
             // @ts-expect-error - TS2531: Object is possibly 'null'.
-            const completion = await this._session.prompt(prompt,
+            const completion = await this._session.prompt(prompt, promptOptions);
             return completion;
         }
         catch (e) {