langchain 0.0.169 → 0.0.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -2
- package/agents/format_scratchpad.cjs +1 -0
- package/agents/format_scratchpad.d.ts +1 -0
- package/agents/format_scratchpad.js +1 -0
- package/chat_models/llama_cpp.cjs +1 -0
- package/chat_models/llama_cpp.d.ts +1 -0
- package/chat_models/llama_cpp.js +1 -0
- package/dist/agents/agent.cjs +49 -1
- package/dist/agents/agent.d.ts +19 -1
- package/dist/agents/agent.js +47 -0
- package/dist/agents/executor.cjs +10 -1
- package/dist/agents/executor.d.ts +22 -8
- package/dist/agents/executor.js +11 -2
- package/dist/agents/format_scratchpad.cjs +25 -0
- package/dist/agents/format_scratchpad.d.ts +10 -0
- package/dist/agents/format_scratchpad.js +21 -0
- package/dist/agents/toolkits/aws_sfn.d.ts +4 -1
- package/dist/agents/toolkits/conversational_retrieval/openai_functions.d.ts +1 -1
- package/dist/agents/toolkits/json/json.d.ts +4 -1
- package/dist/agents/toolkits/openapi/openapi.cjs +8 -0
- package/dist/agents/toolkits/openapi/openapi.d.ts +12 -1
- package/dist/agents/toolkits/openapi/openapi.js +8 -0
- package/dist/agents/toolkits/sql/sql.d.ts +4 -1
- package/dist/agents/toolkits/vectorstore/vectorstore.d.ts +8 -2
- package/dist/agents/types.d.ts +13 -1
- package/dist/chains/sql_db/sql_db_chain.cjs +11 -0
- package/dist/chains/sql_db/sql_db_chain.d.ts +11 -0
- package/dist/chains/sql_db/sql_db_chain.js +11 -0
- package/dist/chat_models/baiduwenxin.cjs +12 -1
- package/dist/chat_models/baiduwenxin.d.ts +3 -1
- package/dist/chat_models/baiduwenxin.js +12 -1
- package/dist/chat_models/llama_cpp.cjs +243 -0
- package/dist/chat_models/llama_cpp.d.ts +94 -0
- package/dist/chat_models/llama_cpp.js +239 -0
- package/dist/document_loaders/web/assemblyai.cjs +63 -114
- package/dist/document_loaders/web/assemblyai.d.ts +38 -57
- package/dist/document_loaders/web/assemblyai.js +63 -100
- package/dist/document_loaders/web/pdf.cjs +23 -5
- package/dist/document_loaders/web/pdf.d.ts +9 -1
- package/dist/document_loaders/web/pdf.js +20 -2
- package/dist/graphs/neo4j_graph.cjs +14 -0
- package/dist/graphs/neo4j_graph.d.ts +14 -0
- package/dist/graphs/neo4j_graph.js +14 -0
- package/dist/llms/googlepalm.cjs +3 -0
- package/dist/llms/googlepalm.js +3 -0
- package/dist/load/import_constants.cjs +2 -0
- package/dist/load/import_constants.js +2 -0
- package/dist/load/import_map.cjs +3 -2
- package/dist/load/import_map.d.ts +1 -0
- package/dist/load/import_map.js +1 -0
- package/dist/memory/index.d.ts +1 -1
- package/dist/memory/index.js +1 -1
- package/dist/retrievers/parent_document.cjs +22 -2
- package/dist/retrievers/parent_document.d.ts +8 -1
- package/dist/retrievers/parent_document.js +22 -2
- package/dist/retrievers/time_weighted.cjs +1 -1
- package/dist/retrievers/time_weighted.d.ts +1 -1
- package/dist/retrievers/time_weighted.js +1 -1
- package/dist/schema/runnable/base.cjs +4 -1
- package/dist/schema/runnable/base.d.ts +1 -0
- package/dist/schema/runnable/base.js +4 -1
- package/dist/schema/runnable/passthrough.cjs +35 -1
- package/dist/schema/runnable/passthrough.d.ts +11 -1
- package/dist/schema/runnable/passthrough.js +34 -1
- package/dist/sql_db.cjs +14 -0
- package/dist/sql_db.d.ts +14 -0
- package/dist/sql_db.js +14 -0
- package/dist/storage/ioredis.cjs +2 -1
- package/dist/storage/ioredis.js +2 -1
- package/dist/storage/upstash_redis.cjs +155 -0
- package/dist/storage/upstash_redis.d.ts +59 -0
- package/dist/storage/upstash_redis.js +151 -0
- package/dist/storage/vercel_kv.cjs +2 -1
- package/dist/storage/vercel_kv.js +2 -1
- package/dist/types/assemblyai-types.cjs +0 -150
- package/dist/types/assemblyai-types.d.ts +4 -670
- package/dist/types/assemblyai-types.js +1 -149
- package/dist/util/stream.cjs +3 -0
- package/dist/util/stream.js +3 -0
- package/dist/vectorstores/cassandra.cjs +25 -4
- package/dist/vectorstores/cassandra.d.ts +11 -1
- package/dist/vectorstores/cassandra.js +25 -4
- package/dist/vectorstores/momento_vector_index.cjs +3 -15
- package/dist/vectorstores/momento_vector_index.d.ts +0 -8
- package/dist/vectorstores/momento_vector_index.js +3 -15
- package/dist/vectorstores/neo4j_vector.cjs +14 -0
- package/dist/vectorstores/neo4j_vector.d.ts +14 -0
- package/dist/vectorstores/neo4j_vector.js +14 -0
- package/dist/vectorstores/pgvector.cjs +1 -1
- package/dist/vectorstores/pgvector.js +1 -1
- package/package.json +37 -8
- package/storage/upstash_redis.cjs +1 -0
- package/storage/upstash_redis.d.ts +1 -0
- package/storage/upstash_redis.js +1 -0
- package/dist/util/assemblyai-client.cjs +0 -173
- package/dist/util/assemblyai-client.d.ts +0 -63
- package/dist/util/assemblyai-client.js +0 -170
package/dist/chat_models/baiduwenxin.js

@@ -25,7 +25,7 @@ function messageToWenxinRole(message) {
         case "human":
             return "user";
         case "system":
-            throw new Error("System messages not supported");
+            throw new Error("System messages should not be here");
         case "function":
             throw new Error("Function messages not supported");
         case "generic": {
@@ -161,6 +161,10 @@ export class ChatBaiduWenxin extends BaseChatModel {
             this.apiUrl =
                 "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/eb-instant";
         }
+        else if (this.modelName === "ERNIE-Bot-4") {
+            this.apiUrl =
+                "https://aip.baidubce.com/rpc/2.0/ai_custom/v1/wenxinworkshop/chat/completions_pro";
+        }
         else {
             throw new Error(`Invalid model name: ${this.modelName}`);
         }
@@ -216,6 +220,13 @@ export class ChatBaiduWenxin extends BaseChatModel {
     async _generate(messages, options, runManager) {
         const tokenUsage = {};
         const params = this.invocationParams();
+        // Wenxin requires the system message to be put in the params, not messages array
+        const systemMessage = messages.find((message) => message._getType() === "system");
+        if (systemMessage) {
+            // eslint-disable-next-line no-param-reassign
+            messages = messages.filter((message) => message !== systemMessage);
+            params.system = systemMessage.text;
+        }
         const messagesMapped = messages.map((message) => ({
             role: messageToWenxinRole(message),
             content: message.text,
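Taken together, the two baiduwenxin changes mean a `SystemMessage` can now be passed in the usual way (it is lifted out of the messages array and sent as `params.system`) and that `ERNIE-Bot-4` is accepted as a model name, routed to the `completions_pro` endpoint. A minimal usage sketch, assuming the existing `langchain/chat_models/baiduwenxin` entrypoint and that Wenxin credentials are already configured in the environment (not shown in this diff):

```typescript
import { ChatBaiduWenxin } from "langchain/chat_models/baiduwenxin";
import { HumanMessage, SystemMessage } from "langchain/schema";

// Credentials are assumed to be configured via the environment.
const model = new ChatBaiduWenxin({
  modelName: "ERNIE-Bot-4", // resolves to the completions_pro endpoint per the diff above
});

// _generate() strips the system message from the array and sends it as params.system,
// so only the human turn is forwarded in the messages payload.
const response = await model.call([
  new SystemMessage("You are a helpful translation assistant."),
  new HumanMessage("Translate 'hello' into French."),
]);
console.log(response.text);
```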
package/dist/chat_models/llama_cpp.cjs

@@ -0,0 +1,243 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ChatLlamaCpp = void 0;
+const node_llama_cpp_1 = require("node-llama-cpp");
+const base_js_1 = require("./base.cjs");
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+class ChatLlamaCpp extends base_js_1.SimpleChatModel {
+    static lc_name() {
+        return "ChatLlamaCpp";
+    }
+    constructor(inputs) {
+        super(inputs);
+        Object.defineProperty(this, "batchSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "embedding", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "f16Kv", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "gpuLayers", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "logitsAll", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "lowVram", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "seed", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMlock", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMmap", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "vocabOnly", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "modelPath", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_model", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_context", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_session", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.batchSize = inputs?.batchSize;
+        this.contextSize = inputs?.contextSize;
+        this.embedding = inputs?.embedding;
+        this.f16Kv = inputs?.f16Kv;
+        this.gpuLayers = inputs?.gpuLayers;
+        this.logitsAll = inputs?.logitsAll;
+        this.lowVram = inputs?.lowVram;
+        this.modelPath = inputs.modelPath;
+        this.seed = inputs?.seed;
+        this.useMlock = inputs?.useMlock;
+        this.useMmap = inputs?.useMmap;
+        this.vocabOnly = inputs?.vocabOnly;
+        this._model = new node_llama_cpp_1.LlamaModel(inputs);
+        this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
+        this._session = null;
+    }
+    _llmType() {
+        return "llama2_cpp";
+    }
+    invocationParams() {
+        return {
+            batchSize: this.batchSize,
+            contextSize: this.contextSize,
+            embedding: this.embedding,
+            f16Kv: this.f16Kv,
+            gpuLayers: this.gpuLayers,
+            logitsAll: this.logitsAll,
+            lowVram: this.lowVram,
+            modelPath: this.modelPath,
+            seed: this.seed,
+            useMlock: this.useMlock,
+            useMmap: this.useMmap,
+            vocabOnly: this.vocabOnly,
+        };
+    }
+    /** @ignore */
+    _combineLLMOutput() {
+        return {};
+    }
+    /** @ignore */
+    async _call(messages, options) {
+        let prompt = "";
+        if (messages.length > 1) {
+            // We need to build a new _session
+            prompt = this._buildSession(messages);
+        }
+        else if (!this._session) {
+            prompt = this._buildSession(messages);
+        }
+        else {
+            // If we already have a session then we should just have a single prompt
+            prompt = messages[0].content;
+        }
+        try {
+            // @ts-expect-error - TS2531: Object is possibly 'null'.
+            const completion = await this._session.prompt(prompt, options);
+            return completion;
+        }
+        catch (e) {
+            throw new Error("Error getting prompt completion.");
+        }
+    }
+    // This constructs a new session if we need to adding in any sys messages or previous chats
+    _buildSession(messages) {
+        let prompt = "";
+        let sysMessage = "";
+        let noSystemMessages = [];
+        let interactions = [];
+        // Let's see if we have a system message
+        if (messages.findIndex((msg) => msg._getType() === "system") !== -1) {
+            const sysMessages = messages.filter((message) => message._getType() === "system");
+            // Only use the last provided system message
+            sysMessage = sysMessages[sysMessages.length - 1].content;
+            // Now filter out the system messages
+            noSystemMessages = messages.filter((message) => message._getType() !== "system");
+        }
+        else {
+            noSystemMessages = messages;
+        }
+        // Lets see if we just have a prompt left or are their previous interactions?
+        if (noSystemMessages.length > 1) {
+            // Is the last message a prompt?
+            if (noSystemMessages[noSystemMessages.length - 1]._getType() === "human") {
+                prompt = noSystemMessages[noSystemMessages.length - 1].content;
+                interactions = this._convertMessagesToInteractions(noSystemMessages.slice(0, noSystemMessages.length - 1));
+            }
+            else {
+                interactions = this._convertMessagesToInteractions(noSystemMessages);
+            }
+        }
+        else {
+            // If there was only a single message we assume it's a prompt
+            prompt = noSystemMessages[0].content;
+        }
+        // Now lets construct a session according to what we got
+        if (sysMessage !== "" && interactions.length > 0) {
+            this._session = new node_llama_cpp_1.LlamaChatSession({
+                context: this._context,
+                conversationHistory: interactions,
+                systemPrompt: sysMessage,
+            });
+        }
+        else if (sysMessage !== "" && interactions.length === 0) {
+            this._session = new node_llama_cpp_1.LlamaChatSession({
+                context: this._context,
+                systemPrompt: sysMessage,
+            });
+        }
+        else if (sysMessage === "" && interactions.length > 0) {
+            this._session = new node_llama_cpp_1.LlamaChatSession({
+                context: this._context,
+                conversationHistory: interactions,
+            });
+        }
+        else {
+            this._session = new node_llama_cpp_1.LlamaChatSession({
+                context: this._context,
+            });
+        }
+        return prompt;
+    }
+    // This builds a an array of interactions
+    _convertMessagesToInteractions(messages) {
+        const result = [];
+        for (let i = 0; i < messages.length; i += 2) {
+            if (i + 1 < messages.length) {
+                result.push({
+                    prompt: messages[i].content,
+                    response: messages[i + 1].content,
+                });
+            }
+        }
+        return result;
+    }
+}
+exports.ChatLlamaCpp = ChatLlamaCpp;
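The class above resolves a list of chat messages into a `node-llama-cpp` `LlamaChatSession`: the last system message becomes the `systemPrompt`, earlier human/AI pairs become `conversationHistory` interactions, and the trailing human message is used as the prompt. A minimal sketch of a multi-turn call, assuming the new `langchain/chat_models/llama_cpp` entrypoint and a placeholder model path:

```typescript
import { ChatLlamaCpp } from "langchain/chat_models/llama_cpp";
import { AIMessage, HumanMessage, SystemMessage } from "langchain/schema";

// Hypothetical local model path; modelPath is the only required input.
const model = new ChatLlamaCpp({ modelPath: "/path/to/your/llama-2-model.bin" });

// With several messages, _buildSession() maps the system message to systemPrompt,
// the earlier human/AI pair to conversationHistory, and the final human message
// to the prompt passed to the llama.cpp session.
const response = await model.call([
  new SystemMessage("You are a pirate."),
  new HumanMessage("How are you?"),
  new AIMessage("Arrr, I be doing grand, matey!"),
  new HumanMessage("Where do you keep your treasure?"),
]);
console.log(response.content);
```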
package/dist/chat_models/llama_cpp.d.ts

@@ -0,0 +1,94 @@
+import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
+import { SimpleChatModel, BaseChatModelParams } from "./base.js";
+import { BaseLanguageModelCallOptions } from "../base_language/index.js";
+import type { BaseMessage } from "../schema/index.js";
+/**
+ * Note that the modelPath is the only required parameter. For testing you
+ * can set this in the environment variable `LLAMA_PATH`.
+ */
+export interface LlamaCppInputs extends BaseChatModelParams {
+    /** Prompt processing batch size. */
+    batchSize?: number;
+    /** Text context size. */
+    contextSize?: number;
+    /** Embedding mode only. */
+    embedding?: boolean;
+    /** Use fp16 for KV cache. */
+    f16Kv?: boolean;
+    /** Number of layers to store in VRAM. */
+    gpuLayers?: number;
+    /** The llama_eval() call computes all logits, not just the last one. */
+    logitsAll?: boolean;
+    /** If true, reduce VRAM usage at the cost of performance. */
+    lowVram?: boolean;
+    /** Path to the model on the filesystem. */
+    modelPath: string;
+    /** If null, a random seed will be used. */
+    seed?: null | number;
+    /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
+    temperature?: number;
+    /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
+    topK?: number;
+    /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
+    topP?: number;
+    /** Force system to keep model in RAM. */
+    useMlock?: boolean;
+    /** Use mmap if possible. */
+    useMmap?: boolean;
+    /** Only load the vocabulary, no weights. */
+    vocabOnly?: boolean;
+}
+export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
+    /** The maximum number of tokens the response should contain. */
+    maxTokens?: number;
+    /** A function called when matching the provided token array */
+    onToken?: (tokens: number[]) => void;
+}
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
+    CallOptions: LlamaCppCallOptions;
+    static inputs: LlamaCppInputs;
+    batchSize?: number;
+    contextSize?: number;
+    embedding?: boolean;
+    f16Kv?: boolean;
+    gpuLayers?: number;
+    logitsAll?: boolean;
+    lowVram?: boolean;
+    seed?: null | number;
+    useMlock?: boolean;
+    useMmap?: boolean;
+    vocabOnly?: boolean;
+    modelPath: string;
+    _model: LlamaModel;
+    _context: LlamaContext;
+    _session: LlamaChatSession | null;
+    static lc_name(): string;
+    constructor(inputs: LlamaCppInputs);
+    _llmType(): string;
+    invocationParams(): {
+        batchSize: number | undefined;
+        contextSize: number | undefined;
+        embedding: boolean | undefined;
+        f16Kv: boolean | undefined;
+        gpuLayers: number | undefined;
+        logitsAll: boolean | undefined;
+        lowVram: boolean | undefined;
+        modelPath: string;
+        seed: number | null | undefined;
+        useMlock: boolean | undefined;
+        useMmap: boolean | undefined;
+        vocabOnly: boolean | undefined;
+    };
+    /** @ignore */
+    _combineLLMOutput(): {};
+    /** @ignore */
+    _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+    protected _buildSession(messages: BaseMessage[]): string;
+    protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
+}
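The declarations above expose `LlamaCppCallOptions` with `maxTokens` and `onToken`, which `_call` forwards to the underlying llama.cpp session. A sketch of supplying them per invocation, assuming call options are passed as the second argument of `.call()` as with other LangChain chat models, and using the `LLAMA_PATH` environment variable suggested by the doc comment:

```typescript
import { ChatLlamaCpp } from "langchain/chat_models/llama_cpp";
import { HumanMessage } from "langchain/schema";

// LLAMA_PATH is only a testing convention from the doc comment; the fallback path is a placeholder.
const model = new ChatLlamaCpp({
  modelPath: process.env.LLAMA_PATH ?? "/path/to/your/llama-2-model.bin",
});

// Per-call options from LlamaCppCallOptions: cap the response length and
// observe raw token ids as they are produced.
const response = await model.call([new HumanMessage("Tell me a short joke.")], {
  maxTokens: 64,
  onToken: (tokens) => console.log("tokens emitted so far:", tokens.length),
});
console.log(response.content);
```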
package/dist/chat_models/llama_cpp.js

@@ -0,0 +1,239 @@
+import { LlamaModel, LlamaContext, LlamaChatSession, } from "node-llama-cpp";
+import { SimpleChatModel } from "./base.js";
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+export class ChatLlamaCpp extends SimpleChatModel {
+    static lc_name() {
+        return "ChatLlamaCpp";
+    }
+    constructor(inputs) {
+        super(inputs);
+        Object.defineProperty(this, "batchSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "embedding", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "f16Kv", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "gpuLayers", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "logitsAll", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "lowVram", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "seed", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMlock", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMmap", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "vocabOnly", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "modelPath", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_model", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_context", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_session", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.batchSize = inputs?.batchSize;
+        this.contextSize = inputs?.contextSize;
+        this.embedding = inputs?.embedding;
+        this.f16Kv = inputs?.f16Kv;
+        this.gpuLayers = inputs?.gpuLayers;
+        this.logitsAll = inputs?.logitsAll;
+        this.lowVram = inputs?.lowVram;
+        this.modelPath = inputs.modelPath;
+        this.seed = inputs?.seed;
+        this.useMlock = inputs?.useMlock;
+        this.useMmap = inputs?.useMmap;
+        this.vocabOnly = inputs?.vocabOnly;
+        this._model = new LlamaModel(inputs);
+        this._context = new LlamaContext({ model: this._model });
+        this._session = null;
+    }
+    _llmType() {
+        return "llama2_cpp";
+    }
+    invocationParams() {
+        return {
+            batchSize: this.batchSize,
+            contextSize: this.contextSize,
+            embedding: this.embedding,
+            f16Kv: this.f16Kv,
+            gpuLayers: this.gpuLayers,
+            logitsAll: this.logitsAll,
+            lowVram: this.lowVram,
+            modelPath: this.modelPath,
+            seed: this.seed,
+            useMlock: this.useMlock,
+            useMmap: this.useMmap,
+            vocabOnly: this.vocabOnly,
+        };
+    }
+    /** @ignore */
+    _combineLLMOutput() {
+        return {};
+    }
+    /** @ignore */
+    async _call(messages, options) {
+        let prompt = "";
+        if (messages.length > 1) {
+            // We need to build a new _session
+            prompt = this._buildSession(messages);
+        }
+        else if (!this._session) {
+            prompt = this._buildSession(messages);
+        }
+        else {
+            // If we already have a session then we should just have a single prompt
+            prompt = messages[0].content;
+        }
+        try {
+            // @ts-expect-error - TS2531: Object is possibly 'null'.
+            const completion = await this._session.prompt(prompt, options);
+            return completion;
+        }
+        catch (e) {
+            throw new Error("Error getting prompt completion.");
+        }
+    }
+    // This constructs a new session if we need to adding in any sys messages or previous chats
+    _buildSession(messages) {
+        let prompt = "";
+        let sysMessage = "";
+        let noSystemMessages = [];
+        let interactions = [];
+        // Let's see if we have a system message
+        if (messages.findIndex((msg) => msg._getType() === "system") !== -1) {
+            const sysMessages = messages.filter((message) => message._getType() === "system");
+            // Only use the last provided system message
+            sysMessage = sysMessages[sysMessages.length - 1].content;
+            // Now filter out the system messages
+            noSystemMessages = messages.filter((message) => message._getType() !== "system");
+        }
+        else {
+            noSystemMessages = messages;
+        }
+        // Lets see if we just have a prompt left or are their previous interactions?
+        if (noSystemMessages.length > 1) {
+            // Is the last message a prompt?
+            if (noSystemMessages[noSystemMessages.length - 1]._getType() === "human") {
+                prompt = noSystemMessages[noSystemMessages.length - 1].content;
+                interactions = this._convertMessagesToInteractions(noSystemMessages.slice(0, noSystemMessages.length - 1));
+            }
+            else {
+                interactions = this._convertMessagesToInteractions(noSystemMessages);
+            }
+        }
+        else {
+            // If there was only a single message we assume it's a prompt
+            prompt = noSystemMessages[0].content;
+        }
+        // Now lets construct a session according to what we got
+        if (sysMessage !== "" && interactions.length > 0) {
+            this._session = new LlamaChatSession({
+                context: this._context,
+                conversationHistory: interactions,
+                systemPrompt: sysMessage,
+            });
+        }
+        else if (sysMessage !== "" && interactions.length === 0) {
+            this._session = new LlamaChatSession({
+                context: this._context,
+                systemPrompt: sysMessage,
+            });
+        }
+        else if (sysMessage === "" && interactions.length > 0) {
+            this._session = new LlamaChatSession({
+                context: this._context,
+                conversationHistory: interactions,
+            });
+        }
+        else {
+            this._session = new LlamaChatSession({
+                context: this._context,
+            });
+        }
+        return prompt;
+    }
+    // This builds a an array of interactions
+    _convertMessagesToInteractions(messages) {
+        const result = [];
+        for (let i = 0; i < messages.length; i += 2) {
+            if (i + 1 < messages.length) {
+                result.push({
+                    prompt: messages[i].content,
+                    response: messages[i + 1].content,
+                });
+            }
+        }
+        return result;
+    }
+}