langchain 0.0.170 → 0.0.171
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/chat_models/llama_cpp.cjs +1 -0
- package/chat_models/llama_cpp.d.ts +1 -0
- package/chat_models/llama_cpp.js +1 -0
- package/dist/agents/toolkits/openapi/openapi.cjs +1 -1
- package/dist/agents/toolkits/openapi/openapi.d.ts +1 -1
- package/dist/agents/toolkits/openapi/openapi.js +1 -1
- package/dist/chains/sql_db/sql_db_chain.cjs +2 -0
- package/dist/chains/sql_db/sql_db_chain.d.ts +2 -0
- package/dist/chains/sql_db/sql_db_chain.js +2 -0
- package/dist/chat_models/llama_cpp.cjs +243 -0
- package/dist/chat_models/llama_cpp.d.ts +94 -0
- package/dist/chat_models/llama_cpp.js +239 -0
- package/dist/document_loaders/web/pdf.cjs +23 -5
- package/dist/document_loaders/web/pdf.d.ts +9 -1
- package/dist/document_loaders/web/pdf.js +20 -2
- package/dist/graphs/neo4j_graph.cjs +14 -0
- package/dist/graphs/neo4j_graph.d.ts +14 -0
- package/dist/graphs/neo4j_graph.js +14 -0
- package/dist/llms/googlepalm.cjs +3 -0
- package/dist/llms/googlepalm.js +3 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/retrievers/parent_document.cjs +22 -2
- package/dist/retrievers/parent_document.d.ts +8 -1
- package/dist/retrievers/parent_document.js +22 -2
- package/dist/schema/runnable/passthrough.cjs +3 -1
- package/dist/schema/runnable/passthrough.js +3 -1
- package/dist/sql_db.cjs +2 -0
- package/dist/sql_db.d.ts +2 -0
- package/dist/sql_db.js +2 -0
- package/dist/util/stream.cjs +3 -0
- package/dist/util/stream.js +3 -0
- package/dist/vectorstores/cassandra.cjs +25 -4
- package/dist/vectorstores/cassandra.d.ts +11 -1
- package/dist/vectorstores/cassandra.js +25 -4
- package/dist/vectorstores/momento_vector_index.cjs +3 -15
- package/dist/vectorstores/momento_vector_index.d.ts +0 -8
- package/dist/vectorstores/momento_vector_index.js +3 -15
- package/dist/vectorstores/neo4j_vector.cjs +14 -0
- package/dist/vectorstores/neo4j_vector.d.ts +14 -0
- package/dist/vectorstores/neo4j_vector.js +14 -0
- package/package.json +15 -7
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../dist/chat_models/llama_cpp.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../dist/chat_models/llama_cpp.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../dist/chat_models/llama_cpp.js'
|
|
@@ -67,7 +67,7 @@ exports.OpenApiToolkit = OpenApiToolkit;
|
|
|
67
67
|
* limiting access to what endpoints it can hit, what actions can be taken, and
|
|
68
68
|
* more.
|
|
69
69
|
*
|
|
70
|
-
* See https://js.langchain.com/docs/security for more information.
|
|
70
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
71
71
|
*/
|
|
72
72
|
function createOpenApiAgent(llm, openApiToolkit, args) {
|
|
73
73
|
const { prefix = prompt_js_1.OPENAPI_PREFIX, suffix = prompt_js_1.OPENAPI_SUFFIX, inputVariables = ["input", "agent_scratchpad"], } = args ?? {};
|
|
@@ -39,7 +39,7 @@ export declare class OpenApiToolkit extends RequestsToolkit {
|
|
|
39
39
|
* limiting access to what endpoints it can hit, what actions can be taken, and
|
|
40
40
|
* more.
|
|
41
41
|
*
|
|
42
|
-
* See https://js.langchain.com/docs/security for more information.
|
|
42
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
43
43
|
*/
|
|
44
44
|
export declare function createOpenApiAgent(llm: BaseLanguageModel, openApiToolkit: OpenApiToolkit, args?: ZeroShotCreatePromptArgs): AgentExecutor<import("../../../schema/index.js").ChainValues & {
|
|
45
45
|
agent_scratchpad?: string | import("../../../schema/index.js").BaseMessage[] | undefined;
|
|
@@ -62,7 +62,7 @@ export class OpenApiToolkit extends RequestsToolkit {
|
|
|
62
62
|
* limiting access to what endpoints it can hit, what actions can be taken, and
|
|
63
63
|
* more.
|
|
64
64
|
*
|
|
65
|
-
* See https://js.langchain.com/docs/security for more information.
|
|
65
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
66
66
|
*/
|
|
67
67
|
export function createOpenApiAgent(llm, openApiToolkit, args) {
|
|
68
68
|
const { prefix = OPENAPI_PREFIX, suffix = OPENAPI_SUFFIX, inputVariables = ["input", "agent_scratchpad"], } = args ?? {};
|
|
@@ -19,6 +19,8 @@ const sql_utils_js_1 = require("../../util/sql_utils.cjs");
|
|
|
19
19
|
* to read and scope to the tables that are needed.
|
|
20
20
|
* Optionally, use the includesTables or ignoreTables class parameters
|
|
21
21
|
* to limit which tables can/cannot be accessed.
|
|
22
|
+
*
|
|
23
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
22
24
|
*/
|
|
23
25
|
class SqlDatabaseChain extends base_js_1.BaseChain {
|
|
24
26
|
static lc_name() {
|
|
@@ -31,6 +31,8 @@ export interface SqlDatabaseChainInput extends ChainInputs {
|
|
|
31
31
|
* to read and scope to the tables that are needed.
|
|
32
32
|
* Optionally, use the includesTables or ignoreTables class parameters
|
|
33
33
|
* to limit which tables can/cannot be accessed.
|
|
34
|
+
*
|
|
35
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
34
36
|
*/
|
|
35
37
|
export declare class SqlDatabaseChain extends BaseChain {
|
|
36
38
|
static lc_name(): string;
|
|
@@ -16,6 +16,8 @@ import { getPromptTemplateFromDataSource } from "../../util/sql_utils.js";
|
|
|
16
16
|
* to read and scope to the tables that are needed.
|
|
17
17
|
* Optionally, use the includesTables or ignoreTables class parameters
|
|
18
18
|
* to limit which tables can/cannot be accessed.
|
|
19
|
+
*
|
|
20
|
+
* @link See https://js.langchain.com/docs/security for more information.
|
|
19
21
|
*/
|
|
20
22
|
export class SqlDatabaseChain extends BaseChain {
|
|
21
23
|
static lc_name() {
|
|
@@ -0,0 +1,243 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ChatLlamaCpp = void 0;
|
|
4
|
+
const node_llama_cpp_1 = require("node-llama-cpp");
|
|
5
|
+
const base_js_1 = require("./base.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* To use this model you need to have the `node-llama-cpp` module installed.
|
|
8
|
+
* This can be installed using `npm install -S node-llama-cpp` and the minimum
|
|
9
|
+
* version supported is version 2.0.0.
|
|
10
|
+
* This also requires that you have a locally built version of Llama2 installed.
|
|
11
|
+
*/
|
|
12
|
+
class ChatLlamaCpp extends base_js_1.SimpleChatModel {
|
|
13
|
+
static lc_name() {
|
|
14
|
+
return "ChatLlamaCpp";
|
|
15
|
+
}
|
|
16
|
+
constructor(inputs) {
|
|
17
|
+
super(inputs);
|
|
18
|
+
Object.defineProperty(this, "batchSize", {
|
|
19
|
+
enumerable: true,
|
|
20
|
+
configurable: true,
|
|
21
|
+
writable: true,
|
|
22
|
+
value: void 0
|
|
23
|
+
});
|
|
24
|
+
Object.defineProperty(this, "contextSize", {
|
|
25
|
+
enumerable: true,
|
|
26
|
+
configurable: true,
|
|
27
|
+
writable: true,
|
|
28
|
+
value: void 0
|
|
29
|
+
});
|
|
30
|
+
Object.defineProperty(this, "embedding", {
|
|
31
|
+
enumerable: true,
|
|
32
|
+
configurable: true,
|
|
33
|
+
writable: true,
|
|
34
|
+
value: void 0
|
|
35
|
+
});
|
|
36
|
+
Object.defineProperty(this, "f16Kv", {
|
|
37
|
+
enumerable: true,
|
|
38
|
+
configurable: true,
|
|
39
|
+
writable: true,
|
|
40
|
+
value: void 0
|
|
41
|
+
});
|
|
42
|
+
Object.defineProperty(this, "gpuLayers", {
|
|
43
|
+
enumerable: true,
|
|
44
|
+
configurable: true,
|
|
45
|
+
writable: true,
|
|
46
|
+
value: void 0
|
|
47
|
+
});
|
|
48
|
+
Object.defineProperty(this, "logitsAll", {
|
|
49
|
+
enumerable: true,
|
|
50
|
+
configurable: true,
|
|
51
|
+
writable: true,
|
|
52
|
+
value: void 0
|
|
53
|
+
});
|
|
54
|
+
Object.defineProperty(this, "lowVram", {
|
|
55
|
+
enumerable: true,
|
|
56
|
+
configurable: true,
|
|
57
|
+
writable: true,
|
|
58
|
+
value: void 0
|
|
59
|
+
});
|
|
60
|
+
Object.defineProperty(this, "seed", {
|
|
61
|
+
enumerable: true,
|
|
62
|
+
configurable: true,
|
|
63
|
+
writable: true,
|
|
64
|
+
value: void 0
|
|
65
|
+
});
|
|
66
|
+
Object.defineProperty(this, "useMlock", {
|
|
67
|
+
enumerable: true,
|
|
68
|
+
configurable: true,
|
|
69
|
+
writable: true,
|
|
70
|
+
value: void 0
|
|
71
|
+
});
|
|
72
|
+
Object.defineProperty(this, "useMmap", {
|
|
73
|
+
enumerable: true,
|
|
74
|
+
configurable: true,
|
|
75
|
+
writable: true,
|
|
76
|
+
value: void 0
|
|
77
|
+
});
|
|
78
|
+
Object.defineProperty(this, "vocabOnly", {
|
|
79
|
+
enumerable: true,
|
|
80
|
+
configurable: true,
|
|
81
|
+
writable: true,
|
|
82
|
+
value: void 0
|
|
83
|
+
});
|
|
84
|
+
Object.defineProperty(this, "modelPath", {
|
|
85
|
+
enumerable: true,
|
|
86
|
+
configurable: true,
|
|
87
|
+
writable: true,
|
|
88
|
+
value: void 0
|
|
89
|
+
});
|
|
90
|
+
Object.defineProperty(this, "_model", {
|
|
91
|
+
enumerable: true,
|
|
92
|
+
configurable: true,
|
|
93
|
+
writable: true,
|
|
94
|
+
value: void 0
|
|
95
|
+
});
|
|
96
|
+
Object.defineProperty(this, "_context", {
|
|
97
|
+
enumerable: true,
|
|
98
|
+
configurable: true,
|
|
99
|
+
writable: true,
|
|
100
|
+
value: void 0
|
|
101
|
+
});
|
|
102
|
+
Object.defineProperty(this, "_session", {
|
|
103
|
+
enumerable: true,
|
|
104
|
+
configurable: true,
|
|
105
|
+
writable: true,
|
|
106
|
+
value: void 0
|
|
107
|
+
});
|
|
108
|
+
this.batchSize = inputs?.batchSize;
|
|
109
|
+
this.contextSize = inputs?.contextSize;
|
|
110
|
+
this.embedding = inputs?.embedding;
|
|
111
|
+
this.f16Kv = inputs?.f16Kv;
|
|
112
|
+
this.gpuLayers = inputs?.gpuLayers;
|
|
113
|
+
this.logitsAll = inputs?.logitsAll;
|
|
114
|
+
this.lowVram = inputs?.lowVram;
|
|
115
|
+
this.modelPath = inputs.modelPath;
|
|
116
|
+
this.seed = inputs?.seed;
|
|
117
|
+
this.useMlock = inputs?.useMlock;
|
|
118
|
+
this.useMmap = inputs?.useMmap;
|
|
119
|
+
this.vocabOnly = inputs?.vocabOnly;
|
|
120
|
+
this._model = new node_llama_cpp_1.LlamaModel(inputs);
|
|
121
|
+
this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
|
|
122
|
+
this._session = null;
|
|
123
|
+
}
|
|
124
|
+
_llmType() {
|
|
125
|
+
return "llama2_cpp";
|
|
126
|
+
}
|
|
127
|
+
invocationParams() {
|
|
128
|
+
return {
|
|
129
|
+
batchSize: this.batchSize,
|
|
130
|
+
contextSize: this.contextSize,
|
|
131
|
+
embedding: this.embedding,
|
|
132
|
+
f16Kv: this.f16Kv,
|
|
133
|
+
gpuLayers: this.gpuLayers,
|
|
134
|
+
logitsAll: this.logitsAll,
|
|
135
|
+
lowVram: this.lowVram,
|
|
136
|
+
modelPath: this.modelPath,
|
|
137
|
+
seed: this.seed,
|
|
138
|
+
useMlock: this.useMlock,
|
|
139
|
+
useMmap: this.useMmap,
|
|
140
|
+
vocabOnly: this.vocabOnly,
|
|
141
|
+
};
|
|
142
|
+
}
|
|
143
|
+
/** @ignore */
|
|
144
|
+
_combineLLMOutput() {
|
|
145
|
+
return {};
|
|
146
|
+
}
|
|
147
|
+
/** @ignore */
|
|
148
|
+
async _call(messages, options) {
|
|
149
|
+
let prompt = "";
|
|
150
|
+
if (messages.length > 1) {
|
|
151
|
+
// We need to build a new _session
|
|
152
|
+
prompt = this._buildSession(messages);
|
|
153
|
+
}
|
|
154
|
+
else if (!this._session) {
|
|
155
|
+
prompt = this._buildSession(messages);
|
|
156
|
+
}
|
|
157
|
+
else {
|
|
158
|
+
// If we already have a session then we should just have a single prompt
|
|
159
|
+
prompt = messages[0].content;
|
|
160
|
+
}
|
|
161
|
+
try {
|
|
162
|
+
// @ts-expect-error - TS2531: Object is possibly 'null'.
|
|
163
|
+
const completion = await this._session.prompt(prompt, options);
|
|
164
|
+
return completion;
|
|
165
|
+
}
|
|
166
|
+
catch (e) {
|
|
167
|
+
throw new Error("Error getting prompt completion.");
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
// This constructs a new session when needed, adding in any system messages or previous chats
|
|
171
|
+
_buildSession(messages) {
|
|
172
|
+
let prompt = "";
|
|
173
|
+
let sysMessage = "";
|
|
174
|
+
let noSystemMessages = [];
|
|
175
|
+
let interactions = [];
|
|
176
|
+
// Let's see if we have a system message
|
|
177
|
+
if (messages.findIndex((msg) => msg._getType() === "system") !== -1) {
|
|
178
|
+
const sysMessages = messages.filter((message) => message._getType() === "system");
|
|
179
|
+
// Only use the last provided system message
|
|
180
|
+
sysMessage = sysMessages[sysMessages.length - 1].content;
|
|
181
|
+
// Now filter out the system messages
|
|
182
|
+
noSystemMessages = messages.filter((message) => message._getType() !== "system");
|
|
183
|
+
}
|
|
184
|
+
else {
|
|
185
|
+
noSystemMessages = messages;
|
|
186
|
+
}
|
|
187
|
+
// Let's see if we just have a prompt left, or if there are previous interactions
|
|
188
|
+
if (noSystemMessages.length > 1) {
|
|
189
|
+
// Is the last message a prompt?
|
|
190
|
+
if (noSystemMessages[noSystemMessages.length - 1]._getType() === "human") {
|
|
191
|
+
prompt = noSystemMessages[noSystemMessages.length - 1].content;
|
|
192
|
+
interactions = this._convertMessagesToInteractions(noSystemMessages.slice(0, noSystemMessages.length - 1));
|
|
193
|
+
}
|
|
194
|
+
else {
|
|
195
|
+
interactions = this._convertMessagesToInteractions(noSystemMessages);
|
|
196
|
+
}
|
|
197
|
+
}
|
|
198
|
+
else {
|
|
199
|
+
// If there was only a single message we assume it's a prompt
|
|
200
|
+
prompt = noSystemMessages[0].content;
|
|
201
|
+
}
|
|
202
|
+
// Now let's construct a session according to what we got
|
|
203
|
+
if (sysMessage !== "" && interactions.length > 0) {
|
|
204
|
+
this._session = new node_llama_cpp_1.LlamaChatSession({
|
|
205
|
+
context: this._context,
|
|
206
|
+
conversationHistory: interactions,
|
|
207
|
+
systemPrompt: sysMessage,
|
|
208
|
+
});
|
|
209
|
+
}
|
|
210
|
+
else if (sysMessage !== "" && interactions.length === 0) {
|
|
211
|
+
this._session = new node_llama_cpp_1.LlamaChatSession({
|
|
212
|
+
context: this._context,
|
|
213
|
+
systemPrompt: sysMessage,
|
|
214
|
+
});
|
|
215
|
+
}
|
|
216
|
+
else if (sysMessage === "" && interactions.length > 0) {
|
|
217
|
+
this._session = new node_llama_cpp_1.LlamaChatSession({
|
|
218
|
+
context: this._context,
|
|
219
|
+
conversationHistory: interactions,
|
|
220
|
+
});
|
|
221
|
+
}
|
|
222
|
+
else {
|
|
223
|
+
this._session = new node_llama_cpp_1.LlamaChatSession({
|
|
224
|
+
context: this._context,
|
|
225
|
+
});
|
|
226
|
+
}
|
|
227
|
+
return prompt;
|
|
228
|
+
}
|
|
229
|
+
// This builds an array of interactions
|
|
230
|
+
_convertMessagesToInteractions(messages) {
|
|
231
|
+
const result = [];
|
|
232
|
+
for (let i = 0; i < messages.length; i += 2) {
|
|
233
|
+
if (i + 1 < messages.length) {
|
|
234
|
+
result.push({
|
|
235
|
+
prompt: messages[i].content,
|
|
236
|
+
response: messages[i + 1].content,
|
|
237
|
+
});
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
return result;
|
|
241
|
+
}
|
|
242
|
+
}
|
|
243
|
+
exports.ChatLlamaCpp = ChatLlamaCpp;
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
|
|
2
|
+
import { SimpleChatModel, BaseChatModelParams } from "./base.js";
|
|
3
|
+
import { BaseLanguageModelCallOptions } from "../base_language/index.js";
|
|
4
|
+
import type { BaseMessage } from "../schema/index.js";
|
|
5
|
+
/**
|
|
6
|
+
* Note that the modelPath is the only required parameter. For testing you
|
|
7
|
+
* can set this in the environment variable `LLAMA_PATH`.
|
|
8
|
+
*/
|
|
9
|
+
export interface LlamaCppInputs extends BaseChatModelParams {
|
|
10
|
+
/** Prompt processing batch size. */
|
|
11
|
+
batchSize?: number;
|
|
12
|
+
/** Text context size. */
|
|
13
|
+
contextSize?: number;
|
|
14
|
+
/** Embedding mode only. */
|
|
15
|
+
embedding?: boolean;
|
|
16
|
+
/** Use fp16 for KV cache. */
|
|
17
|
+
f16Kv?: boolean;
|
|
18
|
+
/** Number of layers to store in VRAM. */
|
|
19
|
+
gpuLayers?: number;
|
|
20
|
+
/** The llama_eval() call computes all logits, not just the last one. */
|
|
21
|
+
logitsAll?: boolean;
|
|
22
|
+
/** If true, reduce VRAM usage at the cost of performance. */
|
|
23
|
+
lowVram?: boolean;
|
|
24
|
+
/** Path to the model on the filesystem. */
|
|
25
|
+
modelPath: string;
|
|
26
|
+
/** If null, a random seed will be used. */
|
|
27
|
+
seed?: null | number;
|
|
28
|
+
/** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
|
|
29
|
+
temperature?: number;
|
|
30
|
+
/** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
|
|
31
|
+
topK?: number;
|
|
32
|
+
/** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
|
|
33
|
+
topP?: number;
|
|
34
|
+
/** Force system to keep model in RAM. */
|
|
35
|
+
useMlock?: boolean;
|
|
36
|
+
/** Use mmap if possible. */
|
|
37
|
+
useMmap?: boolean;
|
|
38
|
+
/** Only load the vocabulary, no weights. */
|
|
39
|
+
vocabOnly?: boolean;
|
|
40
|
+
}
|
|
41
|
+
export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
|
|
42
|
+
/** The maximum number of tokens the response should contain. */
|
|
43
|
+
maxTokens?: number;
|
|
44
|
+
/** A function called when matching the provided token array */
|
|
45
|
+
onToken?: (tokens: number[]) => void;
|
|
46
|
+
}
|
|
47
|
+
/**
|
|
48
|
+
* To use this model you need to have the `node-llama-cpp` module installed.
|
|
49
|
+
* This can be installed using `npm install -S node-llama-cpp` and the minimum
|
|
50
|
+
* version supported is version 2.0.0.
|
|
51
|
+
* This also requires that you have a locally built version of Llama2 installed.
|
|
52
|
+
*/
|
|
53
|
+
export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
|
|
54
|
+
CallOptions: LlamaCppCallOptions;
|
|
55
|
+
static inputs: LlamaCppInputs;
|
|
56
|
+
batchSize?: number;
|
|
57
|
+
contextSize?: number;
|
|
58
|
+
embedding?: boolean;
|
|
59
|
+
f16Kv?: boolean;
|
|
60
|
+
gpuLayers?: number;
|
|
61
|
+
logitsAll?: boolean;
|
|
62
|
+
lowVram?: boolean;
|
|
63
|
+
seed?: null | number;
|
|
64
|
+
useMlock?: boolean;
|
|
65
|
+
useMmap?: boolean;
|
|
66
|
+
vocabOnly?: boolean;
|
|
67
|
+
modelPath: string;
|
|
68
|
+
_model: LlamaModel;
|
|
69
|
+
_context: LlamaContext;
|
|
70
|
+
_session: LlamaChatSession | null;
|
|
71
|
+
static lc_name(): string;
|
|
72
|
+
constructor(inputs: LlamaCppInputs);
|
|
73
|
+
_llmType(): string;
|
|
74
|
+
invocationParams(): {
|
|
75
|
+
batchSize: number | undefined;
|
|
76
|
+
contextSize: number | undefined;
|
|
77
|
+
embedding: boolean | undefined;
|
|
78
|
+
f16Kv: boolean | undefined;
|
|
79
|
+
gpuLayers: number | undefined;
|
|
80
|
+
logitsAll: boolean | undefined;
|
|
81
|
+
lowVram: boolean | undefined;
|
|
82
|
+
modelPath: string;
|
|
83
|
+
seed: number | null | undefined;
|
|
84
|
+
useMlock: boolean | undefined;
|
|
85
|
+
useMmap: boolean | undefined;
|
|
86
|
+
vocabOnly: boolean | undefined;
|
|
87
|
+
};
|
|
88
|
+
/** @ignore */
|
|
89
|
+
_combineLLMOutput(): {};
|
|
90
|
+
/** @ignore */
|
|
91
|
+
_call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
|
|
92
|
+
protected _buildSession(messages: BaseMessage[]): string;
|
|
93
|
+
protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
|
|
94
|
+
}
|
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
import { LlamaModel, LlamaContext, LlamaChatSession, } from "node-llama-cpp";
|
|
2
|
+
import { SimpleChatModel } from "./base.js";
|
|
3
|
+
/**
|
|
4
|
+
* To use this model you need to have the `node-llama-cpp` module installed.
|
|
5
|
+
* This can be installed using `npm install -S node-llama-cpp` and the minimum
|
|
6
|
+
* version supported is version 2.0.0.
|
|
7
|
+
* This also requires that you have a locally built version of Llama2 installed.
|
|
8
|
+
*/
|
|
9
|
+
export class ChatLlamaCpp extends SimpleChatModel {
|
|
10
|
+
static lc_name() {
|
|
11
|
+
return "ChatLlamaCpp";
|
|
12
|
+
}
|
|
13
|
+
constructor(inputs) {
|
|
14
|
+
super(inputs);
|
|
15
|
+
Object.defineProperty(this, "batchSize", {
|
|
16
|
+
enumerable: true,
|
|
17
|
+
configurable: true,
|
|
18
|
+
writable: true,
|
|
19
|
+
value: void 0
|
|
20
|
+
});
|
|
21
|
+
Object.defineProperty(this, "contextSize", {
|
|
22
|
+
enumerable: true,
|
|
23
|
+
configurable: true,
|
|
24
|
+
writable: true,
|
|
25
|
+
value: void 0
|
|
26
|
+
});
|
|
27
|
+
Object.defineProperty(this, "embedding", {
|
|
28
|
+
enumerable: true,
|
|
29
|
+
configurable: true,
|
|
30
|
+
writable: true,
|
|
31
|
+
value: void 0
|
|
32
|
+
});
|
|
33
|
+
Object.defineProperty(this, "f16Kv", {
|
|
34
|
+
enumerable: true,
|
|
35
|
+
configurable: true,
|
|
36
|
+
writable: true,
|
|
37
|
+
value: void 0
|
|
38
|
+
});
|
|
39
|
+
Object.defineProperty(this, "gpuLayers", {
|
|
40
|
+
enumerable: true,
|
|
41
|
+
configurable: true,
|
|
42
|
+
writable: true,
|
|
43
|
+
value: void 0
|
|
44
|
+
});
|
|
45
|
+
Object.defineProperty(this, "logitsAll", {
|
|
46
|
+
enumerable: true,
|
|
47
|
+
configurable: true,
|
|
48
|
+
writable: true,
|
|
49
|
+
value: void 0
|
|
50
|
+
});
|
|
51
|
+
Object.defineProperty(this, "lowVram", {
|
|
52
|
+
enumerable: true,
|
|
53
|
+
configurable: true,
|
|
54
|
+
writable: true,
|
|
55
|
+
value: void 0
|
|
56
|
+
});
|
|
57
|
+
Object.defineProperty(this, "seed", {
|
|
58
|
+
enumerable: true,
|
|
59
|
+
configurable: true,
|
|
60
|
+
writable: true,
|
|
61
|
+
value: void 0
|
|
62
|
+
});
|
|
63
|
+
Object.defineProperty(this, "useMlock", {
|
|
64
|
+
enumerable: true,
|
|
65
|
+
configurable: true,
|
|
66
|
+
writable: true,
|
|
67
|
+
value: void 0
|
|
68
|
+
});
|
|
69
|
+
Object.defineProperty(this, "useMmap", {
|
|
70
|
+
enumerable: true,
|
|
71
|
+
configurable: true,
|
|
72
|
+
writable: true,
|
|
73
|
+
value: void 0
|
|
74
|
+
});
|
|
75
|
+
Object.defineProperty(this, "vocabOnly", {
|
|
76
|
+
enumerable: true,
|
|
77
|
+
configurable: true,
|
|
78
|
+
writable: true,
|
|
79
|
+
value: void 0
|
|
80
|
+
});
|
|
81
|
+
Object.defineProperty(this, "modelPath", {
|
|
82
|
+
enumerable: true,
|
|
83
|
+
configurable: true,
|
|
84
|
+
writable: true,
|
|
85
|
+
value: void 0
|
|
86
|
+
});
|
|
87
|
+
Object.defineProperty(this, "_model", {
|
|
88
|
+
enumerable: true,
|
|
89
|
+
configurable: true,
|
|
90
|
+
writable: true,
|
|
91
|
+
value: void 0
|
|
92
|
+
});
|
|
93
|
+
Object.defineProperty(this, "_context", {
|
|
94
|
+
enumerable: true,
|
|
95
|
+
configurable: true,
|
|
96
|
+
writable: true,
|
|
97
|
+
value: void 0
|
|
98
|
+
});
|
|
99
|
+
Object.defineProperty(this, "_session", {
|
|
100
|
+
enumerable: true,
|
|
101
|
+
configurable: true,
|
|
102
|
+
writable: true,
|
|
103
|
+
value: void 0
|
|
104
|
+
});
|
|
105
|
+
this.batchSize = inputs?.batchSize;
|
|
106
|
+
this.contextSize = inputs?.contextSize;
|
|
107
|
+
this.embedding = inputs?.embedding;
|
|
108
|
+
this.f16Kv = inputs?.f16Kv;
|
|
109
|
+
this.gpuLayers = inputs?.gpuLayers;
|
|
110
|
+
this.logitsAll = inputs?.logitsAll;
|
|
111
|
+
this.lowVram = inputs?.lowVram;
|
|
112
|
+
this.modelPath = inputs.modelPath;
|
|
113
|
+
this.seed = inputs?.seed;
|
|
114
|
+
this.useMlock = inputs?.useMlock;
|
|
115
|
+
this.useMmap = inputs?.useMmap;
|
|
116
|
+
this.vocabOnly = inputs?.vocabOnly;
|
|
117
|
+
this._model = new LlamaModel(inputs);
|
|
118
|
+
this._context = new LlamaContext({ model: this._model });
|
|
119
|
+
this._session = null;
|
|
120
|
+
}
|
|
121
|
+
_llmType() {
|
|
122
|
+
return "llama2_cpp";
|
|
123
|
+
}
|
|
124
|
+
invocationParams() {
|
|
125
|
+
return {
|
|
126
|
+
batchSize: this.batchSize,
|
|
127
|
+
contextSize: this.contextSize,
|
|
128
|
+
embedding: this.embedding,
|
|
129
|
+
f16Kv: this.f16Kv,
|
|
130
|
+
gpuLayers: this.gpuLayers,
|
|
131
|
+
logitsAll: this.logitsAll,
|
|
132
|
+
lowVram: this.lowVram,
|
|
133
|
+
modelPath: this.modelPath,
|
|
134
|
+
seed: this.seed,
|
|
135
|
+
useMlock: this.useMlock,
|
|
136
|
+
useMmap: this.useMmap,
|
|
137
|
+
vocabOnly: this.vocabOnly,
|
|
138
|
+
};
|
|
139
|
+
}
|
|
140
|
+
/** @ignore */
|
|
141
|
+
_combineLLMOutput() {
|
|
142
|
+
return {};
|
|
143
|
+
}
|
|
144
|
+
/** @ignore */
|
|
145
|
+
async _call(messages, options) {
|
|
146
|
+
let prompt = "";
|
|
147
|
+
if (messages.length > 1) {
|
|
148
|
+
// We need to build a new _session
|
|
149
|
+
prompt = this._buildSession(messages);
|
|
150
|
+
}
|
|
151
|
+
else if (!this._session) {
|
|
152
|
+
prompt = this._buildSession(messages);
|
|
153
|
+
}
|
|
154
|
+
else {
|
|
155
|
+
// If we already have a session then we should just have a single prompt
|
|
156
|
+
prompt = messages[0].content;
|
|
157
|
+
}
|
|
158
|
+
try {
|
|
159
|
+
// @ts-expect-error - TS2531: Object is possibly 'null'.
|
|
160
|
+
const completion = await this._session.prompt(prompt, options);
|
|
161
|
+
return completion;
|
|
162
|
+
}
|
|
163
|
+
catch (e) {
|
|
164
|
+
throw new Error("Error getting prompt completion.");
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
// This constructs a new session when needed, adding in any system messages or previous chats
|
|
168
|
+
_buildSession(messages) {
|
|
169
|
+
let prompt = "";
|
|
170
|
+
let sysMessage = "";
|
|
171
|
+
let noSystemMessages = [];
|
|
172
|
+
let interactions = [];
|
|
173
|
+
// Let's see if we have a system message
|
|
174
|
+
if (messages.findIndex((msg) => msg._getType() === "system") !== -1) {
|
|
175
|
+
const sysMessages = messages.filter((message) => message._getType() === "system");
|
|
176
|
+
// Only use the last provided system message
|
|
177
|
+
sysMessage = sysMessages[sysMessages.length - 1].content;
|
|
178
|
+
// Now filter out the system messages
|
|
179
|
+
noSystemMessages = messages.filter((message) => message._getType() !== "system");
|
|
180
|
+
}
|
|
181
|
+
else {
|
|
182
|
+
noSystemMessages = messages;
|
|
183
|
+
}
|
|
184
|
+
// Let's see if we just have a prompt left, or if there are previous interactions
|
|
185
|
+
if (noSystemMessages.length > 1) {
|
|
186
|
+
// Is the last message a prompt?
|
|
187
|
+
if (noSystemMessages[noSystemMessages.length - 1]._getType() === "human") {
|
|
188
|
+
prompt = noSystemMessages[noSystemMessages.length - 1].content;
|
|
189
|
+
interactions = this._convertMessagesToInteractions(noSystemMessages.slice(0, noSystemMessages.length - 1));
|
|
190
|
+
}
|
|
191
|
+
else {
|
|
192
|
+
interactions = this._convertMessagesToInteractions(noSystemMessages);
|
|
193
|
+
}
|
|
194
|
+
}
|
|
195
|
+
else {
|
|
196
|
+
// If there was only a single message we assume it's a prompt
|
|
197
|
+
prompt = noSystemMessages[0].content;
|
|
198
|
+
}
|
|
199
|
+
// Now let's construct a session according to what we got
|
|
200
|
+
if (sysMessage !== "" && interactions.length > 0) {
|
|
201
|
+
this._session = new LlamaChatSession({
|
|
202
|
+
context: this._context,
|
|
203
|
+
conversationHistory: interactions,
|
|
204
|
+
systemPrompt: sysMessage,
|
|
205
|
+
});
|
|
206
|
+
}
|
|
207
|
+
else if (sysMessage !== "" && interactions.length === 0) {
|
|
208
|
+
this._session = new LlamaChatSession({
|
|
209
|
+
context: this._context,
|
|
210
|
+
systemPrompt: sysMessage,
|
|
211
|
+
});
|
|
212
|
+
}
|
|
213
|
+
else if (sysMessage === "" && interactions.length > 0) {
|
|
214
|
+
this._session = new LlamaChatSession({
|
|
215
|
+
context: this._context,
|
|
216
|
+
conversationHistory: interactions,
|
|
217
|
+
});
|
|
218
|
+
}
|
|
219
|
+
else {
|
|
220
|
+
this._session = new LlamaChatSession({
|
|
221
|
+
context: this._context,
|
|
222
|
+
});
|
|
223
|
+
}
|
|
224
|
+
return prompt;
|
|
225
|
+
}
|
|
226
|
+
// This builds an array of interactions
|
|
227
|
+
_convertMessagesToInteractions(messages) {
|
|
228
|
+
const result = [];
|
|
229
|
+
for (let i = 0; i < messages.length; i += 2) {
|
|
230
|
+
if (i + 1 < messages.length) {
|
|
231
|
+
result.push({
|
|
232
|
+
prompt: messages[i].content,
|
|
233
|
+
response: messages[i + 1].content,
|
|
234
|
+
});
|
|
235
|
+
}
|
|
236
|
+
}
|
|
237
|
+
return result;
|
|
238
|
+
}
|
|
239
|
+
}
|