langchain 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/chat_models/bedrock.cjs +25 -4
  2. package/dist/chat_models/bedrock.d.ts +2 -1
  3. package/dist/chat_models/bedrock.js +25 -4
  4. package/dist/chat_models/llama_cpp.cjs +31 -79
  5. package/dist/chat_models/llama_cpp.d.ts +15 -58
  6. package/dist/chat_models/llama_cpp.js +32 -80
  7. package/dist/chat_models/openai.cjs +91 -6
  8. package/dist/chat_models/openai.d.ts +10 -0
  9. package/dist/chat_models/openai.js +91 -6
  10. package/dist/embeddings/hf.cjs +10 -1
  11. package/dist/embeddings/hf.d.ts +4 -2
  12. package/dist/embeddings/hf.js +10 -1
  13. package/dist/embeddings/llama_cpp.cjs +67 -0
  14. package/dist/embeddings/llama_cpp.d.ts +26 -0
  15. package/dist/embeddings/llama_cpp.js +63 -0
  16. package/dist/embeddings/ollama.cjs +7 -1
  17. package/dist/embeddings/ollama.js +7 -1
  18. package/dist/llms/bedrock.cjs +25 -3
  19. package/dist/llms/bedrock.d.ts +2 -1
  20. package/dist/llms/bedrock.js +25 -3
  21. package/dist/llms/hf.cjs +10 -1
  22. package/dist/llms/hf.d.ts +3 -0
  23. package/dist/llms/hf.js +10 -1
  24. package/dist/llms/llama_cpp.cjs +25 -65
  25. package/dist/llms/llama_cpp.d.ts +7 -43
  26. package/dist/llms/llama_cpp.js +25 -65
  27. package/dist/load/import_constants.cjs +1 -0
  28. package/dist/load/import_constants.js +1 -0
  29. package/dist/prompts/few_shot.cjs +162 -1
  30. package/dist/prompts/few_shot.d.ts +90 -2
  31. package/dist/prompts/few_shot.js +160 -0
  32. package/dist/prompts/index.cjs +2 -1
  33. package/dist/prompts/index.d.ts +1 -1
  34. package/dist/prompts/index.js +1 -1
  35. package/dist/retrievers/zep.cjs +26 -3
  36. package/dist/retrievers/zep.d.ts +11 -2
  37. package/dist/retrievers/zep.js +26 -3
  38. package/dist/util/bedrock.d.ts +2 -0
  39. package/dist/util/llama_cpp.cjs +34 -0
  40. package/dist/util/llama_cpp.d.ts +46 -0
  41. package/dist/util/llama_cpp.js +28 -0
  42. package/dist/util/openai-format-fndef.cjs +81 -0
  43. package/dist/util/openai-format-fndef.d.ts +44 -0
  44. package/dist/util/openai-format-fndef.js +77 -0
  45. package/dist/util/openapi.d.ts +2 -2
  46. package/dist/vectorstores/pinecone.cjs +5 -5
  47. package/dist/vectorstores/pinecone.d.ts +2 -2
  48. package/dist/vectorstores/pinecone.js +5 -5
  49. package/embeddings/llama_cpp.cjs +1 -0
  50. package/embeddings/llama_cpp.d.ts +1 -0
  51. package/embeddings/llama_cpp.js +1 -0
  52. package/package.json +13 -5
package/dist/chat_models/bedrock.cjs

@@ -133,6 +133,12 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  writable: true,
  value: new eventstream_codec_1.EventStreamCodec(util_utf8_1.toUtf8, util_utf8_1.fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -150,6 +156,7 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -161,10 +168,23 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(messages, options) {
+ async _call(messages, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(messages, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.message.content ?? "";
+ }
  const response = await this._signedFetch(messages, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -233,7 +253,6 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  event.headers[":content-type"].value !== "application/json") {
  throw Error(`Failed to get event chunk: got ${chunk}`);
  }
- // console.log(decoder.decode(event.body));
  const body = JSON.parse(decoder.decode(event.body));
  if (body.message) {
  throw new Error(body.message);
@@ -245,7 +264,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  text,
  message: new index_js_1.AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -256,7 +276,8 @@ class ChatBedrock extends base_js_1.SimpleChatModel {
  text,
  message: new index_js_1.AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
package/dist/chat_models/bedrock.d.ts

@@ -33,6 +33,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
  stopSequences?: string[];
  modelKwargs?: Record<string, unknown>;
  codec: EventStreamCodec;
+ streaming: boolean;
  get lc_secrets(): {
  [key: string]: string;
  } | undefined;
@@ -49,7 +50,7 @@ export declare class ChatBedrock extends SimpleChatModel implements BaseBedrockI
  Example:
  response = model.call("Tell me a joke.")
  */
- _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+ _call(messages: BaseMessage[], options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
  _signedFetch(messages: BaseMessage[], options: this["ParsedCallOptions"], fields: {
  bedrockMethod: "invoke" | "invoke-with-response-stream";
  endpointHost: string;
package/dist/chat_models/bedrock.js

@@ -128,6 +128,12 @@ export class ChatBedrock extends SimpleChatModel {
  writable: true,
  value: new EventStreamCodec(toUtf8, fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -145,6 +151,7 @@ export class ChatBedrock extends SimpleChatModel {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -156,10 +163,23 @@ export class ChatBedrock extends SimpleChatModel {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(messages, options) {
+ async _call(messages, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(messages, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.message.content ?? "";
+ }
  const response = await this._signedFetch(messages, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -228,7 +248,6 @@ export class ChatBedrock extends SimpleChatModel {
  event.headers[":content-type"].value !== "application/json") {
  throw Error(`Failed to get event chunk: got ${chunk}`);
  }
- // console.log(decoder.decode(event.body));
  const body = JSON.parse(decoder.decode(event.body));
  if (body.message) {
  throw new Error(body.message);
@@ -240,7 +259,8 @@ export class ChatBedrock extends SimpleChatModel {
  text,
  message: new AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -251,7 +271,8 @@ export class ChatBedrock extends SimpleChatModel {
  text,
  message: new AIMessageChunk({ content: text }),
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
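
The three Bedrock files above add an opt-in `streaming` flag (default `false`): when set, `_call` now accepts a `runManager`, drains `_streamResponseChunks`, concatenates the chunks, and returns the aggregated message content instead of making a single `invoke` request. A minimal usage sketch follows; the model id, region, and callback wiring are illustrative assumptions, and AWS credentials are assumed to come from the usual environment/default provider chain rather than from anything shown in this diff.

import { ChatBedrock } from "langchain/chat_models/bedrock";
import { HumanMessage } from "langchain/schema";

// Illustrative model id and region; streaming is the new field in 0.0.177.
const model = new ChatBedrock({
  model: "anthropic.claude-v2",
  region: "us-east-1",
  streaming: true, // defaults to false
  callbacks: [
    {
      // Tokens are forwarded through handleLLMNewToken as they arrive.
      handleLLMNewToken(token: string) {
        process.stdout.write(token);
      },
    },
  ],
});

// _call still resolves with the full, concatenated response text.
const response = await model.call([new HumanMessage("Tell me a joke.")]);
console.log(response.content);
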
package/dist/chat_models/llama_cpp.cjs

@@ -3,6 +3,7 @@ Object.defineProperty(exports, "__esModule", { value: true });
  exports.ChatLlamaCpp = void 0;
  const node_llama_cpp_1 = require("node-llama-cpp");
  const base_js_1 = require("./base.cjs");
+ const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -15,73 +16,31 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -105,47 +64,33 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs?.batchSize;
- this.contextSize = inputs?.contextSize;
- this.embedding = inputs?.embedding;
- this.f16Kv = inputs?.f16Kv;
- this.gpuLayers = inputs?.gpuLayers;
- this.logitsAll = inputs?.logitsAll;
- this.lowVram = inputs?.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs?.seed;
- this.useMlock = inputs?.useMlock;
- this.useMmap = inputs?.useMmap;
- this.vocabOnly = inputs?.vocabOnly;
- this._model = new node_llama_cpp_1.LlamaModel(inputs);
- this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+ this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
  this._session = null;
  }
  _llmType() {
  return "llama2_cpp";
  }
- invocationParams() {
- return {
- batchSize: this.batchSize,
- contextSize: this.contextSize,
- embedding: this.embedding,
- f16Kv: this.f16Kv,
- gpuLayers: this.gpuLayers,
- logitsAll: this.logitsAll,
- lowVram: this.lowVram,
- modelPath: this.modelPath,
- seed: this.seed,
- useMlock: this.useMlock,
- useMmap: this.useMmap,
- vocabOnly: this.vocabOnly,
- };
- }
  /** @ignore */
  _combineLLMOutput() {
  return {};
  }
+ invocationParams() {
+ return {
+ maxTokens: this.maxTokens,
+ temperature: this.temperature,
+ topK: this.topK,
+ topP: this.topP,
+ trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+ };
+ }
  /** @ignore */
- async _call(messages, options) {
+ async _call(messages, _options) {
  let prompt = "";
  if (messages.length > 1) {
  // We need to build a new _session
@@ -159,8 +104,15 @@ class ChatLlamaCpp extends base_js_1.SimpleChatModel {
  prompt = messages[0].content;
  }
  try {
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
  // @ts-expect-error - TS2531: Object is possibly 'null'.
- const completion = await this._session.prompt(prompt, options);
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
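
In the chat model above, model and context construction moves out of the constructor and into the new shared `util/llama_cpp` module (items 39-41 in the file list), while the per-prompt sampling options (`maxTokens`, `temperature`, `topK`, `topP`, `trimWhitespaceSuffix`) are gathered into `promptOptions` and passed to `session.prompt`. The util module's body is not part of this excerpt; the following is only a rough sketch of what `createLlamaModel` and `createLlamaContext` plausibly do, inferred from the removed inline constructor calls, not copied from the new file.

import { LlamaModel, LlamaContext } from "node-llama-cpp";

// Sketch only: the real LlamaBaseCppInputs (see util/llama_cpp.d.ts above)
// also carries model/context options such as batchSize, contextSize, gpuLayers, etc.
export interface LlamaBaseCppInputs {
  modelPath: string;
  maxTokens?: number;
  temperature?: number;
  topK?: number;
  topP?: number;
  trimWhitespaceSuffix?: boolean;
}

export function createLlamaModel(inputs: LlamaBaseCppInputs): LlamaModel {
  // Replaces the inline `new LlamaModel(inputs)` in ChatLlamaCpp's constructor.
  return new LlamaModel({ modelPath: inputs.modelPath });
}

export function createLlamaContext(
  model: LlamaModel,
  _inputs: LlamaBaseCppInputs
): LlamaContext {
  // Replaces the inline `new LlamaContext({ model: this._model })`;
  // the real helper presumably also forwards context options from the inputs.
  return new LlamaContext({ model });
}

Centralizing construction this way lets the chat model, the LLM wrapper, and the new llama.cpp embeddings class (all touched in this release) share one code path instead of three copies.
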
package/dist/chat_models/llama_cpp.d.ts

@@ -1,42 +1,13 @@
  import { LlamaModel, LlamaContext, LlamaChatSession, type ConversationInteraction } from "node-llama-cpp";
  import { SimpleChatModel, BaseChatModelParams } from "./base.js";
+ import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
  import { BaseLanguageModelCallOptions } from "../base_language/index.js";
  import type { BaseMessage } from "../schema/index.js";
  /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
- export interface LlamaCppInputs extends BaseChatModelParams {
- /** Prompt processing batch size. */
- batchSize?: number;
- /** Text context size. */
- contextSize?: number;
- /** Embedding mode only. */
- embedding?: boolean;
- /** Use fp16 for KV cache. */
- f16Kv?: boolean;
- /** Number of layers to store in VRAM. */
- gpuLayers?: number;
- /** The llama_eval() call computes all logits, not just the last one. */
- logitsAll?: boolean;
- /** If true, reduce VRAM usage at the cost of performance. */
- lowVram?: boolean;
- /** Path to the model on the filesystem. */
- modelPath: string;
- /** If null, a random seed will be used. */
- seed?: null | number;
- /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
- temperature?: number;
- /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
- topK?: number;
- /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
- topP?: number;
- /** Force system to keep model in RAM. */
- useMlock?: boolean;
- /** Use mmap if possible. */
- useMmap?: boolean;
- /** Only load the vocabulary, no weights. */
- vocabOnly?: boolean;
+ export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseChatModelParams {
  }
  export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
  /** The maximum number of tokens the response should contain. */
@@ -53,42 +24,28 @@ export interface LlamaCppCallOptions extends BaseLanguageModelCallOptions {
  export declare class ChatLlamaCpp extends SimpleChatModel<LlamaCppCallOptions> {
  CallOptions: LlamaCppCallOptions;
  static inputs: LlamaCppInputs;
- batchSize?: number;
- contextSize?: number;
- embedding?: boolean;
- f16Kv?: boolean;
- gpuLayers?: number;
- logitsAll?: boolean;
- lowVram?: boolean;
- seed?: null | number;
- useMlock?: boolean;
- useMmap?: boolean;
- vocabOnly?: boolean;
- modelPath: string;
+ maxTokens?: number;
+ temperature?: number;
+ topK?: number;
+ topP?: number;
+ trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession | null;
  static lc_name(): string;
  constructor(inputs: LlamaCppInputs);
  _llmType(): string;
- invocationParams(): {
- batchSize: number | undefined;
- contextSize: number | undefined;
- embedding: boolean | undefined;
- f16Kv: boolean | undefined;
- gpuLayers: number | undefined;
- logitsAll: boolean | undefined;
- lowVram: boolean | undefined;
- modelPath: string;
- seed: number | null | undefined;
- useMlock: boolean | undefined;
- useMmap: boolean | undefined;
- vocabOnly: boolean | undefined;
- };
  /** @ignore */
  _combineLLMOutput(): {};
+ invocationParams(): {
+ maxTokens: number | undefined;
+ temperature: number | undefined;
+ topK: number | undefined;
+ topP: number | undefined;
+ trimWhitespaceSuffix: boolean | undefined;
+ };
  /** @ignore */
- _call(messages: BaseMessage[], options: this["ParsedCallOptions"]): Promise<string>;
+ _call(messages: BaseMessage[], _options: this["ParsedCallOptions"]): Promise<string>;
  protected _buildSession(messages: BaseMessage[]): string;
  protected _convertMessagesToInteractions(messages: BaseMessage[]): ConversationInteraction[];
  }
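
With the declaration changes above, `ChatLlamaCpp` exposes the same construction pattern as the other chat models: a required `modelPath` (the doc comment suggests the `LLAMA_PATH` environment variable for testing) plus the new optional sampling fields, which are then forwarded to every prompt. A hedged usage sketch; the entrypoint path and the parameter values are illustrative rather than taken from this diff.

import { ChatLlamaCpp } from "langchain/chat_models/llama_cpp";
import { HumanMessage } from "langchain/schema";

// modelPath is the only required input; the sampling fields below are the
// ones moved onto the model in this release (values are arbitrary examples).
const model = new ChatLlamaCpp({
  modelPath: process.env.LLAMA_PATH!, // path to a local llama.cpp model file
  maxTokens: 256,
  temperature: 0.7,
  topK: 40,
  topP: 0.9,
  trimWhitespaceSuffix: true,
});

const response = await model.call([new HumanMessage("Tell me a joke.")]);
console.log(response.content);
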
package/dist/chat_models/llama_cpp.js

@@ -1,5 +1,6 @@
- import { LlamaModel, LlamaContext, LlamaChatSession, } from "node-llama-cpp";
+ import { LlamaChatSession, } from "node-llama-cpp";
  import { SimpleChatModel } from "./base.js";
+ import { createLlamaModel, createLlamaContext, } from "../util/llama_cpp.js";
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
  * This can be installed using `npm install -S node-llama-cpp` and the minimum
@@ -12,73 +13,31 @@ export class ChatLlamaCpp extends SimpleChatModel {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -102,47 +61,33 @@ export class ChatLlamaCpp extends SimpleChatModel {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs?.batchSize;
- this.contextSize = inputs?.contextSize;
- this.embedding = inputs?.embedding;
- this.f16Kv = inputs?.f16Kv;
- this.gpuLayers = inputs?.gpuLayers;
- this.logitsAll = inputs?.logitsAll;
- this.lowVram = inputs?.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs?.seed;
- this.useMlock = inputs?.useMlock;
- this.useMmap = inputs?.useMmap;
- this.vocabOnly = inputs?.vocabOnly;
- this._model = new LlamaModel(inputs);
- this._context = new LlamaContext({ model: this._model });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = createLlamaModel(inputs);
+ this._context = createLlamaContext(this._model, inputs);
  this._session = null;
  }
  _llmType() {
  return "llama2_cpp";
  }
- invocationParams() {
- return {
- batchSize: this.batchSize,
- contextSize: this.contextSize,
- embedding: this.embedding,
- f16Kv: this.f16Kv,
- gpuLayers: this.gpuLayers,
- logitsAll: this.logitsAll,
- lowVram: this.lowVram,
- modelPath: this.modelPath,
- seed: this.seed,
- useMlock: this.useMlock,
- useMmap: this.useMmap,
- vocabOnly: this.vocabOnly,
- };
- }
  /** @ignore */
  _combineLLMOutput() {
  return {};
  }
+ invocationParams() {
+ return {
+ maxTokens: this.maxTokens,
+ temperature: this.temperature,
+ topK: this.topK,
+ topP: this.topP,
+ trimWhitespaceSuffix: this.trimWhitespaceSuffix,
+ };
+ }
  /** @ignore */
- async _call(messages, options) {
+ async _call(messages, _options) {
  let prompt = "";
  if (messages.length > 1) {
  // We need to build a new _session
@@ -156,8 +101,15 @@ export class ChatLlamaCpp extends SimpleChatModel {
  prompt = messages[0].content;
  }
  try {
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
  // @ts-expect-error - TS2531: Object is possibly 'null'.
- const completion = await this._session.prompt(prompt, options);
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {