langchain 0.0.176 → 0.0.177

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/dist/chat_models/bedrock.cjs +25 -4
  2. package/dist/chat_models/bedrock.d.ts +2 -1
  3. package/dist/chat_models/bedrock.js +25 -4
  4. package/dist/chat_models/llama_cpp.cjs +31 -79
  5. package/dist/chat_models/llama_cpp.d.ts +15 -58
  6. package/dist/chat_models/llama_cpp.js +32 -80
  7. package/dist/chat_models/openai.cjs +91 -6
  8. package/dist/chat_models/openai.d.ts +10 -0
  9. package/dist/chat_models/openai.js +91 -6
  10. package/dist/embeddings/hf.cjs +10 -1
  11. package/dist/embeddings/hf.d.ts +4 -2
  12. package/dist/embeddings/hf.js +10 -1
  13. package/dist/embeddings/llama_cpp.cjs +67 -0
  14. package/dist/embeddings/llama_cpp.d.ts +26 -0
  15. package/dist/embeddings/llama_cpp.js +63 -0
  16. package/dist/embeddings/ollama.cjs +7 -1
  17. package/dist/embeddings/ollama.js +7 -1
  18. package/dist/llms/bedrock.cjs +25 -3
  19. package/dist/llms/bedrock.d.ts +2 -1
  20. package/dist/llms/bedrock.js +25 -3
  21. package/dist/llms/hf.cjs +10 -1
  22. package/dist/llms/hf.d.ts +3 -0
  23. package/dist/llms/hf.js +10 -1
  24. package/dist/llms/llama_cpp.cjs +25 -65
  25. package/dist/llms/llama_cpp.d.ts +7 -43
  26. package/dist/llms/llama_cpp.js +25 -65
  27. package/dist/load/import_constants.cjs +1 -0
  28. package/dist/load/import_constants.js +1 -0
  29. package/dist/prompts/few_shot.cjs +162 -1
  30. package/dist/prompts/few_shot.d.ts +90 -2
  31. package/dist/prompts/few_shot.js +160 -0
  32. package/dist/prompts/index.cjs +2 -1
  33. package/dist/prompts/index.d.ts +1 -1
  34. package/dist/prompts/index.js +1 -1
  35. package/dist/retrievers/zep.cjs +26 -3
  36. package/dist/retrievers/zep.d.ts +11 -2
  37. package/dist/retrievers/zep.js +26 -3
  38. package/dist/util/bedrock.d.ts +2 -0
  39. package/dist/util/llama_cpp.cjs +34 -0
  40. package/dist/util/llama_cpp.d.ts +46 -0
  41. package/dist/util/llama_cpp.js +28 -0
  42. package/dist/util/openai-format-fndef.cjs +81 -0
  43. package/dist/util/openai-format-fndef.d.ts +44 -0
  44. package/dist/util/openai-format-fndef.js +77 -0
  45. package/dist/util/openapi.d.ts +2 -2
  46. package/dist/vectorstores/pinecone.cjs +5 -5
  47. package/dist/vectorstores/pinecone.d.ts +2 -2
  48. package/dist/vectorstores/pinecone.js +5 -5
  49. package/embeddings/llama_cpp.cjs +1 -0
  50. package/embeddings/llama_cpp.d.ts +1 -0
  51. package/embeddings/llama_cpp.js +1 -0
  52. package/package.json +13 -5
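Notable in the list: new llama_cpp embeddings files (items 13-15 and 49-51), backed by the `langchain/embeddings/llama_cpp` entrypoint registered in import_constants at the end of this diff. A hedged usage sketch follows; the `LlamaCppEmbeddings` class name and its constructor shape are assumed from the file names and are not shown in the hunks below:

import { LlamaCppEmbeddings } from "langchain/embeddings/llama_cpp";

// Assumes a local model file; LLAMA_PATH mirrors the convention used by the LlamaCpp LLM further down.
const embeddings = new LlamaCppEmbeddings({
  modelPath: process.env.LLAMA_PATH ?? "/models/llama-2-7b.gguf",
});

// embedDocuments/embedQuery are the standard Embeddings methods.
const vectors = await embeddings.embedDocuments(["Hello, world"]);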
@@ -23,6 +23,7 @@ export declare class Bedrock extends LLM implements BaseBedrockInput {
  stopSequences?: string[];
  modelKwargs?: Record<string, unknown>;
  codec: EventStreamCodec;
+ streaming: boolean;
  get lc_secrets(): {
  [key: string]: string;
  } | undefined;
@@ -39,7 +40,7 @@ export declare class Bedrock extends LLM implements BaseBedrockInput {
  Example:
  response = model.call("Tell me a joke.")
  */
- _call(prompt: string, options: this["ParsedCallOptions"]): Promise<string>;
+ _call(prompt: string, options: this["ParsedCallOptions"], runManager?: CallbackManagerForLLMRun): Promise<string>;
  _signedFetch(prompt: string, options: this["ParsedCallOptions"], fields: {
  bedrockMethod: "invoke" | "invoke-with-response-stream";
  endpointHost: string;
@@ -89,6 +89,12 @@ export class Bedrock extends LLM {
  writable: true,
  value: new EventStreamCodec(toUtf8, fromUtf8)
  });
+ Object.defineProperty(this, "streaming", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: false
+ });
  this.model = fields?.model ?? this.model;
  const allowedModels = ["ai21", "anthropic", "amazon"];
  if (!allowedModels.includes(this.model.split(".")[0])) {
@@ -106,6 +112,7 @@ export class Bedrock extends LLM {
  this.endpointHost = fields?.endpointHost ?? fields?.endpointUrl;
  this.stopSequences = fields?.stopSequences;
  this.modelKwargs = fields?.modelKwargs;
+ this.streaming = fields?.streaming ?? this.streaming;
  }
  /** Call out to Bedrock service model.
  Arguments:
@@ -117,10 +124,23 @@ export class Bedrock extends LLM {
  Example:
  response = model.call("Tell me a joke.")
  */
- async _call(prompt, options) {
+ async _call(prompt, options, runManager) {
  const service = "bedrock-runtime";
  const endpointHost = this.endpointHost ?? `${service}.${this.region}.amazonaws.com`;
  const provider = this.model.split(".")[0];
+ if (this.streaming) {
+ const stream = this._streamResponseChunks(prompt, options, runManager);
+ let finalResult;
+ for await (const chunk of stream) {
+ if (finalResult === undefined) {
+ finalResult = chunk;
+ }
+ else {
+ finalResult = finalResult.concat(chunk);
+ }
+ }
+ return finalResult?.text ?? "";
+ }
  const response = await this._signedFetch(prompt, options, {
  bedrockMethod: "invoke",
  endpointHost,
@@ -201,7 +221,8 @@ export class Bedrock extends LLM {
  text,
  generationInfo: {},
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  }
@@ -212,7 +233,8 @@ export class Bedrock extends LLM {
  text,
  generationInfo: {},
  });
- await runManager?.handleLLMNewToken(text);
+ // eslint-disable-next-line no-void
+ void runManager?.handleLLMNewToken(text);
  }
  }
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
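Net effect of the Bedrock hunks above: a new `streaming` constructor flag, and `_call` now receives the run manager and, when streaming is enabled, consumes `_streamResponseChunks` and returns the concatenated text. A minimal usage sketch; the import path and model id are illustrative, not taken from this diff:

import { Bedrock } from "langchain/llms/bedrock";

const model = new Bedrock({
  model: "anthropic.claude-v2", // provider prefix must be one of: ai21, anthropic, amazon
  region: "us-east-1",
  streaming: true, // new in 0.0.177: _call aggregates chunks from _streamResponseChunks
});

// Tokens are forwarded to the run manager via handleLLMNewToken as they arrive;
// the resolved value is still the full completion text.
const text = await model.call("Tell me a joke.");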
package/dist/llms/hf.cjs CHANGED
@@ -57,6 +57,12 @@ class HuggingFaceInference extends base_js_1.LLM {
  writable: true,
  value: undefined
  });
+ Object.defineProperty(this, "endpointUrl", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: undefined
+ });
  this.model = fields?.model ?? this.model;
  this.temperature = fields?.temperature ?? this.temperature;
  this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -65,6 +71,7 @@ class HuggingFaceInference extends base_js_1.LLM {
  this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
  this.apiKey =
  fields?.apiKey ?? (0, env_js_1.getEnvironmentVariable)("HUGGINGFACEHUB_API_KEY");
+ this.endpointUrl = fields?.endpointUrl;
  if (!this.apiKey) {
  throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
  }
@@ -75,7 +82,9 @@ class HuggingFaceInference extends base_js_1.LLM {
  /** @ignore */
  async _call(prompt, options) {
  const { HfInference } = await HuggingFaceInference.imports();
- const hf = new HfInference(this.apiKey);
+ const hf = this.endpointUrl
+ ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+ : new HfInference(this.apiKey);
  const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
  model: this.model,
  parameters: {
package/dist/llms/hf.d.ts CHANGED
@@ -6,6 +6,8 @@ import { LLM, BaseLLMParams } from "./base.js";
  export interface HFInput {
  /** Model to use */
  model: string;
+ /** Custom inference endpoint URL to use */
+ endpointUrl?: string;
  /** Sampling temperature to use */
  temperature?: number;
  /**
@@ -36,6 +38,7 @@ export declare class HuggingFaceInference extends LLM implements HFInput {
  topK: number | undefined;
  frequencyPenalty: number | undefined;
  apiKey: string | undefined;
+ endpointUrl: string | undefined;
  constructor(fields?: Partial<HFInput> & BaseLLMParams);
  _llmType(): string;
  /** @ignore */
package/dist/llms/hf.js CHANGED
@@ -54,6 +54,12 @@ export class HuggingFaceInference extends LLM {
  writable: true,
  value: undefined
  });
+ Object.defineProperty(this, "endpointUrl", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: undefined
+ });
  this.model = fields?.model ?? this.model;
  this.temperature = fields?.temperature ?? this.temperature;
  this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -62,6 +68,7 @@ export class HuggingFaceInference extends LLM {
  this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
  this.apiKey =
  fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY");
+ this.endpointUrl = fields?.endpointUrl;
  if (!this.apiKey) {
  throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
  }
@@ -72,7 +79,9 @@ export class HuggingFaceInference extends LLM {
  /** @ignore */
  async _call(prompt, options) {
  const { HfInference } = await HuggingFaceInference.imports();
- const hf = new HfInference(this.apiKey);
+ const hf = this.endpointUrl
+ ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+ : new HfInference(this.apiKey);
  const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
  model: this.model,
  parameters: {
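Together, the hf changes add an optional `endpointUrl` that routes requests through `HfInference(...).endpoint(...)` (for dedicated Hugging Face Inference Endpoints) instead of the default hosted API. A short usage sketch; the endpoint URL is a placeholder:

import { HuggingFaceInference } from "langchain/llms/hf";

const model = new HuggingFaceInference({
  model: "gpt2",
  apiKey: "hf_...", // or set HUGGINGFACEHUB_API_KEY in the environment
  // New in 0.0.177: when set, requests go to this endpoint instead of the hosted Inference API.
  endpointUrl: "https://YOUR-ENDPOINT.endpoints.huggingface.cloud",
});

const text = await model.call("What is the capital of France?");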
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
  exports.LlamaCpp = void 0;
- const node_llama_cpp_1 = require("node-llama-cpp");
+ const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
  const base_js_1 = require("./base.cjs");
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
@@ -15,73 +15,31 @@ class LlamaCpp extends base_js_1.LLM {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -105,29 +63,31 @@ class LlamaCpp extends base_js_1.LLM {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs.batchSize;
- this.contextSize = inputs.contextSize;
- this.embedding = inputs.embedding;
- this.f16Kv = inputs.f16Kv;
- this.gpuLayers = inputs.gpuLayers;
- this.logitsAll = inputs.logitsAll;
- this.lowVram = inputs.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs.seed;
- this.useMlock = inputs.useMlock;
- this.useMmap = inputs.useMmap;
- this.vocabOnly = inputs.vocabOnly;
- this._model = new node_llama_cpp_1.LlamaModel(inputs);
- this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
- this._session = new node_llama_cpp_1.LlamaChatSession({ context: this._context });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+ this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
+ this._session = (0, llama_cpp_js_1.createLlamaSession)(this._context);
  }
  _llmType() {
  return "llama2_cpp";
  }
  /** @ignore */
- async _call(prompt, options) {
+ async _call(prompt,
+ // @ts-expect-error - TS6133: 'options' is declared but its value is never read.
+ options) {
  try {
- const completion = await this._session.prompt(prompt, options);
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
@@ -1,40 +1,11 @@
  import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+ import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
  import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js";
  /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
- export interface LlamaCppInputs extends BaseLLMParams {
- /** Prompt processing batch size. */
- batchSize?: number;
- /** Text context size. */
- contextSize?: number;
- /** Embedding mode only. */
- embedding?: boolean;
- /** Use fp16 for KV cache. */
- f16Kv?: boolean;
- /** Number of layers to store in VRAM. */
- gpuLayers?: number;
- /** The llama_eval() call computes all logits, not just the last one. */
- logitsAll?: boolean;
- /** If true, reduce VRAM usage at the cost of performance. */
- lowVram?: boolean;
- /** Path to the model on the filesystem. */
- modelPath: string;
- /** If null, a random seed will be used. */
- seed?: null | number;
- /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
- temperature?: number;
- /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
- topK?: number;
- /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
- topP?: number;
- /** Force system to keep model in RAM. */
- useMlock?: boolean;
- /** Use mmap if possible. */
- useMmap?: boolean;
- /** Only load the vocabulary, no weights. */
- vocabOnly?: boolean;
+ export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {
  }
  export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  /** The maximum number of tokens the response should contain. */
@@ -51,18 +22,11 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
  export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
  CallOptions: LlamaCppCallOptions;
  static inputs: LlamaCppInputs;
- batchSize?: number;
- contextSize?: number;
- embedding?: boolean;
- f16Kv?: boolean;
- gpuLayers?: number;
- logitsAll?: boolean;
- lowVram?: boolean;
- seed?: null | number;
- useMlock?: boolean;
- useMmap?: boolean;
- vocabOnly?: boolean;
- modelPath: string;
+ maxTokens?: number;
+ temperature?: number;
+ topK?: number;
+ topP?: number;
+ trimWhitespaceSuffix?: boolean;
  _model: LlamaModel;
  _context: LlamaContext;
  _session: LlamaChatSession;
@@ -1,4 +1,4 @@
- import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+ import { createLlamaModel, createLlamaContext, createLlamaSession, } from "../util/llama_cpp.js";
  import { LLM } from "./base.js";
  /**
  * To use this model you need to have the `node-llama-cpp` module installed.
@@ -12,73 +12,31 @@ export class LlamaCpp extends LLM {
  }
  constructor(inputs) {
  super(inputs);
- Object.defineProperty(this, "batchSize", {
+ Object.defineProperty(this, "maxTokens", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "contextSize", {
+ Object.defineProperty(this, "temperature", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "embedding", {
+ Object.defineProperty(this, "topK", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "f16Kv", {
+ Object.defineProperty(this, "topP", {
  enumerable: true,
  configurable: true,
  writable: true,
  value: void 0
  });
- Object.defineProperty(this, "gpuLayers", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "logitsAll", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "lowVram", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "seed", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMlock", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "useMmap", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "vocabOnly", {
- enumerable: true,
- configurable: true,
- writable: true,
- value: void 0
- });
- Object.defineProperty(this, "modelPath", {
+ Object.defineProperty(this, "trimWhitespaceSuffix", {
  enumerable: true,
  configurable: true,
  writable: true,
@@ -102,29 +60,31 @@ export class LlamaCpp extends LLM {
  writable: true,
  value: void 0
  });
- this.batchSize = inputs.batchSize;
- this.contextSize = inputs.contextSize;
- this.embedding = inputs.embedding;
- this.f16Kv = inputs.f16Kv;
- this.gpuLayers = inputs.gpuLayers;
- this.logitsAll = inputs.logitsAll;
- this.lowVram = inputs.lowVram;
- this.modelPath = inputs.modelPath;
- this.seed = inputs.seed;
- this.useMlock = inputs.useMlock;
- this.useMmap = inputs.useMmap;
- this.vocabOnly = inputs.vocabOnly;
- this._model = new LlamaModel(inputs);
- this._context = new LlamaContext({ model: this._model });
- this._session = new LlamaChatSession({ context: this._context });
+ this.maxTokens = inputs?.maxTokens;
+ this.temperature = inputs?.temperature;
+ this.topK = inputs?.topK;
+ this.topP = inputs?.topP;
+ this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+ this._model = createLlamaModel(inputs);
+ this._context = createLlamaContext(this._model, inputs);
+ this._session = createLlamaSession(this._context);
  }
  _llmType() {
  return "llama2_cpp";
  }
  /** @ignore */
- async _call(prompt, options) {
+ async _call(prompt,
+ // @ts-expect-error - TS6133: 'options' is declared but its value is never read.
+ options) {
  try {
- const completion = await this._session.prompt(prompt, options);
+ const promptOptions = {
+ maxTokens: this?.maxTokens,
+ temperature: this?.temperature,
+ topK: this?.topK,
+ topP: this?.topP,
+ trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+ };
+ const completion = await this._session.prompt(prompt, promptOptions);
  return completion;
  }
  catch (e) {
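The llama_cpp refactor above moves model/context/session construction into the shared util/llama_cpp helpers and replaces the low-level node-llama-cpp fields on the class with sampling parameters (`maxTokens`, `temperature`, `topK`, `topP`, `trimWhitespaceSuffix`) that are forwarded to `session.prompt()`. A sketch under the assumption that the `langchain/llms/llama_cpp` entrypoint and a local model file are available:

import { LlamaCpp } from "langchain/llms/llama_cpp";

const model = new LlamaCpp({
  modelPath: process.env.LLAMA_PATH ?? "/models/llama-2-7b-chat.gguf", // still the only required input
  maxTokens: 256,
  temperature: 0.7,
  topK: 40,
  topP: 0.9,
  trimWhitespaceSuffix: true, // trim trailing whitespace from the completion
});

const text = await model.call("Summarize what mmap does in one sentence.");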
@@ -25,6 +25,7 @@ exports.optionalImportEntrypoints = [
  "langchain/embeddings/hf_transformers",
  "langchain/embeddings/googlevertexai",
  "langchain/embeddings/googlepalm",
+ "langchain/embeddings/llama_cpp",
  "langchain/llms/load",
  "langchain/llms/cohere",
  "langchain/llms/hf",
@@ -22,6 +22,7 @@ export const optionalImportEntrypoints = [
  "langchain/embeddings/hf_transformers",
  "langchain/embeddings/googlevertexai",
  "langchain/embeddings/googlepalm",
+ "langchain/embeddings/llama_cpp",
  "langchain/llms/load",
  "langchain/llms/cohere",
  "langchain/llms/hf",