langchain 0.0.175 → 0.0.177
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chat_models/bedrock.cjs +25 -4
- package/dist/chat_models/bedrock.d.ts +2 -1
- package/dist/chat_models/bedrock.js +25 -4
- package/dist/chat_models/googlevertexai/common.cjs +46 -7
- package/dist/chat_models/googlevertexai/common.d.ts +7 -2
- package/dist/chat_models/googlevertexai/common.js +47 -8
- package/dist/chat_models/googlevertexai/index.cjs +4 -3
- package/dist/chat_models/googlevertexai/index.js +4 -3
- package/dist/chat_models/googlevertexai/web.cjs +2 -1
- package/dist/chat_models/googlevertexai/web.js +2 -1
- package/dist/chat_models/llama_cpp.cjs +31 -79
- package/dist/chat_models/llama_cpp.d.ts +15 -58
- package/dist/chat_models/llama_cpp.js +32 -80
- package/dist/chat_models/openai.cjs +91 -6
- package/dist/chat_models/openai.d.ts +10 -0
- package/dist/chat_models/openai.js +91 -6
- package/dist/embeddings/googlevertexai.cjs +1 -1
- package/dist/embeddings/googlevertexai.js +1 -1
- package/dist/embeddings/hf.cjs +10 -1
- package/dist/embeddings/hf.d.ts +4 -2
- package/dist/embeddings/hf.js +10 -1
- package/dist/embeddings/llama_cpp.cjs +67 -0
- package/dist/embeddings/llama_cpp.d.ts +26 -0
- package/dist/embeddings/llama_cpp.js +63 -0
- package/dist/embeddings/ollama.cjs +7 -1
- package/dist/embeddings/ollama.js +7 -1
- package/dist/experimental/hubs/makersuite/googlemakersuitehub.d.ts +2 -2
- package/dist/experimental/multimodal_embeddings/googlevertexai.cjs +1 -1
- package/dist/experimental/multimodal_embeddings/googlevertexai.d.ts +2 -1
- package/dist/experimental/multimodal_embeddings/googlevertexai.js +2 -2
- package/dist/experimental/plan_and_execute/agent_executor.cjs +7 -4
- package/dist/experimental/plan_and_execute/agent_executor.d.ts +4 -3
- package/dist/experimental/plan_and_execute/agent_executor.js +8 -5
- package/dist/experimental/plan_and_execute/prompt.cjs +25 -9
- package/dist/experimental/plan_and_execute/prompt.d.ts +9 -1
- package/dist/experimental/plan_and_execute/prompt.js +23 -8
- package/dist/llms/bedrock.cjs +25 -3
- package/dist/llms/bedrock.d.ts +2 -1
- package/dist/llms/bedrock.js +25 -3
- package/dist/llms/googlevertexai/common.cjs +46 -13
- package/dist/llms/googlevertexai/common.d.ts +8 -3
- package/dist/llms/googlevertexai/common.js +46 -13
- package/dist/llms/googlevertexai/index.cjs +4 -3
- package/dist/llms/googlevertexai/index.js +4 -3
- package/dist/llms/googlevertexai/web.cjs +2 -1
- package/dist/llms/googlevertexai/web.js +2 -1
- package/dist/llms/hf.cjs +10 -1
- package/dist/llms/hf.d.ts +3 -0
- package/dist/llms/hf.js +10 -1
- package/dist/llms/llama_cpp.cjs +25 -65
- package/dist/llms/llama_cpp.d.ts +7 -43
- package/dist/llms/llama_cpp.js +25 -65
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/prompts/few_shot.cjs +162 -1
- package/dist/prompts/few_shot.d.ts +90 -2
- package/dist/prompts/few_shot.js +160 -0
- package/dist/prompts/index.cjs +2 -1
- package/dist/prompts/index.d.ts +1 -1
- package/dist/prompts/index.js +1 -1
- package/dist/retrievers/zep.cjs +26 -3
- package/dist/retrievers/zep.d.ts +11 -2
- package/dist/retrievers/zep.js +26 -3
- package/dist/types/googlevertexai-types.d.ts +12 -10
- package/dist/util/bedrock.d.ts +2 -0
- package/dist/util/googlevertexai-connection.cjs +298 -10
- package/dist/util/googlevertexai-connection.d.ts +76 -7
- package/dist/util/googlevertexai-connection.js +294 -9
- package/dist/util/googlevertexai-gauth.cjs +36 -0
- package/dist/util/googlevertexai-gauth.d.ts +8 -0
- package/dist/util/googlevertexai-gauth.js +32 -0
- package/dist/util/googlevertexai-webauth.cjs +38 -2
- package/dist/util/googlevertexai-webauth.d.ts +2 -6
- package/dist/util/googlevertexai-webauth.js +38 -2
- package/dist/util/llama_cpp.cjs +34 -0
- package/dist/util/llama_cpp.d.ts +46 -0
- package/dist/util/llama_cpp.js +28 -0
- package/dist/util/openai-format-fndef.cjs +81 -0
- package/dist/util/openai-format-fndef.d.ts +44 -0
- package/dist/util/openai-format-fndef.js +77 -0
- package/dist/util/openapi.d.ts +2 -2
- package/dist/vectorstores/googlevertexai.d.ts +4 -4
- package/dist/vectorstores/pinecone.cjs +5 -5
- package/dist/vectorstores/pinecone.d.ts +2 -2
- package/dist/vectorstores/pinecone.js +5 -5
- package/embeddings/llama_cpp.cjs +1 -0
- package/embeddings/llama_cpp.d.ts +1 -0
- package/embeddings/llama_cpp.js +1 -0
- package/package.json +13 -5
package/dist/llms/googlevertexai/common.cjs CHANGED

@@ -2,6 +2,7 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.BaseGoogleVertexAI = void 0;
 const base_js_1 = require("../base.cjs");
+const index_js_1 = require("../../schema/index.cjs");
 /**
  * Base class for Google Vertex AI LLMs.
  * Implemented subclasses must provide a GoogleVertexAILLMConnection
@@ -57,6 +58,12 @@ class BaseGoogleVertexAI extends base_js_1.BaseLLM {
             writable: true,
             value: void 0
         });
+        Object.defineProperty(this, "streamedConnection", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         this.model = fields?.model ?? this.model;
         // Change the defaults for code models
         if (this.model.startsWith("code-gecko")) {
@@ -73,26 +80,37 @@ class BaseGoogleVertexAI extends base_js_1.BaseLLM {
     _llmType() {
         return "vertexai";
     }
+    async *_streamResponseChunks(_input, _options, _runManager) {
+        // Make the call as a streaming request
+        const instance = this.formatInstance(_input);
+        const parameters = this.formatParameters();
+        const result = await this.streamedConnection.request([instance], parameters, _options);
+        // Get the streaming parser of the response
+        const stream = result.data;
+        // Loop until the end of the stream
+        // During the loop, yield each time we get a chunk from the streaming parser
+        // that is either available or added to the queue
+        while (!stream.streamDone) {
+            const output = await stream.nextChunk();
+            const chunk = output !== null
+                ? new index_js_1.GenerationChunk(this.extractGenerationFromPrediction(output.outputs[0]))
+                : new index_js_1.GenerationChunk({
+                    text: "",
+                    generationInfo: { finishReason: "stop" },
+                });
+            yield chunk;
+        }
+    }
     async _generate(prompts, options) {
         const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
         return { generations };
     }
     async _generatePrompt(prompt, options) {
         const instance = this.formatInstance(prompt);
-        const parameters = {
-            temperature: this.temperature,
-            topK: this.topK,
-            topP: this.topP,
-            maxOutputTokens: this.maxOutputTokens,
-        };
+        const parameters = this.formatParameters();
         const result = await this.connection.request([instance], parameters, options);
         const prediction = this.extractPredictionFromResponse(result);
-        return [
-            {
-                text: prediction.content,
-                generationInfo: prediction,
-            },
-        ];
+        return [this.extractGenerationFromPrediction(prediction)];
     }
     /**
      * Formats the input instance as a text instance for the Google Vertex AI
@@ -123,13 +141,28 @@ class BaseGoogleVertexAI extends base_js_1.BaseLLM {
             ? this.formatInstanceCode(prompt)
             : this.formatInstanceText(prompt);
     }
+    formatParameters() {
+        return {
+            temperature: this.temperature,
+            topK: this.topK,
+            topP: this.topP,
+            maxOutputTokens: this.maxOutputTokens,
+        };
+    }
     /**
      * Extracts the prediction from the API response.
      * @param result The API response from which to extract the prediction.
      * @returns A TextPrediction object representing the extracted prediction.
      */
     extractPredictionFromResponse(result) {
-        return result?.data
+        return result?.data
+            ?.predictions[0];
+    }
+    extractGenerationFromPrediction(prediction) {
+        return {
+            text: prediction.content,
+            generationInfo: prediction,
+        };
     }
 }
 exports.BaseGoogleVertexAI = BaseGoogleVertexAI;
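The new streaming loop above relies on a small parser object that the streamed connection returns as result.data. A hedged TypeScript sketch of that contract, with the names taken from the compiled code above (the concrete type lives in util/googlevertexai-connection, which is not shown in this section):

// Assumed shape of the stream parser consumed by _streamResponseChunks.
interface GoogleVertexAIStreamParser<PredictionType> {
  // Becomes true once the underlying response stream has finished.
  streamDone: boolean;
  // Resolves with the next parsed chunk, or null when nothing more will arrive.
  nextChunk(): Promise<{ outputs: PredictionType[] } | null>;
}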
package/dist/llms/googlevertexai/common.d.ts CHANGED

@@ -1,8 +1,9 @@
 import { BaseLLM } from "../base.js";
-import { Generation, LLMResult } from "../../schema/index.js";
-import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js";
-import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction,
+import { Generation, GenerationChunk, LLMResult } from "../../schema/index.js";
+import { GoogleVertexAILLMConnection, GoogleVertexAILLMResponse } from "../../util/googlevertexai-connection.js";
+import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, GoogleVertexAIModelParams } from "../../types/googlevertexai-types.js";
 import { BaseLanguageModelCallOptions } from "../../base_language/index.js";
+import { CallbackManagerForLLMRun } from "../../callbacks/index.js";
 /**
  * Interface representing the instance of text input to the Google Vertex
  * AI model.
@@ -41,9 +42,11 @@ export declare class BaseGoogleVertexAI<AuthOptions> extends BaseLLM implements
     topP: number;
     topK: number;
     protected connection: GoogleVertexAILLMConnection<BaseLanguageModelCallOptions, GoogleVertexAILLMInstance, TextPrediction, AuthOptions>;
+    protected streamedConnection: GoogleVertexAILLMConnection<BaseLanguageModelCallOptions, GoogleVertexAILLMInstance, TextPrediction, AuthOptions>;
     get lc_aliases(): Record<string, string>;
     constructor(fields?: GoogleVertexAIBaseLLMInput<AuthOptions>);
     _llmType(): string;
+    _streamResponseChunks(_input: string, _options: this["ParsedCallOptions"], _runManager?: CallbackManagerForLLMRun): AsyncGenerator<GenerationChunk>;
     _generate(prompts: string[], options: this["ParsedCallOptions"]): Promise<LLMResult>;
     _generatePrompt(prompt: string, options: this["ParsedCallOptions"]): Promise<Generation[]>;
     /**
@@ -67,11 +70,13 @@ export declare class BaseGoogleVertexAI<AuthOptions> extends BaseLLM implements
      * @returns A GoogleVertexAILLMInstance object representing the formatted instance.
      */
     formatInstance(prompt: string): GoogleVertexAILLMInstance;
+    formatParameters(): GoogleVertexAIModelParams;
     /**
      * Extracts the prediction from the API response.
      * @param result The API response from which to extract the prediction.
      * @returns A TextPrediction object representing the extracted prediction.
      */
     extractPredictionFromResponse(result: GoogleVertexAILLMResponse<TextPrediction>): TextPrediction;
+    extractGenerationFromPrediction(prediction: TextPrediction): Generation;
 }
 export {};
package/dist/llms/googlevertexai/common.js CHANGED

@@ -1,4 +1,5 @@
 import { BaseLLM } from "../base.js";
+import { GenerationChunk } from "../../schema/index.js";
 /**
  * Base class for Google Vertex AI LLMs.
  * Implemented subclasses must provide a GoogleVertexAILLMConnection
@@ -54,6 +55,12 @@ export class BaseGoogleVertexAI extends BaseLLM {
             writable: true,
             value: void 0
         });
+        Object.defineProperty(this, "streamedConnection", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
         this.model = fields?.model ?? this.model;
         // Change the defaults for code models
         if (this.model.startsWith("code-gecko")) {
@@ -70,26 +77,37 @@ export class BaseGoogleVertexAI extends BaseLLM {
     _llmType() {
         return "vertexai";
     }
+    async *_streamResponseChunks(_input, _options, _runManager) {
+        // Make the call as a streaming request
+        const instance = this.formatInstance(_input);
+        const parameters = this.formatParameters();
+        const result = await this.streamedConnection.request([instance], parameters, _options);
+        // Get the streaming parser of the response
+        const stream = result.data;
+        // Loop until the end of the stream
+        // During the loop, yield each time we get a chunk from the streaming parser
+        // that is either available or added to the queue
+        while (!stream.streamDone) {
+            const output = await stream.nextChunk();
+            const chunk = output !== null
+                ? new GenerationChunk(this.extractGenerationFromPrediction(output.outputs[0]))
+                : new GenerationChunk({
+                    text: "",
+                    generationInfo: { finishReason: "stop" },
+                });
+            yield chunk;
+        }
+    }
     async _generate(prompts, options) {
         const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
         return { generations };
     }
     async _generatePrompt(prompt, options) {
         const instance = this.formatInstance(prompt);
-        const parameters = {
-            temperature: this.temperature,
-            topK: this.topK,
-            topP: this.topP,
-            maxOutputTokens: this.maxOutputTokens,
-        };
+        const parameters = this.formatParameters();
         const result = await this.connection.request([instance], parameters, options);
         const prediction = this.extractPredictionFromResponse(result);
-        return [
-            {
-                text: prediction.content,
-                generationInfo: prediction,
-            },
-        ];
+        return [this.extractGenerationFromPrediction(prediction)];
     }
     /**
      * Formats the input instance as a text instance for the Google Vertex AI
@@ -120,12 +138,27 @@ export class BaseGoogleVertexAI extends BaseLLM {
             ? this.formatInstanceCode(prompt)
             : this.formatInstanceText(prompt);
     }
+    formatParameters() {
+        return {
+            temperature: this.temperature,
+            topK: this.topK,
+            topP: this.topP,
+            maxOutputTokens: this.maxOutputTokens,
+        };
+    }
     /**
      * Extracts the prediction from the API response.
      * @param result The API response from which to extract the prediction.
      * @returns A TextPrediction object representing the extracted prediction.
      */
     extractPredictionFromResponse(result) {
-        return result?.data
+        return result?.data
+            ?.predictions[0];
+    }
+    extractGenerationFromPrediction(prediction) {
+        return {
+            text: prediction.content,
+            generationInfo: prediction,
+        };
     }
 }
package/dist/llms/googlevertexai/index.cjs CHANGED

@@ -1,9 +1,9 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.GoogleVertexAI = void 0;
-const google_auth_library_1 = require("google-auth-library");
 const googlevertexai_connection_js_1 = require("../../util/googlevertexai-connection.cjs");
 const common_js_1 = require("./common.cjs");
+const googlevertexai_gauth_js_1 = require("../../util/googlevertexai-gauth.cjs");
 /**
  * Enables calls to the Google Cloud's Vertex AI API to access
  * Large Language Models.
@@ -24,11 +24,12 @@ class GoogleVertexAI extends common_js_1.BaseGoogleVertexAI {
     }
     constructor(fields) {
         super(fields);
-        const client = new google_auth_library_1.GoogleAuth({
+        const client = new googlevertexai_gauth_js_1.GAuthClient({
            scopes: "https://www.googleapis.com/auth/cloud-platform",
            ...fields?.authOptions,
        });
-        this.connection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client);
+        this.connection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, false);
+        this.streamedConnection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, true);
     }
 }
 exports.GoogleVertexAI = GoogleVertexAI;
package/dist/llms/googlevertexai/index.js CHANGED

@@ -1,6 +1,6 @@
-import { GoogleAuth } from "google-auth-library";
 import { GoogleVertexAILLMConnection } from "../../util/googlevertexai-connection.js";
 import { BaseGoogleVertexAI } from "./common.js";
+import { GAuthClient } from "../../util/googlevertexai-gauth.js";
 /**
  * Enables calls to the Google Cloud's Vertex AI API to access
  * Large Language Models.
@@ -21,10 +21,11 @@ export class GoogleVertexAI extends BaseGoogleVertexAI {
     }
     constructor(fields) {
         super(fields);
-        const client = new GoogleAuth({
+        const client = new GAuthClient({
            scopes: "https://www.googleapis.com/auth/cloud-platform",
            ...fields?.authOptions,
        });
-        this.connection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client);
+        this.connection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, false);
+        this.streamedConnection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, true);
     }
 }
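The constructor change above swaps the direct google-auth-library client for the new GAuthClient wrapper, but authOptions are still spread into it unchanged. A hedged sketch of passing auth options through (project and key-file values are placeholders, not taken from the package):

import { GoogleVertexAI } from "langchain/llms/googlevertexai";

// Sketch only: standard google-auth-library options forwarded into the
// GAuthClient alongside the cloud-platform scope set by the constructor.
const model = new GoogleVertexAI({
  authOptions: {
    projectId: "my-gcp-project",                  // placeholder
    keyFilename: "/path/to/service-account.json", // placeholder
  },
});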
package/dist/llms/googlevertexai/web.cjs CHANGED

@@ -25,7 +25,8 @@ class GoogleVertexAI extends common_js_1.BaseGoogleVertexAI {
     constructor(fields) {
         super(fields);
         const client = new googlevertexai_webauth_js_1.WebGoogleAuth(fields?.authOptions);
-        this.connection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client);
+        this.connection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, false);
+        this.streamedConnection = new googlevertexai_connection_js_1.GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, true);
     }
 }
 exports.GoogleVertexAI = GoogleVertexAI;
package/dist/llms/googlevertexai/web.js CHANGED

@@ -22,6 +22,7 @@ export class GoogleVertexAI extends BaseGoogleVertexAI {
     constructor(fields) {
         super(fields);
         const client = new WebGoogleAuth(fields?.authOptions);
-        this.connection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client);
+        this.connection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, false);
+        this.streamedConnection = new GoogleVertexAILLMConnection({ ...fields, ...this }, this.caller, client, true);
     }
 }
package/dist/llms/hf.cjs CHANGED

@@ -57,6 +57,12 @@ class HuggingFaceInference extends base_js_1.LLM {
             writable: true,
             value: undefined
         });
+        Object.defineProperty(this, "endpointUrl", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
         this.model = fields?.model ?? this.model;
         this.temperature = fields?.temperature ?? this.temperature;
         this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -65,6 +71,7 @@ class HuggingFaceInference extends base_js_1.LLM {
         this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
         this.apiKey =
             fields?.apiKey ?? (0, env_js_1.getEnvironmentVariable)("HUGGINGFACEHUB_API_KEY");
+        this.endpointUrl = fields?.endpointUrl;
         if (!this.apiKey) {
             throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
         }
@@ -75,7 +82,9 @@ class HuggingFaceInference extends base_js_1.LLM {
     /** @ignore */
     async _call(prompt, options) {
         const { HfInference } = await HuggingFaceInference.imports();
-        const hf = new HfInference(this.apiKey);
+        const hf = this.endpointUrl
+            ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+            : new HfInference(this.apiKey);
         const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
             model: this.model,
             parameters: {
package/dist/llms/hf.d.ts CHANGED

@@ -6,6 +6,8 @@ import { LLM, BaseLLMParams } from "./base.js";
 export interface HFInput {
     /** Model to use */
     model: string;
+    /** Custom inference endpoint URL to use */
+    endpointUrl?: string;
     /** Sampling temperature to use */
     temperature?: number;
     /**
@@ -36,6 +38,7 @@ export declare class HuggingFaceInference extends LLM implements HFInput {
     topK: number | undefined;
     frequencyPenalty: number | undefined;
     apiKey: string | undefined;
+    endpointUrl: string | undefined;
     constructor(fields?: Partial<HFInput> & BaseLLMParams);
     _llmType(): string;
     /** @ignore */
package/dist/llms/hf.js CHANGED

@@ -54,6 +54,12 @@ export class HuggingFaceInference extends LLM {
             writable: true,
             value: undefined
         });
+        Object.defineProperty(this, "endpointUrl", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: undefined
+        });
         this.model = fields?.model ?? this.model;
         this.temperature = fields?.temperature ?? this.temperature;
         this.maxTokens = fields?.maxTokens ?? this.maxTokens;
@@ -62,6 +68,7 @@ export class HuggingFaceInference extends LLM {
         this.frequencyPenalty = fields?.frequencyPenalty ?? this.frequencyPenalty;
         this.apiKey =
             fields?.apiKey ?? getEnvironmentVariable("HUGGINGFACEHUB_API_KEY");
+        this.endpointUrl = fields?.endpointUrl;
         if (!this.apiKey) {
             throw new Error("Please set an API key for HuggingFace Hub in the environment variable HUGGINGFACEHUB_API_KEY or in the apiKey field of the HuggingFaceInference constructor.");
         }
@@ -72,7 +79,9 @@ export class HuggingFaceInference extends LLM {
     /** @ignore */
     async _call(prompt, options) {
         const { HfInference } = await HuggingFaceInference.imports();
-        const hf = new HfInference(this.apiKey);
+        const hf = this.endpointUrl
+            ? new HfInference(this.apiKey).endpoint(this.endpointUrl)
+            : new HfInference(this.apiKey);
         const res = await this.caller.callWithOptions({ signal: options.signal }, hf.textGeneration.bind(hf), {
             model: this.model,
             parameters: {
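These hf changes add an optional endpointUrl so the wrapper can target a dedicated Hugging Face Inference Endpoint instead of the hosted Hub API. A hedged usage sketch (the endpoint URL and key below are placeholders):

import { HuggingFaceInference } from "langchain/llms/hf";

// Sketch only: when endpointUrl is set, _call routes requests through
// new HfInference(apiKey).endpoint(endpointUrl) instead of the public Hub.
const model = new HuggingFaceInference({
  model: "gpt2",
  endpointUrl: "https://my-endpoint.endpoints.huggingface.cloud", // placeholder
  apiKey: "hf_xxx", // or set HUGGINGFACEHUB_API_KEY
});

const output = await model.call("What is the capital of France?");
console.log(output);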
package/dist/llms/llama_cpp.cjs CHANGED

@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.LlamaCpp = void 0;
-const node_llama_cpp_1 = require("node-llama-cpp");
+const llama_cpp_js_1 = require("../util/llama_cpp.cjs");
 const base_js_1 = require("./base.cjs");
 /**
  * To use this model you need to have the `node-llama-cpp` module installed.
@@ -15,73 +15,31 @@ class LlamaCpp extends base_js_1.LLM {
     }
     constructor(inputs) {
         super(inputs);
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "maxTokens", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "temperature", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topK", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
+        Object.defineProperty(this, "topP", {
             enumerable: true,
             configurable: true,
             writable: true,
             value: void 0
         });
-        Object.defineProperty(this, "
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "logitsAll", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "lowVram", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "seed", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMlock", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "useMmap", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "vocabOnly", {
-            enumerable: true,
-            configurable: true,
-            writable: true,
-            value: void 0
-        });
-        Object.defineProperty(this, "modelPath", {
+        Object.defineProperty(this, "trimWhitespaceSuffix", {
             enumerable: true,
             configurable: true,
             writable: true,
@@ -105,29 +63,31 @@ class LlamaCpp extends base_js_1.LLM {
             writable: true,
             value: void 0
         });
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.
-        this.seed = inputs.seed;
-        this.useMlock = inputs.useMlock;
-        this.useMmap = inputs.useMmap;
-        this.vocabOnly = inputs.vocabOnly;
-        this._model = new node_llama_cpp_1.LlamaModel(inputs);
-        this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
-        this._session = new node_llama_cpp_1.LlamaChatSession({ context: this._context });
+        this.maxTokens = inputs?.maxTokens;
+        this.temperature = inputs?.temperature;
+        this.topK = inputs?.topK;
+        this.topP = inputs?.topP;
+        this.trimWhitespaceSuffix = inputs?.trimWhitespaceSuffix;
+        this._model = (0, llama_cpp_js_1.createLlamaModel)(inputs);
+        this._context = (0, llama_cpp_js_1.createLlamaContext)(this._model, inputs);
+        this._session = (0, llama_cpp_js_1.createLlamaSession)(this._context);
     }
     _llmType() {
         return "llama2_cpp";
     }
     /** @ignore */
-    async _call(prompt,
+    async _call(prompt,
+    // @ts-expect-error - TS6133: 'options' is declared but its value is never read.
+    options) {
         try {
-            const
+            const promptOptions = {
+                maxTokens: this?.maxTokens,
+                temperature: this?.temperature,
+                topK: this?.topK,
+                topP: this?.topP,
+                trimWhitespaceSuffix: this?.trimWhitespaceSuffix,
+            };
+            const completion = await this._session.prompt(prompt, promptOptions);
             return completion;
         }
         catch (e) {
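The llama_cpp refactor moves model/context/session construction into shared helpers in util/llama_cpp and keeps the sampling options on the instance so _call can forward them to session.prompt(). A hedged usage sketch (the model path is a placeholder):

import { LlamaCpp } from "langchain/llms/llama_cpp";

// Sketch only: these are the options the new constructor stores and
// forwards via promptOptions in _call.
const model = new LlamaCpp({
  modelPath: process.env.LLAMA_PATH ?? "/path/to/llama-model.bin", // placeholder
  temperature: 0.5,
  maxTokens: 256,
  topP: 0.9,
  trimWhitespaceSuffix: true,
});

const answer = await model.call("Where do llamas come from?");
console.log(answer);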
package/dist/llms/llama_cpp.d.ts CHANGED

@@ -1,40 +1,11 @@
 import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+import { LlamaBaseCppInputs } from "../util/llama_cpp.js";
 import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js";
 /**
  * Note that the modelPath is the only required parameter. For testing you
  * can set this in the environment variable `LLAMA_PATH`.
  */
-export interface LlamaCppInputs extends BaseLLMParams {
-    /** Prompt processing batch size. */
-    batchSize?: number;
-    /** Text context size. */
-    contextSize?: number;
-    /** Embedding mode only. */
-    embedding?: boolean;
-    /** Use fp16 for KV cache. */
-    f16Kv?: boolean;
-    /** Number of layers to store in VRAM. */
-    gpuLayers?: number;
-    /** The llama_eval() call computes all logits, not just the last one. */
-    logitsAll?: boolean;
-    /** If true, reduce VRAM usage at the cost of performance. */
-    lowVram?: boolean;
-    /** Path to the model on the filesystem. */
-    modelPath: string;
-    /** If null, a random seed will be used. */
-    seed?: null | number;
-    /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
-    temperature?: number;
-    /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
-    topK?: number;
-    /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
-    topP?: number;
-    /** Force system to keep model in RAM. */
-    useMlock?: boolean;
-    /** Use mmap if possible. */
-    useMmap?: boolean;
-    /** Only load the vocabulary, no weights. */
-    vocabOnly?: boolean;
+export interface LlamaCppInputs extends LlamaBaseCppInputs, BaseLLMParams {
 }
 export interface LlamaCppCallOptions extends BaseLLMCallOptions {
     /** The maximum number of tokens the response should contain. */
@@ -51,18 +22,11 @@ export interface LlamaCppCallOptions extends BaseLLMCallOptions {
 export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
     CallOptions: LlamaCppCallOptions;
     static inputs: LlamaCppInputs;
-
-
-
-
-
-    logitsAll?: boolean;
-    lowVram?: boolean;
-    seed?: null | number;
-    useMlock?: boolean;
-    useMmap?: boolean;
-    vocabOnly?: boolean;
-    modelPath: string;
+    maxTokens?: number;
+    temperature?: number;
+    topK?: number;
+    topP?: number;
+    trimWhitespaceSuffix?: boolean;
     _model: LlamaModel;
     _context: LlamaContext;
     _session: LlamaChatSession;
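LlamaCppInputs now simply composes LlamaBaseCppInputs (from the new util/llama_cpp module added in this release, whose declarations are not shown in this section) with BaseLLMParams. A hedged sketch of what that shared base presumably carries, reconstructed from the fields removed here plus the new class fields; the actual declaration may differ:

// Assumed shape of LlamaBaseCppInputs in ../util/llama_cpp.js, shared by the
// LLM, chat model, and embeddings wrappers; field list inferred, not verified.
interface LlamaBaseCppInputs {
  modelPath: string;   // path to the model on the filesystem (required)
  batchSize?: number;
  contextSize?: number;
  embedding?: boolean;
  f16Kv?: boolean;
  gpuLayers?: number;
  logitsAll?: boolean;
  lowVram?: boolean;
  seed?: null | number;
  useMlock?: boolean;
  useMmap?: boolean;
  vocabOnly?: boolean;
  maxTokens?: number;
  temperature?: number;
  topK?: number;
  topP?: number;
  trimWhitespaceSuffix?: boolean;
}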