langchain 0.0.136 → 0.0.138
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/chat_models/minimax.cjs +1 -0
- package/chat_models/minimax.d.ts +1 -0
- package/chat_models/minimax.js +1 -0
- package/dist/callbacks/handlers/tracer.cjs +17 -17
- package/dist/callbacks/handlers/tracer.d.ts +1 -1
- package/dist/callbacks/handlers/tracer.js +17 -17
- package/dist/callbacks/manager.cjs +25 -10
- package/dist/callbacks/manager.d.ts +3 -2
- package/dist/callbacks/manager.js +25 -10
- package/dist/chat_models/minimax.cjs +547 -0
- package/dist/chat_models/minimax.d.ts +364 -0
- package/dist/chat_models/minimax.js +543 -0
- package/dist/chat_models/ollama.cjs +136 -0
- package/dist/chat_models/ollama.d.ts +34 -0
- package/dist/chat_models/ollama.js +136 -0
- package/dist/document_loaders/web/recursive_url.cjs +1 -1
- package/dist/document_loaders/web/recursive_url.js +1 -1
- package/dist/embeddings/minimax.cjs +152 -0
- package/dist/embeddings/minimax.d.ts +104 -0
- package/dist/embeddings/minimax.js +148 -0
- package/dist/llms/llama_cpp.cjs +132 -0
- package/dist/llms/llama_cpp.d.ts +73 -0
- package/dist/llms/llama_cpp.js +128 -0
- package/dist/llms/ollama.cjs +136 -0
- package/dist/llms/ollama.d.ts +34 -0
- package/dist/llms/ollama.js +136 -0
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/load/import_map.cjs +5 -2
- package/dist/load/import_map.d.ts +3 -0
- package/dist/load/import_map.js +3 -0
- package/dist/retrievers/multi_vector.cjs +72 -0
- package/dist/retrievers/multi_vector.d.ts +30 -0
- package/dist/retrievers/multi_vector.js +68 -0
- package/dist/retrievers/parent_document.cjs +1 -0
- package/dist/retrievers/parent_document.js +1 -0
- package/dist/schema/retriever.cjs +1 -4
- package/dist/schema/retriever.d.ts +2 -5
- package/dist/schema/retriever.js +1 -4
- package/dist/util/ollama.d.ts +34 -0
- package/dist/vectorstores/redis.cjs +17 -2
- package/dist/vectorstores/redis.d.ts +10 -1
- package/dist/vectorstores/redis.js +17 -2
- package/dist/vectorstores/zep.cjs +2 -1
- package/dist/vectorstores/zep.js +3 -2
- package/embeddings/minimax.cjs +1 -0
- package/embeddings/minimax.d.ts +1 -0
- package/embeddings/minimax.js +1 -0
- package/llms/llama_cpp.cjs +1 -0
- package/llms/llama_cpp.d.ts +1 -0
- package/llms/llama_cpp.js +1 -0
- package/package.json +40 -3
- package/retrievers/multi_vector.cjs +1 -0
- package/retrievers/multi_vector.d.ts +1 -0
- package/retrievers/multi_vector.js +1 -0

package/dist/embeddings/minimax.d.ts
@@ -0,0 +1,104 @@
+import { Embeddings, EmbeddingsParams } from "./base.js";
+import { ConfigurationParameters } from "../chat_models/minimax.js";
+/**
+ * Interface for MinimaxEmbeddings parameters. Extends EmbeddingsParams and
+ * defines additional parameters specific to the MinimaxEmbeddings class.
+ */
+export interface MinimaxEmbeddingsParams extends EmbeddingsParams {
+    /** Model name to use */
+    modelName: string;
+    /**
+     * API key to use when making requests. Defaults to the value of
+     * `MINIMAX_GROUP_ID` environment variable.
+     */
+    minimaxGroupId?: string;
+    /**
+     * Secret key to use when making requests. Defaults to the value of
+     * `MINIMAX_API_KEY` environment variable.
+     */
+    minimaxApiKey?: string;
+    /**
+     * The maximum number of documents to embed in a single request. This is
+     * limited by the Minimax API to a maximum of 4096.
+     */
+    batchSize?: number;
+    /**
+     * Whether to strip new lines from the input text. This is recommended by
+     * Minimax, but may not be suitable for all use cases.
+     */
+    stripNewLines?: boolean;
+    /**
+     * The target use-case after generating the vector.
+     * When using embeddings, the vector of the target content is first generated through the db and stored in the vector database,
+     * and then the vector of the retrieval text is generated through the query.
+     * Note: For the parameters of the partial algorithm, we adopted a separate algorithm plan for query and db.
+     * Therefore, for a paragraph of text, if it is to be used as a retrieval text, it should use the db,
+     * and if it is used as a retrieval text, it should use the query.
+     */
+    type?: "db" | "query";
+}
+export interface CreateMinimaxEmbeddingRequest {
+    /**
+     * @type {string}
+     * @memberof CreateMinimaxEmbeddingRequest
+     */
+    model: string;
+    /**
+     * Text to generate vector expectation
+     * @type {CreateEmbeddingRequestInput}
+     * @memberof CreateMinimaxEmbeddingRequest
+     */
+    texts: string[];
+    /**
+     * The target use-case after generating the vector. When using embeddings,
+     * first generate the vector of the target content through the db and store it in the vector database,
+     * and then generate the vector of the retrieval text through the query.
+     * Note: For the parameter of the algorithm, we use the algorithm scheme of query and db separation,
+     * so a text, if it is to be retrieved as a text, should use the db,
+     * if it is used as a retrieval text, should use the query.
+     * @type {string}
+     * @memberof CreateMinimaxEmbeddingRequest
+     */
+    type: "db" | "query";
+}
+/**
+ * Class for generating embeddings using the Minimax API. Extends the
+ * Embeddings class and implements MinimaxEmbeddingsParams
+ */
+export declare class MinimaxEmbeddings extends Embeddings implements MinimaxEmbeddingsParams {
+    modelName: string;
+    batchSize: number;
+    stripNewLines: boolean;
+    minimaxGroupId?: string;
+    minimaxApiKey?: string;
+    type: "db" | "query";
+    apiUrl: string;
+    basePath?: string;
+    headers?: Record<string, string>;
+    constructor(fields?: Partial<MinimaxEmbeddingsParams> & {
+        configuration?: ConfigurationParameters;
+    });
+    /**
+     * Method to generate embeddings for an array of documents. Splits the
+     * documents into batches and makes requests to the Minimax API to generate
+     * embeddings.
+     * @param texts Array of documents to generate embeddings for.
+     * @returns Promise that resolves to a 2D array of embeddings for each document.
+     */
+    embedDocuments(texts: string[]): Promise<number[][]>;
+    /**
+     * Method to generate an embedding for a single document. Calls the
+     * embeddingWithRetry method with the document as the input.
+     * @param text Document to generate an embedding for.
+     * @returns Promise that resolves to an embedding for the document.
+     */
+    embedQuery(text: string): Promise<number[]>;
+    /**
+     * Private method to make a request to the Minimax API to generate
+     * embeddings. Handles the retry logic and returns the response from the
+     * API.
+     * @param request Request to send to the Minimax API.
+     * @returns Promise that resolves to the response from the API.
+     */
+    private embeddingWithRetry;
+}
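
The least obvious field above is `type`: "db" is intended for content being written into a vector store, "query" for the retrieval text itself. A minimal usage sketch, assuming the new `langchain/embeddings/minimax` entry point (the package/embeddings/minimax.* stubs in the file list) and that `MINIMAX_GROUP_ID` / `MINIMAX_API_KEY` are set in the environment:

import { MinimaxEmbeddings } from "langchain/embeddings/minimax";

// One instance per use-case, since `type` is fixed per instance.
const storeEmbeddings = new MinimaxEmbeddings({ type: "db" });    // vectors to index into the vector database
const queryEmbeddings = new MinimaxEmbeddings({ type: "query" }); // vectors for the retrieval text

const docVectors = await storeEmbeddings.embedDocuments([
  "MiniMax exposes an embeddings endpoint.",
  "LangChain wraps it behind the Embeddings interface.",
]);
const queryVector = await queryEmbeddings.embedQuery("Which endpoint does MiniMax expose?");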

package/dist/embeddings/minimax.js
@@ -0,0 +1,148 @@
+import { getEnvironmentVariable } from "../util/env.js";
+import { chunkArray } from "../util/chunk.js";
+import { Embeddings } from "./base.js";
+/**
+ * Class for generating embeddings using the Minimax API. Extends the
+ * Embeddings class and implements MinimaxEmbeddingsParams
+ */
+export class MinimaxEmbeddings extends Embeddings {
+    constructor(fields) {
+        const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
+        super(fieldsWithDefaults);
+        Object.defineProperty(this, "modelName", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "embo-01"
+        });
+        Object.defineProperty(this, "batchSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: 512
+        });
+        Object.defineProperty(this, "stripNewLines", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: true
+        });
+        Object.defineProperty(this, "minimaxGroupId", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "minimaxApiKey", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "type", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "db"
+        });
+        Object.defineProperty(this, "apiUrl", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "basePath", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "https://api.minimax.chat/v1"
+        });
+        Object.defineProperty(this, "headers", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.minimaxGroupId =
+            fields?.minimaxGroupId ?? getEnvironmentVariable("MINIMAX_GROUP_ID");
+        if (!this.minimaxGroupId) {
+            throw new Error("Minimax GroupID not found");
+        }
+        this.minimaxApiKey =
+            fields?.minimaxApiKey ?? getEnvironmentVariable("MINIMAX_API_KEY");
+        if (!this.minimaxApiKey) {
+            throw new Error("Minimax ApiKey not found");
+        }
+        this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;
+        this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;
+        this.type = fieldsWithDefaults?.type ?? this.type;
+        this.stripNewLines =
+            fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;
+        this.apiUrl = `${this.basePath}/embeddings`;
+        this.basePath = fields?.configuration?.basePath ?? this.basePath;
+        this.headers = fields?.configuration?.headers ?? this.headers;
+    }
+    /**
+     * Method to generate embeddings for an array of documents. Splits the
+     * documents into batches and makes requests to the Minimax API to generate
+     * embeddings.
+     * @param texts Array of documents to generate embeddings for.
+     * @returns Promise that resolves to a 2D array of embeddings for each document.
+     */
+    async embedDocuments(texts) {
+        const batches = chunkArray(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
+        const batchRequests = batches.map((batch) => this.embeddingWithRetry({
+            model: this.modelName,
+            texts: batch,
+            type: this.type,
+        }));
+        const batchResponses = await Promise.all(batchRequests);
+        const embeddings = [];
+        for (let i = 0; i < batchResponses.length; i += 1) {
+            const batch = batches[i];
+            const { vectors: batchResponse } = batchResponses[i];
+            for (let j = 0; j < batch.length; j += 1) {
+                embeddings.push(batchResponse[j]);
+            }
+        }
+        return embeddings;
+    }
+    /**
+     * Method to generate an embedding for a single document. Calls the
+     * embeddingWithRetry method with the document as the input.
+     * @param text Document to generate an embedding for.
+     * @returns Promise that resolves to an embedding for the document.
+     */
+    async embedQuery(text) {
+        const { vectors } = await this.embeddingWithRetry({
+            model: this.modelName,
+            texts: [this.stripNewLines ? text.replace(/\n/g, " ") : text],
+            type: this.type,
+        });
+        return vectors[0];
+    }
+    /**
+     * Private method to make a request to the Minimax API to generate
+     * embeddings. Handles the retry logic and returns the response from the
+     * API.
+     * @param request Request to send to the Minimax API.
+     * @returns Promise that resolves to the response from the API.
+     */
+    async embeddingWithRetry(request) {
+        const makeCompletionRequest = async () => {
+            const url = `${this.apiUrl}?GroupId=${this.minimaxGroupId}`;
+            const response = await fetch(url, {
+                method: "POST",
+                headers: {
+                    "Content-Type": "application/json",
+                    Authorization: `Bearer ${this.minimaxApiKey}`,
+                    ...this.headers,
+                },
+                body: JSON.stringify(request),
+            });
+            const json = await response.json();
+            return json;
+        };
+        return this.caller.call(makeCompletionRequest);
+    }
+}
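
Here embedDocuments strips newlines (unless stripNewLines is false), splits the inputs into batches of batchSize (512 by default, capped at 4096 by the API), issues the requests through the instance's caller (at most two in flight by default, per maxConcurrency: 2), and flattens the returned vectors back into input order. A small sketch of the batching step, assuming chunkArray simply slices an array into fixed-size groups; the helper below is illustrative, not the actual ../util/chunk.js implementation:

// Illustrative re-implementation of the chunking step embedDocuments relies on.
const chunk = <T>(arr: T[], size: number): T[][] =>
  arr.reduce<T[][]>((batches, item, i) => {
    if (i % size === 0) batches.push([]);
    batches[batches.length - 1].push(item);
    return batches;
  }, []);

// 1,200 texts with the default batchSize of 512 -> 3 requests (512 + 512 + 176 texts).
const texts = Array.from({ length: 1200 }, (_, i) => `document ${i}`);
console.log(chunk(texts, 512).map((b) => b.length)); // [512, 512, 176]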

package/dist/llms/llama_cpp.cjs
@@ -0,0 +1,132 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.LlamaCpp = void 0;
+const node_llama_cpp_1 = require("node-llama-cpp");
+const base_js_1 = require("./base.cjs");
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+class LlamaCpp extends base_js_1.LLM {
+    static lc_name() {
+        return "LlamaCpp";
+    }
+    constructor(inputs) {
+        super(inputs);
+        Object.defineProperty(this, "batchSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "embedding", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "f16Kv", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "gpuLayers", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "logitsAll", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "lowVram", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "seed", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMlock", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMmap", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "vocabOnly", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "modelPath", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_model", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_context", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.batchSize = inputs.batchSize;
+        this.contextSize = inputs.contextSize;
+        this.embedding = inputs.embedding;
+        this.f16Kv = inputs.f16Kv;
+        this.gpuLayers = inputs.gpuLayers;
+        this.logitsAll = inputs.logitsAll;
+        this.lowVram = inputs.lowVram;
+        this.modelPath = inputs.modelPath;
+        this.seed = inputs.seed;
+        this.useMlock = inputs.useMlock;
+        this.useMmap = inputs.useMmap;
+        this.vocabOnly = inputs.vocabOnly;
+        this._model = new node_llama_cpp_1.LlamaModel(inputs);
+        this._context = new node_llama_cpp_1.LlamaContext({ model: this._model });
+    }
+    _llmType() {
+        return "llama2_cpp";
+    }
+    /** @ignore */
+    async _call(prompt, options) {
+        const session = new node_llama_cpp_1.LlamaChatSession({ context: this._context });
+        try {
+            const compleation = await session.prompt(prompt, options);
+            return compleation;
+        }
+        catch (e) {
+            throw new Error("Error getting prompt compleation.");
+        }
+    }
+}
+exports.LlamaCpp = LlamaCpp;

package/dist/llms/llama_cpp.d.ts
@@ -0,0 +1,73 @@
+import { LlamaModel, LlamaContext } from "node-llama-cpp";
+import { LLM, BaseLLMCallOptions, BaseLLMParams } from "./base.js";
+/**
+ * Note that the modelPath is the only required parameter. For testing you
+ * can set this in the environment variable `LLAMA_PATH`.
+ */
+export interface LlamaCppInputs extends BaseLLMParams {
+    /** Prompt processing batch size. */
+    batchSize?: number;
+    /** Text context size. */
+    contextSize?: number;
+    /** Embedding mode only. */
+    embedding?: boolean;
+    /** Use fp16 for KV cache. */
+    f16Kv?: boolean;
+    /** Number of layers to store in VRAM. */
+    gpuLayers?: number;
+    /** The llama_eval() call computes all logits, not just the last one. */
+    logitsAll?: boolean;
+    /** If true, reduce VRAM usage at the cost of performance. */
+    lowVram?: boolean;
+    /** Path to the model on the filesystem. */
+    modelPath: string;
+    /** If null, a random seed will be used. */
+    seed?: null | number;
+    /** The randomness of the responses, e.g. 0.1 deterministic, 1.5 creative, 0.8 balanced, 0 disables. */
+    temperature?: number;
+    /** Consider the n most likely tokens, where n is 1 to vocabulary size, 0 disables (uses full vocabulary). Note: only applies when `temperature` > 0. */
+    topK?: number;
+    /** Selects the smallest token set whose probability exceeds P, where P is between 0 - 1, 1 disables. Note: only applies when `temperature` > 0. */
+    topP?: number;
+    /** Force system to keep model in RAM. */
+    useMlock?: boolean;
+    /** Use mmap if possible. */
+    useMmap?: boolean;
+    /** Only load the vocabulary, no weights. */
+    vocabOnly?: boolean;
+}
+export interface LlamaCppCallOptions extends BaseLLMCallOptions {
+    /** The maximum number of tokens the response should contain. */
+    maxTokens?: number;
+    /** A function called when matching the provided token array */
+    onToken?: (tokens: number[]) => void;
+}
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+export declare class LlamaCpp extends LLM<LlamaCppCallOptions> {
+    CallOptions: LlamaCppCallOptions;
+    static inputs: LlamaCppInputs;
+    batchSize?: number;
+    contextSize?: number;
+    embedding?: boolean;
+    f16Kv?: boolean;
+    gpuLayers?: number;
+    logitsAll?: boolean;
+    lowVram?: boolean;
+    seed?: null | number;
+    useMlock?: boolean;
+    useMmap?: boolean;
+    vocabOnly?: boolean;
+    modelPath: string;
+    _model: LlamaModel;
+    _context: LlamaContext;
+    static lc_name(): string;
+    constructor(inputs: LlamaCppInputs);
+    _llmType(): string;
+    /** @ignore */
+    _call(prompt: string, options?: this["ParsedCallOptions"]): Promise<string>;
+}
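
LlamaCppCallOptions adds per-call settings on top of the constructor inputs. A hedged sketch, assuming the new langchain/llms/llama_cpp subpath added in this release and that call options are accepted as the second argument to .call(), as with other LLM integrations at this version; the model file location follows the LLAMA_PATH convention mentioned in the JSDoc above:

import { LlamaCpp } from "langchain/llms/llama_cpp";

// Assumes LLAMA_PATH points at a locally built Llama 2 model file.
const modelPath = process.env.LLAMA_PATH;
if (!modelPath) throw new Error("Set LLAMA_PATH to a local model file.");

const model = new LlamaCpp({ modelPath });
const text = await model.call("Name three llama species.", {
  maxTokens: 64,
  onToken: (tokens) => console.log(`received ${tokens.length} token(s)`),
});
console.log(text);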

package/dist/llms/llama_cpp.js
@@ -0,0 +1,128 @@
+import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";
+import { LLM } from "./base.js";
+/**
+ * To use this model you need to have the `node-llama-cpp` module installed.
+ * This can be installed using `npm install -S node-llama-cpp` and the minimum
+ * version supported in version 2.0.0.
+ * This also requires that have a locally built version of Llama2 installed.
+ */
+export class LlamaCpp extends LLM {
+    static lc_name() {
+        return "LlamaCpp";
+    }
+    constructor(inputs) {
+        super(inputs);
+        Object.defineProperty(this, "batchSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "contextSize", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "embedding", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "f16Kv", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "gpuLayers", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "logitsAll", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "lowVram", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "seed", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMlock", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "useMmap", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "vocabOnly", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "modelPath", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_model", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        Object.defineProperty(this, "_context", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.batchSize = inputs.batchSize;
+        this.contextSize = inputs.contextSize;
+        this.embedding = inputs.embedding;
+        this.f16Kv = inputs.f16Kv;
+        this.gpuLayers = inputs.gpuLayers;
+        this.logitsAll = inputs.logitsAll;
+        this.lowVram = inputs.lowVram;
+        this.modelPath = inputs.modelPath;
+        this.seed = inputs.seed;
+        this.useMlock = inputs.useMlock;
+        this.useMmap = inputs.useMmap;
+        this.vocabOnly = inputs.vocabOnly;
+        this._model = new LlamaModel(inputs);
+        this._context = new LlamaContext({ model: this._model });
+    }
+    _llmType() {
+        return "llama2_cpp";
+    }
+    /** @ignore */
+    async _call(prompt, options) {
+        const session = new LlamaChatSession({ context: this._context });
+        try {
+            const compleation = await session.prompt(prompt, options);
+            return compleation;
+        }
+        catch (e) {
+            throw new Error("Error getting prompt compleation.");
+        }
+    }
+}
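
For reference, a sketch of what the wrapper above does with node-llama-cpp directly: the constructor builds one LlamaModel and one LlamaContext per instance, and every _call opens a fresh LlamaChatSession, so no chat history carries over between calls. The model path below is a hypothetical local file:

import { LlamaModel, LlamaContext, LlamaChatSession } from "node-llama-cpp";

// Mirrors the constructor: one model + one context per LlamaCpp instance.
const model = new LlamaModel({ modelPath: "/models/llama-2-7b-chat.gguf" }); // hypothetical path
const context = new LlamaContext({ model });

// Mirrors _call: a new session per prompt, so each call is independent.
const session = new LlamaChatSession({ context });
const completion = await session.prompt("Say hello in one short sentence.");
console.log(completion);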