langchain 0.0.78 → 0.0.80
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chains/query_constructor/prompt.cjs +5 -5
- package/dist/chains/query_constructor/prompt.d.ts +2 -2
- package/dist/chains/query_constructor/prompt.js +5 -5
- package/dist/chains/sql_db/sql_db_chain.cjs +0 -3
- package/dist/chains/sql_db/sql_db_chain.js +0 -3
- package/dist/chains/vector_db_qa.cjs +1 -1
- package/dist/chains/vector_db_qa.js +1 -1
- package/dist/client/langchainplus.cjs +143 -52
- package/dist/client/langchainplus.d.ts +72 -15
- package/dist/client/langchainplus.js +144 -53
- package/dist/document_loaders/index.cjs +1 -3
- package/dist/document_loaders/index.d.ts +0 -1
- package/dist/document_loaders/index.js +0 -1
- package/dist/document_loaders/web/github.cjs +38 -23
- package/dist/document_loaders/web/github.d.ts +5 -2
- package/dist/document_loaders/web/github.js +38 -23
- package/dist/llms/googlevertexai.cjs +97 -0
- package/dist/llms/googlevertexai.d.ts +43 -0
- package/dist/llms/googlevertexai.js +93 -0
- package/dist/prompts/selectors/conditional.cjs +4 -0
- package/dist/prompts/selectors/conditional.d.ts +5 -0
- package/dist/prompts/selectors/conditional.js +4 -0
- package/dist/retrievers/metal.d.ts +2 -1
- package/dist/stores/message/redis.cjs +1 -10
- package/dist/stores/message/redis.js +1 -10
- package/dist/text_splitter.cjs +11 -4
- package/dist/text_splitter.d.ts +7 -2
- package/dist/text_splitter.js +11 -4
- package/dist/types/googlevertexai-types.cjs +2 -0
- package/dist/types/googlevertexai-types.d.ts +47 -0
- package/dist/types/googlevertexai-types.js +1 -0
- package/dist/util/googlevertexai-connection.cjs +66 -0
- package/dist/util/googlevertexai-connection.d.ts +13 -0
- package/dist/util/googlevertexai-connection.js +62 -0
- package/dist/vectorstores/chroma.cjs +34 -7
- package/dist/vectorstores/chroma.d.ts +5 -1
- package/dist/vectorstores/chroma.js +34 -7
- package/dist/vectorstores/milvus.cjs +9 -30
- package/dist/vectorstores/milvus.d.ts +0 -3
- package/dist/vectorstores/milvus.js +9 -30
- package/llms/googlevertexai.cjs +1 -0
- package/llms/googlevertexai.d.ts +1 -0
- package/llms/googlevertexai.js +1 -0
- package/package.json +21 -5
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAI = void 0;
|
|
4
|
+
const base_js_1 = require("./base.cjs");
|
|
5
|
+
const googlevertexai_connection_js_1 = require("../util/googlevertexai-connection.cjs");
|
|
6
|
+
/**
|
|
7
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
8
|
+
* Large Language Models.
|
|
9
|
+
*
|
|
10
|
+
* To use, you will need to have one of the following authentication
|
|
11
|
+
* methods in place:
|
|
12
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
13
|
+
* using Vertex AI.
|
|
14
|
+
* - You are running this on a machine using a service account permitted to
|
|
15
|
+
* the Google Cloud project using Vertex AI.
|
|
16
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
17
|
+
* path of a credentials file for a service account permitted to the
|
|
18
|
+
* Google Cloud project using Vertex AI.
|
|
19
|
+
*/
|
|
20
|
+
class GoogleVertexAI extends base_js_1.BaseLLM {
    /**
     * @param {object} [fields] Optional overrides. `model`, `temperature`,
     *   `maxOutputTokens`, `topP` and `topK` replace the defaults below when
     *   they are not null/undefined. The whole object is also handed to the
     *   base class and, merged with this instance, to the connection.
     */
    constructor(fields) {
        super(fields ?? {});
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "text-bison"
        });
        Object.defineProperty(this, "temperature", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0.7
        });
        Object.defineProperty(this, "maxOutputTokens", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 1024
        });
        Object.defineProperty(this, "topP", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0.8
        });
        Object.defineProperty(this, "topK", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 40
        });
        Object.defineProperty(this, "connection", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.model = fields?.model ?? this.model;
        this.temperature = fields?.temperature ?? this.temperature;
        this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
        this.topP = fields?.topP ?? this.topP;
        this.topK = fields?.topK ?? this.topK;
        // Instance values are spread last so the resolved `model` (default
        // included) is what the connection sees.
        this.connection = new googlevertexai_connection_js_1.GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
    }
    /** @returns {string} Identifier of this LLM implementation. */
    _llmType() {
        return "googlevertexai";
    }
    /**
     * Requests a completion for every prompt concurrently.
     *
     * @param {string[]} prompts Prompts to complete.
     * @param {object} options Call options forwarded to each request.
     * @returns {Promise<{generations: object[][]}>} One generation list per
     *   prompt, in prompt order.
     */
    async _generate(prompts, options) {
        const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
        return { generations };
    }
    /**
     * Sends one prompt as a single-instance request and converts the first
     * prediction into a generation.
     *
     * @param {string} prompt Prompt text.
     * @param {object} options Call options forwarded to the connection.
     * @returns {Promise<object[]>} Array holding one `{ text, generationInfo }`.
     * @throws {Error} When the response carries no prediction.
     */
    async _generatePrompt(prompt, options) {
        const instance = this.formatInstance(prompt);
        const parameters = {
            temperature: this.temperature,
            topK: this.topK,
            topP: this.topP,
            maxOutputTokens: this.maxOutputTokens,
        };
        const result = await this.connection.request([instance], parameters, options);
        const prediction = this.extractPredictionFromResponse(result);
        if (prediction == null) {
            // Fail with a clear message instead of a TypeError on `.content`.
            throw new Error("Google Vertex AI response did not contain any predictions.");
        }
        return [
            {
                text: prediction.content,
                generationInfo: prediction,
            },
        ];
    }
    /**
     * @param {string} prompt Prompt text.
     * @returns {{content: string}} Request instance wrapping the prompt.
     */
    formatInstance(prompt) {
        return { content: prompt };
    }
    /**
     * @param {object} result Response returned by the connection.
     * @returns {object|undefined} The first prediction, or `undefined` when
     *   the response, its `data`, or its `predictions` list is missing/empty.
     */
    extractPredictionFromResponse(result) {
        return result?.data?.predictions?.[0];
    }
}
|
|
97
|
+
exports.GoogleVertexAI = GoogleVertexAI;
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { Generation, LLMResult } from "../schema/index.js";
|
|
3
|
+
import { GoogleVertexAIBaseLLMInput, GoogleVertexAIBasePrediction, GoogleVertexAILLMResponse } from "../types/googlevertexai-types.js";
|
|
4
|
+
export interface GoogleVertexAITextInput extends GoogleVertexAIBaseLLMInput {
|
|
5
|
+
}
|
|
6
|
+
interface GoogleVertexAILLMTextInstance {
|
|
7
|
+
content: string;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Models the data returned from the API call
|
|
11
|
+
*/
|
|
12
|
+
interface TextPrediction extends GoogleVertexAIBasePrediction {
|
|
13
|
+
content: string;
|
|
14
|
+
}
|
|
15
|
+
/**
|
|
16
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
17
|
+
* Large Language Models.
|
|
18
|
+
*
|
|
19
|
+
* To use, you will need to have one of the following authentication
|
|
20
|
+
* methods in place:
|
|
21
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
22
|
+
* using Vertex AI.
|
|
23
|
+
* - You are running this on a machine using a service account permitted to
|
|
24
|
+
* the Google Cloud project using Vertex AI.
|
|
25
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
26
|
+
* path of a credentials file for a service account permitted to the
|
|
27
|
+
* Google Cloud project using Vertex AI.
|
|
28
|
+
*/
|
|
29
|
+
export declare class GoogleVertexAI extends BaseLLM implements GoogleVertexAITextInput {
|
|
30
|
+
model: string;
|
|
31
|
+
temperature: number;
|
|
32
|
+
maxOutputTokens: number;
|
|
33
|
+
topP: number;
|
|
34
|
+
topK: number;
|
|
35
|
+
private connection;
|
|
36
|
+
constructor(fields?: GoogleVertexAITextInput);
|
|
37
|
+
_llmType(): string;
|
|
38
|
+
_generate(prompts: string[], options: this["ParsedCallOptions"]): Promise<LLMResult>;
|
|
39
|
+
_generatePrompt(prompt: string, options: this["ParsedCallOptions"]): Promise<Generation[]>;
|
|
40
|
+
formatInstance(prompt: string): GoogleVertexAILLMTextInstance;
|
|
41
|
+
extractPredictionFromResponse(result: GoogleVertexAILLMResponse<TextPrediction>): TextPrediction;
|
|
42
|
+
}
|
|
43
|
+
export {};
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { BaseLLM } from "./base.js";
|
|
2
|
+
import { GoogleVertexAIConnection } from "../util/googlevertexai-connection.js";
|
|
3
|
+
/**
|
|
4
|
+
* Enables calls to the Google Cloud's Vertex AI API to access
|
|
5
|
+
* Large Language Models.
|
|
6
|
+
*
|
|
7
|
+
* To use, you will need to have one of the following authentication
|
|
8
|
+
* methods in place:
|
|
9
|
+
* - You are logged into an account permitted to the Google Cloud project
|
|
10
|
+
* using Vertex AI.
|
|
11
|
+
* - You are running this on a machine using a service account permitted to
|
|
12
|
+
* the Google Cloud project using Vertex AI.
|
|
13
|
+
* - The `GOOGLE_APPLICATION_CREDENTIALS` environment variable is set to the
|
|
14
|
+
* path of a credentials file for a service account permitted to the
|
|
15
|
+
* Google Cloud project using Vertex AI.
|
|
16
|
+
*/
|
|
17
|
+
export class GoogleVertexAI extends BaseLLM {
    /**
     * @param {object} [fields] Optional overrides. `model`, `temperature`,
     *   `maxOutputTokens`, `topP` and `topK` replace the defaults below when
     *   they are not null/undefined. The whole object is also handed to the
     *   base class and, merged with this instance, to the connection.
     */
    constructor(fields) {
        super(fields ?? {});
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "text-bison"
        });
        Object.defineProperty(this, "temperature", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0.7
        });
        Object.defineProperty(this, "maxOutputTokens", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 1024
        });
        Object.defineProperty(this, "topP", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 0.8
        });
        Object.defineProperty(this, "topK", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: 40
        });
        Object.defineProperty(this, "connection", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.model = fields?.model ?? this.model;
        this.temperature = fields?.temperature ?? this.temperature;
        this.maxOutputTokens = fields?.maxOutputTokens ?? this.maxOutputTokens;
        this.topP = fields?.topP ?? this.topP;
        this.topK = fields?.topK ?? this.topK;
        // Instance values are spread last so the resolved `model` (default
        // included) is what the connection sees.
        this.connection = new GoogleVertexAIConnection({ ...fields, ...this }, this.caller);
    }
    /** @returns {string} Identifier of this LLM implementation. */
    _llmType() {
        return "googlevertexai";
    }
    /**
     * Requests a completion for every prompt concurrently.
     *
     * @param {string[]} prompts Prompts to complete.
     * @param {object} options Call options forwarded to each request.
     * @returns {Promise<{generations: object[][]}>} One generation list per
     *   prompt, in prompt order.
     */
    async _generate(prompts, options) {
        const generations = await Promise.all(prompts.map((prompt) => this._generatePrompt(prompt, options)));
        return { generations };
    }
    /**
     * Sends one prompt as a single-instance request and converts the first
     * prediction into a generation.
     *
     * @param {string} prompt Prompt text.
     * @param {object} options Call options forwarded to the connection.
     * @returns {Promise<object[]>} Array holding one `{ text, generationInfo }`.
     * @throws {Error} When the response carries no prediction.
     */
    async _generatePrompt(prompt, options) {
        const instance = this.formatInstance(prompt);
        const parameters = {
            temperature: this.temperature,
            topK: this.topK,
            topP: this.topP,
            maxOutputTokens: this.maxOutputTokens,
        };
        const result = await this.connection.request([instance], parameters, options);
        const prediction = this.extractPredictionFromResponse(result);
        if (prediction == null) {
            // Fail with a clear message instead of a TypeError on `.content`.
            throw new Error("Google Vertex AI response did not contain any predictions.");
        }
        return [
            {
                text: prediction.content,
                generationInfo: prediction,
            },
        ];
    }
    /**
     * @param {string} prompt Prompt text.
     * @returns {{content: string}} Request instance wrapping the prompt.
     */
    formatInstance(prompt) {
        return { content: prompt };
    }
    /**
     * @param {object} result Response returned by the connection.
     * @returns {object|undefined} The first prediction, or `undefined` when
     *   the response, its `data`, or its `predictions` list is missing/empty.
     */
    extractPredictionFromResponse(result) {
        return result?.data?.predictions?.[0];
    }
}
|
|
@@ -2,6 +2,10 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.isChatModel = exports.isLLM = exports.ConditionalPromptSelector = exports.BasePromptSelector = void 0;
|
|
4
4
|
/**
 * Base for prompt selectors. Subclasses supply `getPrompt(llm)`.
 */
class BasePromptSelector {
    /**
     * Picks the prompt for `llm` and applies the given partial variables.
     *
     * @param {object} llm Language model passed through to `getPrompt`.
     * @param {{partialVariables?: object}} [options] Values handed to the
     *   prompt's `partial`; an empty object is used when absent.
     * @returns {Promise<object>} Whatever the prompt's `partial` yields.
     */
    async getPromptAsync(llm, options) {
        const template = this.getPrompt(llm);
        const partialVariables = options?.partialVariables ?? {};
        return template.partial(partialVariables);
    }
}
|
|
6
10
|
exports.BasePromptSelector = BasePromptSelector;
|
|
7
11
|
class ConditionalPromptSelector extends BasePromptSelector {
|
|
@@ -2,8 +2,13 @@ import { BaseChatModel } from "../../chat_models/base.js";
|
|
|
2
2
|
import { BasePromptTemplate } from "../base.js";
|
|
3
3
|
import { BaseLanguageModel } from "../../base_language/index.js";
|
|
4
4
|
import { BaseLLM } from "../../llms/base.js";
|
|
5
|
+
import { PartialValues } from "../../schema/index.js";
|
|
6
|
+
export type BaseGetPromptAsyncOptions = {
|
|
7
|
+
partialVariables?: PartialValues;
|
|
8
|
+
};
|
|
5
9
|
export declare abstract class BasePromptSelector {
|
|
6
10
|
abstract getPrompt(llm: BaseLanguageModel): BasePromptTemplate;
|
|
11
|
+
getPromptAsync(llm: BaseLanguageModel, options?: BaseGetPromptAsyncOptions): Promise<BasePromptTemplate>;
|
|
7
12
|
}
|
|
8
13
|
export declare class ConditionalPromptSelector extends BasePromptSelector {
|
|
9
14
|
defaultPrompt: BasePromptTemplate;
|
|
@@ -1,4 +1,8 @@
|
|
|
1
1
|
/**
 * Base for prompt selectors. Subclasses supply `getPrompt(llm)`.
 */
export class BasePromptSelector {
    /**
     * Picks the prompt for `llm` and applies the given partial variables.
     *
     * @param {object} llm Language model passed through to `getPrompt`.
     * @param {{partialVariables?: object}} [options] Values handed to the
     *   prompt's `partial`; an empty object is used when absent.
     * @returns {Promise<object>} Whatever the prompt's `partial` yields.
     */
    async getPromptAsync(llm, options) {
        const template = this.getPrompt(llm);
        const partialVariables = options?.partialVariables ?? {};
        return template.partial(partialVariables);
    }
}
|
|
3
7
|
export class ConditionalPromptSelector extends BasePromptSelector {
|
|
4
8
|
constructor(default_prompt, conditionals = []) {
|
|
@@ -1,7 +1,8 @@
|
|
|
1
|
+
import Metal from "@getmetal/metal-sdk";
|
|
1
2
|
import { BaseRetriever } from "../schema/index.js";
|
|
2
3
|
import { Document } from "../document.js";
|
|
3
4
|
export interface MetalRetrieverFields {
|
|
4
|
-
client: any;
|
|
5
|
+
client: Metal;
|
|
5
6
|
}
|
|
6
7
|
export declare class MetalRetriever extends BaseRetriever {
|
|
7
8
|
private client;
|
|
@@ -42,16 +42,7 @@ class RedisChatMessageHistory extends index_js_1.BaseListChatMessageHistory {
|
|
|
42
42
|
const orderedMessages = rawStoredMessages
|
|
43
43
|
.reverse()
|
|
44
44
|
.map((message) => JSON.parse(message));
|
|
45
|
-
const previousMessages = orderedMessages
|
|
46
|
-
.map((item) => ({
|
|
47
|
-
type: item.type,
|
|
48
|
-
data: {
|
|
49
|
-
role: item.role,
|
|
50
|
-
content: item.text,
|
|
51
|
-
},
|
|
52
|
-
}))
|
|
53
|
-
.filter((x) => x.type !== undefined && x.data.content !== undefined);
|
|
54
|
-
return (0, utils_js_1.mapStoredMessagesToChatMessages)(previousMessages);
|
|
45
|
+
return (0, utils_js_1.mapStoredMessagesToChatMessages)(orderedMessages);
|
|
55
46
|
}
|
|
56
47
|
async addMessage(message) {
|
|
57
48
|
await this.ensureReadiness();
|
|
@@ -39,16 +39,7 @@ export class RedisChatMessageHistory extends BaseListChatMessageHistory {
|
|
|
39
39
|
const orderedMessages = rawStoredMessages
|
|
40
40
|
.reverse()
|
|
41
41
|
.map((message) => JSON.parse(message));
|
|
42
|
-
const previousMessages = orderedMessages
|
|
43
|
-
.map((item) => ({
|
|
44
|
-
type: item.type,
|
|
45
|
-
data: {
|
|
46
|
-
role: item.role,
|
|
47
|
-
content: item.text,
|
|
48
|
-
},
|
|
49
|
-
}))
|
|
50
|
-
.filter((x) => x.type !== undefined && x.data.content !== undefined);
|
|
51
|
-
return mapStoredMessagesToChatMessages(previousMessages);
|
|
42
|
+
return mapStoredMessagesToChatMessages(orderedMessages);
|
|
52
43
|
}
|
|
53
44
|
async addMessage(message) {
|
|
54
45
|
await this.ensureReadiness();
|
package/dist/text_splitter.cjs
CHANGED
|
@@ -25,14 +25,17 @@ class TextSplitter {
|
|
|
25
25
|
}
|
|
26
26
|
async createDocuments(texts,
|
|
27
27
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
28
|
-
metadatas = []) {
|
|
28
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
29
|
+
// if no metadata is provided, we create an empty one for each text
|
|
29
30
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
31
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
30
32
|
const documents = new Array();
|
|
31
33
|
for (let i = 0; i < texts.length; i += 1) {
|
|
32
34
|
const text = texts[i];
|
|
33
35
|
let lineCounterIndex = 1;
|
|
34
36
|
let prevChunk = null;
|
|
35
37
|
for (const chunk of await this.splitText(text)) {
|
|
38
|
+
let pageContent = chunkHeader;
|
|
36
39
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
37
40
|
let numberOfIntermediateNewLines = 0;
|
|
38
41
|
if (prevChunk) {
|
|
@@ -40,6 +43,9 @@ class TextSplitter {
|
|
|
40
43
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
41
44
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
42
45
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
46
|
+
if (appendChunkOverlapHeader) {
|
|
47
|
+
pageContent += chunkOverlapHeader;
|
|
48
|
+
}
|
|
43
49
|
}
|
|
44
50
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
45
51
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -54,8 +60,9 @@ class TextSplitter {
|
|
|
54
60
|
..._metadatas[i],
|
|
55
61
|
loc,
|
|
56
62
|
};
|
|
63
|
+
pageContent += chunk;
|
|
57
64
|
documents.push(new document_js_1.Document({
|
|
58
|
-
pageContent: chunk,
|
|
65
|
+
pageContent,
|
|
59
66
|
metadata: metadataWithLinesNumber,
|
|
60
67
|
}));
|
|
61
68
|
lineCounterIndex += newLinesCount;
|
|
@@ -64,11 +71,11 @@ class TextSplitter {
|
|
|
64
71
|
}
|
|
65
72
|
return documents;
|
|
66
73
|
}
|
|
67
|
-
async splitDocuments(documents) {
|
|
74
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
68
75
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
69
76
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
70
77
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
71
|
-
return this.createDocuments(texts, metadatas);
|
|
78
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
72
79
|
}
|
|
73
80
|
joinDocs(docs, separator) {
|
|
74
81
|
const text = docs.join(separator).trim();
|
package/dist/text_splitter.d.ts
CHANGED
|
@@ -4,13 +4,18 @@ export interface TextSplitterParams {
|
|
|
4
4
|
chunkSize: number;
|
|
5
5
|
chunkOverlap: number;
|
|
6
6
|
}
|
|
7
|
+
export type TextSplitterChunkHeaderOptions = {
|
|
8
|
+
chunkHeader?: string;
|
|
9
|
+
chunkOverlapHeader?: string;
|
|
10
|
+
appendChunkOverlapHeader?: boolean;
|
|
11
|
+
};
|
|
7
12
|
export declare abstract class TextSplitter implements TextSplitterParams {
|
|
8
13
|
chunkSize: number;
|
|
9
14
|
chunkOverlap: number;
|
|
10
15
|
constructor(fields?: Partial<TextSplitterParams>);
|
|
11
16
|
abstract splitText(text: string): Promise<string[]>;
|
|
12
|
-
createDocuments(texts: string[], metadatas?: Record<string, any>[]): Promise<Document[]>;
|
|
13
|
-
splitDocuments(documents: Document[]): Promise<Document[]>;
|
|
17
|
+
createDocuments(texts: string[], metadatas?: Record<string, any>[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
18
|
+
splitDocuments(documents: Document[], chunkHeaderOptions?: TextSplitterChunkHeaderOptions): Promise<Document[]>;
|
|
14
19
|
private joinDocs;
|
|
15
20
|
mergeSplits(splits: string[], separator: string): string[];
|
|
16
21
|
}
|
package/dist/text_splitter.js
CHANGED
|
@@ -22,14 +22,17 @@ export class TextSplitter {
|
|
|
22
22
|
}
|
|
23
23
|
async createDocuments(texts,
|
|
24
24
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
25
|
-
metadatas = []) {
|
|
25
|
+
metadatas = [], chunkHeaderOptions = {}) {
|
|
26
|
+
// if no metadata is provided, we create an empty one for each text
|
|
26
27
|
const _metadatas = metadatas.length > 0 ? metadatas : new Array(texts.length).fill({});
|
|
28
|
+
const { chunkHeader = "", chunkOverlapHeader = "(cont'd) ", appendChunkOverlapHeader = false, } = chunkHeaderOptions;
|
|
27
29
|
const documents = new Array();
|
|
28
30
|
for (let i = 0; i < texts.length; i += 1) {
|
|
29
31
|
const text = texts[i];
|
|
30
32
|
let lineCounterIndex = 1;
|
|
31
33
|
let prevChunk = null;
|
|
32
34
|
for (const chunk of await this.splitText(text)) {
|
|
35
|
+
let pageContent = chunkHeader;
|
|
33
36
|
// we need to count the \n that are in the text before getting removed by the splitting
|
|
34
37
|
let numberOfIntermediateNewLines = 0;
|
|
35
38
|
if (prevChunk) {
|
|
@@ -37,6 +40,9 @@ export class TextSplitter {
|
|
|
37
40
|
const indexEndPrevChunk = text.indexOf(prevChunk) + prevChunk.length;
|
|
38
41
|
const removedNewlinesFromSplittingText = text.slice(indexEndPrevChunk, indexChunk);
|
|
39
42
|
numberOfIntermediateNewLines = (removedNewlinesFromSplittingText.match(/\n/g) || []).length;
|
|
43
|
+
if (appendChunkOverlapHeader) {
|
|
44
|
+
pageContent += chunkOverlapHeader;
|
|
45
|
+
}
|
|
40
46
|
}
|
|
41
47
|
lineCounterIndex += numberOfIntermediateNewLines;
|
|
42
48
|
const newLinesCount = (chunk.match(/\n/g) || []).length;
|
|
@@ -51,8 +57,9 @@ export class TextSplitter {
|
|
|
51
57
|
..._metadatas[i],
|
|
52
58
|
loc,
|
|
53
59
|
};
|
|
60
|
+
pageContent += chunk;
|
|
54
61
|
documents.push(new Document({
|
|
55
|
-
pageContent: chunk,
|
|
62
|
+
pageContent,
|
|
56
63
|
metadata: metadataWithLinesNumber,
|
|
57
64
|
}));
|
|
58
65
|
lineCounterIndex += newLinesCount;
|
|
@@ -61,11 +68,11 @@ export class TextSplitter {
|
|
|
61
68
|
}
|
|
62
69
|
return documents;
|
|
63
70
|
}
|
|
64
|
-
async splitDocuments(documents) {
|
|
71
|
+
async splitDocuments(documents, chunkHeaderOptions = {}) {
|
|
65
72
|
const selectedDocuments = documents.filter((doc) => doc.pageContent !== undefined);
|
|
66
73
|
const texts = selectedDocuments.map((doc) => doc.pageContent);
|
|
67
74
|
const metadatas = selectedDocuments.map((doc) => doc.metadata);
|
|
68
|
-
return this.createDocuments(texts, metadatas);
|
|
75
|
+
return this.createDocuments(texts, metadatas, chunkHeaderOptions);
|
|
69
76
|
}
|
|
70
77
|
joinDocs(docs, separator) {
|
|
71
78
|
const text = docs.join(separator).trim();
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { BaseLLMParams } from "../llms/index.js";
|
|
2
|
+
export interface GoogleVertexAIConnectionParams {
|
|
3
|
+
/** Hostname for the API call */
|
|
4
|
+
endpoint?: string;
|
|
5
|
+
/** Region where the LLM is stored */
|
|
6
|
+
location?: string;
|
|
7
|
+
/** Model to use */
|
|
8
|
+
model?: string;
|
|
9
|
+
}
|
|
10
|
+
export interface GoogleVertexAIModelParams {
|
|
11
|
+
/** Sampling temperature to use */
|
|
12
|
+
temperature?: number;
|
|
13
|
+
/**
|
|
14
|
+
* Maximum number of tokens to generate in the completion.
|
|
15
|
+
*/
|
|
16
|
+
maxOutputTokens?: number;
|
|
17
|
+
/**
|
|
18
|
+
* Top-p changes how the model selects tokens for output.
|
|
19
|
+
*
|
|
20
|
+
* Tokens are selected from most probable to least until the sum
|
|
21
|
+
* of their probabilities equals the top-p value.
|
|
22
|
+
*
|
|
23
|
+
* For example, if tokens A, B, and C have a probability of
|
|
24
|
+
* .3, .2, and .1 and the top-p value is .5, then the model will
|
|
25
|
+
* select either A or B as the next token (using temperature).
|
|
26
|
+
*/
|
|
27
|
+
topP?: number;
|
|
28
|
+
/**
|
|
29
|
+
* Top-k changes how the model selects tokens for output.
|
|
30
|
+
*
|
|
31
|
+
* A top-k of 1 means the selected token is the most probable among
|
|
32
|
+
* all tokens in the model’s vocabulary (also called greedy decoding),
|
|
33
|
+
* while a top-k of 3 means that the next token is selected from
|
|
34
|
+
* among the 3 most probable tokens (using temperature).
|
|
35
|
+
*/
|
|
36
|
+
topK?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface GoogleVertexAIBaseLLMInput extends BaseLLMParams, GoogleVertexAIConnectionParams, GoogleVertexAIModelParams {
|
|
39
|
+
}
|
|
40
|
+
export interface GoogleVertexAIBasePrediction {
|
|
41
|
+
safetyAttributes?: any;
|
|
42
|
+
}
|
|
43
|
+
export interface GoogleVertexAILLMResponse<PredictionType extends GoogleVertexAIBasePrediction> {
|
|
44
|
+
data: {
|
|
45
|
+
predictions: PredictionType[];
|
|
46
|
+
};
|
|
47
|
+
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.GoogleVertexAIConnection = void 0;
|
|
4
|
+
const google_auth_library_1 = require("google-auth-library");
|
|
5
|
+
/**
 * Issues authenticated `:predict` requests for a Google-published model
 * on Vertex AI, routing every HTTP call through the supplied caller.
 */
class GoogleVertexAIConnection {
    /**
     * @param {{endpoint?: string, location?: string, model?: string}} [fields]
     *   Connection settings. `location` defaults to `us-central1`. `endpoint`
     *   defaults to `<location>-aiplatform.googleapis.com` so the host and
     *   the location in the URL path always agree; an explicit `endpoint`
     *   is used verbatim.
     * @param {object} caller Object exposing `callWithOptions(options, fn)`,
     *   used to execute the HTTP request.
     */
    constructor(fields, caller) {
        Object.defineProperty(this, "caller", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "endpoint", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "us-central1-aiplatform.googleapis.com"
        });
        Object.defineProperty(this, "location", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "us-central1"
        });
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "auth", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.caller = caller;
        // Resolve the location first: the default host is derived from it.
        this.location = fields?.location ?? this.location;
        this.endpoint = fields?.endpoint ?? `${this.location}-aiplatform.googleapis.com`;
        this.model = fields?.model ?? this.model;
        this.auth = new google_auth_library_1.GoogleAuth({
            scopes: "https://www.googleapis.com/auth/cloud-platform",
        });
    }
    /**
     * POSTs `{ instances, parameters }` to the model's `:predict` URL.
     *
     * @param {object[]} instances Request instances.
     * @param {object} parameters Model parameters sent alongside them.
     * @param {{signal?: AbortSignal}} [options] Call options; `signal` is
     *   forwarded to the caller.
     * @returns {Promise<object>} The response returned by the auth client.
     */
    async request(instances, parameters, options) {
        const client = await this.auth.getClient();
        const projectId = await this.auth.getProjectId();
        const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
        const opts = {
            url,
            method: "POST",
            data: {
                instances,
                parameters,
            },
        };
        const response = await this.caller.callWithOptions({ signal: options?.signal }, () => client.request(opts));
        return response;
    }
}
|
|
66
|
+
exports.GoogleVertexAIConnection = GoogleVertexAIConnection;
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
import { BaseLanguageModelCallOptions } from "../base_language/index.js";
|
|
3
|
+
import { AsyncCaller } from "./async_caller.js";
|
|
4
|
+
import { GoogleVertexAIBasePrediction, GoogleVertexAIConnectionParams, GoogleVertexAILLMResponse, GoogleVertexAIModelParams } from "../types/googlevertexai-types.js";
|
|
5
|
+
export declare class GoogleVertexAIConnection<CallOptions extends BaseLanguageModelCallOptions, InstanceType, PredictionType extends GoogleVertexAIBasePrediction> implements GoogleVertexAIConnectionParams {
|
|
6
|
+
caller: AsyncCaller;
|
|
7
|
+
endpoint: string;
|
|
8
|
+
location: string;
|
|
9
|
+
model: string;
|
|
10
|
+
auth: GoogleAuth;
|
|
11
|
+
constructor(fields: GoogleVertexAIConnectionParams | undefined, caller: AsyncCaller);
|
|
12
|
+
request(instances: [InstanceType], parameters: GoogleVertexAIModelParams, options: CallOptions): Promise<GoogleVertexAILLMResponse<PredictionType>>;
|
|
13
|
+
}
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import { GoogleAuth } from "google-auth-library";
|
|
2
|
+
/**
 * Issues authenticated `:predict` requests for a Google-published model
 * on Vertex AI, routing every HTTP call through the supplied caller.
 */
export class GoogleVertexAIConnection {
    /**
     * @param {{endpoint?: string, location?: string, model?: string}} [fields]
     *   Connection settings. `location` defaults to `us-central1`. `endpoint`
     *   defaults to `<location>-aiplatform.googleapis.com` so the host and
     *   the location in the URL path always agree; an explicit `endpoint`
     *   is used verbatim.
     * @param {object} caller Object exposing `callWithOptions(options, fn)`,
     *   used to execute the HTTP request.
     */
    constructor(fields, caller) {
        Object.defineProperty(this, "caller", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "endpoint", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "us-central1-aiplatform.googleapis.com"
        });
        Object.defineProperty(this, "location", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: "us-central1"
        });
        Object.defineProperty(this, "model", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        Object.defineProperty(this, "auth", {
            enumerable: true,
            configurable: true,
            writable: true,
            value: void 0
        });
        this.caller = caller;
        // Resolve the location first: the default host is derived from it.
        this.location = fields?.location ?? this.location;
        this.endpoint = fields?.endpoint ?? `${this.location}-aiplatform.googleapis.com`;
        this.model = fields?.model ?? this.model;
        this.auth = new GoogleAuth({
            scopes: "https://www.googleapis.com/auth/cloud-platform",
        });
    }
    /**
     * POSTs `{ instances, parameters }` to the model's `:predict` URL.
     *
     * @param {object[]} instances Request instances.
     * @param {object} parameters Model parameters sent alongside them.
     * @param {{signal?: AbortSignal}} [options] Call options; `signal` is
     *   forwarded to the caller.
     * @returns {Promise<object>} The response returned by the auth client.
     */
    async request(instances, parameters, options) {
        const client = await this.auth.getClient();
        const projectId = await this.auth.getProjectId();
        const url = `https://${this.endpoint}/v1/projects/${projectId}/locations/${this.location}/publishers/google/models/${this.model}:predict`;
        const opts = {
            url,
            method: "POST",
            data: {
                instances,
                parameters,
            },
        };
        const response = await this.caller.callWithOptions({ signal: options?.signal }, () => client.request(opts));
        return response;
    }
}
|