langchain 0.0.129 → 0.0.130
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/callbacks/manager.cjs +1 -1
- package/dist/callbacks/manager.js +1 -1
- package/dist/document_loaders/web/youtube.cjs +77 -0
- package/dist/document_loaders/web/youtube.d.ts +17 -0
- package/dist/document_loaders/web/youtube.js +73 -0
- package/dist/embeddings/cohere.cjs +16 -13
- package/dist/embeddings/cohere.js +16 -13
- package/dist/embeddings/googlevertexai.cjs +1 -1
- package/dist/embeddings/googlevertexai.js +1 -1
- package/dist/embeddings/openai.cjs +27 -21
- package/dist/embeddings/openai.js +27 -21
- package/dist/load/import_constants.cjs +1 -0
- package/dist/load/import_constants.js +1 -0
- package/dist/schema/index.cjs +2 -2
- package/dist/schema/index.d.ts +4 -0
- package/dist/schema/index.js +2 -2
- package/dist/stores/message/utils.cjs +5 -0
- package/dist/stores/message/utils.js +6 -1
- package/dist/vectorstores/hnswlib.cjs +19 -0
- package/dist/vectorstores/hnswlib.d.ts +3 -0
- package/dist/vectorstores/hnswlib.js +19 -0
- package/dist/vectorstores/myscale.cjs +1 -1
- package/dist/vectorstores/myscale.js +1 -1
- package/document_loaders/web/youtube.cjs +1 -0
- package/document_loaders/web/youtube.d.ts +1 -0
- package/document_loaders/web/youtube.js +1 -0
- package/package.json +27 -3
|
@@ -495,7 +495,7 @@ class CallbackManager extends BaseCallbackManager {
|
|
|
495
495
|
: localHandlers?.handlers, false);
|
|
496
496
|
}
|
|
497
497
|
const verboseEnabled = (0, env_js_1.getEnvironmentVariable)("LANGCHAIN_VERBOSE") || options?.verbose;
|
|
498
|
-
const tracingV2Enabled = (0, env_js_1.getEnvironmentVariable)("LANGCHAIN_TRACING_V2")
|
|
498
|
+
const tracingV2Enabled = (0, env_js_1.getEnvironmentVariable)("LANGCHAIN_TRACING_V2") === "true";
|
|
499
499
|
const tracingEnabled = tracingV2Enabled ||
|
|
500
500
|
((0, env_js_1.getEnvironmentVariable)("LANGCHAIN_TRACING") ?? false);
|
|
501
501
|
if (verboseEnabled || tracingEnabled) {
|
|
@@ -486,7 +486,7 @@ export class CallbackManager extends BaseCallbackManager {
|
|
|
486
486
|
: localHandlers?.handlers, false);
|
|
487
487
|
}
|
|
488
488
|
const verboseEnabled = getEnvironmentVariable("LANGCHAIN_VERBOSE") || options?.verbose;
|
|
489
|
-
const tracingV2Enabled = getEnvironmentVariable("LANGCHAIN_TRACING_V2")
|
|
489
|
+
const tracingV2Enabled = getEnvironmentVariable("LANGCHAIN_TRACING_V2") === "true";
|
|
490
490
|
const tracingEnabled = tracingV2Enabled ||
|
|
491
491
|
(getEnvironmentVariable("LANGCHAIN_TRACING") ?? false);
|
|
492
492
|
if (verboseEnabled || tracingEnabled) {
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.YoutubeLoader = void 0;
|
|
4
|
+
const youtube_transcript_1 = require("youtube-transcript");
|
|
5
|
+
const youtubei_js_1 = require("youtubei.js");
|
|
6
|
+
const document_js_1 = require("../../document.cjs");
|
|
7
|
+
const base_js_1 = require("../base.cjs");
|
|
8
|
+
class YoutubeLoader extends base_js_1.BaseDocumentLoader {
|
|
9
|
+
constructor(config) {
|
|
10
|
+
super();
|
|
11
|
+
Object.defineProperty(this, "videoId", {
|
|
12
|
+
enumerable: true,
|
|
13
|
+
configurable: true,
|
|
14
|
+
writable: true,
|
|
15
|
+
value: void 0
|
|
16
|
+
});
|
|
17
|
+
Object.defineProperty(this, "language", {
|
|
18
|
+
enumerable: true,
|
|
19
|
+
configurable: true,
|
|
20
|
+
writable: true,
|
|
21
|
+
value: void 0
|
|
22
|
+
});
|
|
23
|
+
Object.defineProperty(this, "addVideoInfo", {
|
|
24
|
+
enumerable: true,
|
|
25
|
+
configurable: true,
|
|
26
|
+
writable: true,
|
|
27
|
+
value: void 0
|
|
28
|
+
});
|
|
29
|
+
this.videoId = config.videoId;
|
|
30
|
+
this.language = config?.language;
|
|
31
|
+
this.addVideoInfo = config?.addVideoInfo ?? false;
|
|
32
|
+
}
|
|
33
|
+
static getVideoID(url) {
|
|
34
|
+
const match = url.match(/.*(?:youtu.be\/|v\/|u\/\w\/|embed\/|watch\?v=)([^#&?]*).*/);
|
|
35
|
+
if (match !== null && match[1].length === 11) {
|
|
36
|
+
return match[1];
|
|
37
|
+
}
|
|
38
|
+
else {
|
|
39
|
+
throw new Error("Failed to get youtube video id from the url");
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
static createFromUrl(url, config) {
|
|
43
|
+
const videoId = YoutubeLoader.getVideoID(url);
|
|
44
|
+
return new YoutubeLoader({ ...config, videoId });
|
|
45
|
+
}
|
|
46
|
+
async load() {
|
|
47
|
+
let transcript;
|
|
48
|
+
const metadata = {
|
|
49
|
+
source: this.videoId,
|
|
50
|
+
};
|
|
51
|
+
try {
|
|
52
|
+
transcript = await youtube_transcript_1.YoutubeTranscript.fetchTranscript(this.videoId, {
|
|
53
|
+
lang: this.language,
|
|
54
|
+
});
|
|
55
|
+
if (transcript === undefined) {
|
|
56
|
+
throw new Error("Transcription not found");
|
|
57
|
+
}
|
|
58
|
+
if (this.addVideoInfo) {
|
|
59
|
+
const youtube = await youtubei_js_1.Innertube.create();
|
|
60
|
+
const info = (await youtube.getBasicInfo(this.videoId)).basic_info;
|
|
61
|
+
metadata.description = info.short_description;
|
|
62
|
+
metadata.title = info.title;
|
|
63
|
+
metadata.view_count = info.view_count;
|
|
64
|
+
metadata.author = info.author;
|
|
65
|
+
}
|
|
66
|
+
}
|
|
67
|
+
catch (e) {
|
|
68
|
+
throw new Error(`Failed to get YouTube video transcription: ${e.message}`);
|
|
69
|
+
}
|
|
70
|
+
const document = new document_js_1.Document({
|
|
71
|
+
pageContent: transcript.map((item) => item.text).join(" "),
|
|
72
|
+
metadata,
|
|
73
|
+
});
|
|
74
|
+
return [document];
|
|
75
|
+
}
|
|
76
|
+
}
|
|
77
|
+
exports.YoutubeLoader = YoutubeLoader;
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import { Document } from "../../document.js";
|
|
2
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
3
|
+
interface YoutubeConfig {
|
|
4
|
+
videoId: string;
|
|
5
|
+
language?: string;
|
|
6
|
+
addVideoInfo?: boolean;
|
|
7
|
+
}
|
|
8
|
+
export declare class YoutubeLoader extends BaseDocumentLoader {
|
|
9
|
+
private videoId;
|
|
10
|
+
private language?;
|
|
11
|
+
private addVideoInfo;
|
|
12
|
+
constructor(config: YoutubeConfig);
|
|
13
|
+
private static getVideoID;
|
|
14
|
+
static createFromUrl(url: string, config?: Omit<YoutubeConfig, "videoId">): YoutubeLoader;
|
|
15
|
+
load(): Promise<Document[]>;
|
|
16
|
+
}
|
|
17
|
+
export {};
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
import { YoutubeTranscript } from "youtube-transcript";
|
|
2
|
+
import { Innertube } from "youtubei.js";
|
|
3
|
+
import { Document } from "../../document.js";
|
|
4
|
+
import { BaseDocumentLoader } from "../base.js";
|
|
5
|
+
export class YoutubeLoader extends BaseDocumentLoader {
|
|
6
|
+
constructor(config) {
|
|
7
|
+
super();
|
|
8
|
+
Object.defineProperty(this, "videoId", {
|
|
9
|
+
enumerable: true,
|
|
10
|
+
configurable: true,
|
|
11
|
+
writable: true,
|
|
12
|
+
value: void 0
|
|
13
|
+
});
|
|
14
|
+
Object.defineProperty(this, "language", {
|
|
15
|
+
enumerable: true,
|
|
16
|
+
configurable: true,
|
|
17
|
+
writable: true,
|
|
18
|
+
value: void 0
|
|
19
|
+
});
|
|
20
|
+
Object.defineProperty(this, "addVideoInfo", {
|
|
21
|
+
enumerable: true,
|
|
22
|
+
configurable: true,
|
|
23
|
+
writable: true,
|
|
24
|
+
value: void 0
|
|
25
|
+
});
|
|
26
|
+
this.videoId = config.videoId;
|
|
27
|
+
this.language = config?.language;
|
|
28
|
+
this.addVideoInfo = config?.addVideoInfo ?? false;
|
|
29
|
+
}
|
|
30
|
+
static getVideoID(url) {
|
|
31
|
+
const match = url.match(/.*(?:youtu.be\/|v\/|u\/\w\/|embed\/|watch\?v=)([^#&?]*).*/);
|
|
32
|
+
if (match !== null && match[1].length === 11) {
|
|
33
|
+
return match[1];
|
|
34
|
+
}
|
|
35
|
+
else {
|
|
36
|
+
throw new Error("Failed to get youtube video id from the url");
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
static createFromUrl(url, config) {
|
|
40
|
+
const videoId = YoutubeLoader.getVideoID(url);
|
|
41
|
+
return new YoutubeLoader({ ...config, videoId });
|
|
42
|
+
}
|
|
43
|
+
async load() {
|
|
44
|
+
let transcript;
|
|
45
|
+
const metadata = {
|
|
46
|
+
source: this.videoId,
|
|
47
|
+
};
|
|
48
|
+
try {
|
|
49
|
+
transcript = await YoutubeTranscript.fetchTranscript(this.videoId, {
|
|
50
|
+
lang: this.language,
|
|
51
|
+
});
|
|
52
|
+
if (transcript === undefined) {
|
|
53
|
+
throw new Error("Transcription not found");
|
|
54
|
+
}
|
|
55
|
+
if (this.addVideoInfo) {
|
|
56
|
+
const youtube = await Innertube.create();
|
|
57
|
+
const info = (await youtube.getBasicInfo(this.videoId)).basic_info;
|
|
58
|
+
metadata.description = info.short_description;
|
|
59
|
+
metadata.title = info.title;
|
|
60
|
+
metadata.view_count = info.view_count;
|
|
61
|
+
metadata.author = info.author;
|
|
62
|
+
}
|
|
63
|
+
}
|
|
64
|
+
catch (e) {
|
|
65
|
+
throw new Error(`Failed to get YouTube video transcription: ${e.message}`);
|
|
66
|
+
}
|
|
67
|
+
const document = new Document({
|
|
68
|
+
pageContent: transcript.map((item) => item.text).join(" "),
|
|
69
|
+
metadata,
|
|
70
|
+
});
|
|
71
|
+
return [document];
|
|
72
|
+
}
|
|
73
|
+
}
|
|
@@ -13,7 +13,8 @@ class CohereEmbeddings extends base_js_1.Embeddings {
|
|
|
13
13
|
* @param fields - An optional object with properties to configure the instance.
|
|
14
14
|
*/
|
|
15
15
|
constructor(fields) {
|
|
16
|
-
|
|
16
|
+
const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
|
|
17
|
+
super(fieldsWithDefaults);
|
|
17
18
|
Object.defineProperty(this, "modelName", {
|
|
18
19
|
enumerable: true,
|
|
19
20
|
configurable: true,
|
|
@@ -38,12 +39,12 @@ class CohereEmbeddings extends base_js_1.Embeddings {
|
|
|
38
39
|
writable: true,
|
|
39
40
|
value: void 0
|
|
40
41
|
});
|
|
41
|
-
const apiKey =
|
|
42
|
+
const apiKey = fieldsWithDefaults?.apiKey || (0, env_js_1.getEnvironmentVariable)("COHERE_API_KEY");
|
|
42
43
|
if (!apiKey) {
|
|
43
44
|
throw new Error("Cohere API key not found");
|
|
44
45
|
}
|
|
45
|
-
this.modelName =
|
|
46
|
-
this.batchSize =
|
|
46
|
+
this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;
|
|
47
|
+
this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;
|
|
47
48
|
this.apiKey = apiKey;
|
|
48
49
|
}
|
|
49
50
|
/**
|
|
@@ -53,16 +54,18 @@ class CohereEmbeddings extends base_js_1.Embeddings {
|
|
|
53
54
|
*/
|
|
54
55
|
async embedDocuments(texts) {
|
|
55
56
|
await this.maybeInitClient();
|
|
56
|
-
const
|
|
57
|
+
const batches = (0, chunk_js_1.chunkArray)(texts, this.batchSize);
|
|
58
|
+
const batchRequests = batches.map((batch) => this.embeddingWithRetry({
|
|
59
|
+
model: this.modelName,
|
|
60
|
+
texts: batch,
|
|
61
|
+
}));
|
|
62
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
57
63
|
const embeddings = [];
|
|
58
|
-
for (let i = 0; i <
|
|
59
|
-
const
|
|
60
|
-
const { body } =
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
});
|
|
64
|
-
for (let j = 0; j < input.length; j += 1) {
|
|
65
|
-
embeddings.push(body.embeddings[j]);
|
|
64
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
65
|
+
const batch = batches[i];
|
|
66
|
+
const { body: batchResponse } = batchResponses[i];
|
|
67
|
+
for (let j = 0; j < batch.length; j += 1) {
|
|
68
|
+
embeddings.push(batchResponse.embeddings[j]);
|
|
66
69
|
}
|
|
67
70
|
}
|
|
68
71
|
return embeddings;
|
|
@@ -10,7 +10,8 @@ export class CohereEmbeddings extends Embeddings {
|
|
|
10
10
|
* @param fields - An optional object with properties to configure the instance.
|
|
11
11
|
*/
|
|
12
12
|
constructor(fields) {
|
|
13
|
-
|
|
13
|
+
const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
|
|
14
|
+
super(fieldsWithDefaults);
|
|
14
15
|
Object.defineProperty(this, "modelName", {
|
|
15
16
|
enumerable: true,
|
|
16
17
|
configurable: true,
|
|
@@ -35,12 +36,12 @@ export class CohereEmbeddings extends Embeddings {
|
|
|
35
36
|
writable: true,
|
|
36
37
|
value: void 0
|
|
37
38
|
});
|
|
38
|
-
const apiKey =
|
|
39
|
+
const apiKey = fieldsWithDefaults?.apiKey || getEnvironmentVariable("COHERE_API_KEY");
|
|
39
40
|
if (!apiKey) {
|
|
40
41
|
throw new Error("Cohere API key not found");
|
|
41
42
|
}
|
|
42
|
-
this.modelName =
|
|
43
|
-
this.batchSize =
|
|
43
|
+
this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;
|
|
44
|
+
this.batchSize = fieldsWithDefaults?.batchSize ?? this.batchSize;
|
|
44
45
|
this.apiKey = apiKey;
|
|
45
46
|
}
|
|
46
47
|
/**
|
|
@@ -50,16 +51,18 @@ export class CohereEmbeddings extends Embeddings {
|
|
|
50
51
|
*/
|
|
51
52
|
async embedDocuments(texts) {
|
|
52
53
|
await this.maybeInitClient();
|
|
53
|
-
const
|
|
54
|
+
const batches = chunkArray(texts, this.batchSize);
|
|
55
|
+
const batchRequests = batches.map((batch) => this.embeddingWithRetry({
|
|
56
|
+
model: this.modelName,
|
|
57
|
+
texts: batch,
|
|
58
|
+
}));
|
|
59
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
54
60
|
const embeddings = [];
|
|
55
|
-
for (let i = 0; i <
|
|
56
|
-
const
|
|
57
|
-
const { body } =
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
});
|
|
61
|
-
for (let j = 0; j < input.length; j += 1) {
|
|
62
|
-
embeddings.push(body.embeddings[j]);
|
|
61
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
62
|
+
const batch = batches[i];
|
|
63
|
+
const { body: batchResponse } = batchResponses[i];
|
|
64
|
+
for (let j = 0; j < batch.length; j += 1) {
|
|
65
|
+
embeddings.push(batchResponse.embeddings[j]);
|
|
63
66
|
}
|
|
64
67
|
}
|
|
65
68
|
return embeddings;
|
|
@@ -39,7 +39,7 @@ class GoogleVertexAIEmbeddings extends base_js_1.Embeddings {
|
|
|
39
39
|
async embedDocuments(documents) {
|
|
40
40
|
const instanceChunks = (0, chunk_js_1.chunkArray)(documents.map((document) => ({
|
|
41
41
|
content: document,
|
|
42
|
-
})), 5); // Vertex AI accepts max 5
|
|
42
|
+
})), 5); // Vertex AI accepts max 5 instances per prediction
|
|
43
43
|
const parameters = {};
|
|
44
44
|
const options = {};
|
|
45
45
|
const responses = await Promise.all(instanceChunks.map((instances) => this.connection.request(instances, parameters, options)));
|
|
@@ -36,7 +36,7 @@ export class GoogleVertexAIEmbeddings extends Embeddings {
|
|
|
36
36
|
async embedDocuments(documents) {
|
|
37
37
|
const instanceChunks = chunkArray(documents.map((document) => ({
|
|
38
38
|
content: document,
|
|
39
|
-
})), 5); // Vertex AI accepts max 5
|
|
39
|
+
})), 5); // Vertex AI accepts max 5 instances per prediction
|
|
40
40
|
const parameters = {};
|
|
41
41
|
const options = {};
|
|
42
42
|
const responses = await Promise.all(instanceChunks.map((instances) => this.connection.request(instances, parameters, options)));
|
|
@@ -12,7 +12,8 @@ const base_js_1 = require("./base.cjs");
|
|
|
12
12
|
const azure_js_1 = require("../util/azure.cjs");
|
|
13
13
|
class OpenAIEmbeddings extends base_js_1.Embeddings {
|
|
14
14
|
constructor(fields, configuration) {
|
|
15
|
-
|
|
15
|
+
const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
|
|
16
|
+
super(fieldsWithDefaults);
|
|
16
17
|
Object.defineProperty(this, "modelName", {
|
|
17
18
|
enumerable: true,
|
|
18
19
|
configurable: true,
|
|
@@ -79,27 +80,30 @@ class OpenAIEmbeddings extends base_js_1.Embeddings {
|
|
|
79
80
|
writable: true,
|
|
80
81
|
value: void 0
|
|
81
82
|
});
|
|
82
|
-
const apiKey =
|
|
83
|
-
|
|
83
|
+
const apiKey = fieldsWithDefaults?.openAIApiKey ??
|
|
84
|
+
(0, env_js_1.getEnvironmentVariable)("OPENAI_API_KEY");
|
|
85
|
+
const azureApiKey = fieldsWithDefaults?.azureOpenAIApiKey ??
|
|
84
86
|
(0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_API_KEY");
|
|
85
87
|
if (!azureApiKey && !apiKey) {
|
|
86
88
|
throw new Error("OpenAI or Azure OpenAI API key not found");
|
|
87
89
|
}
|
|
88
|
-
const azureApiInstanceName =
|
|
90
|
+
const azureApiInstanceName = fieldsWithDefaults?.azureOpenAIApiInstanceName ??
|
|
89
91
|
(0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_API_INSTANCE_NAME");
|
|
90
|
-
const azureApiDeploymentName = (
|
|
91
|
-
|
|
92
|
+
const azureApiDeploymentName = (fieldsWithDefaults?.azureOpenAIApiEmbeddingsDeploymentName ||
|
|
93
|
+
fieldsWithDefaults?.azureOpenAIApiDeploymentName) ??
|
|
92
94
|
((0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME") ||
|
|
93
95
|
(0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_API_DEPLOYMENT_NAME"));
|
|
94
|
-
const azureApiVersion =
|
|
96
|
+
const azureApiVersion = fieldsWithDefaults?.azureOpenAIApiVersion ??
|
|
95
97
|
(0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_API_VERSION");
|
|
96
98
|
this.azureOpenAIBasePath =
|
|
97
|
-
|
|
99
|
+
fieldsWithDefaults?.azureOpenAIBasePath ??
|
|
98
100
|
(0, env_js_1.getEnvironmentVariable)("AZURE_OPENAI_BASE_PATH");
|
|
99
|
-
this.modelName =
|
|
100
|
-
this.batchSize =
|
|
101
|
-
|
|
102
|
-
this.
|
|
101
|
+
this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;
|
|
102
|
+
this.batchSize =
|
|
103
|
+
fieldsWithDefaults?.batchSize ?? (azureApiKey ? 1 : this.batchSize);
|
|
104
|
+
this.stripNewLines =
|
|
105
|
+
fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;
|
|
106
|
+
this.timeout = fieldsWithDefaults?.timeout;
|
|
103
107
|
this.azureOpenAIApiVersion = azureApiVersion;
|
|
104
108
|
this.azureOpenAIApiKey = azureApiKey;
|
|
105
109
|
this.azureOpenAIApiInstanceName = azureApiInstanceName;
|
|
@@ -121,16 +125,18 @@ class OpenAIEmbeddings extends base_js_1.Embeddings {
|
|
|
121
125
|
};
|
|
122
126
|
}
|
|
123
127
|
async embedDocuments(texts) {
|
|
124
|
-
const
|
|
128
|
+
const batches = (0, chunk_js_1.chunkArray)(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
|
|
129
|
+
const batchRequests = batches.map((batch) => this.embeddingWithRetry({
|
|
130
|
+
model: this.modelName,
|
|
131
|
+
input: batch,
|
|
132
|
+
}));
|
|
133
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
125
134
|
const embeddings = [];
|
|
126
|
-
for (let i = 0; i <
|
|
127
|
-
const
|
|
128
|
-
const { data } =
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
});
|
|
132
|
-
for (let j = 0; j < input.length; j += 1) {
|
|
133
|
-
embeddings.push(data.data[j].embedding);
|
|
135
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
136
|
+
const batch = batches[i];
|
|
137
|
+
const { data: batchResponse } = batchResponses[i];
|
|
138
|
+
for (let j = 0; j < batch.length; j += 1) {
|
|
139
|
+
embeddings.push(batchResponse.data[j].embedding);
|
|
134
140
|
}
|
|
135
141
|
}
|
|
136
142
|
return embeddings;
|
|
@@ -6,7 +6,8 @@ import { Embeddings } from "./base.js";
|
|
|
6
6
|
import { getEndpoint } from "../util/azure.js";
|
|
7
7
|
export class OpenAIEmbeddings extends Embeddings {
|
|
8
8
|
constructor(fields, configuration) {
|
|
9
|
-
|
|
9
|
+
const fieldsWithDefaults = { maxConcurrency: 2, ...fields };
|
|
10
|
+
super(fieldsWithDefaults);
|
|
10
11
|
Object.defineProperty(this, "modelName", {
|
|
11
12
|
enumerable: true,
|
|
12
13
|
configurable: true,
|
|
@@ -73,27 +74,30 @@ export class OpenAIEmbeddings extends Embeddings {
|
|
|
73
74
|
writable: true,
|
|
74
75
|
value: void 0
|
|
75
76
|
});
|
|
76
|
-
const apiKey =
|
|
77
|
-
|
|
77
|
+
const apiKey = fieldsWithDefaults?.openAIApiKey ??
|
|
78
|
+
getEnvironmentVariable("OPENAI_API_KEY");
|
|
79
|
+
const azureApiKey = fieldsWithDefaults?.azureOpenAIApiKey ??
|
|
78
80
|
getEnvironmentVariable("AZURE_OPENAI_API_KEY");
|
|
79
81
|
if (!azureApiKey && !apiKey) {
|
|
80
82
|
throw new Error("OpenAI or Azure OpenAI API key not found");
|
|
81
83
|
}
|
|
82
|
-
const azureApiInstanceName =
|
|
84
|
+
const azureApiInstanceName = fieldsWithDefaults?.azureOpenAIApiInstanceName ??
|
|
83
85
|
getEnvironmentVariable("AZURE_OPENAI_API_INSTANCE_NAME");
|
|
84
|
-
const azureApiDeploymentName = (
|
|
85
|
-
|
|
86
|
+
const azureApiDeploymentName = (fieldsWithDefaults?.azureOpenAIApiEmbeddingsDeploymentName ||
|
|
87
|
+
fieldsWithDefaults?.azureOpenAIApiDeploymentName) ??
|
|
86
88
|
(getEnvironmentVariable("AZURE_OPENAI_API_EMBEDDINGS_DEPLOYMENT_NAME") ||
|
|
87
89
|
getEnvironmentVariable("AZURE_OPENAI_API_DEPLOYMENT_NAME"));
|
|
88
|
-
const azureApiVersion =
|
|
90
|
+
const azureApiVersion = fieldsWithDefaults?.azureOpenAIApiVersion ??
|
|
89
91
|
getEnvironmentVariable("AZURE_OPENAI_API_VERSION");
|
|
90
92
|
this.azureOpenAIBasePath =
|
|
91
|
-
|
|
93
|
+
fieldsWithDefaults?.azureOpenAIBasePath ??
|
|
92
94
|
getEnvironmentVariable("AZURE_OPENAI_BASE_PATH");
|
|
93
|
-
this.modelName =
|
|
94
|
-
this.batchSize =
|
|
95
|
-
|
|
96
|
-
this.
|
|
95
|
+
this.modelName = fieldsWithDefaults?.modelName ?? this.modelName;
|
|
96
|
+
this.batchSize =
|
|
97
|
+
fieldsWithDefaults?.batchSize ?? (azureApiKey ? 1 : this.batchSize);
|
|
98
|
+
this.stripNewLines =
|
|
99
|
+
fieldsWithDefaults?.stripNewLines ?? this.stripNewLines;
|
|
100
|
+
this.timeout = fieldsWithDefaults?.timeout;
|
|
97
101
|
this.azureOpenAIApiVersion = azureApiVersion;
|
|
98
102
|
this.azureOpenAIApiKey = azureApiKey;
|
|
99
103
|
this.azureOpenAIApiInstanceName = azureApiInstanceName;
|
|
@@ -115,16 +119,18 @@ export class OpenAIEmbeddings extends Embeddings {
|
|
|
115
119
|
};
|
|
116
120
|
}
|
|
117
121
|
async embedDocuments(texts) {
|
|
118
|
-
const
|
|
122
|
+
const batches = chunkArray(this.stripNewLines ? texts.map((t) => t.replace(/\n/g, " ")) : texts, this.batchSize);
|
|
123
|
+
const batchRequests = batches.map((batch) => this.embeddingWithRetry({
|
|
124
|
+
model: this.modelName,
|
|
125
|
+
input: batch,
|
|
126
|
+
}));
|
|
127
|
+
const batchResponses = await Promise.all(batchRequests);
|
|
119
128
|
const embeddings = [];
|
|
120
|
-
for (let i = 0; i <
|
|
121
|
-
const
|
|
122
|
-
const { data } =
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
});
|
|
126
|
-
for (let j = 0; j < input.length; j += 1) {
|
|
127
|
-
embeddings.push(data.data[j].embedding);
|
|
129
|
+
for (let i = 0; i < batchResponses.length; i += 1) {
|
|
130
|
+
const batch = batches[i];
|
|
131
|
+
const { data: batchResponse } = batchResponses[i];
|
|
132
|
+
for (let j = 0; j < batch.length; j += 1) {
|
|
133
|
+
embeddings.push(batchResponse.data[j].embedding);
|
|
128
134
|
}
|
|
129
135
|
}
|
|
130
136
|
return embeddings;
|
|
@@ -71,6 +71,7 @@ exports.optionalImportEntrypoints = [
|
|
|
71
71
|
"langchain/document_loaders/web/s3",
|
|
72
72
|
"langchain/document_loaders/web/sonix_audio",
|
|
73
73
|
"langchain/document_loaders/web/confluence",
|
|
74
|
+
"langchain/document_loaders/web/youtube",
|
|
74
75
|
"langchain/document_loaders/fs/directory",
|
|
75
76
|
"langchain/document_loaders/fs/buffer",
|
|
76
77
|
"langchain/document_loaders/fs/text",
|
|
@@ -68,6 +68,7 @@ export const optionalImportEntrypoints = [
|
|
|
68
68
|
"langchain/document_loaders/web/s3",
|
|
69
69
|
"langchain/document_loaders/web/sonix_audio",
|
|
70
70
|
"langchain/document_loaders/web/confluence",
|
|
71
|
+
"langchain/document_loaders/web/youtube",
|
|
71
72
|
"langchain/document_loaders/fs/directory",
|
|
72
73
|
"langchain/document_loaders/fs/buffer",
|
|
73
74
|
"langchain/document_loaders/fs/text",
|
package/dist/schema/index.cjs
CHANGED
|
@@ -206,8 +206,8 @@ class FunctionMessage extends BaseMessage {
|
|
|
206
206
|
/** @deprecated */
|
|
207
207
|
name) {
|
|
208
208
|
if (typeof fields === "string") {
|
|
209
|
-
// eslint-disable-next-line no-param-reassign
|
|
210
|
-
fields = { content: fields, name };
|
|
209
|
+
// eslint-disable-next-line no-param-reassign, @typescript-eslint/no-non-null-assertion
|
|
210
|
+
fields = { content: fields, name: name };
|
|
211
211
|
}
|
|
212
212
|
super(fields);
|
|
213
213
|
}
|
package/dist/schema/index.d.ts
CHANGED
|
@@ -71,6 +71,9 @@ export interface BaseMessageFields {
|
|
|
71
71
|
export interface ChatMessageFieldsWithRole extends BaseMessageFields {
|
|
72
72
|
role: string;
|
|
73
73
|
}
|
|
74
|
+
export interface FunctionMessageFieldsWithName extends BaseMessageFields {
|
|
75
|
+
name: string;
|
|
76
|
+
}
|
|
74
77
|
export declare abstract class BaseMessage extends Serializable implements BaseMessageFields {
|
|
75
78
|
lc_namespace: string[];
|
|
76
79
|
lc_serializable: boolean;
|
|
@@ -138,6 +141,7 @@ export declare const AIChatMessage: typeof AIMessage;
|
|
|
138
141
|
*/
|
|
139
142
|
export declare const SystemChatMessage: typeof SystemMessage;
|
|
140
143
|
export declare class FunctionMessage extends BaseMessage {
|
|
144
|
+
constructor(fields: FunctionMessageFieldsWithName);
|
|
141
145
|
constructor(fields: string | BaseMessageFields,
|
|
142
146
|
/** @deprecated */
|
|
143
147
|
name: string);
|
package/dist/schema/index.js
CHANGED
|
@@ -194,8 +194,8 @@ export class FunctionMessage extends BaseMessage {
|
|
|
194
194
|
/** @deprecated */
|
|
195
195
|
name) {
|
|
196
196
|
if (typeof fields === "string") {
|
|
197
|
-
// eslint-disable-next-line no-param-reassign
|
|
198
|
-
fields = { content: fields, name };
|
|
197
|
+
// eslint-disable-next-line no-param-reassign, @typescript-eslint/no-non-null-assertion
|
|
198
|
+
fields = { content: fields, name: name };
|
|
199
199
|
}
|
|
200
200
|
super(fields);
|
|
201
201
|
}
|
|
@@ -30,6 +30,11 @@ function mapStoredMessagesToChatMessages(messages) {
|
|
|
30
30
|
return new index_js_1.AIMessage(storedMessage.data);
|
|
31
31
|
case "system":
|
|
32
32
|
return new index_js_1.SystemMessage(storedMessage.data);
|
|
33
|
+
case "function":
|
|
34
|
+
if (storedMessage.data.name === undefined) {
|
|
35
|
+
throw new Error("Name must be defined for function messages");
|
|
36
|
+
}
|
|
37
|
+
return new index_js_1.FunctionMessage(storedMessage.data);
|
|
33
38
|
case "chat": {
|
|
34
39
|
if (storedMessage.data.role === undefined) {
|
|
35
40
|
throw new Error("Role must be defined for chat messages");
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { AIMessage, ChatMessage, HumanMessage, SystemMessage, } from "../../schema/index.js";
|
|
1
|
+
import { AIMessage, ChatMessage, FunctionMessage, HumanMessage, SystemMessage, } from "../../schema/index.js";
|
|
2
2
|
export function mapV1MessageToStoredMessage(message) {
|
|
3
3
|
// TODO: Remove this mapper when we deprecate the old message format.
|
|
4
4
|
if (message.data !== undefined) {
|
|
@@ -26,6 +26,11 @@ export function mapStoredMessagesToChatMessages(messages) {
|
|
|
26
26
|
return new AIMessage(storedMessage.data);
|
|
27
27
|
case "system":
|
|
28
28
|
return new SystemMessage(storedMessage.data);
|
|
29
|
+
case "function":
|
|
30
|
+
if (storedMessage.data.name === undefined) {
|
|
31
|
+
throw new Error("Name must be defined for function messages");
|
|
32
|
+
}
|
|
33
|
+
return new FunctionMessage(storedMessage.data);
|
|
29
34
|
case "chat": {
|
|
30
35
|
if (storedMessage.data.role === undefined) {
|
|
31
36
|
throw new Error("Role must be defined for chat messages");
|
|
@@ -125,6 +125,25 @@ class HNSWLib extends base_js_1.SaveableVectorStore {
|
|
|
125
125
|
result.distances[resultIndex],
|
|
126
126
|
]);
|
|
127
127
|
}
|
|
128
|
+
async delete(params) {
|
|
129
|
+
const fs = await import("node:fs/promises");
|
|
130
|
+
const path = await import("node:path");
|
|
131
|
+
try {
|
|
132
|
+
await fs.access(path.join(params.directory, "hnswlib.index"));
|
|
133
|
+
}
|
|
134
|
+
catch (err) {
|
|
135
|
+
throw new Error(`Directory ${params.directory} does not contain a hnswlib.index file.`);
|
|
136
|
+
}
|
|
137
|
+
await Promise.all([
|
|
138
|
+
await fs.rm(path.join(params.directory, "hnswlib.index"), {
|
|
139
|
+
force: true,
|
|
140
|
+
}),
|
|
141
|
+
await fs.rm(path.join(params.directory, "docstore.json"), {
|
|
142
|
+
force: true,
|
|
143
|
+
}),
|
|
144
|
+
await fs.rm(path.join(params.directory, "args.json"), { force: true }),
|
|
145
|
+
]);
|
|
146
|
+
}
|
|
128
147
|
async save(directory) {
|
|
129
148
|
const fs = await import("node:fs/promises");
|
|
130
149
|
const path = await import("node:path");
|
|
@@ -25,6 +25,9 @@ export declare class HNSWLib extends SaveableVectorStore {
|
|
|
25
25
|
private set index(value);
|
|
26
26
|
addVectors(vectors: number[][], documents: Document[]): Promise<void>;
|
|
27
27
|
similaritySearchVectorWithScore(query: number[], k: number, filter?: this["FilterType"]): Promise<[Document<Record<string, any>>, number][]>;
|
|
28
|
+
delete(params: {
|
|
29
|
+
directory: string;
|
|
30
|
+
}): Promise<void>;
|
|
28
31
|
save(directory: string): Promise<void>;
|
|
29
32
|
static load(directory: string, embeddings: Embeddings): Promise<HNSWLib>;
|
|
30
33
|
static fromTexts(texts: string[], metadatas: object[] | object, embeddings: Embeddings, dbConfig?: {
|
|
@@ -122,6 +122,25 @@ export class HNSWLib extends SaveableVectorStore {
|
|
|
122
122
|
result.distances[resultIndex],
|
|
123
123
|
]);
|
|
124
124
|
}
|
|
125
|
+
async delete(params) {
|
|
126
|
+
const fs = await import("node:fs/promises");
|
|
127
|
+
const path = await import("node:path");
|
|
128
|
+
try {
|
|
129
|
+
await fs.access(path.join(params.directory, "hnswlib.index"));
|
|
130
|
+
}
|
|
131
|
+
catch (err) {
|
|
132
|
+
throw new Error(`Directory ${params.directory} does not contain a hnswlib.index file.`);
|
|
133
|
+
}
|
|
134
|
+
await Promise.all([
|
|
135
|
+
await fs.rm(path.join(params.directory, "hnswlib.index"), {
|
|
136
|
+
force: true,
|
|
137
|
+
}),
|
|
138
|
+
await fs.rm(path.join(params.directory, "docstore.json"), {
|
|
139
|
+
force: true,
|
|
140
|
+
}),
|
|
141
|
+
await fs.rm(path.join(params.directory, "args.json"), { force: true }),
|
|
142
|
+
]);
|
|
143
|
+
}
|
|
125
144
|
async save(directory) {
|
|
126
145
|
const fs = await import("node:fs/promises");
|
|
127
146
|
const path = await import("node:path");
|
|
@@ -82,7 +82,7 @@ class MyScaleStore extends base_js_1.VectorStore {
|
|
|
82
82
|
writable: true,
|
|
83
83
|
value: false
|
|
84
84
|
});
|
|
85
|
-
this.indexType = args.indexType || "
|
|
85
|
+
this.indexType = args.indexType || "MSTG";
|
|
86
86
|
this.indexParam = args.indexParam || {};
|
|
87
87
|
this.columnMap = args.columnMap || {
|
|
88
88
|
id: "id",
|
|
@@ -56,7 +56,7 @@ export class MyScaleStore extends VectorStore {
|
|
|
56
56
|
writable: true,
|
|
57
57
|
value: false
|
|
58
58
|
});
|
|
59
|
-
this.indexType = args.indexType || "
|
|
59
|
+
this.indexType = args.indexType || "MSTG";
|
|
60
60
|
this.indexParam = args.indexParam || {};
|
|
61
61
|
this.columnMap = args.columnMap || {
|
|
62
62
|
id: "id",
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
module.exports = require('../../dist/document_loaders/web/youtube.cjs');
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/youtube.js'
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from '../../dist/document_loaders/web/youtube.js'
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "langchain",
|
|
3
|
-
"version": "0.0.
|
|
3
|
+
"version": "0.0.130",
|
|
4
4
|
"description": "Typescript bindings for langchain",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"engines": {
|
|
@@ -313,6 +313,9 @@
|
|
|
313
313
|
"document_loaders/web/sort_xyz_blockchain.cjs",
|
|
314
314
|
"document_loaders/web/sort_xyz_blockchain.js",
|
|
315
315
|
"document_loaders/web/sort_xyz_blockchain.d.ts",
|
|
316
|
+
"document_loaders/web/youtube.cjs",
|
|
317
|
+
"document_loaders/web/youtube.js",
|
|
318
|
+
"document_loaders/web/youtube.d.ts",
|
|
316
319
|
"document_loaders/fs/directory.cjs",
|
|
317
320
|
"document_loaders/fs/directory.js",
|
|
318
321
|
"document_loaders/fs/directory.d.ts",
|
|
@@ -685,7 +688,9 @@
|
|
|
685
688
|
"typesense": "^1.5.3",
|
|
686
689
|
"usearch": "^1.1.1",
|
|
687
690
|
"vectordb": "^0.1.4",
|
|
688
|
-
"weaviate-ts-client": "^1.4.0"
|
|
691
|
+
"weaviate-ts-client": "^1.4.0",
|
|
692
|
+
"youtube-transcript": "^1.0.6",
|
|
693
|
+
"youtubei.js": "^5.8.0"
|
|
689
694
|
},
|
|
690
695
|
"peerDependencies": {
|
|
691
696
|
"@aws-crypto/sha256-js": "^5.0.0",
|
|
@@ -757,7 +762,9 @@
|
|
|
757
762
|
"typesense": "^1.5.3",
|
|
758
763
|
"usearch": "^1.1.1",
|
|
759
764
|
"vectordb": "^0.1.4",
|
|
760
|
-
"weaviate-ts-client": "^1.4.0"
|
|
765
|
+
"weaviate-ts-client": "^1.4.0",
|
|
766
|
+
"youtube-transcript": "^1.0.6",
|
|
767
|
+
"youtubei.js": "^5.8.0"
|
|
761
768
|
},
|
|
762
769
|
"peerDependenciesMeta": {
|
|
763
770
|
"@aws-crypto/sha256-js": {
|
|
@@ -817,6 +824,9 @@
|
|
|
817
824
|
"@huggingface/inference": {
|
|
818
825
|
"optional": true
|
|
819
826
|
},
|
|
827
|
+
"@mozilla/readability": {
|
|
828
|
+
"optional": true
|
|
829
|
+
},
|
|
820
830
|
"@notionhq/client": {
|
|
821
831
|
"optional": true
|
|
822
832
|
},
|
|
@@ -904,6 +914,9 @@
|
|
|
904
914
|
"ioredis": {
|
|
905
915
|
"optional": true
|
|
906
916
|
},
|
|
917
|
+
"jsdom": {
|
|
918
|
+
"optional": true
|
|
919
|
+
},
|
|
907
920
|
"mammoth": {
|
|
908
921
|
"optional": true
|
|
909
922
|
},
|
|
@@ -963,6 +976,12 @@
|
|
|
963
976
|
},
|
|
964
977
|
"weaviate-ts-client": {
|
|
965
978
|
"optional": true
|
|
979
|
+
},
|
|
980
|
+
"youtube-transcript": {
|
|
981
|
+
"optional": true
|
|
982
|
+
},
|
|
983
|
+
"youtubei.js": {
|
|
984
|
+
"optional": true
|
|
966
985
|
}
|
|
967
986
|
},
|
|
968
987
|
"dependencies": {
|
|
@@ -1524,6 +1543,11 @@
|
|
|
1524
1543
|
"import": "./document_loaders/web/sort_xyz_blockchain.js",
|
|
1525
1544
|
"require": "./document_loaders/web/sort_xyz_blockchain.cjs"
|
|
1526
1545
|
},
|
|
1546
|
+
"./document_loaders/web/youtube": {
|
|
1547
|
+
"types": "./document_loaders/web/youtube.d.ts",
|
|
1548
|
+
"import": "./document_loaders/web/youtube.js",
|
|
1549
|
+
"require": "./document_loaders/web/youtube.cjs"
|
|
1550
|
+
},
|
|
1527
1551
|
"./document_loaders/fs/directory": {
|
|
1528
1552
|
"types": "./document_loaders/fs/directory.d.ts",
|
|
1529
1553
|
"import": "./document_loaders/fs/directory.js",
|