langchain 0.0.7 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/agents.mjs +1 -0
- package/chains.mjs +1 -0
- package/dist/agents/tests/{agent.test.d.ts → agent.int.test.d.ts} +0 -0
- package/dist/agents/tests/{agent.test.js → agent.int.test.js} +2 -3
- package/dist/agents/tests/agent.int.test.js.map +1 -0
- package/dist/agents/tools/serpapi.js +1 -1
- package/dist/agents/tools/serpapi.js.map +1 -1
- package/dist/cache.d.ts +5 -4
- package/dist/cache.js +17 -6
- package/dist/cache.js.map +1 -1
- package/dist/chains/base.d.ts +1 -0
- package/dist/chains/base.js +3 -2
- package/dist/chains/base.js.map +1 -1
- package/dist/chains/index.d.ts +1 -1
- package/dist/chains/index.js.map +1 -1
- package/dist/chains/llm_chain.d.ts +3 -2
- package/dist/chains/llm_chain.js +1 -1
- package/dist/chains/llm_chain.js.map +1 -1
- package/dist/chains/question_answering/tests/{load.test.d.ts → load.int.test.d.ts} +0 -0
- package/dist/chains/question_answering/tests/{load.test.js → load.int.test.js} +1 -1
- package/dist/chains/question_answering/tests/load.int.test.js.map +1 -0
- package/dist/chains/tests/{chat_vector_db_qa_chain.test.d.ts → chat_vector_db_qa_chain.int.test.d.ts} +0 -0
- package/dist/chains/tests/{chat_vector_db_qa_chain.test.js → chat_vector_db_qa_chain.int.test.js} +1 -1
- package/dist/chains/tests/chat_vector_db_qa_chain.int.test.js.map +1 -0
- package/dist/chains/tests/{combine_docs_chain.test.d.ts → combine_docs_chain.int.test.d.ts} +0 -0
- package/dist/chains/tests/{combine_docs_chain.test.js → combine_docs_chain.int.test.js} +1 -1
- package/dist/chains/tests/combine_docs_chain.int.test.js.map +1 -0
- package/dist/chains/tests/{llm_chain.test.d.ts → llm_chain.int.test.d.ts} +0 -0
- package/dist/chains/tests/{llm_chain.test.js → llm_chain.int.test.js} +1 -1
- package/dist/chains/tests/llm_chain.int.test.js.map +1 -0
- package/dist/chains/tests/{vector_db_qa_chain.test.d.ts → vector_db_qa_chain.int.test.d.ts} +0 -0
- package/dist/chains/tests/{vector_db_qa_chain.test.js → vector_db_qa_chain.int.test.js} +1 -1
- package/dist/chains/tests/vector_db_qa_chain.int.test.js.map +1 -0
- package/dist/docstore/base.d.ts +5 -0
- package/dist/docstore/base.js +7 -0
- package/dist/docstore/base.js.map +1 -0
- package/dist/docstore/in_memory.d.ts +10 -0
- package/dist/docstore/in_memory.js +36 -0
- package/dist/docstore/in_memory.js.map +1 -0
- package/dist/docstore/index.d.ts +3 -0
- package/dist/docstore/index.js +10 -0
- package/dist/docstore/index.js.map +1 -0
- package/dist/document.d.ts +1 -2
- package/dist/document_loaders/base.d.ts +10 -0
- package/dist/document_loaders/base.js +12 -0
- package/dist/document_loaders/base.js.map +1 -0
- package/dist/document_loaders/cheerio_web_base.d.ts +10 -0
- package/dist/document_loaders/cheerio_web_base.js +47 -0
- package/dist/document_loaders/cheerio_web_base.js.map +1 -0
- package/dist/document_loaders/college_confidential.d.ts +6 -0
- package/dist/document_loaders/college_confidential.js +18 -0
- package/dist/document_loaders/college_confidential.js.map +1 -0
- package/dist/document_loaders/hn.d.ts +9 -0
- package/dist/document_loaders/hn.js +53 -0
- package/dist/document_loaders/hn.js.map +1 -0
- package/dist/document_loaders/imsdb.d.ts +7 -0
- package/dist/document_loaders/imsdb.js +24 -0
- package/dist/document_loaders/imsdb.js.map +1 -0
- package/dist/document_loaders/index.d.ts +8 -0
- package/dist/document_loaders/index.js +18 -0
- package/dist/document_loaders/index.js.map +1 -0
- package/dist/document_loaders/srt.d.ts +7 -0
- package/dist/document_loaders/srt.js +79 -0
- package/dist/document_loaders/srt.js.map +1 -0
- package/dist/document_loaders/text.d.ts +7 -0
- package/dist/document_loaders/text.js +41 -0
- package/dist/document_loaders/text.js.map +1 -0
- package/dist/embeddings/fake.d.ts +5 -0
- package/dist/embeddings/fake.js +14 -0
- package/dist/embeddings/fake.js.map +1 -0
- package/dist/embeddings/tests/{openai.test.d.ts → openai.int.test.d.ts} +0 -0
- package/dist/embeddings/tests/{openai.test.js → openai.int.test.js} +1 -1
- package/dist/embeddings/tests/openai.int.test.js.map +1 -0
- package/dist/llms/base.js +7 -6
- package/dist/llms/base.js.map +1 -1
- package/dist/llms/index.d.ts +1 -1
- package/dist/llms/index.js +2 -1
- package/dist/llms/index.js.map +1 -1
- package/dist/llms/openai.d.ts +52 -3
- package/dist/llms/openai.js +81 -8
- package/dist/llms/openai.js.map +1 -1
- package/dist/llms/tests/{cohere.test.d.ts → cohere.int.test.d.ts} +0 -0
- package/dist/llms/tests/{cohere.test.js → cohere.int.test.js} +1 -1
- package/dist/llms/tests/cohere.int.test.js.map +1 -0
- package/dist/llms/tests/{openai.test.d.ts → openai.int.test.d.ts} +0 -0
- package/dist/llms/tests/{openai.test.js → openai.int.test.js} +1 -1
- package/dist/llms/tests/openai.int.test.js.map +1 -0
- package/dist/memory/buffer_window_memory.d.ts +17 -0
- package/dist/memory/buffer_window_memory.js +66 -0
- package/dist/memory/buffer_window_memory.js.map +1 -0
- package/dist/memory/index.d.ts +1 -0
- package/dist/memory/index.js +3 -1
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/tests/buffer_window_memory.test.d.ts +1 -0
- package/dist/memory/tests/buffer_window_memory.test.js +24 -0
- package/dist/memory/tests/buffer_window_memory.test.js.map +1 -0
- package/dist/tests/text_splitter.test.d.ts +1 -0
- package/dist/tests/text_splitter.test.js +141 -0
- package/dist/tests/text_splitter.test.js.map +1 -0
- package/dist/text_splitter.d.ts +18 -1
- package/dist/text_splitter.js +70 -1
- package/dist/text_splitter.js.map +1 -1
- package/dist/util/hub.js +3 -2
- package/dist/util/hub.js.map +1 -1
- package/dist/vectorstores/base.d.ts +1 -5
- package/dist/vectorstores/base.js +2 -11
- package/dist/vectorstores/base.js.map +1 -1
- package/dist/vectorstores/hnswlib.d.ts +7 -4
- package/dist/vectorstores/hnswlib.js +31 -11
- package/dist/vectorstores/hnswlib.js.map +1 -1
- package/dist/vectorstores/index.d.ts +1 -0
- package/dist/vectorstores/index.js +3 -1
- package/dist/vectorstores/index.js.map +1 -1
- package/dist/vectorstores/pinecone.d.ts +17 -0
- package/dist/vectorstores/pinecone.js +78 -0
- package/dist/vectorstores/pinecone.js.map +1 -0
- package/dist/vectorstores/tests/hnswlib.int.test.d.ts +1 -0
- package/dist/vectorstores/tests/hnswlib.int.test.js +61 -0
- package/dist/vectorstores/tests/hnswlib.int.test.js.map +1 -0
- package/dist/vectorstores/tests/hnswlib.test.js +29 -38
- package/dist/vectorstores/tests/hnswlib.test.js.map +1 -1
- package/docstore.d.ts +1 -0
- package/docstore.js +1 -0
- package/docstore.mjs +1 -0
- package/document.mjs +1 -0
- package/document_loaders.d.ts +1 -0
- package/document_loaders.js +1 -0
- package/document_loaders.mjs +1 -0
- package/embeddings.mjs +1 -0
- package/index.d.ts +1 -0
- package/index.js +1 -0
- package/index.mjs +1 -0
- package/llms.mjs +1 -0
- package/memory.mjs +1 -0
- package/package.json +127 -8
- package/prompts.mjs +1 -0
- package/text_splitter.mjs +1 -0
- package/tools.mjs +1 -0
- package/vectorstores.mjs +1 -0
- package/dist/agents/tests/agent.test.js.map +0 -1
- package/dist/chains/question_answering/tests/load.test.js.map +0 -1
- package/dist/chains/tests/chat_vector_db_qa_chain.test.js.map +0 -1
- package/dist/chains/tests/combine_docs_chain.test.js.map +0 -1
- package/dist/chains/tests/llm_chain.test.js.map +0 -1
- package/dist/chains/tests/vector_db_qa_chain.test.js.map +0 -1
- package/dist/embeddings/tests/openai.test.js.map +0 -1
- package/dist/llms/tests/cohere.test.js.map +0 -1
- package/dist/llms/tests/openai.test.js.map +0 -1
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.BufferWindowMemory = void 0;
|
|
4
|
+
const base_1 = require("./base");
|
|
5
|
+
const getInputValue = (inputValues) => {
|
|
6
|
+
const keys = Object.keys(inputValues);
|
|
7
|
+
if (keys.length === 1) {
|
|
8
|
+
return inputValues[keys[0]];
|
|
9
|
+
}
|
|
10
|
+
throw new Error("input values have multiple keys, memory only supported when one key currently");
|
|
11
|
+
};
|
|
12
|
+
class BufferWindowMemory extends base_1.BaseMemory {
|
|
13
|
+
constructor(fields) {
|
|
14
|
+
var _a, _b, _c, _d;
|
|
15
|
+
super();
|
|
16
|
+
Object.defineProperty(this, "humanPrefix", {
|
|
17
|
+
enumerable: true,
|
|
18
|
+
configurable: true,
|
|
19
|
+
writable: true,
|
|
20
|
+
value: "Human"
|
|
21
|
+
});
|
|
22
|
+
Object.defineProperty(this, "aiPrefix", {
|
|
23
|
+
enumerable: true,
|
|
24
|
+
configurable: true,
|
|
25
|
+
writable: true,
|
|
26
|
+
value: "AI"
|
|
27
|
+
});
|
|
28
|
+
Object.defineProperty(this, "memoryKey", {
|
|
29
|
+
enumerable: true,
|
|
30
|
+
configurable: true,
|
|
31
|
+
writable: true,
|
|
32
|
+
value: "history"
|
|
33
|
+
});
|
|
34
|
+
Object.defineProperty(this, "buffer", {
|
|
35
|
+
enumerable: true,
|
|
36
|
+
configurable: true,
|
|
37
|
+
writable: true,
|
|
38
|
+
value: []
|
|
39
|
+
});
|
|
40
|
+
Object.defineProperty(this, "k", {
|
|
41
|
+
enumerable: true,
|
|
42
|
+
configurable: true,
|
|
43
|
+
writable: true,
|
|
44
|
+
value: 5
|
|
45
|
+
});
|
|
46
|
+
this.humanPrefix = (_a = fields === null || fields === void 0 ? void 0 : fields.humanPrefix) !== null && _a !== void 0 ? _a : this.humanPrefix;
|
|
47
|
+
this.aiPrefix = (_b = fields === null || fields === void 0 ? void 0 : fields.aiPrefix) !== null && _b !== void 0 ? _b : this.aiPrefix;
|
|
48
|
+
this.memoryKey = (_c = fields === null || fields === void 0 ? void 0 : fields.memoryKey) !== null && _c !== void 0 ? _c : this.memoryKey;
|
|
49
|
+
this.k = (_d = fields === null || fields === void 0 ? void 0 : fields.k) !== null && _d !== void 0 ? _d : this.k;
|
|
50
|
+
}
|
|
51
|
+
async loadMemoryVariables(_values) {
|
|
52
|
+
const result = {
|
|
53
|
+
[this.memoryKey]: this.buffer.slice(-this.k).join("\n\n"),
|
|
54
|
+
};
|
|
55
|
+
return result;
|
|
56
|
+
}
|
|
57
|
+
async saveContext(inputValues, outputValues) {
|
|
58
|
+
const values = await outputValues;
|
|
59
|
+
const human = `${this.humanPrefix}: ${getInputValue(inputValues)}`;
|
|
60
|
+
const ai = `${this.aiPrefix}: ${getInputValue(values)}`;
|
|
61
|
+
const newlines = [human, ai];
|
|
62
|
+
this.buffer.push(`\n${newlines.join("\n")}`);
|
|
63
|
+
}
|
|
64
|
+
}
|
|
65
|
+
exports.BufferWindowMemory = BufferWindowMemory;
|
|
66
|
+
//# sourceMappingURL=buffer_window_memory.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"buffer_window_memory.js","sourceRoot":"","sources":["../../memory/buffer_window_memory.ts"],"names":[],"mappings":";;;AAAA,iCAAgF;AAShF,MAAM,aAAa,GAAG,CAAC,WAAwB,EAAE,EAAE;IACjD,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;IACtC,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE;QACrB,OAAO,WAAW,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC;KAC7B;IACD,MAAM,IAAI,KAAK,CACb,+EAA+E,CAChF,CAAC;AACJ,CAAC,CAAC;AAEF,MAAa,kBACX,SAAQ,iBAAU;IAalB,YAAY,MAAyC;;QACnD,KAAK,EAAE,CAAC;QAXV;;;;mBAAc,OAAO;WAAC;QAEtB;;;;mBAAW,IAAI;WAAC;QAEhB;;;;mBAAY,SAAS;WAAC;QAEtB;;;;mBAAmB,EAAE;WAAC;QAEtB;;;;mBAAI,CAAC;WAAC;QAIJ,IAAI,CAAC,WAAW,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,WAAW,mCAAI,IAAI,CAAC,WAAW,CAAC;QAC3D,IAAI,CAAC,QAAQ,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,QAAQ,mCAAI,IAAI,CAAC,QAAQ,CAAC;QAClD,IAAI,CAAC,SAAS,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,SAAS,mCAAI,IAAI,CAAC,SAAS,CAAC;QACrD,IAAI,CAAC,CAAC,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,CAAC,mCAAI,IAAI,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED,KAAK,CAAC,mBAAmB,CAAC,OAAoB;QAC5C,MAAM,MAAM,GAAG;YACb,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC;SAC1D,CAAC;QACF,OAAO,MAAM,CAAC;IAChB,CAAC;IAED,KAAK,CAAC,WAAW,CACf,WAAwB,EACxB,YAAmC;QAEnC,MAAM,MAAM,GAAG,MAAM,YAAY,CAAC;QAClC,MAAM,KAAK,GAAG,GAAG,IAAI,CAAC,WAAW,KAAK,aAAa,CAAC,WAAW,CAAC,EAAE,CAAC;QACnE,MAAM,EAAE,GAAG,GAAG,IAAI,CAAC,QAAQ,KAAK,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;QACxD,MAAM,QAAQ,GAAG,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;QAC7B,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAC/C,CAAC;CACF;AAvCD,gDAuCC"}
|
package/dist/memory/index.d.ts
CHANGED
package/dist/memory/index.js
CHANGED
|
@@ -1,8 +1,10 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.BaseMemory = exports.BufferMemory = void 0;
|
|
3
|
+
exports.BufferWindowMemory = exports.BaseMemory = exports.BufferMemory = void 0;
|
|
4
4
|
var buffer_memory_1 = require("./buffer_memory");
|
|
5
5
|
Object.defineProperty(exports, "BufferMemory", { enumerable: true, get: function () { return buffer_memory_1.BufferMemory; } });
|
|
6
6
|
var base_1 = require("./base");
|
|
7
7
|
Object.defineProperty(exports, "BaseMemory", { enumerable: true, get: function () { return base_1.BaseMemory; } });
|
|
8
|
+
var buffer_window_memory_1 = require("./buffer_window_memory");
|
|
9
|
+
Object.defineProperty(exports, "BufferWindowMemory", { enumerable: true, get: function () { return buffer_window_memory_1.BufferWindowMemory; } });
|
|
8
10
|
//# sourceMappingURL=index.js.map
|
package/dist/memory/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../memory/index.ts"],"names":[],"mappings":";;;AAAA,iDAA+C;AAAtC,6GAAA,YAAY,OAAA;AACrB,+BAAoC;AAA3B,kGAAA,UAAU,OAAA"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../memory/index.ts"],"names":[],"mappings":";;;AAAA,iDAA+C;AAAtC,6GAAA,YAAY,OAAA;AACrB,+BAAoC;AAA3B,kGAAA,UAAU,OAAA;AACnB,+DAA4D;AAAnD,0HAAA,kBAAkB,OAAA"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const globals_1 = require("@jest/globals");
|
|
4
|
+
const buffer_window_memory_1 = require("../buffer_window_memory");
|
|
5
|
+
(0, globals_1.test)("Test buffer memory", async () => {
|
|
6
|
+
const memory = new buffer_window_memory_1.BufferWindowMemory({ k: 1 });
|
|
7
|
+
const result1 = await memory.loadMemoryVariables({});
|
|
8
|
+
(0, globals_1.expect)(result1).toStrictEqual({ history: "" });
|
|
9
|
+
const llmResult = new Promise((resolve, _reject) => {
|
|
10
|
+
resolve({ bar: "foo" });
|
|
11
|
+
});
|
|
12
|
+
await memory.saveContext({ foo: "bar" }, llmResult);
|
|
13
|
+
const expectedString = "\nHuman: bar\nAI: foo";
|
|
14
|
+
const result2 = await memory.loadMemoryVariables({});
|
|
15
|
+
(0, globals_1.expect)(result2).toStrictEqual({ history: expectedString });
|
|
16
|
+
const llmResult2 = new Promise((resolve, _reject) => {
|
|
17
|
+
resolve({ bar: "foo" });
|
|
18
|
+
});
|
|
19
|
+
await memory.saveContext({ foo: "bar1" }, llmResult2);
|
|
20
|
+
const expectedString3 = "\nHuman: bar1\nAI: foo";
|
|
21
|
+
const result3 = await memory.loadMemoryVariables({});
|
|
22
|
+
(0, globals_1.expect)(result3).toStrictEqual({ history: expectedString3 });
|
|
23
|
+
});
|
|
24
|
+
//# sourceMappingURL=buffer_window_memory.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"buffer_window_memory.test.js","sourceRoot":"","sources":["../../../memory/tests/buffer_window_memory.test.ts"],"names":[],"mappings":";;AAAA,2CAA6C;AAC7C,kEAA6D;AAG7D,IAAA,cAAI,EAAC,oBAAoB,EAAE,KAAK,IAAI,EAAE;IACpC,MAAM,MAAM,GAAG,IAAI,yCAAkB,CAAC,EAAE,CAAC,EAAE,CAAC,EAAE,CAAC,CAAC;IAChD,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;IACrD,IAAA,gBAAM,EAAC,OAAO,CAAC,CAAC,aAAa,CAAC,EAAE,OAAO,EAAE,EAAE,EAAE,CAAC,CAAC;IAE/C,MAAM,SAAS,GAAG,IAAI,OAAO,CAAe,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE;QAC/D,OAAO,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IACH,MAAM,MAAM,CAAC,WAAW,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,EAAE,SAAS,CAAC,CAAC;IACpD,MAAM,cAAc,GAAG,uBAAuB,CAAC;IAC/C,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;IACrD,IAAA,gBAAM,EAAC,OAAO,CAAC,CAAC,aAAa,CAAC,EAAE,OAAO,EAAE,cAAc,EAAE,CAAC,CAAC;IAE3D,MAAM,UAAU,GAAG,IAAI,OAAO,CAAe,CAAC,OAAO,EAAE,OAAO,EAAE,EAAE;QAChE,OAAO,CAAC,EAAE,GAAG,EAAE,KAAK,EAAE,CAAC,CAAC;IAC1B,CAAC,CAAC,CAAC;IACH,MAAM,MAAM,CAAC,WAAW,CAAC,EAAE,GAAG,EAAE,MAAM,EAAE,EAAE,UAAU,CAAC,CAAC;IACtD,MAAM,eAAe,GAAG,wBAAwB,CAAC;IACjD,MAAM,OAAO,GAAG,MAAM,MAAM,CAAC,mBAAmB,CAAC,EAAE,CAAC,CAAC;IACrD,IAAA,gBAAM,EAAC,OAAO,CAAC,CAAC,aAAa,CAAC,EAAE,OAAO,EAAE,eAAe,EAAE,CAAC,CAAC;AAC9D,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
const globals_1 = require("@jest/globals");
|
|
4
|
+
const document_1 = require("../document");
|
|
5
|
+
const text_splitter_1 = require("../text_splitter");
|
|
6
|
+
(0, globals_1.test)("Test splitting by character count.", () => {
|
|
7
|
+
const text = "foo bar baz 123";
|
|
8
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
9
|
+
separator: " ",
|
|
10
|
+
chunkSize: 7,
|
|
11
|
+
chunkOverlap: 3,
|
|
12
|
+
});
|
|
13
|
+
const output = splitter.splitText(text);
|
|
14
|
+
const expectedOutput = ["foo bar", "bar baz", "baz 123"];
|
|
15
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
16
|
+
});
|
|
17
|
+
(0, globals_1.test)("Test splitting by character count doesn't create empty documents.", () => {
|
|
18
|
+
const text = "foo bar";
|
|
19
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
20
|
+
separator: " ",
|
|
21
|
+
chunkSize: 2,
|
|
22
|
+
chunkOverlap: 0,
|
|
23
|
+
});
|
|
24
|
+
const output = splitter.splitText(text);
|
|
25
|
+
const expectedOutput = ["foo", "bar"];
|
|
26
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
27
|
+
});
|
|
28
|
+
(0, globals_1.test)("Test splitting by character count on long words.", () => {
|
|
29
|
+
const text = "foo bar baz a a";
|
|
30
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
31
|
+
separator: " ",
|
|
32
|
+
chunkSize: 3,
|
|
33
|
+
chunkOverlap: 1,
|
|
34
|
+
});
|
|
35
|
+
const output = splitter.splitText(text);
|
|
36
|
+
const expectedOutput = ["foo", "bar", "baz", "a a"];
|
|
37
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
38
|
+
});
|
|
39
|
+
(0, globals_1.test)("Test splitting by character count when shorter words are first.", () => {
|
|
40
|
+
const text = "a a foo bar baz";
|
|
41
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
42
|
+
separator: " ",
|
|
43
|
+
chunkSize: 3,
|
|
44
|
+
chunkOverlap: 1,
|
|
45
|
+
});
|
|
46
|
+
const output = splitter.splitText(text);
|
|
47
|
+
const expectedOutput = ["a a", "foo", "bar", "baz"];
|
|
48
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
49
|
+
});
|
|
50
|
+
(0, globals_1.test)("Test splitting by characters when splits not found easily.", () => {
|
|
51
|
+
const text = "foo bar baz 123";
|
|
52
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
53
|
+
separator: " ",
|
|
54
|
+
chunkSize: 1,
|
|
55
|
+
chunkOverlap: 0,
|
|
56
|
+
});
|
|
57
|
+
const output = splitter.splitText(text);
|
|
58
|
+
const expectedOutput = ["foo", "bar", "baz", "123"];
|
|
59
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
60
|
+
});
|
|
61
|
+
(0, globals_1.test)("Test invalid arguments.", () => {
|
|
62
|
+
(0, globals_1.expect)(() => {
|
|
63
|
+
const res = new text_splitter_1.CharacterTextSplitter({ chunkSize: 2, chunkOverlap: 4 });
|
|
64
|
+
console.log(res);
|
|
65
|
+
}).toThrow();
|
|
66
|
+
});
|
|
67
|
+
(0, globals_1.test)("Test create documents method.", () => {
|
|
68
|
+
const texts = ["foo bar", "baz"];
|
|
69
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
70
|
+
separator: " ",
|
|
71
|
+
chunkSize: 3,
|
|
72
|
+
chunkOverlap: 0,
|
|
73
|
+
});
|
|
74
|
+
const docs = splitter.createDocuments(texts);
|
|
75
|
+
const expectedDocs = [
|
|
76
|
+
new document_1.Document({ pageContent: "foo" }),
|
|
77
|
+
new document_1.Document({ pageContent: "bar" }),
|
|
78
|
+
new document_1.Document({ pageContent: "baz" }),
|
|
79
|
+
];
|
|
80
|
+
(0, globals_1.expect)(docs).toEqual(expectedDocs);
|
|
81
|
+
});
|
|
82
|
+
(0, globals_1.test)("Test create documents with metadata method.", () => {
|
|
83
|
+
const texts = ["foo bar", "baz"];
|
|
84
|
+
const splitter = new text_splitter_1.CharacterTextSplitter({
|
|
85
|
+
separator: " ",
|
|
86
|
+
chunkSize: 3,
|
|
87
|
+
chunkOverlap: 0,
|
|
88
|
+
});
|
|
89
|
+
const docs = splitter.createDocuments(texts, [
|
|
90
|
+
{ source: "1" },
|
|
91
|
+
{ source: "2" },
|
|
92
|
+
]);
|
|
93
|
+
const expectedDocs = [
|
|
94
|
+
new document_1.Document({ pageContent: "foo", metadata: { source: "1" } }),
|
|
95
|
+
new document_1.Document({ pageContent: "bar", metadata: { source: "1" } }),
|
|
96
|
+
new document_1.Document({ pageContent: "baz", metadata: { source: "2" } }),
|
|
97
|
+
];
|
|
98
|
+
(0, globals_1.expect)(docs).toEqual(expectedDocs);
|
|
99
|
+
});
|
|
100
|
+
(0, globals_1.test)("Test iterative text splitter.", () => {
|
|
101
|
+
const text = `Hi.\n\nI'm Harrison.\n\nHow? Are? You?\nOkay then f f f f.
|
|
102
|
+
This is a weird text to write, but gotta test the splittingggg some how.\n\n
|
|
103
|
+
Bye!\n\n-H.`;
|
|
104
|
+
const splitter = new text_splitter_1.RecursiveCharacterTextSplitter({
|
|
105
|
+
chunkSize: 10,
|
|
106
|
+
chunkOverlap: 1,
|
|
107
|
+
});
|
|
108
|
+
const output = splitter.splitText(text);
|
|
109
|
+
const expectedOutput = [
|
|
110
|
+
"Hi.",
|
|
111
|
+
"I'm",
|
|
112
|
+
"Harrison.",
|
|
113
|
+
"How? Are?",
|
|
114
|
+
"You?",
|
|
115
|
+
"Okay then f",
|
|
116
|
+
"f f f f.",
|
|
117
|
+
"This is a",
|
|
118
|
+
"a weird",
|
|
119
|
+
"text to",
|
|
120
|
+
"write, but",
|
|
121
|
+
"gotta test",
|
|
122
|
+
"the",
|
|
123
|
+
"splitting",
|
|
124
|
+
"gggg",
|
|
125
|
+
"some how.",
|
|
126
|
+
"Bye!\n\n-H.",
|
|
127
|
+
];
|
|
128
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
129
|
+
});
|
|
130
|
+
(0, globals_1.test)("Token text splitter", () => {
|
|
131
|
+
const text = "foo bar baz a a";
|
|
132
|
+
const splitter = new text_splitter_1.TokenTextSplitter({
|
|
133
|
+
encodingName: "r50k_base",
|
|
134
|
+
chunkSize: 3,
|
|
135
|
+
chunkOverlap: 0,
|
|
136
|
+
});
|
|
137
|
+
const output = splitter.splitText(text);
|
|
138
|
+
const expectedOutput = ["foo bar b", "az a a"];
|
|
139
|
+
(0, globals_1.expect)(output).toEqual(expectedOutput);
|
|
140
|
+
});
|
|
141
|
+
//# sourceMappingURL=text_splitter.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"text_splitter.test.js","sourceRoot":"","sources":["../../tests/text_splitter.test.ts"],"names":[],"mappings":";;AAAA,2CAA6C;AAC7C,0CAAuC;AACvC,oDAI0B;AAE1B,IAAA,cAAI,EAAC,oCAAoC,EAAE,GAAG,EAAE;IAC9C,MAAM,IAAI,GAAG,iBAAiB,CAAC;IAC/B,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,SAAS,EAAE,SAAS,EAAE,SAAS,CAAC,CAAC;IACzD,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,mEAAmE,EAAE,GAAG,EAAE;IAC7E,MAAM,IAAI,GAAG,UAAU,CAAC;IACxB,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,KAAK,CAAC,CAAC;IACtC,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,kDAAkD,EAAE,GAAG,EAAE;IAC5D,MAAM,IAAI,GAAG,iBAAiB,CAAC;IAC/B,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IACpD,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,iEAAiE,EAAE,GAAG,EAAE;IAC3E,MAAM,IAAI,GAAG,iBAAiB,CAAC;IAC/B,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IACpD,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,4DAA4D,EAAE,GAAG,EAAE;IACtE,MAAM,IAAI,GAAG,iBAAiB,CAAC;IAC/B,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,CAAC,CAAC;IACpD,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,yBAAyB,EAAE,GAAG,EAAE;IACnC,IAAA,gBAAM,EAAC,GAAG,EAAE;QACV,MAAM,GAAG,GAAG,IAAI,qCAAqB,CAAC,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC,CAAC;QACzE,OAAO,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;IACnB,CAAC,CAAC,CAAC,OAAO,EAAE,CAAC;AACf,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,+BAA+B,EAAE,GAAG,EAAE;IACzC,MAAM,KAAK,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACjC,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC;IAC7C,MAAM,YAAY,GAAG;QACnB,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;QACpC,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;QACpC,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,CAAC;KACrC,CAAC;IACF,IAAA,gBAAM,EAAC,IAAI,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,6CAA6C,EAAE,GAAG,EAAE;IACvD,MAAM,KAAK,GAAG,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC;IACjC,MAAM,QAAQ,GAAG,IAAI,qCAAqB,CAAC;QACzC,SAAS,EAAE,GAAG;QACd,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,IAAI,GAAG,QAAQ,CAAC,eAAe,CAAC,KAAK,EAAE;QAC3C,EAAE,MAAM,EAAE,GAAG,EAAE;QACf,EAAE,MAAM,EAAE,GAAG,EAAE;KAChB,CAAC,CAAC;IACH,MAAM,YAAY,GAAG;QACnB,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC/D,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;QAC/D,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,EAAE,MAAM,EAAE,GAAG,EAAE,EAAE,CAAC;KAChE,CAAC;IACF,IAAA,gBAAM,EAAC,IAAI,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;AACrC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,+BAA+B,EAAE,GAAG,EAAE;IACzC,MAAM,IAAI,GAAG;;YAEH,CAAC;IACX,MAAM,QAAQ,GAAG,IAAI,8CAA8B,CAAC;QAClD,SAAS,EAAE,EAAE;QACb,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG;QACrB,KAAK;QACL,KAAK;QACL,WAAW;QACX,WAAW;QACX,MAAM;QACN,aAAa;QACb,UAAU;QACV,WAAW;QACX,SAAS;QACT,SAAS;QACT,YAAY;QACZ,YAAY;QACZ,KAAK;QACL,WAAW;QACX,MAAM;QACN,WAAW;QACX,aAAa;KACd,CAAC;IACF,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC;AAEH,IAAA,cAAI,EAAC,qBAAqB,EAAE,GAAG,EAAE;IAC/B,MAAM,IAAI,GAAG,iBAAiB,CAAC;IAC/B,MAAM,QAAQ,GAAG,IAAI,iCAAiB,CAAC;QACrC,YAAY,EAAE,WAAW;QACzB,SAAS,EAAE,CAAC;QACZ,YAAY,EAAE,CAAC;KAChB,CAAC,CAAC;IACH,MAAM,MAAM,GAAG,QAAQ,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;IACxC,MAAM,cAAc,GAAG,CAAC,WAAW,EAAE,QAAQ,CAAC,CAAC;IAE/C,IAAA,gBAAM,EAAC,MAAM,CAAC,CAAC,OAAO,CAAC,cAAc,CAAC,CAAC;AACzC,CAAC,CAAC,CAAC"}
|
package/dist/text_splitter.d.ts
CHANGED
|
@@ -1,9 +1,10 @@
|
|
|
1
|
+
import type * as tiktoken from "@dqbd/tiktoken";
|
|
1
2
|
import { Document } from "./document";
|
|
2
3
|
interface TextSplitterParams {
|
|
3
4
|
chunkSize: number;
|
|
4
5
|
chunkOverlap: number;
|
|
5
6
|
}
|
|
6
|
-
declare abstract class TextSplitter implements TextSplitterParams {
|
|
7
|
+
export declare abstract class TextSplitter implements TextSplitterParams {
|
|
7
8
|
chunkSize: number;
|
|
8
9
|
chunkOverlap: number;
|
|
9
10
|
constructor(fields?: Partial<TextSplitterParams>);
|
|
@@ -29,4 +30,20 @@ export declare class RecursiveCharacterTextSplitter extends TextSplitter impleme
|
|
|
29
30
|
constructor(fields?: Partial<RecursiveCharacterTextSplitterParams>);
|
|
30
31
|
splitText(text: string): string[];
|
|
31
32
|
}
|
|
33
|
+
export interface TokenTextSplitterParams extends TextSplitterParams {
|
|
34
|
+
encodingName: tiktoken.TiktokenEmbedding;
|
|
35
|
+
allowedSpecial: "all" | Set<string>;
|
|
36
|
+
disallowedSpecial: "all" | Array<string>;
|
|
37
|
+
}
|
|
38
|
+
/**
|
|
39
|
+
* Implementation of splitter which looks at tokens.
|
|
40
|
+
*/
|
|
41
|
+
export declare class TokenTextSplitter extends TextSplitter implements TokenTextSplitterParams {
|
|
42
|
+
encodingName: tiktoken.TiktokenEmbedding;
|
|
43
|
+
allowedSpecial: "all" | Set<string>;
|
|
44
|
+
disallowedSpecial: "all" | Array<string>;
|
|
45
|
+
private tokenizer;
|
|
46
|
+
constructor(fields?: Partial<TokenTextSplitterParams>);
|
|
47
|
+
splitText(text: string): string[];
|
|
48
|
+
}
|
|
32
49
|
export {};
|
package/dist/text_splitter.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
"use strict";
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
-
exports.RecursiveCharacterTextSplitter = exports.CharacterTextSplitter = void 0;
|
|
3
|
+
exports.TokenTextSplitter = exports.RecursiveCharacterTextSplitter = exports.CharacterTextSplitter = exports.TextSplitter = void 0;
|
|
4
4
|
const document_1 = require("./document");
|
|
5
5
|
class TextSplitter {
|
|
6
6
|
constructor(fields) {
|
|
@@ -81,6 +81,7 @@ which is longer than the specified ${this.chunkSize}`);
|
|
|
81
81
|
return docs;
|
|
82
82
|
}
|
|
83
83
|
}
|
|
84
|
+
exports.TextSplitter = TextSplitter;
|
|
84
85
|
class CharacterTextSplitter extends TextSplitter {
|
|
85
86
|
constructor(fields) {
|
|
86
87
|
var _a;
|
|
@@ -164,4 +165,72 @@ class RecursiveCharacterTextSplitter extends TextSplitter {
|
|
|
164
165
|
}
|
|
165
166
|
}
|
|
166
167
|
exports.RecursiveCharacterTextSplitter = RecursiveCharacterTextSplitter;
|
|
168
|
+
/**
|
|
169
|
+
* Implementation of splitter which looks at tokens.
|
|
170
|
+
*/
|
|
171
|
+
class TokenTextSplitter extends TextSplitter {
|
|
172
|
+
constructor(fields) {
|
|
173
|
+
var _a, _b, _c;
|
|
174
|
+
super(fields);
|
|
175
|
+
Object.defineProperty(this, "encodingName", {
|
|
176
|
+
enumerable: true,
|
|
177
|
+
configurable: true,
|
|
178
|
+
writable: true,
|
|
179
|
+
value: void 0
|
|
180
|
+
});
|
|
181
|
+
Object.defineProperty(this, "allowedSpecial", {
|
|
182
|
+
enumerable: true,
|
|
183
|
+
configurable: true,
|
|
184
|
+
writable: true,
|
|
185
|
+
value: void 0
|
|
186
|
+
});
|
|
187
|
+
Object.defineProperty(this, "disallowedSpecial", {
|
|
188
|
+
enumerable: true,
|
|
189
|
+
configurable: true,
|
|
190
|
+
writable: true,
|
|
191
|
+
value: void 0
|
|
192
|
+
});
|
|
193
|
+
Object.defineProperty(this, "tokenizer", {
|
|
194
|
+
enumerable: true,
|
|
195
|
+
configurable: true,
|
|
196
|
+
writable: true,
|
|
197
|
+
value: void 0
|
|
198
|
+
});
|
|
199
|
+
this.encodingName = (_a = fields === null || fields === void 0 ? void 0 : fields.encodingName) !== null && _a !== void 0 ? _a : "gpt2";
|
|
200
|
+
this.allowedSpecial = (_b = fields === null || fields === void 0 ? void 0 : fields.allowedSpecial) !== null && _b !== void 0 ? _b : new Set();
|
|
201
|
+
this.disallowedSpecial = (_c = fields === null || fields === void 0 ? void 0 : fields.disallowedSpecial) !== null && _c !== void 0 ? _c : "all";
|
|
202
|
+
if ((fields === null || fields === void 0 ? void 0 : fields.allowedSpecial) != null) {
|
|
203
|
+
throw new Error("allowedSpecial is not implemented yet.");
|
|
204
|
+
}
|
|
205
|
+
if ((fields === null || fields === void 0 ? void 0 : fields.disallowedSpecial) != null) {
|
|
206
|
+
throw new Error("disallowedSpecial is not implemented yet.");
|
|
207
|
+
}
|
|
208
|
+
try {
|
|
209
|
+
const tiktoken =
|
|
210
|
+
// eslint-disable-next-line @typescript-eslint/no-var-requires, global-require
|
|
211
|
+
require("@dqbd/tiktoken");
|
|
212
|
+
this.tokenizer = tiktoken.get_encoding(this.encodingName);
|
|
213
|
+
}
|
|
214
|
+
catch (err) {
|
|
215
|
+
console.error(err);
|
|
216
|
+
throw new Error("Please install @dqbd/tiktoken as a dependency with, e.g. `npm install -S @dqbd/tiktoken`");
|
|
217
|
+
}
|
|
218
|
+
}
|
|
219
|
+
splitText(text) {
|
|
220
|
+
const splits = [];
|
|
221
|
+
const input_ids = this.tokenizer.encode(text);
|
|
222
|
+
let start_idx = 0;
|
|
223
|
+
let cur_idx = Math.min(start_idx + this.chunkSize, input_ids.length);
|
|
224
|
+
let chunk_ids = input_ids.slice(start_idx, cur_idx);
|
|
225
|
+
const decoder = new TextDecoder();
|
|
226
|
+
while (start_idx < input_ids.length) {
|
|
227
|
+
splits.push(decoder.decode(this.tokenizer.decode(chunk_ids)));
|
|
228
|
+
start_idx += this.chunkSize - this.chunkOverlap;
|
|
229
|
+
cur_idx = Math.min(start_idx + this.chunkSize, input_ids.length);
|
|
230
|
+
chunk_ids = input_ids.slice(start_idx, cur_idx);
|
|
231
|
+
}
|
|
232
|
+
return splits;
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
exports.TokenTextSplitter = TokenTextSplitter;
|
|
167
236
|
//# sourceMappingURL=text_splitter.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"text_splitter.js","sourceRoot":"","sources":["../text_splitter.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"text_splitter.js","sourceRoot":"","sources":["../text_splitter.ts"],"names":[],"mappings":";;;AACA,yCAAsC;AAQtC,MAAsB,YAAY;IAKhC,YAAY,MAAoC;;QAJhD;;;;mBAAY,IAAI;WAAC;QAEjB;;;;mBAAe,GAAG;WAAC;QAGjB,IAAI,CAAC,SAAS,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,SAAS,mCAAI,IAAI,CAAC,SAAS,CAAC;QACrD,IAAI,CAAC,YAAY,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,YAAY,mCAAI,IAAI,CAAC,YAAY,CAAC;QAC9D,IAAI,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,SAAS,EAAE;YACvC,MAAM,IAAI,KAAK,CAAC,uCAAuC,CAAC,CAAC;SAC1D;IACH,CAAC;IAID,eAAe,CACb,KAAe;IACf,8DAA8D;IAC9D,YAAmC,EAAE;QAErC,MAAM,UAAU,GACd,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,KAAK,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;QACtE,MAAM,SAAS,GAAG,IAAI,KAAK,EAAY,CAAC;QACxC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE;YACxC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,EAAE;gBACxC,SAAS,CAAC,IAAI,CACZ,IAAI,mBAAQ,CAAC,EAAE,WAAW,EAAE,KAAK,EAAE,QAAQ,EAAE,UAAU,CAAC,CAAC,CAAC,EAAE,CAAC,CAC9D,CAAC;aACH;SACF;QACD,OAAO,SAAS,CAAC;IACnB,CAAC;IAED,cAAc,CAAC,SAAqB;QAClC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,WAAW,CAAC,CAAC;QACtD,MAAM,SAAS,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,GAAG,EAAE,EAAE,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;QACvD,OAAO,IAAI,CAAC,eAAe,CAAC,KAAK,EAAE,SAAS,CAAC,CAAC;IAChD,CAAC;IAEO,QAAQ,CAAC,IAAc,EAAE,SAAiB;QAChD,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;QACzC,OAAO,IAAI,KAAK,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC;IACnC,CAAC;IAED,WAAW,CAAC,MAAgB,EAAE,SAAiB;QAC7C,MAAM,IAAI,GAAa,EAAE,CAAC;QAC1B,MAAM,UAAU,GAAa,EAAE,CAAC;QAChC,IAAI,KAAK,GAAG,CAAC,CAAC;QACd,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE;YACtB,MAAM,IAAI,GAAG,CAAC,CAAC,MAAM,CAAC;YACtB,IAAI,KAAK,GAAG,IAAI,IAAI,IAAI,CAAC,SAAS,EAAE;gBAClC,IAAI,KAAK,GAAG,IAAI,CAAC,SAAS,EAAE;oBAC1B,OAAO,CAAC,IAAI,CACV,2BAA2B,KAAK;qCACP,IAAI,CAAC,SAAS,EAAE,CAC1C,CAAC;iBACH;gBACD,IAAI,UAAU,CAAC,MAAM,GAAG,CAAC,EAAE;oBACzB,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;oBACjD,IAAI,GAAG,KAAK,IAAI,EAAE;wBAChB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;qBAChB;oBACD,sBAAsB;oBACtB,qDAAqD;oBACrD,0DAA0D;oBAC1D,OACE,KAAK,GAAG,IAAI,CAAC,YAAY;wBACzB,CAAC,KAAK,GAAG,IAAI,GAAG,IAAI,CAAC,SAAS,IAAI,KAAK,GAAG,CAAC,CAAC,EAC5C;wBACA,KAAK,IAAI,UAAU,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC;wBAC9B,UAAU,CAAC,KAAK,EAAE,CAAC;qBACpB;iBACF;aACF;YACD,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;YACnB,KAAK,IAAI,IAAI,CAAC;SACf;QACD,MAAM,GAAG,GAAG,IAAI,CAAC,QAAQ,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;QACjD,IAAI,GAAG,KAAK,IAAI,EAAE;YAChB,IAAI,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;SAChB;QACD,OAAO,IAAI,CAAC;IACd,CAAC;CACF;AApFD,oCAoFC;AAMD,MAAa,qBACX,SAAQ,YAAY;IAKpB,YAAY,MAA6C;;QACvD,KAAK,CAAC,MAAM,CAAC,CAAC;QAHhB;;;;mBAAY,MAAM;WAAC;QAIjB,IAAI,CAAC,SAAS,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,SAAS,mCAAI,IAAI,CAAC,SAAS,CAAC;IACvD,CAAC;IAEM,SAAS,CAAC,IAAY;QAC3B,uEAAuE;QACvE,IAAI,MAAgB,CAAC;QACrB,IAAI,IAAI,CAAC,SAAS,EAAE;YAClB,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;SACrC;aAAM;YACL,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;SACzB;QACD,OAAO,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;IAClD,CAAC;CACF;AArBD,sDAqBC;AAOD,MAAa,8BACX,SAAQ,YAAY;IAKpB,YAAY,MAAsD;;QAChE,KAAK,CAAC,MAAM,CAAC,CAAC;QAHhB;;;;mBAAuB,CAAC,MAAM,EAAE,IAAI,EAAE,GAAG,EAAE,EAAE,CAAC;WAAC;QAI7C,IAAI,CAAC,UAAU,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,UAAU,mCAAI,IAAI,CAAC,UAAU,CAAC;IAC1D,CAAC;IAED,SAAS,CAAC,IAAY;QACpB,MAAM,WAAW,GAAa,EAAE,CAAC;QAEjC,mCAAmC;QACnC,IAAI,SAAS,GAAW,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC,UAAU,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;QACpE,KAAK,MAAM,CAAC,IAAI,IAAI,CAAC,UAAU,EAAE;YAC/B,IAAI,CAAC,KAAK,EAAE,EAAE;gBACZ,SAAS,GAAG,CAAC,CAAC;gBACd,MAAM;aACP;YACD,IAAI,IAAI,CAAC,QAAQ,CAAC,CAAC,CAAC,EAAE;gBACpB,SAAS,GAAG,CAAC,CAAC;gBACd,MAAM;aACP;SACF;QAED,iDAAiD;QACjD,IAAI,MAAgB,CAAC;QACrB,IAAI,SAAS,EAAE;YACb,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;SAChC;aAAM;YACL,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;SACzB;QAED,6DAA6D;QAC7D,IAAI,UAAU,GAAa,EAAE,CAAC;QAC9B,KAAK,MAAM,CAAC,IAAI,MAAM,EAAE;YACtB,IAAI,CAAC,CAAC,MAAM,GAAG,IAAI,CAAC,SAAS,EAAE;gBAC7B,UAAU,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;aACpB;iBAAM;gBACL,IAAI,UAAU,CAAC,MAAM,EAAE;oBACrB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;oBAC3D,WAAW,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;oBAChC,UAAU,GAAG,EAAE,CAAC;iBACjB;gBACD,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;gBACpC,WAAW,CAAC,IAAI,CAAC,GAAG,SAAS,CAAC,CAAC;aAChC;SACF;QACD,IAAI,UAAU,CAAC,MAAM,EAAE;YACrB,MAAM,UAAU,GAAG,IAAI,CAAC,WAAW,CAAC,UAAU,EAAE,SAAS,CAAC,CAAC;YAC3D,WAAW,CAAC,IAAI,CAAC,GAAG,UAAU,CAAC,CAAC;SACjC;QACD,OAAO,WAAW,CAAC;IACrB,CAAC;CACF;AAxDD,wEAwDC;AAQD;;GAEG;AACH,MAAa,iBACX,SAAQ,YAAY;IAWpB,YAAY,MAAyC;;QACnD,KAAK,CAAC,MAAM,CAAC,CAAC;QAThB;;;;;WAAyC;QAEzC;;;;;WAAoC;QAEpC;;;;;WAAyC;QAEzC;;;;;WAAqC;QAKnC,IAAI,CAAC,YAAY,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,YAAY,mCAAI,MAAM,CAAC;QACnD,IAAI,CAAC,cAAc,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,cAAc,mCAAI,IAAI,GAAG,EAAE,CAAC;QAC1D,IAAI,CAAC,iBAAiB,GAAG,MAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,iBAAiB,mCAAI,KAAK,CAAC;QAE5D,IAAI,CAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,cAAc,KAAI,IAAI,EAAE;YAClC,MAAM,IAAI,KAAK,CAAC,wCAAwC,CAAC,CAAC;SAC3D;QAED,IAAI,CAAA,MAAM,aAAN,MAAM,uBAAN,MAAM,CAAE,iBAAiB,KAAI,IAAI,EAAE;YACrC,MAAM,IAAI,KAAK,CAAC,2CAA2C,CAAC,CAAC;SAC9D;QAED,IAAI;YACF,MAAM,QAAQ;YACZ,8EAA8E;YAC9E,OAAO,CAAC,gBAAgB,CAAoC,CAAC;YAC/D,IAAI,CAAC,SAAS,GAAG,QAAQ,CAAC,YAAY,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;SAC3D;QAAC,OAAO,GAAG,EAAE;YACZ,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC;YACnB,MAAM,IAAI,KAAK,CACb,0FAA0F,CAC3F,CAAC;SACH;IACH,CAAC;IAED,SAAS,CAAC,IAAY;QACpB,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,MAAM,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QAE9C,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;QACrE,IAAI,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;QAEpD,MAAM,OAAO,GAAG,IAAI,WAAW,EAAE,CAAC;QAElC,OAAO,SAAS,GAAG,SAAS,CAAC,MAAM,EAAE;YACnC,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,MAAM,CAAC,IAAI,CAAC,SAAS,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,CAAC,CAAC;YAE9D,SAAS,IAAI,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC,YAAY,CAAC;YAChD,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,EAAE,SAAS,CAAC,MAAM,CAAC,CAAC;YACjE,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,SAAS,EAAE,OAAO,CAAC,CAAC;SACjD;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF;AA7DD,8CA6DC"}
|
package/dist/util/hub.js
CHANGED
|
@@ -12,6 +12,7 @@ const index_1 = require("./index");
|
|
|
12
12
|
const HUB_PATH_REGEX = /lc(@[^:]+)?:\/\/(.*)/;
|
|
13
13
|
const DEFAULT_REF = (_a = process.env.LANGCHAIN_HUB_DEFAULT_REF) !== null && _a !== void 0 ? _a : "master";
|
|
14
14
|
const URL_BASE = (_b = process.env.LANGCHAIN_HUB_URL_BASE) !== null && _b !== void 0 ? _b : "https://raw.githubusercontent.com/hwchase17/langchain-hub/";
|
|
15
|
+
const URL_PATH_SEPARATOR = "/";
|
|
15
16
|
const loadFromHub = async (uri, loader, validPrefix, validSuffixes, values = {}) => {
|
|
16
17
|
const match = uri.match(HUB_PATH_REGEX);
|
|
17
18
|
if (!match) {
|
|
@@ -19,7 +20,7 @@ const loadFromHub = async (uri, loader, validPrefix, validSuffixes, values = {})
|
|
|
19
20
|
}
|
|
20
21
|
const [rawRef, remotePath] = match.slice(1);
|
|
21
22
|
const ref = rawRef ? rawRef.slice(1) : DEFAULT_REF;
|
|
22
|
-
const parts = remotePath.split(
|
|
23
|
+
const parts = remotePath.split(URL_PATH_SEPARATOR);
|
|
23
24
|
if (parts[0] !== validPrefix) {
|
|
24
25
|
return undefined;
|
|
25
26
|
}
|
|
@@ -33,7 +34,7 @@ const loadFromHub = async (uri, loader, validPrefix, validSuffixes, values = {})
|
|
|
33
34
|
}
|
|
34
35
|
const text = await res.text();
|
|
35
36
|
const tmpdir = fs_1.default.mkdtempSync(path_1.default.join(os_1.default.tmpdir(), "langchain"));
|
|
36
|
-
const file = path_1.default.join(tmpdir, path_1.default.basename(remotePath));
|
|
37
|
+
const file = path_1.default.join(tmpdir, path_1.default.basename(remotePath.replace(URL_PATH_SEPARATOR, path_1.default.sep)));
|
|
37
38
|
fs_1.default.writeFileSync(file, text);
|
|
38
39
|
return loader(file, values);
|
|
39
40
|
};
|
package/dist/util/hub.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"hub.js","sourceRoot":"","sources":["../../util/hub.ts"],"names":[],"mappings":";;;;;;;AAAA,gDAAwB;AACxB,4CAAoB;AACpB,4CAAoB;AACpB,mCAA2C;AAE3C,MAAM,cAAc,GAAG,sBAAsB,CAAC;AAC9C,MAAM,WAAW,GAAG,MAAA,OAAO,CAAC,GAAG,CAAC,yBAAyB,mCAAI,QAAQ,CAAC;AACtE,MAAM,QAAQ,GACZ,MAAA,OAAO,CAAC,GAAG,CAAC,sBAAsB,mCAClC,4DAA4D,CAAC;
|
|
1
|
+
{"version":3,"file":"hub.js","sourceRoot":"","sources":["../../util/hub.ts"],"names":[],"mappings":";;;;;;;AAAA,gDAAwB;AACxB,4CAAoB;AACpB,4CAAoB;AACpB,mCAA2C;AAE3C,MAAM,cAAc,GAAG,sBAAsB,CAAC;AAC9C,MAAM,WAAW,GAAG,MAAA,OAAO,CAAC,GAAG,CAAC,yBAAyB,mCAAI,QAAQ,CAAC;AACtE,MAAM,QAAQ,GACZ,MAAA,OAAO,CAAC,GAAG,CAAC,sBAAsB,mCAClC,4DAA4D,CAAC;AAI/D,MAAM,kBAAkB,GAAG,GAAG,CAAC;AAExB,MAAM,WAAW,GAAG,KAAK,EAC9B,GAAW,EACX,MAA4C,EAC5C,WAAmB,EACnB,aAA0B,EAC1B,SAAqB,EAAE,EACC,EAAE;IAC1B,MAAM,KAAK,GAAG,GAAG,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;IACxC,IAAI,CAAC,KAAK,EAAE;QACV,OAAO,SAAS,CAAC;KAClB;IACD,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC;IAC5C,MAAM,GAAG,GAAG,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;IACnD,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC;IACnD,IAAI,KAAK,CAAC,CAAC,CAAC,KAAK,WAAW,EAAE;QAC5B,OAAO,SAAS,CAAC;KAClB;IAED,IAAI,CAAC,aAAa,CAAC,GAAG,CAAC,cAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,EAAE;QACzD,MAAM,IAAI,KAAK,CAAC,wBAAwB,CAAC,CAAC;KAC3C;IAED,MAAM,GAAG,GAAG,CAAC,QAAQ,EAAE,GAAG,EAAE,UAAU,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IAClD,MAAM,GAAG,GAAG,MAAM,IAAA,wBAAgB,EAAC,GAAG,EAAE,EAAE,OAAO,EAAE,IAAI,EAAE,CAAC,CAAC;IAC3D,IAAI,GAAG,CAAC,MAAM,KAAK,GAAG,EAAE;QACtB,MAAM,IAAI,KAAK,CAAC,0BAA0B,GAAG,EAAE,CAAC,CAAC;KAClD;IAED,MAAM,IAAI,GAAG,MAAM,GAAG,CAAC,IAAI,EAAE,CAAC;IAC9B,MAAM,MAAM,GAAG,YAAE,CAAC,WAAW,CAAC,cAAI,CAAC,IAAI,CAAC,YAAE,CAAC,MAAM,EAAE,EAAE,WAAW,CAAC,CAAC,CAAC;IACnE,MAAM,IAAI,GAAG,cAAI,CAAC,IAAI,CACpB,MAAM,EACN,cAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,OAAO,CAAC,kBAAkB,EAAE,cAAI,CAAC,GAAG,CAAC,CAAC,CAChE,CAAC;IACF,YAAE,CAAC,aAAa,CAAC,IAAI,EAAE,IAAI,CAAC,CAAC;IAC7B,OAAO,MAAM,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;AAC9B,CAAC,CAAC;AApCW,QAAA,WAAW,eAoCtB"}
|
|
@@ -1,14 +1,10 @@
|
|
|
1
1
|
import { Embeddings } from "../embeddings/base";
|
|
2
2
|
import { Document } from "../document";
|
|
3
|
-
export interface DocStore {
|
|
4
|
-
[key: number]: object;
|
|
5
|
-
}
|
|
6
3
|
export declare abstract class VectorStore {
|
|
7
4
|
embeddings: Embeddings;
|
|
8
|
-
|
|
5
|
+
constructor(embeddings: Embeddings);
|
|
9
6
|
abstract addVectors(vectors: number[][], documents: Document[]): Promise<void>;
|
|
10
7
|
abstract similaritySearchVectorWithScore(query: number[], k: number): Promise<[Document, number][]>;
|
|
11
|
-
addDocuments(documents: Document[]): Promise<void>;
|
|
12
8
|
similaritySearch(query: string, k?: number): Promise<Document[]>;
|
|
13
9
|
similaritySearchWithScore(query: string, k?: number): Promise<[object, number][]>;
|
|
14
10
|
}
|
|
@@ -2,23 +2,14 @@
|
|
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
3
|
exports.SaveableVectorStore = exports.VectorStore = void 0;
|
|
4
4
|
class VectorStore {
|
|
5
|
-
constructor() {
|
|
5
|
+
constructor(embeddings) {
|
|
6
6
|
Object.defineProperty(this, "embeddings", {
|
|
7
7
|
enumerable: true,
|
|
8
8
|
configurable: true,
|
|
9
9
|
writable: true,
|
|
10
10
|
value: void 0
|
|
11
11
|
});
|
|
12
|
-
|
|
13
|
-
enumerable: true,
|
|
14
|
-
configurable: true,
|
|
15
|
-
writable: true,
|
|
16
|
-
value: void 0
|
|
17
|
-
});
|
|
18
|
-
}
|
|
19
|
-
async addDocuments(documents) {
|
|
20
|
-
const texts = documents.map(({ pageContent }) => pageContent);
|
|
21
|
-
this.addVectors(await this.embeddings.embedDocuments(texts), documents);
|
|
12
|
+
this.embeddings = embeddings;
|
|
22
13
|
}
|
|
23
14
|
async similaritySearch(query, k = 4) {
|
|
24
15
|
const results = await this.similaritySearchVectorWithScore(await this.embeddings.embedQuery(query), k);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../vectorstores/base.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"base.js","sourceRoot":"","sources":["../../vectorstores/base.ts"],"names":[],"mappings":";;;AAGA,MAAsB,WAAW;IAG/B,YAAY,UAAsB;QAFlC;;;;;WAAuB;QAGrB,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IAC/B,CAAC;IAYD,KAAK,CAAC,gBAAgB,CAAC,KAAa,EAAE,CAAC,GAAG,CAAC;QACzC,MAAM,OAAO,GAAG,MAAM,IAAI,CAAC,+BAA+B,CACxD,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,EACvC,CAAC,CACF,CAAC;QAEF,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;IAC5C,CAAC;IAED,KAAK,CAAC,yBAAyB,CAC7B,KAAa,EACb,CAAC,GAAG,CAAC;QAEL,OAAO,IAAI,CAAC,+BAA+B,CACzC,MAAM,IAAI,CAAC,UAAU,CAAC,UAAU,CAAC,KAAK,CAAC,EACvC,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAnCD,kCAmCC;AAED,MAAsB,mBAAoB,SAAQ,WAAW;IAG3D,MAAM,CAAC,IAAI,CACT,UAAkB,EAClB,WAAuB;QAEvB,MAAM,IAAI,KAAK,CAAC,iBAAiB,CAAC,CAAC;IACrC,CAAC;CACF;AATD,kDASC"}
|
|
@@ -1,19 +1,22 @@
|
|
|
1
1
|
import type { HierarchicalNSW as HierarchicalNSWT, SpaceName } from "hnswlib-node";
|
|
2
2
|
import { Embeddings } from "../embeddings/base";
|
|
3
|
-
import {
|
|
3
|
+
import { SaveableVectorStore } from "./base";
|
|
4
4
|
import { Document } from "../document";
|
|
5
|
+
import { InMemoryDocstore } from "../docstore";
|
|
5
6
|
export interface HNSWLibArgs {
|
|
6
7
|
space: SpaceName;
|
|
7
8
|
numDimensions?: number;
|
|
8
9
|
}
|
|
9
10
|
export declare class HNSWLib extends SaveableVectorStore {
|
|
10
11
|
index?: HierarchicalNSWT;
|
|
12
|
+
docstore: InMemoryDocstore;
|
|
11
13
|
args: HNSWLibArgs;
|
|
12
|
-
constructor(args: HNSWLibArgs, embeddings: Embeddings, docstore:
|
|
14
|
+
constructor(args: HNSWLibArgs, embeddings: Embeddings, docstore: InMemoryDocstore, index?: HierarchicalNSWT);
|
|
15
|
+
addDocuments(documents: Document[]): Promise<void>;
|
|
13
16
|
addVectors(vectors: number[][], documents: Document[]): Promise<void>;
|
|
14
17
|
similaritySearchVectorWithScore(query: number[], k: number): Promise<[Document, number][]>;
|
|
15
18
|
save(directory: string): Promise<void>;
|
|
16
19
|
static load(directory: string, embeddings: Embeddings): Promise<HNSWLib>;
|
|
17
|
-
static fromTexts(texts: string[], metadatas: object[], embeddings: Embeddings): Promise<HNSWLib>;
|
|
18
|
-
static fromDocuments(docs: Document[], embeddings: Embeddings): Promise<HNSWLib>;
|
|
20
|
+
static fromTexts(texts: string[], metadatas: object[], embeddings: Embeddings, docstore?: InMemoryDocstore): Promise<HNSWLib>;
|
|
21
|
+
static fromDocuments(docs: Document[], embeddings: Embeddings, docstore?: InMemoryDocstore): Promise<HNSWLib>;
|
|
19
22
|
}
|