@fllf/agent-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +240 -0
- package/dist/agent/Agent.d.ts +34 -0
- package/dist/agent/Agent.d.ts.map +1 -0
- package/dist/agent/Agent.js +168 -0
- package/dist/agent/Agent.js.map +1 -0
- package/dist/agent/createAgent.d.ts +4 -0
- package/dist/agent/createAgent.d.ts.map +1 -0
- package/dist/agent/createAgent.js +8 -0
- package/dist/agent/createAgent.js.map +1 -0
- package/dist/agent/index.d.ts +4 -0
- package/dist/agent/index.d.ts.map +1 -0
- package/dist/agent/index.js +8 -0
- package/dist/agent/index.js.map +1 -0
- package/dist/agent/types.d.ts +23 -0
- package/dist/agent/types.d.ts.map +1 -0
- package/dist/agent/types.js +3 -0
- package/dist/agent/types.js.map +1 -0
- package/dist/config/config.d.ts +35 -0
- package/dist/config/config.d.ts.map +1 -0
- package/dist/config/config.js +123 -0
- package/dist/config/config.js.map +1 -0
- package/dist/config/index.d.ts +3 -0
- package/dist/config/index.d.ts.map +1 -0
- package/dist/config/index.js +6 -0
- package/dist/config/index.js.map +1 -0
- package/dist/executors/base.d.ts +44 -0
- package/dist/executors/base.d.ts.map +1 -0
- package/dist/executors/base.js +3 -0
- package/dist/executors/base.js.map +1 -0
- package/dist/executors/index.d.ts +7 -0
- package/dist/executors/index.d.ts.map +1 -0
- package/dist/executors/index.js +10 -0
- package/dist/executors/index.js.map +1 -0
- package/dist/executors/rag-executor.d.ts +16 -0
- package/dist/executors/rag-executor.d.ts.map +1 -0
- package/dist/executors/rag-executor.js +120 -0
- package/dist/executors/rag-executor.js.map +1 -0
- package/dist/executors/simple-chat-executor.d.ts +5 -0
- package/dist/executors/simple-chat-executor.d.ts.map +1 -0
- package/dist/executors/simple-chat-executor.js +77 -0
- package/dist/executors/simple-chat-executor.js.map +1 -0
- package/dist/executors/tool-calling-executor.d.ts +10 -0
- package/dist/executors/tool-calling-executor.d.ts.map +1 -0
- package/dist/executors/tool-calling-executor.js +151 -0
- package/dist/executors/tool-calling-executor.js.map +1 -0
- package/dist/history/base.d.ts +16 -0
- package/dist/history/base.d.ts.map +1 -0
- package/dist/history/base.js +10 -0
- package/dist/history/base.js.map +1 -0
- package/dist/history/in-memory.d.ts +26 -0
- package/dist/history/in-memory.d.ts.map +1 -0
- package/dist/history/in-memory.js +88 -0
- package/dist/history/in-memory.js.map +1 -0
- package/dist/history/index.d.ts +5 -0
- package/dist/history/index.d.ts.map +1 -0
- package/dist/history/index.js +10 -0
- package/dist/history/index.js.map +1 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +26 -0
- package/dist/index.js.map +1 -0
- package/dist/llm/LLM.d.ts +23 -0
- package/dist/llm/LLM.d.ts.map +1 -0
- package/dist/llm/LLM.js +404 -0
- package/dist/llm/LLM.js.map +1 -0
- package/dist/llm/errors.d.ts +43 -0
- package/dist/llm/errors.d.ts.map +1 -0
- package/dist/llm/errors.js +128 -0
- package/dist/llm/errors.js.map +1 -0
- package/dist/llm/factory.d.ts +9 -0
- package/dist/llm/factory.d.ts.map +1 -0
- package/dist/llm/factory.js +28 -0
- package/dist/llm/factory.js.map +1 -0
- package/dist/llm/index.d.ts +6 -0
- package/dist/llm/index.d.ts.map +1 -0
- package/dist/llm/index.js +18 -0
- package/dist/llm/index.js.map +1 -0
- package/dist/llm/providers/base.d.ts +3 -0
- package/dist/llm/providers/base.d.ts.map +1 -0
- package/dist/llm/providers/base.js +3 -0
- package/dist/llm/providers/base.js.map +1 -0
- package/dist/llm/providers/local.d.ts +6 -0
- package/dist/llm/providers/local.d.ts.map +1 -0
- package/dist/llm/providers/local.js +19 -0
- package/dist/llm/providers/local.js.map +1 -0
- package/dist/llm/providers/openai-compatible.d.ts +13 -0
- package/dist/llm/providers/openai-compatible.d.ts.map +1 -0
- package/dist/llm/providers/openai-compatible.js +176 -0
- package/dist/llm/providers/openai-compatible.js.map +1 -0
- package/dist/llm/providers/openai.d.ts +6 -0
- package/dist/llm/providers/openai.d.ts.map +1 -0
- package/dist/llm/providers/openai.js +23 -0
- package/dist/llm/providers/openai.js.map +1 -0
- package/dist/llm/types.d.ts +116 -0
- package/dist/llm/types.d.ts.map +1 -0
- package/dist/llm/types.js +3 -0
- package/dist/llm/types.js.map +1 -0
- package/dist/messages/index.d.ts +3 -0
- package/dist/messages/index.d.ts.map +1 -0
- package/dist/messages/index.js +6 -0
- package/dist/messages/index.js.map +1 -0
- package/dist/messages/message.d.ts +25 -0
- package/dist/messages/message.d.ts.map +1 -0
- package/dist/messages/message.js +110 -0
- package/dist/messages/message.js.map +1 -0
- package/dist/messages/types.d.ts +20 -0
- package/dist/messages/types.d.ts.map +1 -0
- package/dist/messages/types.js +3 -0
- package/dist/messages/types.js.map +1 -0
- package/dist/observability/console-observer.d.ts +10 -0
- package/dist/observability/console-observer.d.ts.map +1 -0
- package/dist/observability/console-observer.js +28 -0
- package/dist/observability/console-observer.js.map +1 -0
- package/dist/observability/index.d.ts +5 -0
- package/dist/observability/index.d.ts.map +1 -0
- package/dist/observability/index.js +11 -0
- package/dist/observability/index.js.map +1 -0
- package/dist/observability/observer.d.ts +8 -0
- package/dist/observability/observer.d.ts.map +1 -0
- package/dist/observability/observer.js +46 -0
- package/dist/observability/observer.js.map +1 -0
- package/dist/observability/types.d.ts +34 -0
- package/dist/observability/types.d.ts.map +1 -0
- package/dist/observability/types.js +3 -0
- package/dist/observability/types.js.map +1 -0
- package/dist/rag/chunking/auto-chunker.d.ts +25 -0
- package/dist/rag/chunking/auto-chunker.d.ts.map +1 -0
- package/dist/rag/chunking/auto-chunker.js +101 -0
- package/dist/rag/chunking/auto-chunker.js.map +1 -0
- package/dist/rag/chunking/chunker.d.ts +44 -0
- package/dist/rag/chunking/chunker.d.ts.map +1 -0
- package/dist/rag/chunking/chunker.js +162 -0
- package/dist/rag/chunking/chunker.js.map +1 -0
- package/dist/rag/chunking/index.d.ts +6 -0
- package/dist/rag/chunking/index.d.ts.map +1 -0
- package/dist/rag/chunking/index.js +22 -0
- package/dist/rag/chunking/index.js.map +1 -0
- package/dist/rag/chunking/markdown-chunker.d.ts +13 -0
- package/dist/rag/chunking/markdown-chunker.d.ts.map +1 -0
- package/dist/rag/chunking/markdown-chunker.js +144 -0
- package/dist/rag/chunking/markdown-chunker.js.map +1 -0
- package/dist/rag/chunking/parent-child.d.ts +16 -0
- package/dist/rag/chunking/parent-child.d.ts.map +1 -0
- package/dist/rag/chunking/parent-child.js +76 -0
- package/dist/rag/chunking/parent-child.js.map +1 -0
- package/dist/rag/chunking/recursive-chunker.d.ts +11 -0
- package/dist/rag/chunking/recursive-chunker.d.ts.map +1 -0
- package/dist/rag/chunking/recursive-chunker.js +27 -0
- package/dist/rag/chunking/recursive-chunker.js.map +1 -0
- package/dist/rag/embeddings/embedder.d.ts +24 -0
- package/dist/rag/embeddings/embedder.d.ts.map +1 -0
- package/dist/rag/embeddings/embedder.js +73 -0
- package/dist/rag/embeddings/embedder.js.map +1 -0
- package/dist/rag/embeddings/embedding-cache.d.ts +40 -0
- package/dist/rag/embeddings/embedding-cache.d.ts.map +1 -0
- package/dist/rag/embeddings/embedding-cache.js +118 -0
- package/dist/rag/embeddings/embedding-cache.js.map +1 -0
- package/dist/rag/embeddings/fake-embedder.d.ts +20 -0
- package/dist/rag/embeddings/fake-embedder.d.ts.map +1 -0
- package/dist/rag/embeddings/fake-embedder.js +55 -0
- package/dist/rag/embeddings/fake-embedder.js.map +1 -0
- package/dist/rag/embeddings/index.d.ts +5 -0
- package/dist/rag/embeddings/index.d.ts.map +1 -0
- package/dist/rag/embeddings/index.js +21 -0
- package/dist/rag/embeddings/index.js.map +1 -0
- package/dist/rag/embeddings/openai-compatible-embedder.d.ts +46 -0
- package/dist/rag/embeddings/openai-compatible-embedder.d.ts.map +1 -0
- package/dist/rag/embeddings/openai-compatible-embedder.js +145 -0
- package/dist/rag/embeddings/openai-compatible-embedder.js.map +1 -0
- package/dist/rag/generation/context-builder.d.ts +22 -0
- package/dist/rag/generation/context-builder.d.ts.map +1 -0
- package/dist/rag/generation/context-builder.js +166 -0
- package/dist/rag/generation/context-builder.js.map +1 -0
- package/dist/rag/generation/generator.d.ts +25 -0
- package/dist/rag/generation/generator.d.ts.map +1 -0
- package/dist/rag/generation/generator.js +185 -0
- package/dist/rag/generation/generator.js.map +1 -0
- package/dist/rag/generation/index.d.ts +4 -0
- package/dist/rag/generation/index.d.ts.map +1 -0
- package/dist/rag/generation/index.js +20 -0
- package/dist/rag/generation/index.js.map +1 -0
- package/dist/rag/generation/verifier.d.ts +13 -0
- package/dist/rag/generation/verifier.d.ts.map +1 -0
- package/dist/rag/generation/verifier.js +43 -0
- package/dist/rag/generation/verifier.js.map +1 -0
- package/dist/rag/index.d.ts +9 -0
- package/dist/rag/index.d.ts.map +1 -0
- package/dist/rag/index.js +25 -0
- package/dist/rag/index.js.map +1 -0
- package/dist/rag/ingestion/index.d.ts +4 -0
- package/dist/rag/ingestion/index.d.ts.map +1 -0
- package/dist/rag/ingestion/index.js +20 -0
- package/dist/rag/ingestion/index.js.map +1 -0
- package/dist/rag/ingestion/loaders.d.ts +69 -0
- package/dist/rag/ingestion/loaders.d.ts.map +1 -0
- package/dist/rag/ingestion/loaders.js +653 -0
- package/dist/rag/ingestion/loaders.js.map +1 -0
- package/dist/rag/ingestion/metadata.d.ts +31 -0
- package/dist/rag/ingestion/metadata.d.ts.map +1 -0
- package/dist/rag/ingestion/metadata.js +81 -0
- package/dist/rag/ingestion/metadata.js.map +1 -0
- package/dist/rag/ingestion/normalizer.d.ts +20 -0
- package/dist/rag/ingestion/normalizer.d.ts.map +1 -0
- package/dist/rag/ingestion/normalizer.js +161 -0
- package/dist/rag/ingestion/normalizer.js.map +1 -0
- package/dist/rag/pipeline.d.ts +62 -0
- package/dist/rag/pipeline.d.ts.map +1 -0
- package/dist/rag/pipeline.js +75 -0
- package/dist/rag/pipeline.js.map +1 -0
- package/dist/rag/retrieval/dense-retriever.d.ts +17 -0
- package/dist/rag/retrieval/dense-retriever.d.ts.map +1 -0
- package/dist/rag/retrieval/dense-retriever.js +21 -0
- package/dist/rag/retrieval/dense-retriever.js.map +1 -0
- package/dist/rag/retrieval/fusion.d.ts +12 -0
- package/dist/rag/retrieval/fusion.d.ts.map +1 -0
- package/dist/rag/retrieval/fusion.js +54 -0
- package/dist/rag/retrieval/fusion.js.map +1 -0
- package/dist/rag/retrieval/http-rerank-model-client.d.ts +41 -0
- package/dist/rag/retrieval/http-rerank-model-client.d.ts.map +1 -0
- package/dist/rag/retrieval/http-rerank-model-client.js +130 -0
- package/dist/rag/retrieval/http-rerank-model-client.js.map +1 -0
- package/dist/rag/retrieval/hybrid-retriever.d.ts +22 -0
- package/dist/rag/retrieval/hybrid-retriever.d.ts.map +1 -0
- package/dist/rag/retrieval/hybrid-retriever.js +49 -0
- package/dist/rag/retrieval/hybrid-retriever.js.map +1 -0
- package/dist/rag/retrieval/index.d.ts +8 -0
- package/dist/rag/retrieval/index.d.ts.map +1 -0
- package/dist/rag/retrieval/index.js +24 -0
- package/dist/rag/retrieval/index.js.map +1 -0
- package/dist/rag/retrieval/reranker.d.ts +30 -0
- package/dist/rag/retrieval/reranker.d.ts.map +1 -0
- package/dist/rag/retrieval/reranker.js +85 -0
- package/dist/rag/retrieval/reranker.js.map +1 -0
- package/dist/rag/retrieval/sparse-retriever.d.ts +14 -0
- package/dist/rag/retrieval/sparse-retriever.d.ts.map +1 -0
- package/dist/rag/retrieval/sparse-retriever.js +18 -0
- package/dist/rag/retrieval/sparse-retriever.js.map +1 -0
- package/dist/rag/retrieval/types.d.ts +11 -0
- package/dist/rag/retrieval/types.d.ts.map +1 -0
- package/dist/rag/retrieval/types.js +10 -0
- package/dist/rag/retrieval/types.js.map +1 -0
- package/dist/rag/retrieval/utils.d.ts +8 -0
- package/dist/rag/retrieval/utils.d.ts.map +1 -0
- package/dist/rag/retrieval/utils.js +114 -0
- package/dist/rag/retrieval/utils.js.map +1 -0
- package/dist/rag/stores/in-memory-document-store.d.ts +14 -0
- package/dist/rag/stores/in-memory-document-store.d.ts.map +1 -0
- package/dist/rag/stores/in-memory-document-store.js +64 -0
- package/dist/rag/stores/in-memory-document-store.js.map +1 -0
- package/dist/rag/stores/in-memory-keyword-store.d.ts +10 -0
- package/dist/rag/stores/in-memory-keyword-store.d.ts.map +1 -0
- package/dist/rag/stores/in-memory-keyword-store.js +99 -0
- package/dist/rag/stores/in-memory-keyword-store.js.map +1 -0
- package/dist/rag/stores/in-memory-vector-store.d.ts +14 -0
- package/dist/rag/stores/in-memory-vector-store.d.ts.map +1 -0
- package/dist/rag/stores/in-memory-vector-store.js +61 -0
- package/dist/rag/stores/in-memory-vector-store.js.map +1 -0
- package/dist/rag/stores/index.d.ts +6 -0
- package/dist/rag/stores/index.d.ts.map +1 -0
- package/dist/rag/stores/index.js +22 -0
- package/dist/rag/stores/index.js.map +1 -0
- package/dist/rag/stores/postgres/index.d.ts +6 -0
- package/dist/rag/stores/postgres/index.d.ts.map +1 -0
- package/dist/rag/stores/postgres/index.js +22 -0
- package/dist/rag/stores/postgres/index.js.map +1 -0
- package/dist/rag/stores/postgres/pg-vector-store.d.ts +16 -0
- package/dist/rag/stores/postgres/pg-vector-store.d.ts.map +1 -0
- package/dist/rag/stores/postgres/pg-vector-store.js +77 -0
- package/dist/rag/stores/postgres/pg-vector-store.js.map +1 -0
- package/dist/rag/stores/postgres/postgres-document-store.d.ts +15 -0
- package/dist/rag/stores/postgres/postgres-document-store.d.ts.map +1 -0
- package/dist/rag/stores/postgres/postgres-document-store.js +174 -0
- package/dist/rag/stores/postgres/postgres-document-store.js.map +1 -0
- package/dist/rag/stores/postgres/postgres-keyword-store.d.ts +15 -0
- package/dist/rag/stores/postgres/postgres-keyword-store.d.ts.map +1 -0
- package/dist/rag/stores/postgres/postgres-keyword-store.js +163 -0
- package/dist/rag/stores/postgres/postgres-keyword-store.js.map +1 -0
- package/dist/rag/stores/postgres/schema.d.ts +8 -0
- package/dist/rag/stores/postgres/schema.d.ts.map +1 -0
- package/dist/rag/stores/postgres/schema.js +91 -0
- package/dist/rag/stores/postgres/schema.js.map +1 -0
- package/dist/rag/stores/postgres/sql.d.ts +23 -0
- package/dist/rag/stores/postgres/sql.d.ts.map +1 -0
- package/dist/rag/stores/postgres/sql.js +177 -0
- package/dist/rag/stores/postgres/sql.js.map +1 -0
- package/dist/rag/stores/postgres/types.d.ts +17 -0
- package/dist/rag/stores/postgres/types.d.ts.map +1 -0
- package/dist/rag/stores/postgres/types.js +3 -0
- package/dist/rag/stores/postgres/types.js.map +1 -0
- package/dist/rag/stores/types.d.ts +36 -0
- package/dist/rag/stores/types.d.ts.map +1 -0
- package/dist/rag/stores/types.js +3 -0
- package/dist/rag/stores/types.js.map +1 -0
- package/dist/rag/stores/utils.d.ts +11 -0
- package/dist/rag/stores/utils.d.ts.map +1 -0
- package/dist/rag/stores/utils.js +200 -0
- package/dist/rag/stores/utils.js.map +1 -0
- package/dist/rag/types.d.ts +174 -0
- package/dist/rag/types.d.ts.map +1 -0
- package/dist/rag/types.js +3 -0
- package/dist/rag/types.js.map +1 -0
- package/dist/tools/base.d.ts +28 -0
- package/dist/tools/base.d.ts.map +1 -0
- package/dist/tools/base.js +68 -0
- package/dist/tools/base.js.map +1 -0
- package/dist/tools/builtin/advancedSearchTool.d.ts +24 -0
- package/dist/tools/builtin/advancedSearchTool.d.ts.map +1 -0
- package/dist/tools/builtin/advancedSearchTool.js +134 -0
- package/dist/tools/builtin/advancedSearchTool.js.map +1 -0
- package/dist/tools/builtin/ragSearchTool.d.ts +29 -0
- package/dist/tools/builtin/ragSearchTool.d.ts.map +1 -0
- package/dist/tools/builtin/ragSearchTool.js +91 -0
- package/dist/tools/builtin/ragSearchTool.js.map +1 -0
- package/dist/tools/executor.d.ts +10 -0
- package/dist/tools/executor.d.ts.map +1 -0
- package/dist/tools/executor.js +86 -0
- package/dist/tools/executor.js.map +1 -0
- package/dist/tools/index.d.ts +9 -0
- package/dist/tools/index.d.ts.map +1 -0
- package/dist/tools/index.js +16 -0
- package/dist/tools/index.js.map +1 -0
- package/dist/tools/registry.d.ts +16 -0
- package/dist/tools/registry.d.ts.map +1 -0
- package/dist/tools/registry.js +35 -0
- package/dist/tools/registry.js.map +1 -0
- package/package.json +43 -0
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.MarkdownChunker = void 0;
|
|
4
|
+
const chunker_1 = require("./chunker");
|
|
5
|
+
class MarkdownChunker {
|
|
6
|
+
options;
|
|
7
|
+
constructor(options = {}) {
|
|
8
|
+
this.options = { ...options };
|
|
9
|
+
}
|
|
10
|
+
async chunk(document) {
|
|
11
|
+
if (document.blocks === undefined || document.blocks.length === 0) {
|
|
12
|
+
return this.chunkPlainContent(document);
|
|
13
|
+
}
|
|
14
|
+
const chunks = [];
|
|
15
|
+
const headingStack = [];
|
|
16
|
+
let chunkIndex = 0;
|
|
17
|
+
for (const block of document.blocks) {
|
|
18
|
+
if (block.type === 'heading') {
|
|
19
|
+
updateHeadingStack(headingStack, block);
|
|
20
|
+
if (this.options.includeHeadingInContent === true) {
|
|
21
|
+
chunks.push((0, chunker_1.createChunk)({
|
|
22
|
+
document,
|
|
23
|
+
content: block.text,
|
|
24
|
+
kind: 'text',
|
|
25
|
+
chunkIndex,
|
|
26
|
+
headingPath: headingStack.map((heading) => heading.text),
|
|
27
|
+
extra: { blockType: 'heading', strategy: 'markdown' },
|
|
28
|
+
}));
|
|
29
|
+
chunkIndex += 1;
|
|
30
|
+
}
|
|
31
|
+
continue;
|
|
32
|
+
}
|
|
33
|
+
const headingPath = headingStack.map((heading) => heading.text);
|
|
34
|
+
const blockChunks = createBlockChunks({
|
|
35
|
+
document,
|
|
36
|
+
block,
|
|
37
|
+
headingPath,
|
|
38
|
+
nextChunkIndex: chunkIndex,
|
|
39
|
+
options: this.options,
|
|
40
|
+
});
|
|
41
|
+
chunks.push(...blockChunks);
|
|
42
|
+
chunkIndex += blockChunks.length;
|
|
43
|
+
}
|
|
44
|
+
return chunks;
|
|
45
|
+
}
|
|
46
|
+
async chunkMany(documents) {
|
|
47
|
+
return await (0, chunker_1.chunkManyDocuments)(this, documents);
|
|
48
|
+
}
|
|
49
|
+
chunkPlainContent(document) {
|
|
50
|
+
return (0, chunker_1.splitTextSegments)(document.content, this.options)
|
|
51
|
+
.map((segment, index) => (0, chunker_1.createChunk)({
|
|
52
|
+
document,
|
|
53
|
+
content: segment.content,
|
|
54
|
+
kind: 'text',
|
|
55
|
+
chunkIndex: index,
|
|
56
|
+
startOffset: segment.startOffset,
|
|
57
|
+
endOffset: segment.endOffset,
|
|
58
|
+
extra: { strategy: 'markdown-fallback' },
|
|
59
|
+
}));
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
exports.MarkdownChunker = MarkdownChunker;
|
|
63
|
+
function createBlockChunks(input) {
|
|
64
|
+
switch (input.block.type) {
|
|
65
|
+
case 'paragraph':
|
|
66
|
+
return (0, chunker_1.splitTextSegments)(input.block.text, input.options)
|
|
67
|
+
.map((segment, offset) => (0, chunker_1.createChunk)({
|
|
68
|
+
document: input.document,
|
|
69
|
+
content: segment.content,
|
|
70
|
+
kind: 'text',
|
|
71
|
+
chunkIndex: input.nextChunkIndex + offset,
|
|
72
|
+
headingPath: input.headingPath,
|
|
73
|
+
extra: { blockType: 'paragraph', strategy: 'markdown' },
|
|
74
|
+
}));
|
|
75
|
+
case 'table':
|
|
76
|
+
return [
|
|
77
|
+
(0, chunker_1.createChunk)({
|
|
78
|
+
document: input.document,
|
|
79
|
+
content: input.block.markdown,
|
|
80
|
+
kind: 'table',
|
|
81
|
+
chunkIndex: input.nextChunkIndex,
|
|
82
|
+
headingPath: input.headingPath,
|
|
83
|
+
extra: { blockType: 'table', strategy: 'markdown' },
|
|
84
|
+
}),
|
|
85
|
+
];
|
|
86
|
+
case 'code':
|
|
87
|
+
return [
|
|
88
|
+
(0, chunker_1.createChunk)({
|
|
89
|
+
document: input.document,
|
|
90
|
+
content: formatCodeBlock(input.block),
|
|
91
|
+
kind: 'code',
|
|
92
|
+
chunkIndex: input.nextChunkIndex,
|
|
93
|
+
headingPath: input.headingPath,
|
|
94
|
+
extra: createCodeExtra(input.block),
|
|
95
|
+
}),
|
|
96
|
+
];
|
|
97
|
+
case 'image': {
|
|
98
|
+
const content = [input.block.alt, input.block.text]
|
|
99
|
+
.filter((value) => value !== undefined && value.trim().length > 0)
|
|
100
|
+
.join('\n');
|
|
101
|
+
if (content.length === 0) {
|
|
102
|
+
return [];
|
|
103
|
+
}
|
|
104
|
+
return [
|
|
105
|
+
(0, chunker_1.createChunk)({
|
|
106
|
+
document: input.document,
|
|
107
|
+
content,
|
|
108
|
+
kind: 'image_text',
|
|
109
|
+
chunkIndex: input.nextChunkIndex,
|
|
110
|
+
headingPath: input.headingPath,
|
|
111
|
+
extra: { blockType: 'image', strategy: 'markdown' },
|
|
112
|
+
}),
|
|
113
|
+
];
|
|
114
|
+
}
|
|
115
|
+
}
|
|
116
|
+
}
|
|
117
|
+
function updateHeadingStack(stack, block) {
|
|
118
|
+
while (stack.length > 0 && (stack.at(-1)?.level ?? 0) >= block.level) {
|
|
119
|
+
stack.pop();
|
|
120
|
+
}
|
|
121
|
+
if (block.text.trim().length > 0) {
|
|
122
|
+
stack.push({
|
|
123
|
+
level: block.level,
|
|
124
|
+
text: block.text,
|
|
125
|
+
});
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
function formatCodeBlock(block) {
|
|
129
|
+
if (block.language === undefined) {
|
|
130
|
+
return block.code;
|
|
131
|
+
}
|
|
132
|
+
return `\`\`\`${block.language}\n${block.code}\n\`\`\``;
|
|
133
|
+
}
|
|
134
|
+
function createCodeExtra(block) {
|
|
135
|
+
const extra = {
|
|
136
|
+
blockType: 'code',
|
|
137
|
+
strategy: 'markdown',
|
|
138
|
+
};
|
|
139
|
+
if (block.language !== undefined) {
|
|
140
|
+
extra.language = block.language;
|
|
141
|
+
}
|
|
142
|
+
return extra;
|
|
143
|
+
}
|
|
144
|
+
//# sourceMappingURL=markdown-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"markdown-chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/markdown-chunker.ts"],"names":[],"mappings":";;;AAKA,uCAImB;AAenB,MAAa,eAAe;IACP,OAAO,CAAyB;IAEjD,YAAY,UAAkC,EAAE;QAC5C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAqB;QAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChE,OAAO,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,YAAY,GAAmB,EAAE,CAAC;QACxC,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YAClC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC3B,kBAAkB,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;gBACxC,IAAI,IAAI,CAAC,OAAO,CAAC,uBAAuB,KAAK,IAAI,EAAE,CAAC;oBAChD,MAAM,CAAC,IAAI,CAAC,IAAA,qBAAW,EAAC;wBACpB,QAAQ;wBACR,OAAO,EAAE,KAAK,CAAC,IAAI;wBACnB,IAAI,EAAE,MAAM;wBACZ,UAAU;wBACV,WAAW,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;wBACxD,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE;qBACxD,CAAC,CAAC,CAAC;oBACJ,UAAU,IAAI,CAAC,CAAC;gBACpB,CAAC;gBACD,SAAS;YACb,CAAC;YAED,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,iBAAiB,CAAC;gBAClC,QAAQ;gBACR,KAAK;gBACL,WAAW;gBACX,cAAc,EAAE,UAAU;gBAC1B,OAAO,EAAE,IAAI,CAAC,OAAO;aACxB,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;YAC5B,UAAU,IAAI,WAAW,CAAC,MAAM,CAAC;QACrC,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAwB;QACpC,OAAO,MAAM,IAAA,4BAAkB,EAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;IAEO,iBAAiB,CAAC,QAAqB;QAC3C,OAAO,IAAA,2BAAiB,EAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC;aACnD,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;YACjC,QAAQ;YACR,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,KAAK;YACjB,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,KAAK,EAAE,EAAE,QAAQ,EAAE,mBAAmB,EAAE;SAC3C,CAAC,CAAC,CAAC;IACZ,CAAC;CACJ;AAhED,0CAgEC;AAUD,SAAS,iBAAiB,CAAC,KAA6B;IACpD,QAAQ,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC
;QACvB,KAAK,WAAW;YACZ,OAAO,IAAA,2BAAiB,EAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC;iBACpD,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;gBAClC,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,IAAI,EAAE,MAAM;gBACZ,UAAU,EAAE,KAAK,CAAC,cAAc,GAAG,MAAM;gBACzC,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE;aAC1D,CAAC,CAAC,CAAC;QACZ,KAAK,OAAO;YACR,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,QAAQ;oBAC7B,IAAI,EAAE,OAAO;oBACb,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE;iBACtD,CAAC;aACL,CAAC;QACN,KAAK,MAAM;YACP,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC;oBACrC,IAAI,EAAE,MAAM;oBACZ,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC;iBACtC,CAAC;aACL,CAAC;QACN,KAAK,OAAO,CAAC,CAAC,CAAC;YACX,MAAM,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC9C,MAAM,CAAC,CAAC,KAAK,EAAmB,EAAE,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;iBAClF,IAAI,CAAC,IAAI,CAAC,CAAC;YAChB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,EAAE,CAAC;YACd,CAAC;YAED,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO;oBACP,IAAI,EAAE,YAAY;oBAClB,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE;iBACtD,CAAC;aACL,CAAC;QACN,CAAC;IACL,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CACvB,KAAqB,EACrB,KAAkD;IAElD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACnE,KAAK,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;SACnB,CAAC,CAAC;IACP,CAAC;AACL,CAAC;AAED,SAAS
,eAAe,CAAC,KAA+C;IACpE,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC,IAAI,CAAC;IACtB,CAAC;IAED,OAAO,SAAS,KAAK,CAAC,QAAQ,KAAK,KAAK,CAAC,IAAI,UAAU,CAAC;AAC5D,CAAC;AAED,SAAS,eAAe,CAAC,KAA+C;IACpE,MAAM,KAAK,GAA4B;QACnC,SAAS,EAAE,MAAM;QACjB,QAAQ,EAAE,UAAU;KACvB,CAAC;IAEF,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC/B,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;IACpC,CAAC;IAED,OAAO,KAAK,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { Chunk, RawDocument } from '../types';
|
|
2
|
+
import type { Chunker, TextSplitOptions } from './chunker';
|
|
3
|
+
/** Options accepted by {@link ParentChildChunker}. */
export interface ParentChildChunkerOptions {
    /**
     * Split options for the parent pass; merged over the defaults
     * `{ maxChunkLength: 3000, minChunkLength: 800, overlapLength: 0 }`.
     */
    parent?: TextSplitOptions;
    /**
     * Split options for the child pass; merged over the defaults
     * `{ maxChunkLength: 900, minChunkLength: 180, overlapLength: 90 }`.
     */
    child?: TextSplitOptions;
    /** Whether parent chunks are part of the returned list. Defaults to `true`. */
    includeParentChunks?: boolean;
}
|
|
8
|
+
/**
 * Two-level chunker: `document.content` is split into parent segments, and
 * each parent segment is split again into child chunks that carry the
 * parent's id as `parentId`.
 */
export declare class ParentChildChunker implements Chunker {
    /** Effective split options for the parent pass. */
    private readonly parentOptions;
    /** Effective split options for the child pass. */
    private readonly childOptions;
    /** Whether parent chunks are emitted alongside their children. */
    private readonly includeParentChunks;
    /** @param options Optional overrides; see {@link ParentChildChunkerOptions}. */
    constructor(options?: ParentChildChunkerOptions);
    /** Chunks one document into parent chunks (if enabled) and child chunks. */
    chunk(document: RawDocument): Promise<Chunk[]>;
    /** Chunks several documents. */
    chunkMany(documents: RawDocument[]): Promise<Chunk[]>;
}
|
|
16
|
+
//# sourceMappingURL=parent-child.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parent-child.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/parent-child.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,KAAK,EACL,WAAW,EACd,MAAM,UAAU,CAAC;AAMlB,OAAO,KAAK,EACR,OAAO,EAEP,gBAAgB,EACnB,MAAM,WAAW,CAAC;AAEnB,MAAM,WAAW,yBAAyB;IACtC,MAAM,CAAC,EAAE,gBAAgB,CAAC;IAC1B,KAAK,CAAC,EAAE,gBAAgB,CAAC;IACzB,mBAAmB,CAAC,EAAE,OAAO,CAAC;CACjC;AAED,qBAAa,kBAAmB,YAAW,OAAO;IAC9C,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAmB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAmB;IAChD,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAU;gBAElC,OAAO,GAAE,yBAA8B;IAgB7C,KAAK,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAmC9C,SAAS,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;CAG9D"}
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.ParentChildChunker = void 0;
|
|
4
|
+
const chunker_1 = require("./chunker");
|
|
5
|
+
class ParentChildChunker {
|
|
6
|
+
parentOptions;
|
|
7
|
+
childOptions;
|
|
8
|
+
includeParentChunks;
|
|
9
|
+
constructor(options = {}) {
|
|
10
|
+
this.parentOptions = {
|
|
11
|
+
maxChunkLength: 3000,
|
|
12
|
+
minChunkLength: 800,
|
|
13
|
+
overlapLength: 0,
|
|
14
|
+
...options.parent,
|
|
15
|
+
};
|
|
16
|
+
this.childOptions = {
|
|
17
|
+
maxChunkLength: 900,
|
|
18
|
+
minChunkLength: 180,
|
|
19
|
+
overlapLength: 90,
|
|
20
|
+
...options.child,
|
|
21
|
+
};
|
|
22
|
+
this.includeParentChunks = options.includeParentChunks ?? true;
|
|
23
|
+
}
|
|
24
|
+
async chunk(document) {
|
|
25
|
+
const chunks = [];
|
|
26
|
+
const parentSegments = (0, chunker_1.splitTextSegments)(document.content, this.parentOptions);
|
|
27
|
+
let chunkIndex = 0;
|
|
28
|
+
for (const parentSegment of parentSegments) {
|
|
29
|
+
const parentChunk = (0, chunker_1.createChunk)({
|
|
30
|
+
document,
|
|
31
|
+
content: parentSegment.content,
|
|
32
|
+
kind: 'text',
|
|
33
|
+
chunkIndex,
|
|
34
|
+
startOffset: parentSegment.startOffset,
|
|
35
|
+
endOffset: parentSegment.endOffset,
|
|
36
|
+
extra: { role: 'parent', strategy: 'parent-child' },
|
|
37
|
+
});
|
|
38
|
+
chunkIndex += 1;
|
|
39
|
+
if (this.includeParentChunks) {
|
|
40
|
+
chunks.push(parentChunk);
|
|
41
|
+
}
|
|
42
|
+
const childChunks = createChildChunks({
|
|
43
|
+
document,
|
|
44
|
+
parentChunk,
|
|
45
|
+
parentSegment,
|
|
46
|
+
startChunkIndex: chunkIndex,
|
|
47
|
+
childOptions: this.childOptions,
|
|
48
|
+
});
|
|
49
|
+
chunks.push(...childChunks);
|
|
50
|
+
chunkIndex += childChunks.length;
|
|
51
|
+
}
|
|
52
|
+
return chunks;
|
|
53
|
+
}
|
|
54
|
+
async chunkMany(documents) {
|
|
55
|
+
return await (0, chunker_1.chunkManyDocuments)(this, documents);
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
exports.ParentChildChunker = ParentChildChunker;
|
|
59
|
+
function createChildChunks(input) {
|
|
60
|
+
return (0, chunker_1.splitTextSegments)(input.parentSegment.content, input.childOptions)
|
|
61
|
+
.map((segment, offset) => (0, chunker_1.createChunk)({
|
|
62
|
+
document: input.document,
|
|
63
|
+
content: segment.content,
|
|
64
|
+
kind: 'text',
|
|
65
|
+
chunkIndex: input.startChunkIndex + offset,
|
|
66
|
+
parentId: input.parentChunk.id,
|
|
67
|
+
startOffset: input.parentSegment.startOffset + segment.startOffset,
|
|
68
|
+
endOffset: input.parentSegment.startOffset + segment.endOffset,
|
|
69
|
+
extra: {
|
|
70
|
+
role: 'child',
|
|
71
|
+
strategy: 'parent-child',
|
|
72
|
+
parentChunkIndex: input.parentChunk.metadata.chunkIndex,
|
|
73
|
+
},
|
|
74
|
+
}));
|
|
75
|
+
}
|
|
76
|
+
//# sourceMappingURL=parent-child.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"parent-child.js","sourceRoot":"","sources":["../../../src/rag/chunking/parent-child.ts"],"names":[],"mappings":";;;AAIA,uCAImB;AAanB,MAAa,kBAAkB;IACV,aAAa,CAAmB;IAChC,YAAY,CAAmB;IAC/B,mBAAmB,CAAU;IAE9C,YAAY,UAAqC,EAAE;QAC/C,IAAI,CAAC,aAAa,GAAG;YACjB,cAAc,EAAE,IAAI;YACpB,cAAc,EAAE,GAAG;YACnB,aAAa,EAAE,CAAC;YAChB,GAAG,OAAO,CAAC,MAAM;SACpB,CAAC;QACF,IAAI,CAAC,YAAY,GAAG;YAChB,cAAc,EAAE,GAAG;YACnB,cAAc,EAAE,GAAG;YACnB,aAAa,EAAE,EAAE;YACjB,GAAG,OAAO,CAAC,KAAK;SACnB,CAAC;QACF,IAAI,CAAC,mBAAmB,GAAG,OAAO,CAAC,mBAAmB,IAAI,IAAI,CAAC;IACnE,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAqB;QAC7B,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,cAAc,GAAG,IAAA,2BAAiB,EAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAC/E,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,aAAa,IAAI,cAAc,EAAE,CAAC;YACzC,MAAM,WAAW,GAAG,IAAA,qBAAW,EAAC;gBAC5B,QAAQ;gBACR,OAAO,EAAE,aAAa,CAAC,OAAO;gBAC9B,IAAI,EAAE,MAAM;gBACZ,UAAU;gBACV,WAAW,EAAE,aAAa,CAAC,WAAW;gBACtC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,cAAc,EAAE;aACtD,CAAC,CAAC;YACH,UAAU,IAAI,CAAC,CAAC;YAEhB,IAAI,IAAI,CAAC,mBAAmB,EAAE,CAAC;gBAC3B,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;YAC7B,CAAC;YAED,MAAM,WAAW,GAAG,iBAAiB,CAAC;gBAClC,QAAQ;gBACR,WAAW;gBACX,aAAa;gBACb,eAAe,EAAE,UAAU;gBAC3B,YAAY,EAAE,IAAI,CAAC,YAAY;aAClC,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;YAC5B,UAAU,IAAI,WAAW,CAAC,MAAM,CAAC;QACrC,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAwB;QACpC,OAAO,MAAM,IAAA,4BAAkB,EAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;CACJ;AA3DD,gDA2DC;AAUD,SAAS,iBAAiB,CAAC,KAA6B;IACpD,OAAO,IAAA,2BAAiB,EAAC,KAAK,CAAC,aAAa,CAAC,OAAO,EAAE,KAAK,CAAC,YAAY,CAAC;SACpE,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;QAClC,QAAQ,EAAE,KAAK,CAAC,QAAQ;QACxB,OAAO,EAAE,OAAO,CAAC,OAAO;QACxB,IAAI,EAAE,MAAM;QACZ,UAAU,EAAE,KAAK,CAAC,eAAe,GAAG,MAAM;QAC1C,QAAQ,EAAE,KAAK,CAAC,WAAW,CAAC,EAAE;QAC9B,WAAW,EAAE,KAAK,CAAC,aAAa,CAAC,WAAW,GAAG,OAAO,CAAC,WAAW;QAClE,SAAS,EAAE,KAAK,CAAC,aAAa,CAAC,WAAW,GAAG,OAAO,CAAC,SAAS;QAC9D,KAAK,EAAE;YA
CH,IAAI,EAAE,OAAO;YACb,QAAQ,EAAE,cAAc;YACxB,gBAAgB,EAAE,KAAK,CAAC,WAAW,CAAC,QAAQ,CAAC,UAAU;SAC1D;KACJ,CAAC,CAAC,CAAC;AACZ,CAAC"}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
import type { Chunk, RawDocument } from '../types';
|
|
2
|
+
import type { Chunker, TextSplitOptions } from './chunker';
|
|
3
|
+
/**
 * Options accepted by `RecursiveChunker`.
 *
 * Adds nothing of its own: every setting comes from `TextSplitOptions`.
 */
export interface RecursiveChunkerOptions extends TextSplitOptions {}
|
|
5
|
+
/**
 * Chunker that splits a document's text into segments and emits one chunk
 * per segment.
 */
export declare class RecursiveChunker implements Chunker {
    /** Split settings captured at construction time. */
    private readonly options;
    /**
     * @param options Split settings; may be omitted.
     */
    constructor(options?: RecursiveChunkerOptions);
    /**
     * Splits a single document.
     *
     * @param document Document whose content is split.
     * @returns The chunks produced for that document.
     */
    chunk(document: RawDocument): Promise<Array<Chunk>>;
    /**
     * Splits several documents.
     *
     * @param documents Documents to split.
     * @returns The chunks produced for all documents.
     */
    chunkMany(documents: Array<RawDocument>): Promise<Array<Chunk>>;
}
|
|
11
|
+
//# sourceMappingURL=recursive-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recursive-chunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/recursive-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,KAAK,EAAE,WAAW,EAAE,MAAM,UAAU,CAAC;AAMnD,OAAO,KAAK,EACR,OAAO,EACP,gBAAgB,EACnB,MAAM,WAAW,CAAC;AAEnB,MAAM,WAAW,uBAAwB,SAAQ,gBAAgB;CAAG;AAEpE,qBAAa,gBAAiB,YAAW,OAAO;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAA0B;gBAEtC,OAAO,GAAE,uBAA4B;IAI3C,KAAK,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAa9C,SAAS,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;CAG9D"}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.RecursiveChunker = void 0;
|
|
4
|
+
const chunker_1 = require("./chunker");
|
|
5
|
+
/**
 * Chunker that cuts a document's text with the shared `splitTextSegments`
 * helper and wraps every resulting segment in a chunk record.
 */
class RecursiveChunker {
    options;
    /**
     * @param options Split settings forwarded to `splitTextSegments`. They are
     * shallow-copied, so later changes to the caller's object do not reach
     * this instance.
     */
    constructor(options = {}) {
        this.options = { ...options };
    }
    /**
     * Splits one document into text chunks.
     *
     * @param document Document whose `content` is split.
     * @returns One chunk per segment; `chunkIndex` is the segment's position.
     */
    async chunk(document) {
        const segments = (0, chunker_1.splitTextSegments)(document.content, this.options);
        // Every chunk is tagged as plain text produced by the "recursive" strategy.
        const toChunk = (segment, position) => (0, chunker_1.createChunk)({
            document,
            content: segment.content,
            kind: 'text',
            chunkIndex: position,
            startOffset: segment.startOffset,
            endOffset: segment.endOffset,
            extra: { strategy: 'recursive' },
        });
        return segments.map(toChunk);
    }
    /**
     * Splits several documents by delegating to `chunkManyDocuments`.
     *
     * @param documents Documents to split.
     * @returns Whatever `chunkManyDocuments` resolves to for this chunker.
     */
    async chunkMany(documents) {
        const chunks = await (0, chunker_1.chunkManyDocuments)(this, documents);
        return chunks;
    }
}
|
|
26
|
+
exports.RecursiveChunker = RecursiveChunker;
|
|
27
|
+
//# sourceMappingURL=recursive-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"recursive-chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/recursive-chunker.ts"],"names":[],"mappings":";;;AACA,uCAImB;AAQnB,MAAa,gBAAgB;IACR,OAAO,CAA0B;IAElD,YAAY,UAAmC,EAAE;QAC7C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAqB;QAC7B,OAAO,IAAA,2BAAiB,EAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC;aACnD,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;YACjC,QAAQ;YACR,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,KAAK;YACjB,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,KAAK,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE;SACnC,CAAC,CAAC,CAAC;IACZ,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAwB;QACpC,OAAO,MAAM,IAAA,4BAAkB,EAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;CACJ;AAvBD,4CAuBC"}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
import type { Chunk, EmbeddedChunk } from '../types';
|
|
2
|
+
/**
 * Contract implemented by every text-embedding backend.
 */
export interface Embedder {
    /** Identifier of the embedding model. */
    readonly model: string;
    /** Number of components each returned vector is expected to have. */
    readonly dimensions: number;
    /**
     * Embeds a batch of texts.
     *
     * @param texts Texts to embed.
     * @returns One vector per input text, in input order.
     */
    embedDocuments(texts: Array<string>): Promise<Array<Array<number>>>;
    /**
     * Embeds a single query text.
     *
     * @param text Text to embed.
     * @returns The vector for that text.
     */
    embedQuery(text: string): Promise<Array<number>>;
}
|
|
8
|
+
/**
 * Thrown when an embedding vector does not have the expected number of
 * components.
 */
export declare class EmbeddingDimensionError extends Error {
    /** Dimension count that was required. */
    readonly expectedDimensions: number;
    /** Length of the vector that was actually received. */
    readonly actualDimensions: number;
    /**
     * @param expectedDimensions Required dimension count.
     * @param actualDimensions Length of the offending vector.
     * @param label Prefix used in the error message to identify the vector.
     */
    constructor(expectedDimensions: number, actualDimensions: number, label?: string);
}
|
|
13
|
+
/**
 * Thrown when an embedder returns a different number of vectors than the
 * number of texts it was given.
 */
export declare class EmbeddingBatchSizeError extends Error {
    /** Number of vectors that was required. */
    readonly expectedSize: number;
    /** Number of vectors that was actually received. */
    readonly actualSize: number;
    /**
     * @param expectedSize Required number of vectors.
     * @param actualSize Number of vectors received.
     */
    constructor(expectedSize: number, actualSize: number);
}
|
|
18
|
+
/**
 * Embeds the content of every chunk and pairs each chunk with its vector.
 *
 * @param embedder Backend used to embed the chunk contents.
 * @param chunks Chunks to embed.
 * @returns One `{ chunk, embedding }` pair per input chunk, in input order.
 * @throws EmbeddingBatchSizeError When the embedder returns the wrong number of vectors.
 * @throws EmbeddingDimensionError When a vector's length differs from `embedder.dimensions`.
 */
export declare function embedChunks(embedder: Embedder, chunks: Array<Chunk>): Promise<Array<EmbeddedChunk>>;
|
|
19
|
+
/**
 * Verifies that a vector has exactly the expected number of components.
 *
 * @param embedding Vector to check.
 * @param expectedDimensions Required length.
 * @param label Optional prefix for the error message.
 * @throws EmbeddingDimensionError When the lengths differ.
 */
export declare function assertEmbeddingDimensions(embedding: ReadonlyArray<number>, expectedDimensions: number, label?: string): void;
|
|
20
|
+
/**
 * Verifies that a batch contains exactly the expected number of entries.
 *
 * @param embeddings Batch returned by an embedder.
 * @param expectedSize Required number of entries.
 * @throws EmbeddingBatchSizeError When the sizes differ.
 */
export declare function assertEmbeddingBatchSize(embeddings: ReadonlyArray<unknown>, expectedSize: number): void;
|
|
21
|
+
/**
 * Copies a vector into a new, mutable array.
 *
 * @param embedding Vector to copy.
 * @returns A new array holding the same components.
 */
export declare function cloneEmbedding(embedding: ReadonlyArray<number>): Array<number>;
|
|
22
|
+
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param embedding Vector to normalize.
 * @returns A new array; a zero-magnitude vector is returned as an unchanged copy.
 */
export declare function normalizeEmbedding(embedding: ReadonlyArray<number>): Array<number>;
|
|
23
|
+
/**
 * Validates a dimension count.
 *
 * @param dimensions Value to validate.
 * @returns `dimensions` unchanged when it is a positive integer.
 * @throws RangeError When `dimensions` is not a positive integer.
 */
export declare function assertValidEmbeddingDimensions(dimensions: number): number;
|
|
24
|
+
//# sourceMappingURL=embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.d.ts","sourceRoot":"","sources":["../../../src/rag/embeddings/embedder.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,KAAK,EACL,aAAa,EAChB,MAAM,UAAU,CAAC;AAElB,MAAM,WAAW,QAAQ;IACrB,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC;IACrD,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC,CAAC;CAC/C;AAED,qBAAa,uBAAwB,SAAQ,KAAK;IAC9C,QAAQ,CAAC,kBAAkB,EAAE,MAAM,CAAC;IACpC,QAAQ,CAAC,gBAAgB,EAAE,MAAM,CAAC;gBAEtB,kBAAkB,EAAE,MAAM,EAAE,gBAAgB,EAAE,MAAM,EAAE,KAAK,SAAc;CAQxF;AAED,qBAAa,uBAAwB,SAAQ,KAAK;IAC9C,QAAQ,CAAC,YAAY,EAAE,MAAM,CAAC;IAC9B,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;gBAEhB,YAAY,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM;CAMvD;AAED,wBAAsB,WAAW,CAC7B,QAAQ,EAAE,QAAQ,EAClB,MAAM,EAAE,KAAK,EAAE,GAChB,OAAO,CAAC,aAAa,EAAE,CAAC,CAoB1B;AAED,wBAAgB,yBAAyB,CACrC,SAAS,EAAE,SAAS,MAAM,EAAE,EAC5B,kBAAkB,EAAE,MAAM,EAC1B,KAAK,CAAC,EAAE,MAAM,GACf,IAAI,CAIN;AAED,wBAAgB,wBAAwB,CACpC,UAAU,EAAE,SAAS,OAAO,EAAE,EAC9B,YAAY,EAAE,MAAM,GACrB,IAAI,CAIN;AAED,wBAAgB,cAAc,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,CAErE;AAED,wBAAgB,kBAAkB,CAAC,SAAS,EAAE,SAAS,MAAM,EAAE,GAAG,MAAM,EAAE,CAUzE;AAED,wBAAgB,8BAA8B,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM,CAMzE"}
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EmbeddingBatchSizeError = exports.EmbeddingDimensionError = void 0;
|
|
4
|
+
exports.embedChunks = embedChunks;
|
|
5
|
+
exports.assertEmbeddingDimensions = assertEmbeddingDimensions;
|
|
6
|
+
exports.assertEmbeddingBatchSize = assertEmbeddingBatchSize;
|
|
7
|
+
exports.cloneEmbedding = cloneEmbedding;
|
|
8
|
+
exports.normalizeEmbedding = normalizeEmbedding;
|
|
9
|
+
exports.assertValidEmbeddingDimensions = assertValidEmbeddingDimensions;
|
|
10
|
+
/**
 * Error raised when an embedding vector's length differs from the expected
 * dimension count. Both counts are kept on the instance for callers.
 */
class EmbeddingDimensionError extends Error {
    expectedDimensions;
    actualDimensions;
    /**
     * @param expectedDimensions Required dimension count.
     * @param actualDimensions Length of the offending vector.
     * @param label Message prefix naming the vector; defaults to 'embedding'.
     */
    constructor(expectedDimensions, actualDimensions, label = 'embedding') {
        const message = `${label} dimensions mismatch: expected ${expectedDimensions}, got ${actualDimensions}.`;
        super(message);
        this.actualDimensions = actualDimensions;
        this.expectedDimensions = expectedDimensions;
        this.name = 'EmbeddingDimensionError';
    }
}
|
|
20
|
+
exports.EmbeddingDimensionError = EmbeddingDimensionError;
|
|
21
|
+
/**
 * Error raised when an embedder returns a different number of vectors than
 * the number of inputs. Both counts are kept on the instance for callers.
 */
class EmbeddingBatchSizeError extends Error {
    expectedSize;
    actualSize;
    /**
     * @param expectedSize Required number of vectors.
     * @param actualSize Number of vectors received.
     */
    constructor(expectedSize, actualSize) {
        const message = `Embedding batch size mismatch: expected ${expectedSize}, got ${actualSize}.`;
        super(message);
        this.actualSize = actualSize;
        this.expectedSize = expectedSize;
        this.name = 'EmbeddingBatchSizeError';
    }
}
|
|
31
|
+
exports.EmbeddingBatchSizeError = EmbeddingBatchSizeError;
|
|
32
|
+
async function embedChunks(embedder, chunks) {
|
|
33
|
+
const embeddings = await embedder.embedDocuments(chunks.map((chunk) => chunk.content));
|
|
34
|
+
assertEmbeddingBatchSize(embeddings, chunks.length);
|
|
35
|
+
return chunks.map((chunk, index) => {
|
|
36
|
+
const embedding = embeddings[index];
|
|
37
|
+
if (embedding === undefined) {
|
|
38
|
+
throw new EmbeddingBatchSizeError(chunks.length, embeddings.length);
|
|
39
|
+
}
|
|
40
|
+
assertEmbeddingDimensions(embedding, embedder.dimensions, `chunk ${chunk.id} embedding`);
|
|
41
|
+
return {
|
|
42
|
+
chunk,
|
|
43
|
+
embedding: cloneEmbedding(embedding),
|
|
44
|
+
};
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
/**
 * Verifies that a vector has exactly the expected number of components.
 *
 * @param embedding Vector to check.
 * @param expectedDimensions Required length.
 * @param label Optional message prefix forwarded to the error.
 * @throws EmbeddingDimensionError When the lengths differ.
 */
function assertEmbeddingDimensions(embedding, expectedDimensions, label) {
    const actualDimensions = embedding.length;
    if (actualDimensions === expectedDimensions) {
        return;
    }
    throw new EmbeddingDimensionError(expectedDimensions, actualDimensions, label);
}
|
|
52
|
+
/**
 * Verifies that a batch contains exactly the expected number of entries.
 *
 * @param embeddings Batch returned by an embedder.
 * @param expectedSize Required number of entries.
 * @throws EmbeddingBatchSizeError When the sizes differ.
 */
function assertEmbeddingBatchSize(embeddings, expectedSize) {
    const actualSize = embeddings.length;
    if (actualSize === expectedSize) {
        return;
    }
    throw new EmbeddingBatchSizeError(expectedSize, actualSize);
}
|
|
57
|
+
/**
 * Copies a vector into a new array so the caller and the source never share
 * a mutable reference.
 *
 * @param embedding Vector to copy.
 * @returns A new array with the same components in the same order.
 */
function cloneEmbedding(embedding) {
    const copy = [];
    for (const component of embedding) {
        copy.push(component);
    }
    return copy;
}
|
|
60
|
+
/**
 * Scales a vector to unit Euclidean length.
 *
 * @param embedding Vector to normalize.
 * @returns A new array. A vector whose magnitude is exactly 0 cannot be
 * scaled and is returned as an unchanged copy.
 */
function normalizeEmbedding(embedding) {
    let sumOfSquares = 0;
    embedding.forEach((component) => {
        sumOfSquares += component * component;
    });
    const magnitude = Math.sqrt(sumOfSquares);
    return magnitude === 0
        ? [...embedding]
        : embedding.map((component) => component / magnitude);
}
|
|
67
|
+
/**
 * Validates a dimension count.
 *
 * @param dimensions Value to validate.
 * @returns `dimensions` unchanged when it is a positive integer.
 * @throws RangeError When `dimensions` is not an integer or is not above zero.
 */
function assertValidEmbeddingDimensions(dimensions) {
    const isPositiveInteger = Number.isInteger(dimensions) && dimensions > 0;
    if (isPositiveInteger) {
        return dimensions;
    }
    throw new RangeError('Embedding dimensions must be a positive integer.');
}
|
|
73
|
+
//# sourceMappingURL=embedder.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedder.js","sourceRoot":"","sources":["../../../src/rag/embeddings/embedder.ts"],"names":[],"mappings":";;;AAsCA,kCAuBC;AAED,8DAQC;AAED,4DAOC;AAED,wCAEC;AAED,gDAUC;AAED,wEAMC;AA5FD,MAAa,uBAAwB,SAAQ,KAAK;IACrC,kBAAkB,CAAS;IAC3B,gBAAgB,CAAS;IAElC,YAAY,kBAA0B,EAAE,gBAAwB,EAAE,KAAK,GAAG,WAAW;QACjF,KAAK,CACD,GAAG,KAAK,kCAAkC,kBAAkB,SAAS,gBAAgB,GAAG,CAC3F,CAAC;QACF,IAAI,CAAC,IAAI,GAAG,yBAAyB,CAAC;QACtC,IAAI,CAAC,kBAAkB,GAAG,kBAAkB,CAAC;QAC7C,IAAI,CAAC,gBAAgB,GAAG,gBAAgB,CAAC;IAC7C,CAAC;CACJ;AAZD,0DAYC;AAED,MAAa,uBAAwB,SAAQ,KAAK;IACrC,YAAY,CAAS;IACrB,UAAU,CAAS;IAE5B,YAAY,YAAoB,EAAE,UAAkB;QAChD,KAAK,CAAC,2CAA2C,YAAY,SAAS,UAAU,GAAG,CAAC,CAAC;QACrF,IAAI,CAAC,IAAI,GAAG,yBAAyB,CAAC;QACtC,IAAI,CAAC,YAAY,GAAG,YAAY,CAAC;QACjC,IAAI,CAAC,UAAU,GAAG,UAAU,CAAC;IACjC,CAAC;CACJ;AAVD,0DAUC;AAEM,KAAK,UAAU,WAAW,CAC7B,QAAkB,EAClB,MAAe;IAEf,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,cAAc,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC;IACvF,wBAAwB,CAAC,UAAU,EAAE,MAAM,CAAC,MAAM,CAAC,CAAC;IAEpD,OAAO,MAAM,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QAC/B,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;QACpC,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;YAC1B,MAAM,IAAI,uBAAuB,CAAC,MAAM,CAAC,MAAM,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;QACxE,CAAC;QACD,yBAAyB,CACrB,SAAS,EACT,QAAQ,CAAC,UAAU,EACnB,SAAS,KAAK,CAAC,EAAE,YAAY,CAChC,CAAC;QAEF,OAAO;YACH,KAAK;YACL,SAAS,EAAE,cAAc,CAAC,SAAS,CAAC;SACvC,CAAC;IACN,CAAC,CAAC,CAAC;AACP,CAAC;AAED,SAAgB,yBAAyB,CACrC,SAA4B,EAC5B,kBAA0B,EAC1B,KAAc;IAEd,IAAI,SAAS,CAAC,MAAM,KAAK,kBAAkB,EAAE,CAAC;QAC1C,MAAM,IAAI,uBAAuB,CAAC,kBAAkB,EAAE,SAAS,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC;IACnF,CAAC;AACL,CAAC;AAED,SAAgB,wBAAwB,CACpC,UAA8B,EAC9B,YAAoB;IAEpB,IAAI,UAAU,CAAC,MAAM,KAAK,YAAY,EAAE,CAAC;QACrC,MAAM,IAAI,uBAAuB,CAAC,YAAY,EAAE,UAAU,CAAC,MAAM,CAAC,CAAC;IACvE,CAAC;AACL,CAAC;AAED,SAAgB,cAAc,CAAC,SAA4B;IACvD,OAAO,CAAC,GAAG,SAAS,CAAC,CAAC;AAC1B,CAAC;AAED,SAAgB,kBAAkB,CAAC,SAA4B;IAC3D,MAAM,SAAS,GAAG,IAAI,CAAC,IAAI,CACvB,SAAS,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,G
AAG,KAAK,GAAG,KAAK,EAAE,CAAC,CAAC,CAC3D,CAAC;IAEF,IAAI,SAAS,KAAK,CAAC,EAAE,CAAC;QAClB,OAAO,CAAC,GAAG,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED,OAAO,SAAS,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,GAAG,SAAS,CAAC,CAAC;AACvD,CAAC;AAED,SAAgB,8BAA8B,CAAC,UAAkB;IAC7D,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC,UAAU,CAAC,IAAI,UAAU,IAAI,CAAC,EAAE,CAAC;QACnD,MAAM,IAAI,UAAU,CAAC,kDAAkD,CAAC,CAAC;IAC7E,CAAC;IAED,OAAO,UAAU,CAAC;AACtB,CAAC"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import type { Embedder } from './embedder';
|
|
2
|
+
/**
 * Values from which an embedding cache key is derived.
 */
export interface EmbeddingCacheKeyInput {
    /** Text being embedded; only its hash ends up in the key. */
    text: string;
    /** Identifier of the embedding model. */
    model: string;
    /** Dimension count of the vectors stored under the key. */
    dimensions: number;
    /** Optional key prefix; `'default'` is used when omitted. */
    namespace?: string;
}
|
|
8
|
+
/**
 * Key/value storage used by the embedding cache. Implementations may be
 * synchronous or asynchronous.
 */
export interface EmbeddingCacheStore {
    /**
     * Looks up a cached vector.
     *
     * @param key Cache key.
     * @returns The stored vector, or `undefined` when the key is absent.
     */
    get(key: string): Promise<Array<number> | undefined> | Array<number> | undefined;
    /**
     * Stores a vector under a key.
     *
     * @param key Cache key.
     * @param embedding Vector to store.
     */
    set(key: string, embedding: ReadonlyArray<number>): Promise<void> | void;
}
|
|
12
|
+
/**
 * Construction options for `EmbeddingCache`.
 */
export interface EmbeddingCacheOptions {
    /** Embedder whose results are cached. */
    embedder: Embedder;
    /** Storage backend; an in-memory store is created when omitted. */
    store?: EmbeddingCacheStore;
    /** Optional namespace mixed into every cache key. */
    namespace?: string;
}
|
|
17
|
+
/**
 * `EmbeddingCacheStore` backed by an in-process map.
 */
export declare class InMemoryEmbeddingCacheStore implements EmbeddingCacheStore {
    /** Stored vectors by cache key. */
    private readonly embeddings;
    /** Number of cached vectors. */
    get size(): number;
    /**
     * @param key Cache key.
     * @returns A copy of the stored vector, or `undefined` when absent.
     */
    get(key: string): Promise<Array<number> | undefined>;
    /**
     * Stores a copy of `embedding` under `key`.
     *
     * @param key Cache key.
     * @param embedding Vector to store.
     */
    set(key: string, embedding: ReadonlyArray<number>): Promise<void>;
    /** Removes every cached vector. */
    clear(): void;
}
|
|
24
|
+
/**
 * Embedder wrapper that caches vectors, keyed by text hash + model +
 * dimensions.
 *
 * A dimension mismatch throws immediately. Vectors are never zero-padded or
 * truncated, because that would pollute the downstream vector index.
 */
export declare class EmbeddingCache implements Embedder {
    /** Model identifier, taken from the wrapped embedder. */
    readonly model: string;
    /** Dimension count, taken from the wrapped embedder. */
    readonly dimensions: number;
    private readonly embedder;
    private readonly store;
    private readonly namespace;
    /**
     * @param options Wrapped embedder plus optional store and namespace.
     */
    constructor(options: EmbeddingCacheOptions);
    /**
     * Embeds a batch of texts, serving cached vectors where available.
     *
     * @param texts Texts to embed.
     * @returns One vector per input text, in input order.
     */
    embedDocuments(texts: Array<string>): Promise<Array<Array<number>>>;
    /**
     * Embeds a single query text, serving the cached vector when available.
     *
     * @param text Text to embed.
     * @returns The vector for that text.
     */
    embedQuery(text: string): Promise<Array<number>>;
    private createKey;
}
|
|
39
|
+
/**
 * Builds the cache key for one text from the namespace (`'default'` when
 * omitted), the model, the dimension count and the SHA-256 hex digest of
 * the text.
 *
 * @param input Values the key is derived from.
 * @returns The cache key.
 */
export declare function createEmbeddingCacheKey(input: EmbeddingCacheKeyInput): string;
|
|
40
|
+
//# sourceMappingURL=embedding-cache.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-cache.d.ts","sourceRoot":"","sources":["../../../src/rag/embeddings/embedding-cache.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,WAAW,sBAAsB;IACnC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC;IACd,UAAU,EAAE,MAAM,CAAC;IACnB,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,MAAM,WAAW,mBAAmB;IAChC,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC,GAAG,MAAM,EAAE,GAAG,SAAS,CAAC;IACvE,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAC;CACxE;AAED,MAAM,WAAW,qBAAqB;IAClC,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,CAAC,EAAE,mBAAmB,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;CACtB;AAED,qBAAa,2BAA4B,YAAW,mBAAmB;IACnE,OAAO,CAAC,QAAQ,CAAC,UAAU,CAA+B;IAE1D,IAAI,IAAI,IAAI,MAAM,CAEjB;IAEK,GAAG,CAAC,GAAG,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,GAAG,SAAS,CAAC;IAM/C,GAAG,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,SAAS,MAAM,EAAE,GAAG,OAAO,CAAC,IAAI,CAAC;IAInE,KAAK,IAAI,IAAI;CAGhB;AAED;;;GAGG;AACH,qBAAa,cAAe,YAAW,QAAQ;IAC3C,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAsB;IAC5C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAqB;gBAEnC,OAAO,EAAE,qBAAqB;IAQpC,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAkDpD,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAgBjD,OAAO,CAAC,SAAS;CAapB;AAED,wBAAgB,uBAAuB,CAAC,KAAK,EAAE,sBAAsB,GAAG,MAAM,CAU7E"}
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.EmbeddingCache = exports.InMemoryEmbeddingCacheStore = void 0;
|
|
4
|
+
exports.createEmbeddingCacheKey = createEmbeddingCacheKey;
|
|
5
|
+
const node_crypto_1 = require("node:crypto");
|
|
6
|
+
const embedder_1 = require("./embedder");
|
|
7
|
+
/**
 * Cache store backed by an in-process `Map`. Vectors are copied on the way in
 * and on the way out, so callers never share an array with the store.
 */
class InMemoryEmbeddingCacheStore {
    embeddings = new Map();
    /** Number of cached vectors. */
    get size() {
        return this.embeddings.size;
    }
    /**
     * @param key Cache key.
     * @returns A copy of the stored vector, or `undefined` when absent.
     */
    async get(key) {
        const stored = this.embeddings.get(key);
        if (stored === undefined) {
            return undefined;
        }
        return (0, embedder_1.cloneEmbedding)(stored);
    }
    /**
     * Stores a copy of `embedding` under `key`, replacing any previous value.
     *
     * @param key Cache key.
     * @param embedding Vector to store.
     */
    async set(key, embedding) {
        const copy = (0, embedder_1.cloneEmbedding)(embedding);
        this.embeddings.set(key, copy);
    }
    /** Removes every cached vector. */
    clear() {
        this.embeddings.clear();
    }
}
|
|
23
|
+
exports.InMemoryEmbeddingCacheStore = InMemoryEmbeddingCacheStore;
|
|
24
|
+
/**
|
|
25
|
+
* EmbeddingCache 用 text hash + model + dimensions 做缓存键。
|
|
26
|
+
* 维度不匹配会直接抛错,不能补零或截断,否则后续向量索引会被污染。
|
|
27
|
+
*/
|
|
28
|
+
class EmbeddingCache {
|
|
29
|
+
model;
|
|
30
|
+
dimensions;
|
|
31
|
+
embedder;
|
|
32
|
+
store;
|
|
33
|
+
namespace;
|
|
34
|
+
constructor(options) {
|
|
35
|
+
this.embedder = options.embedder;
|
|
36
|
+
this.model = options.embedder.model;
|
|
37
|
+
this.dimensions = options.embedder.dimensions;
|
|
38
|
+
this.store = options.store ?? new InMemoryEmbeddingCacheStore();
|
|
39
|
+
this.namespace = options.namespace;
|
|
40
|
+
}
|
|
41
|
+
async embedDocuments(texts) {
|
|
42
|
+
const results = new Array(texts.length);
|
|
43
|
+
const misses = new Map();
|
|
44
|
+
for (const [index, text] of texts.entries()) {
|
|
45
|
+
const key = this.createKey(text);
|
|
46
|
+
const cached = await this.store.get(key);
|
|
47
|
+
if (cached !== undefined) {
|
|
48
|
+
(0, embedder_1.assertEmbeddingDimensions)(cached, this.dimensions, 'cached embedding');
|
|
49
|
+
results[index] = (0, embedder_1.cloneEmbedding)(cached);
|
|
50
|
+
continue;
|
|
51
|
+
}
|
|
52
|
+
const existing = misses.get(key);
|
|
53
|
+
if (existing === undefined) {
|
|
54
|
+
misses.set(key, { text, indexes: [index] });
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
existing.indexes.push(index);
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
if (misses.size > 0) {
|
|
61
|
+
const missedEntries = [...misses.entries()];
|
|
62
|
+
const embeddings = await this.embedder.embedDocuments(missedEntries.map(([, miss]) => miss.text));
|
|
63
|
+
(0, embedder_1.assertEmbeddingBatchSize)(embeddings, missedEntries.length);
|
|
64
|
+
await Promise.all(missedEntries.map(async ([key, miss], offset) => {
|
|
65
|
+
const embedding = embeddings[offset];
|
|
66
|
+
if (embedding === undefined) {
|
|
67
|
+
return;
|
|
68
|
+
}
|
|
69
|
+
(0, embedder_1.assertEmbeddingDimensions)(embedding, this.dimensions, 'document embedding');
|
|
70
|
+
await this.store.set(key, embedding);
|
|
71
|
+
for (const index of miss.indexes) {
|
|
72
|
+
results[index] = (0, embedder_1.cloneEmbedding)(embedding);
|
|
73
|
+
}
|
|
74
|
+
}));
|
|
75
|
+
}
|
|
76
|
+
return results.map((embedding) => {
|
|
77
|
+
if (embedding === undefined) {
|
|
78
|
+
throw new Error('Embedding cache failed to resolve a document embedding.');
|
|
79
|
+
}
|
|
80
|
+
return (0, embedder_1.cloneEmbedding)(embedding);
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
async embedQuery(text) {
|
|
84
|
+
const key = this.createKey(text);
|
|
85
|
+
const cached = await this.store.get(key);
|
|
86
|
+
if (cached !== undefined) {
|
|
87
|
+
(0, embedder_1.assertEmbeddingDimensions)(cached, this.dimensions, 'cached embedding');
|
|
88
|
+
return (0, embedder_1.cloneEmbedding)(cached);
|
|
89
|
+
}
|
|
90
|
+
const embedding = await this.embedder.embedQuery(text);
|
|
91
|
+
(0, embedder_1.assertEmbeddingDimensions)(embedding, this.dimensions, 'query embedding');
|
|
92
|
+
await this.store.set(key, embedding);
|
|
93
|
+
return (0, embedder_1.cloneEmbedding)(embedding);
|
|
94
|
+
}
|
|
95
|
+
createKey(text) {
|
|
96
|
+
const input = {
|
|
97
|
+
text,
|
|
98
|
+
model: this.model,
|
|
99
|
+
dimensions: this.dimensions,
|
|
100
|
+
};
|
|
101
|
+
if (this.namespace !== undefined) {
|
|
102
|
+
input.namespace = this.namespace;
|
|
103
|
+
}
|
|
104
|
+
return createEmbeddingCacheKey(input);
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
exports.EmbeddingCache = EmbeddingCache;
|
|
108
|
+
/**
 * Builds the cache key for one text.
 *
 * Layout: `embedding:<namespace or 'default'>:<model>:<dimensions>:<sha256 hex of text>`.
 *
 * @param input Text, model, dimensions and optional namespace.
 * @returns The cache key. Namespace and model values that contain neither
 * ':' nor '%' appear in the key verbatim.
 */
function createEmbeddingCacheKey(input) {
    // ':' separates the fields, so a literal ':' (and the escape character '%')
    // inside a free-form field is percent-encoded. Without this, namespace
    // "a:b" + model "c" and namespace "a" + model "b:c" would share one key.
    const escapeField = (value) => typeof value === 'string'
        ? value.replace(/[%:]/g, (character) => encodeURIComponent(character))
        : value;
    const textHash = (0, node_crypto_1.createHash)('sha256').update(input.text).digest('hex');
    return [
        'embedding',
        escapeField(input.namespace ?? 'default'),
        escapeField(input.model),
        String(input.dimensions),
        textHash,
    ].join(':');
}
|
|
118
|
+
//# sourceMappingURL=embedding-cache.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"embedding-cache.js","sourceRoot":"","sources":["../../../src/rag/embeddings/embedding-cache.ts"],"names":[],"mappings":";;;AAoJA,0DAUC;AA9JD,6CAAyC;AACzC,yCAIoB;AAqBpB,MAAa,2BAA2B;IACnB,UAAU,GAAG,IAAI,GAAG,EAAoB,CAAC;IAE1D,IAAI,IAAI;QACJ,OAAO,IAAI,CAAC,UAAU,CAAC,IAAI,CAAC;IAChC,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,GAAW;QACjB,MAAM,SAAS,GAAG,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QAE3C,OAAO,SAAS,KAAK,SAAS,CAAC,CAAC,CAAC,SAAS,CAAC,CAAC,CAAC,IAAA,yBAAc,EAAC,SAAS,CAAC,CAAC;IAC3E,CAAC;IAED,KAAK,CAAC,GAAG,CAAC,GAAW,EAAE,SAA4B;QAC/C,IAAI,CAAC,UAAU,CAAC,GAAG,CAAC,GAAG,EAAE,IAAA,yBAAc,EAAC,SAAS,CAAC,CAAC,CAAC;IACxD,CAAC;IAED,KAAK;QACD,IAAI,CAAC,UAAU,CAAC,KAAK,EAAE,CAAC;IAC5B,CAAC;CACJ;AApBD,kEAoBC;AAED;;;GAGG;AACH,MAAa,cAAc;IACd,KAAK,CAAS;IACd,UAAU,CAAS;IACX,QAAQ,CAAW;IACnB,KAAK,CAAsB;IAC3B,SAAS,CAAqB;IAE/C,YAAY,OAA8B;QACtC,IAAI,CAAC,QAAQ,GAAG,OAAO,CAAC,QAAQ,CAAC;QACjC,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;QACpC,IAAI,CAAC,UAAU,GAAG,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC;QAC9C,IAAI,CAAC,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,2BAA2B,EAAE,CAAC;QAChE,IAAI,CAAC,SAAS,GAAG,OAAO,CAAC,SAAS,CAAC;IACvC,CAAC;IAED,KAAK,CAAC,cAAc,CAAC,KAAe;QAChC,MAAM,OAAO,GAAG,IAAI,KAAK,CAAuB,KAAK,CAAC,MAAM,CAAC,CAAC;QAC9D,MAAM,MAAM,GAAG,IAAI,GAAG,EAA+C,CAAC;QAEtE,KAAK,MAAM,CAAC,KAAK,EAAE,IAAI,CAAC,IAAI,KAAK,CAAC,OAAO,EAAE,EAAE,CAAC;YAC1C,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;YACjC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACzC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;gBACvB,IAAA,oCAAyB,EAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC;gBACvE,OAAO,CAAC,KAAK,CAAC,GAAG,IAAA,yBAAc,EAAC,MAAM,CAAC,CAAC;gBACxC,SAAS;YACb,CAAC;YAED,MAAM,QAAQ,GAAG,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;YACjC,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;gBACzB,MAAM,CAAC,GAAG,CAAC,GAAG,EAAE,EAAE,IAAI,EAAE,OAAO,EAAE,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC;YAChD,CAAC;iBAAM,CAAC;gBACJ,QAAQ,CAAC,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;YACjC,CAAC;QACL,CAAC;QAED,IAAI,MAAM,CAAC,IAAI,GAAG,CAAC,EAAE,CAAC;YAClB,MAAM,aAAa,GAAG,CAAC,
GAAG,MAAM,CAAC,OAAO,EAAE,CAAC,CAAC;YAC5C,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,cAAc,CACjD,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,IAAI,CAAC,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,CAC7C,CAAC;YACF,IAAA,mCAAwB,EAAC,UAAU,EAAE,aAAa,CAAC,MAAM,CAAC,CAAC;YAE3D,MAAM,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,GAAG,CAAC,KAAK,EAAE,CAAC,GAAG,EAAE,IAAI,CAAC,EAAE,MAAM,EAAE,EAAE;gBAC9D,MAAM,SAAS,GAAG,UAAU,CAAC,MAAM,CAAC,CAAC;gBACrC,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;oBAC1B,OAAO;gBACX,CAAC;gBACD,IAAA,oCAAyB,EAAC,SAAS,EAAE,IAAI,CAAC,UAAU,EAAE,oBAAoB,CAAC,CAAC;gBAC5E,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;gBACrC,KAAK,MAAM,KAAK,IAAI,IAAI,CAAC,OAAO,EAAE,CAAC;oBAC/B,OAAO,CAAC,KAAK,CAAC,GAAG,IAAA,yBAAc,EAAC,SAAS,CAAC,CAAC;gBAC/C,CAAC;YACL,CAAC,CAAC,CAAC,CAAC;QACR,CAAC;QAED,OAAO,OAAO,CAAC,GAAG,CAAC,CAAC,SAAS,EAAE,EAAE;YAC7B,IAAI,SAAS,KAAK,SAAS,EAAE,CAAC;gBAC1B,MAAM,IAAI,KAAK,CAAC,yDAAyD,CAAC,CAAC;YAC/E,CAAC;YAED,OAAO,IAAA,yBAAc,EAAC,SAAS,CAAC,CAAC;QACrC,CAAC,CAAC,CAAC;IACP,CAAC;IAED,KAAK,CAAC,UAAU,CAAC,IAAY;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC;QACjC,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC;QACzC,IAAI,MAAM,KAAK,SAAS,EAAE,CAAC;YACvB,IAAA,oCAAyB,EAAC,MAAM,EAAE,IAAI,CAAC,UAAU,EAAE,kBAAkB,CAAC,CAAC;YAEvE,OAAO,IAAA,yBAAc,EAAC,MAAM,CAAC,CAAC;QAClC,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,IAAI,CAAC,CAAC;QACvD,IAAA,oCAAyB,EAAC,SAAS,EAAE,IAAI,CAAC,UAAU,EAAE,iBAAiB,CAAC,CAAC;QACzE,MAAM,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,GAAG,EAAE,SAAS,CAAC,CAAC;QAErC,OAAO,IAAA,yBAAc,EAAC,SAAS,CAAC,CAAC;IACrC,CAAC;IAEO,SAAS,CAAC,IAAY;QAC1B,MAAM,KAAK,GAA2B;YAClC,IAAI;YACJ,KAAK,EAAE,IAAI,CAAC,KAAK;YACjB,UAAU,EAAE,IAAI,CAAC,UAAU;SAC9B,CAAC;QAEF,IAAI,IAAI,CAAC,SAAS,KAAK,SAAS,EAAE,CAAC;YAC/B,KAAK,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QACrC,CAAC;QAED,OAAO,uBAAuB,CAAC,KAAK,CAAC,CAAC;IAC1C,CAAC;CACJ;AA9FD,wCA8FC;AAED,SAAgB,uBAAuB,CAAC,KAA6B;IACjE,MAAM,QAAQ,GAAG,IAAA,wBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;IAEvE,OAAO;QACH,WAAW;QACX,
KAAK,CAAC,SAAS,IAAI,SAAS;QAC5B,KAAK,CAAC,KAAK;QACX,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC;QACxB,QAAQ;KACX,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import type { Embedder } from './embedder';
|
|
2
|
+
/**
 * Construction options for `FakeEmbedder`. Every field is optional; the
 * defaults are chosen by the implementation.
 */
export interface FakeEmbedderOptions {
    /** Model identifier the fake embedder reports. */
    model?: string;
    /** Dimension count of the generated vectors. */
    dimensions?: number;
    /** NOTE(review): presumably toggles normalization of the generated vectors — confirm against the implementation. */
    normalize?: boolean;
}
|
|
7
|
+
/**
 * FakeEmbedder is an embedding implementation for tests; it does not call
 * any external service.
 *
 * It fills a fixed-dimension vector from token hashes, so the same text
 * always produces the same vector.
 */
export declare class FakeEmbedder implements Embedder {
    /** Model identifier reported by this embedder. */
    readonly model: string;
    /** Dimension count of every generated vector. */
    readonly dimensions: number;
    private readonly normalize;
    /**
     * @param options Optional model, dimensions and normalize settings.
     */
    constructor(options?: FakeEmbedderOptions);
    /**
     * @param texts Texts to embed.
     * @returns One generated vector per input text.
     */
    embedDocuments(texts: Array<string>): Promise<Array<Array<number>>>;
    /**
     * @param text Text to embed.
     * @returns The generated vector for that text.
     */
    embedQuery(text: string): Promise<Array<number>>;
    private embed;
}
|
|
20
|
+
//# sourceMappingURL=fake-embedder.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"fake-embedder.d.ts","sourceRoot":"","sources":["../../../src/rag/embeddings/fake-embedder.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,YAAY,CAAC;AAE3C,MAAM,WAAW,mBAAmB;IAChC,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,SAAS,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;GAGG;AACH,qBAAa,YAAa,YAAW,QAAQ;IACzC,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAU;gBAExB,OAAO,GAAE,mBAAwB;IAMvC,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC;IAIpD,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,EAAE,CAAC;IAIjD,OAAO,CAAC,KAAK;CAkBhB"}
|