@fllf/agent-sdk 0.1.2 → 0.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +169 -0
- package/README.md +824 -198
- package/dist/agent/Agent.d.ts +16 -5
- package/dist/agent/Agent.d.ts.map +1 -1
- package/dist/agent/Agent.js +283 -2
- package/dist/agent/Agent.js.map +1 -1
- package/dist/agent/index.d.ts +1 -1
- package/dist/agent/index.d.ts.map +1 -1
- package/dist/agent/types.d.ts +27 -3
- package/dist/agent/types.d.ts.map +1 -1
- package/dist/config/config.d.ts +22 -3
- package/dist/config/config.d.ts.map +1 -1
- package/dist/config/config.js +194 -47
- package/dist/config/config.js.map +1 -1
- package/dist/config/index.d.ts +1 -1
- package/dist/config/index.d.ts.map +1 -1
- package/dist/errors.d.ts +74 -0
- package/dist/errors.d.ts.map +1 -0
- package/dist/errors.js +186 -0
- package/dist/errors.js.map +1 -0
- package/dist/executors/base-executor.d.ts +14 -0
- package/dist/executors/base-executor.d.ts.map +1 -0
- package/dist/executors/base-executor.js +31 -0
- package/dist/executors/base-executor.js.map +1 -0
- package/dist/executors/base.d.ts +36 -5
- package/dist/executors/base.d.ts.map +1 -1
- package/dist/executors/chat-request-builder.d.ts +10 -0
- package/dist/executors/chat-request-builder.d.ts.map +1 -0
- package/dist/executors/chat-request-builder.js +96 -0
- package/dist/executors/chat-request-builder.js.map +1 -0
- package/dist/executors/index.d.ts +4 -1
- package/dist/executors/index.d.ts.map +1 -1
- package/dist/executors/index.js +6 -1
- package/dist/executors/index.js.map +1 -1
- package/dist/executors/rag-executor.js +1 -1
- package/dist/executors/rag-executor.js.map +1 -1
- package/dist/executors/simple-chat-executor.d.ts +4 -2
- package/dist/executors/simple-chat-executor.d.ts.map +1 -1
- package/dist/executors/simple-chat-executor.js +59 -57
- package/dist/executors/simple-chat-executor.js.map +1 -1
- package/dist/executors/tool-calling-executor.d.ts +20 -2
- package/dist/executors/tool-calling-executor.d.ts.map +1 -1
- package/dist/executors/tool-calling-executor.js +189 -91
- package/dist/executors/tool-calling-executor.js.map +1 -1
- package/dist/history/base.d.ts +55 -2
- package/dist/history/base.d.ts.map +1 -1
- package/dist/history/base.js +49 -0
- package/dist/history/base.js.map +1 -1
- package/dist/history/compression.d.ts +49 -0
- package/dist/history/compression.d.ts.map +1 -0
- package/dist/history/compression.js +53 -0
- package/dist/history/compression.js.map +1 -0
- package/dist/history/context-window.d.ts +33 -0
- package/dist/history/context-window.d.ts.map +1 -0
- package/dist/history/context-window.js +68 -0
- package/dist/history/context-window.js.map +1 -0
- package/dist/history/in-memory.d.ts +6 -4
- package/dist/history/in-memory.d.ts.map +1 -1
- package/dist/history/in-memory.js +25 -39
- package/dist/history/in-memory.js.map +1 -1
- package/dist/history/index.d.ts +9 -2
- package/dist/history/index.d.ts.map +1 -1
- package/dist/history/index.js +18 -1
- package/dist/history/index.js.map +1 -1
- package/dist/history/postgres/index.d.ts +4 -0
- package/dist/history/postgres/index.d.ts.map +1 -0
- package/dist/history/postgres/index.js +20 -0
- package/dist/history/postgres/index.js.map +1 -0
- package/dist/history/postgres/postgres-message-history-store.d.ts +35 -0
- package/dist/history/postgres/postgres-message-history-store.d.ts.map +1 -0
- package/dist/history/postgres/postgres-message-history-store.js +195 -0
- package/dist/history/postgres/postgres-message-history-store.js.map +1 -0
- package/dist/history/postgres/schema.d.ts +18 -0
- package/dist/history/postgres/schema.d.ts.map +1 -0
- package/dist/history/postgres/schema.js +46 -0
- package/dist/history/postgres/schema.js.map +1 -0
- package/dist/history/postgres/sql.d.ts +29 -0
- package/dist/history/postgres/sql.d.ts.map +1 -0
- package/dist/history/postgres/sql.js +102 -0
- package/dist/history/postgres/sql.js.map +1 -0
- package/dist/history/postgres/types.d.ts +20 -0
- package/dist/history/postgres/types.d.ts.map +1 -0
- package/dist/history/postgres/types.js +3 -0
- package/dist/history/postgres/types.js.map +1 -0
- package/dist/history/tool-pairing.d.ts +11 -0
- package/dist/history/tool-pairing.d.ts.map +1 -0
- package/dist/history/tool-pairing.js +52 -0
- package/dist/history/tool-pairing.js.map +1 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +2 -0
- package/dist/index.js.map +1 -1
- package/dist/llm/LLM.d.ts +1 -0
- package/dist/llm/LLM.d.ts.map +1 -1
- package/dist/llm/LLM.js +136 -26
- package/dist/llm/LLM.js.map +1 -1
- package/dist/llm/errors.d.ts +11 -1
- package/dist/llm/errors.d.ts.map +1 -1
- package/dist/llm/errors.js +50 -1
- package/dist/llm/errors.js.map +1 -1
- package/dist/llm/factory.d.ts +14 -2
- package/dist/llm/factory.d.ts.map +1 -1
- package/dist/llm/factory.js +48 -9
- package/dist/llm/factory.js.map +1 -1
- package/dist/llm/index.d.ts +8 -3
- package/dist/llm/index.d.ts.map +1 -1
- package/dist/llm/index.js +10 -1
- package/dist/llm/index.js.map +1 -1
- package/dist/llm/providers/anthropic.d.ts +14 -0
- package/dist/llm/providers/anthropic.d.ts.map +1 -0
- package/dist/llm/providers/anthropic.js +336 -0
- package/dist/llm/providers/anthropic.js.map +1 -0
- package/dist/llm/providers/openai-compatible.d.ts +7 -0
- package/dist/llm/providers/openai-compatible.d.ts.map +1 -1
- package/dist/llm/providers/openai-compatible.js +151 -47
- package/dist/llm/providers/openai-compatible.js.map +1 -1
- package/dist/llm/stream-accumulator.d.ts +17 -0
- package/dist/llm/stream-accumulator.d.ts.map +1 -0
- package/dist/llm/stream-accumulator.js +83 -0
- package/dist/llm/stream-accumulator.js.map +1 -0
- package/dist/llm/tool-arguments.d.ts +6 -0
- package/dist/llm/tool-arguments.d.ts.map +1 -0
- package/dist/llm/tool-arguments.js +20 -0
- package/dist/llm/tool-arguments.js.map +1 -0
- package/dist/llm/types.d.ts +76 -4
- package/dist/llm/types.d.ts.map +1 -1
- package/dist/messages/content.d.ts +36 -0
- package/dist/messages/content.d.ts.map +1 -0
- package/dist/messages/content.js +70 -0
- package/dist/messages/content.js.map +1 -0
- package/dist/messages/index.d.ts +4 -2
- package/dist/messages/index.d.ts.map +1 -1
- package/dist/messages/index.js +10 -1
- package/dist/messages/index.js.map +1 -1
- package/dist/messages/message.d.ts +11 -9
- package/dist/messages/message.d.ts.map +1 -1
- package/dist/messages/message.js +69 -4
- package/dist/messages/message.js.map +1 -1
- package/dist/messages/types.d.ts +12 -0
- package/dist/messages/types.d.ts.map +1 -1
- package/dist/observability/collecting-observer.d.ts +8 -0
- package/dist/observability/collecting-observer.d.ts.map +1 -0
- package/dist/observability/collecting-observer.js +15 -0
- package/dist/observability/collecting-observer.js.map +1 -0
- package/dist/observability/index.d.ts +1 -0
- package/dist/observability/index.d.ts.map +1 -1
- package/dist/observability/index.js +3 -1
- package/dist/observability/index.js.map +1 -1
- package/dist/observability/observer.d.ts.map +1 -1
- package/dist/observability/observer.js +14 -1
- package/dist/observability/observer.js.map +1 -1
- package/dist/observability/types.d.ts +16 -1
- package/dist/observability/types.d.ts.map +1 -1
- package/dist/rag/chunking/auto-chunker.d.ts +7 -0
- package/dist/rag/chunking/auto-chunker.d.ts.map +1 -1
- package/dist/rag/chunking/auto-chunker.js +13 -0
- package/dist/rag/chunking/auto-chunker.js.map +1 -1
- package/dist/rag/chunking/chunker.d.ts.map +1 -1
- package/dist/rag/chunking/chunker.js +29 -5
- package/dist/rag/chunking/chunker.js.map +1 -1
- package/dist/rag/chunking/index.d.ts +2 -0
- package/dist/rag/chunking/index.d.ts.map +1 -1
- package/dist/rag/chunking/index.js +2 -0
- package/dist/rag/chunking/index.js.map +1 -1
- package/dist/rag/chunking/markdown-chunker.d.ts.map +1 -1
- package/dist/rag/chunking/markdown-chunker.js.map +1 -1
- package/dist/rag/chunking/qa-pair-chunker.d.ts +23 -0
- package/dist/rag/chunking/qa-pair-chunker.d.ts.map +1 -0
- package/dist/rag/chunking/qa-pair-chunker.js +162 -0
- package/dist/rag/chunking/qa-pair-chunker.js.map +1 -0
- package/dist/rag/chunking/semantic-chunker.d.ts +19 -0
- package/dist/rag/chunking/semantic-chunker.d.ts.map +1 -0
- package/dist/rag/chunking/semantic-chunker.js +291 -0
- package/dist/rag/chunking/semantic-chunker.js.map +1 -0
- package/dist/rag/embeddings/embedder.d.ts.map +1 -1
- package/dist/rag/embeddings/embedder.js +6 -0
- package/dist/rag/embeddings/embedder.js.map +1 -1
- package/dist/rag/generation/context-builder.d.ts +7 -0
- package/dist/rag/generation/context-builder.d.ts.map +1 -1
- package/dist/rag/generation/context-builder.js +4 -1
- package/dist/rag/generation/context-builder.js.map +1 -1
- package/dist/rag/ingestion/metadata.d.ts +6 -1
- package/dist/rag/ingestion/metadata.d.ts.map +1 -1
- package/dist/rag/ingestion/metadata.js +6 -2
- package/dist/rag/ingestion/metadata.js.map +1 -1
- package/dist/rag/pipeline.d.ts.map +1 -1
- package/dist/rag/pipeline.js +34 -11
- package/dist/rag/pipeline.js.map +1 -1
- package/dist/rag/retrieval/index.d.ts +1 -0
- package/dist/rag/retrieval/index.d.ts.map +1 -1
- package/dist/rag/retrieval/index.js +1 -0
- package/dist/rag/retrieval/index.js.map +1 -1
- package/dist/rag/retrieval/parent-child-expanding-retriever.d.ts +31 -0
- package/dist/rag/retrieval/parent-child-expanding-retriever.d.ts.map +1 -0
- package/dist/rag/retrieval/parent-child-expanding-retriever.js +194 -0
- package/dist/rag/retrieval/parent-child-expanding-retriever.js.map +1 -0
- package/dist/rag/stores/in-memory-keyword-store.d.ts.map +1 -1
- package/dist/rag/stores/in-memory-keyword-store.js +3 -8
- package/dist/rag/stores/in-memory-keyword-store.js.map +1 -1
- package/dist/rag/stores/keyword-tokenizer.d.ts +4 -0
- package/dist/rag/stores/keyword-tokenizer.d.ts.map +1 -0
- package/dist/rag/stores/keyword-tokenizer.js +113 -0
- package/dist/rag/stores/keyword-tokenizer.js.map +1 -0
- package/dist/rag/stores/postgres/pg-vector-store.d.ts.map +1 -1
- package/dist/rag/stores/postgres/pg-vector-store.js +21 -8
- package/dist/rag/stores/postgres/pg-vector-store.js.map +1 -1
- package/dist/rag/stores/postgres/postgres-document-store.d.ts.map +1 -1
- package/dist/rag/stores/postgres/postgres-document-store.js +26 -13
- package/dist/rag/stores/postgres/postgres-document-store.js.map +1 -1
- package/dist/rag/stores/postgres/postgres-keyword-store.d.ts.map +1 -1
- package/dist/rag/stores/postgres/postgres-keyword-store.js +43 -43
- package/dist/rag/stores/postgres/postgres-keyword-store.js.map +1 -1
- package/dist/rag/stores/postgres/sql.d.ts +22 -0
- package/dist/rag/stores/postgres/sql.d.ts.map +1 -1
- package/dist/rag/stores/postgres/sql.js +42 -0
- package/dist/rag/stores/postgres/sql.js.map +1 -1
- package/dist/rag/stores/types.d.ts +5 -0
- package/dist/rag/stores/types.d.ts.map +1 -1
- package/dist/rag/types.d.ts +6 -0
- package/dist/rag/types.d.ts.map +1 -1
- package/dist/testing/agent.d.ts +11 -0
- package/dist/testing/agent.d.ts.map +1 -0
- package/dist/testing/agent.js +45 -0
- package/dist/testing/agent.js.map +1 -0
- package/dist/testing/history-contract.d.ts +12 -0
- package/dist/testing/history-contract.d.ts.map +1 -0
- package/dist/testing/history-contract.js +111 -0
- package/dist/testing/history-contract.js.map +1 -0
- package/dist/testing/index.d.ts +11 -0
- package/dist/testing/index.d.ts.map +1 -0
- package/dist/testing/index.js +16 -0
- package/dist/testing/index.js.map +1 -0
- package/dist/testing/models.d.ts +34 -0
- package/dist/testing/models.d.ts.map +1 -0
- package/dist/testing/models.js +74 -0
- package/dist/testing/models.js.map +1 -0
- package/dist/testing/tool-schema.d.ts +6 -0
- package/dist/testing/tool-schema.d.ts.map +1 -0
- package/dist/testing/tool-schema.js +29 -0
- package/dist/testing/tool-schema.js.map +1 -0
- package/dist/testing/tools.d.ts +21 -0
- package/dist/testing/tools.d.ts.map +1 -0
- package/dist/testing/tools.js +43 -0
- package/dist/testing/tools.js.map +1 -0
- package/dist/tools/base.d.ts +38 -7
- package/dist/tools/base.d.ts.map +1 -1
- package/dist/tools/base.js +238 -11
- package/dist/tools/base.js.map +1 -1
- package/dist/tools/builtin/advancedSearchTool.d.ts.map +1 -1
- package/dist/tools/builtin/advancedSearchTool.js +30 -4
- package/dist/tools/builtin/advancedSearchTool.js.map +1 -1
- package/dist/tools/builtin/ragSearchTool.d.ts +6 -3
- package/dist/tools/builtin/ragSearchTool.d.ts.map +1 -1
- package/dist/tools/builtin/ragSearchTool.js +8 -6
- package/dist/tools/builtin/ragSearchTool.js.map +1 -1
- package/dist/tools/executor.d.ts +30 -2
- package/dist/tools/executor.d.ts.map +1 -1
- package/dist/tools/executor.js +83 -15
- package/dist/tools/executor.js.map +1 -1
- package/dist/tools/index.d.ts +3 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/registry.d.ts +15 -3
- package/dist/tools/registry.d.ts.map +1 -1
- package/dist/tools/registry.js +21 -2
- package/dist/tools/registry.js.map +1 -1
- package/package.json +58 -4
|
@@ -14,11 +14,21 @@ const DEFAULT_SEPARATORS = [
|
|
|
14
14
|
'\n',
|
|
15
15
|
'。 ',
|
|
16
16
|
'。', // 中文文档常见句末符号,不能只依赖空格。
|
|
17
|
+
'!',
|
|
18
|
+
'?',
|
|
19
|
+
';',
|
|
20
|
+
',',
|
|
21
|
+
'、',
|
|
17
22
|
'. ',
|
|
18
23
|
'! ',
|
|
19
24
|
'? ',
|
|
20
25
|
'; ',
|
|
21
26
|
', ',
|
|
27
|
+
'.',
|
|
28
|
+
'!',
|
|
29
|
+
'?',
|
|
30
|
+
';',
|
|
31
|
+
',',
|
|
22
32
|
' ',
|
|
23
33
|
];
|
|
24
34
|
async function chunkManyDocuments(chunker, documents) {
|
|
@@ -104,7 +114,7 @@ function createChunkId(input) {
|
|
|
104
114
|
function findBestBreak(text, start, hardEnd, minChunkLength) {
|
|
105
115
|
const minEnd = Math.min(start + minChunkLength, hardEnd);
|
|
106
116
|
for (const separator of DEFAULT_SEPARATORS) {
|
|
107
|
-
const index = text.lastIndexOf(separator, hardEnd);
|
|
117
|
+
const index = text.lastIndexOf(separator, hardEnd - separator.length);
|
|
108
118
|
if (index >= minEnd) {
|
|
109
119
|
return index + separator.length;
|
|
110
120
|
}
|
|
@@ -133,11 +143,25 @@ function trimRange(text, start, end) {
|
|
|
133
143
|
};
|
|
134
144
|
}
|
|
135
145
|
function findForwardBoundary(text, preferredStart, end) {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
index
|
|
146
|
+
const searchEnd = Math.min(end, preferredStart + getBoundarySearchWindow(end - preferredStart));
|
|
147
|
+
for (let index = preferredStart; index < searchEnd; index += 1) {
|
|
148
|
+
if (isNaturalStartBoundary(text[index - 1])) {
|
|
149
|
+
return index;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
return preferredStart;
|
|
153
|
+
}
|
|
154
|
+
function getBoundarySearchWindow(overlapLength) {
|
|
155
|
+
return Math.max(16, Math.ceil(overlapLength / 2));
|
|
156
|
+
}
|
|
157
|
+
function isNaturalStartBoundary(previousCharacter) {
|
|
158
|
+
if (previousCharacter === undefined) {
|
|
159
|
+
return true;
|
|
160
|
+
}
|
|
161
|
+
if (/\s/.test(previousCharacter)) {
|
|
162
|
+
return true;
|
|
139
163
|
}
|
|
140
|
-
return
|
|
164
|
+
return /[。!?;,、.!?;,]/u.test(previousCharacter);
|
|
141
165
|
}
|
|
142
166
|
function toPositiveInteger(value, fallback) {
|
|
143
167
|
if (value === undefined || !Number.isFinite(value) || value <= 0) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/chunker.ts"],"names":[],"mappings":";;;
|
|
1
|
+
{"version":3,"file":"chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/chunker.ts"],"names":[],"mappings":";;;AAoEA,gDAOC;AAED,8CA+CC;AAED,kCA2CC;AASD,sCASC;AA3LD,6CAAyC;AAyC5B,QAAA,wBAAwB,GAAG,IAAI,CAAC;AAChC,QAAA,wBAAwB,GAAG,GAAG,CAAC;AAC/B,QAAA,sBAAsB,GAAG,GAAG,CAAC;AAE1C,MAAM,kBAAkB,GAAG;IACvB,MAAM;IACN,IAAI;IACJ,IAAI;IACJ,GAAG,EAAE,sBAAsB;IAC3B,GAAG;IACH,GAAG;IACH,GAAG;IACH,GAAG;IACH,GAAG;IACH,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,IAAI;IACJ,GAAG;IACH,GAAG;IACH,GAAG;IACH,GAAG;IACH,GAAG;IACH,GAAG;CACG,CAAC;AAEJ,KAAK,UAAU,kBAAkB,CACpC,OAAgB,EAChB,SAAwB;IAExB,MAAM,MAAM,GAAG,MAAM,OAAO,CAAC,GAAG,CAAC,SAAS,CAAC,GAAG,CAAC,CAAC,QAAQ,EAAE,EAAE,CAAC,OAAO,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC;IAEvF,OAAO,MAAM,CAAC,IAAI,EAAE,CAAC;AACzB,CAAC;AAED,SAAgB,iBAAiB,CAC7B,IAAY,EACZ,UAA4B,EAAE;IAE9B,MAAM,cAAc,GAAG,iBAAiB,CACpC,OAAO,CAAC,cAAc,EACtB,gCAAwB,CAC3B,CAAC;IACF,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAC3B,iBAAiB,CAAC,OAAO,CAAC,cAAc,EAAE,gCAAwB,CAAC,EACnE,cAAc,CACjB,CAAC;IACF,MAAM,aAAa,GAAG,IAAI,CAAC,GAAG,CAC1B,IAAI,CAAC,GAAG,CAAC,iBAAiB,CAAC,OAAO,CAAC,aAAa,EAAE,8BAAsB,CAAC,EAAE,CAAC,CAAC,EAC7E,IAAI,CAAC,GAAG,CAAC,cAAc,GAAG,CAAC,EAAE,CAAC,CAAC,CAClC,CAAC;IAEF,MAAM,QAAQ,GAAkB,EAAE,CAAC;IACnC,IAAI,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,CAAC,CAAC,CAAC;IAEpC,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;QACzB,MAAM,OAAO,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,cAAc,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9D,MAAM,GAAG,GAAG,OAAO,GAAG,IAAI,CAAC,MAAM;YAC7B,CAAC,CAAC,aAAa,CAAC,IAAI,EAAE,KAAK,EAAE,OAAO,EAAE,cAAc,CAAC;YACrD,CAAC,CAAC,OAAO,CAAC;QACd,MAAM,OAAO,GAAG,SAAS,CAAC,IAAI,EAAE,KAAK,EAAE,GAAG,CAAC,CAAC;QAE5C,IAAI,OAAO,CAAC,KAAK,GAAG,OAAO,CAAC,GAAG,EAAE,CAAC;YAC9B,QAAQ,CAAC,IAAI,CAAC;gBACV,OAAO,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,EAAE,OAAO,CAAC,GAAG,CAAC;gBAC/C,WAAW,EAAE,OAAO,CAAC,KAAK;gBAC1B,SAAS,EAAE,OAAO,CAAC,GAAG;aACzB,CAAC,CAAC;QACP,CAAC;QAED,IAAI,GAAG,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;YACrB,MAAM;QACV,CAAC;QAED,MAAM,cAAc,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,aAAa,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC;QAChE
,MAAM,SAAS,GAAG,aAAa,GAAG,CAAC;YAC/B,CAAC,CAAC,mBAAmB,CAAC,IAAI,EAAE,cAAc,EAAE,GAAG,CAAC;YAChD,CAAC,CAAC,GAAG,CAAC;QACV,KAAK,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,GAAG,CAAC,SAAS,EAAE,KAAK,GAAG,CAAC,CAAC,CAAC,CAAC;IACjE,CAAC;IAED,OAAO,QAAQ,CAAC;AACpB,CAAC;AAED,SAAgB,WAAW,CAAC,KAAuB;IAC/C,MAAM,OAAO,GAAG,KAAK,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;IACrC,MAAM,QAAQ,GAAkB;QAC5B,MAAM,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,MAAM;QACtC,UAAU,EAAE,KAAK,CAAC,UAAU;KAC/B,CAAC;IAEF,eAAe,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC;IAClE,eAAe,CAAC,QAAQ,EAAE,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC;IACxE,eAAe,CAAC,QAAQ,EAAE,iBAAiB,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAC;IACtF,eAAe,CAAC,QAAQ,EAAE,aAAa,EAAE,KAAK,CAAC,WAAW,CAAC,CAAC;IAC5D,eAAe,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC;IAExD,IAAI,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,KAAK,SAAS,EAAE,CAAC;QAC5C,QAAQ,CAAC,GAAG,GAAG,CAAC,GAAG,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,GAAG,CAAC,CAAC;IACpD,CAAC;IACD,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,IAAI,KAAK,CAAC,WAAW,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAClE,QAAQ,CAAC,WAAW,GAAG,CAAC,GAAG,KAAK,CAAC,WAAW,CAAC,CAAC;IAClD,CAAC;IAED,MAAM,KAAK,GAAG,YAAY,CAAC,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,KAAK,CAAC,CAAC;IACvE,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACtB,QAAQ,CAAC,KAAK,GAAG,KAAK,CAAC;IAC3B,CAAC;IAED,MAAM,YAAY,GAAiB;QAC/B,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,EAAE;QAC7B,UAAU,EAAE,KAAK,CAAC,UAAU;QAC5B,OAAO;KACV,CAAC;IACF,eAAe,CAAC,YAAY,EAAE,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IAE1D,MAAM,KAAK,GAAU;QACjB,EAAE,EAAE,aAAa,CAAC,YAAY,CAAC;QAC/B,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,EAAE;QAC7B,OAAO;QACP,IAAI,EAAE,KAAK,CAAC,IAAI;QAChB,QAAQ;KACX,CAAC;IAEF,eAAe,CAAC,KAAK,EAAE,UAAU,EAAE,KAAK,CAAC,QAAQ,CAAC,CAAC;IAEnD,OAAO,KAAK,CAAC;AACjB,CAAC;AASD,SAAgB,aAAa,CAAC,KAAmB;IAC7C,MAAM,SAAS,GAAG;QACd,KAAK,CAAC,UAAU;QAChB,MAAM,CAAC,KAAK,CAAC,UAAU,CAAC;QACxB,KAAK,CAAC,QAAQ,IAAI,EAAE;QACpB,KAAK,CAAC,OAAO;KAChB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAEb,OAAO,SAAS,IAAA,wBAAU,EAAC,QAAQ,CAAC,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC,MA
AM,CAAC,KAAK,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,EAAE,CAAC;AACxF,CAAC;AAED,SAAS,aAAa,CAClB,IAAY,EACZ,KAAa,EACb,OAAe,EACf,cAAsB;IAEtB,MAAM,MAAM,GAAG,IAAI,CAAC,GAAG,CAAC,KAAK,GAAG,cAAc,EAAE,OAAO,CAAC,CAAC;IAEzD,KAAK,MAAM,SAAS,IAAI,kBAAkB,EAAE,CAAC;QACzC,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,SAAS,EAAE,OAAO,GAAG,SAAS,CAAC,MAAM,CAAC,CAAC;QACtE,IAAI,KAAK,IAAI,MAAM,EAAE,CAAC;YAClB,OAAO,KAAK,GAAG,SAAS,CAAC,MAAM,CAAC;QACpC,CAAC;IACL,CAAC;IAED,OAAO,OAAO,CAAC;AACnB,CAAC;AAED,SAAS,cAAc,CAAC,IAAY,EAAE,KAAa;IAC/C,IAAI,KAAK,GAAG,KAAK,CAAC;IAClB,OAAO,KAAK,GAAG,IAAI,CAAC,MAAM,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QACzD,KAAK,IAAI,CAAC,CAAC;IACf,CAAC;IAED,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,SAAS,SAAS,CAAC,IAAY,EAAE,KAAa,EAAE,GAAW;IACvD,IAAI,YAAY,GAAG,KAAK,CAAC;IACzB,IAAI,UAAU,GAAG,GAAG,CAAC;IAErB,OAAO,YAAY,GAAG,UAAU,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QACtE,YAAY,IAAI,CAAC,CAAC;IACtB,CAAC;IACD,OAAO,UAAU,GAAG,YAAY,IAAI,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,UAAU,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC,EAAE,CAAC;QACxE,UAAU,IAAI,CAAC,CAAC;IACpB,CAAC;IAED,OAAO;QACH,KAAK,EAAE,YAAY;QACnB,GAAG,EAAE,UAAU;KAClB,CAAC;AACN,CAAC;AAED,SAAS,mBAAmB,CAAC,IAAY,EAAE,cAAsB,EAAE,GAAW;IAC1E,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,GAAG,EAAE,cAAc,GAAG,uBAAuB,CAAC,GAAG,GAAG,cAAc,CAAC,CAAC,CAAC;IAEhG,KAAK,IAAI,KAAK,GAAG,cAAc,EAAE,KAAK,GAAG,SAAS,EAAE,KAAK,IAAI,CAAC,EAAE,CAAC;QAC7D,IAAI,sBAAsB,CAAC,IAAI,CAAC,KAAK,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC;YAC1C,OAAO,KAAK,CAAC;QACjB,CAAC;IACL,CAAC;IAED,OAAO,cAAc,CAAC;AAC1B,CAAC;AAED,SAAS,uBAAuB,CAAC,aAAqB;IAClD,OAAO,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,IAAI,CAAC,IAAI,CAAC,aAAa,GAAG,CAAC,CAAC,CAAC,CAAC;AACtD,CAAC;AAED,SAAS,sBAAsB,CAAC,iBAAqC;IACjE,IAAI,iBAAiB,KAAK,SAAS,EAAE,CAAC;QAClC,OAAO,IAAI,CAAC;IAChB,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,CAAC,iBAAiB,CAAC,EAAE,CAAC;QAC/B,OAAO,IAAI,CAAC;IAChB,CAAC;IAED,OAAO,gBAAgB,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;AACpD,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAyB,EAAE,QAAgB;IAClE,IAAI,KAAK,KAAK,SAAS,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,K
AAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;AAC7B,CAAC;AAED,SAAS,YAAY,CACjB,IAAyC,EACzC,KAA0C;IAE1C,IAAI,IAAI,KAAK,SAAS,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QAC5C,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,OAAO;QACH,GAAG,CAAC,IAAI,IAAI,EAAE,CAAC;QACf,GAAG,CAAC,KAAK,IAAI,EAAE,CAAC;KACnB,CAAC;AACN,CAAC;AAED,SAAS,eAAe,CACpB,MAAS,EACT,GAAM,EACN,KAAuB;IAEvB,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACtB,MAAM,CAAC,GAAG,CAAC,GAAG,KAAK,CAAC;IACxB,CAAC;AACL,CAAC"}
|
|
@@ -2,5 +2,7 @@ export * from './chunker';
|
|
|
2
2
|
export * from './auto-chunker';
|
|
3
3
|
export * from './markdown-chunker';
|
|
4
4
|
export * from './parent-child';
|
|
5
|
+
export * from './qa-pair-chunker';
|
|
5
6
|
export * from './recursive-chunker';
|
|
7
|
+
export * from './semantic-chunker';
|
|
6
8
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/index.ts"],"names":[],"mappings":"AAAA,cAAc,WAAW,CAAC;AAC1B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,oBAAoB,CAAC;AACnC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,qBAAqB,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/index.ts"],"names":[],"mappings":"AAAA,cAAc,WAAW,CAAC;AAC1B,cAAc,gBAAgB,CAAC;AAC/B,cAAc,oBAAoB,CAAC;AACnC,cAAc,gBAAgB,CAAC;AAC/B,cAAc,mBAAmB,CAAC;AAClC,cAAc,qBAAqB,CAAC;AACpC,cAAc,oBAAoB,CAAC"}
|
|
@@ -18,5 +18,7 @@ __exportStar(require("./chunker"), exports);
|
|
|
18
18
|
__exportStar(require("./auto-chunker"), exports);
|
|
19
19
|
__exportStar(require("./markdown-chunker"), exports);
|
|
20
20
|
__exportStar(require("./parent-child"), exports);
|
|
21
|
+
__exportStar(require("./qa-pair-chunker"), exports);
|
|
21
22
|
__exportStar(require("./recursive-chunker"), exports);
|
|
23
|
+
__exportStar(require("./semantic-chunker"), exports);
|
|
22
24
|
//# sourceMappingURL=index.js.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/rag/chunking/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,4CAA0B;AAC1B,iDAA+B;AAC/B,qDAAmC;AACnC,iDAA+B;AAC/B,sDAAoC"}
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/rag/chunking/index.ts"],"names":[],"mappings":";;;;;;;;;;;;;;;;AAAA,4CAA0B;AAC1B,iDAA+B;AAC/B,qDAAmC;AACnC,iDAA+B;AAC/B,oDAAkC;AAClC,sDAAoC;AACpC,qDAAmC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown-chunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/markdown-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,KAAK,EAEL,WAAW,EACd,MAAM,UAAU,CAAC;AAMlB,OAAO,KAAK,EACR,OAAO,EACP,gBAAgB,EACnB,MAAM,WAAW,CAAC;
|
|
1
|
+
{"version":3,"file":"markdown-chunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/markdown-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,KAAK,EAEL,WAAW,EACd,MAAM,UAAU,CAAC;AAMlB,OAAO,KAAK,EACR,OAAO,EACP,gBAAgB,EACnB,MAAM,WAAW,CAAC;AAInB,MAAM,WAAW,sBAAuB,SAAQ,gBAAgB;IAC5D,uBAAuB,CAAC,EAAE,OAAO,CAAC;CACrC;AAOD,qBAAa,eAAgB,YAAW,OAAO;IAC3C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAyB;gBAErC,OAAO,GAAE,sBAA2B;IAI1C,KAAK,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAyC9C,SAAS,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAI3D,OAAO,CAAC,iBAAiB;CAY5B"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"markdown-chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/markdown-chunker.ts"],"names":[],"mappings":";;;AAKA,uCAImB;
|
|
1
|
+
{"version":3,"file":"markdown-chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/markdown-chunker.ts"],"names":[],"mappings":";;;AAKA,uCAImB;AAiBnB,MAAa,eAAe;IACP,OAAO,CAAyB;IAEjD,YAAY,UAAkC,EAAE;QAC5C,IAAI,CAAC,OAAO,GAAG,EAAE,GAAG,OAAO,EAAE,CAAC;IAClC,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAqB;QAC7B,IAAI,QAAQ,CAAC,MAAM,KAAK,SAAS,IAAI,QAAQ,CAAC,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAChE,OAAO,IAAI,CAAC,iBAAiB,CAAC,QAAQ,CAAC,CAAC;QAC5C,CAAC;QAED,MAAM,MAAM,GAAY,EAAE,CAAC;QAC3B,MAAM,YAAY,GAAmB,EAAE,CAAC;QACxC,IAAI,UAAU,GAAG,CAAC,CAAC;QAEnB,KAAK,MAAM,KAAK,IAAI,QAAQ,CAAC,MAAM,EAAE,CAAC;YAClC,IAAI,KAAK,CAAC,IAAI,KAAK,SAAS,EAAE,CAAC;gBAC3B,kBAAkB,CAAC,YAAY,EAAE,KAAK,CAAC,CAAC;gBACxC,IAAI,IAAI,CAAC,OAAO,CAAC,uBAAuB,KAAK,IAAI,EAAE,CAAC;oBAChD,MAAM,CAAC,IAAI,CAAC,IAAA,qBAAW,EAAC;wBACpB,QAAQ;wBACR,OAAO,EAAE,KAAK,CAAC,IAAI;wBACnB,IAAI,EAAE,MAAM;wBACZ,UAAU;wBACV,WAAW,EAAE,YAAY,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC;wBACxD,KAAK,EAAE,EAAE,SAAS,EAAE,SAAS,EAAE,QAAQ,EAAE,UAAU,EAAE;qBACxD,CAAC,CAAC,CAAC;oBACJ,UAAU,IAAI,CAAC,CAAC;gBACpB,CAAC;gBACD,SAAS;YACb,CAAC;YAED,MAAM,WAAW,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,CAAC;YAChE,MAAM,WAAW,GAAG,iBAAiB,CAAC;gBAClC,QAAQ;gBACR,KAAK;gBACL,WAAW;gBACX,cAAc,EAAE,UAAU;gBAC1B,OAAO,EAAE,IAAI,CAAC,OAAO;aACxB,CAAC,CAAC;YACH,MAAM,CAAC,IAAI,CAAC,GAAG,WAAW,CAAC,CAAC;YAC5B,UAAU,IAAI,WAAW,CAAC,MAAM,CAAC;QACrC,CAAC;QAED,OAAO,MAAM,CAAC;IAClB,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAwB;QACpC,OAAO,MAAM,IAAA,4BAAkB,EAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;IAEO,iBAAiB,CAAC,QAAqB;QAC3C,OAAO,IAAA,2BAAiB,EAAC,QAAQ,CAAC,OAAO,EAAE,IAAI,CAAC,OAAO,CAAC;aACnD,GAAG,CAAC,CAAC,OAAO,EAAE,KAAK,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;YACjC,QAAQ;YACR,OAAO,EAAE,OAAO,CAAC,OAAO;YACxB,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,KAAK;YACjB,WAAW,EAAE,OAAO,CAAC,WAAW;YAChC,SAAS,EAAE,OAAO,CAAC,SAAS;YAC5B,KAAK,EAAE,EAAE,QAAQ,EAAE,mBAAmB,EAAE;SAC3C,CAAC,CAAC,CAAC;IACZ,CAAC;CACJ;AAhED,0CAgEC;AAUD,SAAS,iBAAiB,CAAC,KAA6B;IACpD,QAAQ,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,CAA
C;QACvB,KAAK,WAAW;YACZ,OAAO,IAAA,2BAAiB,EAAC,KAAK,CAAC,KAAK,CAAC,IAAI,EAAE,KAAK,CAAC,OAAO,CAAC;iBACpD,GAAG,CAAC,CAAC,OAAO,EAAE,MAAM,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;gBAClC,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,OAAO,EAAE,OAAO,CAAC,OAAO;gBACxB,IAAI,EAAE,MAAM;gBACZ,UAAU,EAAE,KAAK,CAAC,cAAc,GAAG,MAAM;gBACzC,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,WAAW,EAAE,QAAQ,EAAE,UAAU,EAAE;aAC1D,CAAC,CAAC,CAAC;QACZ,KAAK,OAAO;YACR,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO,EAAE,KAAK,CAAC,KAAK,CAAC,QAAQ;oBAC7B,IAAI,EAAE,OAAO;oBACb,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE;iBACtD,CAAC;aACL,CAAC;QACN,KAAK,MAAM;YACP,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC;oBACrC,IAAI,EAAE,MAAM;oBACZ,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC;iBACtC,CAAC;aACL,CAAC;QACN,KAAK,OAAO,CAAC,CAAC,CAAC;YACX,MAAM,OAAO,GAAG,CAAC,KAAK,CAAC,KAAK,CAAC,GAAG,EAAE,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC;iBAC9C,MAAM,CAAC,CAAC,KAAK,EAAmB,EAAE,CAAC,KAAK,KAAK,SAAS,IAAI,KAAK,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,CAAC;iBAClF,IAAI,CAAC,IAAI,CAAC,CAAC;YAChB,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBACvB,OAAO,EAAE,CAAC;YACd,CAAC;YAED,OAAO;gBACH,IAAA,qBAAW,EAAC;oBACR,QAAQ,EAAE,KAAK,CAAC,QAAQ;oBACxB,OAAO;oBACP,IAAI,EAAE,YAAY;oBAClB,UAAU,EAAE,KAAK,CAAC,cAAc;oBAChC,WAAW,EAAE,KAAK,CAAC,WAAW;oBAC9B,KAAK,EAAE,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,UAAU,EAAE;iBACtD,CAAC;aACL,CAAC;QACN,CAAC;IACL,CAAC;AACL,CAAC;AAED,SAAS,kBAAkB,CACvB,KAAqB,EACrB,KAAkD;IAElD,OAAO,KAAK,CAAC,MAAM,GAAG,CAAC,IAAI,CAAC,KAAK,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,EAAE,KAAK,IAAI,CAAC,CAAC,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;QACnE,KAAK,CAAC,GAAG,EAAE,CAAC;IAChB,CAAC;IAED,IAAI,KAAK,CAAC,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QAC/B,KAAK,CAAC,IAAI,CAAC;YACP,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;SACnB,CAAC,CAAC;IACP,CAAC;AACL,CAAC;AAED,SAA
S,eAAe,CAAC,KAA+C;IACpE,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC/B,OAAO,KAAK,CAAC,IAAI,CAAC;IACtB,CAAC;IAED,OAAO,SAAS,KAAK,CAAC,QAAQ,KAAK,KAAK,CAAC,IAAI,UAAU,CAAC;AAC5D,CAAC;AAED,SAAS,eAAe,CAAC,KAA+C;IACpE,MAAM,KAAK,GAA4B;QACnC,SAAS,EAAE,MAAM;QACjB,QAAQ,EAAE,UAAU;KACvB,CAAC;IAEF,IAAI,KAAK,CAAC,QAAQ,KAAK,SAAS,EAAE,CAAC;QAC/B,KAAK,CAAC,QAAQ,GAAG,KAAK,CAAC,QAAQ,CAAC;IACpC,CAAC;IAED,OAAO,KAAK,CAAC;AACjB,CAAC"}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
import type { Chunk, RawDocument } from '../types';
|
|
2
|
+
import type { Chunker, TextSplitOptions } from './chunker';
|
|
3
|
+
export interface QaPair {
|
|
4
|
+
question: string;
|
|
5
|
+
answer: string;
|
|
6
|
+
content: string;
|
|
7
|
+
startOffset: number;
|
|
8
|
+
endOffset: number;
|
|
9
|
+
}
|
|
10
|
+
export interface QaPairChunkerOptions extends TextSplitOptions {
|
|
11
|
+
minPairs?: number;
|
|
12
|
+
fallbackChunker?: Chunker;
|
|
13
|
+
}
|
|
14
|
+
export declare class QaPairChunker implements Chunker {
|
|
15
|
+
private readonly minPairs;
|
|
16
|
+
private readonly fallbackChunker;
|
|
17
|
+
constructor(options?: QaPairChunkerOptions);
|
|
18
|
+
chunk(document: RawDocument): Promise<Chunk[]>;
|
|
19
|
+
chunkMany(documents: RawDocument[]): Promise<Chunk[]>;
|
|
20
|
+
}
|
|
21
|
+
export declare function hasQaPairs(text: string, minPairs?: number): boolean;
|
|
22
|
+
export declare function extractQaPairs(text: string): QaPair[];
|
|
23
|
+
//# sourceMappingURL=qa-pair-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qa-pair-chunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/qa-pair-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACR,KAAK,EACL,WAAW,EACd,MAAM,UAAU,CAAC;AAKlB,OAAO,KAAK,EACR,OAAO,EACP,gBAAgB,EACnB,MAAM,WAAW,CAAC;AAQnB,MAAM,WAAW,MAAM;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,MAAM,EAAE,MAAM,CAAC;IACf,OAAO,EAAE,MAAM,CAAC;IAChB,WAAW,EAAE,MAAM,CAAC;IACpB,SAAS,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,oBAAqB,SAAQ,gBAAgB;IAC1D,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,eAAe,CAAC,EAAE,OAAO,CAAC;CAC7B;AAgBD,qBAAa,aAAc,YAAW,OAAO;IACzC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAU;gBAE9B,OAAO,GAAE,oBAAyB;IAKxC,KAAK,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IAoB9C,SAAS,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;CAG9D;AAED,wBAAgB,UAAU,CAAC,IAAI,EAAE,MAAM,EAAE,QAAQ,SAAI,GAAG,OAAO,CAE9D;AAED,wBAAgB,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,EAAE,CAkDrD"}
|
|
@@ -0,0 +1,162 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.QaPairChunker = void 0;
|
|
4
|
+
exports.hasQaPairs = hasQaPairs;
|
|
5
|
+
exports.extractQaPairs = extractQaPairs;
|
|
6
|
+
const chunker_1 = require("./chunker");
|
|
7
|
+
const recursive_chunker_1 = require("./recursive-chunker");
|
|
8
|
+
// Matches a question marker line: optional Markdown heading hashes (1-6), then
// "Q", "Question", "问" or "问题", an optional number, and a colon. Group 1
// captures the rest of the line. Case-insensitive, Unicode mode.
// NOTE(review): the character class lists ":" twice as rendered here; possibly a
// full-width colon was lost in transcription. Confirm against the source file.
const QUESTION_MARKER_PATTERN = /^(?:#{1,6}\s*)?(?:Q(?:uestion)?|问(?:题)?)(?:\s*\d+)?[::]\s*(.*)$/iu;
|
|
9
|
+
// Matches an answer marker line: "A", "Answer", "答" or "答案", an optional
// number, and a colon. Group 1 captures the rest of the line. Case-insensitive,
// Unicode mode. Unlike the question pattern, no Markdown heading prefix is allowed.
// NOTE(review): the character class lists ":" twice as rendered here; possibly a
// full-width colon was lost in transcription. Confirm against the source file.
const ANSWER_MARKER_PATTERN = /^(?:A(?:nswer)?|答(?:案)?)(?:\s*\d+)?[::]\s*(.*)$/iu;
|
|
10
|
+
/**
 * Chunker that turns each question/answer pair of a document into its own
 * chunk. Documents with fewer pairs than `minPairs` are handed to the
 * fallback chunker instead.
 */
class QaPairChunker {
    minPairs;
    fallbackChunker;
    /**
     * @param options Optional settings. `minPairs` is normalised to a positive
     *   integer (default 1); `fallbackChunker` defaults to a RecursiveChunker
     *   constructed from the same options object.
     */
    constructor(options = {}) {
        this.minPairs = toPositiveInteger(options.minPairs, 1);
        const suppliedFallback = options.fallbackChunker;
        this.fallbackChunker = suppliedFallback !== undefined && suppliedFallback !== null
            ? suppliedFallback
            : new recursive_chunker_1.RecursiveChunker(options);
    }
    /**
     * Chunks one document.
     * @param document Document whose `content` is scanned for Q&A pairs.
     * @returns One chunk per pair, or the fallback chunker's result when too
     *   few pairs were found.
     */
    async chunk(document) {
        const qaPairs = extractQaPairs(document.content);
        if (qaPairs.length < this.minPairs) {
            // Not enough Q&A structure: let the fallback decide how to split.
            return await this.fallbackChunker.chunk(document);
        }
        const chunks = [];
        for (const [pairIndex, qaPair] of qaPairs.entries()) {
            chunks.push((0, chunker_1.createChunk)({
                document,
                content: qaPair.content,
                kind: 'text',
                chunkIndex: pairIndex,
                startOffset: qaPair.startOffset,
                endOffset: qaPair.endOffset,
                extra: {
                    strategy: 'qa-pair',
                    pairIndex: pairIndex,
                },
            }));
        }
        return chunks;
    }
    /**
     * Chunks several documents by delegating to the shared
     * `chunkManyDocuments` helper with this chunker.
     * @param documents Documents to chunk.
     */
    async chunkMany(documents) {
        return await (0, chunker_1.chunkManyDocuments)(this, documents);
    }
}
|
|
39
|
+
exports.QaPairChunker = QaPairChunker;
|
|
40
|
+
/**
 * Tells whether `text` contains enough complete question/answer pairs.
 * @param text Text to scan.
 * @param minPairs Required number of pairs; normalised to a positive integer, default 1.
 * @returns True when at least that many pairs were extracted.
 */
function hasQaPairs(text, minPairs = 1) {
    const foundCount = extractQaPairs(text).length;
    const requiredCount = toPositiveInteger(minPairs, 1);
    return foundCount >= requiredCount;
}
|
|
43
|
+
/**
 * Extracts question/answer pairs from `text` by walking it line by line.
 *
 * A question marker line opens a new draft (closing the previous one). An
 * answer marker line switches the open draft to its answer part. Any other
 * line is appended to whichever part of the draft is currently being read.
 * Lines before the first question marker are ignored.
 *
 * @param text Text to scan.
 * @returns The pairs whose question and answer are both non-empty, in order.
 */
function extractQaPairs(text) {
    const collected = [];
    let draft;
    // Finalises the open draft (if any) and keeps it when it forms a valid pair.
    const flushDraft = () => {
        const finished = finalizeDraft(draft, text);
        if (finished !== undefined) {
            collected.push(finished);
        }
    };
    for (const line of splitLines(text)) {
        const questionText = readQuestionMarker(line.text);
        if (questionText !== undefined) {
            flushDraft();
            draft = {
                questionLines: questionText.length > 0 ? [questionText] : [],
                answerLines: [],
                startOffset: line.startOffset,
                endOffset: line.endOffset,
                state: 'question',
            };
            continue;
        }
        if (draft === undefined) {
            // Nothing is open yet, so this line belongs to no pair.
            continue;
        }
        const answerText = readAnswerMarker(line.text);
        if (answerText !== undefined) {
            draft.state = 'answer';
            if (answerText.length > 0) {
                draft.answerLines.push(answerText);
            }
            draft.endOffset = line.endOffset;
        }
        else if (draft.state === 'question') {
            // Continuation of a multi-line question; the end offset only
            // advances once answer text is seen.
            appendLine(draft.questionLines, line.text);
        }
        else {
            draft.answerLines.push(line.text);
            draft.endOffset = line.endOffset;
        }
    }
    flushDraft();
    return collected;
}
|
|
88
|
+
/**
 * Converts an in-progress draft into a finished pair.
 * @param draft Draft collected so far, or undefined when none is open.
 * @param text Full source text the draft offsets refer to.
 * @returns The pair, or undefined when there is no draft, the question or
 *   answer is empty, or the sliced content is empty after trimming.
 */
function finalizeDraft(draft, text) {
    if (draft === undefined) {
        return undefined;
    }
    const { questionLines, answerLines, startOffset, endOffset } = draft;
    const question = normalizeMultilineText(questionLines);
    const answer = normalizeMultilineText(answerLines);
    // Content is the raw source span (markers included), trimmed; it is only
    // computed when both halves of the pair are present.
    const content = question.length > 0 && answer.length > 0
        ? text.slice(startOffset, endOffset).trim()
        : '';
    if (content.length === 0) {
        return undefined;
    }
    return { question, answer, content, startOffset, endOffset };
}
|
|
109
|
+
/**
 * Splits `text` on "\n" and records where each line sits in the original string.
 * @param text Text to split.
 * @returns One entry per line with its `text`, `startOffset` and exclusive
 *   `endOffset`. A "\r" directly before the line end is excluded from the
 *   line. Text ending in "\n" yields a final empty line.
 */
function splitLines(text) {
    const result = [];
    let cursor = 0;
    for (;;) {
        const lineFeedAt = text.indexOf('\n', cursor);
        const isLastLine = lineFeedAt < 0;
        const rawEnd = isLastLine ? text.length : lineFeedAt;
        // Drop the "\r" of a CRLF pair so offsets cover only the line's content.
        const lineEnd = text[rawEnd - 1] === '\r' ? rawEnd - 1 : rawEnd;
        result.push({
            text: text.slice(cursor, lineEnd),
            startOffset: cursor,
            endOffset: lineEnd,
        });
        if (isLastLine) {
            return result;
        }
        cursor = lineFeedAt + 1;
    }
}
|
|
129
|
+
/**
 * Reads the question text from a line that starts with a question marker.
 * @param line Single line of text.
 * @returns The trimmed text after the marker (possibly empty), or undefined
 *   when the line is not a question marker line.
 */
function readQuestionMarker(line) {
    const questionText = readMarker(line, QUESTION_MARKER_PATTERN);
    return questionText;
}
|
|
132
|
+
/**
 * Reads the answer text from a line that starts with an answer marker.
 * @param line Single line of text.
 * @returns The trimmed text after the marker (possibly empty), or undefined
 *   when the line is not an answer marker line.
 */
function readAnswerMarker(line) {
    const answerText = readMarker(line, ANSWER_MARKER_PATTERN);
    return answerText;
}
|
|
135
|
+
/**
 * Matches the trimmed `line` against `pattern` and returns its first capture group.
 * @param line Single line of text.
 * @param pattern Regular expression whose group 1 holds the text after the marker.
 * @returns The trimmed capture, or undefined when the line does not match or
 *   group 1 did not participate in the match.
 */
function readMarker(line, pattern) {
    const found = line.trim().match(pattern);
    if (found === null) {
        return undefined;
    }
    const captured = found[1];
    return captured === undefined ? undefined : captured.trim();
}
|
|
139
|
+
function appendLine(lines, line) {
|
|
140
|
+
if (line.trim().length === 0 && lines.length === 0) {
|
|
141
|
+
return;
|
|
142
|
+
}
|
|
143
|
+
lines.push(line);
|
|
144
|
+
}
|
|
145
|
+
/**
 * Joins `lines` with "\n" and strips surrounding whitespace.
 *
 * Leading and trailing blank lines need no separate handling: trimming the
 * joined string removes both whitespace and line terminators at each end, so
 * any run of blank lines there disappears with it. Blank lines between
 * non-blank lines are kept.
 *
 * @param lines Lines to combine.
 * @returns The joined, trimmed text; an empty string when every line is blank.
 */
function normalizeMultilineText(lines) {
    return lines.join('\n').trim();
}
|
|
156
|
+
/**
 * Normalises an optional numeric setting to a positive integer.
 *
 * The value is floored before the positivity check, so fractions in (0, 1)
 * fall back instead of turning into 0.
 *
 * @param value Candidate value; may be undefined or not a finite number.
 * @param fallback Value returned when `value` cannot be used.
 * @returns `Math.floor(value)` when that is at least 1, otherwise `fallback`.
 */
function toPositiveInteger(value, fallback) {
    // Number.isFinite is false for undefined, NaN, infinities and non-numbers.
    if (!Number.isFinite(value)) {
        return fallback;
    }
    const floored = Math.floor(value);
    return floored >= 1 ? floored : fallback;
}
|
|
162
|
+
//# sourceMappingURL=qa-pair-chunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"qa-pair-chunker.js","sourceRoot":"","sources":["../../../src/rag/chunking/qa-pair-chunker.ts"],"names":[],"mappings":";;;AAgFA,gCAEC;AAED,wCAkDC;AAlID,uCAGmB;AAKnB,2DAE6B;AAE7B,MAAM,uBAAuB,GAAG,mEAAmE,CAAC;AACpG,MAAM,qBAAqB,GAAG,mDAAmD,CAAC;AA6BlF,MAAa,aAAa;IACL,QAAQ,CAAS;IACjB,eAAe,CAAU;IAE1C,YAAY,UAAgC,EAAE;QAC1C,IAAI,CAAC,QAAQ,GAAG,iBAAiB,CAAC,OAAO,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;QACvD,IAAI,CAAC,eAAe,GAAG,OAAO,CAAC,eAAe,IAAI,IAAI,oCAAgB,CAAC,OAAO,CAAC,CAAC;IACpF,CAAC;IAED,KAAK,CAAC,KAAK,CAAC,QAAqB;QAC7B,MAAM,KAAK,GAAG,cAAc,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC/C,IAAI,KAAK,CAAC,MAAM,GAAG,IAAI,CAAC,QAAQ,EAAE,CAAC;YAC/B,OAAO,MAAM,IAAI,CAAC,eAAe,CAAC,KAAK,CAAC,QAAQ,CAAC,CAAC;QACtD,CAAC;QAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE,CAAC,IAAA,qBAAW,EAAC;YAC1C,QAAQ;YACR,OAAO,EAAE,IAAI,CAAC,OAAO;YACrB,IAAI,EAAE,MAAM;YACZ,UAAU,EAAE,KAAK;YACjB,WAAW,EAAE,IAAI,CAAC,WAAW;YAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;YACzB,KAAK,EAAE;gBACH,QAAQ,EAAE,SAAS;gBACnB,SAAS,EAAE,KAAK;aACnB;SACJ,CAAC,CAAC,CAAC;IACR,CAAC;IAED,KAAK,CAAC,SAAS,CAAC,SAAwB;QACpC,OAAO,MAAM,IAAA,4BAAkB,EAAC,IAAI,EAAE,SAAS,CAAC,CAAC;IACrD,CAAC;CACJ;AAhCD,sCAgCC;AAED,SAAgB,UAAU,CAAC,IAAY,EAAE,QAAQ,GAAG,CAAC;IACjD,OAAO,cAAc,CAAC,IAAI,CAAC,CAAC,MAAM,IAAI,iBAAiB,CAAC,QAAQ,EAAE,CAAC,CAAC,CAAC;AACzE,CAAC;AAED,SAAgB,cAAc,CAAC,IAAY;IACvC,MAAM,KAAK,GAAa,EAAE,CAAC;IAC3B,IAAI,OAAgC,CAAC;IAErC,KAAK,MAAM,IAAI,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QAClC,MAAM,QAAQ,GAAG,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/C,IAAI,QAAQ,KAAK,SAAS,EAAE,CAAC;YACzB,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;YAC1C,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;gBACrB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACrB,CAAC;YAED,OAAO,GAAG;gBACN,aAAa,EAAE,QAAQ,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE;gBACpD,WAAW,EAAE,EAAE;gBACf,WAAW,EAAE,IAAI,CAAC,WAAW;gBAC7B,SAAS,EAAE,IAAI,CAAC,SAAS;gBACzB,KAAK,EAAE,UAAU;aACpB,CAAC;YACF,SAAS;QACb,CAAC;QAED,MAAM,MAAM,GAAG,gBAAgB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC3C,IAAI,MAAM,KAAK,SAAS,IAAI,OAAO,KA
AK,SAAS,EAAE,CAAC;YAChD,OAAO,CAAC,KAAK,GAAG,QAAQ,CAAC;YACzB,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACpB,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;YACrC,CAAC;YACD,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;YACnC,SAAS;QACb,CAAC;QAED,IAAI,OAAO,KAAK,SAAS,EAAE,CAAC;YACxB,SAAS;QACb,CAAC;QAED,IAAI,OAAO,CAAC,KAAK,KAAK,UAAU,EAAE,CAAC;YAC/B,UAAU,CAAC,OAAO,CAAC,aAAa,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC;QACjD,CAAC;aAAM,CAAC;YACJ,OAAO,CAAC,WAAW,CAAC,IAAI,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACpC,OAAO,CAAC,SAAS,GAAG,IAAI,CAAC,SAAS,CAAC;QACvC,CAAC;IACL,CAAC;IAED,MAAM,IAAI,GAAG,aAAa,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC;IAC1C,IAAI,IAAI,KAAK,SAAS,EAAE,CAAC;QACrB,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IACrB,CAAC;IAED,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,SAAS,aAAa,CAAC,KAA8B,EAAE,IAAY;IAC/D,IAAI,KAAK,KAAK,SAAS,EAAE,CAAC;QACtB,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,MAAM,QAAQ,GAAG,sBAAsB,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;IAC7D,MAAM,MAAM,GAAG,sBAAsB,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;IACzD,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC/C,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,WAAW,EAAE,KAAK,CAAC,SAAS,CAAC,CAAC,IAAI,EAAE,CAAC;IACtE,IAAI,OAAO,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACvB,OAAO,SAAS,CAAC;IACrB,CAAC;IAED,OAAO;QACH,QAAQ;QACR,MAAM;QACN,OAAO;QACP,WAAW,EAAE,KAAK,CAAC,WAAW;QAC9B,SAAS,EAAE,KAAK,CAAC,SAAS;KAC7B,CAAC;AACN,CAAC;AAED,SAAS,UAAU,CAAC,IAAY;IAC5B,MAAM,KAAK,GAAW,EAAE,CAAC;IACzB,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,OAAO,WAAW,IAAI,IAAI,CAAC,MAAM,EAAE,CAAC;QAChC,MAAM,YAAY,GAAG,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,WAAW,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC,CAAC,YAAY,CAAC;QACnE,MAAM,iBAAiB,GAAG,IAAI,CAAC,YAAY,GAAG,CAAC,CAAC,KAAK,IAAI,CAAC;QAC1D,MAAM,SAAS,GAAG,iBAAiB,CAAC,CAAC,CAAC,YAAY,GAAG,CAAC,CAAC,CAAC,CAAC,YAAY,CAAC;QAEtE,KAAK,CAAC,IAAI,CAAC;YACP,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,SAAS,CAAC;YACxC,WAAW;YACX,SAAS;SACZ,CAAC,CAAC;QAEH,IAAI,YAAY,GAAG,CAAC,EAAE,CAAC;YACnB,MAAM;QACV,CAAC;QAED,WAAW,GAAG,YAAY,GAAG,CAAC,CAAC;IACnC,CAAC;
IAED,OAAO,KAAK,CAAC;AACjB,CAAC;AAED,SAAS,kBAAkB,CAAC,IAAY;IACpC,OAAO,UAAU,CAAC,IAAI,EAAE,uBAAuB,CAAC,CAAC;AACrD,CAAC;AAED,SAAS,gBAAgB,CAAC,IAAY;IAClC,OAAO,UAAU,CAAC,IAAI,EAAE,qBAAqB,CAAC,CAAC;AACnD,CAAC;AAED,SAAS,UAAU,CAAC,IAAY,EAAE,OAAe;IAC7C,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;IAEzC,OAAO,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC;AAC9B,CAAC;AAED,SAAS,UAAU,CAAC,KAAe,EAAE,IAAY;IAC7C,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACjD,OAAO;IACX,CAAC;IAED,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AACrB,CAAC;AAED,SAAS,sBAAsB,CAAC,KAAe;IAC3C,IAAI,KAAK,GAAG,CAAC,CAAC;IACd,IAAI,GAAG,GAAG,KAAK,CAAC,MAAM,CAAC;IAEvB,OAAO,KAAK,GAAG,GAAG,IAAI,KAAK,CAAC,KAAK,CAAC,EAAE,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACtD,KAAK,IAAI,CAAC,CAAC;IACf,CAAC;IACD,OAAO,GAAG,GAAG,KAAK,IAAI,KAAK,CAAC,GAAG,GAAG,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QACxD,GAAG,IAAI,CAAC,CAAC;IACb,CAAC;IAED,OAAO,KAAK,CAAC,KAAK,CAAC,KAAK,EAAE,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,IAAI,EAAE,CAAC;AACrD,CAAC;AAED,SAAS,iBAAiB,CAAC,KAAyB,EAAE,QAAgB;IAClE,IAAI,KAAK,KAAK,SAAS,IAAI,CAAC,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,EAAE,CAAC;QAC/D,OAAO,QAAQ,CAAC;IACpB,CAAC;IAED,OAAO,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;AAC7B,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Embedder } from '../embeddings';
|
|
2
|
+
import type { Chunk, RawDocument } from '../types';
|
|
3
|
+
import type { Chunker, TextSplitOptions } from './chunker';
|
|
4
|
+
/**
 * Options accepted by the SemanticChunker constructor, on top of the
 * inherited TextSplitOptions.
 */
export interface SemanticChunkerOptions extends TextSplitOptions {
|
|
5
|
+
/** Embedder supplied to the chunker; the only required option. */
embedder: Embedder;
|
|
6
|
+
/** NOTE(review): presumably the similarity cut-off that decides where chunk boundaries fall; confirm against the implementation. */
similarityThreshold?: number;
|
|
7
|
+
/** NOTE(review): presumably how much text adjacent chunks share; unit not visible here, confirm against the implementation. */
overlapLength?: number;
|
|
8
|
+
}
|
|
9
|
+
/**
 * Chunker configured with an Embedder plus threshold and length settings.
 * NOTE(review): the implementation is not part of this declaration;
 * presumably it groups text by embedding similarity. Confirm against
 * semantic-chunker.js before relying on that.
 */
export declare class SemanticChunker implements Chunker {
|
|
10
|
+
private readonly embedder;
|
|
11
|
+
private readonly similarityThreshold;
|
|
12
|
+
private readonly maxChunkLength;
|
|
13
|
+
private readonly minChunkLength;
|
|
14
|
+
private readonly overlapLength;
|
|
15
|
+
/** @param options Required settings; `embedder` must be provided. */
constructor(options: SemanticChunkerOptions);
|
|
16
|
+
/** Chunks a single document, resolving to its chunks. */
chunk(document: RawDocument): Promise<Chunk[]>;
|
|
17
|
+
/** Chunks several documents, resolving to a single array of chunks. */
chunkMany(documents: RawDocument[]): Promise<Chunk[]>;
|
|
18
|
+
}
|
|
19
|
+
//# sourceMappingURL=semantic-chunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic-chunker.d.ts","sourceRoot":"","sources":["../../../src/rag/chunking/semantic-chunker.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAK9C,OAAO,KAAK,EACR,KAAK,EACL,WAAW,EACd,MAAM,UAAU,CAAC;AAQlB,OAAO,KAAK,EACR,OAAO,EACP,gBAAgB,EACnB,MAAM,WAAW,CAAC;AAKnB,MAAM,WAAW,sBAAuB,SAAQ,gBAAgB;IAC5D,QAAQ,EAAE,QAAQ,CAAC;IACnB,mBAAmB,CAAC,EAAE,MAAM,CAAC;IAC7B,aAAa,CAAC,EAAE,MAAM,CAAC;CAC1B;AAaD,qBAAa,eAAgB,YAAW,OAAO;IAC3C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,mBAAmB,CAAS;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAS;gBAE3B,OAAO,EAAE,sBAAsB;IAoBrC,KAAK,CAAC,QAAQ,EAAE,WAAW,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;IA2C9C,SAAS,CAAC,SAAS,EAAE,WAAW,EAAE,GAAG,OAAO,CAAC,KAAK,EAAE,CAAC;CAG9D"}
|