voctar 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +102 -0
- package/dist/index.d.ts +6 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +29 -0
- package/dist/index.js.map +1 -0
- package/dist/src/chunking/index.d.ts +48 -0
- package/dist/src/chunking/index.d.ts.map +1 -0
- package/dist/src/chunking/index.js +123 -0
- package/dist/src/chunking/index.js.map +1 -0
- package/dist/src/chunking/strategies/fixed.d.ts +14 -0
- package/dist/src/chunking/strategies/fixed.d.ts.map +1 -0
- package/dist/src/chunking/strategies/fixed.js +111 -0
- package/dist/src/chunking/strategies/fixed.js.map +1 -0
- package/dist/src/chunking/strategies/paragraph.d.ts +6 -0
- package/dist/src/chunking/strategies/paragraph.d.ts.map +1 -0
- package/dist/src/chunking/strategies/paragraph.js +84 -0
- package/dist/src/chunking/strategies/paragraph.js.map +1 -0
- package/dist/src/chunking/strategies/recursive.d.ts +17 -0
- package/dist/src/chunking/strategies/recursive.d.ts.map +1 -0
- package/dist/src/chunking/strategies/recursive.js +192 -0
- package/dist/src/chunking/strategies/recursive.js.map +1 -0
- package/dist/src/chunking/strategies/semantic.d.ts +96 -0
- package/dist/src/chunking/strategies/semantic.d.ts.map +1 -0
- package/dist/src/chunking/strategies/semantic.js +587 -0
- package/dist/src/chunking/strategies/semantic.js.map +1 -0
- package/dist/src/chunking/strategies/sentence.d.ts +7 -0
- package/dist/src/chunking/strategies/sentence.d.ts.map +1 -0
- package/dist/src/chunking/strategies/sentence.js +116 -0
- package/dist/src/chunking/strategies/sentence.js.map +1 -0
- package/dist/src/chunking/types.d.ts +45 -0
- package/dist/src/chunking/types.d.ts.map +1 -0
- package/dist/src/chunking/types.js +4 -0
- package/dist/src/chunking/types.js.map +1 -0
- package/dist/src/chunking/utils/tokenizer.d.ts +10 -0
- package/dist/src/chunking/utils/tokenizer.d.ts.map +1 -0
- package/dist/src/chunking/utils/tokenizer.js +50 -0
- package/dist/src/chunking/utils/tokenizer.js.map +1 -0
- package/dist/src/providers/embeddings/index.d.ts +3 -0
- package/dist/src/providers/embeddings/index.d.ts.map +1 -0
- package/dist/src/providers/embeddings/index.js +7 -0
- package/dist/src/providers/embeddings/index.js.map +1 -0
- package/dist/src/providers/embeddings/openai.d.ts +21 -0
- package/dist/src/providers/embeddings/openai.d.ts.map +1 -0
- package/dist/src/providers/embeddings/openai.js +86 -0
- package/dist/src/providers/embeddings/openai.js.map +1 -0
- package/dist/src/providers/index.d.ts +3 -0
- package/dist/src/providers/index.d.ts.map +1 -0
- package/dist/src/providers/index.js +20 -0
- package/dist/src/providers/index.js.map +1 -0
- package/dist/src/providers/stores/index.d.ts +6 -0
- package/dist/src/providers/stores/index.d.ts.map +1 -0
- package/dist/src/providers/stores/index.js +11 -0
- package/dist/src/providers/stores/index.js.map +1 -0
- package/dist/src/providers/stores/memory.d.ts +18 -0
- package/dist/src/providers/stores/memory.d.ts.map +1 -0
- package/dist/src/providers/stores/memory.js +169 -0
- package/dist/src/providers/stores/memory.js.map +1 -0
- package/dist/src/providers/stores/qdrant.d.ts +28 -0
- package/dist/src/providers/stores/qdrant.d.ts.map +1 -0
- package/dist/src/providers/stores/qdrant.js +223 -0
- package/dist/src/providers/stores/qdrant.js.map +1 -0
- package/dist/src/providers/stores/sqlite.d.ts +38 -0
- package/dist/src/providers/stores/sqlite.d.ts.map +1 -0
- package/dist/src/providers/stores/sqlite.js +306 -0
- package/dist/src/providers/stores/sqlite.js.map +1 -0
- package/dist/src/types.d.ts +111 -0
- package/dist/src/types.d.ts.map +1 -0
- package/dist/src/types.js +32 -0
- package/dist/src/types.js.map +1 -0
- package/dist/src/vector.d.ts +74 -0
- package/dist/src/vector.d.ts.map +1 -0
- package/dist/src/vector.js +505 -0
- package/dist/src/vector.js.map +1 -0
- package/docs/API.md +361 -0
- package/docs/CHUNKING.md +280 -0
- package/docs/CUSTOM_PROVIDERS.md +101 -0
- package/docs/README.md +11 -0
- package/docs/STORAGE_BACKENDS.md +189 -0
- package/docs/assets/vectar.png +0 -0
- package/package.json +46 -0
|
@@ -0,0 +1,587 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.SemanticChunkingStrategy = void 0;
|
|
4
|
+
// Semantic chunking strategy - intelligent chunking based on meaning
|
|
5
|
+
const uuid_1 = require("uuid");
|
|
6
|
+
const tokenizer_1 = require("../utils/tokenizer");
|
|
7
|
+
class SemanticChunkingStrategy {
|
|
8
|
+
getName() {
|
|
9
|
+
return 'semantic';
|
|
10
|
+
}
|
|
11
|
+
chunk(text, documentId, options) {
|
|
12
|
+
const opts = options;
|
|
13
|
+
// Get token limit from options or use defaults
|
|
14
|
+
const tokenLimit = opts.tokenLimit ?? 8192;
|
|
15
|
+
const maxChunkSize = opts.maxChunkSize ?? 1000;
|
|
16
|
+
// Set defaults - ensure soft/hard limits don't exceed token limit
|
|
17
|
+
const softLimit = Math.min(opts.softLimit ?? 800, tokenLimit);
|
|
18
|
+
const hardLimit = Math.min(opts.hardLimit ?? Math.min(maxChunkSize, tokenLimit), tokenLimit);
|
|
19
|
+
const similarityThreshold = opts.similarityThreshold ?? 0.75;
|
|
20
|
+
const contextOverlapPercent = opts.contextOverlapPercent ?? 0.12; // 12%
|
|
21
|
+
const smartOverlap = opts.smartOverlap ?? true;
|
|
22
|
+
const volatilityWindow = opts.volatilityWindow ?? 5;
|
|
23
|
+
const generateHeaders = opts.generateHeaders ?? false;
|
|
24
|
+
const stripNoise = opts.stripNoise ?? false;
|
|
25
|
+
const addRoleMarkers = opts.addRoleMarkers ?? true;
|
|
26
|
+
// Auto-detect content type if not specified
|
|
27
|
+
const contentType = opts.contentType ?? this.detectContentType(text);
|
|
28
|
+
// Step 1: Pre-segment into atoms
|
|
29
|
+
const atoms = this.preSegment(text, contentType, stripNoise, opts.noisePatterns);
|
|
30
|
+
if (atoms.length === 0) {
|
|
31
|
+
return [];
|
|
32
|
+
}
|
|
33
|
+
// Step 2: Semantic merging with embeddings (if provider available)
|
|
34
|
+
const chunks = this.semanticMerge(atoms, softLimit, hardLimit, similarityThreshold, volatilityWindow, opts.embeddingProvider);
|
|
35
|
+
// Step 3: Apply smart overlap
|
|
36
|
+
const chunksWithOverlap = smartOverlap
|
|
37
|
+
? this.applySmartOverlap(chunks, contextOverlapPercent, opts.embeddingProvider)
|
|
38
|
+
: chunks;
|
|
39
|
+
// Step 4: Convert to Chunk objects with metadata and validate token limits
|
|
40
|
+
const results = [];
|
|
41
|
+
let currentChunkIndex = 0;
|
|
42
|
+
for (let i = 0; i < chunksWithOverlap.length; i++) {
|
|
43
|
+
const chunk = chunksWithOverlap[i];
|
|
44
|
+
let chunkText = chunk.text;
|
|
45
|
+
// Add role markers if needed
|
|
46
|
+
if (addRoleMarkers && chunk.atoms) {
|
|
47
|
+
chunkText = this.addRoleMarkers(chunk.atoms);
|
|
48
|
+
}
|
|
49
|
+
// Validate token count and split if necessary
|
|
50
|
+
const chunkTokens = this.estimateTokens(chunkText);
|
|
51
|
+
if (chunkTokens > hardLimit) {
|
|
52
|
+
// This chunk exceeds the limit (likely due to overlap or role markers)
|
|
53
|
+
// Split it using recursive chunking as a fallback
|
|
54
|
+
const safeChunkSize = Math.floor(hardLimit * 0.9); // Use 90% for safety
|
|
55
|
+
// Simple recursive split by sentences/paragraphs
|
|
56
|
+
const subChunks = this.splitOversizedChunk(chunkText, safeChunkSize, hardLimit);
|
|
57
|
+
for (let j = 0; j < subChunks.length; j++) {
|
|
58
|
+
const subChunkText = subChunks[j];
|
|
59
|
+
const header = generateHeaders
|
|
60
|
+
? this.generateSimpleHeader(subChunkText)
|
|
61
|
+
: undefined;
|
|
62
|
+
const metadata = {
|
|
63
|
+
documentId,
|
|
64
|
+
chunkIndex: currentChunkIndex++,
|
|
65
|
+
totalChunks: chunksWithOverlap.length + subChunks.length - 1, // Will be updated later
|
|
66
|
+
startChar: chunk.startChar + (j > 0 ? subChunks.slice(0, j).join('').length : 0),
|
|
67
|
+
endChar: chunk.startChar + subChunks.slice(0, j + 1).join('').length,
|
|
68
|
+
header,
|
|
69
|
+
dependsOn: chunk.dependsOn,
|
|
70
|
+
topicVolatility: chunk.volatility,
|
|
71
|
+
roles: chunk.roles,
|
|
72
|
+
_isSubChunk: true,
|
|
73
|
+
_parentChunkIndex: i,
|
|
74
|
+
...opts.metadata,
|
|
75
|
+
};
|
|
76
|
+
results.push({
|
|
77
|
+
id: (0, uuid_1.v4)(),
|
|
78
|
+
text: subChunkText,
|
|
79
|
+
metadata,
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
}
|
|
83
|
+
else {
|
|
84
|
+
// Generate header if requested (Note: async not supported in sync chunk method)
|
|
85
|
+
// Headers would need to be generated separately or use cached embeddings
|
|
86
|
+
const header = generateHeaders
|
|
87
|
+
? this.generateSimpleHeader(chunkText)
|
|
88
|
+
: undefined;
|
|
89
|
+
const metadata = {
|
|
90
|
+
documentId,
|
|
91
|
+
chunkIndex: currentChunkIndex++,
|
|
92
|
+
totalChunks: chunksWithOverlap.length, // Will be updated later
|
|
93
|
+
startChar: chunk.startChar,
|
|
94
|
+
endChar: chunk.endChar,
|
|
95
|
+
header,
|
|
96
|
+
dependsOn: chunk.dependsOn,
|
|
97
|
+
topicVolatility: chunk.volatility,
|
|
98
|
+
roles: chunk.roles,
|
|
99
|
+
...opts.metadata,
|
|
100
|
+
};
|
|
101
|
+
results.push({
|
|
102
|
+
id: (0, uuid_1.v4)(),
|
|
103
|
+
text: chunkText,
|
|
104
|
+
metadata,
|
|
105
|
+
});
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
// Update totalChunks for all chunks
|
|
109
|
+
results.forEach(chunk => {
|
|
110
|
+
chunk.metadata.totalChunks = results.length;
|
|
111
|
+
});
|
|
112
|
+
return results;
|
|
113
|
+
}
|
|
114
|
+
/**
|
|
115
|
+
* Auto-detect if text is conversational based on role markers
|
|
116
|
+
*/
|
|
117
|
+
detectContentType(text) {
|
|
118
|
+
const lines = text.split('\n');
|
|
119
|
+
let conversationMarkers = 0;
|
|
120
|
+
// Check first 20 lines for conversation markers
|
|
121
|
+
const sampleLines = lines.slice(0, 20);
|
|
122
|
+
for (const line of sampleLines) {
|
|
123
|
+
const trimmed = line.trim();
|
|
124
|
+
// Look for role markers at start of line
|
|
125
|
+
if (/^(u|user|a|agent|assistant|tool|t|speaker\d+|system):/i.test(trimmed)) {
|
|
126
|
+
conversationMarkers++;
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
// If 30% or more lines have conversation markers, treat as conversation
|
|
130
|
+
const threshold = sampleLines.length * 0.3;
|
|
131
|
+
return conversationMarkers >= threshold ? 'conversation' : 'text';
|
|
132
|
+
}
|
|
133
|
+
/**
|
|
134
|
+
* Pre-segment text into atomic units (turns, paragraphs, sentences)
|
|
135
|
+
*/
|
|
136
|
+
preSegment(text, contentType, stripNoise, noisePatterns) {
|
|
137
|
+
if (contentType === 'conversation') {
|
|
138
|
+
return this.segmentConversation(text, stripNoise, noisePatterns);
|
|
139
|
+
}
|
|
140
|
+
else {
|
|
141
|
+
return this.segmentText(text, stripNoise, noisePatterns);
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
/**
|
|
145
|
+
* Segment conversation into turns (user/agent/tool)
|
|
146
|
+
*/
|
|
147
|
+
segmentConversation(text, stripNoise, noisePatterns) {
|
|
148
|
+
const atoms = [];
|
|
149
|
+
const lines = text.split('\n');
|
|
150
|
+
let currentTurn = '';
|
|
151
|
+
let currentRole;
|
|
152
|
+
let startChar = 0;
|
|
153
|
+
let currentStart = 0;
|
|
154
|
+
for (const line of lines) {
|
|
155
|
+
const trimmed = line.trim();
|
|
156
|
+
// Detect role markers
|
|
157
|
+
let role;
|
|
158
|
+
let content = trimmed;
|
|
159
|
+
if (trimmed.startsWith('u:') || trimmed.startsWith('user:')) {
|
|
160
|
+
role = 'user';
|
|
161
|
+
content = trimmed.replace(/^u:|^user:/i, '').trim();
|
|
162
|
+
}
|
|
163
|
+
else if (trimmed.startsWith('a:') || trimmed.startsWith('agent:') || trimmed.startsWith('assistant:')) {
|
|
164
|
+
role = 'agent';
|
|
165
|
+
content = trimmed.replace(/^a:|^agent:|^assistant:/i, '').trim();
|
|
166
|
+
}
|
|
167
|
+
else if (trimmed.startsWith('tool:') || trimmed.startsWith('t:')) {
|
|
168
|
+
role = 'tool';
|
|
169
|
+
content = trimmed.replace(/^tool:|^t:/i, '').trim();
|
|
170
|
+
}
|
|
171
|
+
// If we detected a new role and have accumulated content, save it
|
|
172
|
+
if (role && currentTurn) {
|
|
173
|
+
const cleaned = stripNoise ? this.stripNoise(currentTurn, noisePatterns) : currentTurn;
|
|
174
|
+
if (cleaned.trim()) {
|
|
175
|
+
atoms.push({
|
|
176
|
+
text: cleaned,
|
|
177
|
+
startChar: currentStart,
|
|
178
|
+
endChar: startChar,
|
|
179
|
+
type: 'turn',
|
|
180
|
+
role: currentRole,
|
|
181
|
+
});
|
|
182
|
+
}
|
|
183
|
+
currentTurn = content;
|
|
184
|
+
currentRole = role;
|
|
185
|
+
currentStart = startChar;
|
|
186
|
+
}
|
|
187
|
+
else if (role) {
|
|
188
|
+
// Start new turn
|
|
189
|
+
currentTurn = content;
|
|
190
|
+
currentRole = role;
|
|
191
|
+
currentStart = startChar;
|
|
192
|
+
}
|
|
193
|
+
else {
|
|
194
|
+
// Continue current turn
|
|
195
|
+
currentTurn += (currentTurn ? '\n' : '') + trimmed;
|
|
196
|
+
}
|
|
197
|
+
startChar += line.length + 1; // +1 for newline
|
|
198
|
+
}
|
|
199
|
+
// Add final turn
|
|
200
|
+
if (currentTurn.trim()) {
|
|
201
|
+
const cleaned = stripNoise ? this.stripNoise(currentTurn, noisePatterns) : currentTurn;
|
|
202
|
+
if (cleaned.trim()) {
|
|
203
|
+
atoms.push({
|
|
204
|
+
text: cleaned,
|
|
205
|
+
startChar: currentStart,
|
|
206
|
+
endChar: startChar,
|
|
207
|
+
type: 'turn',
|
|
208
|
+
role: currentRole,
|
|
209
|
+
});
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
return atoms;
|
|
213
|
+
}
|
|
214
|
+
/**
|
|
215
|
+
* Segment plain text into paragraphs and sentences
|
|
216
|
+
*/
|
|
217
|
+
segmentText(text, stripNoise, noisePatterns) {
|
|
218
|
+
const atoms = [];
|
|
219
|
+
// Split into paragraphs
|
|
220
|
+
const paragraphs = text.split(/\n\s*\n/);
|
|
221
|
+
let startChar = 0;
|
|
222
|
+
for (const para of paragraphs) {
|
|
223
|
+
const paraText = para.trim();
|
|
224
|
+
if (!paraText) {
|
|
225
|
+
startChar += para.length + 2; // +2 for \n\n
|
|
226
|
+
continue;
|
|
227
|
+
}
|
|
228
|
+
// Split paragraph into sentences
|
|
229
|
+
const sentences = this.splitIntoSentences(paraText);
|
|
230
|
+
for (const sentence of sentences) {
|
|
231
|
+
const cleaned = stripNoise ? this.stripNoise(sentence, noisePatterns) : sentence;
|
|
232
|
+
if (cleaned.trim()) {
|
|
233
|
+
const endChar = startChar + sentence.length;
|
|
234
|
+
atoms.push({
|
|
235
|
+
text: cleaned,
|
|
236
|
+
startChar,
|
|
237
|
+
endChar,
|
|
238
|
+
type: 'sentence',
|
|
239
|
+
});
|
|
240
|
+
startChar = endChar;
|
|
241
|
+
}
|
|
242
|
+
else {
|
|
243
|
+
startChar += sentence.length;
|
|
244
|
+
}
|
|
245
|
+
}
|
|
246
|
+
startChar += 2; // For paragraph break
|
|
247
|
+
}
|
|
248
|
+
return atoms;
|
|
249
|
+
}
|
|
250
|
+
/**
|
|
251
|
+
* Semantic merging with soft/hard limits
|
|
252
|
+
*/
|
|
253
|
+
semanticMerge(atoms, softLimit, hardLimit, similarityThreshold, volatilityWindow, embeddingProvider) {
|
|
254
|
+
const chunks = [];
|
|
255
|
+
let currentChunk = [];
|
|
256
|
+
let currentTokens = 0;
|
|
257
|
+
let recentEmbeddings = [];
|
|
258
|
+
const chunkIds = [];
|
|
259
|
+
for (let i = 0; i < atoms.length; i++) {
|
|
260
|
+
const atom = atoms[i];
|
|
261
|
+
const atomTokens = this.estimateTokens(atom.text);
|
|
262
|
+
// If a single atom exceeds hard limit, we need to split it
|
|
263
|
+
if (atomTokens > hardLimit) {
|
|
264
|
+
// Finalize current chunk if it exists
|
|
265
|
+
if (currentChunk.length > 0) {
|
|
266
|
+
const chunkId = (0, uuid_1.v4)();
|
|
267
|
+
chunkIds.push(chunkId);
|
|
268
|
+
chunks.push(this.finalizeChunk(currentChunk, chunkId));
|
|
269
|
+
currentChunk = [];
|
|
270
|
+
currentTokens = 0;
|
|
271
|
+
}
|
|
272
|
+
// Split the oversized atom using recursive strategy
|
|
273
|
+
// Note: This is a simplified split - the final validation will catch any remaining issues
|
|
274
|
+
const atomChunks = this.splitOversizedAtom(atom, hardLimit);
|
|
275
|
+
for (const atomChunk of atomChunks) {
|
|
276
|
+
const chunkId = (0, uuid_1.v4)();
|
|
277
|
+
chunkIds.push(chunkId);
|
|
278
|
+
chunks.push({
|
|
279
|
+
id: chunkId,
|
|
280
|
+
text: atomChunk.text,
|
|
281
|
+
startChar: atomChunk.startChar,
|
|
282
|
+
endChar: atomChunk.endChar,
|
|
283
|
+
atoms: [atomChunk],
|
|
284
|
+
volatility: 0,
|
|
285
|
+
roles: atom.role ? [atom.role] : [],
|
|
286
|
+
dependsOn: [],
|
|
287
|
+
});
|
|
288
|
+
}
|
|
289
|
+
recentEmbeddings = [];
|
|
290
|
+
continue;
|
|
291
|
+
}
|
|
292
|
+
// Check hard limit
|
|
293
|
+
if (currentTokens + atomTokens > hardLimit && currentChunk.length > 0) {
|
|
294
|
+
// Force cut
|
|
295
|
+
const chunkId = (0, uuid_1.v4)();
|
|
296
|
+
chunkIds.push(chunkId);
|
|
297
|
+
chunks.push(this.finalizeChunk(currentChunk, chunkId));
|
|
298
|
+
currentChunk = [atom];
|
|
299
|
+
currentTokens = atomTokens;
|
|
300
|
+
recentEmbeddings = [];
|
|
301
|
+
continue;
|
|
302
|
+
}
|
|
303
|
+
// Check soft limit - try semantic merging
|
|
304
|
+
if (currentTokens + atomTokens > softLimit && currentChunk.length > 0) {
|
|
305
|
+
// Use lexical similarity (embeddings would require async)
|
|
306
|
+
const lexicalSimilarity = this.computeLexicalSimilarity(currentChunk.map(a => a.text).join(' '), atom.text);
|
|
307
|
+
if (lexicalSimilarity < similarityThreshold) {
|
|
308
|
+
const chunkId = (0, uuid_1.v4)();
|
|
309
|
+
chunkIds.push(chunkId);
|
|
310
|
+
chunks.push(this.finalizeChunk(currentChunk, chunkId));
|
|
311
|
+
currentChunk = [atom];
|
|
312
|
+
currentTokens = atomTokens;
|
|
313
|
+
recentEmbeddings = [];
|
|
314
|
+
continue;
|
|
315
|
+
}
|
|
316
|
+
}
|
|
317
|
+
// Add atom to current chunk
|
|
318
|
+
currentChunk.push(atom);
|
|
319
|
+
currentTokens += atomTokens;
|
|
320
|
+
// Track volatility (simplified without actual embeddings)
|
|
321
|
+
if (recentEmbeddings.length >= volatilityWindow) {
|
|
322
|
+
recentEmbeddings.shift();
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
// Add final chunk
|
|
326
|
+
if (currentChunk.length > 0) {
|
|
327
|
+
const chunkId = (0, uuid_1.v4)();
|
|
328
|
+
chunkIds.push(chunkId);
|
|
329
|
+
chunks.push(this.finalizeChunk(currentChunk, chunkId));
|
|
330
|
+
}
|
|
331
|
+
return chunks;
|
|
332
|
+
}
|
|
333
|
+
/**
|
|
334
|
+
* Finalize chunk from atoms
|
|
335
|
+
*/
|
|
336
|
+
finalizeChunk(atoms, id) {
|
|
337
|
+
const text = atoms.map(a => a.text).join('\n');
|
|
338
|
+
const startChar = atoms[0].startChar;
|
|
339
|
+
const endChar = atoms[atoms.length - 1].endChar;
|
|
340
|
+
const roles = [...new Set(atoms.map(a => a.role).filter(Boolean))];
|
|
341
|
+
// Calculate topic volatility (simplified)
|
|
342
|
+
const volatility = this.calculateVolatility(atoms);
|
|
343
|
+
return {
|
|
344
|
+
id,
|
|
345
|
+
text,
|
|
346
|
+
startChar,
|
|
347
|
+
endChar,
|
|
348
|
+
atoms,
|
|
349
|
+
volatility,
|
|
350
|
+
roles,
|
|
351
|
+
dependsOn: [],
|
|
352
|
+
};
|
|
353
|
+
}
|
|
354
|
+
/**
|
|
355
|
+
* Calculate topic volatility (simplified - uses lexical changes)
|
|
356
|
+
*/
|
|
357
|
+
calculateVolatility(atoms) {
|
|
358
|
+
if (atoms.length < 2)
|
|
359
|
+
return 0;
|
|
360
|
+
let totalDissimilarity = 0;
|
|
361
|
+
for (let i = 1; i < atoms.length; i++) {
|
|
362
|
+
const similarity = this.computeLexicalSimilarity(atoms[i - 1].text, atoms[i].text);
|
|
363
|
+
totalDissimilarity += (1 - similarity);
|
|
364
|
+
}
|
|
365
|
+
return totalDissimilarity / (atoms.length - 1);
|
|
366
|
+
}
|
|
367
|
+
/**
|
|
368
|
+
* Apply smart overlap between chunks
|
|
369
|
+
*/
|
|
370
|
+
applySmartOverlap(chunks, overlapPercent, embeddingProvider) {
|
|
371
|
+
if (chunks.length < 2)
|
|
372
|
+
return chunks;
|
|
373
|
+
const result = [...chunks];
|
|
374
|
+
for (let i = 1; i < result.length; i++) {
|
|
375
|
+
const prevChunk = result[i - 1];
|
|
376
|
+
const currentChunk = result[i];
|
|
377
|
+
// Get trailing atoms from previous chunk
|
|
378
|
+
const overlapSize = Math.ceil(prevChunk.atoms.length * overlapPercent);
|
|
379
|
+
const trailingAtoms = prevChunk.atoms.slice(-overlapSize);
|
|
380
|
+
if (trailingAtoms.length === 0)
|
|
381
|
+
continue;
|
|
382
|
+
// Check if overlap is semantically relevant
|
|
383
|
+
const overlapText = trailingAtoms.map((a) => a.text).join('\n');
|
|
384
|
+
const similarity = this.computeLexicalSimilarity(overlapText, currentChunk.atoms[0]?.text || '');
|
|
385
|
+
// Only add overlap if it's relevant (similarity above threshold)
|
|
386
|
+
if (similarity > 0.3) {
|
|
387
|
+
currentChunk.text = overlapText + '\n---\n' + currentChunk.text;
|
|
388
|
+
currentChunk.dependsOn = currentChunk.dependsOn || [];
|
|
389
|
+
if (prevChunk.id) {
|
|
390
|
+
currentChunk.dependsOn.push(prevChunk.id);
|
|
391
|
+
}
|
|
392
|
+
}
|
|
393
|
+
}
|
|
394
|
+
return result;
|
|
395
|
+
}
|
|
396
|
+
/**
|
|
397
|
+
* Add role markers to text
|
|
398
|
+
*/
|
|
399
|
+
addRoleMarkers(atoms) {
|
|
400
|
+
return atoms
|
|
401
|
+
.map(atom => {
|
|
402
|
+
if (atom.role) {
|
|
403
|
+
return `${atom.role}: ${atom.text}`;
|
|
404
|
+
}
|
|
405
|
+
return atom.text;
|
|
406
|
+
})
|
|
407
|
+
.join('\n\n');
|
|
408
|
+
}
|
|
409
|
+
/**
|
|
410
|
+
* Generate simple header (synchronous version)
|
|
411
|
+
*/
|
|
412
|
+
generateSimpleHeader(text) {
|
|
413
|
+
// Extract first sentence as summary
|
|
414
|
+
const sentences = this.splitIntoSentences(text);
|
|
415
|
+
const summary = sentences[0]?.substring(0, 200) || '';
|
|
416
|
+
// Extract potential facts (dates, numbers, entities)
|
|
417
|
+
const facts = this.extractFacts(text);
|
|
418
|
+
return { summary, facts };
|
|
419
|
+
}
|
|
420
|
+
/**
|
|
421
|
+
* Extract facts from text (dates, numbers, key entities)
|
|
422
|
+
*/
|
|
423
|
+
extractFacts(text) {
|
|
424
|
+
const facts = [];
|
|
425
|
+
// Extract dates
|
|
426
|
+
const dateRegex = /\b\d{4}-\d{2}-\d{2}\b|\b\d{1,2}\/\d{1,2}\/\d{2,4}\b/g;
|
|
427
|
+
const dates = text.match(dateRegex);
|
|
428
|
+
if (dates)
|
|
429
|
+
facts.push(...dates);
|
|
430
|
+
// Extract IDs or references
|
|
431
|
+
const idRegex = /\b[A-Z]{2,}-\d+\b|\b[a-f0-9]{8}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{4}-[a-f0-9]{12}\b/gi;
|
|
432
|
+
const ids = text.match(idRegex);
|
|
433
|
+
if (ids)
|
|
434
|
+
facts.push(...ids.slice(0, 5)); // Limit to 5
|
|
435
|
+
// Extract key numbers with context
|
|
436
|
+
const numberRegex = /\$\d+(?:,\d{3})*(?:\.\d{2})?|\b\d+(?:,\d{3})*(?:\.\d+)?\s*(?:days?|hours?|minutes?|users?|items?|percent|%)/gi;
|
|
437
|
+
const numbers = text.match(numberRegex);
|
|
438
|
+
if (numbers)
|
|
439
|
+
facts.push(...numbers.slice(0, 5));
|
|
440
|
+
return [...new Set(facts)].slice(0, 10); // Max 10 unique facts
|
|
441
|
+
}
|
|
442
|
+
/**
|
|
443
|
+
* Strip noise from text
|
|
444
|
+
*/
|
|
445
|
+
stripNoise(text, customPatterns) {
|
|
446
|
+
let cleaned = text;
|
|
447
|
+
// Default noise patterns
|
|
448
|
+
const defaultPatterns = [
|
|
449
|
+
/\b(thanks|thank you|lol|haha|hmm|uh|um)\b/gi,
|
|
450
|
+
/^[\s\-_]+$/gm, // Empty lines with just whitespace/dashes
|
|
451
|
+
/\[signature\].*$/gi,
|
|
452
|
+
/^--+\s*$/gm, // Signature separators
|
|
453
|
+
];
|
|
454
|
+
const patterns = [...defaultPatterns, ...(customPatterns || [])];
|
|
455
|
+
for (const pattern of patterns) {
|
|
456
|
+
cleaned = cleaned.replace(pattern, '');
|
|
457
|
+
}
|
|
458
|
+
return cleaned.replace(/\n{3,}/g, '\n\n').trim();
|
|
459
|
+
}
|
|
460
|
+
/**
|
|
461
|
+
* Compute lexical similarity (Jaccard similarity on words)
|
|
462
|
+
*/
|
|
463
|
+
computeLexicalSimilarity(text1, text2) {
|
|
464
|
+
const words1 = new Set(text1.toLowerCase()
|
|
465
|
+
.replace(/[^\w\s]/g, '')
|
|
466
|
+
.split(/\s+/)
|
|
467
|
+
.filter(w => w.length > 2));
|
|
468
|
+
const words2 = new Set(text2.toLowerCase()
|
|
469
|
+
.replace(/[^\w\s]/g, '')
|
|
470
|
+
.split(/\s+/)
|
|
471
|
+
.filter(w => w.length > 2));
|
|
472
|
+
if (words1.size === 0 || words2.size === 0)
|
|
473
|
+
return 0;
|
|
474
|
+
const intersection = new Set([...words1].filter(w => words2.has(w)));
|
|
475
|
+
const union = new Set([...words1, ...words2]);
|
|
476
|
+
return intersection.size / union.size;
|
|
477
|
+
}
|
|
478
|
+
/**
|
|
479
|
+
* Split text into sentences
|
|
480
|
+
*/
|
|
481
|
+
splitIntoSentences(text) {
|
|
482
|
+
// Simple sentence splitter
|
|
483
|
+
return text
|
|
484
|
+
.replace(/([.!?]+)\s+/g, '$1\n')
|
|
485
|
+
.split('\n')
|
|
486
|
+
.map(s => s.trim())
|
|
487
|
+
.filter(s => s.length > 0);
|
|
488
|
+
}
|
|
489
|
+
/**
|
|
490
|
+
* Estimate token count using accurate tokenizer
|
|
491
|
+
*/
|
|
492
|
+
estimateTokens(text) {
|
|
493
|
+
return (0, tokenizer_1.countTokens)(text);
|
|
494
|
+
}
|
|
495
|
+
/**
|
|
496
|
+
* Split an oversized atom (turn, paragraph, or sentence)
|
|
497
|
+
*/
|
|
498
|
+
splitOversizedAtom(atom, hardLimit) {
|
|
499
|
+
const chunks = [];
|
|
500
|
+
const safeLimit = Math.floor(hardLimit * 0.9);
|
|
501
|
+
// Split by sentences if it's a paragraph or turn
|
|
502
|
+
const sentences = this.splitIntoSentences(atom.text);
|
|
503
|
+
let currentText = '';
|
|
504
|
+
let currentStart = atom.startChar;
|
|
505
|
+
let currentTokens = 0;
|
|
506
|
+
let textOffset = 0; // Track position in original text
|
|
507
|
+
for (const sentence of sentences) {
|
|
508
|
+
const sentTokens = (0, tokenizer_1.countTokens)(sentence);
|
|
509
|
+
if (currentTokens + sentTokens > safeLimit && currentText) {
|
|
510
|
+
chunks.push({
|
|
511
|
+
text: currentText.trim(),
|
|
512
|
+
startChar: currentStart,
|
|
513
|
+
endChar: atom.startChar + textOffset,
|
|
514
|
+
});
|
|
515
|
+
currentStart = atom.startChar + textOffset;
|
|
516
|
+
currentText = sentence;
|
|
517
|
+
currentTokens = sentTokens;
|
|
518
|
+
textOffset += currentText.length + 1; // +1 for space
|
|
519
|
+
}
|
|
520
|
+
else {
|
|
521
|
+
currentText += (currentText ? ' ' : '') + sentence;
|
|
522
|
+
currentTokens += sentTokens;
|
|
523
|
+
textOffset += sentence.length + (currentText.includes(sentence + ' ') ? 1 : 0);
|
|
524
|
+
}
|
|
525
|
+
}
|
|
526
|
+
if (currentText) {
|
|
527
|
+
chunks.push({
|
|
528
|
+
text: currentText.trim(),
|
|
529
|
+
startChar: currentStart,
|
|
530
|
+
endChar: atom.endChar,
|
|
531
|
+
});
|
|
532
|
+
}
|
|
533
|
+
return chunks.length > 0 ? chunks : [{ text: atom.text, startChar: atom.startChar, endChar: atom.endChar }];
|
|
534
|
+
}
|
|
535
|
+
/**
|
|
536
|
+
* Split an oversized chunk using simple recursive splitting
|
|
537
|
+
*/
|
|
538
|
+
splitOversizedChunk(text, maxTokens, hardLimit) {
|
|
539
|
+
const chunks = [];
|
|
540
|
+
// Try splitting by paragraphs first
|
|
541
|
+
const paragraphs = text.split(/\n\s*\n/).filter(p => p.trim());
|
|
542
|
+
let currentChunk = '';
|
|
543
|
+
let currentTokens = 0;
|
|
544
|
+
for (const para of paragraphs) {
|
|
545
|
+
const paraTokens = (0, tokenizer_1.countTokens)(para);
|
|
546
|
+
if (currentTokens + paraTokens > maxTokens && currentChunk) {
|
|
547
|
+
chunks.push(currentChunk.trim());
|
|
548
|
+
currentChunk = para;
|
|
549
|
+
currentTokens = paraTokens;
|
|
550
|
+
}
|
|
551
|
+
else {
|
|
552
|
+
currentChunk += (currentChunk ? '\n\n' : '') + para;
|
|
553
|
+
currentTokens += paraTokens;
|
|
554
|
+
}
|
|
555
|
+
}
|
|
556
|
+
if (currentChunk) {
|
|
557
|
+
// If remaining chunk still exceeds limit, split by sentences
|
|
558
|
+
const finalTokens = (0, tokenizer_1.countTokens)(currentChunk);
|
|
559
|
+
if (finalTokens > hardLimit) {
|
|
560
|
+
const sentences = this.splitIntoSentences(currentChunk);
|
|
561
|
+
let sentenceChunk = '';
|
|
562
|
+
let sentenceTokens = 0;
|
|
563
|
+
for (const sentence of sentences) {
|
|
564
|
+
const sentTokens = (0, tokenizer_1.countTokens)(sentence);
|
|
565
|
+
if (sentenceTokens + sentTokens > maxTokens && sentenceChunk) {
|
|
566
|
+
chunks.push(sentenceChunk.trim());
|
|
567
|
+
sentenceChunk = sentence;
|
|
568
|
+
sentenceTokens = sentTokens;
|
|
569
|
+
}
|
|
570
|
+
else {
|
|
571
|
+
sentenceChunk += (sentenceChunk ? ' ' : '') + sentence;
|
|
572
|
+
sentenceTokens += sentTokens;
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
if (sentenceChunk) {
|
|
576
|
+
chunks.push(sentenceChunk.trim());
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
else {
|
|
580
|
+
chunks.push(currentChunk.trim());
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
return chunks.filter(c => c.length > 0);
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
exports.SemanticChunkingStrategy = SemanticChunkingStrategy;
|
|
587
|
+
//# sourceMappingURL=semantic.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"semantic.js","sourceRoot":"","sources":["../../../../src/chunking/strategies/semantic.ts"],"names":[],"mappings":";;;AAAA,qEAAqE;AACrE,+BAAoC;AAGpC,kDAAiD;AAyDjD,MAAa,wBAAwB;IACnC,OAAO;QACL,OAAO,UAAU,CAAC;IACpB,CAAC;IAED,KAAK,CAAC,IAAY,EAAE,UAAkB,EAAE,OAAwB;QAC9D,MAAM,IAAI,GAAG,OAAkC,CAAC;QAEhD,+CAA+C;QAC/C,MAAM,UAAU,GAAI,IAAY,CAAC,UAAU,IAAI,IAAI,CAAC;QACpD,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC;QAE/C,kEAAkE;QAClE,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,GAAG,EAAE,UAAU,CAAC,CAAC;QAC9D,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,GAAG,CAAC,YAAY,EAAE,UAAU,CAAC,EAAE,UAAU,CAAC,CAAC;QAC7F,MAAM,mBAAmB,GAAG,IAAI,CAAC,mBAAmB,IAAI,IAAI,CAAC;QAC7D,MAAM,qBAAqB,GAAG,IAAI,CAAC,qBAAqB,IAAI,IAAI,CAAC,CAAC,MAAM;QACxE,MAAM,YAAY,GAAG,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC;QAC/C,MAAM,gBAAgB,GAAG,IAAI,CAAC,gBAAgB,IAAI,CAAC,CAAC;QACpD,MAAM,eAAe,GAAG,IAAI,CAAC,eAAe,IAAI,KAAK,CAAC;QACtD,MAAM,UAAU,GAAG,IAAI,CAAC,UAAU,IAAI,KAAK,CAAC;QAC5C,MAAM,cAAc,GAAG,IAAI,CAAC,cAAc,IAAI,IAAI,CAAC;QAEnD,4CAA4C;QAC5C,MAAM,WAAW,GAAG,IAAI,CAAC,WAAW,IAAI,IAAI,CAAC,iBAAiB,CAAC,IAAI,CAAC,CAAC;QAErE,iCAAiC;QACjC,MAAM,KAAK,GAAG,IAAI,CAAC,UAAU,CAAC,IAAI,EAAE,WAAW,EAAE,UAAU,EAAE,IAAI,CAAC,aAAa,CAAC,CAAC;QAEjF,IAAI,KAAK,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACvB,OAAO,EAAE,CAAC;QACZ,CAAC;QAED,mEAAmE;QACnE,MAAM,MAAM,GAAG,IAAI,CAAC,aAAa,CAC/B,KAAK,EACL,SAAS,EACT,SAAS,EACT,mBAAmB,EACnB,gBAAgB,EAChB,IAAI,CAAC,iBAAiB,CACvB,CAAC;QAEF,8BAA8B;QAC9B,MAAM,iBAAiB,GAAG,YAAY;YACpC,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,MAAM,EAAE,qBAAqB,EAAE,IAAI,CAAC,iBAAiB,CAAC;YAC/E,CAAC,CAAC,MAAM,CAAC;QAEX,2EAA2E;QAC3E,MAAM,OAAO,GAAY,EAAE,CAAC;QAC5B,IAAI,iBAAiB,GAAG,CAAC,CAAC;QAE1B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,iBAAiB,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YAClD,MAAM,KAAK,GAAG,iBAAiB,CAAC,CAAC,CAAC,CAAC;YACnC,IAAI,SAAS,GAAG,KAAK,CAAC,IAAI,CAAC;YAE3B,6BAA6B;YAC7B,IAAI,cAAc,IAAI,KAAK,CAAC,KAAK,EAAE,CAAC;gBAClC,SAAS,GAAG,IAAI,CAAC,cAAc,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;YAC/C,CAAC;YAED,8CAA8C;YAC9C,MAAM,WAAW
,GAAG,IAAI,CAAC,cAAc,CAAC,SAAS,CAAC,CAAC;YAEnD,IAAI,WAAW,GAAG,SAAS,EAAE,CAAC;gBAC5B,uEAAuE;gBACvE,kDAAkD;gBAClD,MAAM,aAAa,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC,CAAC,qBAAqB;gBAExE,iDAAiD;gBACjD,MAAM,SAAS,GAAG,IAAI,CAAC,mBAAmB,CAAC,SAAS,EAAE,aAAa,EAAE,SAAS,CAAC,CAAC;gBAEhF,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;oBAC1C,MAAM,YAAY,GAAG,SAAS,CAAC,CAAC,CAAC,CAAC;oBAElC,MAAM,MAAM,GAA4B,eAAe;wBACrD,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,YAAY,CAAC;wBACzC,CAAC,CAAC,SAAS,CAAC;oBAEd,MAAM,QAAQ,GAAQ;wBACpB,UAAU;wBACV,UAAU,EAAE,iBAAiB,EAAE;wBAC/B,WAAW,EAAE,iBAAiB,CAAC,MAAM,GAAG,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,wBAAwB;wBACtF,SAAS,EAAE,KAAK,CAAC,SAAS,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAC;wBAChF,OAAO,EAAE,KAAK,CAAC,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC,MAAM;wBACpE,MAAM;wBACN,SAAS,EAAE,KAAK,CAAC,SAAS;wBAC1B,eAAe,EAAE,KAAK,CAAC,UAAU;wBACjC,KAAK,EAAE,KAAK,CAAC,KAAK;wBAClB,WAAW,EAAE,IAAI;wBACjB,iBAAiB,EAAE,CAAC;wBACpB,GAAG,IAAI,CAAC,QAAQ;qBACjB,CAAC;oBAEF,OAAO,CAAC,IAAI,CAAC;wBACX,EAAE,EAAE,IAAA,SAAM,GAAE;wBACZ,IAAI,EAAE,YAAY;wBAClB,QAAQ;qBACT,CAAC,CAAC;gBACL,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,gFAAgF;gBAChF,yEAAyE;gBACzE,MAAM,MAAM,GAA4B,eAAe;oBACrD,CAAC,CAAC,IAAI,CAAC,oBAAoB,CAAC,SAAS,CAAC;oBACtC,CAAC,CAAC,SAAS,CAAC;gBAEd,MAAM,QAAQ,GAAQ;oBACpB,UAAU;oBACV,UAAU,EAAE,iBAAiB,EAAE;oBAC/B,WAAW,EAAE,iBAAiB,CAAC,MAAM,EAAE,wBAAwB;oBAC/D,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,OAAO,EAAE,KAAK,CAAC,OAAO;oBACtB,MAAM;oBACN,SAAS,EAAE,KAAK,CAAC,SAAS;oBAC1B,eAAe,EAAE,KAAK,CAAC,UAAU;oBACjC,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,GAAG,IAAI,CAAC,QAAQ;iBACjB,CAAC;gBAEF,OAAO,CAAC,IAAI,CAAC;oBACX,EAAE,EAAE,IAAA,SAAM,GAAE;oBACZ,IAAI,EAAE,SAAS;oBACf,QAAQ;iBACT,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,oCAAoC;QACpC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE;YACtB,KAAK,CAAC,QAAQ,CAAC,WAAW,GAAG,OAAO,CAAC,MAAM,CAAC;QAC9C,CAAC,CAAC,CAAC;QAEH,OAAO,OAAO,CAAC;I
ACjB,CAAC;IAED;;OAEG;IACK,iBAAiB,CAAC,IAAY;QACpC,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAC/B,IAAI,mBAAmB,GAAG,CAAC,CAAC;QAE5B,gDAAgD;QAChD,MAAM,WAAW,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QAEvC,KAAK,MAAM,IAAI,IAAI,WAAW,EAAE,CAAC;YAC/B,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC5B,yCAAyC;YACzC,IACE,wDAAwD,CAAC,IAAI,CAAC,OAAO,CAAC,EACtE,CAAC;gBACD,mBAAmB,EAAE,CAAC;YACxB,CAAC;QACH,CAAC;QAED,wEAAwE;QACxE,MAAM,SAAS,GAAG,WAAW,CAAC,MAAM,GAAG,GAAG,CAAC;QAC3C,OAAO,mBAAmB,IAAI,SAAS,CAAC,CAAC,CAAC,cAAc,CAAC,CAAC,CAAC,MAAM,CAAC;IACpE,CAAC;IAED;;OAEG;IACK,UAAU,CAChB,IAAY,EACZ,WAAoC,EACpC,UAAmB,EACnB,aAAwB;QAExB,IAAI,WAAW,KAAK,cAAc,EAAE,CAAC;YACnC,OAAO,IAAI,CAAC,mBAAmB,CAAC,IAAI,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;QACnE,CAAC;aAAM,CAAC;YACN,OAAO,IAAI,CAAC,WAAW,CAAC,IAAI,EAAE,UAAU,EAAE,aAAa,CAAC,CAAC;QAC3D,CAAC;IACH,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,IAAY,EAAE,UAAmB,EAAE,aAAwB;QACrF,MAAM,KAAK,GAAW,EAAE,CAAC;QACzB,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAE/B,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,WAAkD,CAAC;QACvD,IAAI,SAAS,GAAG,CAAC,CAAC;QAClB,IAAI,YAAY,GAAG,CAAC,CAAC;QAErB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;YACzB,MAAM,OAAO,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAE5B,sBAAsB;YACtB,IAAI,IAA2C,CAAC;YAChD,IAAI,OAAO,GAAG,OAAO,CAAC;YAEtB,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,EAAE,CAAC;gBAC5D,IAAI,GAAG,MAAM,CAAC;gBACd,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACtD,CAAC;iBAAM,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,QAAQ,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;gBACxG,IAAI,GAAG,OAAO,CAAC;gBACf,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,0BAA0B,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACnE,CAAC;iBAAM,IAAI,OAAO,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,OAAO,CAAC,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;gBACnE,IAAI,GAAG,MAAM,CAAC;gBACd,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,aAAa,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAC;YACtD,CAAC;YAED,kEAAkE;YAClE,IAAI,IAAI,IAAI,WAAW,EAAE,CAAC;gBACxB,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE
,aAAa,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;gBACvF,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;oBACnB,KAAK,CAAC,IAAI,CAAC;wBACT,IAAI,EAAE,OAAO;wBACb,SAAS,EAAE,YAAY;wBACvB,OAAO,EAAE,SAAS;wBAClB,IAAI,EAAE,MAAM;wBACZ,IAAI,EAAE,WAAW;qBAClB,CAAC,CAAC;gBACL,CAAC;gBACD,WAAW,GAAG,OAAO,CAAC;gBACtB,WAAW,GAAG,IAAI,CAAC;gBACnB,YAAY,GAAG,SAAS,CAAC;YAC3B,CAAC;iBAAM,IAAI,IAAI,EAAE,CAAC;gBAChB,iBAAiB;gBACjB,WAAW,GAAG,OAAO,CAAC;gBACtB,WAAW,GAAG,IAAI,CAAC;gBACnB,YAAY,GAAG,SAAS,CAAC;YAC3B,CAAC;iBAAM,CAAC;gBACN,wBAAwB;gBACxB,WAAW,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,OAAO,CAAC;YACrD,CAAC;YAED,SAAS,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,iBAAiB;QACjD,CAAC;QAED,iBAAiB;QACjB,IAAI,WAAW,CAAC,IAAI,EAAE,EAAE,CAAC;YACvB,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,WAAW,CAAC;YACvF,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;gBACnB,KAAK,CAAC,IAAI,CAAC;oBACT,IAAI,EAAE,OAAO;oBACb,SAAS,EAAE,YAAY;oBACvB,OAAO,EAAE,SAAS;oBAClB,IAAI,EAAE,MAAM;oBACZ,IAAI,EAAE,WAAW;iBAClB,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,WAAW,CAAC,IAAY,EAAE,UAAmB,EAAE,aAAwB;QAC7E,MAAM,KAAK,GAAW,EAAE,CAAC;QAEzB,wBAAwB;QACxB,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACzC,IAAI,SAAS,GAAG,CAAC,CAAC;QAElB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,EAAE,CAAC;YAC7B,IAAI,CAAC,QAAQ,EAAE,CAAC;gBACd,SAAS,IAAI,IAAI,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,cAAc;gBAC5C,SAAS;YACX,CAAC;YAED,iCAAiC;YACjC,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YAEpD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;gBACjC,MAAM,OAAO,GAAG,UAAU,CAAC,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,QAAQ,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC;gBACjF,IAAI,OAAO,CAAC,IAAI,EAAE,EAAE,CAAC;oBACnB,MAAM,OAAO,GAAG,SAAS,GAAG,QAAQ,CAAC,MAAM,CAAC;oBAC5C,KAAK,CAAC,IAAI,CAAC;wBACT,IAAI,EAAE,OAAO;wBACb,SAAS;wBACT,OAAO;wBACP,IAAI,EAAE,UAAU;qBACjB,CAAC,CAAC;oBACH,SAAS,GAAG,OAAO,CAAC;gBACtB,CAAC;qBAAM,CAAC;oBACN,SAAS,IAAI,QAAQ,CAAC,MAAM,CAAC;gBAC/B,CAAC;YACH,CAAC;YAED,SAAS,IAAI,CAAC,CAAC,CAAC,sBAAsB;QA
CxC,CAAC;QAED,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;OAEG;IACK,aAAa,CACnB,KAAa,EACb,SAAiB,EACjB,SAAiB,EACjB,mBAA2B,EAC3B,gBAAwB,EACxB,iBAAqC;QAUrC,MAAM,MAAM,GAAe,EAAE,CAAC;QAC9B,IAAI,YAAY,GAAW,EAAE,CAAC;QAC9B,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,gBAAgB,GAAe,EAAE,CAAC;QACtC,MAAM,QAAQ,GAAa,EAAE,CAAC;QAE9B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC;YACtB,MAAM,UAAU,GAAG,IAAI,CAAC,cAAc,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YAElD,2DAA2D;YAC3D,IAAI,UAAU,GAAG,SAAS,EAAE,CAAC;gBAC3B,sCAAsC;gBACtC,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC5B,MAAM,OAAO,GAAG,IAAA,SAAM,GAAE,CAAC;oBACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACvB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;oBACvD,YAAY,GAAG,EAAE,CAAC;oBAClB,aAAa,GAAG,CAAC,CAAC;gBACpB,CAAC;gBAED,oDAAoD;gBACpD,0FAA0F;gBAC1F,MAAM,UAAU,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,EAAE,SAAS,CAAC,CAAC;gBAC5D,KAAK,MAAM,SAAS,IAAI,UAAU,EAAE,CAAC;oBACnC,MAAM,OAAO,GAAG,IAAA,SAAM,GAAE,CAAC;oBACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACvB,MAAM,CAAC,IAAI,CAAC;wBACV,EAAE,EAAE,OAAO;wBACX,IAAI,EAAE,SAAS,CAAC,IAAI;wBACpB,SAAS,EAAE,SAAS,CAAC,SAAS;wBAC9B,OAAO,EAAE,SAAS,CAAC,OAAO;wBAC1B,KAAK,EAAE,CAAC,SAAS,CAAC;wBAClB,UAAU,EAAE,CAAC;wBACb,KAAK,EAAE,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,EAAE;wBACnC,SAAS,EAAE,EAAE;qBACd,CAAC,CAAC;gBACL,CAAC;gBACD,gBAAgB,GAAG,EAAE,CAAC;gBACtB,SAAS;YACX,CAAC;YAED,mBAAmB;YACnB,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtE,YAAY;gBACZ,MAAM,OAAO,GAAG,IAAA,SAAM,GAAE,CAAC;gBACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;gBACvB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;gBACvD,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;gBACtB,aAAa,GAAG,UAAU,CAAC;gBAC3B,gBAAgB,GAAG,EAAE,CAAC;gBACtB,SAAS;YACX,CAAC;YAED,0CAA0C;YAC1C,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACtE,0DAA0D;gBAC1D,MAAM,iBAAiB,GAAG,IAAI,CAAC,wBAAwB,CACrD,YAAY,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CA
AC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,EACvC,IAAI,CAAC,IAAI,CACV,CAAC;gBAEF,IAAI,iBAAiB,GAAG,mBAAmB,EAAE,CAAC;oBAC5C,MAAM,OAAO,GAAG,IAAA,SAAM,GAAE,CAAC;oBACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACvB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;oBACvD,YAAY,GAAG,CAAC,IAAI,CAAC,CAAC;oBACtB,aAAa,GAAG,UAAU,CAAC;oBAC3B,gBAAgB,GAAG,EAAE,CAAC;oBACtB,SAAS;gBACX,CAAC;YACH,CAAC;YAED,4BAA4B;YAC5B,YAAY,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACxB,aAAa,IAAI,UAAU,CAAC;YAE5B,0DAA0D;YAC1D,IAAI,gBAAgB,CAAC,MAAM,IAAI,gBAAgB,EAAE,CAAC;gBAChD,gBAAgB,CAAC,KAAK,EAAE,CAAC;YAC3B,CAAC;QACH,CAAC;QAED,kBAAkB;QAClB,IAAI,YAAY,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC5B,MAAM,OAAO,GAAG,IAAA,SAAM,GAAE,CAAC;YACzB,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACvB,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,aAAa,CAAC,YAAY,EAAE,OAAO,CAAC,CAAC,CAAC;QACzD,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,aAAa,CAAC,KAAa,EAAE,EAAW;QAC9C,MAAM,IAAI,GAAG,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,SAAS,GAAG,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS,CAAC;QACrC,MAAM,OAAO,GAAG,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,OAAO,CAAC;QAChD,MAAM,KAAK,GAAG,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,CAAC,CAAC;QAEnE,0CAA0C;QAC1C,MAAM,UAAU,GAAG,IAAI,CAAC,mBAAmB,CAAC,KAAK,CAAC,CAAC;QAEnD,OAAO;YACL,EAAE;YACF,IAAI;YACJ,SAAS;YACT,OAAO;YACP,KAAK;YACL,UAAU;YACV,KAAK;YACL,SAAS,EAAE,EAAE;SACd,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,KAAa;QACvC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,CAAC,CAAC;QAE/B,IAAI,kBAAkB,GAAG,CAAC,CAAC;QAC3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACtC,MAAM,UAAU,GAAG,IAAI,CAAC,wBAAwB,CAAC,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YACnF,kBAAkB,IAAI,CAAC,CAAC,GAAG,UAAU,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,kBAAkB,GAAG,CAAC,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACjD,CAAC;IAED;;OAEG;IACK,iBAAiB,CACvB,MAAa,EACb,cAAsB,EACtB,iBA
AqC;QAErC,IAAI,MAAM,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,MAAM,CAAC;QAErC,MAAM,MAAM,GAAG,CAAC,GAAG,MAAM,CAAC,CAAC;QAE3B,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,MAAM,CAAC,MAAM,EAAE,CAAC,EAAE,EAAE,CAAC;YACvC,MAAM,SAAS,GAAG,MAAM,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC;YAChC,MAAM,YAAY,GAAG,MAAM,CAAC,CAAC,CAAC,CAAC;YAE/B,yCAAyC;YACzC,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,MAAM,GAAG,cAAc,CAAC,CAAC;YACvE,MAAM,aAAa,GAAG,SAAS,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,WAAW,CAAC,CAAC;YAE1D,IAAI,aAAa,CAAC,MAAM,KAAK,CAAC;gBAAE,SAAS;YAEzC,4CAA4C;YAC5C,MAAM,WAAW,GAAG,aAAa,CAAC,GAAG,CAAC,CAAC,CAAO,EAAE,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;YACtE,MAAM,UAAU,GAAG,IAAI,CAAC,wBAAwB,CAC9C,WAAW,EACX,YAAY,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,IAAI,IAAI,EAAE,CAClC,CAAC;YAEF,iEAAiE;YACjE,IAAI,UAAU,GAAG,GAAG,EAAE,CAAC;gBACrB,YAAY,CAAC,IAAI,GAAG,WAAW,GAAG,SAAS,GAAG,YAAY,CAAC,IAAI,CAAC;gBAChE,YAAY,CAAC,SAAS,GAAG,YAAY,CAAC,SAAS,IAAI,EAAE,CAAC;gBACtD,IAAI,SAAS,CAAC,EAAE,EAAE,CAAC;oBACjB,YAAY,CAAC,SAAS,CAAC,IAAI,CAAC,SAAS,CAAC,EAAE,CAAC,CAAC;gBAC5C,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,KAAa;QAClC,OAAO,KAAK;aACT,GAAG,CAAC,IAAI,CAAC,EAAE;YACV,IAAI,IAAI,CAAC,IAAI,EAAE,CAAC;gBACd,OAAO,GAAG,IAAI,CAAC,IAAI,KAAK,IAAI,CAAC,IAAI,EAAE,CAAC;YACtC,CAAC;YACD,OAAO,IAAI,CAAC,IAAI,CAAC;QACnB,CAAC,CAAC;aACD,IAAI,CAAC,MAAM,CAAC,CAAC;IAClB,CAAC;IAED;;OAEG;IACK,oBAAoB,CAAC,IAAY;QACvC,oCAAoC;QACpC,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,CAAC;QAChD,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,EAAE,GAAG,CAAC,IAAI,EAAE,CAAC;QAEtD,qDAAqD;QACrD,MAAM,KAAK,GAAG,IAAI,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC;QAEtC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,CAAC;IAC5B,CAAC;IAED;;OAEG;IACK,YAAY,CAAC,IAAY;QAC/B,MAAM,KAAK,GAAa,EAAE,CAAC;QAE3B,gBAAgB;QAChB,MAAM,SAAS,GAAG,sDAAsD,CAAC;QACzE,MAAM,KAAK,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC;QACpC,IAAI,KAAK;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,KAAK,CAAC,CAAC;QAEhC,4BAA4B;QAC5B,MAAM,OAAO,GAAG,sFAAsF,CAAC;QACvG,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CA
AC;QAChC,IAAI,GAAG;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAC,aAAa;QAEtD,mCAAmC;QACnC,MAAM,WAAW,GAAG,+GAA+G,CAAC;QACpI,MAAM,OAAO,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACxC,IAAI,OAAO;YAAE,KAAK,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;QAEhD,OAAO,CAAC,GAAG,IAAI,GAAG,CAAC,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,sBAAsB;IACjE,CAAC;IAED;;OAEG;IACK,UAAU,CAAC,IAAY,EAAE,cAAyB;QACxD,IAAI,OAAO,GAAG,IAAI,CAAC;QAEnB,yBAAyB;QACzB,MAAM,eAAe,GAAG;YACtB,6CAA6C;YAC7C,cAAc,EAAE,0CAA0C;YAC1D,oBAAoB;YACpB,YAAY,EAAE,uBAAuB;SACtC,CAAC;QAEF,MAAM,QAAQ,GAAG,CAAC,GAAG,eAAe,EAAE,GAAG,CAAC,cAAc,IAAI,EAAE,CAAC,CAAC,CAAC;QAEjE,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;YAC/B,OAAO,GAAG,OAAO,CAAC,OAAO,CAAC,OAAO,EAAE,EAAE,CAAC,CAAC;QACzC,CAAC;QAED,OAAO,OAAO,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IACnD,CAAC;IAED;;OAEG;IACK,wBAAwB,CAAC,KAAa,EAAE,KAAa;QAC3D,MAAM,MAAM,GAAG,IAAI,GAAG,CACpB,KAAK,CAAC,WAAW,EAAE;aAChB,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;aACvB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAC7B,CAAC;QAEF,MAAM,MAAM,GAAG,IAAI,GAAG,CACpB,KAAK,CAAC,WAAW,EAAE;aAChB,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC;aACvB,KAAK,CAAC,KAAK,CAAC;aACZ,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAC7B,CAAC;QAEF,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC,IAAI,MAAM,CAAC,IAAI,KAAK,CAAC;YAAE,OAAO,CAAC,CAAC;QAErD,MAAM,YAAY,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;QACrE,MAAM,KAAK,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,MAAM,EAAE,GAAG,MAAM,CAAC,CAAC,CAAC;QAE9C,OAAO,YAAY,CAAC,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;IACxC,CAAC;IAGD;;OAEG;IACK,kBAAkB,CAAC,IAAY;QACrC,2BAA2B;QAC3B,OAAO,IAAI;aACR,OAAO,CAAC,cAAc,EAAE,MAAM,CAAC;aAC/B,KAAK,CAAC,IAAI,CAAC;aACX,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC;aAClB,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC/B,CAAC;IAED;;OAEG;IACK,cAAc,CAAC,IAAY;QACjC,OAAO,IAAA,uBAAW,E
AAC,IAAI,CAAC,CAAC;IAC3B,CAAC;IAED;;OAEG;IACK,kBAAkB,CAAC,IAAU,EAAE,SAAiB;QACtD,MAAM,MAAM,GAAgE,EAAE,CAAC;QAC/E,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,GAAG,GAAG,CAAC,CAAC;QAE9C,iDAAiD;QACjD,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;QACrD,IAAI,WAAW,GAAG,EAAE,CAAC;QACrB,IAAI,YAAY,GAAG,IAAI,CAAC,SAAS,CAAC;QAClC,IAAI,aAAa,GAAG,CAAC,CAAC;QACtB,IAAI,UAAU,GAAG,CAAC,CAAC,CAAC,kCAAkC;QAEtD,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;YACjC,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;YAEzC,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,WAAW,EAAE,CAAC;gBAC1D,MAAM,CAAC,IAAI,CAAC;oBACV,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE;oBACxB,SAAS,EAAE,YAAY;oBACvB,OAAO,EAAE,IAAI,CAAC,SAAS,GAAG,UAAU;iBACrC,CAAC,CAAC;gBACH,YAAY,GAAG,IAAI,CAAC,SAAS,GAAG,UAAU,CAAC;gBAC3C,WAAW,GAAG,QAAQ,CAAC;gBACvB,aAAa,GAAG,UAAU,CAAC;gBAC3B,UAAU,IAAI,WAAW,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,eAAe;YACvD,CAAC;iBAAM,CAAC;gBACN,WAAW,IAAI,CAAC,WAAW,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC;gBACnD,aAAa,IAAI,UAAU,CAAC;gBAC5B,UAAU,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,WAAW,CAAC,QAAQ,CAAC,QAAQ,GAAG,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;YACjF,CAAC;QACH,CAAC;QAED,IAAI,WAAW,EAAE,CAAC;YAChB,MAAM,CAAC,IAAI,CAAC;gBACV,IAAI,EAAE,WAAW,CAAC,IAAI,EAAE;gBACxB,SAAS,EAAE,YAAY;gBACvB,OAAO,EAAE,IAAI,CAAC,OAAO;aACtB,CAAC,CAAC;QACL,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,IAAI,CAAC,IAAI,EAAE,SAAS,EAAE,IAAI,CAAC,SAAS,EAAE,OAAO,EAAE,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC;IAC9G,CAAC;IAED;;OAEG;IACK,mBAAmB,CAAC,IAAY,EAAE,SAAiB,EAAE,SAAiB;QAC5E,MAAM,MAAM,GAAa,EAAE,CAAC;QAE5B,oCAAoC;QACpC,MAAM,UAAU,GAAG,IAAI,CAAC,KAAK,CAAC,SAAS,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC;QAC/D,IAAI,YAAY,GAAG,EAAE,CAAC;QACtB,IAAI,aAAa,GAAG,CAAC,CAAC;QAEtB,KAAK,MAAM,IAAI,IAAI,UAAU,EAAE,CAAC;YAC9B,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,IAAI,CAAC,CAAC;YAErC,IAAI,aAAa,GAAG,UAAU,GAAG,SAAS,IAAI,YAAY,EAAE,CAAC;gBAC3D,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;gBACjC,YAAY,GAAG,IAAI,
CAAC;gBACpB,aAAa,GAAG,UAAU,CAAC;YAC7B,CAAC;iBAAM,CAAC;gBACN,YAAY,IAAI,CAAC,YAAY,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,IAAI,CAAC;gBACpD,aAAa,IAAI,UAAU,CAAC;YAC9B,CAAC;QACH,CAAC;QAED,IAAI,YAAY,EAAE,CAAC;YACjB,6DAA6D;YAC7D,MAAM,WAAW,GAAG,IAAA,uBAAW,EAAC,YAAY,CAAC,CAAC;YAC9C,IAAI,WAAW,GAAG,SAAS,EAAE,CAAC;gBAC5B,MAAM,SAAS,GAAG,IAAI,CAAC,kBAAkB,CAAC,YAAY,CAAC,CAAC;gBACxD,IAAI,aAAa,GAAG,EAAE,CAAC;gBACvB,IAAI,cAAc,GAAG,CAAC,CAAC;gBAEvB,KAAK,MAAM,QAAQ,IAAI,SAAS,EAAE,CAAC;oBACjC,MAAM,UAAU,GAAG,IAAA,uBAAW,EAAC,QAAQ,CAAC,CAAC;oBAEzC,IAAI,cAAc,GAAG,UAAU,GAAG,SAAS,IAAI,aAAa,EAAE,CAAC;wBAC7D,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC;wBAClC,aAAa,GAAG,QAAQ,CAAC;wBACzB,cAAc,GAAG,UAAU,CAAC;oBAC9B,CAAC;yBAAM,CAAC;wBACN,aAAa,IAAI,CAAC,aAAa,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,GAAG,QAAQ,CAAC;wBACvD,cAAc,IAAI,UAAU,CAAC;oBAC/B,CAAC;gBACH,CAAC;gBAED,IAAI,aAAa,EAAE,CAAC;oBAClB,MAAM,CAAC,IAAI,CAAC,aAAa,CAAC,IAAI,EAAE,CAAC,CAAC;gBACpC,CAAC;YACH,CAAC;iBAAM,CAAC;gBACN,MAAM,CAAC,IAAI,CAAC,YAAY,CAAC,IAAI,EAAE,CAAC,CAAC;YACnC,CAAC;QACH,CAAC;QAED,OAAO,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAC1C,CAAC;CACF;AApsBD,4DAosBC"}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import type { Chunk, ChunkingOptions, ChunkingStrategy } from '../types';
|
|
2
|
+
/**
 * Ambient declaration of a chunking strategy class that implements the
 * `ChunkingStrategy` interface imported from `../types`.
 * NOTE(review): presumably splits text on sentence boundaries, judging by the
 * class name and the private `splitIntoSentences` member - confirm against the implementation.
 */
export declare class SentenceChunkingStrategy implements ChunkingStrategy {
|
|
3
|
+
/** Returns a string; presumably the identifier of this strategy - confirm against the implementation. */
getName(): string;
|
|
4
|
+
/**
 * Produces an array of `Chunk` objects from the given input.
 * @param text Input text.
 * @param documentId String identifier passed alongside the text.
 * @param options Chunking options, typed as `ChunkingOptions`.
 */
chunk(text: string, documentId: string, options: ChunkingOptions): Chunk[];
|
|
5
|
+
/** Private member; its signature is omitted from the declaration output. */
private splitIntoSentences;
|
|
6
|
+
}
|
|
7
|
+
//# sourceMappingURL=sentence.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"sentence.d.ts","sourceRoot":"","sources":["../../../../src/chunking/strategies/sentence.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,KAAK,EAAE,eAAe,EAAE,gBAAgB,EAAE,MAAM,UAAU,CAAC;AAGzE,qBAAa,wBAAyB,YAAW,gBAAgB;IAC/D,OAAO,IAAI,MAAM;IAIjB,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,eAAe,GAAG,KAAK,EAAE;IA8E1E,OAAO,CAAC,kBAAkB;CAwC3B"}
|