cccmemory 1.9.0 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +96 -50
- package/dist/chunking/ChunkingConfig.d.ts +63 -0
- package/dist/chunking/ChunkingConfig.d.ts.map +1 -0
- package/dist/chunking/ChunkingConfig.js +44 -0
- package/dist/chunking/ChunkingConfig.js.map +1 -0
- package/dist/chunking/TextChunker.d.ts +59 -0
- package/dist/chunking/TextChunker.d.ts.map +1 -0
- package/dist/chunking/TextChunker.js +130 -0
- package/dist/chunking/TextChunker.js.map +1 -0
- package/dist/chunking/index.d.ts +9 -0
- package/dist/chunking/index.d.ts.map +1 -0
- package/dist/chunking/index.js +8 -0
- package/dist/chunking/index.js.map +1 -0
- package/dist/chunking/strategies/SentenceChunker.d.ts +19 -0
- package/dist/chunking/strategies/SentenceChunker.d.ts.map +1 -0
- package/dist/chunking/strategies/SentenceChunker.js +251 -0
- package/dist/chunking/strategies/SentenceChunker.js.map +1 -0
- package/dist/chunking/strategies/SlidingWindowChunker.d.ts +10 -0
- package/dist/chunking/strategies/SlidingWindowChunker.d.ts.map +1 -0
- package/dist/chunking/strategies/SlidingWindowChunker.js +133 -0
- package/dist/chunking/strategies/SlidingWindowChunker.js.map +1 -0
- package/dist/embeddings/VectorStore.d.ts +78 -3
- package/dist/embeddings/VectorStore.d.ts.map +1 -1
- package/dist/embeddings/VectorStore.js +352 -21
- package/dist/embeddings/VectorStore.js.map +1 -1
- package/dist/mcp-server.d.ts +5 -0
- package/dist/mcp-server.d.ts.map +1 -1
- package/dist/mcp-server.js +79 -11
- package/dist/mcp-server.js.map +1 -1
- package/dist/parsers/ExtractionValidator.d.ts +84 -0
- package/dist/parsers/ExtractionValidator.d.ts.map +1 -0
- package/dist/parsers/ExtractionValidator.js +296 -0
- package/dist/parsers/ExtractionValidator.js.map +1 -0
- package/dist/search/HybridReranker.d.ts +80 -0
- package/dist/search/HybridReranker.d.ts.map +1 -0
- package/dist/search/HybridReranker.js +146 -0
- package/dist/search/HybridReranker.js.map +1 -0
- package/dist/search/QueryExpander.d.ts +53 -0
- package/dist/search/QueryExpander.d.ts.map +1 -0
- package/dist/search/QueryExpander.js +178 -0
- package/dist/search/QueryExpander.js.map +1 -0
- package/dist/search/ResultAggregator.d.ts +70 -0
- package/dist/search/ResultAggregator.d.ts.map +1 -0
- package/dist/search/ResultAggregator.js +155 -0
- package/dist/search/ResultAggregator.js.map +1 -0
- package/dist/search/SemanticSearch.d.ts +25 -1
- package/dist/search/SemanticSearch.d.ts.map +1 -1
- package/dist/search/SemanticSearch.js +264 -53
- package/dist/search/SemanticSearch.js.map +1 -1
- package/dist/search/SnippetGenerator.d.ts +81 -0
- package/dist/search/SnippetGenerator.d.ts.map +1 -0
- package/dist/search/SnippetGenerator.js +268 -0
- package/dist/search/SnippetGenerator.js.map +1 -0
- package/dist/search/index.d.ts +15 -0
- package/dist/search/index.d.ts.map +1 -0
- package/dist/search/index.js +10 -0
- package/dist/search/index.js.map +1 -0
- package/dist/storage/migrations.d.ts +3 -2
- package/dist/storage/migrations.d.ts.map +1 -1
- package/dist/storage/migrations.js +235 -13
- package/dist/storage/migrations.js.map +1 -1
- package/dist/tools/ToolDefinitions.d.ts +786 -0
- package/dist/tools/ToolDefinitions.d.ts.map +1 -1
- package/dist/tools/ToolDefinitions.js +758 -1
- package/dist/tools/ToolDefinitions.js.map +1 -1
- package/dist/tools/ToolHandlers.d.ts +96 -21
- package/dist/tools/ToolHandlers.d.ts.map +1 -1
- package/dist/tools/ToolHandlers.js +1926 -127
- package/dist/tools/ToolHandlers.js.map +1 -1
- package/dist/types/ToolTypes.d.ts +495 -18
- package/dist/types/ToolTypes.d.ts.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -4,76 +4,51 @@ An MCP server that gives Claude long-term memory by indexing conversation histor
|
|
|
4
4
|
|
|
5
5
|
---
|
|
6
6
|
|
|
7
|
-
##
|
|
7
|
+
## What's New in v2.0
|
|
8
8
|
|
|
9
|
-
|
|
9
|
+
Version 2.0 brings major improvements to search quality and accuracy:
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
- **Smart Chunking** - Long messages are now split at sentence boundaries, ensuring full content is searchable (previously truncated at 512 tokens)
|
|
12
|
+
- **Hybrid Search** - Combines semantic search with full-text search using Reciprocal Rank Fusion (RRF) for better ranking
|
|
13
|
+
- **Dynamic Thresholds** - Similarity thresholds adjust based on query length for better precision
|
|
14
|
+
- **Improved Snippets** - Search results highlight matching terms in context
|
|
15
|
+
- **Extraction Validation** - Reduces false positives in decision/mistake detection
|
|
16
|
+
- **Query Expansion** - Optional synonym expansion for broader recall (disabled by default)
|
|
12
17
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
```bash
|
|
16
|
-
npm uninstall -g claude-conversation-memory-mcp
|
|
17
|
-
```
|
|
18
|
-
|
|
19
|
-
### 2. Install the new package
|
|
20
|
-
|
|
21
|
-
```bash
|
|
22
|
-
npm install -g cccmemory
|
|
23
|
-
```
|
|
24
|
-
|
|
25
|
-
### 3. Update your MCP configuration
|
|
26
|
-
|
|
27
|
-
**Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
|
|
28
|
-
```json
|
|
29
|
-
{
|
|
30
|
-
"mcpServers": {
|
|
31
|
-
"cccmemory": {
|
|
32
|
-
"command": "npx",
|
|
33
|
-
"args": ["-y", "cccmemory"]
|
|
34
|
-
}
|
|
35
|
-
}
|
|
36
|
-
}
|
|
37
|
-
```
|
|
18
|
+
---
|
|
38
19
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
{
|
|
42
|
-
"mcpServers": {
|
|
43
|
-
"cccmemory": {
|
|
44
|
-
"command": "npx",
|
|
45
|
-
"args": ["-y", "cccmemory"]
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
}
|
|
49
|
-
```
|
|
20
|
+
<details>
|
|
21
|
+
<summary><strong>⚠️ Breaking Changes in v1.8.0</strong> (click to expand)</summary>
|
|
50
22
|
|
|
51
|
-
**
|
|
52
|
-
```toml
|
|
53
|
-
[mcp_servers.cccmemory]
|
|
54
|
-
command = "npx"
|
|
55
|
-
args = ["-y", "cccmemory"]
|
|
56
|
-
```
|
|
23
|
+
**This package was renamed from `claude-conversation-memory-mcp` to `cccmemory`.**
|
|
57
24
|
|
|
58
|
-
|
|
25
|
+
If upgrading from the old package:
|
|
59
26
|
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
27
|
+
1. Uninstall old package: `npm uninstall -g claude-conversation-memory-mcp`
|
|
28
|
+
2. Install new package: `npm install -g cccmemory`
|
|
29
|
+
3. Update MCP config to use `cccmemory` command
|
|
30
|
+
4. Database migration is automatic (`.claude-conversations-memory.db` → `.cccmemory.db`)
|
|
63
31
|
|
|
64
|
-
|
|
32
|
+
</details>
|
|
65
33
|
|
|
66
34
|
---
|
|
67
35
|
|
|
68
36
|
## Features
|
|
69
37
|
|
|
70
38
|
- **Search conversations** - Natural language search across your chat history
|
|
39
|
+
- **Smart chunking** - Long messages fully indexed without truncation
|
|
40
|
+
- **Hybrid search** - Combines vector + keyword search with RRF re-ranking
|
|
71
41
|
- **Track decisions** - Remember why you made technical choices
|
|
72
42
|
- **Prevent mistakes** - Learn from past errors
|
|
73
43
|
- **Git integration** - Link conversations to commits
|
|
74
44
|
- **Cross-project search** - Search across all your projects globally
|
|
75
45
|
- **Project migration** - Keep history when renaming/moving projects
|
|
76
46
|
- **Semantic search** - Uses Transformers.js embeddings (bundled, works offline)
|
|
47
|
+
- **Working memory** - Store and recall facts, decisions, and context across sessions
|
|
48
|
+
- **Session handoff** - Seamless context transfer between conversations
|
|
49
|
+
- **Tag management** - Organize memories, decisions, and patterns with tags
|
|
50
|
+
- **Memory quality** - Track confidence, importance, and verification status
|
|
51
|
+
- **Database maintenance** - Find duplicates, clean stale data, health reports
|
|
77
52
|
|
|
78
53
|
## Installation
|
|
79
54
|
|
|
@@ -249,6 +224,19 @@ Config:
|
|
|
249
224
|
Set `OPENAI_API_KEY` environment variable.
|
|
250
225
|
</details>
|
|
251
226
|
|
|
227
|
+
### Search Configuration (Optional)
|
|
228
|
+
|
|
229
|
+
Tune search behavior with environment variables:
|
|
230
|
+
|
|
231
|
+
| Variable | Default | Description |
|
|
232
|
+
|----------|---------|-------------|
|
|
233
|
+
| `CCCMEMORY_CHUNKING_ENABLED` | `true` | Enable smart chunking for long messages |
|
|
234
|
+
| `CCCMEMORY_CHUNK_SIZE` | `450` | Target chunk size in tokens |
|
|
235
|
+
| `CCCMEMORY_CHUNK_OVERLAP` | `0.1` | Overlap between consecutive chunks as a fraction (0 ≤ value < 1; e.g. `0.1` = 10%) |
|
|
236
|
+
| `CCCMEMORY_RERANK_ENABLED` | `true` | Enable hybrid re-ranking (vector + FTS) |
|
|
237
|
+
| `CCCMEMORY_RERANK_WEIGHT` | `0.7` | Weight given to vector similarity in re-ranking; full-text search (FTS) gets the remainder, `1 - weight` |
|
|
238
|
+
| `CCCMEMORY_QUERY_EXPANSION` | `false` | Enable synonym expansion for queries |
|
|
239
|
+
|
|
252
240
|
## MCP Tools
|
|
253
241
|
|
|
254
242
|
### Indexing
|
|
@@ -290,6 +278,64 @@ Set `OPENAI_API_KEY` environment variable.
|
|
|
290
278
|
| `forget_by_topic` | Delete conversations by keyword |
|
|
291
279
|
| `generate_documentation` | Generate docs from local code scan + conversations |
|
|
292
280
|
|
|
281
|
+
### Working Memory
|
|
282
|
+
| Tool | Description |
|
|
283
|
+
|------|-------------|
|
|
284
|
+
| `remember` | Store a fact, decision, or context with optional TTL |
|
|
285
|
+
| `recall` | Retrieve a specific memory by key |
|
|
286
|
+
| `recall_relevant` | Semantic search across stored memories |
|
|
287
|
+
| `list_memory` | List all memories, optionally filtered by tags |
|
|
288
|
+
| `forget` | Remove a memory by key |
|
|
289
|
+
|
|
290
|
+
### Session Handoff
|
|
291
|
+
| Tool | Description |
|
|
292
|
+
|------|-------------|
|
|
293
|
+
| `prepare_handoff` | Create handoff document for session transition |
|
|
294
|
+
| `resume_from_handoff` | Resume work from a previous handoff |
|
|
295
|
+
| `list_handoffs` | List available handoff documents |
|
|
296
|
+
|
|
297
|
+
### Context Injection
|
|
298
|
+
| Tool | Description |
|
|
299
|
+
|------|-------------|
|
|
300
|
+
| `get_startup_context` | Get relevant context at conversation start |
|
|
301
|
+
| `inject_relevant_context` | Auto-inject context based on user message |
|
|
302
|
+
|
|
303
|
+
### Tag Management
|
|
304
|
+
| Tool | Description |
|
|
305
|
+
|------|-------------|
|
|
306
|
+
| `list_tags` | List all tags with usage statistics |
|
|
307
|
+
| `search_by_tags` | Find items by tag (memories, decisions, patterns) |
|
|
308
|
+
| `rename_tag` | Rename a tag across all items |
|
|
309
|
+
| `merge_tags` | Merge multiple tags into one |
|
|
310
|
+
| `delete_tag` | Delete a tag and unlink all items |
|
|
311
|
+
| `tag_item` | Add tags to an item |
|
|
312
|
+
| `untag_item` | Remove tags from an item |
|
|
313
|
+
|
|
314
|
+
### Memory Quality
|
|
315
|
+
| Tool | Description |
|
|
316
|
+
|------|-------------|
|
|
317
|
+
| `set_memory_confidence` | Set confidence level (uncertain/likely/confirmed/verified) |
|
|
318
|
+
| `set_memory_importance` | Set importance level (low/normal/high/critical) |
|
|
319
|
+
| `pin_memory` | Pin a memory to prevent cleanup |
|
|
320
|
+
| `archive_memory` | Archive a memory with optional reason |
|
|
321
|
+
| `unarchive_memory` | Restore an archived memory |
|
|
322
|
+
| `search_memory_by_quality` | Search memories by confidence/importance |
|
|
323
|
+
| `get_memory_stats` | Get memory statistics by confidence/importance |
|
|
324
|
+
|
|
325
|
+
### Maintenance
|
|
326
|
+
| Tool | Description |
|
|
327
|
+
|------|-------------|
|
|
328
|
+
| `get_storage_stats` | Database size and item counts |
|
|
329
|
+
| `find_stale_items` | Find items not accessed recently |
|
|
330
|
+
| `find_duplicates` | Find similar/duplicate items |
|
|
331
|
+
| `merge_duplicates` | Merge duplicate items |
|
|
332
|
+
| `cleanup_stale` | Archive or delete stale items |
|
|
333
|
+
| `vacuum_database` | Reclaim disk space |
|
|
334
|
+
| `cleanup_orphans` | Remove orphaned records |
|
|
335
|
+
| `get_health_report` | Overall database health check |
|
|
336
|
+
| `run_maintenance` | Run multiple maintenance tasks |
|
|
337
|
+
| `get_maintenance_history` | View past maintenance operations |
|
|
338
|
+
|
|
293
339
|
### Session IDs
|
|
294
340
|
|
|
295
341
|
`list_recent_sessions` returns **two identifiers**:
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunking Configuration Types
|
|
3
|
+
* Defines configuration options for text chunking strategies
|
|
4
|
+
*/
|
|
5
|
+
export type ChunkingStrategy = "sentence" | "sliding_window" | "paragraph";
|
|
6
|
+
export interface ChunkingConfig {
|
|
7
|
+
/** Enable or disable chunking (default: true) */
|
|
8
|
+
enabled: boolean;
|
|
9
|
+
/** Chunking strategy to use (default: "sentence") */
|
|
10
|
+
strategy: ChunkingStrategy;
|
|
11
|
+
/** Target chunk size in tokens (default: 450 for 512 limit with margin) */
|
|
12
|
+
chunkSize: number;
|
|
13
|
+
/** Overlap between chunks as a fraction (default: 0.1 = 10%) */
|
|
14
|
+
overlap: number;
|
|
15
|
+
/** Minimum chunk size in tokens - don't split smaller texts (default: 50) */
|
|
16
|
+
minChunkSize: number;
|
|
17
|
+
/** Maximum chunk size as hard limit (default: 500) */
|
|
18
|
+
maxChunkSize: number;
|
|
19
|
+
/** Characters per token estimate for prose (default: 4) */
|
|
20
|
+
charsPerTokenProse: number;
|
|
21
|
+
/** Characters per token estimate for code (default: 3.5) */
|
|
22
|
+
charsPerTokenCode: number;
|
|
23
|
+
}
|
|
24
|
+
export declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
|
|
25
|
+
/**
|
|
26
|
+
* Result of chunking a text
|
|
27
|
+
*/
|
|
28
|
+
export interface TextChunk {
|
|
29
|
+
/** The chunk content */
|
|
30
|
+
content: string;
|
|
31
|
+
/** Index of this chunk within the source text */
|
|
32
|
+
index: number;
|
|
33
|
+
/** Total number of chunks from the source text */
|
|
34
|
+
totalChunks: number;
|
|
35
|
+
/** Character offset where this chunk starts in the original text */
|
|
36
|
+
startOffset: number;
|
|
37
|
+
/** Character offset where this chunk ends in the original text */
|
|
38
|
+
endOffset: number;
|
|
39
|
+
/** Estimated token count for this chunk */
|
|
40
|
+
estimatedTokens: number;
|
|
41
|
+
/** Strategy used to create this chunk */
|
|
42
|
+
strategy: ChunkingStrategy;
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Metadata about the chunking operation
|
|
46
|
+
*/
|
|
47
|
+
export interface ChunkingResult {
|
|
48
|
+
/** Length of the original text that was chunked (its string length, not a token count) */
|
|
49
|
+
originalLength: number;
|
|
50
|
+
/** Whether the text was actually chunked or returned as-is */
|
|
51
|
+
wasChunked: boolean;
|
|
52
|
+
/** Chunks produced */
|
|
53
|
+
chunks: TextChunk[];
|
|
54
|
+
/** Strategy used */
|
|
55
|
+
strategy: ChunkingStrategy;
|
|
56
|
+
/** Estimated total tokens in original text */
|
|
57
|
+
estimatedTotalTokens: number;
|
|
58
|
+
}
|
|
59
|
+
/**
|
|
60
|
+
* Get chunking config from environment or defaults
|
|
61
|
+
*/
|
|
62
|
+
export declare function getChunkingConfig(): ChunkingConfig;
|
|
63
|
+
//# sourceMappingURL=ChunkingConfig.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ChunkingConfig.d.ts","sourceRoot":"","sources":["../../src/chunking/ChunkingConfig.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,gBAAgB,GAAG,UAAU,GAAG,gBAAgB,GAAG,WAAW,CAAC;AAE3E,MAAM,WAAW,cAAc;IAC7B,iDAAiD;IACjD,OAAO,EAAE,OAAO,CAAC;IAEjB,qDAAqD;IACrD,QAAQ,EAAE,gBAAgB,CAAC;IAE3B,2EAA2E;IAC3E,SAAS,EAAE,MAAM,CAAC;IAElB,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAEhB,6EAA6E;IAC7E,YAAY,EAAE,MAAM,CAAC;IAErB,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IAErB,2DAA2D;IAC3D,kBAAkB,EAAE,MAAM,CAAC;IAE3B,4DAA4D;IAC5D,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,eAAO,MAAM,uBAAuB,EAAE,cASrC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,wBAAwB;IACxB,OAAO,EAAE,MAAM,CAAC;IAEhB,iDAAiD;IACjD,KAAK,EAAE,MAAM,CAAC;IAEd,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IAEpB,oEAAoE;IACpE,WAAW,EAAE,MAAM,CAAC;IAEpB,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;IAElB,2CAA2C;IAC3C,eAAe,EAAE,MAAM,CAAC;IAExB,yCAAyC;IACzC,QAAQ,EAAE,gBAAgB,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,qCAAqC;IACrC,cAAc,EAAE,MAAM,CAAC;IAEvB,8DAA8D;IAC9D,UAAU,EAAE,OAAO,CAAC;IAEpB,sBAAsB;IACtB,MAAM,EAAE,SAAS,EAAE,CAAC;IAEpB,oBAAoB;IACpB,QAAQ,EAAE,gBAAgB,CAAC;IAE3B,8CAA8C;IAC9C,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,cAAc,CA8BlD"}
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Chunking Configuration Types
|
|
3
|
+
* Defines configuration options for text chunking strategies
|
|
4
|
+
*/
|
|
5
|
+
export const DEFAULT_CHUNKING_CONFIG = {
|
|
6
|
+
enabled: true,
|
|
7
|
+
strategy: "sentence",
|
|
8
|
+
chunkSize: 450,
|
|
9
|
+
overlap: 0.1,
|
|
10
|
+
minChunkSize: 50,
|
|
11
|
+
maxChunkSize: 500,
|
|
12
|
+
charsPerTokenProse: 4,
|
|
13
|
+
charsPerTokenCode: 3.5,
|
|
14
|
+
};
|
|
15
|
+
/**
|
|
16
|
+
* Get chunking config from environment or defaults
|
|
17
|
+
*/
|
|
18
|
+
export function getChunkingConfig() {
|
|
19
|
+
const config = { ...DEFAULT_CHUNKING_CONFIG };
|
|
20
|
+
// Environment overrides
|
|
21
|
+
if (process.env.CCCMEMORY_CHUNKING_ENABLED !== undefined) {
|
|
22
|
+
config.enabled = process.env.CCCMEMORY_CHUNKING_ENABLED === "true";
|
|
23
|
+
}
|
|
24
|
+
if (process.env.CCCMEMORY_CHUNK_SIZE) {
|
|
25
|
+
const size = parseInt(process.env.CCCMEMORY_CHUNK_SIZE, 10);
|
|
26
|
+
if (!isNaN(size) && size > 0) {
|
|
27
|
+
config.chunkSize = size;
|
|
28
|
+
}
|
|
29
|
+
}
|
|
30
|
+
if (process.env.CCCMEMORY_CHUNKING_STRATEGY) {
|
|
31
|
+
const strategy = process.env.CCCMEMORY_CHUNKING_STRATEGY;
|
|
32
|
+
if (["sentence", "sliding_window", "paragraph"].includes(strategy)) {
|
|
33
|
+
config.strategy = strategy;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
if (process.env.CCCMEMORY_CHUNK_OVERLAP) {
|
|
37
|
+
const overlap = parseFloat(process.env.CCCMEMORY_CHUNK_OVERLAP);
|
|
38
|
+
if (!isNaN(overlap) && overlap >= 0 && overlap < 1) {
|
|
39
|
+
config.overlap = overlap;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
return config;
|
|
43
|
+
}
|
|
44
|
+
//# sourceMappingURL=ChunkingConfig.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"ChunkingConfig.js","sourceRoot":"","sources":["../../src/chunking/ChunkingConfig.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA8BH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,UAAU;IACpB,SAAS,EAAE,GAAG;IACd,OAAO,EAAE,GAAG;IACZ,YAAY,EAAE,EAAE;IAChB,YAAY,EAAE,GAAG;IACjB,kBAAkB,EAAE,CAAC;IACrB,iBAAiB,EAAE,GAAG;CACvB,CAAC;AAgDF;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,MAAM,MAAM,GAAG,EAAE,GAAG,uBAAuB,EAAE,CAAC;IAE9C,wBAAwB;IACxB,IAAI,OAAO,CAAC,GAAG,CAAC,0BAA0B,KAAK,SAAS,EAAE,CAAC;QACzD,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,KAAK,MAAM,CAAC;IACrE,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,2BAA2B,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA+C,CAAC;QAC7E,IAAI,CAAC,UAAU,EAAE,gBAAgB,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YACnD,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunker Factory
|
|
3
|
+
* Provides unified interface for text chunking strategies
|
|
4
|
+
*/
|
|
5
|
+
import type { ChunkingConfig, ChunkingResult, TextChunk } from "./ChunkingConfig.js";
|
|
6
|
+
/**
|
|
7
|
+
* Interface for chunking strategies
|
|
8
|
+
*/
|
|
9
|
+
export interface ChunkingStrategy {
|
|
10
|
+
chunk(text: string, config: ChunkingConfig): ChunkingResult;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Text Chunker - Factory for creating and using chunking strategies
|
|
14
|
+
*/
|
|
15
|
+
export declare class TextChunker {
|
|
16
|
+
private config;
|
|
17
|
+
constructor(config?: Partial<ChunkingConfig>);
|
|
18
|
+
/**
|
|
19
|
+
* Get current configuration
|
|
20
|
+
*/
|
|
21
|
+
getConfig(): ChunkingConfig;
|
|
22
|
+
/**
|
|
23
|
+
* Check if text needs chunking based on estimated token count
|
|
24
|
+
*/
|
|
25
|
+
needsChunking(text: string): boolean;
|
|
26
|
+
/**
|
|
27
|
+
* Estimate token count for text
|
|
28
|
+
*/
|
|
29
|
+
estimateTokens(text: string): number;
|
|
30
|
+
/**
|
|
31
|
+
* Chunk text using configured strategy
|
|
32
|
+
*/
|
|
33
|
+
chunk(text: string): ChunkingResult;
|
|
34
|
+
/**
|
|
35
|
+
* Chunk multiple texts in batch
|
|
36
|
+
*/
|
|
37
|
+
chunkBatch(texts: string[]): ChunkingResult[];
|
|
38
|
+
/**
|
|
39
|
+
* Flatten chunks from multiple texts into a single array with source tracking
|
|
40
|
+
*/
|
|
41
|
+
chunkBatchFlat(texts: Array<{
|
|
42
|
+
id: string | number;
|
|
43
|
+
content: string;
|
|
44
|
+
}>): Array<TextChunk & {
|
|
45
|
+
sourceId: string | number;
|
|
46
|
+
}>;
|
|
47
|
+
}
|
|
48
|
+
/**
|
|
49
|
+
* Get or create global chunker instance
|
|
50
|
+
*/
|
|
51
|
+
export declare function getTextChunker(config?: Partial<ChunkingConfig>): TextChunker;
|
|
52
|
+
/**
|
|
53
|
+
* Reset global chunker (useful for testing)
|
|
54
|
+
*/
|
|
55
|
+
export declare function resetTextChunker(): void;
|
|
56
|
+
export type { ChunkingConfig, ChunkingResult, TextChunk };
|
|
57
|
+
export { DEFAULT_CHUNKING_CONFIG, getChunkingConfig } from "./ChunkingConfig.js";
|
|
58
|
+
export { estimateTokens } from "./strategies/SentenceChunker.js";
|
|
59
|
+
//# sourceMappingURL=TextChunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TextChunker.d.ts","sourceRoot":"","sources":["../../src/chunking/TextChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EACV,cAAc,EACd,cAAc,EACd,SAAS,EACV,MAAM,qBAAqB,CAAC;AAQ7B;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,GAAG,cAAc,CAAC;CAC7D;AAED;;GAEG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAiB;gBAEnB,MAAM,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC;IAU5C;;OAEG;IACH,SAAS,IAAI,cAAc;IAI3B;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IASpC;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAIpC;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc;IAyCnC;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,cAAc,EAAE;IAI7C;;OAEG;IACH,cAAc,CACZ,KAAK,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,GACrD,KAAK,CAAC,SAAS,GAAG;QAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,CAAC;CAepD;AAOD;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,WAAW,CAU5E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,IAAI,CAEvC;AAGD,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC"}
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunker Factory
|
|
3
|
+
* Provides unified interface for text chunking strategies
|
|
4
|
+
*/
|
|
5
|
+
import { DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./ChunkingConfig.js";
|
|
6
|
+
import { chunkWithSentences, estimateTokens } from "./strategies/SentenceChunker.js";
|
|
7
|
+
import { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
|
|
8
|
+
/**
 * Text Chunker - selects a chunking strategy from its configuration and
 * applies it to a single text or a batch of texts.
 */
export class TextChunker {
    /** Effective configuration: defaults, then environment overrides, then caller overrides. */
    config;
    /**
     * @param config Optional partial overrides. Keys whose value is
     *   `undefined` are ignored, so passing e.g. `{ chunkSize: undefined }`
     *   cannot erase a default or environment-provided setting.
     */
    constructor(config) {
        const envConfig = getChunkingConfig();
        // A plain spread of `config` would copy explicit `undefined` values
        // over the defaults (leaving e.g. chunkSize undefined and making
        // needsChunking() always false), so strip them first.
        const overrides = Object.fromEntries(Object.entries(config ?? {}).filter(([, value]) => value !== undefined));
        this.config = {
            ...DEFAULT_CHUNKING_CONFIG,
            ...envConfig,
            ...overrides,
        };
    }
    /**
     * Get current configuration.
     * @returns A shallow copy, so callers cannot mutate the chunker's state.
     */
    getConfig() {
        return { ...this.config };
    }
    /**
     * Check if text needs chunking based on estimated token count.
     * @returns `false` when chunking is disabled; otherwise whether the
     *   estimated token count exceeds the configured `chunkSize`.
     */
    needsChunking(text) {
        if (!this.config.enabled) {
            return false;
        }
        return estimateTokens(text, this.config) > this.config.chunkSize;
    }
    /**
     * Estimate token count for text using this chunker's configuration.
     */
    estimateTokens(text) {
        return estimateTokens(text, this.config);
    }
    /**
     * Chunk text using the configured strategy.
     *
     * When chunking is disabled the whole text is returned as a single chunk
     * with `wasChunked: false`.
     */
    chunk(text) {
        if (!this.config.enabled) {
            // Computed once and reused for both the chunk and the result summary.
            const tokenEstimate = estimateTokens(text, this.config);
            return {
                originalLength: text.length,
                wasChunked: false,
                chunks: [
                    {
                        content: text,
                        index: 0,
                        totalChunks: 1,
                        startOffset: 0,
                        endOffset: text.length,
                        estimatedTokens: tokenEstimate,
                        strategy: this.config.strategy,
                    },
                ],
                strategy: this.config.strategy,
                estimatedTotalTokens: tokenEstimate,
            };
        }
        switch (this.config.strategy) {
            case "sliding_window":
                return chunkWithSlidingWindow(text, this.config);
            case "sentence":
            case "paragraph":
            // "paragraph" has no dedicated implementation yet (it would split
            // at \n\n boundaries); it and any unrecognized strategy fall back
            // to sentence chunking.
            default:
                return chunkWithSentences(text, this.config);
        }
    }
    /**
     * Chunk multiple texts in batch.
     * @returns One chunking result per input text, in input order.
     */
    chunkBatch(texts) {
        return texts.map((text) => this.chunk(text));
    }
    /**
     * Flatten chunks from multiple texts into a single array with source tracking.
     * @param texts Items of the form `{ id, content }`.
     * @returns Every chunk of every item, in input order, each extended with
     *   `sourceId` set to the `id` of the item it came from.
     */
    chunkBatchFlat(texts) {
        return texts.flatMap(({ id, content }) => this.chunk(content).chunks.map((chunk) => ({
            ...chunk,
            sourceId: id,
        })));
    }
}
|
|
106
|
+
/**
 * Shared chunker used by callers that do not supply their own configuration.
 * Stays `null` until first requested, and is set back to `null` by
 * resetTextChunker().
 */
let defaultChunker = null;
/**
 * Get a chunker instance.
 *
 * With a (truthy) `config` a new, independent TextChunker is returned each
 * time; without one the shared instance is created on first use and then
 * reused.
 */
export function getTextChunker(config) {
    if (!config) {
        if (!defaultChunker) {
            defaultChunker = new TextChunker();
        }
        return defaultChunker;
    }
    return new TextChunker(config);
}
/**
 * Discard the shared chunker so the next getTextChunker() call without a
 * config builds a new one (useful for testing).
 */
export function resetTextChunker() {
    defaultChunker = null;
}
|
|
128
|
+
export { DEFAULT_CHUNKING_CONFIG, getChunkingConfig } from "./ChunkingConfig.js";
|
|
129
|
+
export { estimateTokens } from "./strategies/SentenceChunker.js";
|
|
130
|
+
//# sourceMappingURL=TextChunker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"TextChunker.js","sourceRoot":"","sources":["../../src/chunking/TextChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACrF,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC;AAS9E;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAiB;IAE/B,YAAY,MAAgC;QAC1C,6CAA6C;QAC7C,MAAM,SAAS,GAAG,iBAAiB,EAAE,CAAC;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,GAAG,uBAAuB;YAC1B,GAAG,SAAS;YACZ,GAAG,MAAM;SACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,IAAY;QACxB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,mBAAmB,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9D,OAAO,mBAAmB,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IACrD,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAY;QACzB,OAAO,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAY;QAChB,4CAA4C;QAC5C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO;gBACL,cAAc,EAAE,IAAI,CAAC,MAAM;gBAC3B,UAAU,EAAE,KAAK;gBACjB,MAAM,EAAE;oBACN;wBACE,OAAO,EAAE,IAAI;wBACb,KAAK,EAAE,CAAC;wBACR,WAAW,EAAE,CAAC;wBACd,WAAW,EAAE,CAAC;wBACd,SAAS,EAAE,IAAI,CAAC,MAAM;wBACtB,eAAe,EAAE,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;wBAClD,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ;qBAC/B;iBACF;gBACD,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ;gBAC9B,oBAAoB,EAAE,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;aACxD,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC7B,KAAK,UAAU;gBACb,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAE/C,KAAK,gBAAgB;gBACnB,OAAO,sBAAsB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAEnD,KAAK,WAAW;gBACd,yCAAyC;gBACzC,oDAAoD;gBACpD,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAE/C;gBACE,+BAA+B;gBAC/B,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,KAAe;QACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;IAC/C,CAAC
;IAED;;OAEG;IACH,cAAc,CACZ,KAAsD;QAEtD,MAAM,OAAO,GAAqD,EAAE,CAAC;QAErE,KAAK,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,KAAK,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG,KAAK;oBACR,QAAQ,EAAE,EAAE;iBACb,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED;;GAEG;AACH,IAAI,cAAc,GAAuB,IAAI,CAAC;AAE9C;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,MAAgC;IAC7D,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,WAAW,EAAE,CAAC;IACrC,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,cAAc,GAAG,IAAI,CAAC;AACxB,CAAC;AAID,OAAO,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC"}
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunking Module
|
|
3
|
+
* Provides text chunking strategies for handling long messages that exceed embedding model limits
|
|
4
|
+
*/
|
|
5
|
+
export { TextChunker, getTextChunker, resetTextChunker, estimateTokens, DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./TextChunker.js";
|
|
6
|
+
export type { ChunkingConfig, ChunkingResult, TextChunk, ChunkingStrategy as ChunkingStrategyType, } from "./ChunkingConfig.js";
|
|
7
|
+
export { chunkWithSentences } from "./strategies/SentenceChunker.js";
|
|
8
|
+
export { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
|
|
9
|
+
//# sourceMappingURL=index.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EACV,cAAc,EACd,cAAc,EACd,SAAS,EACT,gBAAgB,IAAI,oBAAoB,GACzC,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC"}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Text Chunking Module
|
|
3
|
+
* Provides text chunking strategies for handling long messages that exceed embedding model limits
|
|
4
|
+
*/
|
|
5
|
+
export { TextChunker, getTextChunker, resetTextChunker, estimateTokens, DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./TextChunker.js";
|
|
6
|
+
export { chunkWithSentences } from "./strategies/SentenceChunker.js";
|
|
7
|
+
export { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
|
|
8
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAS1B,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC"}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Sentence-Aware Text Chunker
|
|
3
|
+
* Splits text at sentence boundaries while respecting code blocks and paragraphs
|
|
4
|
+
*/
|
|
5
|
+
import type { ChunkingConfig, ChunkingResult } from "../ChunkingConfig.js";
|
|
6
|
+
/**
|
|
7
|
+
* Estimate token count using character ratios
|
|
8
|
+
*/
|
|
9
|
+
declare function estimateTokens(text: string, config: ChunkingConfig): number;
|
|
10
|
+
/**
|
|
11
|
+
* Detect if text contains code patterns
|
|
12
|
+
*/
|
|
13
|
+
declare function isCodeLike(text: string): boolean;
|
|
14
|
+
/**
|
|
15
|
+
* Chunk text using sentence-aware strategy
|
|
16
|
+
*/
|
|
17
|
+
export declare function chunkWithSentences(text: string, config: ChunkingConfig): ChunkingResult;
|
|
18
|
+
export { estimateTokens, isCodeLike };
|
|
19
|
+
//# sourceMappingURL=SentenceChunker.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"SentenceChunker.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/SentenceChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAa,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEtF;;GAEG;AACH,iBAAS,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,GAAG,MAAM,CAWpE;AAED;;GAEG;AACH,iBAAS,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAczC;AAuND;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,cAAc,GACrB,cAAc,CAqChB;AAED,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,CAAC"}
|