cccmemory 1.9.0 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. package/README.md +96 -50
  2. package/dist/chunking/ChunkingConfig.d.ts +63 -0
  3. package/dist/chunking/ChunkingConfig.d.ts.map +1 -0
  4. package/dist/chunking/ChunkingConfig.js +44 -0
  5. package/dist/chunking/ChunkingConfig.js.map +1 -0
  6. package/dist/chunking/TextChunker.d.ts +59 -0
  7. package/dist/chunking/TextChunker.d.ts.map +1 -0
  8. package/dist/chunking/TextChunker.js +130 -0
  9. package/dist/chunking/TextChunker.js.map +1 -0
  10. package/dist/chunking/index.d.ts +9 -0
  11. package/dist/chunking/index.d.ts.map +1 -0
  12. package/dist/chunking/index.js +8 -0
  13. package/dist/chunking/index.js.map +1 -0
  14. package/dist/chunking/strategies/SentenceChunker.d.ts +19 -0
  15. package/dist/chunking/strategies/SentenceChunker.d.ts.map +1 -0
  16. package/dist/chunking/strategies/SentenceChunker.js +251 -0
  17. package/dist/chunking/strategies/SentenceChunker.js.map +1 -0
  18. package/dist/chunking/strategies/SlidingWindowChunker.d.ts +10 -0
  19. package/dist/chunking/strategies/SlidingWindowChunker.d.ts.map +1 -0
  20. package/dist/chunking/strategies/SlidingWindowChunker.js +133 -0
  21. package/dist/chunking/strategies/SlidingWindowChunker.js.map +1 -0
  22. package/dist/embeddings/VectorStore.d.ts +78 -3
  23. package/dist/embeddings/VectorStore.d.ts.map +1 -1
  24. package/dist/embeddings/VectorStore.js +352 -21
  25. package/dist/embeddings/VectorStore.js.map +1 -1
  26. package/dist/mcp-server.d.ts +5 -0
  27. package/dist/mcp-server.d.ts.map +1 -1
  28. package/dist/mcp-server.js +79 -11
  29. package/dist/mcp-server.js.map +1 -1
  30. package/dist/parsers/ExtractionValidator.d.ts +84 -0
  31. package/dist/parsers/ExtractionValidator.d.ts.map +1 -0
  32. package/dist/parsers/ExtractionValidator.js +296 -0
  33. package/dist/parsers/ExtractionValidator.js.map +1 -0
  34. package/dist/search/HybridReranker.d.ts +80 -0
  35. package/dist/search/HybridReranker.d.ts.map +1 -0
  36. package/dist/search/HybridReranker.js +146 -0
  37. package/dist/search/HybridReranker.js.map +1 -0
  38. package/dist/search/QueryExpander.d.ts +53 -0
  39. package/dist/search/QueryExpander.d.ts.map +1 -0
  40. package/dist/search/QueryExpander.js +178 -0
  41. package/dist/search/QueryExpander.js.map +1 -0
  42. package/dist/search/ResultAggregator.d.ts +70 -0
  43. package/dist/search/ResultAggregator.d.ts.map +1 -0
  44. package/dist/search/ResultAggregator.js +155 -0
  45. package/dist/search/ResultAggregator.js.map +1 -0
  46. package/dist/search/SemanticSearch.d.ts +25 -1
  47. package/dist/search/SemanticSearch.d.ts.map +1 -1
  48. package/dist/search/SemanticSearch.js +264 -53
  49. package/dist/search/SemanticSearch.js.map +1 -1
  50. package/dist/search/SnippetGenerator.d.ts +81 -0
  51. package/dist/search/SnippetGenerator.d.ts.map +1 -0
  52. package/dist/search/SnippetGenerator.js +268 -0
  53. package/dist/search/SnippetGenerator.js.map +1 -0
  54. package/dist/search/index.d.ts +15 -0
  55. package/dist/search/index.d.ts.map +1 -0
  56. package/dist/search/index.js +10 -0
  57. package/dist/search/index.js.map +1 -0
  58. package/dist/storage/migrations.d.ts +3 -2
  59. package/dist/storage/migrations.d.ts.map +1 -1
  60. package/dist/storage/migrations.js +235 -13
  61. package/dist/storage/migrations.js.map +1 -1
  62. package/dist/tools/ToolDefinitions.d.ts +786 -0
  63. package/dist/tools/ToolDefinitions.d.ts.map +1 -1
  64. package/dist/tools/ToolDefinitions.js +758 -1
  65. package/dist/tools/ToolDefinitions.js.map +1 -1
  66. package/dist/tools/ToolHandlers.d.ts +96 -21
  67. package/dist/tools/ToolHandlers.d.ts.map +1 -1
  68. package/dist/tools/ToolHandlers.js +1926 -127
  69. package/dist/tools/ToolHandlers.js.map +1 -1
  70. package/dist/types/ToolTypes.d.ts +495 -18
  71. package/dist/types/ToolTypes.d.ts.map +1 -1
  72. package/package.json +1 -1
package/README.md CHANGED
@@ -4,76 +4,51 @@ An MCP server that gives Claude long-term memory by indexing conversation histor
4
4
 
5
5
  ---
6
6
 
7
- ## ⚠️ Breaking Changes in v1.8.0
7
+ ## What's New in v2.0
8
8
 
9
- **This package has been renamed from `claude-conversation-memory-mcp` to `cccmemory`.**
9
+ Version 2.0 brings major improvements to search quality and accuracy:
10
10
 
11
- If you were using the old package, follow these migration steps:
11
+ - **Smart Chunking** - Long messages are now split at sentence boundaries, ensuring full content is searchable (previously truncated at 512 tokens)
12
+ - **Hybrid Search** - Combines semantic search with full-text search using Reciprocal Rank Fusion (RRF) for better ranking
13
+ - **Dynamic Thresholds** - Similarity thresholds adjust based on query length for better precision
14
+ - **Improved Snippets** - Search results highlight matching terms in context
15
+ - **Extraction Validation** - Reduces false positives in decision/mistake detection
16
+ - **Query Expansion** - Optional synonym expansion for broader recall (disabled by default)
12
17
 
13
- ### 1. Uninstall the old package
14
-
15
- ```bash
16
- npm uninstall -g claude-conversation-memory-mcp
17
- ```
18
-
19
- ### 2. Install the new package
20
-
21
- ```bash
22
- npm install -g cccmemory
23
- ```
24
-
25
- ### 3. Update your MCP configuration
26
-
27
- **Claude Desktop** (`~/Library/Application Support/Claude/claude_desktop_config.json`):
28
- ```json
29
- {
30
- "mcpServers": {
31
- "cccmemory": {
32
- "command": "npx",
33
- "args": ["-y", "cccmemory"]
34
- }
35
- }
36
- }
37
- ```
18
+ ---
38
19
 
39
- **Claude Code** (`~/.claude.json`):
40
- ```json
41
- {
42
- "mcpServers": {
43
- "cccmemory": {
44
- "command": "npx",
45
- "args": ["-y", "cccmemory"]
46
- }
47
- }
48
- }
49
- ```
20
+ <details>
21
+ <summary><strong>⚠️ Breaking Changes in v1.8.0</strong> (click to expand)</summary>
50
22
 
51
- **Codex** (`~/.codex/config.toml`):
52
- ```toml
53
- [mcp_servers.cccmemory]
54
- command = "npx"
55
- args = ["-y", "cccmemory"]
56
- ```
23
+ **This package was renamed from `claude-conversation-memory-mcp` to `cccmemory`.**
57
24
 
58
- ### 4. Database migration (automatic)
25
+ If upgrading from the old package:
59
26
 
60
- Your conversation history is preserved. The database files are automatically migrated:
61
- - `.claude-conversations-memory.db` → `.cccmemory.db`
62
- - `.codex-conversations-memory.db` → `.cccmemory.db`
27
+ 1. Uninstall old package: `npm uninstall -g claude-conversation-memory-mcp`
28
+ 2. Install new package: `npm install -g cccmemory`
29
+ 3. Update MCP config to use `cccmemory` command
30
+ 4. Database migration is automatic (`.claude-conversations-memory.db` → `.cccmemory.db`)
63
31
 
64
- No manual action required - the migration happens on first run.
32
+ </details>
65
33
 
66
34
  ---
67
35
 
68
36
  ## Features
69
37
 
70
38
  - **Search conversations** - Natural language search across your chat history
39
+ - **Smart chunking** - Long messages fully indexed without truncation
40
+ - **Hybrid search** - Combines vector + keyword search with RRF re-ranking
71
41
  - **Track decisions** - Remember why you made technical choices
72
42
  - **Prevent mistakes** - Learn from past errors
73
43
  - **Git integration** - Link conversations to commits
74
44
  - **Cross-project search** - Search across all your projects globally
75
45
  - **Project migration** - Keep history when renaming/moving projects
76
46
  - **Semantic search** - Uses Transformers.js embeddings (bundled, works offline)
47
+ - **Working memory** - Store and recall facts, decisions, and context across sessions
48
+ - **Session handoff** - Seamless context transfer between conversations
49
+ - **Tag management** - Organize memories, decisions, and patterns with tags
50
+ - **Memory quality** - Track confidence, importance, and verification status
51
+ - **Database maintenance** - Find duplicates, clean stale data, health reports
77
52
 
78
53
  ## Installation
79
54
 
@@ -249,6 +224,19 @@ Config:
249
224
  Set `OPENAI_API_KEY` environment variable.
250
225
  </details>
251
226
 
227
+ ### Search Configuration (Optional)
228
+
229
+ Tune search behavior with environment variables:
230
+
231
+ | Variable | Default | Description |
232
+ |----------|---------|-------------|
233
+ | `CCCMEMORY_CHUNKING_ENABLED` | `true` | Enable smart chunking for long messages |
234
+ | `CCCMEMORY_CHUNK_SIZE` | `450` | Target chunk size in tokens |
235
+ | `CCCMEMORY_CHUNK_OVERLAP` | `0.1` | Overlap between chunks as a fraction (0 ≤ value < 1) |
236
+ | `CCCMEMORY_RERANK_ENABLED` | `true` | Enable hybrid re-ranking (vector + FTS) |
237
+ | `CCCMEMORY_RERANK_WEIGHT` | `0.7` | Vector weight in re-ranking (FTS gets `1 - weight`) |
238
+ | `CCCMEMORY_QUERY_EXPANSION` | `false` | Enable synonym expansion for queries |
239
+
252
240
  ## MCP Tools
253
241
 
254
242
  ### Indexing
@@ -290,6 +278,64 @@ Set `OPENAI_API_KEY` environment variable.
290
278
  | `forget_by_topic` | Delete conversations by keyword |
291
279
  | `generate_documentation` | Generate docs from local code scan + conversations |
292
280
 
281
+ ### Working Memory
282
+ | Tool | Description |
283
+ |------|-------------|
284
+ | `remember` | Store a fact, decision, or context with optional TTL |
285
+ | `recall` | Retrieve a specific memory by key |
286
+ | `recall_relevant` | Semantic search across stored memories |
287
+ | `list_memory` | List all memories, optionally filtered by tags |
288
+ | `forget` | Remove a memory by key |
289
+
290
+ ### Session Handoff
291
+ | Tool | Description |
292
+ |------|-------------|
293
+ | `prepare_handoff` | Create handoff document for session transition |
294
+ | `resume_from_handoff` | Resume work from a previous handoff |
295
+ | `list_handoffs` | List available handoff documents |
296
+
297
+ ### Context Injection
298
+ | Tool | Description |
299
+ |------|-------------|
300
+ | `get_startup_context` | Get relevant context at conversation start |
301
+ | `inject_relevant_context` | Auto-inject context based on user message |
302
+
303
+ ### Tag Management
304
+ | Tool | Description |
305
+ |------|-------------|
306
+ | `list_tags` | List all tags with usage statistics |
307
+ | `search_by_tags` | Find items by tag (memories, decisions, patterns) |
308
+ | `rename_tag` | Rename a tag across all items |
309
+ | `merge_tags` | Merge multiple tags into one |
310
+ | `delete_tag` | Delete a tag and unlink all items |
311
+ | `tag_item` | Add tags to an item |
312
+ | `untag_item` | Remove tags from an item |
313
+
314
+ ### Memory Quality
315
+ | Tool | Description |
316
+ |------|-------------|
317
+ | `set_memory_confidence` | Set confidence level (uncertain/likely/confirmed/verified) |
318
+ | `set_memory_importance` | Set importance level (low/normal/high/critical) |
319
+ | `pin_memory` | Pin a memory to prevent cleanup |
320
+ | `archive_memory` | Archive a memory with optional reason |
321
+ | `unarchive_memory` | Restore an archived memory |
322
+ | `search_memory_by_quality` | Search memories by confidence/importance |
323
+ | `get_memory_stats` | Get memory statistics by confidence/importance |
324
+
325
+ ### Maintenance
326
+ | Tool | Description |
327
+ |------|-------------|
328
+ | `get_storage_stats` | Database size and item counts |
329
+ | `find_stale_items` | Find items not accessed recently |
330
+ | `find_duplicates` | Find similar/duplicate items |
331
+ | `merge_duplicates` | Merge duplicate items |
332
+ | `cleanup_stale` | Archive or delete stale items |
333
+ | `vacuum_database` | Reclaim disk space |
334
+ | `cleanup_orphans` | Remove orphaned records |
335
+ | `get_health_report` | Overall database health check |
336
+ | `run_maintenance` | Run multiple maintenance tasks |
337
+ | `get_maintenance_history` | View past maintenance operations |
338
+
293
339
  ### Session IDs
294
340
 
295
341
  `list_recent_sessions` returns **two identifiers**:
@@ -0,0 +1,63 @@
1
+ /**
2
+ * Chunking Configuration Types
3
+ * Defines configuration options for text chunking strategies
4
+ */
5
+ export type ChunkingStrategy = "sentence" | "sliding_window" | "paragraph";
6
+ export interface ChunkingConfig {
7
+ /** Enable or disable chunking (default: true) */
8
+ enabled: boolean;
9
+ /** Chunking strategy to use (default: "sentence") */
10
+ strategy: ChunkingStrategy;
11
+ /** Target chunk size in tokens (default: 450 for 512 limit with margin) */
12
+ chunkSize: number;
13
+ /** Overlap between chunks as a fraction (default: 0.1 = 10%) */
14
+ overlap: number;
15
+ /** Minimum chunk size in tokens - don't split smaller texts (default: 50) */
16
+ minChunkSize: number;
17
+ /** Maximum chunk size as hard limit (default: 500) */
18
+ maxChunkSize: number;
19
+ /** Characters per token estimate for prose (default: 4) */
20
+ charsPerTokenProse: number;
21
+ /** Characters per token estimate for code (default: 3.5) */
22
+ charsPerTokenCode: number;
23
+ }
24
+ export declare const DEFAULT_CHUNKING_CONFIG: ChunkingConfig;
25
+ /**
26
+ * Result of chunking a text
27
+ */
28
+ export interface TextChunk {
29
+ /** The chunk content */
30
+ content: string;
31
+ /** Index of this chunk within the source text */
32
+ index: number;
33
+ /** Total number of chunks from the source text */
34
+ totalChunks: number;
35
+ /** Character offset where this chunk starts in the original text */
36
+ startOffset: number;
37
+ /** Character offset where this chunk ends in the original text */
38
+ endOffset: number;
39
+ /** Estimated token count for this chunk */
40
+ estimatedTokens: number;
41
+ /** Strategy used to create this chunk */
42
+ strategy: ChunkingStrategy;
43
+ }
44
+ /**
45
+ * Metadata about the chunking operation
46
+ */
47
+ export interface ChunkingResult {
48
+ /** Length of the original text that was chunked, in characters */
49
+ originalLength: number;
50
+ /** Whether the text was actually chunked or returned as-is */
51
+ wasChunked: boolean;
52
+ /** Chunks produced */
53
+ chunks: TextChunk[];
54
+ /** Strategy used */
55
+ strategy: ChunkingStrategy;
56
+ /** Estimated total tokens in original text */
57
+ estimatedTotalTokens: number;
58
+ }
59
+ /**
60
+ * Get chunking config from environment or defaults
61
+ */
62
+ export declare function getChunkingConfig(): ChunkingConfig;
63
+ //# sourceMappingURL=ChunkingConfig.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ChunkingConfig.d.ts","sourceRoot":"","sources":["../../src/chunking/ChunkingConfig.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,MAAM,MAAM,gBAAgB,GAAG,UAAU,GAAG,gBAAgB,GAAG,WAAW,CAAC;AAE3E,MAAM,WAAW,cAAc;IAC7B,iDAAiD;IACjD,OAAO,EAAE,OAAO,CAAC;IAEjB,qDAAqD;IACrD,QAAQ,EAAE,gBAAgB,CAAC;IAE3B,2EAA2E;IAC3E,SAAS,EAAE,MAAM,CAAC;IAElB,gEAAgE;IAChE,OAAO,EAAE,MAAM,CAAC;IAEhB,6EAA6E;IAC7E,YAAY,EAAE,MAAM,CAAC;IAErB,sDAAsD;IACtD,YAAY,EAAE,MAAM,CAAC;IAErB,2DAA2D;IAC3D,kBAAkB,EAAE,MAAM,CAAC;IAE3B,4DAA4D;IAC5D,iBAAiB,EAAE,MAAM,CAAC;CAC3B;AAED,eAAO,MAAM,uBAAuB,EAAE,cASrC,CAAC;AAEF;;GAEG;AACH,MAAM,WAAW,SAAS;IACxB,wBAAwB;IACxB,OAAO,EAAE,MAAM,CAAC;IAEhB,iDAAiD;IACjD,KAAK,EAAE,MAAM,CAAC;IAEd,kDAAkD;IAClD,WAAW,EAAE,MAAM,CAAC;IAEpB,oEAAoE;IACpE,WAAW,EAAE,MAAM,CAAC;IAEpB,kEAAkE;IAClE,SAAS,EAAE,MAAM,CAAC;IAElB,2CAA2C;IAC3C,eAAe,EAAE,MAAM,CAAC;IAExB,yCAAyC;IACzC,QAAQ,EAAE,gBAAgB,CAAC;CAC5B;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,qCAAqC;IACrC,cAAc,EAAE,MAAM,CAAC;IAEvB,8DAA8D;IAC9D,UAAU,EAAE,OAAO,CAAC;IAEpB,sBAAsB;IACtB,MAAM,EAAE,SAAS,EAAE,CAAC;IAEpB,oBAAoB;IACpB,QAAQ,EAAE,gBAAgB,CAAC;IAE3B,8CAA8C;IAC9C,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,wBAAgB,iBAAiB,IAAI,cAAc,CA8BlD"}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Chunking Configuration Types
3
+ * Defines configuration options for text chunking strategies
4
+ */
5
+ export const DEFAULT_CHUNKING_CONFIG = {
6
+ enabled: true,
7
+ strategy: "sentence",
8
+ chunkSize: 450,
9
+ overlap: 0.1,
10
+ minChunkSize: 50,
11
+ maxChunkSize: 500,
12
+ charsPerTokenProse: 4,
13
+ charsPerTokenCode: 3.5,
14
+ };
15
+ /**
16
+ * Get chunking config from environment or defaults
17
+ */
18
+ export function getChunkingConfig() {
19
+ const config = { ...DEFAULT_CHUNKING_CONFIG };
20
+ // Environment overrides
21
+ if (process.env.CCCMEMORY_CHUNKING_ENABLED !== undefined) {
22
+ config.enabled = process.env.CCCMEMORY_CHUNKING_ENABLED === "true";
23
+ }
24
+ if (process.env.CCCMEMORY_CHUNK_SIZE) {
25
+ const size = parseInt(process.env.CCCMEMORY_CHUNK_SIZE, 10);
26
+ if (!isNaN(size) && size > 0) {
27
+ config.chunkSize = size;
28
+ }
29
+ }
30
+ if (process.env.CCCMEMORY_CHUNKING_STRATEGY) {
31
+ const strategy = process.env.CCCMEMORY_CHUNKING_STRATEGY;
32
+ if (["sentence", "sliding_window", "paragraph"].includes(strategy)) {
33
+ config.strategy = strategy;
34
+ }
35
+ }
36
+ if (process.env.CCCMEMORY_CHUNK_OVERLAP) {
37
+ const overlap = parseFloat(process.env.CCCMEMORY_CHUNK_OVERLAP);
38
+ if (!isNaN(overlap) && overlap >= 0 && overlap < 1) {
39
+ config.overlap = overlap;
40
+ }
41
+ }
42
+ return config;
43
+ }
44
+ //# sourceMappingURL=ChunkingConfig.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"ChunkingConfig.js","sourceRoot":"","sources":["../../src/chunking/ChunkingConfig.ts"],"names":[],"mappings":"AAAA;;;GAGG;AA8BH,MAAM,CAAC,MAAM,uBAAuB,GAAmB;IACrD,OAAO,EAAE,IAAI;IACb,QAAQ,EAAE,UAAU;IACpB,SAAS,EAAE,GAAG;IACd,OAAO,EAAE,GAAG;IACZ,YAAY,EAAE,EAAE;IAChB,YAAY,EAAE,GAAG;IACjB,kBAAkB,EAAE,CAAC;IACrB,iBAAiB,EAAE,GAAG;CACvB,CAAC;AAgDF;;GAEG;AACH,MAAM,UAAU,iBAAiB;IAC/B,MAAM,MAAM,GAAG,EAAE,GAAG,uBAAuB,EAAE,CAAC;IAE9C,wBAAwB;IACxB,IAAI,OAAO,CAAC,GAAG,CAAC,0BAA0B,KAAK,SAAS,EAAE,CAAC;QACzD,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC,GAAG,CAAC,0BAA0B,KAAK,MAAM,CAAC;IACrE,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,CAAC;QACrC,MAAM,IAAI,GAAG,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC;QAC5D,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,IAAI,GAAG,CAAC,EAAE,CAAC;YAC7B,MAAM,CAAC,SAAS,GAAG,IAAI,CAAC;QAC1B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,2BAA2B,EAAE,CAAC;QAC5C,MAAM,QAAQ,GAAG,OAAO,CAAC,GAAG,CAAC,2BAA+C,CAAC;QAC7E,IAAI,CAAC,UAAU,EAAE,gBAAgB,EAAE,WAAW,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC;YACnE,MAAM,CAAC,QAAQ,GAAG,QAAQ,CAAC;QAC7B,CAAC;IACH,CAAC;IAED,IAAI,OAAO,CAAC,GAAG,CAAC,uBAAuB,EAAE,CAAC;QACxC,MAAM,OAAO,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC,CAAC;QAChE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,IAAI,OAAO,IAAI,CAAC,IAAI,OAAO,GAAG,CAAC,EAAE,CAAC;YACnD,MAAM,CAAC,OAAO,GAAG,OAAO,CAAC;QAC3B,CAAC;IACH,CAAC;IAED,OAAO,MAAM,CAAC;AAChB,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Text Chunker Factory
3
+ * Provides unified interface for text chunking strategies
4
+ */
5
+ import type { ChunkingConfig, ChunkingResult, TextChunk } from "./ChunkingConfig.js";
6
+ /**
7
+ * Interface for chunking strategies
8
+ */
9
+ export interface ChunkingStrategy {
10
+ chunk(text: string, config: ChunkingConfig): ChunkingResult;
11
+ }
12
+ /**
13
+ * Text Chunker - Factory for creating and using chunking strategies
14
+ */
15
+ export declare class TextChunker {
16
+ private config;
17
+ constructor(config?: Partial<ChunkingConfig>);
18
+ /**
19
+ * Get current configuration
20
+ */
21
+ getConfig(): ChunkingConfig;
22
+ /**
23
+ * Check if text needs chunking based on estimated token count
24
+ */
25
+ needsChunking(text: string): boolean;
26
+ /**
27
+ * Estimate token count for text
28
+ */
29
+ estimateTokens(text: string): number;
30
+ /**
31
+ * Chunk text using configured strategy
32
+ */
33
+ chunk(text: string): ChunkingResult;
34
+ /**
35
+ * Chunk multiple texts in batch
36
+ */
37
+ chunkBatch(texts: string[]): ChunkingResult[];
38
+ /**
39
+ * Flatten chunks from multiple texts into a single array with source tracking
40
+ */
41
+ chunkBatchFlat(texts: Array<{
42
+ id: string | number;
43
+ content: string;
44
+ }>): Array<TextChunk & {
45
+ sourceId: string | number;
46
+ }>;
47
+ }
48
+ /**
49
+ * Get or create global chunker instance
50
+ */
51
+ export declare function getTextChunker(config?: Partial<ChunkingConfig>): TextChunker;
52
+ /**
53
+ * Reset global chunker (useful for testing)
54
+ */
55
+ export declare function resetTextChunker(): void;
56
+ export type { ChunkingConfig, ChunkingResult, TextChunk };
57
+ export { DEFAULT_CHUNKING_CONFIG, getChunkingConfig } from "./ChunkingConfig.js";
58
+ export { estimateTokens } from "./strategies/SentenceChunker.js";
59
+ //# sourceMappingURL=TextChunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TextChunker.d.ts","sourceRoot":"","sources":["../../src/chunking/TextChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EACV,cAAc,EACd,cAAc,EACd,SAAS,EACV,MAAM,qBAAqB,CAAC;AAQ7B;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,KAAK,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,GAAG,cAAc,CAAC;CAC7D;AAED;;GAEG;AACH,qBAAa,WAAW;IACtB,OAAO,CAAC,MAAM,CAAiB;gBAEnB,MAAM,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC;IAU5C;;OAEG;IACH,SAAS,IAAI,cAAc;IAI3B;;OAEG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO;IASpC;;OAEG;IACH,cAAc,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM;IAIpC;;OAEG;IACH,KAAK,CAAC,IAAI,EAAE,MAAM,GAAG,cAAc;IAyCnC;;OAEG;IACH,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,GAAG,cAAc,EAAE;IAI7C;;OAEG;IACH,cAAc,CACZ,KAAK,EAAE,KAAK,CAAC;QAAE,EAAE,EAAE,MAAM,GAAG,MAAM,CAAC;QAAC,OAAO,EAAE,MAAM,CAAA;KAAE,CAAC,GACrD,KAAK,CAAC,SAAS,GAAG;QAAE,QAAQ,EAAE,MAAM,GAAG,MAAM,CAAA;KAAE,CAAC;CAepD;AAOD;;GAEG;AACH,wBAAgB,cAAc,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,cAAc,CAAC,GAAG,WAAW,CAU5E;AAED;;GAEG;AACH,wBAAgB,gBAAgB,IAAI,IAAI,CAEvC;AAGD,YAAY,EAAE,cAAc,EAAE,cAAc,EAAE,SAAS,EAAE,CAAC;AAC1D,OAAO,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC"}
@@ -0,0 +1,130 @@
1
+ /**
2
+ * Text Chunker Factory
3
+ * Provides unified interface for text chunking strategies
4
+ */
5
+ import { DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./ChunkingConfig.js";
6
+ import { chunkWithSentences, estimateTokens } from "./strategies/SentenceChunker.js";
7
+ import { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
8
+ /**
9
+ * Text Chunker - Factory for creating and using chunking strategies
10
+ */
11
+ export class TextChunker {
12
+ config;
13
+ constructor(config) {
14
+ // Merge with defaults and environment config
15
+ const envConfig = getChunkingConfig();
16
+ this.config = {
17
+ ...DEFAULT_CHUNKING_CONFIG,
18
+ ...envConfig,
19
+ ...config,
20
+ };
21
+ }
22
+ /**
23
+ * Get current configuration
24
+ */
25
+ getConfig() {
26
+ return { ...this.config };
27
+ }
28
+ /**
29
+ * Check if text needs chunking based on estimated token count
30
+ */
31
+ needsChunking(text) {
32
+ if (!this.config.enabled) {
33
+ return false;
34
+ }
35
+ const estimatedTokenCount = estimateTokens(text, this.config);
36
+ return estimatedTokenCount > this.config.chunkSize;
37
+ }
38
+ /**
39
+ * Estimate token count for text
40
+ */
41
+ estimateTokens(text) {
42
+ return estimateTokens(text, this.config);
43
+ }
44
+ /**
45
+ * Chunk text using configured strategy
46
+ */
47
+ chunk(text) {
48
+ // If chunking disabled, return single chunk
49
+ if (!this.config.enabled) {
50
+ return {
51
+ originalLength: text.length,
52
+ wasChunked: false,
53
+ chunks: [
54
+ {
55
+ content: text,
56
+ index: 0,
57
+ totalChunks: 1,
58
+ startOffset: 0,
59
+ endOffset: text.length,
60
+ estimatedTokens: estimateTokens(text, this.config),
61
+ strategy: this.config.strategy,
62
+ },
63
+ ],
64
+ strategy: this.config.strategy,
65
+ estimatedTotalTokens: estimateTokens(text, this.config),
66
+ };
67
+ }
68
+ // Select strategy based on configuration
69
+ switch (this.config.strategy) {
70
+ case "sentence":
71
+ return chunkWithSentences(text, this.config);
72
+ case "sliding_window":
73
+ return chunkWithSlidingWindow(text, this.config);
74
+ case "paragraph":
75
+ // Fall back to sentence chunking for now
76
+ // Paragraph chunking would split at \n\n boundaries
77
+ return chunkWithSentences(text, this.config);
78
+ default:
79
+ // Default to sentence chunking
80
+ return chunkWithSentences(text, this.config);
81
+ }
82
+ }
83
+ /**
84
+ * Chunk multiple texts in batch
85
+ */
86
+ chunkBatch(texts) {
87
+ return texts.map((text) => this.chunk(text));
88
+ }
89
+ /**
90
+ * Flatten chunks from multiple texts into a single array with source tracking
91
+ */
92
+ chunkBatchFlat(texts) {
93
+ const results = [];
94
+ for (const { id, content } of texts) {
95
+ const result = this.chunk(content);
96
+ for (const chunk of result.chunks) {
97
+ results.push({
98
+ ...chunk,
99
+ sourceId: id,
100
+ });
101
+ }
102
+ }
103
+ return results;
104
+ }
105
+ }
106
+ /**
107
+ * Global chunker instance with default config
108
+ */
109
+ let defaultChunker = null;
110
+ /**
111
+ * Get or create global chunker instance
112
+ */
113
+ export function getTextChunker(config) {
114
+ if (config) {
115
+ return new TextChunker(config);
116
+ }
117
+ if (!defaultChunker) {
118
+ defaultChunker = new TextChunker();
119
+ }
120
+ return defaultChunker;
121
+ }
122
+ /**
123
+ * Reset global chunker (useful for testing)
124
+ */
125
+ export function resetTextChunker() {
126
+ defaultChunker = null;
127
+ }
128
+ export { DEFAULT_CHUNKING_CONFIG, getChunkingConfig } from "./ChunkingConfig.js";
129
+ export { estimateTokens } from "./strategies/SentenceChunker.js";
130
+ //# sourceMappingURL=TextChunker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"TextChunker.js","sourceRoot":"","sources":["../../src/chunking/TextChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAOH,OAAO,EACL,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,qBAAqB,CAAC;AAC7B,OAAO,EAAE,kBAAkB,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC;AACrF,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC;AAS9E;;GAEG;AACH,MAAM,OAAO,WAAW;IACd,MAAM,CAAiB;IAE/B,YAAY,MAAgC;QAC1C,6CAA6C;QAC7C,MAAM,SAAS,GAAG,iBAAiB,EAAE,CAAC;QACtC,IAAI,CAAC,MAAM,GAAG;YACZ,GAAG,uBAAuB;YAC1B,GAAG,SAAS;YACZ,GAAG,MAAM;SACV,CAAC;IACJ,CAAC;IAED;;OAEG;IACH,SAAS;QACP,OAAO,EAAE,GAAG,IAAI,CAAC,MAAM,EAAE,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,aAAa,CAAC,IAAY;QACxB,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO,KAAK,CAAC;QACf,CAAC;QAED,MAAM,mBAAmB,GAAG,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QAC9D,OAAO,mBAAmB,GAAG,IAAI,CAAC,MAAM,CAAC,SAAS,CAAC;IACrD,CAAC;IAED;;OAEG;IACH,cAAc,CAAC,IAAY;QACzB,OAAO,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;IAC3C,CAAC;IAED;;OAEG;IACH,KAAK,CAAC,IAAY;QAChB,4CAA4C;QAC5C,IAAI,CAAC,IAAI,CAAC,MAAM,CAAC,OAAO,EAAE,CAAC;YACzB,OAAO;gBACL,cAAc,EAAE,IAAI,CAAC,MAAM;gBAC3B,UAAU,EAAE,KAAK;gBACjB,MAAM,EAAE;oBACN;wBACE,OAAO,EAAE,IAAI;wBACb,KAAK,EAAE,CAAC;wBACR,WAAW,EAAE,CAAC;wBACd,WAAW,EAAE,CAAC;wBACd,SAAS,EAAE,IAAI,CAAC,MAAM;wBACtB,eAAe,EAAE,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;wBAClD,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ;qBAC/B;iBACF;gBACD,QAAQ,EAAE,IAAI,CAAC,MAAM,CAAC,QAAQ;gBAC9B,oBAAoB,EAAE,cAAc,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC;aACxD,CAAC;QACJ,CAAC;QAED,yCAAyC;QACzC,QAAQ,IAAI,CAAC,MAAM,CAAC,QAAQ,EAAE,CAAC;YAC7B,KAAK,UAAU;gBACb,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAE/C,KAAK,gBAAgB;gBACnB,OAAO,sBAAsB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAEnD,KAAK,WAAW;gBACd,yCAAyC;gBACzC,oDAAoD;gBACpD,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;YAE/C;gBACE,+BAA+B;gBAC/B,OAAO,kBAAkB,CAAC,IAAI,EAAE,IAAI,CAAC,MAAM,CAAC,CAAC;QACjD,CAAC;IACH,CAAC;IAED;;OAEG;IACH,UAAU,CAAC,KAAe;QACxB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC;IAC/C,CA
AC;IAED;;OAEG;IACH,cAAc,CACZ,KAAsD;QAEtD,MAAM,OAAO,GAAqD,EAAE,CAAC;QAErE,KAAK,MAAM,EAAE,EAAE,EAAE,OAAO,EAAE,IAAI,KAAK,EAAE,CAAC;YACpC,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YACnC,KAAK,MAAM,KAAK,IAAI,MAAM,CAAC,MAAM,EAAE,CAAC;gBAClC,OAAO,CAAC,IAAI,CAAC;oBACX,GAAG,KAAK;oBACR,QAAQ,EAAE,EAAE;iBACb,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,OAAO,OAAO,CAAC;IACjB,CAAC;CACF;AAED;;GAEG;AACH,IAAI,cAAc,GAAuB,IAAI,CAAC;AAE9C;;GAEG;AACH,MAAM,UAAU,cAAc,CAAC,MAAgC;IAC7D,IAAI,MAAM,EAAE,CAAC;QACX,OAAO,IAAI,WAAW,CAAC,MAAM,CAAC,CAAC;IACjC,CAAC;IAED,IAAI,CAAC,cAAc,EAAE,CAAC;QACpB,cAAc,GAAG,IAAI,WAAW,EAAE,CAAC;IACrC,CAAC;IAED,OAAO,cAAc,CAAC;AACxB,CAAC;AAED;;GAEG;AACH,MAAM,UAAU,gBAAgB;IAC9B,cAAc,GAAG,IAAI,CAAC;AACxB,CAAC;AAID,OAAO,EAAE,uBAAuB,EAAE,iBAAiB,EAAE,MAAM,qBAAqB,CAAC;AACjF,OAAO,EAAE,cAAc,EAAE,MAAM,iCAAiC,CAAC"}
@@ -0,0 +1,9 @@
1
+ /**
2
+ * Text Chunking Module
3
+ * Provides text chunking strategies for handling long messages that exceed embedding model limits
4
+ */
5
+ export { TextChunker, getTextChunker, resetTextChunker, estimateTokens, DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./TextChunker.js";
6
+ export type { ChunkingConfig, ChunkingResult, TextChunk, ChunkingStrategy as ChunkingStrategyType, } from "./ChunkingConfig.js";
7
+ export { chunkWithSentences } from "./strategies/SentenceChunker.js";
8
+ export { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
9
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAE1B,YAAY,EACV,cAAc,EACd,cAAc,EACd,SAAS,EACT,gBAAgB,IAAI,oBAAoB,GACzC,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC"}
@@ -0,0 +1,8 @@
1
+ /**
2
+ * Text Chunking Module
3
+ * Provides text chunking strategies for handling long messages that exceed embedding model limits
4
+ */
5
+ export { TextChunker, getTextChunker, resetTextChunker, estimateTokens, DEFAULT_CHUNKING_CONFIG, getChunkingConfig, } from "./TextChunker.js";
6
+ export { chunkWithSentences } from "./strategies/SentenceChunker.js";
7
+ export { chunkWithSlidingWindow } from "./strategies/SlidingWindowChunker.js";
8
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../../src/chunking/index.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,WAAW,EACX,cAAc,EACd,gBAAgB,EAChB,cAAc,EACd,uBAAuB,EACvB,iBAAiB,GAClB,MAAM,kBAAkB,CAAC;AAS1B,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,sBAAsB,EAAE,MAAM,sCAAsC,CAAC"}
@@ -0,0 +1,19 @@
1
+ /**
2
+ * Sentence-Aware Text Chunker
3
+ * Splits text at sentence boundaries while respecting code blocks and paragraphs
4
+ */
5
+ import type { ChunkingConfig, ChunkingResult } from "../ChunkingConfig.js";
6
+ /**
7
+ * Estimate token count using character ratios
8
+ */
9
+ declare function estimateTokens(text: string, config: ChunkingConfig): number;
10
+ /**
11
+ * Detect if text contains code patterns
12
+ */
13
+ declare function isCodeLike(text: string): boolean;
14
+ /**
15
+ * Chunk text using sentence-aware strategy
16
+ */
17
+ export declare function chunkWithSentences(text: string, config: ChunkingConfig): ChunkingResult;
18
+ export { estimateTokens, isCodeLike };
19
+ //# sourceMappingURL=SentenceChunker.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"SentenceChunker.d.ts","sourceRoot":"","sources":["../../../src/chunking/strategies/SentenceChunker.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAa,cAAc,EAAE,MAAM,sBAAsB,CAAC;AAEtF;;GAEG;AACH,iBAAS,cAAc,CAAC,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,cAAc,GAAG,MAAM,CAWpE;AAED;;GAEG;AACH,iBAAS,UAAU,CAAC,IAAI,EAAE,MAAM,GAAG,OAAO,CAczC;AAuND;;GAEG;AACH,wBAAgB,kBAAkB,CAChC,IAAI,EAAE,MAAM,EACZ,MAAM,EAAE,cAAc,GACrB,cAAc,CAqChB;AAED,OAAO,EAAE,cAAc,EAAE,UAAU,EAAE,CAAC"}