@agentionai/agents 0.3.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/README.md +517 -0
  2. package/dist/agents/Agent.d.ts +29 -0
  3. package/dist/agents/Agent.js +28 -0
  4. package/dist/agents/AgentConfig.d.ts +118 -0
  5. package/dist/agents/AgentConfig.js +3 -0
  6. package/dist/agents/AgentEvent.d.ts +18 -0
  7. package/dist/agents/AgentEvent.js +26 -0
  8. package/dist/agents/BaseAgent.d.ts +82 -0
  9. package/dist/agents/BaseAgent.js +121 -0
  10. package/dist/agents/anthropic/ClaudeAgent.d.ts +46 -0
  11. package/dist/agents/anthropic/ClaudeAgent.js +262 -0
  12. package/dist/agents/errors/AgentError.d.ts +47 -0
  13. package/dist/agents/errors/AgentError.js +74 -0
  14. package/dist/agents/google/GeminiAgent.d.ts +63 -0
  15. package/dist/agents/google/GeminiAgent.js +395 -0
  16. package/dist/agents/mistral/MistralAgent.d.ts +47 -0
  17. package/dist/agents/mistral/MistralAgent.js +313 -0
  18. package/dist/agents/model-types.d.ts +30 -0
  19. package/dist/agents/model-types.js +8 -0
  20. package/dist/agents/openai/OpenAiAgent.d.ts +48 -0
  21. package/dist/agents/openai/OpenAiAgent.js +338 -0
  22. package/dist/chunkers/Chunker.d.ts +53 -0
  23. package/dist/chunkers/Chunker.js +174 -0
  24. package/dist/chunkers/RecursiveChunker.d.ts +52 -0
  25. package/dist/chunkers/RecursiveChunker.js +166 -0
  26. package/dist/chunkers/TextChunker.d.ts +27 -0
  27. package/dist/chunkers/TextChunker.js +50 -0
  28. package/dist/chunkers/TokenChunker.d.ts +60 -0
  29. package/dist/chunkers/TokenChunker.js +176 -0
  30. package/dist/chunkers/index.d.ts +6 -0
  31. package/dist/chunkers/index.js +14 -0
  32. package/dist/chunkers/types.d.ts +95 -0
  33. package/dist/chunkers/types.js +3 -0
  34. package/dist/graph/AgentGraph.d.ts +99 -0
  35. package/dist/graph/AgentGraph.js +115 -0
  36. package/dist/graph/BaseExecutor.d.ts +86 -0
  37. package/dist/graph/BaseExecutor.js +61 -0
  38. package/dist/graph/GraphMetrics.d.ts +143 -0
  39. package/dist/graph/GraphMetrics.js +264 -0
  40. package/dist/graph/MapExecutor.d.ts +39 -0
  41. package/dist/graph/MapExecutor.js +123 -0
  42. package/dist/graph/ParallelExecutor.d.ts +51 -0
  43. package/dist/graph/ParallelExecutor.js +103 -0
  44. package/dist/graph/Pipeline.d.ts +44 -0
  45. package/dist/graph/Pipeline.js +109 -0
  46. package/dist/graph/RouterExecutor.d.ts +89 -0
  47. package/dist/graph/RouterExecutor.js +209 -0
  48. package/dist/graph/SequentialExecutor.d.ts +44 -0
  49. package/dist/graph/SequentialExecutor.js +115 -0
  50. package/dist/graph/VotingSystem.d.ts +54 -0
  51. package/dist/graph/VotingSystem.js +106 -0
  52. package/dist/history/History.d.ts +107 -0
  53. package/dist/history/History.js +166 -0
  54. package/dist/history/RedisHistory.d.ts +27 -0
  55. package/dist/history/RedisHistory.js +55 -0
  56. package/dist/history/transformers.d.ts +102 -0
  57. package/dist/history/transformers.js +415 -0
  58. package/dist/history/types.d.ts +130 -0
  59. package/dist/history/types.js +55 -0
  60. package/dist/index.d.ts +16 -0
  61. package/dist/index.js +48 -0
  62. package/dist/ingestion/IngestionPipeline.d.ts +86 -0
  63. package/dist/ingestion/IngestionPipeline.js +266 -0
  64. package/dist/ingestion/index.d.ts +3 -0
  65. package/dist/ingestion/index.js +7 -0
  66. package/dist/ingestion/types.d.ts +74 -0
  67. package/dist/ingestion/types.js +3 -0
  68. package/dist/team/Team.d.ts +46 -0
  69. package/dist/team/Team.js +104 -0
  70. package/dist/tools/Tool.d.ts +75 -0
  71. package/dist/tools/Tool.js +137 -0
  72. package/dist/vectorstore/Embeddings.d.ts +67 -0
  73. package/dist/vectorstore/Embeddings.js +54 -0
  74. package/dist/vectorstore/LanceDBVectorStore.d.ts +149 -0
  75. package/dist/vectorstore/LanceDBVectorStore.js +338 -0
  76. package/dist/vectorstore/OpenAIEmbeddings.d.ts +45 -0
  77. package/dist/vectorstore/OpenAIEmbeddings.js +109 -0
  78. package/dist/vectorstore/VectorStore.d.ts +255 -0
  79. package/dist/vectorstore/VectorStore.js +216 -0
  80. package/dist/vectorstore/index.d.ts +28 -0
  81. package/dist/vectorstore/index.js +35 -0
  82. package/dist/viz/VizConfig.d.ts +54 -0
  83. package/dist/viz/VizConfig.js +100 -0
  84. package/dist/viz/VizReporter.d.ts +127 -0
  85. package/dist/viz/VizReporter.js +595 -0
  86. package/dist/viz/index.d.ts +31 -0
  87. package/dist/viz/index.js +51 -0
  88. package/dist/viz/types.d.ts +105 -0
  89. package/dist/viz/types.js +7 -0
  90. package/package.json +109 -0
  91. package/readme.md +1 -0
@@ -0,0 +1,166 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.RecursiveChunker = void 0;
const Chunker_1 = require("./Chunker");
/**
 * Recursive text chunker that tries to split on semantic boundaries.
 * It attempts to split by larger separators first (paragraphs), then
 * falls back to smaller ones (sentences, words) to keep semantic units together.
 *
 * @example
 * ```typescript
 * const chunker = new RecursiveChunker({
 *   chunkSize: 1000,
 *   chunkOverlap: 100,
 *   separators: ["\n\n", "\n", ". ", " "],
 * });
 *
 * const chunks = await chunker.chunk(document);
 * ```
 */
class RecursiveChunker extends Chunker_1.Chunker {
    constructor(config) {
        super(config);
        this.name = "RecursiveChunker";
        // Separators ordered from largest to smallest semantic unit.
        this.separators = config.separators ?? ["\n\n", "\n", ". ", " "];
    }
    /**
     * Split text recursively using the separator hierarchy.
     * @param text - Full text to split.
     * @returns Raw chunk strings (whitespace-only pieces are dropped).
     */
    splitText(text) {
        return this.recursiveSplit(text, 0);
    }
    /**
     * Recursively split text using the separator at the given index,
     * falling through to smaller separators — and ultimately a hard
     * character split — for pieces that are still too large.
     */
    recursiveSplit(text, separatorIndex) {
        const { chunkSize, chunkOverlap = 0 } = this.config;
        // Base case: text fits in one chunk (drop whitespace-only text)
        if (text.length <= chunkSize) {
            return text.trim() ? [text] : [];
        }
        // No more separators: force split by character
        if (separatorIndex >= this.separators.length) {
            return this.forceSplit(text);
        }
        const separator = this.separators[separatorIndex];
        const parts = this.splitBySeparator(text, separator);
        // If separator didn't help, try the next one
        if (parts.length <= 1) {
            return this.recursiveSplit(text, separatorIndex + 1);
        }
        // Merge parts into chunks respecting size limit
        const chunks = [];
        let currentChunk = "";
        for (const part of parts) {
            // partWithSep accounts for the separator that re-joining would add
            const partWithSep = currentChunk ? separator + part : part;
            const wouldBeLength = currentChunk.length + partWithSep.length;
            if (wouldBeLength <= chunkSize) {
                // Part fits in current chunk
                currentChunk = currentChunk ? currentChunk + separator + part : part;
            }
            else {
                // Save current chunk if it has content
                if (currentChunk.trim()) {
                    chunks.push(currentChunk);
                }
                // Check if part itself is too big
                if (part.length > chunkSize) {
                    // Recursively split the oversized part with smaller separators
                    const subChunks = this.recursiveSplit(part, separatorIndex + 1);
                    chunks.push(...subChunks);
                    currentChunk = "";
                }
                else {
                    currentChunk = part;
                }
            }
        }
        // Don't forget the last chunk
        if (currentChunk.trim()) {
            chunks.push(currentChunk);
        }
        // Apply overlap if configured
        if (chunkOverlap > 0 && chunks.length > 1) {
            return this.applyOverlap(chunks, separator);
        }
        return chunks;
    }
    /**
     * Split text by separator, keeping the parts clean.
     */
    splitBySeparator(text, separator) {
        if (separator === ". ") {
            // Special handling for sentence boundaries - keep the period
            return text.split(/(?<=\.)\s+/).filter((p) => p.trim());
        }
        return text.split(separator).filter((p) => p.trim());
    }
    /**
     * Force split text by character count when no separator works.
     */
    forceSplit(text) {
        const { chunkSize, chunkOverlap = 0 } = this.config;
        const chunks = [];
        // BUGFIX: when chunkOverlap >= chunkSize the naive
        // `chunkSize - chunkOverlap` step is <= 0 and the loop below would
        // never advance (infinite loop). Always move forward by at least
        // one character.
        const step = Math.max(1, chunkSize - chunkOverlap);
        let start = 0;
        while (start < text.length) {
            const end = Math.min(start + chunkSize, text.length);
            const chunk = text.slice(start, end);
            if (chunk.trim()) {
                chunks.push(chunk);
            }
            if (end >= text.length)
                break;
            start += step;
        }
        return chunks;
    }
    /**
     * Apply overlap between chunks by prepending context from previous chunk.
     */
    applyOverlap(chunks, separator) {
        const { chunkOverlap = 0 } = this.config;
        if (chunkOverlap === 0 || chunks.length <= 1) {
            return chunks;
        }
        const result = [chunks[0]];
        for (let i = 1; i < chunks.length; i++) {
            const prevChunk = chunks[i - 1];
            const currentChunk = chunks[i];
            // Get overlap from end of previous chunk
            const overlapText = this.getOverlapText(prevChunk, chunkOverlap, separator);
            if (overlapText) {
                result.push(overlapText + separator + currentChunk);
            }
            else {
                result.push(currentChunk);
            }
        }
        return result;
    }
    /**
     * Extract overlap text from the end of a chunk, trying to break at separator.
     */
    getOverlapText(text, overlapSize, separator) {
        if (text.length <= overlapSize) {
            return text;
        }
        // Try to find a clean break point near the overlap size
        const overlapStart = text.length - overlapSize;
        const sepIndex = text.indexOf(separator, overlapStart);
        if (sepIndex !== -1 && sepIndex < text.length - 1) {
            return text.slice(sepIndex + separator.length);
        }
        // Fall back to exact character overlap
        return text.slice(overlapStart);
    }
    /**
     * Get the configured separators (defensive copy).
     */
    getSeparators() {
        return [...this.separators];
    }
}
exports.RecursiveChunker = RecursiveChunker;
//# sourceMappingURL=RecursiveChunker.js.map
@@ -0,0 +1,27 @@
1
import { Chunker } from "./Chunker";
import { ChunkerConfig } from "./types";
/**
 * Simple text chunker that splits by character count with optional overlap.
 *
 * @example
 * ```typescript
 * const chunker = new TextChunker({
 *   chunkSize: 1000,
 *   chunkOverlap: 200,
 * });
 *
 * const chunks = await chunker.chunk(longText, {
 *   sourceId: 'doc-123',
 *   sourcePath: '/docs/readme.md',
 * });
 * ```
 */
export declare class TextChunker extends Chunker {
    readonly name = "TextChunker";
    constructor(config: ChunkerConfig);
    /**
     * Split text by character count with overlap.
     * NOTE(review): returns raw string pieces; presumably the base
     * `Chunker.chunk` wraps them into `Chunk` objects — confirm in Chunker.
     */
    protected splitText(text: string): string[];
}
//# sourceMappingURL=TextChunker.d.ts.map
@@ -0,0 +1,50 @@
1
"use strict";
Object.defineProperty(exports, "__esModule", { value: true });
exports.TextChunker = void 0;
const Chunker_1 = require("./Chunker");
/**
 * Simple text chunker that splits by character count with optional overlap.
 *
 * @example
 * ```typescript
 * const chunker = new TextChunker({
 *   chunkSize: 1000,
 *   chunkOverlap: 200,
 * });
 *
 * const chunks = await chunker.chunk(longText, {
 *   sourceId: 'doc-123',
 *   sourcePath: '/docs/readme.md',
 * });
 * ```
 */
class TextChunker extends Chunker_1.Chunker {
    constructor(config) {
        super(config);
        this.name = "TextChunker";
    }
    /**
     * Split text by character count with overlap.
     * @param text - Full text to split.
     * @returns Fixed-size slices; consecutive slices share `chunkOverlap`
     *          characters when an overlap is configured.
     */
    splitText(text) {
        const { chunkSize, chunkOverlap = 0 } = this.config;
        const chunks = [];
        // Short input fits in a single chunk
        if (text.length <= chunkSize) {
            return [text];
        }
        // BUGFIX: when chunkOverlap >= chunkSize the naive
        // `chunkSize - chunkOverlap` step is <= 0 and the loop below would
        // never advance (infinite loop). Always move forward by at least
        // one character.
        const step = Math.max(1, chunkSize - chunkOverlap);
        let start = 0;
        while (start < text.length) {
            const end = Math.min(start + chunkSize, text.length);
            chunks.push(text.slice(start, end));
            // If we've reached the end, stop
            if (end >= text.length) {
                break;
            }
            start += step;
        }
        return chunks;
    }
}
exports.TextChunker = TextChunker;
//# sourceMappingURL=TextChunker.js.map
@@ -0,0 +1,60 @@
1
import { Chunker } from "./Chunker";
import { Chunk, TokenChunkerConfig } from "./types";
/**
 * Load tokenx module using dynamic import.
 * This function can be mocked in tests.
 * @internal
 */
export declare function loadTokenx(): Promise<typeof import("tokenx")>;
/**
 * Reset the tokenx module cache. Used in tests.
 * @internal
 */
export declare function resetTokenxCache(): void;
/**
 * Token-aware text chunker using the tokenx library.
 * Splits text based on token count rather than character count,
 * ensuring chunks fit within LLM token limits.
 *
 * Uses tokenx for fast token estimation (~96% accuracy, ~2kB).
 *
 * @example
 * ```typescript
 * const chunker = new TokenChunker({
 *   chunkSize: 500, // 500 tokens per chunk
 *   chunkOverlap: 50, // 50 token overlap
 * });
 *
 * const chunks = await chunker.chunk(longDocument);
 * // Each chunk.metadata.tokenCount contains estimated tokens
 * ```
 */
export declare class TokenChunker extends Chunker {
    readonly name = "TokenChunker";
    constructor(config: TokenChunkerConfig);
    /**
     * Protected method to get tokenx - can be overridden in tests.
     */
    protected getTokenx(): Promise<typeof import("tokenx")>;
    /**
     * Split text by token count using tokenx.
     * @param text - Full text to split.
     * @returns Raw chunk strings (token-sized, optionally overlapped).
     */
    protected splitText(text: string): Promise<string[]>;
    /**
     * Apply token-based overlap between chunks.
     */
    private applyTokenOverlap;
    /**
     * Get approximately chunkOverlap tokens from the end of text.
     */
    private getTokenOverlap;
    /**
     * Override chunk to add token count to metadata
     * (`chunk.metadata.tokenCount`).
     */
    chunk(text: string, options?: import("./types").ChunkOptions): Promise<Chunk[]>;
    /**
     * Estimate token count for a given text.
     */
    static estimateTokens(text: string): Promise<number>;
}
//# sourceMappingURL=TokenChunker.d.ts.map
@@ -0,0 +1,176 @@
1
"use strict";
// Standard helpers emitted by the TypeScript compiler to implement
// `import * as ns from "..."` (namespace import) semantics on top of
// CommonJS require(). Not hand-written code; do not edit.
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    var desc = Object.getOwnPropertyDescriptor(m, k);
    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
        desc = { enumerable: true, get: function() { return m[k]; } };
    }
    Object.defineProperty(o, k2, desc);
}) : (function(o, m, k, k2) {
    if (k2 === undefined) k2 = k;
    o[k2] = m[k];
}));
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
    Object.defineProperty(o, "default", { enumerable: true, value: v });
}) : function(o, v) {
    o["default"] = v;
});
var __importStar = (this && this.__importStar) || (function () {
    var ownKeys = function(o) {
        ownKeys = Object.getOwnPropertyNames || function (o) {
            var ar = [];
            for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
            return ar;
        };
        return ownKeys(o);
    };
    return function (mod) {
        if (mod && mod.__esModule) return mod;
        var result = {};
        if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
        __setModuleDefault(result, mod);
        return result;
    };
})();
Object.defineProperty(exports, "__esModule", { value: true });
exports.TokenChunker = void 0;
exports.loadTokenx = loadTokenx;
exports.resetTokenxCache = resetTokenxCache;
const Chunker_1 = require("./Chunker");
// Cache the tokenx module after first import so repeated chunking does not
// re-trigger the dynamic import.
let tokenxModule = null;
/**
 * Load tokenx module using dynamic import.
 * This function can be mocked in tests.
 * @internal
 */
async function loadTokenx() {
    if (!tokenxModule) {
        // Use dynamic import for ESM module
        tokenxModule = await Promise.resolve().then(() => __importStar(require("tokenx")));
    }
    return tokenxModule;
}
/**
 * Reset the tokenx module cache. Used in tests.
 * @internal
 */
function resetTokenxCache() {
    tokenxModule = null;
}
/**
 * Token-aware text chunker using the tokenx library.
 * Splits text based on token count rather than character count,
 * ensuring chunks fit within LLM token limits.
 *
 * Uses tokenx for fast token estimation (~96% accuracy, ~2kB).
 *
 * @example
 * ```typescript
 * const chunker = new TokenChunker({
 *   chunkSize: 500, // 500 tokens per chunk
 *   chunkOverlap: 50, // 50 token overlap
 * });
 *
 * const chunks = await chunker.chunk(longDocument);
 * // Each chunk.metadata.tokenCount contains estimated tokens
 * ```
 */
class TokenChunker extends Chunker_1.Chunker {
    constructor(config) {
        super(config);
        this.name = "TokenChunker";
    }
    /**
     * Protected method to get tokenx - can be overridden in tests.
     */
    async getTokenx() {
        return loadTokenx();
    }
    /**
     * Split text by token count using tokenx.
     * Token-level sizing is delegated entirely to tokenx's splitByTokens;
     * this method only layers the optional overlap on top.
     */
    async splitText(text) {
        const { chunkSize, chunkOverlap = 0 } = this.config;
        const tokenx = await this.getTokenx();
        const { splitByTokens } = tokenx;
        // Use tokenx's splitByTokens for token-aware splitting
        const chunks = splitByTokens(text, chunkSize);
        // Apply overlap if configured
        if (chunkOverlap > 0 && chunks.length > 1) {
            return this.applyTokenOverlap(chunks, text);
        }
        return chunks;
    }
    /**
     * Apply token-based overlap between chunks: each chunk after the first
     * is prefixed with ~chunkOverlap tokens from the end of its predecessor,
     * joined with a single space.
     */
    async applyTokenOverlap(chunks, _originalText) {
        const { chunkOverlap = 0 } = this.config;
        const result = [chunks[0]];
        for (let i = 1; i < chunks.length; i++) {
            const prevChunk = chunks[i - 1];
            const currentChunk = chunks[i];
            // Get overlap from end of previous chunk
            const overlapText = await this.getTokenOverlap(prevChunk, chunkOverlap);
            if (overlapText && overlapText.trim()) {
                result.push(overlapText + " " + currentChunk);
            }
            else {
                result.push(currentChunk);
            }
        }
        return result;
    }
    /**
     * Get approximately chunkOverlap tokens from the end of text.
     * Strategy: cut at a ~4-chars-per-token estimate, snap forward to a word
     * boundary, then trim word-by-word if tokenx reports the slice is more
     * than 1.5x the requested token count.
     */
    async getTokenOverlap(text, overlapTokens) {
        const tokenx = await this.getTokenx();
        const { estimateTokenCount } = tokenx;
        // Estimate characters per token (roughly 4 chars per token for English)
        const estimatedChars = overlapTokens * 4;
        if (text.length <= estimatedChars) {
            return text;
        }
        // Start from estimated position and find a word boundary
        let start = text.length - estimatedChars;
        // Find the next space to start at a word boundary
        const spaceIndex = text.indexOf(" ", start);
        if (spaceIndex !== -1 && spaceIndex < text.length - 1) {
            start = spaceIndex + 1;
        }
        const overlap = text.slice(start);
        // Verify we're close to the target token count
        const actualTokens = estimateTokenCount(overlap);
        if (actualTokens > overlapTokens * 1.5) {
            // Too many tokens, trim more aggressively: keep a proportional
            // number of trailing words.
            const words = overlap.split(/\s+/);
            const targetWords = Math.ceil(words.length * (overlapTokens / actualTokens));
            return words.slice(-targetWords).join(" ");
        }
        return overlap;
    }
    /**
     * Override chunk to add token count to metadata
     * (writes `chunk.metadata.tokenCount` on every produced chunk).
     */
    async chunk(text, options) {
        const chunks = await super.chunk(text, options);
        const tokenx = await this.getTokenx();
        const { estimateTokenCount } = tokenx;
        // Add token count to each chunk's metadata
        for (const chunk of chunks) {
            chunk.metadata.tokenCount = estimateTokenCount(chunk.content);
        }
        return chunks;
    }
    /**
     * Estimate token count for a given text.
     * Static helper; uses the shared module-level tokenx cache.
     */
    static async estimateTokens(text) {
        const tokenx = await loadTokenx();
        return tokenx.estimateTokenCount(text);
    }
}
exports.TokenChunker = TokenChunker;
//# sourceMappingURL=TokenChunker.js.map
@@ -0,0 +1,6 @@
1
// Barrel module: the public API surface of the chunkers package —
// shared types, the abstract base class, and the three implementations.
export { Chunk, ChunkMetadata, ChunkerConfig, ChunkOptions, RecursiveChunkerConfig, TokenChunkerConfig, } from "./types";
export { Chunker } from "./Chunker";
export { TextChunker } from "./TextChunker";
export { RecursiveChunker } from "./RecursiveChunker";
export { TokenChunker } from "./TokenChunker";
//# sourceMappingURL=index.d.ts.map
@@ -0,0 +1,14 @@
1
"use strict";
// Barrel module (compiled output): re-exports the chunker classes via
// getter bindings so the exports stay live, mirroring ES-module semantics.
Object.defineProperty(exports, "__esModule", { value: true });
exports.TokenChunker = exports.RecursiveChunker = exports.TextChunker = exports.Chunker = void 0;
// Base class
var Chunker_1 = require("./Chunker");
Object.defineProperty(exports, "Chunker", { enumerable: true, get: function () { return Chunker_1.Chunker; } });
// Implementations
var TextChunker_1 = require("./TextChunker");
Object.defineProperty(exports, "TextChunker", { enumerable: true, get: function () { return TextChunker_1.TextChunker; } });
var RecursiveChunker_1 = require("./RecursiveChunker");
Object.defineProperty(exports, "RecursiveChunker", { enumerable: true, get: function () { return RecursiveChunker_1.RecursiveChunker; } });
var TokenChunker_1 = require("./TokenChunker");
Object.defineProperty(exports, "TokenChunker", { enumerable: true, get: function () { return TokenChunker_1.TokenChunker; } });
//# sourceMappingURL=index.js.map
@@ -0,0 +1,95 @@
1
/**
 * Represents a chunk of text with metadata for tracking and linking.
 */
export interface Chunk {
    /** Unique identifier for this chunk */
    id: string;
    /** The text content of the chunk */
    content: string;
    /** Metadata about the chunk */
    metadata: ChunkMetadata;
}
/**
 * Metadata associated with each chunk.
 */
export interface ChunkMetadata {
    /** Zero-based index of this chunk in the sequence */
    chunkIndex: number;
    /** Total number of chunks in the sequence */
    totalChunks: number;
    /** ID of the previous chunk, or null if first */
    previousChunkId: string | null;
    /** ID of the next chunk, or null if last */
    nextChunkId: string | null;
    /** Character offset where this chunk starts in the source text */
    startOffset: number;
    /** Character offset where this chunk ends in the source text */
    endOffset: number;
    /** Optional identifier for the source document */
    sourceId?: string;
    /** Optional path to the source file */
    sourcePath?: string;
    /** Number of characters in the chunk content */
    charCount: number;
    /** Estimated number of tokens (when available) */
    tokenCount?: number;
    /** SHA-256 hash of the content for deduplication */
    hash: string;
    /** Section title if detected (e.g., markdown headers) */
    sectionTitle?: string;
    /** Open extension point: callers may attach arbitrary extra metadata */
    [key: string]: unknown;
}
/**
 * Configuration for creating a chunker.
 */
export interface ChunkerConfig {
    /** Target size for each chunk (in characters or tokens depending on chunker) */
    chunkSize: number;
    /** Number of characters/tokens to overlap between chunks (default: 0) */
    chunkOverlap?: number;
    /**
     * Optional processor function applied to each chunk.
     * Can modify the chunk or return null to filter it out.
     */
    chunkProcessor?: (chunk: Chunk, index: number, all: Chunk[]) => Chunk | null | Promise<Chunk | null>;
    /**
     * Custom ID generator function.
     * @param content - The chunk content
     * @param index - The chunk index
     * @param sourceId - Optional source document ID
     * @returns A unique ID for the chunk
     */
    idGenerator?: (content: string, index: number, sourceId?: string) => string;
}
/**
 * Options passed when chunking text.
 */
export interface ChunkOptions {
    /** Identifier for the source document */
    sourceId?: string;
    /** Path to the source file */
    sourcePath?: string;
    /** Additional metadata to merge into each chunk */
    metadata?: Record<string, unknown>;
}
/**
 * Configuration specific to RecursiveChunker.
 */
export interface RecursiveChunkerConfig extends ChunkerConfig {
    /**
     * Separators to try in order, from largest to smallest semantic unit.
     * Default: ["\n\n", "\n", ". ", " "]
     */
    separators?: string[];
}
/**
 * Configuration specific to TokenChunker.
 */
export interface TokenChunkerConfig extends ChunkerConfig {
    /**
     * Chunk size is in tokens, not characters.
     * Uses tokenx for estimation (~96% accuracy).
     */
    chunkSize: number;
}
//# sourceMappingURL=types.d.ts.map
@@ -0,0 +1,3 @@
1
"use strict";
// Type-only module: every declaration in types.d.ts is erased at compile
// time, so the emitted JavaScript only marks the module as ESM-compatible.
Object.defineProperty(exports, "__esModule", { value: true });
//# sourceMappingURL=types.js.map