@agentionai/agents 0.3.0-beta

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/README.md +517 -0
  2. package/dist/agents/Agent.d.ts +29 -0
  3. package/dist/agents/Agent.js +28 -0
  4. package/dist/agents/AgentConfig.d.ts +118 -0
  5. package/dist/agents/AgentConfig.js +3 -0
  6. package/dist/agents/AgentEvent.d.ts +18 -0
  7. package/dist/agents/AgentEvent.js +26 -0
  8. package/dist/agents/BaseAgent.d.ts +82 -0
  9. package/dist/agents/BaseAgent.js +121 -0
  10. package/dist/agents/anthropic/ClaudeAgent.d.ts +46 -0
  11. package/dist/agents/anthropic/ClaudeAgent.js +262 -0
  12. package/dist/agents/errors/AgentError.d.ts +47 -0
  13. package/dist/agents/errors/AgentError.js +74 -0
  14. package/dist/agents/google/GeminiAgent.d.ts +63 -0
  15. package/dist/agents/google/GeminiAgent.js +395 -0
  16. package/dist/agents/mistral/MistralAgent.d.ts +47 -0
  17. package/dist/agents/mistral/MistralAgent.js +313 -0
  18. package/dist/agents/model-types.d.ts +30 -0
  19. package/dist/agents/model-types.js +8 -0
  20. package/dist/agents/openai/OpenAiAgent.d.ts +48 -0
  21. package/dist/agents/openai/OpenAiAgent.js +338 -0
  22. package/dist/chunkers/Chunker.d.ts +53 -0
  23. package/dist/chunkers/Chunker.js +174 -0
  24. package/dist/chunkers/RecursiveChunker.d.ts +52 -0
  25. package/dist/chunkers/RecursiveChunker.js +166 -0
  26. package/dist/chunkers/TextChunker.d.ts +27 -0
  27. package/dist/chunkers/TextChunker.js +50 -0
  28. package/dist/chunkers/TokenChunker.d.ts +60 -0
  29. package/dist/chunkers/TokenChunker.js +176 -0
  30. package/dist/chunkers/index.d.ts +6 -0
  31. package/dist/chunkers/index.js +14 -0
  32. package/dist/chunkers/types.d.ts +95 -0
  33. package/dist/chunkers/types.js +3 -0
  34. package/dist/graph/AgentGraph.d.ts +99 -0
  35. package/dist/graph/AgentGraph.js +115 -0
  36. package/dist/graph/BaseExecutor.d.ts +86 -0
  37. package/dist/graph/BaseExecutor.js +61 -0
  38. package/dist/graph/GraphMetrics.d.ts +143 -0
  39. package/dist/graph/GraphMetrics.js +264 -0
  40. package/dist/graph/MapExecutor.d.ts +39 -0
  41. package/dist/graph/MapExecutor.js +123 -0
  42. package/dist/graph/ParallelExecutor.d.ts +51 -0
  43. package/dist/graph/ParallelExecutor.js +103 -0
  44. package/dist/graph/Pipeline.d.ts +44 -0
  45. package/dist/graph/Pipeline.js +109 -0
  46. package/dist/graph/RouterExecutor.d.ts +89 -0
  47. package/dist/graph/RouterExecutor.js +209 -0
  48. package/dist/graph/SequentialExecutor.d.ts +44 -0
  49. package/dist/graph/SequentialExecutor.js +115 -0
  50. package/dist/graph/VotingSystem.d.ts +54 -0
  51. package/dist/graph/VotingSystem.js +106 -0
  52. package/dist/history/History.d.ts +107 -0
  53. package/dist/history/History.js +166 -0
  54. package/dist/history/RedisHistory.d.ts +27 -0
  55. package/dist/history/RedisHistory.js +55 -0
  56. package/dist/history/transformers.d.ts +102 -0
  57. package/dist/history/transformers.js +415 -0
  58. package/dist/history/types.d.ts +130 -0
  59. package/dist/history/types.js +55 -0
  60. package/dist/index.d.ts +16 -0
  61. package/dist/index.js +48 -0
  62. package/dist/ingestion/IngestionPipeline.d.ts +86 -0
  63. package/dist/ingestion/IngestionPipeline.js +266 -0
  64. package/dist/ingestion/index.d.ts +3 -0
  65. package/dist/ingestion/index.js +7 -0
  66. package/dist/ingestion/types.d.ts +74 -0
  67. package/dist/ingestion/types.js +3 -0
  68. package/dist/team/Team.d.ts +46 -0
  69. package/dist/team/Team.js +104 -0
  70. package/dist/tools/Tool.d.ts +75 -0
  71. package/dist/tools/Tool.js +137 -0
  72. package/dist/vectorstore/Embeddings.d.ts +67 -0
  73. package/dist/vectorstore/Embeddings.js +54 -0
  74. package/dist/vectorstore/LanceDBVectorStore.d.ts +149 -0
  75. package/dist/vectorstore/LanceDBVectorStore.js +338 -0
  76. package/dist/vectorstore/OpenAIEmbeddings.d.ts +45 -0
  77. package/dist/vectorstore/OpenAIEmbeddings.js +109 -0
  78. package/dist/vectorstore/VectorStore.d.ts +255 -0
  79. package/dist/vectorstore/VectorStore.js +216 -0
  80. package/dist/vectorstore/index.d.ts +28 -0
  81. package/dist/vectorstore/index.js +35 -0
  82. package/dist/viz/VizConfig.d.ts +54 -0
  83. package/dist/viz/VizConfig.js +100 -0
  84. package/dist/viz/VizReporter.d.ts +127 -0
  85. package/dist/viz/VizReporter.js +595 -0
  86. package/dist/viz/index.d.ts +31 -0
  87. package/dist/viz/index.js +51 -0
  88. package/dist/viz/types.d.ts +105 -0
  89. package/dist/viz/types.js +7 -0
  90. package/package.json +109 -0
  91. package/readme.md +1 -0
@@ -0,0 +1,338 @@
1
+ "use strict";
2
+ var __importDefault = (this && this.__importDefault) || function (mod) {
3
+ return (mod && mod.__esModule) ? mod : { "default": mod };
4
+ };
5
+ Object.defineProperty(exports, "__esModule", { value: true });
6
+ exports.OpenAiAgent = void 0;
7
+ const openai_1 = __importDefault(require("openai"));
8
+ const BaseAgent_1 = require("../BaseAgent");
9
+ const AgentEvent_1 = require("../AgentEvent");
10
+ const AgentError_1 = require("../errors/AgentError");
11
+ const transformers_1 = require("../../history/transformers");
12
+ const VizReporter_1 = require("../../viz/VizReporter");
13
+ const VizConfig_1 = require("../../viz/VizConfig");
14
/**
 * Agent for OpenAI models using the Responses API.
 *
 * Each `execute` call sends the normalized history to
 * `client.responses.create`, runs any tools the model requests, and keeps
 * re-querying the model until it returns a completed text message.
 *
 * @example
 * ```typescript
 * const agent = new OpenAiAgent({
 *   id: "1",
 *   name: "Assistant",
 *   description: "A helpful assistant",
 *   apiKey: process.env.OPENAI_API_KEY,
 * });
 *
 * const response = await agent.execute("Hello!");
 * ```
 */
class OpenAiAgent extends BaseAgent_1.BaseAgent {
    /**
     * @param config - Agent configuration. OpenAI-specific options may be given
     *   flat (deprecated) or under `vendorConfig.openai`; flat values win.
     * @param history - Forwarded unchanged to the BaseAgent constructor.
     */
    constructor(config, history) {
        super({ ...config, vendor: "openai" }, history);
        /** Count of tool calls in current execution */
        this.currentToolCallCount = 0;
        this.client = new openai_1.default({
            apiKey: config.apiKey,
        });
        // Merge flat config (deprecated) with nested vendorConfig
        // Flat config takes precedence for backward compatibility
        const vendorConfig = config.vendorConfig?.openai || {};
        const disableParallelToolUse = config.disableParallelToolUse ??
            vendorConfig.disableParallelToolUse ??
            false;
        const disableReasoning = config.disableReasoning ?? vendorConfig.disableReasoning ?? false;
        const reasoningEffort = config.reasoningEffort ?? vendorConfig.reasoningEffort;
        const user = config.user ?? vendorConfig.user;
        this.config = {
            model: config.model || "gpt-4.1-mini",
            maxTokens: config.maxTokens || 1024,
            disableParallelToolUse,
            disableReasoning,
            reasoningEffort,
            user,
            apiKey: config.apiKey,
            temperature: config.temperature,
            topP: config.topP,
            seed: config.seed,
            presencePenalty: config.presencePenalty,
            frequencyPenalty: config.frequencyPenalty,
            stopSequences: config.stopSequences,
        };
        // Add system message to history (skips if already exists with same content)
        this.addSystemMessage(this.getSystemMessage());
    }
    /**
     * Convert every registered tool into a strict function-tool definition
     * for the Responses API.
     *
     * @returns One `{ type: "function", ... }` definition per registered tool.
     */
    getToolDefinitions() {
        return Array.from(this.tools.values()).map((tool) => {
            const prompt = tool.getPrompt();
            return {
                type: "function",
                name: prompt.name,
                description: prompt.description,
                parameters: {
                    type: prompt.input_schema.type,
                    properties: prompt.input_schema.properties,
                    required: prompt.input_schema.required,
                    additionalProperties: false,
                },
                strict: true,
            };
        });
    }
    /**
     * Always resolves to an empty string; the input is ignored.
     */
    async process(_input) {
        return "";
    }
    /**
     * Build the `responses.create` payload shared by the initial request and
     * every tool follow-up request, so both apply the reasoning options the
     * same way.
     *
     * `reasoning` is only included when configured: `{ effort: null }` when
     * `disableReasoning` is set, otherwise `{ effort: reasoningEffort }` when an
     * effort was provided. `disableReasoning` takes precedence over
     * `reasoningEffort`.
     *
     * @param inputMessages - History entries already converted to provider format.
     * @returns The request parameters object.
     */
    buildRequestParams(inputMessages) {
        const { disableReasoning, reasoningEffort } = this.config;
        let reasoning;
        if (disableReasoning) {
            reasoning = { effort: null };
        }
        else if (reasoningEffort) {
            reasoning = { effort: reasoningEffort };
        }
        return {
            model: this.config.model,
            max_output_tokens: this.config.maxTokens,
            input: inputMessages,
            tools: this.getToolDefinitions(),
            store: false,
            temperature: this.config.temperature,
            top_p: this.config.topP,
            // Note: Responses API doesn't support seed, presence_penalty, frequency_penalty, stop
            user: this.config.user,
            ...(reasoning && { reasoning }),
        };
    }
    /**
     * Run one user turn: record the input in history, query the model, and
     * resolve with the final text once all tool calls have been handled.
     *
     * @param input - The user message text.
     * @returns The model's final `output_text`.
     * @throws ApiError when the caught error object carries an `error` property
     *   (treated as an OpenAI API error).
     * @throws ExecutionError for any other failure.
     */
    async execute(input) {
        this.emit(AgentEvent_1.AgentEvent.BEFORE_EXECUTE, input);
        // Reset token usage for this execution
        this.lastTokenUsage = undefined;
        this.currentToolCallCount = 0;
        // Start visualization reporting
        if (VizConfig_1.vizConfig.isEnabled()) {
            this.vizEventId = VizReporter_1.vizReporter.agentStart(this.id, this.name, this.config.model, "openai", input);
        }
        if (this.history.transient) {
            this.history.clear();
            // Re-add system message after clear
            this.addSystemMessage(this.getSystemMessage());
        }
        this.addTextToHistory("user", input);
        try {
            const inputMessages = transformers_1.openAiTransformer.toProvider(this.history.entries);
            const response = await this.client.responses.create(this.buildRequestParams(inputMessages));
            this.emit(AgentEvent_1.AgentEvent.AFTER_EXECUTE, response);
            return await this.handleResponse(response);
        }
        catch (error) {
            if (error && typeof error === "object" && "error" in error) {
                const openAIError = error;
                const apiError = new AgentError_1.ApiError(`OpenAI API error: ${openAIError.error.message || "Unknown error"}`, openAIError.status, openAIError.error);
                if (openAIError.error.code === "insufficient_quota") {
                    apiError.message =
                        "OpenAI API quota exceeded. Please check your billing details.";
                }
                this.emit(AgentEvent_1.AgentEvent.ERROR, apiError);
                // Report error to viz
                if (this.vizEventId) {
                    VizReporter_1.vizReporter.agentError(this.vizEventId, "ApiError", apiError.message, openAIError.error.code === "rate_limit_exceeded");
                    this.vizEventId = undefined;
                }
                throw apiError;
            }
            else {
                const executionError = new AgentError_1.ExecutionError(`Error executing agent: ${error instanceof Error ? error.message : "Unknown error"}`);
                this.emit(AgentEvent_1.AgentEvent.ERROR, executionError);
                // Report error to viz
                if (this.vizEventId) {
                    VizReporter_1.vizReporter.agentError(this.vizEventId, "ExecutionError", executionError.message, false);
                    this.vizEventId = undefined;
                }
                throw executionError;
            }
        }
    }
    /**
     * Interpret one Responses API result.
     *
     * - Accumulates token usage into `lastTokenUsage`.
     * - A completed message with no tool calls is stored in history and returned.
     * - Function calls are executed, their results appended to history, and the
     *   model is queried again; the follow-up response is handled recursively.
     * - An incomplete message or any other output shape raises ExecutionError.
     *
     * @param response - The object returned by `client.responses.create`.
     * @returns The final `output_text`.
     */
    async handleResponse(response) {
        if (!response.output || !response.output.length) {
            const error = new AgentError_1.ExecutionError("Invalid response format: missing output");
            this.emit(AgentEvent_1.AgentEvent.ERROR, error);
            throw error;
        }
        // Track token usage if available
        if (response.usage) {
            const usage = this.parseUsage(response.usage);
            if (this.lastTokenUsage) {
                this.lastTokenUsage.input_tokens += usage.input_tokens;
                this.lastTokenUsage.output_tokens += usage.output_tokens;
                this.lastTokenUsage.total_tokens += usage.total_tokens;
            }
            else {
                this.lastTokenUsage = { ...usage };
            }
        }
        const toolCalls = response.output.filter((output) => output.type === "function_call");
        // Find the message output (skip reasoning outputs)
        const messageOutput = response.output.find((output) => output.type === "message");
        // Handle incomplete responses (e.g., reasoning hit token limit)
        if (!toolCalls.length &&
            messageOutput &&
            messageOutput.type === "message" &&
            messageOutput.status === "incomplete") {
            const error = new AgentError_1.ExecutionError(`Response incomplete: ${response.incomplete_details?.reason || "unknown reason"}. ` +
                `Try increasing maxTokens or setting disableReasoning: true for this agent.`);
            this.emit(AgentEvent_1.AgentEvent.ERROR, error);
            // Report error to viz
            if (this.vizEventId) {
                VizReporter_1.vizReporter.agentError(this.vizEventId, "ExecutionError", error.message, false);
                this.vizEventId = undefined;
            }
            throw error;
        }
        if (!toolCalls.length &&
            messageOutput &&
            messageOutput.type === "message" &&
            messageOutput.status === "completed") {
            // Normal text response - add to history in normalized format
            const entry = transformers_1.openAiTransformer.fromProviderMessage("assistant", response.output_text);
            this.addToHistory(entry);
            this.emit(AgentEvent_1.AgentEvent.DONE, response, this.lastTokenUsage);
            // Report completion to viz
            if (this.vizEventId) {
                VizReporter_1.vizReporter.agentComplete(this.vizEventId, {
                    input: this.lastTokenUsage?.input_tokens || 0,
                    output: this.lastTokenUsage?.output_tokens || 0,
                    total: this.lastTokenUsage?.total_tokens || 0,
                }, "end_turn", this.currentToolCallCount > 0, this.currentToolCallCount, response.output_text);
                this.vizEventId = undefined;
            }
            return response.output_text;
        }
        else if (toolCalls.length) {
            try {
                // Add assistant message with tool calls to history (normalized)
                const functionCalls = toolCalls.map((tc) => ({
                    id: tc.id || tc.call_id,
                    call_id: tc.call_id,
                    name: tc.name,
                    arguments: tc.arguments,
                }));
                const assistantEntry = transformers_1.openAiTransformer.fromProviderMessage("assistant", response.output_text || "", functionCalls);
                this.addToHistory(assistantEntry);
                const toolResponses = await this.handleToolUse(toolCalls);
                // Add tool results to history (normalized)
                for (const result of toolResponses) {
                    const resultEntry = transformers_1.openAiTransformer.toolResultEntry(result.call_id, result.output, false);
                    this.addToHistory(resultEntry);
                }
                // Continue conversation
                try {
                    const inputMessages = transformers_1.openAiTransformer.toProvider(this.history.entries);
                    const newResponse = await this.client.responses.create(this.buildRequestParams(inputMessages));
                    this.emit(AgentEvent_1.AgentEvent.AFTER_EXECUTE, newResponse);
                    // Returned without await: failures from the recursive call
                    // propagate to the caller without passing through the
                    // catch blocks below.
                    return this.handleResponse(newResponse);
                }
                catch (error) {
                    if (error && typeof error === "object" && "error" in error) {
                        const openAIError = error;
                        const apiError = new AgentError_1.ApiError(`OpenAI API error during tool response: ${openAIError.error.message || "Unknown error"}`, openAIError.status, openAIError.error);
                        this.emit(AgentEvent_1.AgentEvent.ERROR, apiError);
                        throw apiError;
                    }
                    else {
                        throw new AgentError_1.ExecutionError(`Error processing tool response: ${error instanceof Error ? error.message : "Unknown error"}`);
                    }
                }
            }
            catch (error) {
                if (this.debug) {
                    console.error(error);
                }
                if (error instanceof AgentError_1.ToolExecutionError) {
                    this.emit(AgentEvent_1.AgentEvent.TOOL_ERROR, error);
                    throw error;
                }
                // NOTE(review): errors thrown by the inner catch above also land
                // here and are re-wrapped as ExecutionError (with a second ERROR
                // event). Kept as-is because callers may rely on the thrown type.
                const executionError = new AgentError_1.ExecutionError(`Error during tool execution: ${error instanceof Error ? error.message : "Unknown error"}`);
                this.emit(AgentEvent_1.AgentEvent.ERROR, executionError);
                throw executionError;
            }
        }
        else {
            const error = new AgentError_1.ExecutionError(`Unexpected response format: ${JSON.stringify(response.output)}`);
            this.emit(AgentEvent_1.AgentEvent.ERROR, error);
            // Report error to viz
            if (this.vizEventId) {
                VizReporter_1.vizReporter.agentError(this.vizEventId, "ExecutionError", error.message, false);
                this.vizEventId = undefined;
            }
            throw error;
        }
    }
    /**
     * Execute all requested tool calls concurrently.
     *
     * An unknown tool, unparsable arguments, or a tool that throws does not
     * reject: the error message becomes that call's `output` so the model can
     * see it. Only an empty call list or a call without a name throws.
     *
     * @param content - The `function_call` items from a response.
     * @returns One `{ call_id, output }` per call, in the same order.
     * @throws ExecutionError when `content` is empty or a call has no name.
     */
    async handleToolUse(content) {
        if (!content || !content.length) {
            throw new AgentError_1.ExecutionError("Invalid tool calls content");
        }
        // Track tool call count for viz reporting
        this.currentToolCallCount += content.length;
        const toolResults = await Promise.all(content.map(async (toolCall) => {
            if (!toolCall.name) {
                throw new AgentError_1.ExecutionError("Invalid tool call format");
            }
            const toolName = toolCall.name;
            const tool = this.tools.get(toolName);
            if (!tool) {
                const errorMessage = `Tool '${toolName}' not found`;
                const error = new AgentError_1.ToolExecutionError(errorMessage, toolName, toolCall.arguments);
                if (this.debug) {
                    console.error(error);
                }
                return {
                    call_id: toolCall.call_id,
                    output: errorMessage,
                };
            }
            try {
                let toolArgs;
                try {
                    toolArgs = JSON.parse(toolCall.arguments);
                }
                catch (parseError) {
                    throw new AgentError_1.ToolExecutionError(`Invalid tool arguments: ${parseError instanceof Error ? parseError.message : "Parse error"}`, toolName, toolCall.arguments);
                }
                const result = await tool.execute(this.getId(), this.getName(), toolArgs, toolCall.id || "", this.config.model, "openai");
                return {
                    call_id: toolCall.call_id,
                    output: JSON.stringify(result),
                };
            }
            catch (error) {
                const errorMessage = `Error executing tool '${toolName}': ${error instanceof Error ? error.message : "Unknown error"}`;
                const toolError = new AgentError_1.ToolExecutionError(errorMessage, toolName, toolCall.arguments);
                this.emit(AgentEvent_1.AgentEvent.TOOL_ERROR, toolError);
                if (this.debug) {
                    console.error(toolError);
                }
                return {
                    call_id: toolCall.call_id,
                    output: errorMessage,
                };
            }
        }));
        return toolResults;
    }
    /**
     * Copy the three token counters out of a Responses API usage object.
     *
     * @param input - The `usage` object from a response.
     * @returns `{ input_tokens, output_tokens, total_tokens }`.
     */
    parseUsage(input) {
        return {
            input_tokens: input.input_tokens,
            output_tokens: input.output_tokens,
            total_tokens: input.total_tokens,
        };
    }
}
337
+ exports.OpenAiAgent = OpenAiAgent;
338
+ //# sourceMappingURL=OpenAiAgent.js.map
@@ -0,0 +1,53 @@
1
+ import { Chunk, ChunkerConfig, ChunkOptions } from "./types";
2
+ /**
3
+ * Abstract base class for text chunkers.
4
+ * Provides common utilities for ID generation, SHA-256 hashing, chunk linking, and section-title detection.
5
+ */
6
+ export declare abstract class Chunker {
7
+ /** Name identifier for this chunker type */
8
+ abstract readonly name: string;
9
+ protected config: ChunkerConfig;
10
+ constructor(config: ChunkerConfig);
11
+ /**
12
+ * Split text into chunks and attach metadata (index, offsets, character count, hash, section title, neighbor links) to each one.
13
+ * @param text - The text to chunk; an empty or missing value resolves to an empty array
14
+ * @param options - Optional chunking options (sourceId, sourcePath, and extra metadata are read)
15
+ * @returns Array of chunks with metadata, passed through the configured chunkProcessor when one is set
16
+ */
17
+ chunk(text: string, options?: ChunkOptions): Promise<Chunk[]>;
18
+ /**
19
+ * Split the text into raw string segments.
20
+ * Must be implemented by subclasses; may return the segments directly or as a Promise.
21
+ */
22
+ protected abstract splitText(text: string): Promise<string[]> | string[];
23
+ /**
24
+ * Generate an ID for a chunk: the configured idGenerator's result if one is set, otherwise `[sourceId-]chunk-<index>-<first 8 hex chars of the content hash>`.
25
+ */
26
+ protected generateId(content: string, index: number, sourceId?: string): string;
27
+ /**
28
+ * Compute the hex-encoded SHA-256 hash of content.
29
+ */
30
+ protected computeHash(content: string): string;
31
+ /**
32
+ * Link chunks in place by setting previousChunkId and nextChunkId in each chunk's metadata to its neighbors' IDs.
33
+ */
34
+ protected linkChunks(chunks: Chunk[]): void;
35
+ /**
36
+ * Apply the configured chunk processor to each chunk in order, filtering out null results and then re-indexing and re-linking the remaining chunks.
37
+ */
38
+ protected applyProcessor(chunks: Chunk[]): Promise<Chunk[]>;
39
+ /**
40
+ * Detect a section title in content.
41
+ * Returns the title from the first line that is a markdown header (# Title, up to six #) or that ends with a colon (2-99 characters after trimming), or undefined if no line matches.
42
+ */
43
+ protected detectSectionTitle(content: string): string | undefined;
44
+ /**
45
+ * Get the configured chunk size.
46
+ */
47
+ getChunkSize(): number;
48
+ /**
49
+ * Get the configured chunk overlap (0 when none was configured).
50
+ */
51
+ getChunkOverlap(): number;
52
+ }
53
+ //# sourceMappingURL=Chunker.d.ts.map
@@ -0,0 +1,174 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.Chunker = void 0;
4
+ const crypto_1 = require("crypto");
5
/**
 * Abstract base class for text chunkers.
 * Provides common utilities for ID generation, hashing, and chunk linking.
 * Subclasses supply `splitText`; `chunk` turns the raw segments it returns
 * into chunk objects with metadata.
 */
class Chunker {
    /**
     * @param config - Chunker configuration. `chunkOverlap` defaults to 0.
     * @throws Error when `chunkSize` is not greater than 0, or when
     *   `chunkOverlap` is negative or not smaller than `chunkSize`.
     */
    constructor(config) {
        if (config.chunkSize <= 0) {
            throw new Error("chunkSize must be greater than 0");
        }
        if (config.chunkOverlap !== undefined && config.chunkOverlap < 0) {
            throw new Error("chunkOverlap must be non-negative");
        }
        if (config.chunkOverlap !== undefined &&
            config.chunkOverlap >= config.chunkSize) {
            throw new Error("chunkOverlap must be less than chunkSize");
        }
        this.config = {
            chunkOverlap: 0,
            ...config,
        };
    }
    /**
     * Split text into chunks with metadata.
     *
     * Offsets are found by searching for each segment in `text`, starting just
     * after the previous segment's start so overlapping segments still match.
     * If a segment does not occur verbatim from that point on, its offsets are
     * a best-effort estimate anchored at the current search position (clamped
     * to the text length) instead of a negative value, and the search position
     * is left unchanged for the next segment.
     *
     * @param text - The text to chunk
     * @param options - Optional chunking options
     * @returns Array of chunks with metadata
     */
    async chunk(text, options) {
        if (!text || text.length === 0) {
            return [];
        }
        // Get raw splits from the subclass implementation
        const splits = await this.splitText(text);
        if (splits.length === 0) {
            return [];
        }
        // Build chunks with metadata
        const chunks = [];
        let currentOffset = 0;
        let currentSection;
        for (let i = 0; i < splits.length; i++) {
            const content = splits[i];
            const foundAt = text.indexOf(content, currentOffset);
            const located = foundAt !== -1;
            // Never record a negative offset: fall back to the search position.
            const startOffset = located
                ? foundAt
                : Math.min(currentOffset, text.length);
            const endOffset = Math.min(startOffset + content.length, text.length);
            // Detect section titles (markdown headers or lines ending with :)
            const detectedSection = this.detectSectionTitle(content);
            if (detectedSection) {
                currentSection = detectedSection;
            }
            const id = this.generateId(content, i, options?.sourceId);
            const metadata = {
                chunkIndex: i,
                totalChunks: splits.length,
                previousChunkId: null, // Will be linked after
                nextChunkId: null, // Will be linked after
                startOffset,
                endOffset,
                sourceId: options?.sourceId,
                sourcePath: options?.sourcePath,
                charCount: content.length,
                hash: this.computeHash(content),
                sectionTitle: currentSection,
                // Caller-supplied metadata is spread last and can override the fields above
                ...options?.metadata,
            };
            chunks.push({ id, content, metadata });
            if (located) {
                currentOffset = startOffset + 1; // Move past current match for next search
            }
        }
        // Link chunks together
        this.linkChunks(chunks);
        // Force totalChunks to the final count (also undoes any override from options.metadata)
        for (const chunk of chunks) {
            chunk.metadata.totalChunks = chunks.length;
        }
        // Apply processor if provided
        if (this.config.chunkProcessor) {
            return this.applyProcessor(chunks);
        }
        return chunks;
    }
    /**
     * Generate an ID for a chunk.
     * Uses `config.idGenerator` when set; otherwise builds
     * `[sourceId-]chunk-<index>-<first 8 hex chars of the content hash>`.
     */
    generateId(content, index, sourceId) {
        if (this.config.idGenerator) {
            return this.config.idGenerator(content, index, sourceId);
        }
        // Default: hash-based ID with source prefix
        const hash = this.computeHash(content).substring(0, 8);
        const prefix = sourceId ? `${sourceId}-` : "";
        return `${prefix}chunk-${index}-${hash}`;
    }
    /**
     * Compute the hex-encoded SHA-256 hash of content.
     */
    computeHash(content) {
        return (0, crypto_1.createHash)("sha256").update(content).digest("hex");
    }
    /**
     * Link chunks with previousChunkId and nextChunkId (mutates the chunks).
     */
    linkChunks(chunks) {
        for (let i = 0; i < chunks.length; i++) {
            if (i > 0) {
                chunks[i].metadata.previousChunkId = chunks[i - 1].id;
            }
            if (i < chunks.length - 1) {
                chunks[i].metadata.nextChunkId = chunks[i + 1].id;
            }
        }
    }
    /**
     * Apply the chunk processor to each chunk in order, dropping chunks for
     * which it returns null or undefined, then re-index and re-link the rest.
     */
    async applyProcessor(chunks) {
        if (!this.config.chunkProcessor) {
            return chunks;
        }
        const processed = [];
        for (let i = 0; i < chunks.length; i++) {
            const result = await this.config.chunkProcessor(chunks[i], i, chunks);
            // `!= null` also skips undefined, which would otherwise crash the re-link loop below
            if (result != null) {
                processed.push(result);
            }
        }
        // Re-link after filtering and update indices
        for (let i = 0; i < processed.length; i++) {
            processed[i].metadata.chunkIndex = i;
            processed[i].metadata.totalChunks = processed.length;
            processed[i].metadata.previousChunkId =
                i > 0 ? processed[i - 1].id : null;
            processed[i].metadata.nextChunkId =
                i < processed.length - 1 ? processed[i + 1].id : null;
        }
        return processed;
    }
    /**
     * Detect section titles from content.
     * Returns the title from the first line that is a markdown header
     * (# Title) or that ends with a colon, or undefined when no line matches.
     */
    detectSectionTitle(content) {
        const lines = content.split("\n");
        for (const line of lines) {
            const trimmed = line.trim();
            // Markdown header
            const headerMatch = trimmed.match(/^#{1,6}\s+(.+)$/);
            if (headerMatch) {
                return headerMatch[1].trim();
            }
            // Line ending with colon (common section pattern)
            if (trimmed.endsWith(":") && trimmed.length > 1 && trimmed.length < 100) {
                return trimmed.slice(0, -1).trim();
            }
        }
        return undefined;
    }
    /**
     * Get the chunk size configuration.
     */
    getChunkSize() {
        return this.config.chunkSize;
    }
    /**
     * Get the chunk overlap configuration (0 when unset).
     */
    getChunkOverlap() {
        return this.config.chunkOverlap ?? 0;
    }
}
173
+ exports.Chunker = Chunker;
174
+ //# sourceMappingURL=Chunker.js.map
@@ -0,0 +1,52 @@
1
+ import { Chunker } from "./Chunker";
2
+ import { RecursiveChunkerConfig } from "./types";
3
+ /**
4
+ * Recursive text chunker that tries to split on semantic boundaries.
5
+ * It attempts to split by larger separators first (paragraphs), then
6
+ * falls back to smaller ones (sentences, words) to keep semantic units together.
7
+ *
8
+ * @example
9
+ * ```typescript
10
+ * const chunker = new RecursiveChunker({
11
+ * chunkSize: 1000,
12
+ * chunkOverlap: 100,
13
+ * separators: ["\n\n", "\n", ". ", " "],
14
+ * });
15
+ *
16
+ * const chunks = await chunker.chunk(document);
17
+ * ```
18
+ */
19
+ export declare class RecursiveChunker extends Chunker {
20
+ readonly name = "RecursiveChunker";
21
+ private separators;
22
+ constructor(config: RecursiveChunkerConfig);
23
+ /**
24
+ * Split text recursively using the separator hierarchy; implements Chunker's abstract splitText and returns the segments synchronously.
25
+ */
26
+ protected splitText(text: string): string[];
27
+ /**
28
+ * Recursively split text using separators at the given index.
29
+ */
30
+ private recursiveSplit;
31
+ /**
32
+ * Split text by separator, keeping the parts clean. NOTE(review): "clean" is not defined by this declaration (presumably trimmed / non-empty parts) - confirm against the implementation.
33
+ */
34
+ private splitBySeparator;
35
+ /**
36
+ * Force split text by character count when no separator works.
37
+ */
38
+ private forceSplit;
39
+ /**
40
+ * Apply overlap between chunks by prepending context from the previous chunk.
41
+ */
42
+ private applyOverlap;
43
+ /**
44
+ * Extract overlap text from the end of a chunk, trying to break at a separator.
45
+ */
46
+ private getOverlapText;
47
+ /**
48
+ * Get the configured separators.
49
+ */
50
+ getSeparators(): string[];
51
+ }
52
+ //# sourceMappingURL=RecursiveChunker.d.ts.map