@townco/agent 0.1.84 → 0.1.85

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (28)
  1. package/dist/definition/index.d.ts +0 -2
  2. package/dist/definition/index.js +0 -1
  3. package/dist/runner/agent-runner.d.ts +1 -2
  4. package/dist/runner/hooks/executor.d.ts +4 -2
  5. package/dist/runner/hooks/executor.js +9 -1
  6. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +37 -0
  7. package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +134 -0
  8. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.d.ts +20 -0
  9. package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +171 -0
  10. package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.d.ts +57 -0
  11. package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.js +126 -0
  12. package/dist/runner/hooks/predefined/document-context-extractor/index.d.ts +22 -0
  13. package/dist/runner/hooks/predefined/document-context-extractor/index.js +338 -0
  14. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.d.ts +19 -0
  15. package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +156 -0
  16. package/dist/runner/hooks/predefined/document-context-extractor/types.d.ts +130 -0
  17. package/dist/runner/hooks/predefined/document-context-extractor/types.js +8 -0
  18. package/dist/runner/hooks/predefined/tool-response-compactor.js +77 -212
  19. package/dist/runner/hooks/types.d.ts +15 -8
  20. package/dist/runner/langchain/index.js +2 -0
  21. package/dist/runner/langchain/tools/document_extract.d.ts +26 -0
  22. package/dist/runner/langchain/tools/document_extract.js +135 -0
  23. package/dist/runner/tools.d.ts +2 -2
  24. package/dist/runner/tools.js +1 -0
  25. package/dist/templates/index.d.ts +0 -1
  26. package/dist/tsconfig.tsbuildinfo +1 -1
  27. package/package.json +6 -6
  28. package/templates/index.ts +0 -1
@@ -22,7 +22,6 @@ export declare const HookConfigSchema: z.ZodObject<{
22
22
  threshold: z.ZodNumber;
23
23
  }, z.core.$strip>, z.ZodObject<{
24
24
  maxTokensSize: z.ZodOptional<z.ZodNumber>;
25
- responseTruncationThreshold: z.ZodOptional<z.ZodNumber>;
26
25
  }, z.core.$strip>]>>;
27
26
  callback: z.ZodString;
28
27
  }, z.core.$strip>;
@@ -79,7 +78,6 @@ export declare const AgentDefinitionSchema: z.ZodObject<{
79
78
  threshold: z.ZodNumber;
80
79
  }, z.core.$strip>, z.ZodObject<{
81
80
  maxTokensSize: z.ZodOptional<z.ZodNumber>;
82
- responseTruncationThreshold: z.ZodOptional<z.ZodNumber>;
83
81
  }, z.core.$strip>]>>;
84
82
  callback: z.ZodString;
85
83
  }, z.core.$strip>>>;
@@ -65,7 +65,6 @@ export const HookConfigSchema = z.object({
65
65
  // For tool_response hooks
66
66
  z.object({
67
67
  maxTokensSize: z.number().min(0).optional(),
68
- responseTruncationThreshold: z.number().min(0).max(100).optional(),
69
68
  }),
70
69
  ])
71
70
  .optional(),
@@ -9,7 +9,7 @@ export declare const zAgentRunnerParams: z.ZodObject<{
9
9
  suggestedPrompts: z.ZodOptional<z.ZodArray<z.ZodString>>;
10
10
  systemPrompt: z.ZodNullable<z.ZodString>;
11
11
  model: z.ZodString;
12
- tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"artifacts">, z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"town_web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"town_generate_image">, z.ZodLiteral<"browser">]>, z.ZodObject<{
12
+ tools: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodUnion<readonly [z.ZodLiteral<"artifacts">, z.ZodLiteral<"todo_write">, z.ZodLiteral<"get_weather">, z.ZodLiteral<"web_search">, z.ZodLiteral<"town_web_search">, z.ZodLiteral<"filesystem">, z.ZodLiteral<"generate_image">, z.ZodLiteral<"town_generate_image">, z.ZodLiteral<"browser">, z.ZodLiteral<"document_extract">]>, z.ZodObject<{
13
13
  type: z.ZodLiteral<"custom">;
14
14
  modulePath: z.ZodString;
15
15
  }, z.core.$strip>, z.ZodObject<{
@@ -49,7 +49,6 @@ export declare const zAgentRunnerParams: z.ZodObject<{
49
49
  threshold: z.ZodNumber;
50
50
  }, z.core.$strip>, z.ZodObject<{
51
51
  maxTokensSize: z.ZodOptional<z.ZodNumber>;
52
- responseTruncationThreshold: z.ZodOptional<z.ZodNumber>;
53
52
  }, z.core.$strip>]>>;
54
53
  callback: z.ZodString;
55
54
  }, z.core.$strip>>>;
@@ -1,5 +1,5 @@
1
1
  import type { ContextEntry } from "../../acp-server/session-storage";
2
- import type { HookCallback, HookConfig, HookNotification, ReadonlySession } from "./types";
2
+ import type { HookCallback, HookConfig, HookNotification, HookStorageInterface, ReadonlySession } from "./types";
3
3
  /**
4
4
  * Callback for streaming hook notifications in real-time
5
5
  */
@@ -12,7 +12,9 @@ export declare class HookExecutor {
12
12
  private model;
13
13
  private loadCallback;
14
14
  private onNotification;
15
- constructor(hooks: HookConfig[], model: string, loadCallback: (callbackRef: string) => Promise<HookCallback>, onNotification?: OnHookNotification);
15
+ private storage;
16
+ private sessionId;
17
+ constructor(hooks: HookConfig[], model: string, loadCallback: (callbackRef: string) => Promise<HookCallback>, onNotification?: OnHookNotification, storage?: HookStorageInterface, sessionId?: string);
16
18
  /**
17
19
  * Emit a notification - sends immediately if callback provided, otherwise collects for batch return
18
20
  */
@@ -9,11 +9,15 @@ export class HookExecutor {
9
9
  model;
10
10
  loadCallback;
11
11
  onNotification;
12
- constructor(hooks, model, loadCallback, onNotification) {
12
+ storage;
13
+ sessionId;
14
+ constructor(hooks, model, loadCallback, onNotification, storage, sessionId) {
13
15
  this.hooks = hooks;
14
16
  this.model = model;
15
17
  this.loadCallback = loadCallback;
16
18
  this.onNotification = onNotification;
19
+ this.storage = storage;
20
+ this.sessionId = sessionId;
17
21
  }
18
22
  /**
19
23
  * Emit a notification - sends immediately if callback provided, otherwise collects for batch return
@@ -84,6 +88,8 @@ export class HookExecutor {
84
88
  maxTokens,
85
89
  percentage,
86
90
  model: this.model,
91
+ sessionId: this.sessionId,
92
+ storage: this.storage,
87
93
  };
88
94
  const result = await callback(hookContext);
89
95
  // Notify completion
@@ -189,6 +195,8 @@ export class HookExecutor {
189
195
  maxTokens,
190
196
  percentage,
191
197
  model: this.model,
198
+ sessionId: this.sessionId,
199
+ storage: this.storage,
192
200
  toolResponse,
193
201
  };
194
202
  const result = await callback(hookContext);
@@ -0,0 +1,37 @@
1
+ /**
2
+ * Chunk manager for document context extraction
3
+ *
4
+ * Handles dynamic chunk sizing and document splitting
5
+ * to maximize efficiency while staying within model context limits.
6
+ */
7
+ import type { ChunkInfo, ExtractorConfig } from "./types.js";
8
+ /**
9
+ * Calculate optimal chunk size based on model context and overhead
10
+ *
11
+ * Goal: Maximize chunk size to minimize iterations while leaving room for:
12
+ * - System prompt
13
+ * - LLM response buffer
14
+ * - Safety margin
15
+ */
16
+ export declare function calculateChunkSize(config: ExtractorConfig): number;
17
+ /**
18
+ * Calculate maximum iterations based on document size and chunk size
19
+ */
20
+ export declare function calculateMaxIterations(documentTokens: number, chunkSizeTokens: number): number;
21
+ /**
22
+ * Split a document string into chunks with optional overlap
23
+ *
24
+ * The overlap helps maintain context continuity at chunk boundaries,
25
+ * ensuring the LLM doesn't miss information that spans boundaries.
26
+ */
27
+ export declare function createChunks(content: string, chunkSizeTokens: number, overlapTokens?: number): ChunkInfo[];
28
+ /**
29
+ * Get summary statistics about chunks
30
+ */
31
+ export declare function getChunkStats(chunks: ChunkInfo[]): {
32
+ totalChunks: number;
33
+ totalTokens: number;
34
+ avgTokensPerChunk: number;
35
+ minChunkTokens: number;
36
+ maxChunkTokens: number;
37
+ };
@@ -0,0 +1,134 @@
1
+ /**
2
+ * Chunk manager for document context extraction
3
+ *
4
+ * Handles dynamic chunk sizing and document splitting
5
+ * to maximize efficiency while staying within model context limits.
6
+ */
7
+ import { countTokens } from "../../../../utils/token-counter.js";
8
+ /**
9
+ * Calculate optimal chunk size based on model context and overhead
10
+ *
11
+ * Goal: Maximize chunk size to minimize iterations while leaving room for:
12
+ * - System prompt
13
+ * - LLM response buffer
14
+ * - Safety margin
15
+ */
16
+ export function calculateChunkSize(config) {
17
+ const { modelContextSize, systemPromptTokens } = config;
18
+ // Reserve space for:
19
+ // - System prompt (~500 tokens for extraction instructions)
20
+ // - LLM response buffer (~2000 tokens for scoring/extraction output)
21
+ // - Safety margin (10%)
22
+ const responseBuffer = 2000;
23
+ const overhead = systemPromptTokens + responseBuffer;
24
+ const availableForChunk = modelContextSize - overhead;
25
+ const safeChunkSize = Math.floor(availableForChunk * 0.9);
26
+ // Minimum chunk size to ensure meaningful content (10K tokens)
27
+ const minChunkSize = 10000;
28
+ return Math.max(safeChunkSize, minChunkSize);
29
+ }
30
+ /**
31
+ * Calculate maximum iterations based on document size and chunk size
32
+ */
33
+ export function calculateMaxIterations(documentTokens, chunkSizeTokens) {
34
+ // Number of chunks + buffer for processing overhead
35
+ const estimatedChunks = Math.ceil(documentTokens / chunkSizeTokens);
36
+ // Add 2 iterations for analysis and merging phases
37
+ return estimatedChunks + 2;
38
+ }
39
+ /**
40
+ * Split a document string into chunks with optional overlap
41
+ *
42
+ * The overlap helps maintain context continuity at chunk boundaries,
43
+ * ensuring the LLM doesn't miss information that spans boundaries.
44
+ */
45
+ export function createChunks(content, chunkSizeTokens, overlapTokens = 200) {
46
+ const chunks = [];
47
+ const totalTokens = countTokens(content);
48
+ // If content fits in a single chunk, return it as-is
49
+ if (totalTokens <= chunkSizeTokens) {
50
+ return [
51
+ {
52
+ index: 0,
53
+ startOffset: 0,
54
+ endOffset: content.length,
55
+ tokenCount: totalTokens,
56
+ content,
57
+ },
58
+ ];
59
+ }
60
+ // Estimate characters per token for this content
61
+ const charsPerToken = content.length / totalTokens;
62
+ // Calculate target chunk size in characters
63
+ const targetChunkChars = Math.floor(chunkSizeTokens * charsPerToken);
64
+ const overlapChars = Math.floor(overlapTokens * charsPerToken);
65
+ let currentOffset = 0;
66
+ let chunkIndex = 0;
67
+ while (currentOffset < content.length) {
68
+ // Calculate end offset for this chunk
69
+ let endOffset = Math.min(currentOffset + targetChunkChars, content.length);
70
+ // Try to break at a natural boundary (newline or sentence end)
71
+ if (endOffset < content.length) {
72
+ const searchStart = Math.max(currentOffset, endOffset - Math.floor(targetChunkChars * 0.1));
73
+ const searchRegion = content.slice(searchStart, endOffset);
74
+ // Look for natural break points (paragraph break preferred, then sentence)
75
+ const paragraphBreak = searchRegion.lastIndexOf("\n\n");
76
+ const sentenceEnd = searchRegion.search(/[.!?]\s+[A-Z]/);
77
+ const newline = searchRegion.lastIndexOf("\n");
78
+ if (paragraphBreak !== -1) {
79
+ endOffset = searchStart + paragraphBreak + 2;
80
+ }
81
+ else if (sentenceEnd !== -1) {
82
+ endOffset = searchStart + sentenceEnd + 1;
83
+ }
84
+ else if (newline !== -1) {
85
+ endOffset = searchStart + newline + 1;
86
+ }
87
+ }
88
+ // Extract chunk content
89
+ const chunkContent = content.slice(currentOffset, endOffset);
90
+ const chunkTokens = countTokens(chunkContent);
91
+ chunks.push({
92
+ index: chunkIndex,
93
+ startOffset: currentOffset,
94
+ endOffset,
95
+ tokenCount: chunkTokens,
96
+ content: chunkContent,
97
+ });
98
+ // Move to next chunk with overlap
99
+ // For the last chunk, we don't need overlap
100
+ if (endOffset >= content.length) {
101
+ break;
102
+ }
103
+ currentOffset = Math.max(currentOffset + 1, endOffset - overlapChars);
104
+ chunkIndex++;
105
+ // Safety check to prevent infinite loops
106
+ if (chunkIndex > 1000) {
107
+ break;
108
+ }
109
+ }
110
+ return chunks;
111
+ }
112
+ /**
113
+ * Get summary statistics about chunks
114
+ */
115
+ export function getChunkStats(chunks) {
116
+ if (chunks.length === 0) {
117
+ return {
118
+ totalChunks: 0,
119
+ totalTokens: 0,
120
+ avgTokensPerChunk: 0,
121
+ minChunkTokens: 0,
122
+ maxChunkTokens: 0,
123
+ };
124
+ }
125
+ const tokenCounts = chunks.map((c) => c.tokenCount);
126
+ const totalTokens = tokenCounts.reduce((a, b) => a + b, 0);
127
+ return {
128
+ totalChunks: chunks.length,
129
+ totalTokens,
130
+ avgTokensPerChunk: Math.round(totalTokens / chunks.length),
131
+ minChunkTokens: Math.min(...tokenCounts),
132
+ maxChunkTokens: Math.max(...tokenCounts),
133
+ };
134
+ }
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Content extractor for document chunks
3
+ *
4
+ * Phase 2 of extraction: Extract relevant information from high-scoring chunks.
5
+ */
6
+ import type { ChunkExtractionResult, ChunkInfo, ExtractorConfig } from "./types.js";
7
+ /**
8
+ * Extract content from all relevant chunks
9
+ *
10
+ * This is Phase 2 of the extraction process.
11
+ * Only chunks that meet the relevance threshold are processed.
12
+ */
13
+ export declare function extractFromChunks(chunks: ChunkInfo[], keyRequirements: string, config: ExtractorConfig, onProgress?: (extracted: number, total: number) => void): Promise<Map<number, ChunkExtractionResult>>;
14
+ /**
15
+ * Merge extracted content into a single result
16
+ */
17
+ export declare function mergeExtractions(extractions: Map<number, ChunkExtractionResult>, originalChunks: ChunkInfo[]): {
18
+ content: string;
19
+ keyFacts: string[];
20
+ };
@@ -0,0 +1,171 @@
1
+ /**
2
+ * Content extractor for document chunks
3
+ *
4
+ * Phase 2 of extraction: Extract relevant information from high-scoring chunks.
5
+ */
6
+ import Anthropic from "@anthropic-ai/sdk";
7
+ import { createLogger } from "../../../../logger.js";
8
+ import { telemetry } from "../../../../telemetry/index.js";
9
+ const logger = createLogger("content-extractor");
10
+ // Create Anthropic client directly (not using LangChain)
11
+ const anthropic = new Anthropic({
12
+ apiKey: process.env.ANTHROPIC_API_KEY,
13
+ });
14
+ const EXTRACTION_SYSTEM_PROMPT = `You are extracting relevant information from a document chunk.
15
+
16
+ Your task is to extract ONLY the information that matches the extraction requirements.
17
+ Be concise but preserve important details, structure, and relationships.
18
+
19
+ Guidelines:
20
+ 1. Focus on information directly related to the requirements
21
+ 2. Preserve exact values, names, identifiers, and technical details
22
+ 3. Maintain relationships between pieces of information
23
+ 4. Summarize verbose explanations while keeping key facts
24
+ 5. Omit completely irrelevant content
25
+
26
+ Respond with ONLY a JSON object in this exact format:
27
+ {"extracted": "concise extracted content as a string", "keyFacts": ["fact1", "fact2", ...]}
28
+
29
+ Do not include any other text before or after the JSON.`;
30
+ /**
31
+ * Build the extraction prompt for a chunk
32
+ */
33
+ function buildExtractionPrompt(chunkContent, keyRequirements, chunkIndex, totalChunks, relevanceScore) {
34
+ return `## Extraction Requirements
35
+ ${keyRequirements}
36
+
37
+ ## Content Chunk (${chunkIndex + 1}/${totalChunks}, relevance score: ${relevanceScore}/10)
38
+ ${chunkContent}
39
+
40
+ ## Task
41
+ Extract all information relevant to the requirements above. Preserve important details.`;
42
+ }
43
+ /**
44
+ * Parse extraction result from LLM response
45
+ */
46
+ function parseExtractionResult(responseText) {
47
+ try {
48
+ // Try to extract JSON from the response
49
+ const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
50
+ if (jsonMatch) {
51
+ const parsed = JSON.parse(jsonMatch[0]);
52
+ return {
53
+ extracted: String(parsed.extracted || ""),
54
+ keyFacts: Array.isArray(parsed.keyFacts)
55
+ ? parsed.keyFacts.map(String)
56
+ : [],
57
+ };
58
+ }
59
+ }
60
+ catch {
61
+ // Fall through to raw extraction
62
+ }
63
+ // If JSON parsing failed, use the raw response
64
+ return {
65
+ extracted: responseText.trim(),
66
+ keyFacts: [],
67
+ };
68
+ }
69
+ /**
70
+ * Extract content from a single chunk
71
+ */
72
+ async function extractFromChunk(chunk, keyRequirements, totalChunks, config) {
73
+ const span = telemetry.startSpan("extractor.extract_chunk", {
74
+ "gen_ai.operation.name": "chat",
75
+ "gen_ai.provider.name": "anthropic",
76
+ "gen_ai.request.model": config.model,
77
+ "extractor.phase": "extracting",
78
+ "extractor.chunk_index": chunk.index,
79
+ "extractor.chunk_tokens": chunk.tokenCount,
80
+ "extractor.relevance_score": chunk.relevanceScore ?? 0,
81
+ });
82
+ try {
83
+ const prompt = buildExtractionPrompt(chunk.content, keyRequirements, chunk.index, totalChunks, chunk.relevanceScore ?? 5);
84
+ const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
85
+ model: config.model,
86
+ max_tokens: 4096,
87
+ temperature: 0,
88
+ system: EXTRACTION_SYSTEM_PROMPT,
89
+ messages: [{ role: "user", content: prompt }],
90
+ }));
91
+ telemetry.recordTokenUsage(response.usage.input_tokens, response.usage.output_tokens, span);
92
+ telemetry.endSpan(span);
93
+ const firstContent = response.content[0];
94
+ const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
95
+ return parseExtractionResult(responseText);
96
+ }
97
+ catch (error) {
98
+ telemetry.endSpan(span, error);
99
+ logger.error("Failed to extract from chunk", {
100
+ chunkIndex: chunk.index,
101
+ error: error instanceof Error ? error.message : String(error),
102
+ });
103
+ // Return empty extraction on error
104
+ return { extracted: "", keyFacts: [] };
105
+ }
106
+ }
107
+ /**
108
+ * Extract content from all relevant chunks
109
+ *
110
+ * This is Phase 2 of the extraction process.
111
+ * Only chunks that meet the relevance threshold are processed.
112
+ */
113
+ export async function extractFromChunks(chunks, keyRequirements, config, onProgress) {
114
+ // Filter to only relevant chunks
115
+ const relevantChunks = chunks.filter((c) => (c.relevanceScore ?? 0) >= config.relevanceThreshold);
116
+ logger.info("Starting content extraction phase", {
117
+ totalChunks: chunks.length,
118
+ relevantChunks: relevantChunks.length,
119
+ threshold: config.relevanceThreshold,
120
+ });
121
+ const extractions = new Map();
122
+ for (const chunk of relevantChunks) {
123
+ const result = await extractFromChunk(chunk, keyRequirements, chunks.length, config);
124
+ // Only store non-empty extractions
125
+ if (result.extracted.trim() || result.keyFacts.length > 0) {
126
+ extractions.set(chunk.index, result);
127
+ }
128
+ logger.debug("Chunk extraction complete", {
129
+ chunkIndex: chunk.index,
130
+ extractedLength: result.extracted.length,
131
+ keyFactsCount: result.keyFacts.length,
132
+ });
133
+ onProgress?.(extractions.size, relevantChunks.length);
134
+ }
135
+ logger.info("Content extraction complete", {
136
+ chunksProcessed: relevantChunks.length,
137
+ chunksWithContent: extractions.size,
138
+ });
139
+ return extractions;
140
+ }
141
+ /**
142
+ * Merge extracted content into a single result
143
+ */
144
+ export function mergeExtractions(extractions, originalChunks) {
145
+ // Sort by original chunk index to maintain document order
146
+ const sortedEntries = Array.from(extractions.entries()).sort(([a], [b]) => a - b);
147
+ const allKeyFacts = [];
148
+ const contentParts = [];
149
+ for (const [chunkIndex, result] of sortedEntries) {
150
+ if (result.extracted.trim()) {
151
+ // Get the original chunk's relevance score for context
152
+ const chunk = originalChunks.find((c) => c.index === chunkIndex);
153
+ const score = chunk?.relevanceScore ?? 5;
154
+ // Add a section header for high-relevance content
155
+ if (score >= 8) {
156
+ contentParts.push(`[High relevance - Chunk ${chunkIndex + 1}]`);
157
+ }
158
+ contentParts.push(result.extracted);
159
+ }
160
+ // Collect unique key facts
161
+ for (const fact of result.keyFacts) {
162
+ if (!allKeyFacts.includes(fact)) {
163
+ allKeyFacts.push(fact);
164
+ }
165
+ }
166
+ }
167
+ return {
168
+ content: contentParts.join("\n\n"),
169
+ keyFacts: allKeyFacts,
170
+ };
171
+ }
@@ -0,0 +1,57 @@
1
+ /**
2
+ * Extraction state persistence for debugging and recovery
3
+ *
4
+ * Saves intermediate extraction state to session artifacts,
5
+ * allowing for debugging and potential recovery if extraction fails mid-way.
6
+ */
7
+ import type { HookStorageInterface } from "../../types.js";
8
+ import type { ChunkInfo, ExtractionPhase, ExtractionState } from "./types.js";
9
+ /**
10
+ * Re-export the storage interface for convenience
11
+ */
12
+ export type StorageInterface = HookStorageInterface;
13
+ /**
14
+ * Create a new extraction state
15
+ */
16
+ export declare function createExtractionState(params: {
17
+ sessionId: string;
18
+ toolCallId: string;
19
+ toolName: string;
20
+ originalTokenCount: number;
21
+ targetTokens: number;
22
+ keyRequirements: string;
23
+ chunks: ChunkInfo[];
24
+ maxIterations: number;
25
+ }): ExtractionState;
26
+ /**
27
+ * Update extraction state phase
28
+ */
29
+ export declare function updateStatePhase(state: ExtractionState, phase: ExtractionPhase): ExtractionState;
30
+ /**
31
+ * Update chunk relevance score in state
32
+ */
33
+ export declare function updateChunkScore(state: ExtractionState, chunkIndex: number, score: number, reason: string): ExtractionState;
34
+ /**
35
+ * Add extracted content for a chunk
36
+ */
37
+ export declare function addExtractedContent(state: ExtractionState, chunkIndex: number, content: string): ExtractionState;
38
+ /**
39
+ * Record an error in state
40
+ */
41
+ export declare function recordError(state: ExtractionState, error: string): ExtractionState;
42
+ /**
43
+ * Increment iteration counter
44
+ */
45
+ export declare function incrementIteration(state: ExtractionState): ExtractionState;
46
+ /**
47
+ * Save extraction state to artifacts
48
+ */
49
+ export declare function saveExtractionState(storage: StorageInterface, state: ExtractionState): void;
50
+ /**
51
+ * Load extraction state from artifacts
52
+ */
53
+ export declare function loadExtractionState(storage: StorageInterface, sessionId: string, toolName: string, toolCallId: string): ExtractionState | null;
54
+ /**
55
+ * Check if extraction state exists
56
+ */
57
+ export declare function hasExtractionState(storage: StorageInterface, sessionId: string, toolName: string, toolCallId: string): boolean;
@@ -0,0 +1,126 @@
1
+ /**
2
+ * Extraction state persistence for debugging and recovery
3
+ *
4
+ * Saves intermediate extraction state to session artifacts,
5
+ * allowing for debugging and potential recovery if extraction fails mid-way.
6
+ */
7
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
8
+ import { join } from "node:path";
9
+ /**
10
+ * Create a new extraction state
11
+ */
12
+ export function createExtractionState(params) {
13
+ const now = new Date().toISOString();
14
+ return {
15
+ sessionId: params.sessionId,
16
+ toolCallId: params.toolCallId,
17
+ toolName: params.toolName,
18
+ originalTokenCount: params.originalTokenCount,
19
+ targetTokens: params.targetTokens,
20
+ keyRequirements: params.keyRequirements,
21
+ chunks: params.chunks,
22
+ phase: "initializing",
23
+ extractedContent: {},
24
+ currentIteration: 0,
25
+ maxIterations: params.maxIterations,
26
+ errors: [],
27
+ startedAt: now,
28
+ updatedAt: now,
29
+ };
30
+ }
31
+ /**
32
+ * Update extraction state phase
33
+ */
34
+ export function updateStatePhase(state, phase) {
35
+ return {
36
+ ...state,
37
+ phase,
38
+ updatedAt: new Date().toISOString(),
39
+ };
40
+ }
41
+ /**
42
+ * Update chunk relevance score in state
43
+ */
44
+ export function updateChunkScore(state, chunkIndex, score, reason) {
45
+ const updatedChunks = state.chunks.map((chunk) => chunk.index === chunkIndex
46
+ ? { ...chunk, relevanceScore: score, scoreReason: reason }
47
+ : chunk);
48
+ return {
49
+ ...state,
50
+ chunks: updatedChunks,
51
+ updatedAt: new Date().toISOString(),
52
+ };
53
+ }
54
+ /**
55
+ * Add extracted content for a chunk
56
+ */
57
+ export function addExtractedContent(state, chunkIndex, content) {
58
+ return {
59
+ ...state,
60
+ extractedContent: {
61
+ ...state.extractedContent,
62
+ [chunkIndex]: content,
63
+ },
64
+ updatedAt: new Date().toISOString(),
65
+ };
66
+ }
67
+ /**
68
+ * Record an error in state
69
+ */
70
+ export function recordError(state, error) {
71
+ return {
72
+ ...state,
73
+ errors: [...state.errors, error],
74
+ updatedAt: new Date().toISOString(),
75
+ };
76
+ }
77
+ /**
78
+ * Increment iteration counter
79
+ */
80
+ export function incrementIteration(state) {
81
+ return {
82
+ ...state,
83
+ currentIteration: state.currentIteration + 1,
84
+ updatedAt: new Date().toISOString(),
85
+ };
86
+ }
87
+ /**
88
+ * Get the file path for extraction state
89
+ */
90
+ function getStatePath(storage, sessionId, toolName, toolCallId) {
91
+ return join(storage.getArtifactsDir(sessionId), `tool-${toolName}`, `${toolCallId}.extraction-state.json`);
92
+ }
93
+ /**
94
+ * Save extraction state to artifacts
95
+ */
96
+ export function saveExtractionState(storage, state) {
97
+ const toolDir = join(storage.getArtifactsDir(state.sessionId), `tool-${state.toolName}`);
98
+ if (!existsSync(toolDir)) {
99
+ mkdirSync(toolDir, { recursive: true });
100
+ }
101
+ const filePath = getStatePath(storage, state.sessionId, state.toolName, state.toolCallId);
102
+ writeFileSync(filePath, JSON.stringify(state, null, 2), "utf-8");
103
+ }
104
+ /**
105
+ * Load extraction state from artifacts
106
+ */
107
+ export function loadExtractionState(storage, sessionId, toolName, toolCallId) {
108
+ const filePath = getStatePath(storage, sessionId, toolName, toolCallId);
109
+ if (!existsSync(filePath)) {
110
+ return null;
111
+ }
112
+ try {
113
+ const content = readFileSync(filePath, "utf-8");
114
+ return JSON.parse(content);
115
+ }
116
+ catch {
117
+ return null;
118
+ }
119
+ }
120
+ /**
121
+ * Check if extraction state exists
122
+ */
123
+ export function hasExtractionState(storage, sessionId, toolName, toolCallId) {
124
+ const filePath = getStatePath(storage, sessionId, toolName, toolCallId);
125
+ return existsSync(filePath);
126
+ }
@@ -0,0 +1,22 @@
1
+ /**
2
+ * Document Context Extractor
3
+ *
4
+ * Main orchestration for extracting relevant information from large tool responses.
5
+ *
6
+ * Two-phase approach:
7
+ * 1. Score all chunks for relevance (0-10)
8
+ * 2. Extract only from high-scoring chunks
9
+ *
10
+ * Falls back to truncation if extraction fails.
11
+ */
12
+ import type { HookStorageInterface } from "../../types.js";
13
+ import type { ExtractorResult } from "./types.js";
14
+ /**
15
+ * Main extraction function
16
+ *
17
+ * Extracts relevant information from a large tool response using a two-phase approach:
18
+ * 1. Score all chunks for relevance
19
+ * 2. Extract from high-scoring chunks
20
+ */
21
+ export declare function extractDocumentContext(rawOutput: Record<string, unknown>, toolName: string, toolCallId: string, toolInput: Record<string, unknown>, conversationContext: string, targetTokens: number, sessionId: string, storage?: HookStorageInterface): Promise<ExtractorResult>;
22
+ export type { ChunkInfo, ExtractionState, ExtractorConfig, ExtractorResult, } from "./types.js";