@townco/agent 0.1.84 → 0.1.87
- package/dist/acp-server/adapter.d.ts +0 -8
- package/dist/definition/index.d.ts +0 -2
- package/dist/definition/index.js +0 -1
- package/dist/runner/agent-runner.d.ts +1 -2
- package/dist/runner/hooks/executor.d.ts +4 -8
- package/dist/runner/hooks/executor.js +9 -1
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.d.ts +37 -0
- package/dist/runner/hooks/predefined/document-context-extractor/chunk-manager.js +134 -0
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.d.ts +20 -0
- package/dist/runner/hooks/predefined/document-context-extractor/content-extractor.js +171 -0
- package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.d.ts +57 -0
- package/dist/runner/hooks/predefined/document-context-extractor/extraction-state.js +126 -0
- package/dist/runner/hooks/predefined/document-context-extractor/index.d.ts +22 -0
- package/dist/runner/hooks/predefined/document-context-extractor/index.js +338 -0
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.d.ts +19 -0
- package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js +156 -0
- package/dist/runner/hooks/predefined/document-context-extractor/types.d.ts +130 -0
- package/dist/runner/hooks/predefined/document-context-extractor/types.js +8 -0
- package/dist/runner/hooks/predefined/tool-response-compactor.js +77 -212
- package/dist/runner/hooks/types.d.ts +15 -8
- package/dist/runner/index.d.ts +51 -2
- package/dist/runner/langchain/index.js +27 -0
- package/dist/runner/langchain/tools/artifacts.d.ts +6 -6
- package/dist/runner/langchain/tools/artifacts.js +98 -93
- package/dist/runner/langchain/tools/browser.d.ts +9 -9
- package/dist/runner/langchain/tools/document_extract.d.ts +26 -0
- package/dist/runner/langchain/tools/document_extract.js +135 -0
- package/dist/runner/langchain/tools/filesystem.d.ts +3 -3
- package/dist/runner/langchain/tools/generate_image.d.ts +8 -8
- package/dist/runner/langchain/tools/todo.d.ts +10 -10
- package/dist/runner/tools.d.ts +2 -2
- package/dist/runner/tools.js +1 -0
- package/dist/scaffold/project-scaffold.js +4 -4
- package/dist/templates/index.d.ts +0 -1
- package/dist/tsconfig.tsbuildinfo +1 -1
- package/package.json +11 -13
- package/templates/index.ts +0 -1

package/dist/runner/hooks/predefined/document-context-extractor/index.d.ts
@@ -0,0 +1,22 @@
+/**
+ * Document Context Extractor
+ *
+ * Main orchestration for extracting relevant information from large tool responses.
+ *
+ * Two-phase approach:
+ * 1. Score all chunks for relevance (0-10)
+ * 2. Extract only from high-scoring chunks
+ *
+ * Falls back to truncation if extraction fails.
+ */
+import type { HookStorageInterface } from "../../types.js";
+import type { ExtractorResult } from "./types.js";
+/**
+ * Main extraction function
+ *
+ * Extracts relevant information from a large tool response using a two-phase approach:
+ * 1. Score all chunks for relevance
+ * 2. Extract from high-scoring chunks
+ */
+export declare function extractDocumentContext(rawOutput: Record<string, unknown>, toolName: string, toolCallId: string, toolInput: Record<string, unknown>, conversationContext: string, targetTokens: number, sessionId: string, storage?: HookStorageInterface): Promise<ExtractorResult>;
+export type { ChunkInfo, ExtractionState, ExtractorConfig, ExtractorResult, } from "./types.js";
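
For orientation, a minimal sketch of how a caller might invoke this new entry point. The signature comes from the declaration above; the tool name, IDs, payload, and 2,000-token target are illustrative stand-ins, not values from the package:

```ts
import { extractDocumentContext } from "./document-context-extractor/index.js";

// All argument values here are hypothetical.
const result = await extractDocumentContext(
    { results: ["...a very large search payload..."] }, // rawOutput
    "web_search",                                       // toolName
    "toolu_01",                                         // toolCallId
    { query: "Q3 revenue" },                            // toolInput
    "User asked for Q3 revenue figures.",               // conversationContext
    2000,                                               // targetTokens
    "session-123",                                      // sessionId
    // storage omitted: state persistence is optional
);
if (result.success) {
    console.log(result.extractedTokens, result.metadata.averageRelevanceScore);
}
```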

package/dist/runner/hooks/predefined/document-context-extractor/index.js
@@ -0,0 +1,338 @@
+/**
+ * Document Context Extractor
+ *
+ * Main orchestration for extracting relevant information from large tool responses.
+ *
+ * Two-phase approach:
+ * 1. Score all chunks for relevance (0-10)
+ * 2. Extract only from high-scoring chunks
+ *
+ * Falls back to truncation if extraction fails.
+ */
+import Anthropic from "@anthropic-ai/sdk";
+import { createLogger } from "../../../../logger.js";
+import { telemetry } from "../../../../telemetry/index.js";
+import { countTokens, countToolResultTokens, } from "../../../../utils/token-counter.js";
+import { calculateChunkSize, calculateMaxIterations, createChunks, getChunkStats, } from "./chunk-manager.js";
+import { extractFromChunks, mergeExtractions } from "./content-extractor.js";
+import { addExtractedContent, createExtractionState, recordError, saveExtractionState, updateChunkScore, updateStatePhase, } from "./extraction-state.js";
+import { filterRelevantChunks, scoreChunks } from "./relevance-scorer.js";
+const logger = createLogger("document-context-extractor");
+// Haiku 4.5 for extraction (fast and cost-effective)
+const EXTRACTION_MODEL = "claude-haiku-4-5-20251001";
+const EXTRACTION_MODEL_CONTEXT = 200000;
+// Default configuration values
+const DEFAULT_RELEVANCE_THRESHOLD = 5;
+const DEFAULT_CHUNK_OVERLAP = 200;
+const DEFAULT_SYSTEM_PROMPT_TOKENS = 500;
+// Create Anthropic client directly (not using LangChain)
+const anthropic = new Anthropic({
+    apiKey: process.env.ANTHROPIC_API_KEY,
+});
+/**
+ * Analyze what information needs to be extracted
+ */
+async function analyzeExtractionRequirements(toolName, toolInput, conversationContext) {
+    const analysisPrompt = `You are helping to extract relevant information from a large tool response.
+
+A tool was just called with these parameters:
+Tool: ${toolName}
+Input: ${JSON.stringify(toolInput, null, 2)}
+
+Recent conversation context:
+${conversationContext}
+
+Based on the tool input and conversation context, what key information is the user looking for from this tool response?
+
+Provide a concise list (3-5 bullet points) of the most important elements to extract.`;
+    const span = telemetry.startSpan("extractor.analysis", {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.provider.name": "anthropic",
+        "gen_ai.request.model": EXTRACTION_MODEL,
+        "gen_ai.system_instructions": "You are a helpful assistant analyzing information needs.",
+    });
+    try {
+        const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
+            model: EXTRACTION_MODEL,
+            max_tokens: 1024,
+            temperature: 0,
+            system: "You are a helpful assistant analyzing information needs.",
+            messages: [{ role: "user", content: analysisPrompt }],
+        }));
+        telemetry.recordTokenUsage(response.usage.input_tokens, response.usage.output_tokens, span);
+        telemetry.endSpan(span);
+        const firstContent = response.content[0];
+        return firstContent && firstContent.type === "text"
+            ? firstContent.text
+            : "Extract relevant information";
+    }
+    catch (error) {
+        telemetry.endSpan(span, error);
+        throw error;
+    }
+}
+/**
+ * Perform final compaction of merged extractions if still too large
+ */
+async function compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens) {
+    const currentTokens = countTokens(mergedContent);
+    // If already under target, return as structured result
+    if (currentTokens <= targetTokens) {
+        return {
+            extracted_content: mergedContent,
+            key_facts: keyFacts,
+            _extraction_notice: "Content extracted from large tool response",
+        };
+    }
+    // Need to compact further
+    const span = telemetry.startSpan("extractor.final_compaction", {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.provider.name": "anthropic",
+        "gen_ai.request.model": EXTRACTION_MODEL,
+        "extractor.phase": "merging",
+        "extractor.current_tokens": currentTokens,
+        "extractor.target_tokens": targetTokens,
+    });
+    try {
+        const compactionPrompt = `You are condensing extracted information to fit within a token limit.
+
+Key information needed:
+${keyRequirements}
+
+Current extracted content (${currentTokens.toLocaleString()} tokens):
+${mergedContent}
+
+Key facts identified:
+${keyFacts.map((f) => `- ${f}`).join("\n")}
+
+Target size: ${targetTokens.toLocaleString()} tokens (reduce by ${Math.round(((currentTokens - targetTokens) / currentTokens) * 100)}%)
+
+Your task: Create a more concise version that:
+1. Retains all key facts and critical information
+2. Removes redundancy and verbose explanations
+3. Preserves technical details, values, and identifiers
+4. Meets the target token limit
+
+Return ONLY the condensed content (no JSON wrapper needed).`;
+        const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
+            model: EXTRACTION_MODEL,
+            max_tokens: Math.min(targetTokens * 2, 8000),
+            temperature: 0,
+            system: "You are a helpful assistant condensing information while preserving key details.",
+            messages: [{ role: "user", content: compactionPrompt }],
+        }));
+        telemetry.recordTokenUsage(response.usage.input_tokens, response.usage.output_tokens, span);
+        telemetry.endSpan(span);
+        const firstContent = response.content[0];
+        const compactedContent = firstContent && firstContent.type === "text"
+            ? firstContent.text
+            : mergedContent;
+        return {
+            extracted_content: compactedContent,
+            key_facts: keyFacts,
+            _extraction_notice: "Content extracted and compacted from large tool response",
+        };
+    }
+    catch (error) {
+        telemetry.endSpan(span, error);
+        // On error, return original content
+        return {
+            extracted_content: mergedContent,
+            key_facts: keyFacts,
+            _extraction_notice: "Content extracted from large tool response (compaction skipped)",
+        };
+    }
+}
+/**
+ * Main extraction function
+ *
+ * Extracts relevant information from a large tool response using a two-phase approach:
+ * 1. Score all chunks for relevance
+ * 2. Extract from high-scoring chunks
+ */
+export async function extractDocumentContext(rawOutput, toolName, toolCallId, toolInput, conversationContext, targetTokens, sessionId, storage) {
+    const startTime = Date.now();
+    // Convert output to string for processing
+    const outputString = JSON.stringify(rawOutput, null, 2);
+    const originalTokens = countToolResultTokens(rawOutput);
+    logger.info("Starting document context extraction", {
+        toolName,
+        toolCallId,
+        originalTokens,
+        targetTokens,
+        sessionId,
+    });
+    // Build configuration
+    const config = {
+        model: EXTRACTION_MODEL,
+        modelContextSize: EXTRACTION_MODEL_CONTEXT,
+        targetTokens,
+        maxIterations: 0, // Will be calculated after chunking
+        systemPromptTokens: DEFAULT_SYSTEM_PROMPT_TOKENS,
+        relevanceThreshold: DEFAULT_RELEVANCE_THRESHOLD,
+        chunkOverlapTokens: DEFAULT_CHUNK_OVERLAP,
+    };
+    // Calculate chunk size and create chunks
+    const chunkSizeTokens = calculateChunkSize(config);
+    const chunks = createChunks(outputString, chunkSizeTokens, config.chunkOverlapTokens);
+    const chunkStats = getChunkStats(chunks);
+    // Update max iterations based on actual chunk count
+    config.maxIterations = calculateMaxIterations(originalTokens, chunkSizeTokens);
+    logger.info("Chunks created", {
+        ...chunkStats,
+        chunkSizeTokens,
+        maxIterations: config.maxIterations,
+    });
+    try {
+        // Step 1: Analyze what to extract
+        const keyRequirements = await analyzeExtractionRequirements(toolName, toolInput, conversationContext);
+        logger.info("Identified extraction requirements", {
+            requirements: keyRequirements.substring(0, 200),
+        });
+        // Initialize state for persistence
+        let state = createExtractionState({
+            sessionId,
+            toolCallId,
+            toolName,
+            originalTokenCount: originalTokens,
+            targetTokens,
+            keyRequirements,
+            chunks,
+            maxIterations: config.maxIterations,
+        });
+        // Save initial state
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        // Step 2: Score all chunks (Phase 1)
+        state = updateStatePhase(state, "scoring");
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        const scoredChunks = await scoreChunks(chunks, keyRequirements, config, (scored, total) => {
+            // Progress callback - just log progress
+            logger.debug("Scoring progress", { scored, total });
+        });
+        // Update state with all scores after scoring completes
+        for (const scoredChunk of scoredChunks) {
+            if (scoredChunk.relevanceScore !== undefined) {
+                state = updateChunkScore(state, scoredChunk.index, scoredChunk.relevanceScore, scoredChunk.scoreReason ?? "");
+            }
+        }
+        state = {
+            ...state,
+            chunks: scoredChunks,
+        };
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        // Check if we have any relevant chunks
+        const relevantChunks = filterRelevantChunks(scoredChunks, config.relevanceThreshold);
+        if (relevantChunks.length === 0) {
+            logger.warn("No chunks met relevance threshold", {
+                threshold: config.relevanceThreshold,
+                avgScore: scoredChunks.reduce((s, c) => s + (c.relevanceScore ?? 0), 0) /
+                    scoredChunks.length,
+            });
+            // Fall back to using top 3 chunks regardless of score
+            const topChunks = scoredChunks.slice(0, 3);
+            relevantChunks.push(...topChunks);
+        }
+        // Step 3: Extract from relevant chunks (Phase 2)
+        state = updateStatePhase(state, "extracting");
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        const extractions = await extractFromChunks(scoredChunks, keyRequirements, config, (extracted, _total) => {
+            state = {
+                ...state,
+                currentIteration: extracted,
+            };
+            if (storage) {
+                saveExtractionState(storage, state);
+            }
+        });
+        // Update state with extractions
+        for (const [chunkIndex, result] of extractions) {
+            state = addExtractedContent(state, chunkIndex, result.extracted);
+        }
+        // Step 4: Merge extractions
+        state = updateStatePhase(state, "merging");
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        const { content: mergedContent, keyFacts } = mergeExtractions(extractions, scoredChunks);
+        logger.info("Extractions merged", {
+            mergedContentTokens: countTokens(mergedContent),
+            keyFactsCount: keyFacts.length,
+        });
+        // Step 5: Final compaction if needed
+        const result = await compactFinalResult(mergedContent, keyFacts, keyRequirements, targetTokens);
+        const finalTokens = countToolResultTokens(result);
+        // Mark state as complete
+        state = updateStatePhase(state, "complete");
+        if (storage) {
+            saveExtractionState(storage, state);
+        }
+        const duration = Date.now() - startTime;
+        logger.info("Document context extraction complete", {
+            originalTokens,
+            finalTokens,
+            targetTokens,
+            chunksProcessed: chunks.length,
+            chunksExtractedFrom: extractions.size,
+            durationMs: duration,
+        });
+        return {
+            success: true,
+            extractedData: result,
+            extractedTokens: finalTokens,
+            metadata: {
+                originalTokens,
+                targetTokens,
+                chunksProcessed: chunks.length,
+                chunksExtractedFrom: extractions.size,
+                iterationsUsed: state.currentIteration,
+                phase: "complete",
+                averageRelevanceScore: scoredChunks.reduce((s, c) => s + (c.relevanceScore ?? 0), 0) /
+                    scoredChunks.length,
+            },
+        };
+    }
+    catch (error) {
+        const errorMessage = error instanceof Error ? error.message : String(error);
+        logger.error("Document context extraction failed", {
+            toolName,
+            toolCallId,
+            error: errorMessage,
+        });
+        // Update state with error
+        if (storage) {
+            let state = createExtractionState({
+                sessionId,
+                toolCallId,
+                toolName,
+                originalTokenCount: originalTokens,
+                targetTokens,
+                keyRequirements: "",
+                chunks,
+                maxIterations: config.maxIterations,
+            });
+            state = recordError(state, errorMessage);
+            state = updateStatePhase(state, "failed");
+            saveExtractionState(storage, state);
+        }
+        return {
+            success: false,
+            metadata: {
+                originalTokens,
+                targetTokens,
+                chunksProcessed: chunks.length,
+                chunksExtractedFrom: 0,
+                iterationsUsed: 0,
+                phase: "failed",
+            },
+            error: errorMessage,
+        };
+    }
+}
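
Note that the header's promise to "fall back to truncation if extraction fails" surfaces here only as a `success: false` result; the truncation itself presumably lives in the caller (the diffstat shows tool-response-compactor.js was reworked in this release, +77 -212). A hedged sketch of what that caller-side fallback could look like; `truncateToTokens`, its ~4-chars-per-token heuristic, and the argument values are assumptions, not the package's actual helper:

```ts
import { extractDocumentContext } from "./document-context-extractor/index.js";

// Hypothetical stand-in for a token-budget truncation helper (~4 chars/token).
function truncateToTokens(text: string, maxTokens: number): string {
    const maxChars = maxTokens * 4;
    return text.length > maxChars ? `${text.slice(0, maxChars)}...[truncated]` : text;
}

async function compactToolResult(raw: Record<string, unknown>, targetTokens: number) {
    const result = await extractDocumentContext(
        raw, "web_search", "toolu_01", {}, "", targetTokens, "session-123");
    if (result.success && result.extractedData) {
        return result.extractedData; // two-phase extraction succeeded
    }
    // Fallback promised by the module header: plain truncation.
    return { truncated: truncateToTokens(JSON.stringify(raw), targetTokens) };
}
```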

package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.d.ts
@@ -0,0 +1,19 @@
+/**
+ * Relevance scorer for document chunks
+ *
+ * Phase 1 of extraction: Score each chunk for relevance (0-10)
+ * to determine which chunks should be processed for extraction.
+ */
+import type { ChunkInfo, ExtractorConfig } from "./types.js";
+/**
+ * Score all chunks for relevance
+ *
+ * This is Phase 1 of the extraction process.
+ * Each chunk gets a relevance score (0-10) which determines
+ * whether it will be processed in Phase 2.
+ */
+export declare function scoreChunks(chunks: ChunkInfo[], keyRequirements: string, config: ExtractorConfig, onProgress?: (scored: number, total: number) => void): Promise<ChunkInfo[]>;
+/**
+ * Filter chunks by relevance threshold
+ */
+export declare function filterRelevantChunks(chunks: ChunkInfo[], threshold: number): ChunkInfo[];
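
A small usage sketch for these declarations. The config values mirror the defaults visible in index.js above; the single chunk, its offsets, and the requirements string are made-up data (real chunks come from createChunks() in chunk-manager.js):

```ts
import { filterRelevantChunks, scoreChunks } from "./relevance-scorer.js";
import type { ChunkInfo, ExtractorConfig } from "./types.js";

const config: ExtractorConfig = {
    model: "claude-haiku-4-5-20251001", // EXTRACTION_MODEL in index.js
    modelContextSize: 200000,
    targetTokens: 2000,
    maxIterations: 10, // index.js derives this from the chunk count
    systemPromptTokens: 500,
    relevanceThreshold: 5,
    chunkOverlapTokens: 200,
};

// One fabricated chunk for illustration.
const chunks: ChunkInfo[] = [
    { index: 0, startOffset: 0, endOffset: 120, tokenCount: 30, content: "Q3 revenue was $4.2M..." },
];

const scored = await scoreChunks(chunks, "- Q3 revenue figures", config,
    (done, total) => console.log(`scored ${done}/${total}`));
const relevant = filterRelevantChunks(scored, config.relevanceThreshold);
console.log(relevant.length);
```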

package/dist/runner/hooks/predefined/document-context-extractor/relevance-scorer.js
@@ -0,0 +1,156 @@
+/**
+ * Relevance scorer for document chunks
+ *
+ * Phase 1 of extraction: Score each chunk for relevance (0-10)
+ * to determine which chunks should be processed for extraction.
+ */
+import Anthropic from "@anthropic-ai/sdk";
+import { createLogger } from "../../../../logger.js";
+import { telemetry } from "../../../../telemetry/index.js";
+const logger = createLogger("relevance-scorer");
+// Create Anthropic client directly (not using LangChain)
+// This ensures scoring LLM calls don't get captured by LangGraph's streaming
+const anthropic = new Anthropic({
+    apiKey: process.env.ANTHROPIC_API_KEY,
+});
+const SCORING_SYSTEM_PROMPT = `You are evaluating content relevance for information extraction.
+
+Score the following content chunk from 0-10 based on how relevant it is to the extraction requirements.
+
+Scoring guide:
+- 0-2: Not relevant, can be skipped entirely
+- 3-4: Low relevance, minimal useful information
+- 5-6: Moderate relevance, contains some useful context
+- 7-8: Relevant, contains important information
+- 9-10: Highly relevant, contains critical information
+
+Respond with ONLY a JSON object in this exact format:
+{"score": N, "reason": "brief explanation (max 50 words)"}
+
+Do not include any other text before or after the JSON.`;
+/**
+ * Build the scoring prompt for a chunk
+ */
+function buildScoringPrompt(chunkContent, keyRequirements, chunkIndex, totalChunks) {
+    return `## Extraction Requirements
+${keyRequirements}
+
+## Content Chunk (${chunkIndex + 1}/${totalChunks})
+${chunkContent}
+
+## Task
+Score this chunk's relevance to the extraction requirements (0-10).`;
+}
+/**
+ * Parse scoring result from LLM response
+ */
+function parseScoringResult(responseText) {
+    try {
+        // Try to extract JSON from the response
+        const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
+        if (jsonMatch) {
+            const parsed = JSON.parse(jsonMatch[0]);
+            return {
+                score: Math.min(10, Math.max(0, Number(parsed.score) || 5)),
+                reason: String(parsed.reason || "No reason provided"),
+            };
+        }
+    }
+    catch {
+        // Fall through to default
+    }
+    // Try to extract just a number if JSON parsing failed
+    const numberMatch = responseText.match(/\b(\d+)\b/);
+    if (numberMatch) {
+        const score = Math.min(10, Math.max(0, Number(numberMatch[1])));
+        return { score, reason: "Score extracted from response" };
+    }
+    // Default to medium relevance if parsing fails
+    return { score: 5, reason: "Failed to parse scoring response" };
+}
+/**
+ * Score a single chunk for relevance
+ */
+async function scoreChunk(chunk, keyRequirements, totalChunks, config) {
+    const span = telemetry.startSpan("extractor.score_chunk", {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.provider.name": "anthropic",
+        "gen_ai.request.model": config.model,
+        "extractor.phase": "scoring",
+        "extractor.chunk_index": chunk.index,
+        "extractor.chunk_tokens": chunk.tokenCount,
+    });
+    try {
+        const prompt = buildScoringPrompt(chunk.content, keyRequirements, chunk.index, totalChunks);
+        const response = await telemetry.withActiveSpanAsync(span, () => anthropic.messages.create({
+            model: config.model,
+            max_tokens: 256,
+            temperature: 0,
+            system: SCORING_SYSTEM_PROMPT,
+            messages: [{ role: "user", content: prompt }],
+        }));
+        telemetry.recordTokenUsage(response.usage.input_tokens, response.usage.output_tokens, span);
+        telemetry.endSpan(span);
+        const firstContent = response.content[0];
+        const responseText = firstContent && firstContent.type === "text" ? firstContent.text : "";
+        return parseScoringResult(responseText);
+    }
+    catch (error) {
+        telemetry.endSpan(span, error);
+        logger.error("Failed to score chunk", {
+            chunkIndex: chunk.index,
+            error: error instanceof Error ? error.message : String(error),
+        });
+        // Return medium relevance on error to be safe
+        return {
+            score: 5,
+            reason: `Scoring error: ${error instanceof Error ? error.message : "Unknown"}`,
+        };
+    }
+}
+/**
+ * Score all chunks for relevance
+ *
+ * This is Phase 1 of the extraction process.
+ * Each chunk gets a relevance score (0-10) which determines
+ * whether it will be processed in Phase 2.
+ */
+export async function scoreChunks(chunks, keyRequirements, config, onProgress) {
+    logger.info("Starting chunk scoring phase", {
+        totalChunks: chunks.length,
+        relevanceThreshold: config.relevanceThreshold,
+    });
+    const scoredChunks = [];
+    for (const chunk of chunks) {
+        const result = await scoreChunk(chunk, keyRequirements, chunks.length, config);
+        const scoredChunk = {
+            ...chunk,
+            relevanceScore: result.score,
+            scoreReason: result.reason,
+        };
+        scoredChunks.push(scoredChunk);
+        logger.debug("Chunk scored", {
+            chunkIndex: chunk.index,
+            score: result.score,
+            reason: result.reason.substring(0, 100),
+        });
+        onProgress?.(scoredChunks.length, chunks.length);
+    }
+    // Sort by relevance score (highest first)
+    scoredChunks.sort((a, b) => (b.relevanceScore ?? 0) - (a.relevanceScore ?? 0));
+    const relevantCount = scoredChunks.filter((c) => (c.relevanceScore ?? 0) >= config.relevanceThreshold).length;
+    logger.info("Chunk scoring complete", {
+        totalChunks: chunks.length,
+        relevantChunks: relevantCount,
+        threshold: config.relevanceThreshold,
+        averageScore: scoredChunks.reduce((sum, c) => sum + (c.relevanceScore ?? 0), 0) /
+            scoredChunks.length,
+    });
+    return scoredChunks;
+}
+/**
+ * Filter chunks by relevance threshold
+ */
+export function filterRelevantChunks(chunks, threshold) {
+    return chunks.filter((c) => (c.relevanceScore ?? 0) >= threshold);
+}
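
Since parseScoringResult is private, here is a standalone restatement of its three-stage fallback (strict JSON match, then any bare integer, then a default of 5) with sample inputs, to show what malformed model output turns into:

```ts
// Restatement of the private parseScoringResult above, for illustration only.
function parseScore(responseText: string): { score: number; reason: string } {
    try {
        const jsonMatch = responseText.match(/\{[\s\S]*?\}/);
        if (jsonMatch) {
            const parsed = JSON.parse(jsonMatch[0]) as { score?: unknown; reason?: unknown };
            return {
                score: Math.min(10, Math.max(0, Number(parsed.score) || 5)),
                reason: String(parsed.reason || "No reason provided"),
            };
        }
    }
    catch {
        // malformed JSON: fall through to the bare-number heuristic
    }
    const numberMatch = responseText.match(/\b(\d+)\b/);
    if (numberMatch) {
        return { score: Math.min(10, Math.max(0, Number(numberMatch[1]))), reason: "Score extracted from response" };
    }
    return { score: 5, reason: "Failed to parse scoring response" };
}

console.log(parseScore('{"score": 8, "reason": "mentions Q3 revenue"}').score); // 8
console.log(parseScore("I'd rate this 7 out of 10").score);                     // 7 (bare number)
console.log(parseScore("no digits here").score);                                // 5 (default)
```

One quirk worth noting: `Number(parsed.score) || 5` treats a legitimate score of 0 as falsy, so a model reply of `{"score": 0}` is recorded as 5 rather than 0.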

package/dist/runner/hooks/predefined/document-context-extractor/types.d.ts
@@ -0,0 +1,130 @@
+/**
+ * Types for the document context extractor
+ *
+ * The extractor uses a two-phase approach:
+ * 1. Score all chunks for relevance (0-10)
+ * 2. Extract only from high-scoring chunks
+ */
+/**
+ * Configuration for the document context extractor
+ */
+export interface ExtractorConfig {
+    /** LLM model to use for extraction (e.g., claude-haiku-4-5-20251001) */
+    model: string;
+    /** Model's context window size in tokens */
+    modelContextSize: number;
+    /** Target output size in tokens */
+    targetTokens: number;
+    /** Maximum iterations for the extraction process */
+    maxIterations: number;
+    /** Estimated tokens for system prompt overhead */
+    systemPromptTokens: number;
+    /** Minimum relevance score (0-10) to extract from a chunk */
+    relevanceThreshold: number;
+    /** Token overlap between chunks to maintain context continuity */
+    chunkOverlapTokens: number;
+}
+/**
+ * Information about a single chunk of the document
+ */
+export interface ChunkInfo {
+    /** Index of this chunk in the document (0-based) */
+    index: number;
+    /** Character offset where this chunk starts in the original document */
+    startOffset: number;
+    /** Character offset where this chunk ends in the original document */
+    endOffset: number;
+    /** Estimated token count of this chunk */
+    tokenCount: number;
+    /** The actual content of this chunk */
+    content: string;
+    /** Relevance score (0-10) assigned during scoring phase */
+    relevanceScore?: number;
+    /** Brief reason for the relevance score */
+    scoreReason?: string;
+}
+/**
+ * Extraction phase states
+ */
+export type ExtractionPhase = "initializing" | "scoring" | "extracting" | "merging" | "complete" | "failed";
+/**
+ * State of the extraction process (persisted to artifacts)
+ */
+export interface ExtractionState {
+    /** Session ID for this extraction */
+    sessionId: string;
+    /** Tool call ID that triggered this extraction */
+    toolCallId: string;
+    /** Name of the tool that produced the large response */
+    toolName: string;
+    /** Original token count of the tool response */
+    originalTokenCount: number;
+    /** Target token count for the extracted result */
+    targetTokens: number;
+    /** Key requirements identified for extraction */
+    keyRequirements: string;
+    /** All chunks of the document */
+    chunks: ChunkInfo[];
+    /** Current phase of extraction */
+    phase: ExtractionPhase;
+    /** Extracted content mapped by chunk index */
+    extractedContent: Record<number, string>;
+    /** Current iteration number */
+    currentIteration: number;
+    /** Maximum allowed iterations */
+    maxIterations: number;
+    /** Any errors encountered during extraction */
+    errors: string[];
+    /** Timestamp when extraction started */
+    startedAt: string;
+    /** Timestamp of last update */
+    updatedAt: string;
+}
+/**
+ * Result of the extraction process
+ */
+export interface ExtractorResult {
+    /** Whether extraction completed successfully */
+    success: boolean;
+    /** The extracted data (if successful) */
+    extractedData?: Record<string, unknown>;
+    /** Token count of the extracted data */
+    extractedTokens?: number;
+    /** Metadata about the extraction process */
+    metadata: {
+        /** Original token count */
+        originalTokens: number;
+        /** Target token count */
+        targetTokens: number;
+        /** Number of chunks processed */
+        chunksProcessed: number;
+        /** Number of chunks that had content extracted */
+        chunksExtractedFrom: number;
+        /** Number of iterations used */
+        iterationsUsed: number;
+        /** Final phase of the extraction */
+        phase: ExtractionPhase;
+        /** Average relevance score across all chunks */
+        averageRelevanceScore?: number;
+    };
+    /** Error message if extraction failed */
+    error?: string;
+}
+/**
+ * Progress callback for extraction phases
+ */
+export type OnProgress = (phase: ExtractionPhase, current: number, total: number, message?: string) => void;
+/**
+ * Scoring result from the LLM
+ */
+export interface ScoringResult {
+    score: number;
+    reason: string;
+}
+/**
+ * Extraction result from the LLM for a single chunk
+ */
+export interface ChunkExtractionResult {
+    extracted: string;
+    keyFacts: string[];
+}
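
To close, a hypothetical consumer of ExtractorResult. Because `success` is a plain boolean rather than a discriminated union, `extractedData` and `extractedTokens` stay optional even on the success path and need explicit checks:

```ts
import type { ExtractorResult } from "./types.js";

// Hypothetical helper: turn a result into a one-line log summary.
function summarizeResult(result: ExtractorResult): string {
    const { originalTokens, chunksProcessed, chunksExtractedFrom, phase } = result.metadata;
    if (!result.success) {
        return `extraction failed in phase "${phase}": ${result.error ?? "unknown error"}`;
    }
    return `compressed ${originalTokens} -> ${result.extractedTokens ?? 0} tokens ` +
        `using ${chunksExtractedFrom}/${chunksProcessed} chunks`;
}
```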