@luckydraw/cumulus 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +148 -0
- package/dist/cli/cumulus.d.ts +3 -0
- package/dist/cli/cumulus.d.ts.map +1 -0
- package/dist/cli/cumulus.js +233 -0
- package/dist/cli/cumulus.js.map +1 -0
- package/dist/index.d.ts +33 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +43 -0
- package/dist/index.js.map +1 -0
- package/dist/lib/config.d.ts +86 -0
- package/dist/lib/config.d.ts.map +1 -0
- package/dist/lib/config.js +241 -0
- package/dist/lib/config.js.map +1 -0
- package/dist/lib/content-detector.d.ts +46 -0
- package/dist/lib/content-detector.d.ts.map +1 -0
- package/dist/lib/content-detector.js +359 -0
- package/dist/lib/content-detector.js.map +1 -0
- package/dist/lib/content-store.d.ts +255 -0
- package/dist/lib/content-store.d.ts.map +1 -0
- package/dist/lib/content-store.js +955 -0
- package/dist/lib/content-store.js.map +1 -0
- package/dist/lib/context-budget.d.ts +83 -0
- package/dist/lib/context-budget.d.ts.map +1 -0
- package/dist/lib/context-budget.js +101 -0
- package/dist/lib/context-budget.js.map +1 -0
- package/dist/lib/embeddings.d.ts +64 -0
- package/dist/lib/embeddings.d.ts.map +1 -0
- package/dist/lib/embeddings.js +176 -0
- package/dist/lib/embeddings.js.map +1 -0
- package/dist/lib/history.d.ts +120 -0
- package/dist/lib/history.d.ts.map +1 -0
- package/dist/lib/history.js +205 -0
- package/dist/lib/history.js.map +1 -0
- package/dist/lib/image-utils.d.ts +41 -0
- package/dist/lib/image-utils.d.ts.map +1 -0
- package/dist/lib/image-utils.js +288 -0
- package/dist/lib/image-utils.js.map +1 -0
- package/dist/lib/migrate.d.ts +35 -0
- package/dist/lib/migrate.d.ts.map +1 -0
- package/dist/lib/migrate.js +196 -0
- package/dist/lib/migrate.js.map +1 -0
- package/dist/lib/retriever.d.ts +56 -0
- package/dist/lib/retriever.d.ts.map +1 -0
- package/dist/lib/retriever.js +644 -0
- package/dist/lib/retriever.js.map +1 -0
- package/dist/lib/revert.d.ts +23 -0
- package/dist/lib/revert.d.ts.map +1 -0
- package/dist/lib/revert.js +75 -0
- package/dist/lib/revert.js.map +1 -0
- package/dist/lib/session.d.ts +65 -0
- package/dist/lib/session.d.ts.map +1 -0
- package/dist/lib/session.js +289 -0
- package/dist/lib/session.js.map +1 -0
- package/dist/lib/snapshots.d.ts +39 -0
- package/dist/lib/snapshots.d.ts.map +1 -0
- package/dist/lib/snapshots.js +99 -0
- package/dist/lib/snapshots.js.map +1 -0
- package/dist/lib/stream-processor.d.ts +149 -0
- package/dist/lib/stream-processor.d.ts.map +1 -0
- package/dist/lib/stream-processor.js +389 -0
- package/dist/lib/stream-processor.js.map +1 -0
- package/dist/lib/summarizer.d.ts +67 -0
- package/dist/lib/summarizer.d.ts.map +1 -0
- package/dist/lib/summarizer.js +213 -0
- package/dist/lib/summarizer.js.map +1 -0
- package/dist/mcp/index.d.ts +3 -0
- package/dist/mcp/index.d.ts.map +1 -0
- package/dist/mcp/index.js +16 -0
- package/dist/mcp/index.js.map +1 -0
- package/dist/mcp/proxy.d.ts +19 -0
- package/dist/mcp/proxy.d.ts.map +1 -0
- package/dist/mcp/proxy.js +120 -0
- package/dist/mcp/proxy.js.map +1 -0
- package/dist/mcp/server.d.ts +6 -0
- package/dist/mcp/server.d.ts.map +1 -0
- package/dist/mcp/server.js +29 -0
- package/dist/mcp/server.js.map +1 -0
- package/dist/mcp/shared-server.d.ts +21 -0
- package/dist/mcp/shared-server.d.ts.map +1 -0
- package/dist/mcp/shared-server.js +210 -0
- package/dist/mcp/shared-server.js.map +1 -0
- package/dist/mcp/tool-handler.d.ts +20 -0
- package/dist/mcp/tool-handler.d.ts.map +1 -0
- package/dist/mcp/tool-handler.js +1405 -0
- package/dist/mcp/tool-handler.js.map +1 -0
- package/dist/tui/components/App.d.ts +11 -0
- package/dist/tui/components/App.d.ts.map +1 -0
- package/dist/tui/components/App.js +607 -0
- package/dist/tui/components/App.js.map +1 -0
- package/dist/tui/components/DebugContextView.d.ts +13 -0
- package/dist/tui/components/DebugContextView.d.ts.map +1 -0
- package/dist/tui/components/DebugContextView.js +78 -0
- package/dist/tui/components/DebugContextView.js.map +1 -0
- package/dist/tui/components/IncludeMenu.d.ts +12 -0
- package/dist/tui/components/IncludeMenu.d.ts.map +1 -0
- package/dist/tui/components/IncludeMenu.js +127 -0
- package/dist/tui/components/IncludeMenu.js.map +1 -0
- package/dist/tui/components/InputArea.d.ts +27 -0
- package/dist/tui/components/InputArea.d.ts.map +1 -0
- package/dist/tui/components/InputArea.js +366 -0
- package/dist/tui/components/InputArea.js.map +1 -0
- package/dist/tui/components/MarkdownText.d.ts +38 -0
- package/dist/tui/components/MarkdownText.d.ts.map +1 -0
- package/dist/tui/components/MarkdownText.js +234 -0
- package/dist/tui/components/MarkdownText.js.map +1 -0
- package/dist/tui/components/MessageBubble.d.ts +11 -0
- package/dist/tui/components/MessageBubble.d.ts.map +1 -0
- package/dist/tui/components/MessageBubble.js +16 -0
- package/dist/tui/components/MessageBubble.js.map +1 -0
- package/dist/tui/components/MessageHistory.d.ts +11 -0
- package/dist/tui/components/MessageHistory.d.ts.map +1 -0
- package/dist/tui/components/MessageHistory.js +12 -0
- package/dist/tui/components/MessageHistory.js.map +1 -0
- package/dist/tui/components/RevertMenu.d.ts +17 -0
- package/dist/tui/components/RevertMenu.d.ts.map +1 -0
- package/dist/tui/components/RevertMenu.js +144 -0
- package/dist/tui/components/RevertMenu.js.map +1 -0
- package/dist/tui/components/StatusBar.d.ts +14 -0
- package/dist/tui/components/StatusBar.d.ts.map +1 -0
- package/dist/tui/components/StatusBar.js +13 -0
- package/dist/tui/components/StatusBar.js.map +1 -0
- package/dist/tui/components/StreamingResponse.d.ts +15 -0
- package/dist/tui/components/StreamingResponse.d.ts.map +1 -0
- package/dist/tui/components/StreamingResponse.js +52 -0
- package/dist/tui/components/StreamingResponse.js.map +1 -0
- package/dist/tui/hooks/useAppState.d.ts +147 -0
- package/dist/tui/hooks/useAppState.d.ts.map +1 -0
- package/dist/tui/hooks/useAppState.js +110 -0
- package/dist/tui/hooks/useAppState.js.map +1 -0
- package/dist/tui/hooks/useClaudeProcess.d.ts +19 -0
- package/dist/tui/hooks/useClaudeProcess.d.ts.map +1 -0
- package/dist/tui/hooks/useClaudeProcess.js +185 -0
- package/dist/tui/hooks/useClaudeProcess.js.map +1 -0
- package/dist/tui/index.d.ts +10 -0
- package/dist/tui/index.d.ts.map +1 -0
- package/dist/tui/index.js +11 -0
- package/dist/tui/index.js.map +1 -0
- package/dist/tui/utils/streamParser.d.ts +31 -0
- package/dist/tui/utils/streamParser.d.ts.map +1 -0
- package/dist/tui/utils/streamParser.js +63 -0
- package/dist/tui/utils/streamParser.js.map +1 -0
- package/package.json +94 -0
|
@@ -0,0 +1,1405 @@
|
|
|
1
|
+
import { spawn } from 'child_process';
|
|
2
|
+
import * as fs from 'fs/promises';
|
|
3
|
+
import * as path from 'path';
|
|
4
|
+
import { cosineSimilarity, embeddingsAvailable, generateMissingEmbeddings, getEmbeddingProvider, loadEmbeddings, } from '../lib/embeddings.js';
|
|
5
|
+
import { SessionManager } from '../lib/session.js';
|
|
6
|
+
import { getSummary } from '../lib/summarizer.js';
|
|
7
|
+
// Sub-query configuration
const MAX_SUB_QUERIES_PER_REQUEST = 10;
const SUB_QUERY_TIMEOUT_MS = 60000; // 60 seconds for CLI
// How long a per-request state entry may sit idle before being reaped,
// and how often the reaper runs (both 5 minutes).
const REQUEST_STATE_TTL_MS = 5 * 60 * 1000;
// Track per-request sub_query usage: requestId -> { subQueryCount, timestamp }
const requestStates = new Map();
// Clean up stale request states periodically so the map does not grow
// without bound in a long-lived server process.
const requestStateCleanupTimer = setInterval(() => {
  const cutoff = Date.now() - REQUEST_STATE_TTL_MS;
  for (const [requestId, state] of requestStates.entries()) {
    if (state.timestamp < cutoff) {
      requestStates.delete(requestId);
    }
  }
}, REQUEST_STATE_TTL_MS);
// Do not let the housekeeping timer keep the Node process alive on its own
// (without unref, the process would never exit while this module is loaded).
requestStateCleanupTimer.unref?.();
|
|
21
|
+
/**
 * Execute a sub-query using the `claude` CLI.
 *
 * Uses --print (non-interactive) mode with the haiku model by default for
 * cost efficiency; callers may opt into sonnet via params.model.
 *
 * @param {{ prompt: string, context: string, model?: string }} params
 * @param {string} requestId - key used for per-request rate limiting
 * @returns {Promise<{ response: string }>} trimmed stdout of the CLI call
 * @throws {Error} when the per-request quota is exceeded, the CLI is missing,
 *   exits non-zero, or the call times out
 */
async function handleSubQuery(params, requestId) {
    // Enforce the per-request quota before doing any work.
    const state = requestStates.get(requestId) || { subQueryCount: 0, timestamp: Date.now() };
    if (state.subQueryCount >= MAX_SUB_QUERIES_PER_REQUEST) {
        throw new Error(`Maximum sub_query calls (${MAX_SUB_QUERIES_PER_REQUEST}) exceeded for this request`);
    }
    // Record the usage up front so failed calls still count against the quota.
    state.subQueryCount++;
    state.timestamp = Date.now();
    requestStates.set(requestId, state);
    // Wrap the caller's context in tags so the model can distinguish it
    // from the actual question.
    const fullPrompt = `<context>
${params.context}
</context>

Based on the context above, answer this question concisely:
${params.prompt}`;
    // Only 'sonnet' is honored explicitly; any other value falls back to haiku.
    const modelFlag = params.model === 'sonnet' ? 'sonnet' : 'haiku';
    const args = ['--print', '--model', modelFlag, fullPrompt];
    return new Promise((resolve, reject) => {
        // Filter out Claude Code env vars so the child process is isolated
        // (the startsWith check also covers CLAUDECODE).
        const cleanEnv = Object.fromEntries(Object.entries(process.env).filter(([key]) => !key.startsWith('CLAUDE')));
        const claude = spawn('claude', args, {
            stdio: ['ignore', 'pipe', 'pipe'],
            env: cleanEnv,
        });
        let stdout = '';
        let stderr = '';
        let timedOut = false;
        const timeout = setTimeout(() => {
            timedOut = true;
            claude.kill();
            // Derive the human-readable duration from the constant so the
            // message stays correct if the timeout is ever tuned.
            reject(new Error(`sub_query timed out after ${SUB_QUERY_TIMEOUT_MS / 1000} seconds`));
        }, SUB_QUERY_TIMEOUT_MS);
        claude.stdout.on('data', (data) => {
            stdout += data.toString();
        });
        claude.stderr.on('data', (data) => {
            stderr += data.toString();
        });
        claude.on('close', (code) => {
            clearTimeout(timeout);
            if (timedOut) {
                // The promise was already rejected by the timeout handler.
                return;
            }
            if (code === 0) {
                resolve({ response: stdout.trim() });
            }
            else {
                reject(new Error(`claude exited with code ${code}: ${stderr}`));
            }
        });
        claude.on('error', (err) => {
            clearTimeout(timeout);
            if (err.message.includes('ENOENT')) {
                reject(new Error('Claude CLI not found. Please install it first.'));
            }
            else {
                reject(err);
            }
        });
    });
}
|
|
84
|
+
// Maximum characters of message content shown in search-result snippets.
const SNIPPET_LENGTH = 200;
// Hard cap on how many messages one read_messages call may return.
const MAX_READ_MESSAGES = 50;
// peek_recent defaults and upper bound.
const DEFAULT_PEEK_COUNT = 5;
const MAX_PEEK_COUNT = 20;
// Default result count for search_history / search_content.
const DEFAULT_SEARCH_LIMIT = 10;
|
|
89
|
+
/**
 * Truncate message content for display in search results.
 * Content that already fits is returned unchanged; otherwise the first
 * `maxLength` characters are kept and an ellipsis marker is appended.
 */
function createSnippet(content, maxLength = SNIPPET_LENGTH) {
    return content.length > maxLength ? `${content.slice(0, maxLength)}...` : content;
}
|
|
95
|
+
/**
 * Convert an epoch-milliseconds timestamp into an ISO-8601 string.
 */
function timestampToISO(timestamp) {
    const date = new Date(timestamp);
    return date.toISOString();
}
|
|
98
|
+
/**
 * Shape a stored history message into a search-result record: ISO
 * timestamp, truncated content snippet, and the message's history index.
 * A `sessionId` field is attached only when the message metadata has one.
 */
function messageToSearchResult(msg, index) {
    const base = {
        id: msg.id,
        timestamp: timestampToISO(msg.timestamp),
        role: msg.role,
        content_snippet: createSnippet(msg.content),
        index,
    };
    const sessionId = msg.metadata?.sessionId;
    return sessionId ? { ...base, sessionId } : base;
}
|
|
111
|
+
/**
 * Shape a stored history message into a full-content result record
 * (same shape as a search result, but with the complete `content`
 * instead of a snippet). `sessionId` is attached only when present
 * in the message metadata.
 */
function messageToResult(msg, index) {
    const base = {
        id: msg.id,
        timestamp: timestampToISO(msg.timestamp),
        role: msg.role,
        content: msg.content,
        index,
    };
    const sessionId = msg.metadata?.sessionId;
    return sessionId ? { ...base, sessionId } : base;
}
|
|
124
|
+
/**
 * MCP tool definitions exposed by this server.
 *
 * Each entry follows the MCP tool shape: a name, a human-readable
 * description, and a JSON-schema `inputSchema` describing its arguments.
 * The array is rebuilt on every call, so callers may mutate the result.
 */
export function getToolDefinitions() {
    // Small builders that cut down on inputSchema boilerplate.
    const tool = (name, description, properties, required) => ({
        name,
        description,
        inputSchema: { type: 'object', properties, required },
    });
    const str = (description) => ({ type: 'string', description });
    const num = (description) => ({ type: 'number', description });
    const strEnum = (values, description) => ({ type: 'string', enum: values, description });
    const strArray = (description) => ({ type: 'array', items: { type: 'string' }, description });
    return [
        // Conversation-history tools
        tool('search_history', 'Search conversation history for relevant messages. Supports keyword, semantic, and hybrid search modes.', {
            query: str('Search query'),
            limit: num('Maximum number of results (default: 10)'),
            mode: strEnum(['keyword', 'semantic', 'hybrid'], "Search mode: 'keyword' (exact substring), 'semantic' (meaning-based), 'hybrid' (combined). Default: hybrid"),
        }, ['query']),
        tool('read_messages', 'Read full message content for a range of history indices. Use after search_history to get complete message content.', {
            startIndex: num('Start index (0-indexed, inclusive)'),
            endIndex: num('End index (inclusive)'),
        }, ['startIndex', 'endIndex']),
        tool('peek_recent', 'Get the most recent messages from conversation history. Useful for understanding current context.', {
            count: num('Number of recent messages to retrieve (default: 5, max: 20)'),
        }, []),
        tool('get_history_stats', 'Get statistics about the conversation history including message count, token estimate, and time range.', {}, []),
        tool('sub_query', 'Make a recursive LLM call to reason over a specific context snippet. Use this to extract focused answers from large contexts (like conversation history) without consuming main context window. Defaults to fast/cheap haiku model.', {
            prompt: str('The question to answer based on the provided context'),
            context: str('The context snippet (conversation history, documents, etc.) to reason over'),
            model: strEnum(['haiku', 'sonnet'], "Model for sub-query. Default: 'haiku' (fast/cheap). Use 'sonnet' for complex reasoning."),
        }, ['prompt', 'context']),
        tool('get_summary', "Get a summary of conversation history. Use 'recent' for the latest chunk, 'full' for overall summary, or 'range' for specific message indices. Summaries are auto-generated every 50 messages.", {
            scope: strEnum(['recent', 'full', 'range'], "Summary scope: 'recent' (last chunk), 'full' (entire conversation), 'range' (specific messages). Default: recent"),
            startIndex: num("Starting message index (only for scope='range')"),
            endIndex: num("Ending message index (only for scope='range')"),
        }, []),
        // Content retrieval tools (for externalized content)
        tool('retrieve_content', 'Retrieve externally stored content by reference ID. Use when you see [STORED:xxx] references in tool results.', {
            contentId: str('The content reference ID (e.g., cnt_abc123)'),
            chunkIndex: num('Optional: retrieve only a specific chunk (0-indexed)'),
        }, ['contentId']),
        tool('search_content', 'Search across all externally stored content (file reads, command outputs, web fetches, etc.).', {
            query: str('Search query'),
            sourceTypes: strArray("Filter by source type: 'file_read', 'bash_output', 'web_fetch', 'user_input', 'tool_result'"),
            limit: num('Maximum number of results (default: 10)'),
        }, ['query']),
        tool('list_stored_content', 'List all externally stored content with summaries. Use to see what large content is available.', {
            limit: num('Maximum number of items to return (default: 20)'),
            sourceTypes: strArray('Filter by source type'),
        }, []),
        tool('read_content_chunk', 'Read a specific chunk of stored content. Use for navigating through large content piece by piece.', {
            contentId: str('The content reference ID'),
            chunkIndex: num('The chunk index to read (0-indexed)'),
        }, ['contentId', 'chunkIndex']),
        tool('forget_content', 'Permanently delete stored content that should be forgotten. Use when the user says content was incorrect, outdated, or should no longer be referenced. This removes it from storage and search results.', {
            contentId: str('The content reference ID to forget (e.g., cnt_abc123). Get this from list_stored_content or search_content.'),
            reason: str('Brief reason for forgetting (for logging)'),
        }, ['contentId']),
        // File read and content storage tools
        tool('read_file', 'CRITICAL: This tool REPLACES the built-in Read tool. You MUST use this for ALL file reads — text, code, PDFs. The built-in Read tool is DISABLED in this environment. read_file stores content in a vector database for persistent retrieval across sessions. Using the built-in Read tool bypasses storage and loses all context permanently. Returns a summary, content ID, and chunk table-of-contents. Use read_content_chunk to access specific chunks.', {
            file_path: str('Absolute path to the file to read'),
            encoding: str("File encoding for text files (default: 'utf-8'). Ignored for PDFs."),
        }, ['file_path']),
        tool('store_content', 'Store arbitrary text content in the content store for future retrieval. Use this AFTER reading PDFs or images with the built-in Read tool — pass the extracted text so it gets chunked, indexed, and searchable in future sessions.', {
            content: str('The text content to store'),
            source: str('Description of where this content came from (e.g., "PDF: /path/to/file.pdf pages 1-20")'),
            file_path: str('Optional file path for metadata (if content came from a file)'),
        }, ['content']),
        tool('detect_anomalies', "Analyze stored content for anomalous chunks that don't belong in their surrounding context. Uses embedding-based rolling window coherence scoring. Useful for finding out-of-place content, corruption, or misplaced sections.", {
            contentId: str('The stored content ID to analyze'),
            threshold: num('Override the adaptive threshold (0-1). If not set, uses max(mean - 2σ, 0.3)'),
        }, ['contentId']),
    ];
}
|
|
394
|
+
export async function handleToolCall(name, args, ctx) {
|
|
395
|
+
const { historyStore, contentStore, sessionsPath, currentSessionId } = ctx;
|
|
396
|
+
try {
|
|
397
|
+
switch (name) {
|
|
398
|
+
case 'search_history': {
|
|
399
|
+
const query = args?.query;
|
|
400
|
+
const limit = Math.max(1, args?.limit || DEFAULT_SEARCH_LIMIT);
|
|
401
|
+
const mode = args?.mode || 'hybrid';
|
|
402
|
+
if (!query || typeof query !== 'string') {
|
|
403
|
+
return {
|
|
404
|
+
content: [
|
|
405
|
+
{
|
|
406
|
+
type: 'text',
|
|
407
|
+
text: JSON.stringify({ error: 'query parameter is required' }),
|
|
408
|
+
},
|
|
409
|
+
],
|
|
410
|
+
};
|
|
411
|
+
}
|
|
412
|
+
const allMsgs = await historyStore.getAll();
|
|
413
|
+
// Exclude session docs from message search (they're searched separately)
|
|
414
|
+
const messages = allMsgs.filter(m => m.role !== 'session');
|
|
415
|
+
const queryLower = query.toLowerCase();
|
|
416
|
+
const queryTerms = queryLower.split(/\s+/);
|
|
417
|
+
// Keyword search scores
|
|
418
|
+
const keywordScores = new Map();
|
|
419
|
+
for (let i = 0; i < messages.length; i++) {
|
|
420
|
+
const msg = messages[i];
|
|
421
|
+
if (!msg)
|
|
422
|
+
continue;
|
|
423
|
+
const contentLower = msg.content.toLowerCase();
|
|
424
|
+
let score = 0;
|
|
425
|
+
for (const term of queryTerms) {
|
|
426
|
+
if (contentLower.includes(term)) {
|
|
427
|
+
// Boost for exact phrase match
|
|
428
|
+
if (contentLower.includes(queryLower)) {
|
|
429
|
+
score += 2;
|
|
430
|
+
}
|
|
431
|
+
else {
|
|
432
|
+
score += 1;
|
|
433
|
+
}
|
|
434
|
+
}
|
|
435
|
+
}
|
|
436
|
+
// Normalize to 0-1
|
|
437
|
+
if (score > 0) {
|
|
438
|
+
keywordScores.set(i, Math.min(score / queryTerms.length, 1));
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
// Semantic search scores (if available and mode requires it)
|
|
442
|
+
const semanticScores = new Map();
|
|
443
|
+
const useSemanticSearch = mode !== 'keyword' && (await embeddingsAvailable());
|
|
444
|
+
if (useSemanticSearch) {
|
|
445
|
+
try {
|
|
446
|
+
// Generate missing embeddings first
|
|
447
|
+
await generateMissingEmbeddings(historyStore.threadPath, messages.map(m => ({ id: m.id, content: m.content })));
|
|
448
|
+
// Load embeddings and generate query embedding
|
|
449
|
+
const embeddings = await loadEmbeddings(historyStore.threadPath);
|
|
450
|
+
const provider = getEmbeddingProvider();
|
|
451
|
+
const [queryEmbedding] = await provider.embed([query]);
|
|
452
|
+
// Calculate similarity for each message
|
|
453
|
+
for (let i = 0; i < messages.length; i++) {
|
|
454
|
+
const msg = messages[i];
|
|
455
|
+
if (!msg)
|
|
456
|
+
continue;
|
|
457
|
+
const embedding = embeddings.get(msg.id);
|
|
458
|
+
if (!embedding || !queryEmbedding)
|
|
459
|
+
continue;
|
|
460
|
+
const similarity = cosineSimilarity(queryEmbedding, embedding);
|
|
461
|
+
if (similarity > 0.3) {
|
|
462
|
+
// Threshold for relevance
|
|
463
|
+
semanticScores.set(i, similarity);
|
|
464
|
+
}
|
|
465
|
+
}
|
|
466
|
+
}
|
|
467
|
+
catch {
|
|
468
|
+
// Fall back to keyword-only if embeddings fail
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
const combined = [];
|
|
472
|
+
const allIndices = new Set([...keywordScores.keys(), ...semanticScores.keys()]);
|
|
473
|
+
for (const index of allIndices) {
|
|
474
|
+
const kwScore = keywordScores.get(index) ?? 0;
|
|
475
|
+
const semScore = semanticScores.get(index) ?? 0;
|
|
476
|
+
let finalScore;
|
|
477
|
+
if (mode === 'keyword') {
|
|
478
|
+
finalScore = kwScore;
|
|
479
|
+
}
|
|
480
|
+
else if (mode === 'semantic') {
|
|
481
|
+
finalScore = semScore;
|
|
482
|
+
}
|
|
483
|
+
else {
|
|
484
|
+
// hybrid: combine with weights (semantic weighted higher)
|
|
485
|
+
finalScore = kwScore * 0.3 + semScore * 0.7;
|
|
486
|
+
// Boost if both match
|
|
487
|
+
if (kwScore > 0 && semScore > 0) {
|
|
488
|
+
finalScore *= 1.2;
|
|
489
|
+
}
|
|
490
|
+
}
|
|
491
|
+
if (finalScore > 0) {
|
|
492
|
+
combined.push({
|
|
493
|
+
index,
|
|
494
|
+
score: finalScore,
|
|
495
|
+
keywordScore: kwScore > 0 ? kwScore : undefined,
|
|
496
|
+
semanticScore: semScore > 0 ? semScore : undefined,
|
|
497
|
+
});
|
|
498
|
+
}
|
|
499
|
+
}
|
|
500
|
+
// Sort by score descending, then by index descending (most recent first)
|
|
501
|
+
combined.sort((a, b) => {
|
|
502
|
+
if (b.score !== a.score) {
|
|
503
|
+
return b.score - a.score;
|
|
504
|
+
}
|
|
505
|
+
return b.index - a.index; // Tiebreaker: more recent first
|
|
506
|
+
});
|
|
507
|
+
// Build message results
|
|
508
|
+
const results = [];
|
|
509
|
+
for (const scored of combined.slice(0, limit)) {
|
|
510
|
+
const msg = messages[scored.index];
|
|
511
|
+
if (msg) {
|
|
512
|
+
results.push({
|
|
513
|
+
...messageToSearchResult(msg, scored.index),
|
|
514
|
+
score: Math.round(scored.score * 100) / 100,
|
|
515
|
+
});
|
|
516
|
+
}
|
|
517
|
+
}
|
|
518
|
+
// Search session documents if available
|
|
519
|
+
if (sessionsPath && useSemanticSearch) {
|
|
520
|
+
try {
|
|
521
|
+
const provider = getEmbeddingProvider();
|
|
522
|
+
const [queryEmbedding] = await provider.embed([query]);
|
|
523
|
+
if (queryEmbedding) {
|
|
524
|
+
const sessionResults = await SessionManager.searchAllSessions(sessionsPath, queryEmbedding, currentSessionId ?? '', 0.3);
|
|
525
|
+
for (const sr of sessionResults.slice(0, 3)) {
|
|
526
|
+
results.push({
|
|
527
|
+
id: `${sr.sessionId}:chunk${sr.chunkIndex}`,
|
|
528
|
+
timestamp: new Date().toISOString(),
|
|
529
|
+
role: 'session',
|
|
530
|
+
content_snippet: sr.content_snippet,
|
|
531
|
+
index: -1,
|
|
532
|
+
sessionId: sr.sessionId,
|
|
533
|
+
score: Math.round(sr.score * 100) / 100,
|
|
534
|
+
});
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
}
|
|
538
|
+
catch {
|
|
539
|
+
// Session search failure is non-fatal
|
|
540
|
+
}
|
|
541
|
+
}
|
|
542
|
+
return {
|
|
543
|
+
content: [
|
|
544
|
+
{
|
|
545
|
+
type: 'text',
|
|
546
|
+
text: JSON.stringify(results),
|
|
547
|
+
},
|
|
548
|
+
],
|
|
549
|
+
};
|
|
550
|
+
}
|
|
551
|
+
case 'read_messages': {
|
|
552
|
+
const startIndex = args?.startIndex;
|
|
553
|
+
const endIndex = args?.endIndex;
|
|
554
|
+
if (typeof startIndex !== 'number' || typeof endIndex !== 'number') {
|
|
555
|
+
return {
|
|
556
|
+
content: [
|
|
557
|
+
{
|
|
558
|
+
type: 'text',
|
|
559
|
+
text: JSON.stringify({ error: 'startIndex and endIndex are required' }),
|
|
560
|
+
},
|
|
561
|
+
],
|
|
562
|
+
};
|
|
563
|
+
}
|
|
564
|
+
if (startIndex > endIndex) {
|
|
565
|
+
return {
|
|
566
|
+
content: [
|
|
567
|
+
{
|
|
568
|
+
type: 'text',
|
|
569
|
+
text: JSON.stringify([]),
|
|
570
|
+
},
|
|
571
|
+
],
|
|
572
|
+
};
|
|
573
|
+
}
|
|
574
|
+
const messages = await historyStore.getAll();
|
|
575
|
+
const clampedStart = Math.max(0, startIndex);
|
|
576
|
+
const clampedEnd = Math.min(messages.length - 1, endIndex);
|
|
577
|
+
// Limit range to MAX_READ_MESSAGES
|
|
578
|
+
const effectiveEnd = Math.min(clampedEnd, clampedStart + MAX_READ_MESSAGES - 1);
|
|
579
|
+
const results = [];
|
|
580
|
+
for (let i = clampedStart; i <= effectiveEnd && i < messages.length; i++) {
|
|
581
|
+
const msg = messages[i];
|
|
582
|
+
if (msg) {
|
|
583
|
+
results.push(messageToResult(msg, i));
|
|
584
|
+
}
|
|
585
|
+
}
|
|
586
|
+
return {
|
|
587
|
+
content: [
|
|
588
|
+
{
|
|
589
|
+
type: 'text',
|
|
590
|
+
text: JSON.stringify(results),
|
|
591
|
+
},
|
|
592
|
+
],
|
|
593
|
+
};
|
|
594
|
+
}
|
|
595
|
+
case 'peek_recent': {
|
|
596
|
+
const rawCount = args?.count || DEFAULT_PEEK_COUNT;
|
|
597
|
+
const count = Math.max(1, Math.min(MAX_PEEK_COUNT, rawCount));
|
|
598
|
+
const allMessages = await historyStore.getAll();
|
|
599
|
+
// Filter out session documents — peek is for conversation messages only
|
|
600
|
+
const messages = allMessages.filter(m => m.role !== 'session');
|
|
601
|
+
const startIndex = Math.max(0, messages.length - count);
|
|
602
|
+
const results = [];
|
|
603
|
+
for (let i = startIndex; i < messages.length; i++) {
|
|
604
|
+
const msg = messages[i];
|
|
605
|
+
if (msg) {
|
|
606
|
+
// Use original index from allMessages for consistency
|
|
607
|
+
const originalIndex = allMessages.indexOf(msg);
|
|
608
|
+
results.push(messageToResult(msg, originalIndex >= 0 ? originalIndex : i));
|
|
609
|
+
}
|
|
610
|
+
}
|
|
611
|
+
return {
|
|
612
|
+
content: [
|
|
613
|
+
{
|
|
614
|
+
type: 'text',
|
|
615
|
+
text: JSON.stringify(results),
|
|
616
|
+
},
|
|
617
|
+
],
|
|
618
|
+
};
|
|
619
|
+
}
|
|
620
|
+
case 'get_history_stats': {
|
|
621
|
+
const stats = await historyStore.getStats();
|
|
622
|
+
const result = {
|
|
623
|
+
messageCount: stats.count,
|
|
624
|
+
totalTokens: stats.totalTokens,
|
|
625
|
+
oldestTimestamp: stats.oldestTimestamp ? timestampToISO(stats.oldestTimestamp) : null,
|
|
626
|
+
newestTimestamp: stats.newestTimestamp ? timestampToISO(stats.newestTimestamp) : null,
|
|
627
|
+
};
|
|
628
|
+
return {
|
|
629
|
+
content: [
|
|
630
|
+
{
|
|
631
|
+
type: 'text',
|
|
632
|
+
text: JSON.stringify(result),
|
|
633
|
+
},
|
|
634
|
+
],
|
|
635
|
+
};
|
|
636
|
+
}
|
|
637
|
+
case 'sub_query': {
    // Delegate a focused question plus supporting context to a sub-agent.
    // Both `prompt` and `context` are required strings; `model` is optional.
    const prompt = args?.prompt;
    const context = args?.context;
    const model = args?.model;
    if (!prompt || typeof prompt !== 'string') {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'prompt parameter is required' }),
                },
            ],
            isError: true,
        };
    }
    if (!context || typeof context !== 'string') {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'context parameter is required' }),
                },
            ],
            isError: true,
        };
    }
    // Generate request ID for rate limiting (use timestamp-based for now)
    // NOTE(review): millisecond timestamps can collide under concurrent calls;
    // confirm whether handleSubQuery's rate limiter tolerates duplicate IDs.
    const requestId = `req_${Date.now()}`;
    const subQueryResult = await handleSubQuery({ prompt, context, model }, requestId);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify(subQueryResult, null, 2),
            },
        ],
    };
}
|
|
675
|
+
case 'get_summary': {
    // Fetch a conversation summary. `scope` defaults to 'recent'; a 'range'
    // scope additionally requires startIndex/endIndex message bounds.
    const scope = args?.scope || 'recent';
    const summaryStartIndex = args?.startIndex;
    const summaryEndIndex = args?.endIndex;
    // Check if range scope has required parameters
    if (scope === 'range') {
        if (summaryStartIndex === undefined || summaryEndIndex === undefined) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: "startIndex and endIndex are required for scope='range'",
                        }),
                    },
                ],
                isError: true,
            };
        }
    }
    const result = await getSummary(historyStore.threadPath, scope, summaryStartIndex, summaryEndIndex);
    // A null result is not an error: summaries simply haven't been generated
    // yet for short conversations, so return an informational message.
    if (!result) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        message: 'No summaries available yet. Summaries are generated after 50+ messages.',
                    }),
                },
            ],
        };
    }
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify(result, null, 2),
            },
        ],
    };
}
|
|
717
|
+
// Content retrieval tools
case 'retrieve_content': {
    // Return stored content by id; an optional chunkIndex narrows the read
    // to a single chunk (otherwise the full content is returned).
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const contentId = args?.contentId;
    const chunkIndex = args?.chunkIndex;
    if (!contentId) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'contentId is required' }),
                },
            ],
            isError: true,
        };
    }
    const content = await contentStore.retrieve(contentId, chunkIndex);
    // retrieve() signals "not found" with null (never undefined here).
    if (content === null) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: `Content not found: ${contentId}` }),
                },
            ],
            isError: true,
        };
    }
    // Metadata is fetched separately to report chunk count / content type.
    const meta = await contentStore.getMeta(contentId);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    contentId,
                    chunkIndex: chunkIndex ?? 'all',
                    totalChunks: meta?.chunkCount ?? 1,
                    contentType: meta?.contentType,
                    content,
                }, null, 2),
            },
        ],
    };
}
|
|
771
|
+
case 'search_content': {
    // Search stored content; results carry metadata, a relevance score and a
    // snippet but NOT the full content (use retrieve_content for that).
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const query = args?.query;
    const sourceTypes = args?.sourceTypes;
    // `||` means limit=0 falls back to 10 — presumably intentional; verify.
    const searchLimit = args?.limit || 10;
    if (!query) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'query is required' }),
                },
            ],
            isError: true,
        };
    }
    const results = await contentStore.search(query, {
        sourceTypes: sourceTypes,
        limit: searchLimit,
    });
    return {
        content: [
            {
                type: 'text',
                // Project each result to a compact, client-facing shape.
                text: JSON.stringify(results.map(r => ({
                    contentId: r.meta.id,
                    sourceType: r.meta.sourceType,
                    contentType: r.meta.contentType,
                    summary: r.meta.summary,
                    score: r.score,
                    snippet: r.snippet,
                })), null, 2),
            },
        ],
    };
}
|
|
817
|
+
case 'list_stored_content': {
    // List stored-content metadata (no bodies), optionally filtered by
    // sourceTypes, capped at `limit` (default 20).
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const listLimit = args?.limit || 20;
    const listSourceTypes = args?.sourceTypes;
    const items = await contentStore.list({
        limit: listLimit,
        sourceTypes: listSourceTypes,
    });
    return {
        content: [
            {
                type: 'text',
                // Timestamps are stored numerically; surface them as ISO-8601.
                text: JSON.stringify(items.map(item => ({
                    contentId: item.id,
                    sourceType: item.sourceType,
                    sourceTool: item.sourceTool,
                    contentType: item.contentType,
                    originalSize: item.originalSize,
                    tokenEstimate: item.tokenEstimate,
                    chunkCount: item.chunkCount,
                    summary: item.summary,
                    timestamp: new Date(item.timestamp).toISOString(),
                })), null, 2),
            },
        ],
    };
}
|
|
854
|
+
case 'read_content_chunk': {
    // Read exactly one chunk of a stored item and report paging hints
    // (hasNext/hasPrev) so the caller can walk chunks sequentially.
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const chunkContentId = args?.contentId;
    const chunkIdx = args?.chunkIndex;
    // Explicit undefined check so chunkIndex 0 (a valid index) is accepted.
    if (!chunkContentId || chunkIdx === undefined) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'contentId and chunkIndex are required' }),
                },
            ],
            isError: true,
        };
    }
    const chunkContent = await contentStore.retrieve(chunkContentId, chunkIdx);
    if (chunkContent === null) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Chunk not found: ${chunkContentId}[${chunkIdx}]`,
                    }),
                },
            ],
            isError: true,
        };
    }
    const chunkMeta = await contentStore.getMeta(chunkContentId);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    contentId: chunkContentId,
                    chunkIndex: chunkIdx,
                    totalChunks: chunkMeta?.chunkCount ?? 1,
                    // Without metadata we can't know the chunk count, so
                    // conservatively report no next chunk.
                    hasNext: chunkMeta ? chunkIdx < chunkMeta.chunkCount - 1 : false,
                    hasPrev: chunkIdx > 0,
                    content: chunkContent,
                }, null, 2),
            },
        ],
    };
}
|
|
910
|
+
case 'forget_content': {
    // Permanently delete a stored item. Metadata is captured first so the
    // confirmation message can echo the item's summary after deletion.
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const forgetContentId = args?.contentId;
    const forgetReason = args?.reason || 'User requested deletion';
    if (!forgetContentId) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'contentId is required' }),
                },
            ],
            isError: true,
        };
    }
    // Get meta before deletion for confirmation message
    const forgetMeta = await contentStore.getMeta(forgetContentId);
    if (!forgetMeta) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Content not found: ${forgetContentId}`,
                    }),
                },
            ],
            isError: true,
        };
    }
    const deleted = await contentStore.delete(forgetContentId);
    // Audit log goes to stderr so it doesn't pollute the MCP stdout channel.
    console.error(`[forget_content] Deleted ${forgetContentId}: ${forgetMeta.summary.slice(0, 50)}... Reason: ${forgetReason}`);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    success: deleted,
                    contentId: forgetContentId,
                    summary: forgetMeta.summary,
                    message: `Content "${forgetMeta.summary.slice(0, 50)}..." has been permanently deleted and will no longer appear in searches.`,
                }, null, 2),
            },
        ],
    };
}
|
|
966
|
+
// File read and content storage handlers
case 'read_file': {
    // Read a file from disk (text or PDF), store it in the content store,
    // and return an id plus chunk table-of-contents instead of raw content.
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const filePath = args?.file_path;
    if (!filePath) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'file_path is required' }),
                },
            ],
            isError: true,
        };
    }
    // Resolve and validate path
    const resolvedPath = path.resolve(filePath);
    try {
        const stat = await fs.stat(resolvedPath);
        if (stat.isDirectory()) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: `Path is a directory, not a file: ${resolvedPath}`,
                        }),
                    },
                ],
                isError: true,
            };
        }
    }
    catch (statError) {
        // Map the common filesystem errno codes to friendly messages;
        // anything else is unexpected and bubbles up to the outer catch.
        const code = statError.code;
        if (code === 'ENOENT') {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({ error: `File not found: ${resolvedPath}` }),
                    },
                ],
                isError: true,
            };
        }
        if (code === 'EACCES') {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({ error: `Permission denied: ${resolvedPath}` }),
                    },
                ],
                isError: true,
            };
        }
        throw statError;
    }
    // PDF detection is by extension only; content sniffing is not attempted.
    const isPdf = resolvedPath.toLowerCase().endsWith('.pdf');
    let fileContent;
    let pdfPageCount;
    if (isPdf) {
        // Extract text from PDF using pdfjs-dist (lazy-imported so the
        // dependency stays optional).
        let getDocument;
        try {
            const pdfjs = await import('pdfjs-dist/legacy/build/pdf.mjs');
            getDocument = pdfjs.getDocument;
        }
        catch {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: 'PDF reading not available — pdfjs-dist not installed. Only text/code files can be read.',
                        }),
                    },
                ],
                isError: true,
            };
        }
        try {
            const pdfBuffer = await fs.readFile(resolvedPath);
            const uint8 = new Uint8Array(pdfBuffer);
            const doc = await getDocument({ data: uint8, useSystemFonts: true }).promise;
            pdfPageCount = doc.numPages;
            const pages = [];
            // pdfjs page numbers are 1-based.
            for (let i = 1; i <= doc.numPages; i++) {
                const page = await doc.getPage(i);
                const content = await page.getTextContent();
                // Join text items with position-aware line breaks: a change in
                // the item's y coordinate (transform[5]) starts a new line;
                // items sharing a y are joined with a single space.
                let pageText = '';
                let lastY = null;
                for (const item of content.items) {
                    if (!('str' in item) || item.str === '')
                        continue;
                    const y = Math.round(item.transform[5]);
                    if (lastY !== null && lastY !== y) {
                        pageText += '\n';
                    }
                    else if (lastY === y && pageText.length > 0) {
                        pageText += ' ';
                    }
                    pageText += item.str;
                    lastY = y;
                }
                pages.push(`[Page ${i}]\n${pageText}`);
            }
            fileContent = pages.join('\n\n');
            console.error(`[read_file] Extracted ${doc.numPages} pages from PDF: ${resolvedPath} (${fileContent.length} chars)`);
        }
        catch (pdfError) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: `Failed to parse PDF: ${pdfError.message}`,
                        }),
                    },
                ],
                isError: true,
            };
        }
    }
    else {
        // Text file
        const encoding = args?.encoding || 'utf-8';
        try {
            fileContent = await fs.readFile(resolvedPath, encoding);
        }
        catch (readError) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: `Failed to read file: ${readError.message}`,
                        }),
                    },
                ],
                isError: true,
            };
        }
        // Detect binary content (check for null bytes in first 8KB of the
        // already-decoded string — NUL characters survive UTF-8 decoding).
        const sample = fileContent.slice(0, 8192);
        if (sample.includes('\0')) {
            return {
                content: [
                    {
                        type: 'text',
                        text: JSON.stringify({
                            error: `Binary file detected: ${resolvedPath}. Use the built-in Read tool for images and other binary files.`,
                        }),
                    },
                ],
                isError: true,
            };
        }
    }
    const stored = await contentStore.store(fileContent, {
        sourceType: isPdf ? 'pdf_read' : 'file_read',
        sourceTool: 'read_file',
        metadata: {
            filePath: resolvedPath,
            ...(pdfPageCount !== undefined && { pdfPageCount }),
        },
    });
    // Get chunk previews for table of contents
    const chunkPreviews = await contentStore.getChunkPreviews(stored.id);
    console.error(`[read_file] Stored ${resolvedPath} as ${stored.id} (${stored.chunkCount} chunks, ~${stored.tokenEstimate} tokens)`);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    contentId: stored.id,
                    filePath: resolvedPath,
                    fileType: isPdf ? 'pdf' : 'text',
                    ...(pdfPageCount !== undefined && { pdfPageCount }),
                    summary: stored.summary,
                    contentType: stored.contentType,
                    chunkCount: stored.chunkCount,
                    tokenEstimate: stored.tokenEstimate,
                    originalSize: stored.originalSize,
                    chunks: chunkPreviews,
                    hint: `Use read_content_chunk("${stored.id}", N) to read a specific chunk by index. Use search_content("query") to find relevant chunks by content.`,
                }, null, 2),
            },
        ],
    };
}
|
|
1169
|
+
case 'store_content': {
    // Store caller-supplied text in the content store and return the new id
    // plus chunk previews for navigation.
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const storeText = args?.content;
    if (!storeText) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'content is required' }),
                },
            ],
            isError: true,
        };
    }
    // `source` is a free-form label recorded in metadata; file_path optional.
    const storeSource = args?.source || 'manual';
    const storeFilePath = args?.file_path;
    const stored = await contentStore.store(storeText, {
        sourceType: 'tool_result',
        sourceTool: 'store_content',
        metadata: {
            source: storeSource,
            ...(storeFilePath && { filePath: storeFilePath }),
        },
    });
    // Get chunk previews for navigation
    const storeChunkPreviews = await contentStore.getChunkPreviews(stored.id);
    console.error(`[store_content] Stored ${stored.id} from "${storeSource}" (${stored.chunkCount} chunks, ~${stored.tokenEstimate} tokens)`);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    contentId: stored.id,
                    summary: stored.summary,
                    contentType: stored.contentType,
                    chunkCount: stored.chunkCount,
                    tokenEstimate: stored.tokenEstimate,
                    originalSize: stored.originalSize,
                    chunks: storeChunkPreviews,
                    hint: `Content stored and indexed. Use read_content_chunk("${stored.id}", N) to read a specific chunk by index. Use search_content("query") to find relevant chunks.`,
                }, null, 2),
            },
        ],
    };
}
|
|
1224
|
+
case 'detect_anomalies': {
    // Flag chunks whose embedding is semantically "out of place" relative to
    // their immediate neighbors — low coherence suggests injected or
    // off-topic content inside an otherwise-uniform document.
    if (!contentStore) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'Content store not available' }),
                },
            ],
            isError: true,
        };
    }
    const anomalyContentId = args?.contentId;
    // Optional manual threshold; when absent an adaptive one is derived below.
    const manualThreshold = args?.threshold;
    if (!anomalyContentId) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({ error: 'contentId is required' }),
                },
            ],
            isError: true,
        };
    }
    const anomalyMeta = await contentStore.getMeta(anomalyContentId);
    if (!anomalyMeta) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: `Content not found: ${anomalyContentId}`,
                    }),
                },
            ],
            isError: true,
        };
    }
    // Coherence is defined pairwise, so fewer than 2 chunks is a no-op
    // (answered with empty, well-formed stats rather than an error).
    if (anomalyMeta.chunkCount < 2) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        anomalies: [],
                        stats: {
                            totalChunks: anomalyMeta.chunkCount,
                            meanSimilarity: 1,
                            stdDev: 0,
                            adaptiveThreshold: 0.3,
                            embeddingCalls: 0,
                        },
                        message: 'Content has fewer than 2 chunks — anomaly detection requires at least 2.',
                    }),
                },
            ],
        };
    }
    if (!(await embeddingsAvailable())) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: 'Embeddings not available — @huggingface/transformers not installed',
                    }),
                },
            ],
            isError: true,
        };
    }
    // Get embeddings for all chunks (generates missing ones)
    const chunkEmbeddings = await contentStore.getEmbeddingsForContent(anomalyContentId);
    if (!chunkEmbeddings || chunkEmbeddings.size === 0) {
        return {
            content: [
                {
                    type: 'text',
                    text: JSON.stringify({
                        error: 'Could not generate embeddings for content',
                    }),
                },
            ],
            isError: true,
        };
    }
    // Per-chunk coherence: average cosine similarity to the immediate
    // prev/next neighbors (edge chunks have only one neighbor; a missing
    // embedding scores 0, an isolated chunk scores 1).
    const n = anomalyMeta.chunkCount;
    const coherenceScores = [];
    for (let i = 0; i < n; i++) {
        const curr = chunkEmbeddings.get(i);
        if (!curr) {
            coherenceScores.push(0);
            continue;
        }
        const prev = i > 0 ? chunkEmbeddings.get(i - 1) : null;
        const next = i < n - 1 ? chunkEmbeddings.get(i + 1) : null;
        const similarities = [];
        if (prev)
            similarities.push(cosineSimilarity(curr, prev));
        if (next)
            similarities.push(cosineSimilarity(curr, next));
        const avgSim = similarities.length > 0
            ? similarities.reduce((a, b) => a + b, 0) / similarities.length
            : 1;
        coherenceScores.push(avgSim);
    }
    // Compute adaptive threshold: max(mean - 2σ, 0.3); the 0.3 floor stops
    // highly uniform documents from flagging nothing/noise.
    const mean = coherenceScores.reduce((a, b) => a + b, 0) / coherenceScores.length;
    const variance = coherenceScores.reduce((sum, s) => sum + (s - mean) ** 2, 0) / coherenceScores.length;
    const stdDev = Math.sqrt(variance);
    const adaptiveThreshold = Math.max(mean - 2 * stdDev, 0.3);
    // `??` (not `||`) so a manual threshold of 0 is honored.
    const effectiveThreshold = manualThreshold ?? adaptiveThreshold;
    // Boundary relaxation factor (0.85x for first/last chunks)
    const BOUNDARY_RELAX = 0.85;
    // Find anomalies
    const anomalies = [];
    for (let i = 0; i < n; i++) {
        const isBoundary = i === 0 || i === n - 1;
        const chunkThreshold = isBoundary
            ? effectiveThreshold * BOUNDARY_RELAX
            : effectiveThreshold;
        const score = coherenceScores[i] ?? 0;
        if (score < chunkThreshold) {
            // Get preview of anomalous chunk
            const chunkText = await contentStore.retrieve(anomalyContentId, i);
            anomalies.push({
                chunkIndex: i,
                coherenceScore: Math.round(score * 1000) / 1000,
                threshold: Math.round(chunkThreshold * 1000) / 1000,
                preview: (chunkText || '').slice(0, 200),
            });
        }
    }
    console.error(`[detect_anomalies] ${anomalyContentId}: ${n} chunks, ${anomalies.length} anomalies, threshold=${Math.round(effectiveThreshold * 1000) / 1000}`);
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({
                    anomalies,
                    stats: {
                        totalChunks: n,
                        meanSimilarity: Math.round(mean * 1000) / 1000,
                        stdDev: Math.round(stdDev * 1000) / 1000,
                        adaptiveThreshold: Math.round(adaptiveThreshold * 1000) / 1000,
                        effectiveThreshold: Math.round(effectiveThreshold * 1000) / 1000,
                        embeddingCalls: chunkEmbeddings.size,
                    },
                }, null, 2),
            },
        ],
    };
}
|
|
1379
|
+
// Unknown tool name: answer with an isError payload rather than throwing,
// so the client receives a structured diagnostic.
default:
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({ error: `Unknown tool: ${name}` }),
            },
        ],
        isError: true,
    };
|
|
1390
|
+
}
|
|
1391
|
+
catch (error) {
    // Graceful error handling - return error objects, don't crash.
    // Any uncaught failure from a tool handler above is converted into a
    // structured isError response; non-Error throwables get a generic message.
    const errorMessage = error instanceof Error ? error.message : 'Unknown error';
    return {
        content: [
            {
                type: 'text',
                text: JSON.stringify({ error: errorMessage }),
            },
        ],
        isError: true,
    };
}
|
|
1404
|
+
}
|
|
1405
|
+
//# sourceMappingURL=tool-handler.js.map
|