ctxpkg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. package/LICENSE +661 -0
  2. package/README.md +282 -0
  3. package/bin/cli.js +8 -0
  4. package/bin/daemon.js +7 -0
  5. package/package.json +70 -0
  6. package/src/agent/AGENTS.md +249 -0
  7. package/src/agent/agent.prompts.ts +66 -0
  8. package/src/agent/agent.test-runner.schemas.ts +158 -0
  9. package/src/agent/agent.test-runner.ts +436 -0
  10. package/src/agent/agent.ts +371 -0
  11. package/src/agent/agent.types.ts +94 -0
  12. package/src/backend/AGENTS.md +112 -0
  13. package/src/backend/backend.protocol.ts +95 -0
  14. package/src/backend/backend.schemas.ts +123 -0
  15. package/src/backend/backend.services.ts +151 -0
  16. package/src/backend/backend.ts +111 -0
  17. package/src/backend/backend.types.ts +34 -0
  18. package/src/cli/AGENTS.md +213 -0
  19. package/src/cli/cli.agent.ts +197 -0
  20. package/src/cli/cli.chat.ts +369 -0
  21. package/src/cli/cli.client.ts +55 -0
  22. package/src/cli/cli.collections.ts +491 -0
  23. package/src/cli/cli.config.ts +252 -0
  24. package/src/cli/cli.daemon.ts +160 -0
  25. package/src/cli/cli.documents.ts +413 -0
  26. package/src/cli/cli.mcp.ts +177 -0
  27. package/src/cli/cli.ts +28 -0
  28. package/src/cli/cli.utils.ts +122 -0
  29. package/src/client/AGENTS.md +135 -0
  30. package/src/client/client.adapters.ts +279 -0
  31. package/src/client/client.ts +86 -0
  32. package/src/client/client.types.ts +17 -0
  33. package/src/collections/AGENTS.md +185 -0
  34. package/src/collections/collections.schemas.ts +195 -0
  35. package/src/collections/collections.ts +1160 -0
  36. package/src/config/config.ts +118 -0
  37. package/src/daemon/AGENTS.md +168 -0
  38. package/src/daemon/daemon.config.ts +23 -0
  39. package/src/daemon/daemon.manager.ts +215 -0
  40. package/src/daemon/daemon.schemas.ts +22 -0
  41. package/src/daemon/daemon.ts +205 -0
  42. package/src/database/AGENTS.md +211 -0
  43. package/src/database/database.ts +64 -0
  44. package/src/database/migrations/migrations.001-init.ts +56 -0
  45. package/src/database/migrations/migrations.002-fts5.ts +32 -0
  46. package/src/database/migrations/migrations.ts +20 -0
  47. package/src/database/migrations/migrations.types.ts +9 -0
  48. package/src/documents/AGENTS.md +301 -0
  49. package/src/documents/documents.schemas.ts +190 -0
  50. package/src/documents/documents.ts +734 -0
  51. package/src/embedder/embedder.ts +53 -0
  52. package/src/exports.ts +0 -0
  53. package/src/mcp/AGENTS.md +264 -0
  54. package/src/mcp/mcp.ts +105 -0
  55. package/src/tools/AGENTS.md +228 -0
  56. package/src/tools/agent/agent.ts +45 -0
  57. package/src/tools/documents/documents.ts +401 -0
  58. package/src/tools/tools.langchain.ts +37 -0
  59. package/src/tools/tools.mcp.ts +46 -0
  60. package/src/tools/tools.types.ts +35 -0
  61. package/src/utils/utils.services.ts +46 -0
@@ -0,0 +1,158 @@
1
+ import * as z from 'zod';
2
+
3
/**
 * Names of the supported strategies for judging an agent's answer.
 */
const VALIDATION_MODES = ['semantic', 'llm', 'keywords'] as const;

/** Schema accepting exactly one of the supported validation mode names */
export const validationModeSchema = z.enum(VALIDATION_MODES);
export type ValidationMode = z.infer<typeof validationModeSchema>;
8
+
9
/**
 * Shape of one entry in a test suite's `tests` list.
 *
 * Only `id`, `query`, `useCase` and `expected` are required; every other field
 * is an optional per-test override.
 */
export const testCaseSchema = z.object({
  /** Identifier of the test (uniqueness is not enforced by this schema) */
  id: z.string(),

  /** Question that is put to the agent */
  query: z.string(),

  /** Use case context accompanying the question */
  useCase: z.string(),

  /** Reference answer, or a description of what a good answer contains */
  expected: z.string(),

  /** Terms the answer should contain (only meaningful in keywords mode) */
  keywords: z.string().array().optional(),

  /** Per-test validation mode override */
  validationMode: validationModeSchema.optional(),

  /** Extra guidance for the judge (only meaningful in LLM mode) */
  validationInstructions: z.string().optional(),

  /** Per-test pass threshold override, constrained to the range 0-1 */
  passThreshold: z.number().gte(0).lte(1).optional(),

  /** Marks the test as skipped */
  skip: z.boolean().optional(),
});

export type TestCase = z.infer<typeof testCaseSchema>;
42
+
43
/**
 * Collection specification (same format as context.json): an object carrying
 * the collection's `url`.
 */
const collectionSpecShape = {
  url: z.string(),
};

export const collectionSpecSchema = z.object(collectionSpecShape);

export type CollectionSpec = z.infer<typeof collectionSpecSchema>;
51
+
52
/**
 * Suite-wide defaults applied to every test unless overridden.
 */
export const testOptionsSchema = z.object({
  /** Validation mode used when a test does not pick one; falls back to `semantic` */
  validationMode: validationModeSchema.optional().default('semantic'),

  /** Pass threshold in the range 0-1; falls back to 0.75 */
  passThreshold: z.number().gte(0).lte(1).optional().default(0.75),

  /** Judge guidance used in LLM mode when a test does not supply its own */
  validationInstructions: z.string().optional(),

  /** Maximum time per test in milliseconds; falls back to 60000 */
  timeoutMs: z.number().optional().default(60_000),
});

export type TestOptions = z.infer<typeof testOptionsSchema>;
70
+
71
/**
 * Top-level structure of a test suite file.
 */
export const testSuiteSchema = z.object({
  /** Suite name */
  name: z.string(),

  /** Free-form description of what the suite covers */
  description: z.string().optional(),

  /** Collections to sync before running tests, keyed by alias */
  collections: z.record(z.string(), collectionSpecSchema),

  /** Suite-wide options */
  options: testOptionsSchema.optional(),

  /** Test cases; at least one is required */
  tests: testCaseSchema.array().min(1),
});

export type TestSuite = z.infer<typeof testSuiteSchema>;
92
+
93
/**
 * Outcome recorded for one test case.
 */
export const testResultSchema = z.object({
  /** ID of the test case this result belongs to */
  id: z.string(),

  /** True when the test passed */
  passed: z.boolean(),

  /** Score (0-1) for semantic/llm validation */
  score: z.number().optional(),

  /** Answer the agent actually produced */
  actualAnswer: z.string(),

  /** Judge reasoning (LLM mode) or a description of the match */
  reasoning: z.string().optional(),

  /** Keywords present in the answer (keywords mode) */
  keywordsFound: z.string().array().optional(),

  /** Keywords absent from the answer (keywords mode) */
  keywordsMissing: z.string().array().optional(),

  /** Elapsed time in milliseconds */
  durationMs: z.number(),

  /** Error message when the test could not be run or validated */
  error: z.string().optional(),

  /** True when the test was skipped */
  skipped: z.boolean().optional(),
});

export type TestResult = z.infer<typeof testResultSchema>;
129
+
130
/**
 * Aggregate counters reported for a test run.
 */
const runSummarySchema = z.object({
  total: z.number(),
  passed: z.number(),
  failed: z.number(),
  skipped: z.number(),
});

/**
 * Result of running a complete test suite.
 */
export const testRunResultSchema = z.object({
  /** Name of the suite that was run */
  suiteName: z.string(),

  /** Start time of the run, as a string */
  startedAt: z.string(),

  /** Completion time of the run, as a string */
  completedAt: z.string(),

  /** Total duration in milliseconds */
  durationMs: z.number(),

  /** Summary statistics */
  summary: runSummarySchema,

  /** Per-test results */
  results: testResultSchema.array(),
});

export type TestRunResult = z.infer<typeof testRunResultSchema>;
@@ -0,0 +1,436 @@
1
+ import { readFile } from 'node:fs/promises';
2
+ import { dirname, resolve } from 'node:path';
3
+
4
+ import { parse as parseYaml } from 'yaml';
5
+
6
+ import { createDocumentAgent, getLLMConfigFromAppConfig } from './agent.ts';
7
+ import type { LLMConfig } from './agent.types.ts';
8
+ import {
9
+ testSuiteSchema,
10
+ type TestCase,
11
+ type TestResult,
12
+ type TestRunResult,
13
+ type TestSuite,
14
+ type ValidationMode,
15
+ } from './agent.test-runner.schemas.ts';
16
+
17
+ import type { BackendClient } from '#root/client/client.ts';
18
+ import { createClient } from '#root/client/client.ts';
19
+ import { EmbedderService } from '#root/embedder/embedder.ts';
20
+ import { Services, destroy } from '#root/utils/utils.services.ts';
21
+
22
/** Announces the suite name and how many tests it contains */
type SuiteStartEvent = { type: 'suite_start'; suiteName: string; totalTests: number };

/** Collection syncing is about to begin */
type SyncStartEvent = { type: 'sync_start' };

/** All collections have been synced */
type SyncCompleteEvent = { type: 'sync_complete' };

/** A test is about to run; `index` is its zero-based position in the suite */
type TestStartEvent = { type: 'test_start'; testId: string; index: number };

/** A test finished; carries its result */
type TestCompleteEvent = { type: 'test_complete'; testId: string; result: TestResult };

/** The whole suite finished; carries the aggregated run result */
type SuiteCompleteEvent = { type: 'suite_complete'; result: TestRunResult };

/**
 * Every event a progress listener can receive, discriminated by `type`.
 */
type TestProgressEvent =
  | SuiteStartEvent
  | SyncStartEvent
  | SyncCompleteEvent
  | TestStartEvent
  | TestCompleteEvent
  | SuiteCompleteEvent;

/**
 * Listener invoked with each progress event while a suite runs.
 */
type TestProgressCallback = (event: TestProgressEvent) => void;
34
+
35
/**
 * Caller-supplied settings for a test suite run. Every field is optional.
 */
type TestRunnerOptions = Partial<{
  /** LLM configuration; when omitted it is loaded from the app config */
  llmConfig: LLMConfig;
  /** Listener for progress events */
  onProgress: TestProgressCallback;
  /** Forces this validation mode for all tests, ahead of per-test and suite settings */
  validationMode: ValidationMode;
  /** Forces this pass threshold for all tests, ahead of per-test and suite settings */
  passThreshold: number;
  /** Model used for LLM validation; falls back to `llmConfig.model` */
  validationModel: string;
  /**
   * Base directory handed to collection sync as `cwd`. `runTestSuite` falls
   * back to `process.cwd()` when this is omitted; pass the `baseDir` returned
   * by `loadTestSuite` to use the test file's directory instead.
   */
  baseDir: string;
}>;
52
+
53
/**
 * Prompt template sent to the judge model in LLM validation mode.
 *
 * The runner substitutes four placeholders before sending it: `{expected}`,
 * `{actual}`, `{instructions}` and `{threshold}`. Each placeholder appears
 * exactly once. `{threshold}` gives the judge the numeric pass mark so its
 * `passed` field is not decided against an unstated value.
 */
const LLM_VALIDATION_PROMPT = `You are evaluating an AI agent's answer against expected criteria.

## Expected Answer / Criteria
{expected}

## Actual Answer
{actual}

## Validation Instructions
{instructions}

## Task
Evaluate how well the actual answer meets the expected criteria. Consider:
- Does it address the key points?
- Is the information accurate (based on what was expected)?
- Is it appropriately detailed?

Respond with a JSON object:
\`\`\`json
{
"score": <0.0 to 1.0>,
"passed": <true if score >= {threshold}>,
"reasoning": "<brief explanation of your evaluation>"
}
\`\`\``;

/**
 * Judge guidance used when neither the test case nor the suite options supply
 * their own validation instructions.
 */
const DEFAULT_VALIDATION_INSTRUCTIONS = `Evaluate whether the actual answer adequately addresses the expected criteria.
Focus on factual correctness and completeness rather than exact wording.`;
84
+
85
/**
 * Test runner service for validating agent performance.
 *
 * Loads YAML test suites, syncs the collections they declare, asks the
 * document agent each test query, and scores the answer using one of three
 * validation modes: `semantic` (embedding cosine similarity), `llm` (a judge
 * model) or `keywords` (case-insensitive substring matching).
 */
class AgentTestRunner {
  #services: Services;
  #embedder: EmbedderService;

  constructor() {
    this.#services = new Services();
    this.#embedder = this.#services.get(EmbedderService);
  }

  /**
   * Compute the collection ID for a spec URL: trailing slashes are stripped
   * and the result is prefixed with `pkg:`.
   *
   * NOTE(review): intended to mirror CollectionsService.computeCollectionId;
   * keep the two in sync when either changes.
   */
  #computeCollectionId(url: string): string {
    const normalizedUrl = url.replace(/\/+$/, '');
    return `pkg:${normalizedUrl}`;
  }

  /**
   * Load and parse a test suite from a YAML file.
   *
   * Errors from reading the file, parsing the YAML or validating it against
   * `testSuiteSchema` propagate to the caller.
   *
   * @param filePath - Path to the YAML test suite file.
   * @returns The validated suite and the absolute directory containing the file.
   */
  async loadTestSuite(filePath: string): Promise<{ suite: TestSuite; baseDir: string }> {
    const content = await readFile(filePath, 'utf-8');
    const parsed: unknown = parseYaml(content);
    const suite = testSuiteSchema.parse(parsed);
    const baseDir = dirname(resolve(filePath));
    return { suite, baseDir };
  }

  /**
   * Run a complete test suite.
   *
   * Syncs every collection declared in the suite, creates an agent restricted
   * to those collections, runs the tests sequentially in declaration order and
   * reports progress through `options.onProgress`. The client is always
   * disconnected, even when syncing or a test throws.
   *
   * @param suite - The validated test suite to run.
   * @param options - Runner overrides; `baseDir` falls back to `process.cwd()`.
   * @returns Timing information, summary counters and per-test results.
   */
  async runTestSuite(suite: TestSuite, options: TestRunnerOptions = {}): Promise<TestRunResult> {
    const { onProgress, llmConfig: providedLlmConfig, baseDir = process.cwd() } = options;
    const startedAt = new Date().toISOString();
    const startTime = Date.now();

    // Fall back to the application config only when no LLM config was passed in
    const llmConfig = providedLlmConfig ?? (await getLLMConfigFromAppConfig());

    onProgress?.({ type: 'suite_start', suiteName: suite.name, totalTests: suite.tests.length });

    const results: TestResult[] = [];

    // Create client using direct mode
    const client = await createClient({ mode: 'direct' });

    try {
      onProgress?.({ type: 'sync_start' });

      // Alias -> collection ID, for the test suite's collections only
      const aliasMap = new Map<string, string>();

      for (const [alias, spec] of Object.entries(suite.collections)) {
        aliasMap.set(alias, this.#computeCollectionId(spec.url));

        await client.collections.sync({
          name: alias,
          spec,
          cwd: baseDir,
        });
      }

      onProgress?.({ type: 'sync_complete' });

      const agent = createDocumentAgent({
        client,
        llmConfig,
        aliasMap,
        // Restrict searches to only the test suite's collections
        collections: Array.from(aliasMap.values()),
      });

      for (const [index, testCase] of suite.tests.entries()) {
        onProgress?.({ type: 'test_start', testId: testCase.id, index });

        const result = await this.#runSingleTest(testCase, agent, client, llmConfig, suite.options, options);
        results.push(result);

        onProgress?.({ type: 'test_complete', testId: testCase.id, result });
      }
    } finally {
      await client.disconnect();
    }

    const completedAt = new Date().toISOString();
    const durationMs = Date.now() - startTime;

    // Skipped tests are counted separately and never as passed or failed
    const summary = {
      total: results.length,
      passed: results.filter((r) => r.passed && !r.skipped).length,
      failed: results.filter((r) => !r.passed && !r.skipped).length,
      skipped: results.filter((r) => r.skipped).length,
    };

    const runResult: TestRunResult = {
      suiteName: suite.name,
      startedAt,
      completedAt,
      durationMs,
      summary,
      results,
    };

    onProgress?.({ type: 'suite_complete', result: runResult });

    return runResult;
  }

  /**
   * Run a single test case and never throw: any error raised while asking the
   * agent or validating the answer is returned as a failed result with `error`.
   *
   * Validation mode and pass threshold are resolved in this order: runner
   * options, then the test case, then the suite options, then the built-in
   * defaults (`semantic`, 0.75).
   *
   * NOTE(review): `suiteOptions.timeoutMs` is not read anywhere in this class,
   * so the per-test timeout declared in the suite options is not enforced here.
   */
  async #runSingleTest(
    testCase: TestCase,
    agent: ReturnType<typeof createDocumentAgent>,
    client: BackendClient,
    llmConfig: LLMConfig,
    suiteOptions: TestSuite['options'],
    runnerOptions: TestRunnerOptions,
  ): Promise<TestResult> {
    const startTime = Date.now();

    if (testCase.skip) {
      return {
        id: testCase.id,
        passed: false,
        skipped: true,
        actualAnswer: '',
        durationMs: 0,
      };
    }

    try {
      const response = await agent.ask(testCase.query, testCase.useCase);
      const actualAnswer = response.answer;

      const validationMode =
        runnerOptions.validationMode ?? testCase.validationMode ?? suiteOptions?.validationMode ?? 'semantic';

      const passThreshold =
        runnerOptions.passThreshold ?? testCase.passThreshold ?? suiteOptions?.passThreshold ?? 0.75;

      let result: TestResult;

      switch (validationMode) {
        case 'keywords':
          result = await this.#validateKeywords(testCase, actualAnswer, passThreshold);
          break;
        case 'llm':
          result = await this.#validateWithLLM(
            testCase,
            actualAnswer,
            passThreshold,
            llmConfig,
            suiteOptions,
            runnerOptions.validationModel,
          );
          break;
        case 'semantic':
        default:
          result = await this.#validateSemantic(testCase, actualAnswer, passThreshold);
          break;
      }

      // Validators report 0; the real duration covers both the agent call and validation
      result.durationMs = Date.now() - startTime;
      return result;
    } catch (error) {
      const message = error instanceof Error ? error.message : String(error);
      return {
        id: testCase.id,
        passed: false,
        actualAnswer: '',
        error: message,
        durationMs: Date.now() - startTime,
      };
    }
  }

  /**
   * Validate using semantic similarity: the expected and actual answers are
   * embedded and the test passes when their cosine similarity reaches the
   * threshold.
   *
   * @throws If the embedder does not return an embedding for both texts.
   */
  async #validateSemantic(testCase: TestCase, actualAnswer: string, passThreshold: number): Promise<TestResult> {
    // Embed both expected and actual as documents (not queries)
    const embeddings = await this.#embedder.createDocumentEmbeddings([testCase.expected, actualAnswer]);
    const [expectedEmbedding, actualEmbedding] = embeddings;

    if (!expectedEmbedding || !actualEmbedding) {
      throw new Error('Embedder did not return embeddings for both the expected and actual answers');
    }

    const similarity = this.#cosineSimilarity(expectedEmbedding, actualEmbedding);

    return {
      id: testCase.id,
      passed: similarity >= passThreshold,
      score: similarity,
      actualAnswer,
      reasoning: `Semantic similarity: ${(similarity * 100).toFixed(1)}% (threshold: ${(passThreshold * 100).toFixed(1)}%)`,
      durationMs: 0,
    };
  }

  /**
   * Validate using keyword matching: the score is the fraction of the test's
   * keywords found in the answer, compared case-insensitively as substrings.
   * A test without keywords yields a failed result carrying an `error`.
   */
  async #validateKeywords(testCase: TestCase, actualAnswer: string, passThreshold: number): Promise<TestResult> {
    const keywords = testCase.keywords ?? [];

    if (keywords.length === 0) {
      return {
        id: testCase.id,
        passed: false,
        actualAnswer,
        error: 'No keywords specified for keywords validation mode',
        durationMs: 0,
      };
    }

    const lowerAnswer = actualAnswer.toLowerCase();
    const found: string[] = [];
    const missing: string[] = [];

    for (const keyword of keywords) {
      if (lowerAnswer.includes(keyword.toLowerCase())) {
        found.push(keyword);
      } else {
        missing.push(keyword);
      }
    }

    const score = found.length / keywords.length;

    return {
      id: testCase.id,
      passed: score >= passThreshold,
      score,
      actualAnswer,
      keywordsFound: found,
      keywordsMissing: missing,
      reasoning: `Found ${found.length}/${keywords.length} keywords (${(score * 100).toFixed(1)}%)`,
      durationMs: 0,
    };
  }

  /**
   * Validate using an LLM as judge.
   *
   * The judge is asked for a JSON verdict; the test passes when the returned
   * score reaches `passThreshold` (the judge's own `passed` field is ignored).
   * If no usable JSON verdict can be extracted, a failed result is returned
   * with the raw judge output in `reasoning`.
   */
  async #validateWithLLM(
    testCase: TestCase,
    actualAnswer: string,
    passThreshold: number,
    llmConfig: LLMConfig,
    suiteOptions: TestSuite['options'],
    validationModel?: string,
  ): Promise<TestResult> {
    const { ChatOpenAI } = await import('@langchain/openai');
    const { HumanMessage } = await import('@langchain/core/messages');

    const llm = new ChatOpenAI({
      configuration: { baseURL: llmConfig.provider },
      modelName: validationModel ?? llmConfig.model,
      apiKey: llmConfig.apiKey,
      temperature: 0,
    });

    const instructions =
      testCase.validationInstructions ?? suiteOptions?.validationInstructions ?? DEFAULT_VALIDATION_INSTRUCTIONS;

    const prompt = this.#renderValidationPrompt({
      expected: testCase.expected,
      actual: actualAnswer,
      instructions,
      threshold: passThreshold.toString(),
    });

    const response = await llm.invoke([new HumanMessage(prompt)]);
    const content = typeof response.content === 'string' ? response.content : JSON.stringify(response.content);

    const verdict = this.#parseJudgeVerdict(content);

    if (verdict) {
      return {
        id: testCase.id,
        passed: verdict.score >= passThreshold,
        score: verdict.score,
        actualAnswer,
        reasoning: verdict.reasoning,
        durationMs: 0,
      };
    }

    return {
      id: testCase.id,
      passed: false,
      actualAnswer,
      error: 'Failed to parse LLM validation response',
      reasoning: content,
      durationMs: 0,
    };
  }

  /**
   * Fill the placeholders of `LLM_VALIDATION_PROMPT` in a single pass.
   *
   * A replacer function is used so that `$`-sequences in the inserted text
   * (such as `$&` or `$1`) are inserted literally, and so that placeholder-like
   * text inside one value is never substituted by a later replacement.
   */
  #renderValidationPrompt(values: {
    expected: string;
    actual: string;
    instructions: string;
    threshold: string;
  }): string {
    return LLM_VALIDATION_PROMPT.replace(
      /\{(expected|actual|instructions|threshold)\}/g,
      (_placeholder: string, key: keyof typeof values) => values[key],
    );
  }

  /**
   * Extract the judge's JSON verdict from its raw output.
   *
   * Tries the contents of a ```json fenced block first, then the widest
   * brace-delimited span; the first candidate that parses to a JSON object
   * wins. A missing or non-numeric score becomes 0.
   *
   * @returns The score and reasoning, or `undefined` when nothing parses.
   */
  #parseJudgeVerdict(content: string): { score: number; reasoning: string } | undefined {
    const candidates = [content.match(/```json\s*([\s\S]*?)\s*```/)?.[1], content.match(/\{[\s\S]*\}/)?.[0]];

    for (const candidate of candidates) {
      if (candidate === undefined) {
        continue;
      }

      let parsed: unknown;
      try {
        parsed = JSON.parse(candidate);
      } catch {
        // Not valid JSON; try the next candidate
        continue;
      }

      if (typeof parsed !== 'object' || parsed === null) {
        continue;
      }

      const { score: rawScore, reasoning: rawReasoning } = parsed as { score?: unknown; reasoning?: unknown };
      const score = Number(rawScore) || 0;

      let reasoning = 'No reasoning provided';
      if (typeof rawReasoning === 'string') {
        reasoning = rawReasoning;
      } else if (rawReasoning !== undefined && rawReasoning !== null) {
        // Keep the result's `reasoning` a string even if the judge returned structured data
        reasoning = JSON.stringify(rawReasoning);
      }

      return { score, reasoning };
    }

    return undefined;
  }

  /**
   * Compute cosine similarity between two vectors.
   *
   * @returns The similarity, or 0 when either vector has zero magnitude
   *          (instead of NaN from a division by zero).
   * @throws If the vectors have different lengths.
   */
  #cosineSimilarity(a: number[], b: number[]): number {
    if (a.length !== b.length) {
      throw new Error(`Cannot compare embeddings of different lengths (${a.length} vs ${b.length})`);
    }

    let dotProduct = 0;
    let normA = 0;
    let normB = 0;

    for (let i = 0; i < a.length; i++) {
      const x = a[i] ?? 0;
      const y = b[i] ?? 0;
      dotProduct += x * y;
      normA += x * x;
      normB += y * y;
    }

    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
    return denominator === 0 ? 0 : dotProduct / denominator;
  }

  /**
   * Clean up resources by destroying the runner's service container.
   */
  async [destroy](): Promise<void> {
    await this.#services.destroy();
  }
}
427
+
428
/**
 * Factory that returns a new `AgentTestRunner` on every call.
 */
const createTestRunner = (): AgentTestRunner => new AgentTestRunner();
434
+
435
+ export { AgentTestRunner, createTestRunner };
436
+ export type { TestProgressCallback, TestProgressEvent, TestRunnerOptions };