npm - ctxpkg - Versions diffs - 0.0.1 - Mend

ctxpkg 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (61) hide show

package/LICENSE +661 -0
package/README.md +282 -0
package/bin/cli.js +8 -0
package/bin/daemon.js +7 -0
package/package.json +70 -0
package/src/agent/AGENTS.md +249 -0
package/src/agent/agent.prompts.ts +66 -0
package/src/agent/agent.test-runner.schemas.ts +158 -0
package/src/agent/agent.test-runner.ts +436 -0
package/src/agent/agent.ts +371 -0
package/src/agent/agent.types.ts +94 -0
package/src/backend/AGENTS.md +112 -0
package/src/backend/backend.protocol.ts +95 -0
package/src/backend/backend.schemas.ts +123 -0
package/src/backend/backend.services.ts +151 -0
package/src/backend/backend.ts +111 -0
package/src/backend/backend.types.ts +34 -0
package/src/cli/AGENTS.md +213 -0
package/src/cli/cli.agent.ts +197 -0
package/src/cli/cli.chat.ts +369 -0
package/src/cli/cli.client.ts +55 -0
package/src/cli/cli.collections.ts +491 -0
package/src/cli/cli.config.ts +252 -0
package/src/cli/cli.daemon.ts +160 -0
package/src/cli/cli.documents.ts +413 -0
package/src/cli/cli.mcp.ts +177 -0
package/src/cli/cli.ts +28 -0
package/src/cli/cli.utils.ts +122 -0
package/src/client/AGENTS.md +135 -0
package/src/client/client.adapters.ts +279 -0
package/src/client/client.ts +86 -0
package/src/client/client.types.ts +17 -0
package/src/collections/AGENTS.md +185 -0
package/src/collections/collections.schemas.ts +195 -0
package/src/collections/collections.ts +1160 -0
package/src/config/config.ts +118 -0
package/src/daemon/AGENTS.md +168 -0
package/src/daemon/daemon.config.ts +23 -0
package/src/daemon/daemon.manager.ts +215 -0
package/src/daemon/daemon.schemas.ts +22 -0
package/src/daemon/daemon.ts +205 -0
package/src/database/AGENTS.md +211 -0
package/src/database/database.ts +64 -0
package/src/database/migrations/migrations.001-init.ts +56 -0
package/src/database/migrations/migrations.002-fts5.ts +32 -0
package/src/database/migrations/migrations.ts +20 -0
package/src/database/migrations/migrations.types.ts +9 -0
package/src/documents/AGENTS.md +301 -0
package/src/documents/documents.schemas.ts +190 -0
package/src/documents/documents.ts +734 -0
package/src/embedder/embedder.ts +53 -0
package/src/exports.ts +0 -0
package/src/mcp/AGENTS.md +264 -0
package/src/mcp/mcp.ts +105 -0
package/src/tools/AGENTS.md +228 -0
package/src/tools/agent/agent.ts +45 -0
package/src/tools/documents/documents.ts +401 -0
package/src/tools/tools.langchain.ts +37 -0
package/src/tools/tools.mcp.ts +46 -0
package/src/tools/tools.types.ts +35 -0
package/src/utils/utils.services.ts +46 -0

package/src/agent/agent.test-runner.schemas.ts ADDED Viewed

@@ -0,0 +1,158 @@
+import * as z from 'zod';
+// Literal tuple of every supported strategy; kept separate so the enum is built from one list.
+const VALIDATION_MODES = ['semantic', 'llm', 'keywords'] as const;
+/**
+ * Strategy used to judge an agent's answer: embedding similarity (`semantic`),
+ * an LLM judge (`llm`), or keyword matching (`keywords`).
+ */
+export const validationModeSchema = z.enum(VALIDATION_MODES);
+export type ValidationMode = z.infer<typeof validationModeSchema>;
+// Field definitions for one test case, declared separately from the object schema.
+const testCaseShape = {
+  /** Identifier of the test; reported back as the result id */
+  id: z.string(),
+  /** Question that is put to the agent */
+  query: z.string(),
+  /** Use-case context passed to the agent together with the question */
+  useCase: z.string(),
+  /** Reference answer, or a description of what a good answer contains */
+  expected: z.string(),
+  /** Terms the answer should contain (consulted in keywords mode) */
+  keywords: z.array(z.string()).optional(),
+  /** Per-test validation mode */
+  validationMode: validationModeSchema.optional(),
+  /** Per-test instructions handed to the judge in LLM mode */
+  validationInstructions: z.string().optional(),
+  /** Per-test pass threshold, between 0 and 1 inclusive */
+  passThreshold: z.number().gte(0).lte(1).optional(),
+  /** When true the test is reported as skipped instead of being run */
+  skip: z.boolean().optional(),
+};
+/**
+ * A single test case in a suite file
+ */
+export const testCaseSchema = z.object(testCaseShape);
+export type TestCase = z.infer<typeof testCaseSchema>;
+// A collection is described by its source URL only.
+const collectionSpecShape = { url: z.string() };
+/**
+ * Specification of a collection to sync (stated to match the context.json format)
+ */
+export const collectionSpecSchema = z.object(collectionSpecShape);
+export type CollectionSpec = z.infer<typeof collectionSpecSchema>;
+// Suite-wide defaults; each field falls back to the value given to `.default(...)` when omitted.
+const testOptionsShape = {
+  /** Validation mode applied when a test does not set its own (defaults to 'semantic') */
+  validationMode: validationModeSchema.optional().default('semantic'),
+  /** Minimum score required to pass, between 0 and 1 inclusive (defaults to 0.75) */
+  passThreshold: z.number().gte(0).lte(1).optional().default(0.75),
+  /** Judge instructions used in LLM mode when a test does not provide its own */
+  validationInstructions: z.string().optional(),
+  /** Time budget for a single test, in milliseconds (defaults to 60000) */
+  timeoutMs: z.number().optional().default(60000),
+};
+/**
+ * Options that apply to every test in a suite
+ */
+export const testOptionsSchema = z.object(testOptionsShape);
+export type TestOptions = z.infer<typeof testOptionsSchema>;
+// Top-level fields of a suite file.
+const testSuiteShape = {
+  /** Suite name, echoed in the run result */
+  name: z.string(),
+  /** Free-form summary of what the suite covers */
+  description: z.string().optional(),
+  /** Collections to sync before the tests run, keyed by alias */
+  collections: z.record(z.string(), collectionSpecSchema),
+  /** Suite-wide options; may be left out entirely */
+  options: testOptionsSchema.optional(),
+  /** The test cases; at least one is required */
+  tests: testCaseSchema.array().min(1),
+};
+/**
+ * Shape of a complete test suite file
+ */
+export const testSuiteSchema = z.object(testSuiteShape);
+export type TestSuite = z.infer<typeof testSuiteSchema>;
+// Fields recorded for each executed (or skipped) test case.
+const testResultShape = {
+  /** Id of the test case this result belongs to */
+  id: z.string(),
+  /** True when the test met its pass criteria */
+  passed: z.boolean(),
+  /** Validation score; documented as 0-1 for semantic/llm validation */
+  score: z.number().optional(),
+  /** Answer the agent actually produced */
+  actualAnswer: z.string(),
+  /** Explanation of the verdict (judge reasoning in LLM mode, otherwise match details) */
+  reasoning: z.string().optional(),
+  /** Keywords that were present in the answer (keywords mode) */
+  keywordsFound: z.array(z.string()).optional(),
+  /** Keywords that were absent from the answer (keywords mode) */
+  keywordsMissing: z.array(z.string()).optional(),
+  /** Elapsed time in milliseconds */
+  durationMs: z.number(),
+  /** Error message when the test could not be run */
+  error: z.string().optional(),
+  /** True when the test was skipped */
+  skipped: z.boolean().optional(),
+};
+/**
+ * Outcome of one test case
+ */
+export const testResultSchema = z.object(testResultShape);
+export type TestResult = z.infer<typeof testResultSchema>;
+// Aggregate counters reported for a run.
+const testRunSummarySchema = z.object({
+  total: z.number(),
+  passed: z.number(),
+  failed: z.number(),
+  skipped: z.number(),
+});
+/**
+ * Outcome of a whole test run
+ */
+export const testRunResultSchema = z.object({
+  /** Name of the suite that was run */
+  suiteName: z.string(),
+  /** Start time of the run, as a string */
+  startedAt: z.string(),
+  /** Completion time of the run, as a string */
+  completedAt: z.string(),
+  /** Overall elapsed time in milliseconds */
+  durationMs: z.number(),
+  /** Counts of total, passed, failed and skipped tests */
+  summary: testRunSummarySchema,
+  /** Per-test results */
+  results: testResultSchema.array(),
+});
+export type TestRunResult = z.infer<typeof testRunResultSchema>;

package/src/agent/agent.test-runner.ts ADDED Viewed

@@ -0,0 +1,436 @@
+import { readFile } from 'node:fs/promises';
+import { dirname, resolve } from 'node:path';
+import { parse as parseYaml } from 'yaml';
+import { createDocumentAgent, getLLMConfigFromAppConfig } from './agent.ts';
+import type { LLMConfig } from './agent.types.ts';
+import {
+  testSuiteSchema,
+  type TestCase,
+  type TestResult,
+  type TestRunResult,
+  type TestSuite,
+  type ValidationMode,
+} from './agent.test-runner.schemas.ts';
+import type { BackendClient } from '#root/client/client.ts';
+import { createClient } from '#root/client/client.ts';
+import { EmbedderService } from '#root/embedder/embedder.ts';
+import { Services, destroy } from '#root/utils/utils.services.ts';
+/**
+ * Progress notifications emitted while a suite runs; `type` discriminates the payload
+ */
+type TestProgressEvent =
+  // The suite is about to run
+  | { type: 'suite_start'; suiteName: string; totalTests: number }
+  // Collection syncing begins / has finished
+  | { type: 'sync_start' }
+  | { type: 'sync_complete' }
+  // A single test begins / has produced its result
+  | { type: 'test_start'; testId: string; index: number }
+  | { type: 'test_complete'; testId: string; result: TestResult }
+  // The whole run has finished
+  | { type: 'suite_complete'; result: TestRunResult };
+/**
+ * Listener that receives each progress event
+ */
+type TestProgressCallback = (event: TestProgressEvent) => void;
+/**
+ * Settings accepted by `runTestSuite`; every field is optional
+ */
+type TestRunnerOptions = {
+  /** Listener notified of suite, sync and per-test progress */
+  onProgress?: TestProgressCallback;
+  /** LLM configuration; when omitted it is loaded from the app config */
+  llmConfig?: LLMConfig;
+  /** Model used by the LLM judge; falls back to llmConfig.model */
+  validationModel?: string;
+  /** Validation mode forced onto every test, taking precedence over test and suite settings */
+  validationMode?: ValidationMode;
+  /** Pass threshold forced onto every test, taking precedence over test and suite settings */
+  passThreshold?: number;
+  /**
+   * Directory passed as `cwd` when syncing collections. When omitted, the runner uses
+   * process.cwd(); pass the `baseDir` returned by `loadTestSuite` to resolve relative
+   * to the test file instead.
+   */
+  baseDir?: string;
+};
+/**
+ * Prompt template for the LLM judge.
+ *
+ * Placeholders substituted by the runner before the prompt is sent:
+ * `{expected}`, `{actual}`, `{instructions}` and `{threshold}`.
+ * The `{threshold}` placeholder was previously missing from the template, so the
+ * runner's substitution was a no-op and the judge was told to compare the score
+ * against an unspecified "threshold".
+ */
+const LLM_VALIDATION_PROMPT = `You are evaluating an AI agent's answer against expected criteria.
+## Expected Answer / Criteria
+{expected}
+## Actual Answer
+{actual}
+## Validation Instructions
+{instructions}
+## Task
+Evaluate how well the actual answer meets the expected criteria. Consider:
+- Does it address the key points?
+- Is the information accurate (based on what was expected)?
+- Is it appropriately detailed?
+Respond with a JSON object:
+\`\`\`json
+{
+  "score": <0.0 to 1.0>,
+  "passed": <true if score >= {threshold}>,
+  "reasoning": "<brief explanation of your evaluation>"
+}
+\`\`\``;
+// Judge instructions used when neither the test case nor the suite options supply any.
+const DEFAULT_VALIDATION_INSTRUCTIONS = [
+  'Evaluate whether the actual answer adequately addresses the expected criteria.',
+  'Focus on factual correctness and completeness rather than exact wording.',
+].join('\n');
+/**
+ * Test runner service for validating agent performance.
+ *
+ * For each test case the runner asks the document agent the configured query and
+ * scores the answer with one of three strategies: embedding cosine similarity
+ * (`semantic`), keyword matching (`keywords`) or an LLM judge (`llm`).
+ */
+class AgentTestRunner {
+  #services: Services;
+  #embedder: EmbedderService;
+  constructor() {
+    this.#services = new Services();
+    this.#embedder = this.#services.get(EmbedderService);
+  }
+  /**
+   * Compute collection ID from spec URL (mirrors CollectionsService.computeCollectionId)
+   *
+   * Trailing slashes are removed from the URL before the `pkg:` prefix is added.
+   */
+  #computeCollectionId(url: string): string {
+    const normalizedUrl = url.replace(/\/+$/, '');
+    return `pkg:${normalizedUrl}`;
+  }
+  /**
+   * Load and parse a test suite from a YAML file.
+   *
+   * @param filePath Path to the YAML suite file
+   * @returns The validated suite and the absolute directory containing the file
+   * @throws If the file cannot be read or its content does not match `testSuiteSchema`
+   */
+  async loadTestSuite(filePath: string): Promise<{ suite: TestSuite; baseDir: string }> {
+    const content = await readFile(filePath, 'utf-8');
+    const parsed: unknown = parseYaml(content);
+    const suite = testSuiteSchema.parse(parsed);
+    const baseDir = dirname(resolve(filePath));
+    return { suite, baseDir };
+  }
+  /**
+   * Run a complete test suite.
+   *
+   * Syncs the suite's collections, creates an agent restricted to them, runs the
+   * tests one after another and reports progress through `options.onProgress`.
+   * The client is always disconnected, even when syncing or a test throws.
+   *
+   * @param suite Validated test suite
+   * @param options Runner options; `baseDir` defaults to `process.cwd()`
+   * @returns Summary and per-test results of the run
+   */
+  async runTestSuite(suite: TestSuite, options: TestRunnerOptions = {}): Promise<TestRunResult> {
+    const { onProgress, llmConfig: providedLlmConfig, baseDir = process.cwd() } = options;
+    const startedAt = new Date().toISOString();
+    const startTime = Date.now();
+    // Get LLM config
+    const llmConfig = providedLlmConfig ?? (await getLLMConfigFromAppConfig());
+    onProgress?.({ type: 'suite_start', suiteName: suite.name, totalTests: suite.tests.length });
+    const results: TestResult[] = [];
+    // Create client using direct mode (uses existing database)
+    const client = await createClient({ mode: 'direct' });
+    try {
+      // Sync collections from test suite
+      onProgress?.({ type: 'sync_start' });
+      // Build alias map for test suite collections only
+      const aliasMap = new Map<string, string>();
+      for (const [alias, spec] of Object.entries(suite.collections)) {
+        // Compute collection ID (same as CollectionsService.computeCollectionId)
+        aliasMap.set(alias, this.#computeCollectionId(spec.url));
+        // Sync the collection
+        await client.collections.sync({
+          name: alias,
+          spec,
+          cwd: baseDir,
+        });
+      }
+      onProgress?.({ type: 'sync_complete' });
+      // Create agent with only the test suite's collections
+      const agent = createDocumentAgent({
+        client,
+        llmConfig,
+        aliasMap,
+        // Restrict searches to only the test suite's collections
+        collections: Array.from(aliasMap.values()),
+      });
+      // Run each test sequentially so progress events arrive in suite order
+      for (const [index, testCase] of suite.tests.entries()) {
+        onProgress?.({ type: 'test_start', testId: testCase.id, index });
+        const result = await this.#runSingleTest(testCase, agent, client, llmConfig, suite.options, options);
+        results.push(result);
+        onProgress?.({ type: 'test_complete', testId: testCase.id, result });
+      }
+    } finally {
+      await client.disconnect();
+    }
+    const completedAt = new Date().toISOString();
+    const durationMs = Date.now() - startTime;
+    // Skipped tests carry passed=false, so they are excluded from both pass and fail counts
+    const summary = {
+      total: results.length,
+      passed: results.filter((r) => r.passed && !r.skipped).length,
+      failed: results.filter((r) => !r.passed && !r.skipped).length,
+      skipped: results.filter((r) => r.skipped).length,
+    };
+    const runResult: TestRunResult = {
+      suiteName: suite.name,
+      startedAt,
+      completedAt,
+      durationMs,
+      summary,
+      results,
+    };
+    onProgress?.({ type: 'suite_complete', result: runResult });
+    return runResult;
+  }
+  /**
+   * Run a single test case.
+   *
+   * Never throws: any error raised while asking or validating (including a timeout)
+   * is converted into a failed result whose `error` holds the message.
+   * The suite's `timeoutMs` option (60000 when the suite has no options) bounds the
+   * combined time of asking the agent and validating the answer.
+   *
+   * `client` is currently not used by this method; it is kept so the call shape is unchanged.
+   */
+  async #runSingleTest(
+    testCase: TestCase,
+    agent: ReturnType<typeof createDocumentAgent>,
+    client: BackendClient,
+    llmConfig: LLMConfig,
+    suiteOptions: TestSuite['options'],
+    runnerOptions: TestRunnerOptions,
+  ): Promise<TestResult> {
+    const startTime = Date.now();
+    // Check if skipped
+    if (testCase.skip) {
+      return {
+        id: testCase.id,
+        passed: false,
+        skipped: true,
+        actualAnswer: '',
+        durationMs: 0,
+      };
+    }
+    try {
+      // Same fallback pattern as validationMode/passThreshold: documented default when options are absent
+      const timeoutMs = suiteOptions?.timeoutMs ?? 60000;
+      const result = await this.#withTimeout(
+        this.#askAndValidate(testCase, agent, llmConfig, suiteOptions, runnerOptions),
+        timeoutMs,
+        `Test "${testCase.id}"`,
+      );
+      return { ...result, durationMs: Date.now() - startTime };
+    } catch (error) {
+      const message = error instanceof Error ? error.message : String(error);
+      return {
+        id: testCase.id,
+        passed: false,
+        actualAnswer: '',
+        error: message,
+        durationMs: Date.now() - startTime,
+      };
+    }
+  }
+  /**
+   * Ask the agent and validate its answer with the effective validation mode.
+   *
+   * Precedence for mode and threshold: runner options, then the test case,
+   * then suite options, then the built-in defaults ('semantic', 0.75).
+   * The returned `durationMs` is a placeholder that the caller overwrites.
+   */
+  async #askAndValidate(
+    testCase: TestCase,
+    agent: ReturnType<typeof createDocumentAgent>,
+    llmConfig: LLMConfig,
+    suiteOptions: TestSuite['options'],
+    runnerOptions: TestRunnerOptions,
+  ): Promise<TestResult> {
+    // Get the agent's answer
+    const response = await agent.ask(testCase.query, testCase.useCase);
+    const actualAnswer = response.answer;
+    // Determine validation mode
+    const validationMode =
+      runnerOptions.validationMode ?? testCase.validationMode ?? suiteOptions?.validationMode ?? 'semantic';
+    // Determine pass threshold
+    const passThreshold =
+      runnerOptions.passThreshold ?? testCase.passThreshold ?? suiteOptions?.passThreshold ?? 0.75;
+    // Validate based on mode
+    switch (validationMode) {
+      case 'keywords':
+        return this.#validateKeywords(testCase, actualAnswer, passThreshold);
+      case 'llm':
+        return this.#validateWithLLM(
+          testCase,
+          actualAnswer,
+          passThreshold,
+          llmConfig,
+          suiteOptions,
+          runnerOptions.validationModel,
+        );
+      case 'semantic':
+      default:
+        return this.#validateSemantic(testCase, actualAnswer, passThreshold);
+    }
+  }
+  /**
+   * Reject with a timeout error if `work` has not settled within `timeoutMs`.
+   *
+   * A non-positive or non-finite limit disables the timeout. The underlying work is
+   * not cancelled when the timeout fires; only its result is ignored.
+   */
+  async #withTimeout<T>(work: Promise<T>, timeoutMs: number, label: string): Promise<T> {
+    if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
+      return work;
+    }
+    // Delays above 2^31-1 ms overflow the timer and would fire almost immediately
+    const delay = Math.min(timeoutMs, 2147483647);
+    let timer: ReturnType<typeof setTimeout> | undefined;
+    const deadline = new Promise<never>((_resolve, reject) => {
+      timer = setTimeout(() => reject(new Error(`${label} timed out after ${timeoutMs}ms`)), delay);
+    });
+    try {
+      return await Promise.race([work, deadline]);
+    } finally {
+      // Always clear the timer so a finished test does not keep the process alive
+      clearTimeout(timer);
+    }
+  }
+  /**
+   * Validate using semantic similarity between the expected and the actual answer.
+   *
+   * @throws If the embedder does not return a vector for both texts
+   */
+  async #validateSemantic(testCase: TestCase, actualAnswer: string, passThreshold: number): Promise<TestResult> {
+    // Embed both expected and actual as documents (not queries)
+    const embeddings = await this.#embedder.createDocumentEmbeddings([testCase.expected, actualAnswer]);
+    const [expectedEmbedding, actualEmbedding] = embeddings;
+    if (!expectedEmbedding || !actualEmbedding) {
+      throw new Error('Embedder did not return embeddings for both the expected and the actual answer');
+    }
+    // Compute cosine similarity
+    const similarity = this.#cosineSimilarity(expectedEmbedding, actualEmbedding);
+    return {
+      id: testCase.id,
+      passed: similarity >= passThreshold,
+      score: similarity,
+      actualAnswer,
+      reasoning: `Semantic similarity: ${(similarity * 100).toFixed(1)}% (threshold: ${(passThreshold * 100).toFixed(1)}%)`,
+      durationMs: 0,
+    };
+  }
+  /**
+   * Validate using case-insensitive keyword matching.
+   *
+   * The score is the fraction of keywords found in the answer. A test without
+   * keywords fails with an explanatory error instead of passing vacuously.
+   */
+  async #validateKeywords(testCase: TestCase, actualAnswer: string, passThreshold: number): Promise<TestResult> {
+    const keywords = testCase.keywords ?? [];
+    if (keywords.length === 0) {
+      return {
+        id: testCase.id,
+        passed: false,
+        actualAnswer,
+        error: 'No keywords specified for keywords validation mode',
+        durationMs: 0,
+      };
+    }
+    const lowerAnswer = actualAnswer.toLowerCase();
+    const found: string[] = [];
+    const missing: string[] = [];
+    for (const keyword of keywords) {
+      const bucket = lowerAnswer.includes(keyword.toLowerCase()) ? found : missing;
+      bucket.push(keyword);
+    }
+    const score = found.length / keywords.length;
+    return {
+      id: testCase.id,
+      passed: score >= passThreshold,
+      score,
+      actualAnswer,
+      keywordsFound: found,
+      keywordsMissing: missing,
+      reasoning: `Found ${found.length}/${keywords.length} keywords (${(score * 100).toFixed(1)}%)`,
+      durationMs: 0,
+    };
+  }
+  /**
+   * Validate using LLM as judge.
+   *
+   * The pass decision is made locally from the returned score; the judge's own
+   * `passed` field is not used. If no JSON object can be parsed from the reply,
+   * the test fails and the raw reply is returned as `reasoning`.
+   */
+  async #validateWithLLM(
+    testCase: TestCase,
+    actualAnswer: string,
+    passThreshold: number,
+    llmConfig: LLMConfig,
+    suiteOptions: TestSuite['options'],
+    validationModel?: string,
+  ): Promise<TestResult> {
+    const { ChatOpenAI } = await import('@langchain/openai');
+    const { HumanMessage } = await import('@langchain/core/messages');
+    const llm = new ChatOpenAI({
+      configuration: { baseURL: llmConfig.provider },
+      modelName: validationModel ?? llmConfig.model,
+      apiKey: llmConfig.apiKey,
+      temperature: 0,
+    });
+    const instructions =
+      testCase.validationInstructions ?? suiteOptions?.validationInstructions ?? DEFAULT_VALIDATION_INSTRUCTIONS;
+    const placeholders: Record<string, string> = {
+      expected: testCase.expected,
+      actual: actualAnswer,
+      instructions,
+      threshold: passThreshold.toString(),
+    };
+    // Single pass with a replacer function: inserted text is never re-scanned for
+    // placeholders, and `$&`, `$1`, `$$` in an answer are not treated as replacement patterns.
+    const prompt = LLM_VALIDATION_PROMPT.replace(
+      /\{(expected|actual|instructions|threshold)\}/g,
+      (placeholder, key: string) => placeholders[key] ?? placeholder,
+    );
+    const response = await llm.invoke([new HumanMessage(prompt)]);
+    const content = typeof response.content === 'string' ? response.content : JSON.stringify(response.content);
+    // Parse JSON response: prefer a fenced json block, otherwise the outermost braces
+    const jsonMatch = content.match(/```json\s*([\s\S]*?)\s*```/) ?? content.match(/\{[\s\S]*\}/);
+    if (jsonMatch) {
+      try {
+        const parsed: unknown = JSON.parse(jsonMatch[1] ?? jsonMatch[0]);
+        if (typeof parsed === 'object' && parsed !== null) {
+          const { score: rawScore, reasoning } = parsed as { score?: unknown; reasoning?: unknown };
+          const numericScore = Number(rawScore);
+          // Missing or non-numeric scores count as 0; valid ones are clamped to the 0-1 range
+          const score = Number.isFinite(numericScore) ? Math.min(1, Math.max(0, numericScore)) : 0;
+          return {
+            id: testCase.id,
+            passed: score >= passThreshold,
+            score,
+            actualAnswer,
+            reasoning: typeof reasoning === 'string' ? reasoning : 'No reasoning provided',
+            durationMs: 0,
+          };
+        }
+      } catch {
+        // Malformed JSON: fall through to the parse-failure result below
+      }
+    }
+    return {
+      id: testCase.id,
+      passed: false,
+      actualAnswer,
+      error: 'Failed to parse LLM validation response',
+      reasoning: content,
+      durationMs: 0,
+    };
+  }
+  /**
+   * Compute cosine similarity between two vectors.
+   *
+   * @returns The similarity, or 0 when either vector has zero magnitude
+   * @throws If the vectors have different lengths
+   */
+  #cosineSimilarity(a: number[], b: number[]): number {
+    if (a.length !== b.length) {
+      throw new Error(`Cannot compare embeddings of different dimensions (${a.length} vs ${b.length})`);
+    }
+    let dotProduct = 0;
+    let normA = 0;
+    let normB = 0;
+    a.forEach((x, i) => {
+      // Lengths were checked above; the fallback only satisfies strict index typing
+      const y = b[i] ?? 0;
+      dotProduct += x * y;
+      normA += x * x;
+      normB += y * y;
+    });
+    const denominator = Math.sqrt(normA) * Math.sqrt(normB);
+    // Avoid 0/0 = NaN, which would otherwise leak into the reported score
+    return denominator === 0 ? 0 : dotProduct / denominator;
+  }
+  /**
+   * Clean up resources by destroying the owned service container
+   */
+  async [destroy](): Promise<void> {
+    await this.#services.destroy();
+  }
+}
+/**
+ * Factory returning a fresh AgentTestRunner
+ */
+const createTestRunner = (): AgentTestRunner => new AgentTestRunner();
+export { AgentTestRunner, createTestRunner };
+export type { TestProgressCallback, TestProgressEvent, TestRunnerOptions };