npm - @huydao/karrot - Versions diffs - 0.1.1 - Mend

@huydao/karrot 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

package/GUIDE.md +484 -0
package/README.md +253 -0
package/dist/assertions/assertion.d.ts +18 -0
package/dist/assertions/assertion.js +198 -0
package/dist/assertions/turn-eval.d.ts +22 -0
package/dist/assertions/turn-eval.js +178 -0
package/dist/executors/adapters/ag-ui-post.d.ts +55 -0
package/dist/executors/adapters/ag-ui-post.js +703 -0
package/dist/executors/adapters/ag-ui.d.ts +15 -0
package/dist/executors/adapters/ag-ui.js +275 -0
package/dist/executors/execute.d.ts +16 -0
package/dist/executors/execute.js +145 -0
package/dist/executors/executor.d.ts +37 -0
package/dist/executors/executor.js +203 -0
package/dist/executors/run-result.d.ts +33 -0
package/dist/executors/run-result.js +22 -0
package/dist/index.d.ts +12 -0
package/dist/index.js +28 -0
package/dist/prompts/turn-eval-system-prompt.md +68 -0
package/dist/prompts/turn-message-gen-system-prompt.md +16 -0
package/dist/reports/report.d.ts +68 -0
package/dist/reports/report.js +366 -0
package/dist/scenarios/generated-message.d.ts +15 -0
package/dist/scenarios/generated-message.js +116 -0
package/dist/scenarios/scenario-loader.d.ts +12 -0
package/dist/scenarios/scenario-loader.js +103 -0
package/dist/scenarios/scenario.d.ts +62 -0
package/dist/scenarios/scenario.js +35 -0
package/dist/utils/artifact-files.d.ts +3 -0
package/dist/utils/artifact-files.js +22 -0
package/dist/utils/config.d.ts +101 -0
package/dist/utils/config.js +57 -0
package/dist/utils/openai-eval.d.ts +5 -0
package/dist/utils/openai-eval.js +54 -0
package/package.json +146 -0

package/dist/executors/adapters/ag-ui.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+import { type MessageRunResult } from '../run-result';
+/**
+ * Options accepted by `runAgUiMessage`.
+ */
+type RunAgUiMessageOptions = {
+    /** Message text handed to the ag-ui-wss binary as its last command-line argument. */
+    message: string;
+    /** Environment given to the spawned child process. */
+    env: NodeJS.ProcessEnv;
+    /** Created if missing; used as the child's working directory and as the folder holding `<threadId>.jsonl` logs. */
+    outputDirectory: string;
+    /** Thread to continue; forwarded to the binary as `--thread <id>` when set. */
+    threadId?: string;
+    /** Thread ID used only when none can be parsed from the process output and `threadId` is unset. */
+    threadIdFallback?: string;
+    /** Presumably controls whether an idle timeout is tolerated once assistant text was received — TODO confirm against the implementation. */
+    allowIdleTimeoutWithAssistantText?: boolean;
+    /** When a number, the child is sent SIGTERM after this many milliseconds and the run rejects. */
+    processTimeoutMs?: number;
+};
+/**
+ * Extracts a test-result ID from text: the digits of a `/test-results/<digits>` URL if present,
+ * otherwise the 3+ digit number following "test result" / "test result ID" (case-insensitive).
+ * Returns `undefined` when neither form is found.
+ */
+export declare function parseExecutionTestResultId(output: string): string | undefined;
+/**
+ * Returns, in log order, the trimmed `toolCallName` of every `TOOL_CALL_START` event in a JSONL log.
+ * Lines that are not valid JSON are ignored.
+ */
+export declare function extractToolCallNames(logContent: string): string[];
+/**
+ * Returns the part of `latestLogContent` that follows `previousLogContent` when the latest content
+ * starts with the previous content; otherwise returns `latestLogContent` unchanged.
+ */
+export declare function extractAppendedLog(previousLogContent: string, latestLogContent: string): string;
+/**
+ * Spawns the ag-ui-wss binary for one message and resolves with the assistant output, thread ID,
+ * log path, tool-call information and timing metrics parsed from the console output and JSONL log.
+ * Rejects with a `MessageRunError` when the process exceeds `processTimeoutMs` or exits non-zero
+ * (an idle-timeout exit after assistant text was received resolves with a note instead), and with
+ * an `Error` when no thread ID can be determined.
+ */
+export declare function runAgUiMessage(options: RunAgUiMessageOptions): Promise<MessageRunResult>;
+export {};

package/dist/executors/adapters/ag-ui.js ADDED Viewed

@@ -0,0 +1,275 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.parseExecutionTestResultId = parseExecutionTestResultId;
+exports.extractToolCallNames = extractToolCallNames;
+exports.extractAppendedLog = extractAppendedLog;
+exports.runAgUiMessage = runAgUiMessage;
+const promises_1 = __importDefault(require("node:fs/promises"));
+const node_path_1 = __importDefault(require("node:path"));
+const node_child_process_1 = require("node:child_process");
+const run_result_1 = require("../run-result");
+function getAgUiBinaryPath() {
+    const packageJsonPath = require.resolve('ag-ui-wss/package.json');
+    const packageDirectory = node_path_1.default.dirname(packageJsonPath);
+    const packageJson = JSON.parse(require('node:fs').readFileSync(packageJsonPath, 'utf8'));
+    const binaryRelativePath = packageJson.bin?.['ag-ui-wss'];
+    if (!binaryRelativePath) {
+        throw new Error(`Unable to resolve ag-ui-wss binary from ${packageJsonPath}.`);
+    }
+    return node_path_1.default.join(packageDirectory, binaryRelativePath);
+}
+function parseThreadId(output) {
+    return output.match(/Thread:\s*([^\s]+)/)?.[1];
+}
+function extractStdoutAssistantText(output) {
+    const normalized = output.replace(/\r/g, '');
+    if (!normalized.trim()) {
+        return undefined;
+    }
+    const withoutAnsi = normalized.replace(/\u001B\[[0-9;]*m/g, '');
+    const trimmed = withoutAnsi.trim();
+    return trimmed || undefined;
+}
+function parseExecutionTestResultId(output) {
+    const urlMatch = output.match(/\/test-results\/(\d+)/);
+    if (urlMatch) {
+        return urlMatch[1];
+    }
+    const plainMatch = output.match(/test result(?: ID)?\s+(\d{3,})/i);
+    return plainMatch?.[1];
+}
+function parseTimingMetrics(output) {
+    const matches = [
+        ...output.matchAll(/(?:(?:TTF-Tool:\s*([\d.]+)s)\s*\|\s*)?TTF-Text:\s*([\d.]+)s\s*\|\s*Total:\s*([\d.]+)s\s*\|\s*Protocol efficiency:\s*([\d.]+)KB\/([\d.]+)KB\s*\((\d+)%\)/g),
+    ];
+    const match = matches.at(-1);
+    if (!match) {
+        return {};
+    }
+    return {
+        ttfToolSeconds: match[1] ? Number(match[1]) : undefined,
+        ttfTextSeconds: Number(match[2]),
+        totalSeconds: Number(match[3]),
+        protocolUsedKb: Number(match[4]),
+        protocolTotalKb: Number(match[5]),
+        efficiencyPercent: Number(match[6]),
+    };
+}
+function parseRunError(logContent) {
+    const lines = logContent
+        .split('\n')
+        .map((line) => line.trim())
+        .filter(Boolean);
+    for (let index = lines.length - 1; index >= 0; index -= 1) {
+        try {
+            const event = JSON.parse(lines[index]);
+            if (event.type === 'RUN_ERROR' && typeof event.error === 'string' && event.error.trim()) {
+                return event.error.trim();
+            }
+        }
+        catch {
+            // Ignore malformed lines in the JSONL stream.
+        }
+    }
+    return undefined;
+}
+function parseConsoleError(output) {
+    const matches = [...output.matchAll(/^Error:\s*(.+)$/gm)]
+        .map((match) => match[1].trim())
+        .filter((message) => message && message !== 'Agent run failed');
+    return matches.at(0);
+}
+function countToolCalls(logContent) {
+    return [...logContent.matchAll(/"type":"TOOL_CALL_START"/g)].length;
+}
+function extractToolCallNames(logContent) {
+    const toolCalls = [];
+    const lines = logContent
+        .split('\n')
+        .map((line) => line.trim())
+        .filter(Boolean);
+    for (const line of lines) {
+        try {
+            const event = JSON.parse(line);
+            if (event.type !== 'TOOL_CALL_START' || typeof event.toolCallName !== 'string') {
+                continue;
+            }
+            const toolCallName = event.toolCallName.trim();
+            if (toolCallName) {
+                toolCalls.push(toolCallName);
+            }
+        }
+        catch {
+            // Ignore malformed JSONL lines.
+        }
+    }
+    return toolCalls;
+}
+function extractAppendedLog(previousLogContent, latestLogContent) {
+    if (!previousLogContent) {
+        return latestLogContent;
+    }
+    if (latestLogContent.startsWith(previousLogContent)) {
+        return latestLogContent.slice(previousLogContent.length);
+    }
+    return latestLogContent;
+}
+function extractAssistantText(logContent) {
+    const fragmentsByMessage = new Map();
+    const orderedMessageIds = [];
+    let latestFullContent;
+    const lines = logContent
+        .split('\n')
+        .map((line) => line.trim())
+        .filter(Boolean);
+    for (const line of lines) {
+        try {
+            const event = JSON.parse(line);
+            if (!String(event.type).startsWith('TEXT_MESSAGE_')) {
+                continue;
+            }
+            const messageId = typeof event.messageId === 'string' && event.messageId.trim()
+                ? event.messageId.trim()
+                : '__default__';
+            if (!fragmentsByMessage.has(messageId)) {
+                fragmentsByMessage.set(messageId, []);
+                orderedMessageIds.push(messageId);
+            }
+            if (typeof event.content === 'string' && event.content.trim()) {
+                latestFullContent = event.content.trim();
+            }
+            if (typeof event.text === 'string' && event.text.trim()) {
+                latestFullContent = event.text.trim();
+            }
+            if (typeof event.delta === 'string' && event.delta.length > 0) {
+                fragmentsByMessage.get(messageId)?.push(event.delta);
+            }
+        }
+        catch {
+            // Ignore malformed JSONL lines.
+        }
+    }
+    if (latestFullContent) {
+        return latestFullContent;
+    }
+    const lastMessageId = orderedMessageIds.at(-1);
+    if (!lastMessageId) {
+        return undefined;
+    }
+    const joined = fragmentsByMessage.get(lastMessageId)?.join('').trim();
+    return joined || undefined;
+}
+async function readJsonl(pathname) {
+    try {
+        return await promises_1.default.readFile(pathname, 'utf8');
+    }
+    catch {
+        return '';
+    }
+}
+async function runAgUiMessage(options) {
+    await promises_1.default.mkdir(options.outputDirectory, { recursive: true });
+    const expectedThreadId = options.threadId;
+    const existingLogPath = expectedThreadId
+        ? node_path_1.default.join(options.outputDirectory, `${expectedThreadId}.jsonl`)
+        : undefined;
+    const previousLogContent = existingLogPath ? await readJsonl(existingLogPath) : '';
+    const child = (0, node_child_process_1.spawn)(getAgUiBinaryPath(), expectedThreadId ? ['--thread', expectedThreadId, options.message] : [options.message], {
+        cwd: options.outputDirectory,
+        env: options.env,
+        stdio: ['ignore', 'pipe', 'pipe'],
+    });
+    let stdoutOutput = '';
+    let stderrOutput = '';
+    child.stdout.on('data', (chunk) => {
+        const text = chunk.toString();
+        stdoutOutput += text;
+        process.stdout.write(text);
+    });
+    child.stderr.on('data', (chunk) => {
+        const text = chunk.toString();
+        stderrOutput += text;
+        process.stderr.write(text);
+    });
+    const exitCode = await new Promise((resolve, reject) => {
+        let timedOut = false;
+        const timeoutId = typeof options.processTimeoutMs === 'number'
+            ? setTimeout(() => {
+                timedOut = true;
+                child.kill('SIGTERM');
+                setTimeout(() => child.kill('SIGKILL'), 5_000).unref();
+                reject(new run_result_1.MessageRunError(`ag-ui-wss exceeded ${options.processTimeoutMs}ms and was terminated.`, {
+                    threadId: parseThreadId(`${stderrOutput}\n${stdoutOutput}`) ?? expectedThreadId ?? options.threadIdFallback,
+                    output: extractStdoutAssistantText(stdoutOutput) ?? '',
+                }));
+            }, options.processTimeoutMs)
+            : undefined;
+        child.on('error', reject);
+        child.on('close', (code) => {
+            if (timeoutId) {
+                clearTimeout(timeoutId);
+            }
+            if (timedOut) {
+                return;
+            }
+            resolve(code ?? 1);
+        });
+    });
+    const combinedConsoleOutput = `${stderrOutput}\n${stdoutOutput}`;
+    const threadId = parseThreadId(combinedConsoleOutput) ?? expectedThreadId ?? options.threadIdFallback;
+    if (!threadId) {
+        throw new Error('Unable to parse the thread ID from ag-ui-wss output.');
+    }
+    const logPath = node_path_1.default.join(options.outputDirectory, `${threadId}.jsonl`);
+    const logContent = await readJsonl(logPath);
+    const runLogContent = extractAppendedLog(previousLogContent, logContent);
+    const assistantOutput = extractAssistantText(runLogContent)
+        ?? extractAssistantText(logContent)
+        ?? extractStdoutAssistantText(stdoutOutput)
+        ?? '';
+    const metrics = parseTimingMetrics(combinedConsoleOutput);
+    const toolCallCount = countToolCalls(runLogContent);
+    const toolCalls = extractToolCallNames(runLogContent);
+    const hasAssistantTextEvent = /"type":"TEXT_MESSAGE_CONTENT"/.test(runLogContent)
+        || /"type":"TEXT_MESSAGE_CONTENT"/.test(logContent);
+    const missingAssistantTextNote = !assistantOutput.trim() && !hasAssistantTextEvent
+        ? 'Run finished without any assistant text content.'
+        : undefined;
+    if (exitCode !== 0) {
+        const hasRunError = runLogContent.includes('"type":"RUN_ERROR"');
+        const hasAssistantText = hasAssistantTextEvent || Boolean(extractStdoutAssistantText(stdoutOutput));
+        if (!hasRunError &&
+            hasAssistantText &&
+            combinedConsoleOutput.includes('idle timeout')) {
+            return {
+                output: assistantOutput,
+                threadId,
+                outputPath: logPath,
+                note: 'Assistant text received but the run did not emit RUN_FINISHED before idle timeout.',
+                toolCallCount,
+                toolCalls,
+                metrics,
+            };
+        }
+        throw new run_result_1.MessageRunError(parseRunError(runLogContent) ?? parseConsoleError(combinedConsoleOutput) ?? `ag-ui-wss exited with code ${exitCode}.`, {
+            threadId,
+            outputPath: logPath,
+            output: assistantOutput,
+            metrics,
+            toolCallCount,
+            toolCalls,
+        });
+    }
+    return {
+        output: assistantOutput,
+        threadId,
+        outputPath: logPath,
+        note: missingAssistantTextNote,
+        toolCallCount,
+        toolCalls,
+        metrics,
+    };
+}

package/dist/executors/execute.d.ts ADDED Viewed

@@ -0,0 +1,16 @@
+import { type KarrotConfig, type KarrotScenarioSelection } from '../utils/config';
+import { type ScenarioRunResult } from '../reports/report';
+/** Options for `execute`. */
+type ExecuteOptions = {
+    /** When provided, applied via `resolveVariables` to both the config and the scenario selection. */
+    variables?: Record<string, unknown>;
+    /** Which scenario file (and scenario IDs) to run; `file` must be non-blank. */
+    scenario: KarrotScenarioSelection;
+};
+/** Value resolved by `execute`. */
+type ExecuteResult = {
+    /** Artifact directory created for this run. */
+    outputDirectory: string;
+    /** Results returned by `runScenario` for the selected scenarios. */
+    results: ScenarioRunResult[];
+    /** Paths of the written report; only set when the config has a `report` section whose `enabled` is not `false`. */
+    reportPaths?: {
+        jsonPath: string;
+        htmlPath: string;
+    };
+};
+/**
+ * Runs the selected scenarios against the configured transport.
+ *
+ * Accepts either a config object or a path to one (loaded with `loadConfig`), applies
+ * `options.variables` when given, loads the scenario module, runs the selected scenarios and,
+ * when reporting is configured and not disabled, writes the run report.
+ *
+ * Rejects when the config version or transport type is unsupported, `context.projectId` is
+ * missing, `scenario.file` is blank, or no scenario matches the selection.
+ */
+export declare function execute(configOrPath: KarrotConfig | string, options: ExecuteOptions): Promise<ExecuteResult>;
+export {};

package/dist/executors/execute.js ADDED Viewed

@@ -0,0 +1,145 @@
+"use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.execute = execute;
+const node_path_1 = __importDefault(require("node:path"));
+const artifact_files_1 = require("../utils/artifact-files");
+const ag_ui_post_1 = require("./adapters/ag-ui-post");
+const ag_ui_1 = require("./adapters/ag-ui");
+const config_1 = require("../utils/config");
+const executor_1 = require("./executor");
+const report_1 = require("../reports/report");
+const scenario_loader_1 = require("../scenarios/scenario-loader");
+function resolveConfiguredPath(baseDirectory, value) {
+    const trimmed = value?.trim();
+    if (!trimmed) {
+        return undefined;
+    }
+    return node_path_1.default.isAbsolute(trimmed) ? trimmed : node_path_1.default.resolve(baseDirectory, trimmed);
+}
+function createAgUiRunner(config) {
+    const transport = config.transport;
+    if (transport.type !== 'ag-ui-wss') {
+        throw new Error('createAgUiRunner requires ag-ui-wss transport config.');
+    }
+    return async ({ message, outputDirectory, threadId, processTimeoutMs }) => await (0, ag_ui_1.runAgUiMessage)({
+        message,
+        env: {
+            ...process.env,
+            ...transport.env,
+        },
+        outputDirectory,
+        threadId,
+        processTimeoutMs: processTimeoutMs ?? transport.processTimeoutMs,
+        allowIdleTimeoutWithAssistantText: transport.allowIdleTimeoutWithAssistantText,
+    });
+}
+function createAgUiPostRunner(config) {
+    const transport = config.transport;
+    if (transport.type !== 'ag-ui-post') {
+        throw new Error('createAgUiPostRunner requires ag-ui-post transport config.');
+    }
+    return async ({ message, outputDirectory, threadId, processTimeoutMs }) => await (0, ag_ui_post_1.runAgUiPostMessage)({
+        message,
+        outputDirectory,
+        threadId,
+        processTimeoutMs: processTimeoutMs ?? transport.processTimeoutMs,
+        injectMessage: transport.injectMessage,
+        injectRunMetadata: transport.injectRunMetadata,
+        run: transport.run ?? transport.request,
+        connect: transport.connect,
+        observe: transport.observe,
+        completionCheck: transport.completionCheck,
+    });
+}
+function isAgUiWssConfig(config) {
+    return config.transport.type === 'ag-ui-wss';
+}
+function isAgUiPostConfig(config) {
+    return config.transport.type === 'ag-ui-post';
+}
+function normalizeConfig(config) {
+    if (config.version !== 1) {
+        throw new Error(`Unsupported karrot config version "${String(config.version)}".`);
+    }
+    if (!isAgUiWssConfig(config) && !isAgUiPostConfig(config)) {
+        throw new Error(`Unsupported transport type "${config.transport.type}".`);
+    }
+    if (!config.context.projectId || typeof config.context.projectId !== 'string') {
+        throw new Error('karrot config requires context.projectId.');
+    }
+    return config;
+}
+function normalizeScenarioSelection(selection) {
+    if (!selection.file?.trim()) {
+        throw new Error('execute requires scenario.file.');
+    }
+    return selection;
+}
+async function execute(configOrPath, options) {
+    const configBaseDirectory = typeof configOrPath === 'string'
+        ? node_path_1.default.dirname(node_path_1.default.isAbsolute(configOrPath) ? configOrPath : node_path_1.default.resolve(process.cwd(), configOrPath))
+        : process.cwd();
+    const loadedConfig = typeof configOrPath === 'string'
+        ? await (0, config_1.loadConfig)(configOrPath)
+        : configOrPath;
+    const resolvedConfig = normalizeConfig(options.variables ? (0, config_1.resolveVariables)(loadedConfig, options.variables) : loadedConfig);
+    const scenarioSelection = normalizeScenarioSelection(options.variables ? (0, config_1.resolveVariables)(options.scenario, options.variables) : options.scenario);
+    const outputDirectory = await (0, artifact_files_1.createRunArtifactDirectory)(resolvedConfig.artifacts?.directory);
+    const scenarioModule = await (0, scenario_loader_1.loadScenarioModule)({
+        scenarioFile: scenarioSelection.file,
+        defaultRelativePath: scenarioSelection.file,
+    });
+    const context = {
+        ...scenarioModule.buildScenarioContext(resolvedConfig.context.projectId),
+        ...resolvedConfig.context,
+    };
+    const selectedScenarios = scenarioModule.scenarioSet.select(scenarioSelection.ids);
+    if (selectedScenarios.length === 0) {
+        throw new Error('No matching scenarios were selected.');
+    }
+    const results = await (0, executor_1.runScenario)(selectedScenarios, {
+        context,
+        env: {
+            ...process.env,
+            ...(resolvedConfig.evaluation?.systemPromptPath
+                ? {
+                    AI_TURN_EVAL_SYSTEM_PROMPT_PATH: resolveConfiguredPath(configBaseDirectory, resolvedConfig.evaluation.systemPromptPath),
+                }
+                : {}),
+            ...(resolvedConfig.evaluation?.promptDirectory
+                ? {
+                    AI_TURN_EVAL_PROMPT_DIRECTORY: resolveConfiguredPath(configBaseDirectory, resolvedConfig.evaluation.promptDirectory),
+                }
+                : {}),
+            ...(isAgUiWssConfig(resolvedConfig) ? resolvedConfig.transport.env : {}),
+        },
+        outputDirectory,
+        messageRunner: isAgUiWssConfig(resolvedConfig)
+            ? createAgUiRunner(resolvedConfig)
+            : createAgUiPostRunner(resolvedConfig),
+        stopOnFailure: resolvedConfig.execution?.stopOnFailure ?? false,
+    });
+    let reportPaths;
+    if (resolvedConfig.report?.enabled !== false && resolvedConfig.report) {
+        reportPaths = await (0, report_1.writeScenarioRunReport)({
+            outputDirectory,
+            runtime: resolvedConfig.report.runtime,
+            environment: resolvedConfig.report.environment,
+            projectName: resolvedConfig.report.projectName,
+            scenarioContext: {
+                ...context,
+                ...resolvedConfig.report.scenarioContext,
+                scenarioFilePath: scenarioModule.scenarioFilePath,
+            },
+            results,
+        });
+    }
+    return {
+        outputDirectory,
+        results,
+        reportPaths,
+    };
+}

package/dist/executors/executor.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+import { type AiScenario, type AiTurn, type BaseAiScenarioContext } from '../scenarios/scenario';
+import { type ScenarioRunResult } from '../reports/report';
+import { type MessageRunResult } from './run-result';
+/** Options shared by every `runScenario` overload. */
+type BaseScenarioExecutionOptions = {
+    /** Environment made available to the run. */
+    env: NodeJS.ProcessEnv;
+    /** Directory that receives the run's artifacts. */
+    outputDirectory: string;
+    /** Presumably stops the run after the first failure — TODO confirm in executor.js. */
+    stopOnFailure?: boolean;
+    /** Presumably an upper bound on run duration in milliseconds — TODO confirm in executor.js. */
+    maxDurationMs?: number;
+    /** Presumably the thread ID the first turn starts from — TODO confirm in executor.js. */
+    initialThreadId?: string;
+};
+/**
+ * Callback that delivers one message to the agent under test and resolves with the run result.
+ * It receives the scenario, turn and context the message belongs to, together with the message
+ * text, environment, artifact directory, and optional thread ID and process timeout.
+ */
+export type ScenarioMessageRunner<TContext extends BaseAiScenarioContext> = (options: {
+    scenario: AiScenario<TContext>;
+    turn: AiTurn<TContext>;
+    context: TContext;
+    message: string;
+    env: NodeJS.ProcessEnv;
+    outputDirectory: string;
+    threadId?: string;
+    processTimeoutMs?: number;
+}) => Promise<MessageRunResult>;
+/**
+ * Runs one scenario, or an array of scenarios, with the given context and options.
+ *
+ * The overloads differ only in shape: a single scenario resolves to a single
+ * `ScenarioRunResult`, an array resolves to an array of results. The first two overloads fix the
+ * context to `BaseAiScenarioContext`; the generic ones accept any context extending it.
+ * `messageRunner` is optional — NOTE(review): the default used when it is omitted is defined in
+ * executor.js, which is not shown here.
+ */
+export declare function runScenario(scenario: AiScenario<BaseAiScenarioContext>, options: BaseScenarioExecutionOptions & {
+    context: BaseAiScenarioContext;
+    messageRunner?: ScenarioMessageRunner<BaseAiScenarioContext>;
+}): Promise<ScenarioRunResult>;
+export declare function runScenario(scenario: AiScenario<BaseAiScenarioContext>[], options: BaseScenarioExecutionOptions & {
+    context: BaseAiScenarioContext;
+    messageRunner?: ScenarioMessageRunner<BaseAiScenarioContext>;
+}): Promise<ScenarioRunResult[]>;
+export declare function runScenario<TContext extends BaseAiScenarioContext>(scenario: AiScenario<TContext>, options: BaseScenarioExecutionOptions & {
+    context: TContext;
+    messageRunner?: ScenarioMessageRunner<TContext>;
+}): Promise<ScenarioRunResult>;
+export declare function runScenario<TContext extends BaseAiScenarioContext>(scenario: AiScenario<TContext>[], options: BaseScenarioExecutionOptions & {
+    context: TContext;
+    messageRunner?: ScenarioMessageRunner<TContext>;
+}): Promise<ScenarioRunResult[]>;
+export {};