npm - @superblocksteam/vite-plugin-file-sync - Versions diffs - 2.0.67 → 2.0.68-next.0 - Mend

@superblocksteam/vite-plugin-file-sync 2.0.67 → 2.0.68-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (116) hide show

package/dist/ai-service/judge/judge-service.d.ts ADDED Viewed

@@ -0,0 +1,161 @@
+/**
+ * Judge service for evaluating AI-generated applications.
+ *
+ * Encapsulates the complete judge evaluation workflow including:
+ * - LLM client access for executing evaluations
+ * - Storage for persisting evaluation results
+ * - MCP server lifecycle management
+ * - Executor orchestration
+ */
+import type { JudgeEvaluation, EvaluationCriteria, JudgeConfig } from "./types.js";
+import type { Logger } from "../../util/logger.js";
+import type { LLMClient } from "../llm/client.js";
+import type { LLMProvider } from "../llm/types.js";
+import type { JudgeStorage, StoredEvaluation } from "./storage/index.js";
+import type { PlaywrightMcpServerManager } from "../mcp/playwright-server.js";
+import type { LLMProviderConfig } from "@superblocksteam/library-shared/types";
+/**
+ * Options for judge evaluation, consumed by JudgeService.evaluateApplication.
+ */
+export interface JudgeEvaluationOptions {
+    /** Application URL to evaluate */
+    appUrl: string;
+    /** JWT token for AI Gateway authentication */
+    jwt?: string;
+    /** LLM configuration (provider, thinking opts, disabled tools). NOTE(review): JudgeService.evaluateApplication does not read this field in this build - confirm the intended consumer */
+    llmConfig?: LLMProviderConfig;
+    /** Optional websocket endpoint to reuse an existing Playwright browser */
+    connectWsEndpoint?: string;
+    /** Raw storage state data to seed Playwright context */
+    storageStateData?: any;
+    /** Session storage entries to seed for the app origin */
+    sessionStorageData?: {
+        origin: string;
+        items: Array<{
+            name: string;
+            value: string;
+        }>;
+    };
+    /** Additional origins storage to seed (e.g., auth domain local/session storage) */
+    extraOrigins?: Array<{
+        origin: string;
+        localStorage: Array<{
+            name: string;
+            value: string;
+        }>;
+        sessionStorage?: Array<{
+            name: string;
+            value: string;
+        }>;
+    }>;
+    /** Optional evaluation criteria (auto-generated from the prompt if not provided) */
+    criteria?: EvaluationCriteria;
+    /** Optional judge configuration overrides */
+    config?: Partial<JudgeConfig>;
+    /** Complexity level used when criteria are auto-generated (falls back to "medium"); the judge model task is currently fixed and does not depend on it */
+    complexity?: "low" | "medium" | "high";
+    /** Optional path to Playwright storage state for authentication */
+    storageStatePath?: string;
+    /** Whether the Playwright MCP browser should run headless */
+    playwrightHeadless?: boolean;
+    /** Browser engine to use for Playwright MCP */
+    playwrightBrowser?: "chromium" | "firefox" | "webkit";
+    /** External Playwright MCP server to reuse instead of starting a new one; when set, JudgeService does not start its own server, so the browser/headless/storage-state launch options are not applied */
+    playwrightServerUrl?: string;
+}
+/**
+ * A judge evaluation plus the identifiers it was run for, as passed to JudgeService.evaluateApplication.
+ */
+export interface JudgeEvaluationResult extends JudgeEvaluation {
+    /** Prompt ID that was evaluated */
+    promptId: string;
+    /** Application ID */
+    appId: string;
+}
+/**
+ * Judge service for orchestrating application evaluations.
+ *
+ * Provides a high-level API for running judge evaluations
+ * and managing their lifecycle.
+ */
+export declare class JudgeService {
+    private llmClient;
+    private llmProvider;
+    private storage;
+    private mcpServerManager;
+    private logger;
+    constructor(config: {
+        llmClient: LLMClient;
+        llmProvider: LLMProvider;
+        storage: JudgeStorage;
+        mcpServerManager: PlaywrightMcpServerManager;
+        logger: Logger;
+    });
+    /**
+     * Evaluates an application against a prompt.
+     *
+     * This is the main entry point for judge evaluations. It:
+     * 1. Uses options.playwrightServerUrl, or else ensures the managed MCP server is running
+     * 2. Generates or uses provided criteria
+     * 3. Executes the evaluation via JudgeExecutor
+     * 4. Stores the result
+     * 5. Returns the evaluation
+     *
+     * @param promptId - Unique identifier for the prompt
+     * @param prompt - The prompt text that was used to generate the app
+     * @param appId - Application identifier
+     * @param options - Evaluation options
+     * @returns Complete evaluation result
+     * @throws Error if evaluation fails
+     */
+    evaluateApplication(promptId: string, prompt: string, appId: string, options: JudgeEvaluationOptions): Promise<JudgeEvaluationResult>;
+    /**
+     * Retrieves stored evaluations.
+     *
+     * @param promptId - Optional prompt ID to filter by
+     * @returns Array of stored evaluations
+     */
+    getEvaluations(promptId?: string): Promise<StoredEvaluation[]>;
+    /**
+     * Retrieves evaluations for a specific branch.
+     *
+     * @param branchName - Git branch name
+     * @returns Array of stored evaluations for the branch
+     */
+    getEvaluationsByBranch(branchName: string): Promise<StoredEvaluation[]>;
+    /**
+     * Ensures the MCP server is running and returns its URL.
+     * Starts the server with the given settings if it is not running; if it is
+     * already running, every argument is ignored and the current URL is returned.
+     *
+     * @param storageStatePath - Optional path to Playwright storage state for authentication
+     * @returns MCP server URL
+     */
+    private ensureMcpServer;
+    /**
+     * Gets the current git branch name.
+     *
+     * @returns Branch name or 'unknown' if git is not available
+     */
+    private getCurrentBranch;
+    /**
+     * Gets the current git commit SHA.
+     *
+     * @returns Commit SHA or 'unknown' if git is not available
+     */
+    private getCurrentCommitSha;
+    /**
+     * Selects the optimal model task based on complexity.
+     * NOTE: not called by evaluateApplication, which pins the "generateBroadEdit" task.
+     * @param complexity - Task complexity level
+     * @returns LLM task identifier for the provider
+     */
+    private getModelTaskForComplexity;
+    /**
+     * Cleans up resources used by the judge service.
+     *
+     * Stops the MCP server if running.
+     */
+    dispose(): Promise<void>;
+}
+//# sourceMappingURL=judge-service.d.ts.map

package/dist/ai-service/judge/judge-service.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"judge-service.d.ts","sourceRoot":"","sources":["../../../src/ai-service/judge/judge-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAIH,OAAO,KAAK,EACV,eAAe,EACf,kBAAkB,EAClB,WAAW,EACZ,MAAM,YAAY,CAAC;AACpB,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAC;AACnD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,kBAAkB,CAAC;AAClD,OAAO,KAAK,EAAE,WAAW,EAAW,MAAM,iBAAiB,CAAC;AAC5D,OAAO,KAAK,EAAE,YAAY,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAC;AACzE,OAAO,KAAK,EAAE,0BAA0B,EAAE,MAAM,6BAA6B,CAAC;AAC9E,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,uCAAuC,CAAC;AAE/E;;GAEG;AACH,MAAM,WAAW,sBAAsB;IACrC,kCAAkC;IAClC,MAAM,EAAE,MAAM,CAAC;IAEf,8CAA8C;IAC9C,GAAG,CAAC,EAAE,MAAM,CAAC;IAEb,kEAAkE;IAClE,SAAS,CAAC,EAAE,iBAAiB,CAAC;IAE9B,0EAA0E;IAC1E,iBAAiB,CAAC,EAAE,MAAM,CAAC;IAE3B,wDAAwD;IACxD,gBAAgB,CAAC,EAAE,GAAG,CAAC;IAEvB,yDAAyD;IACzD,kBAAkB,CAAC,EAAE;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,KAAK,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KAC/C,CAAC;IAEF,mFAAmF;IACnF,YAAY,CAAC,EAAE,KAAK,CAAC;QACnB,MAAM,EAAE,MAAM,CAAC;QACf,YAAY,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACrD,cAAc,CAAC,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;KACzD,CAAC,CAAC;IAEH,oEAAoE;IACpE,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B,6CAA6C;IAC7C,MAAM,CAAC,EAAE,OAAO,CAAC,WAAW,CAAC,CAAC;IAE9B,2CAA2C;IAC3C,UAAU,CAAC,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;IAEvC,mEAAmE;IACnE,gBAAgB,CAAC,EAAE,MAAM,CAAC;IAE1B,6DAA6D;IAC7D,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAE7B,+CAA+C;IAC/C,iBAAiB,CAAC,EAAE,UAAU,GAAG,SAAS,GAAG,QAAQ,CAAC;IAEtD,4EAA4E;IAC5E,mBAAmB,CAAC,EAAE,MAAM,CAAC;CAC9B;AAED;;GAEG;AACH,MAAM,WAAW,qBAAsB,SAAQ,eAAe;IAC5D,mCAAmC;IACnC,QAAQ,EAAE,MAAM,CAAC;IAEjB,qBAAqB;IACrB,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;GAKG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,SAAS,CAAY;IAC7B,OAAO,CAAC,WAAW,CAAc;IACjC,OAAO,CAAC,OAAO,CAAe;IAC9B,OAAO,CAAC,gBAAgB,CAA6B;IACrD,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,EAAE;QAClB,SAAS,EAAE,SAAS,CAAC;QACrB,WAAW,EAAE,WAAW,CAAC;QACzB,OAAO,EAAE,YAAY,CAAC;QACtB,gBAAgB,EAAE,0B
AA0B,CAAC;QAC7C,MAAM,EAAE,MAAM,CAAC;KAChB;IAQD;;;;;;;;;;;;;;;;OAgBG;IACG,mBAAmB,CACvB,QAAQ,EAAE,MAAM,EAChB,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,sBAAsB,GAC9B,OAAO,CAAC,qBAAqB,CAAC;IA0GjC;;;;;OAKG;IACG,cAAc,CAAC,QAAQ,CAAC,EAAE,MAAM,GAAG,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAOpE;;;;;OAKG;IACG,sBAAsB,CAC1B,UAAU,EAAE,MAAM,GACjB,OAAO,CAAC,gBAAgB,EAAE,CAAC;IAI9B;;;;;;;OAOG;YACW,eAAe;IA8C7B;;;;OAIG;YACW,gBAAgB;IAa9B;;;;OAIG;YACW,mBAAmB;IAajC;;;;;OAKG;IACH,OAAO,CAAC,yBAAyB;IAkBjC;;;;OAIG;IACG,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAO/B"}

package/dist/ai-service/judge/judge-service.js ADDED Viewed

@@ -0,0 +1,241 @@
+/**
+ * Judge service for evaluating AI-generated applications.
+ *
+ * Encapsulates the complete judge evaluation workflow including:
+ * - LLM client access for executing evaluations
+ * - Storage for persisting evaluation results
+ * - MCP server lifecycle management
+ * - Executor orchestration
+ */
+import { JudgeExecutor } from "./judge-executor.js";
+import { buildCriteriaFromPrompts } from "./prompts/system-prompt.js";
+/**
+ * Judge service for orchestrating application evaluations.
+ *
+ * Provides a high-level API for running judge evaluations
+ * and managing their lifecycle.
+ */
+export class JudgeService {
+    llmClient;
+    llmProvider;
+    storage;
+    mcpServerManager;
+    logger;
+    constructor(config) {
+        this.llmClient = config.llmClient;
+        this.llmProvider = config.llmProvider;
+        this.storage = config.storage;
+        this.mcpServerManager = config.mcpServerManager;
+        this.logger = config.logger;
+    }
+    /**
+     * Evaluates an application against a prompt.
+     *
+     * This is the main entry point for judge evaluations. It:
+     * 1. Ensures MCP server is running
+     * 2. Generates or uses provided criteria
+     * 3. Executes the evaluation via JudgeExecutor
+     * 4. Stores the result
+     * 5. Returns the evaluation
+     *
+     * @param promptId - Unique identifier for the prompt
+     * @param prompt - The prompt text that was used to generate the app
+     * @param appId - Application identifier
+     * @param options - Evaluation options
+     * @returns Complete evaluation result
+     * @throws Error if evaluation fails
+     */
+    async evaluateApplication(promptId, prompt, appId, options) {
+        this.logger.info(`Starting judge evaluation: promptId=${promptId}, appId=${appId}, appUrl=${options.appUrl}`);
+        try {
+            const _useExternalServer = !!options.playwrightServerUrl;
+            let mcpUrl = options.playwrightServerUrl;
+            if (!mcpUrl) {
+                mcpUrl = await this.ensureMcpServer(options.storageStatePath, options.playwrightBrowser, options.playwrightHeadless, options.jwt, options.connectWsEndpoint, options.storageStateData, options.appUrl);
+            }
+            // Generate criteria if not provided
+            const criteria = options.criteria ||
+                buildCriteriaFromPrompts([prompt], options.complexity || "medium");
+            // Select model based on complexity - create LanguageModel instance from provider
+            const model = this.llmProvider.modelForTask("generateBroadEdit");
+            // Build judge config
+            const judgeConfig = {
+                ...options.config,
+                playwrightMcpUrl: mcpUrl,
+                jwt: options.jwt,
+            };
+            // Create executor
+            const executor = new JudgeExecutor(this.llmClient, model, this.logger, judgeConfig);
+            // Initialize executor
+            await executor.initialize();
+            try {
+                // Execute evaluation
+                // Note: JudgeExecutor expects a simulation result, but we'll adapt it
+                const simulationResult = {
+                    promptId,
+                    success: true, // Assume success since we're evaluating the final result
+                    stepResults: [{ prompt }],
+                    tokens: { input: 0, output: 0, total: 0 },
+                    duration: 0,
+                };
+                const evaluation = await executor.evaluateSimulation(simulationResult, criteria, options.appUrl);
+                // Get current git metadata for tracking
+                const branchName = await this.getCurrentBranch();
+                const commitSha = await this.getCurrentCommitSha();
+                // Store evaluation
+                const storedEvaluation = {
+                    timestamp: new Date().toISOString(),
+                    promptId,
+                    branchName,
+                    commitSha,
+                    appId,
+                    prompt,
+                    evaluation,
+                };
+                await this.storage.saveEvaluation(storedEvaluation);
+                this.logger.info(`Judge evaluation completed and stored: promptId=${promptId}, appId=${appId}, score=${evaluation.overallScore}, passed=${evaluation.passed}`);
+                // Return result with metadata
+                return {
+                    ...evaluation,
+                    promptId,
+                    appId,
+                };
+            }
+            finally {
+                // Always cleanup executor
+                await executor.cleanup();
+            }
+        }
+        catch (error) {
+            this.logger.error(`Judge evaluation failed: ${String(error)}`, {
+                error: {
+                    kind: "JudgeEvaluationError",
+                    message: String(error),
+                    stack: error instanceof Error ? error.stack : undefined,
+                },
+            });
+            throw error;
+        }
+    }
+    /**
+     * Retrieves stored evaluations.
+     *
+     * @param promptId - Optional prompt ID to filter by
+     * @returns Array of stored evaluations
+     */
+    async getEvaluations(promptId) {
+        if (promptId) {
+            return this.storage.getEvaluationsByPrompt(promptId);
+        }
+        return this.storage.getEvaluations();
+    }
+    /**
+     * Retrieves evaluations for a specific branch.
+     *
+     * @param branchName - Git branch name
+     * @returns Array of stored evaluations for the branch
+     */
+    async getEvaluationsByBranch(branchName) {
+        return this.storage.getEvaluationsByBranch(branchName);
+    }
+    /**
+     * Ensures the MCP server is running.
+     *
+     * Starts the server if not already running.
+     *
+     * @param storageStatePath - Optional path to Playwright storage state for authentication
+     * @returns MCP server URL
+     */
+    async ensureMcpServer(storageStatePath, browser = "chromium", headless = true, jwt, connectWsEndpoint, storageStateData, appUrl, sessionStorageData, extraOrigins) {
+        if (!this.mcpServerManager.isRunning()) {
+            this.logger.info(`Starting Playwright MCP server for judge`, storageStatePath
+                ? `with storage state: ${storageStatePath}`
+                : "without storage state");
+            return await this.mcpServerManager.start({
+                browser,
+                headless,
+                storageStatePath,
+                jwt,
+                connectWsEndpoint,
+                storageStateData,
+                appUrl,
+                sessionStorageData,
+                extraOrigins,
+            });
+        }
+        const url = this.mcpServerManager.getUrl();
+        if (!url) {
+            throw new Error("MCP server is running but URL is not available");
+        }
+        return url;
+    }
+    /**
+     * Gets the current git branch name.
+     *
+     * @returns Branch name or 'unknown' if git is not available
+     */
+    async getCurrentBranch() {
+        try {
+            const { execSync } = await import("child_process");
+            const branch = execSync("git rev-parse --abbrev-ref HEAD", {
+                encoding: "utf-8",
+                cwd: process.cwd(),
+            }).trim();
+            return branch;
+        }
+        catch {
+            return "unknown";
+        }
+    }
+    /**
+     * Gets the current git commit SHA.
+     *
+     * @returns Commit SHA or 'unknown' if git is not available
+     */
+    async getCurrentCommitSha() {
+        try {
+            const { execSync } = await import("child_process");
+            const sha = execSync("git rev-parse HEAD", {
+                encoding: "utf-8",
+                cwd: process.cwd(),
+            }).trim();
+            return sha;
+        }
+        catch {
+            return "unknown";
+        }
+    }
+    /**
+     * Selects the optimal model task based on complexity.
+     *
+     * @param complexity - Task complexity level
+     * @returns LLM task identifier for the provider
+     */
+    getModelTaskForComplexity(complexity) {
+        switch (complexity) {
+            case "low":
+                // Fast evaluation for simple tasks - use explain task (lighter model)
+                return "explain";
+            case "medium":
+                // Balanced evaluation - use broad edit task
+                return "generateBroadEdit";
+            case "high":
+                // Thorough evaluation for complex tasks - use full app gen task
+                return "generateFullAppGen";
+            default:
+                return "generateBroadEdit";
+        }
+    }
+    /**
+     * Cleans up resources used by the judge service.
+     *
+     * Stops the MCP server if running.
+     */
+    async dispose() {
+        this.logger.info("Disposing judge service");
+        if (this.mcpServerManager.isRunning()) {
+            await this.mcpServerManager.stop();
+        }
+    }
+}
+//# sourceMappingURL=judge-service.js.map

package/dist/ai-service/judge/judge-service.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"judge-service.js","sourceRoot":"","sources":["../../../src/ai-service/judge/judge-service.ts"],"names":[],"mappings":"AAAA;;;;;;;;GAQG;AAEH,OAAO,EAAE,aAAa,EAAE,MAAM,qBAAqB,CAAC;AACpD,OAAO,EAAE,wBAAwB,EAAE,MAAM,4BAA4B,CAAC;AA8EtE;;;;;GAKG;AACH,MAAM,OAAO,YAAY;IACf,SAAS,CAAY;IACrB,WAAW,CAAc;IACzB,OAAO,CAAe;IACtB,gBAAgB,CAA6B;IAC7C,MAAM,CAAS;IAEvB,YAAY,MAMX;QACC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;QAClC,IAAI,CAAC,WAAW,GAAG,MAAM,CAAC,WAAW,CAAC;QACtC,IAAI,CAAC,OAAO,GAAG,MAAM,CAAC,OAAO,CAAC;QAC9B,IAAI,CAAC,gBAAgB,GAAG,MAAM,CAAC,gBAAgB,CAAC;QAChD,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC;IAC9B,CAAC;IAED;;;;;;;;;;;;;;;;OAgBG;IACH,KAAK,CAAC,mBAAmB,CACvB,QAAgB,EAChB,MAAc,EACd,KAAa,EACb,OAA+B;QAE/B,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,uCAAuC,QAAQ,WAAW,KAAK,YAAY,OAAO,CAAC,MAAM,EAAE,CAC5F,CAAC;QAEF,IAAI,CAAC;YACH,MAAM,kBAAkB,GAAG,CAAC,CAAC,OAAO,CAAC,mBAAmB,CAAC;YACzD,IAAI,MAAM,GAAG,OAAO,CAAC,mBAAmB,CAAC;YACzC,IAAI,CAAC,MAAM,EAAE,CAAC;gBACZ,MAAM,GAAG,MAAM,IAAI,CAAC,eAAe,CACjC,OAAO,CAAC,gBAAgB,EACxB,OAAO,CAAC,iBAAiB,EACzB,OAAO,CAAC,kBAAkB,EAC1B,OAAO,CAAC,GAAG,EACX,OAAO,CAAC,iBAAiB,EACzB,OAAO,CAAC,gBAAgB,EACxB,OAAO,CAAC,MAAM,CACf,CAAC;YACJ,CAAC;YAED,oCAAoC;YACpC,MAAM,QAAQ,GACZ,OAAO,CAAC,QAAQ;gBAChB,wBAAwB,CAAC,CAAC,MAAM,CAAC,EAAE,OAAO,CAAC,UAAU,IAAI,QAAQ,CAAC,CAAC;YAErE,iFAAiF;YACjF,MAAM,KAAK,GAAG,IAAI,CAAC,WAAW,CAAC,YAAY,CAAC,mBAAmB,CAAC,CAAC;YAEjE,qBAAqB;YACrB,MAAM,WAAW,GAAyB;gBACxC,GAAG,OAAO,CAAC,MAAM;gBACjB,gBAAgB,EAAE,MAAM;gBACxB,GAAG,EAAE,OAAO,CAAC,GAAG;aACjB,CAAC;YAEF,kBAAkB;YAClB,MAAM,QAAQ,GAAG,IAAI,aAAa,CAChC,IAAI,CAAC,SAAS,EACd,KAAK,EACL,IAAI,CAAC,MAAM,EACX,WAAW,CACZ,CAAC;YAEF,sBAAsB;YACtB,MAAM,QAAQ,CAAC,UAAU,EAAE,CAAC;YAE5B,IAAI,CAAC;gBACH,qBAAqB;gBACrB,sEAAsE;gBACtE,MAAM,gBAAgB,GAAG;oBACvB,QAAQ;oBACR,OAAO,EAAE,IAAI,EAAE,yDAAyD;oBACxE,WAAW,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;oBACzB,MAAM,EAAE,EAAE,KAAK,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC,EAAE,KAAK,EAAE,CAAC,EAAE;oBACzC,QAAQ,EAAE,CAAC;iBACZ,CAAC;gBAEF,MAAM,UAAU,GAAG,MAAM,QAAQ,CAAC,kBAAkB,CAClD,gBAAgB,EAChB,QAAQ,EACR,O
AAO,CAAC,MAAM,CACf,CAAC;gBAEF,wCAAwC;gBACxC,MAAM,UAAU,GAAG,MAAM,IAAI,CAAC,gBAAgB,EAAE,CAAC;gBACjD,MAAM,SAAS,GAAG,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;gBAEnD,mBAAmB;gBACnB,MAAM,gBAAgB,GAAqB;oBACzC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;oBACnC,QAAQ;oBACR,UAAU;oBACV,SAAS;oBACT,KAAK;oBACL,MAAM;oBACN,UAAU;iBACX,CAAC;gBAEF,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,gBAAgB,CAAC,CAAC;gBAEpD,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,mDAAmD,QAAQ,WAAW,KAAK,WAAW,UAAU,CAAC,YAAY,YAAY,UAAU,CAAC,MAAM,EAAE,CAC7I,CAAC;gBAEF,8BAA8B;gBAC9B,OAAO;oBACL,GAAG,UAAU;oBACb,QAAQ;oBACR,KAAK;iBACN,CAAC;YACJ,CAAC;oBAAS,CAAC;gBACT,0BAA0B;gBAC1B,MAAM,QAAQ,CAAC,OAAO,EAAE,CAAC;YAC3B,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,MAAM,CAAC,KAAK,CAAC,4BAA4B,MAAM,CAAC,KAAK,CAAC,EAAE,EAAE;gBAC7D,KAAK,EAAE;oBACL,IAAI,EAAE,sBAAsB;oBAC5B,OAAO,EAAE,MAAM,CAAC,KAAK,CAAC;oBACtB,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS;iBACxD;aACF,CAAC,CAAC;YACH,MAAM,KAAK,CAAC;QACd,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,cAAc,CAAC,QAAiB;QACpC,IAAI,QAAQ,EAAE,CAAC;YACb,OAAO,IAAI,CAAC,OAAO,CAAC,sBAAsB,CAAC,QAAQ,CAAC,CAAC;QACvD,CAAC;QACD,OAAO,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;IACvC,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,sBAAsB,CAC1B,UAAkB;QAElB,OAAO,IAAI,CAAC,OAAO,CAAC,sBAAsB,CAAC,UAAU,CAAC,CAAC;IACzD,CAAC;IAED;;;;;;;OAOG;IACK,KAAK,CAAC,eAAe,CAC3B,gBAAyB,EACzB,UAA6C,UAAU,EACvD,WAAoB,IAAI,EACxB,GAAY,EACZ,iBAA0B,EAC1B,gBAAsB,EACtB,MAAe,EACf,kBAGC,EACD,YAIE;QAEF,IAAI,CAAC,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,EAAE,CAAC;YACvC,IAAI,CAAC,MAAM,CAAC,IAAI,CACd,0CAA0C,EAC1C,gBAAgB;gBACd,CAAC,CAAC,uBAAuB,gBAAgB,EAAE;gBAC3C,CAAC,CAAC,uBAAuB,CAC5B,CAAC;YACF,OAAO,MAAM,IAAI,CAAC,gBAAgB,CAAC,KAAK,CAAC;gBACvC,OAAO;gBACP,QAAQ;gBACR,gBAAgB;gBAChB,GAAG;gBACH,iBAAiB;gBACjB,gBAAgB;gBAChB,MAAM;gBACN,kBAAkB;gBAClB,YAAY;aACb,CAAC,CAAC;QACL,CAAC;QAED,MAAM,GAAG,GAAG,IAAI,CAAC,gBAAgB,CAAC,MAAM,EAAE,CAAC;QAC3C,IAAI,CAAC,GAAG,EAAE,CAAC;YACT,MAAM,IAAI,KAAK,CAAC,gDAAgD,CAAC,CAAC;QACpE,CAAC;QAED,OAAO,GAAG,CAAC;IACb,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,gBAAg
B;QAC5B,IAAI,CAAC;YACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACnD,MAAM,MAAM,GAAG,QAAQ,CAAC,iCAAiC,EAAE;gBACzD,QAAQ,EAAE,OAAO;gBACjB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;aACnB,CAAC,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,MAAM,CAAC;QAChB,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED;;;;OAIG;IACK,KAAK,CAAC,mBAAmB;QAC/B,IAAI,CAAC;YACH,MAAM,EAAE,QAAQ,EAAE,GAAG,MAAM,MAAM,CAAC,eAAe,CAAC,CAAC;YACnD,MAAM,GAAG,GAAG,QAAQ,CAAC,oBAAoB,EAAE;gBACzC,QAAQ,EAAE,OAAO;gBACjB,GAAG,EAAE,OAAO,CAAC,GAAG,EAAE;aACnB,CAAC,CAAC,IAAI,EAAE,CAAC;YACV,OAAO,GAAG,CAAC;QACb,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,SAAS,CAAC;QACnB,CAAC;IACH,CAAC;IAED;;;;;OAKG;IACK,yBAAyB,CAC/B,UAAqC;QAErC,QAAQ,UAAU,EAAE,CAAC;YACnB,KAAK,KAAK;gBACR,sEAAsE;gBACtE,OAAO,SAAS,CAAC;YACnB,KAAK,QAAQ;gBACX,4CAA4C;gBAC5C,OAAO,mBAAmB,CAAC;YAC7B,KAAK,MAAM;gBACT,gEAAgE;gBAChE,OAAO,oBAAoB,CAAC;YAC9B;gBACE,OAAO,mBAAmB,CAAC;QAC/B,CAAC;IACH,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,OAAO;QACX,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,CAAC;QAE5C,IAAI,IAAI,CAAC,gBAAgB,CAAC,SAAS,EAAE,EAAE,CAAC;YACtC,MAAM,IAAI,CAAC,gBAAgB,CAAC,IAAI,EAAE,CAAC;QACrC,CAAC;IACH,CAAC;CACF"}

package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts ADDED Viewed

@@ -0,0 +1,37 @@
+/**
+ * Evaluation criteria builder for judge assessments.
+ *
+ * Provides utilities for creating structured evaluation criteria
+ * based on simulation prompts and complexity levels.
+ */
+import type { EvaluationCriteria } from "../types.js";
+/**
+ * Simulation prompt shape matching the test infrastructure (module-private: declared without `export`).
+ */
+interface SimulationPrompt {
+    id: string;
+    name: string;
+    description: string;
+    prompts: string[];
+    complexity: "low" | "medium" | "high";
+}
+/**
+ * Builds evaluation criteria from a simulation prompt.
+ *
+ * Analyzes the prompt structure and content to generate
+ * appropriate evaluation criteria for the judge.
+ * Not the same function as `buildCriteriaFromPrompts` (plural), which judge-service.js imports from ./prompts/system-prompt.js.
+ * @param prompt - Simulation prompt object
+ * @returns Structured evaluation criteria
+ */
+export declare function buildCriteriaFromPrompt(prompt: SimulationPrompt): EvaluationCriteria;
+/**
+ * Creates minimal criteria for basic testing.
+ *
+ * Used when specific criteria cannot be extracted.
+ * NOTE(review): no caller is visible in this part of the diff - confirm where this fallback is used.
+ * @returns Minimal evaluation criteria
+ */
+export declare function createMinimalCriteria(): EvaluationCriteria;
+export {};
+//# sourceMappingURL=evaluation-criteria.d.ts.map

package/dist/ai-service/judge/prompts/evaluation-criteria.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"evaluation-criteria.d.ts","sourceRoot":"","sources":["../../../../src/ai-service/judge/prompts/evaluation-criteria.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AAEH,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEtD;;GAEG;AACH,UAAU,gBAAgB;IACxB,EAAE,EAAE,MAAM,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,WAAW,EAAE,MAAM,CAAC;IACpB,OAAO,EAAE,MAAM,EAAE,CAAC;IAClB,UAAU,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;CACvC;AAED;;;;;;;;GAQG;AACH,wBAAgB,uBAAuB,CACrC,MAAM,EAAE,gBAAgB,GACvB,kBAAkB,CAwCpB;AAkTD;;;;;;GAMG;AACH,wBAAgB,qBAAqB,IAAI,kBAAkB,CAS1D"}