npm - @orq-ai/evaluatorq - Versions diffs - 1.2.2 → 1.2.3-rc.1 - Mend

@orq-ai/evaluatorq 1.2.2 → 1.2.3-rc.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (93) hide show

package/dist/lib/integrations/simulation/utils/extract-json.d.ts ADDED Viewed

@@ -0,0 +1,17 @@
+/**
+ * JSON extraction utilities for parsing LLM responses.
+ */
+/**
+ * Extract JSON from LLM response, handling markdown code blocks.
+ *
+ * Robust extraction that handles:
+ * - ```json ... ``` blocks
+ * - ``` ... ``` blocks (no language specifier)
+ * - Plain JSON arrays or objects (no code block)
+ * - Multiple code blocks (returns first one)
+ *
+ * The returned text is not guaranteed to be parseable JSON: when nothing
+ * better is found the trimmed input is returned as-is, so callers still
+ * need to parse and validate the result.
+ *
+ * @param content - Raw LLM response content
+ * @returns Extracted JSON string, stripped of surrounding whitespace;
+ *          an empty string when `content` is empty
+ */
+export declare function extractJsonFromResponse(content: string): string;
+//# sourceMappingURL=extract-json.d.ts.map

package/dist/lib/integrations/simulation/utils/extract-json.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"extract-json.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/utils/extract-json.ts"],"names":[],"mappings":"AAAA;;GAEG;AAQH;;;;;;;;;;;GAWG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,MAAM,GAAG,MAAM,CAqB/D"}

package/dist/lib/integrations/simulation/utils/extract-json.js ADDED Viewed

@@ -0,0 +1,106 @@
+/**
+ * JSON extraction utilities for parsing LLM responses.
+ */
+/**
+ * Regex pattern for extracting JSON from markdown code blocks.
+ * Handles ```json ... ``` and ``` ... ``` blocks.
+ *
+ * Capture group 1 holds the text between the fences. The capture is lazy,
+ * so the match ends at the first closing fence, and the `i` flag makes the
+ * `json` tag case-insensitive. There is no `g` flag, so `exec` always
+ * searches from the start of the input.
+ *
+ * NOTE(review): only the `json` tag is recognised. A fence opened with any
+ * other tag (for example ```js) still matches, but the tag text becomes
+ * part of capture group 1 — confirm whether callers need that stripped.
+ */
+const JSON_BLOCK_PATTERN = /```(?:json)?\s*\n?([\s\S]*?)\n?```/i;
+/**
+ * Extract JSON from LLM response, handling markdown code blocks.
+ *
+ * Robust extraction that handles:
+ * - ```json ... ``` blocks
+ * - ``` ... ``` blocks (no language specifier)
+ * - Plain JSON arrays or objects (no code block)
+ * - Multiple code blocks (returns first one)
+ *
+ * Attempts, in order, returning the first that yields a non-empty string:
+ * 1. The body of the first fenced block matched by JSON_BLOCK_PATTERN, trimmed.
+ *    A fence whose captured body is empty is skipped and the next steps run.
+ * 2. A balanced `[...]` span located by extractBalanced.
+ * 3. A balanced `{...}` span located by extractBalanced.
+ * 4. The whole input, trimmed.
+ *
+ * The result is not guaranteed to be parseable JSON; steps 1 and 4 never
+ * parse, and steps 2-3 may fall back to an unparseable span.
+ *
+ * NOTE(review): arrays are tried before objects, so for input such as
+ * `{"a": [1]}` (no code fence) step 2 returns the inner `[1]` and the
+ * enclosing object is never considered — confirm this is intended.
+ *
+ * @param content - Raw LLM response content
+ * @returns Extracted JSON string, stripped of surrounding whitespace;
+ *          an empty string when `content` is falsy
+ */
+export function extractJsonFromResponse(content) {
+    if (!content) {
+        return "";
+    }
+    // Try to extract from code block using regex
+    const match = JSON_BLOCK_PATTERN.exec(content);
+    if (match?.[1]) {
+        return match[1].trim();
+    }
+    // No usable code block — look for a balanced JSON array first, then an
+    // object, by matching balanced brackets/braces
+    const arrayJson = extractBalanced(content, "[", "]");
+    if (arrayJson)
+        return arrayJson;
+    const objectJson = extractBalanced(content, "{", "}");
+    if (objectJson)
+        return objectJson;
+    // Fallback: return trimmed content as-is
+    return content.trim();
+}
+/**
+ * Scan `content` from `startIdx` and return the span that ends where the
+ * nesting depth of `open`/`close` first returns to zero.
+ * Respects JSON string literals to avoid counting brackets inside strings.
+ *
+ * Only the given `open`/`close` pair is counted; other bracket kinds are
+ * ignored. String tracking starts in the "outside a string" state at
+ * `startIdx`, whatever precedes it. The only caller in this file passes the
+ * index of an `open` character.
+ *
+ * NOTE(review): the backslash check runs before the in-string check, so a
+ * backslash outside a string also causes the following character to be
+ * skipped — confirm this is acceptable for non-JSON surrounding text.
+ *
+ * @param content - Text to scan
+ * @param open - Opening character to count (e.g. "[" or "{")
+ * @param close - Matching closing character
+ * @param startIdx - Index at which scanning starts; also the start of the returned slice
+ * @returns The slice from `startIdx` through the balancing `close`, or
+ *          `null` if the end of `content` is reached first
+ */
+function extractBalancedFrom(content, open, close, startIdx) {
+    let depth = 0;
+    let inString = false;
+    let escaped = false;
+    for (let i = startIdx; i < content.length; i++) {
+        const ch = content[i];
+        if (escaped) { // previous character was a backslash: skip this one
+            escaped = false;
+            continue;
+        }
+        if (ch === "\\") {
+            escaped = true;
+            continue;
+        }
+        if (ch === '"') { // toggle string state on every unescaped double quote
+            inString = !inString;
+            continue;
+        }
+        if (inString)
+            continue;
+        if (ch === open) {
+            depth++;
+        }
+        else if (ch === close) {
+            depth--;
+            if (depth === 0) {
+                return content.slice(startIdx, i + 1);
+            }
+        }
+    }
+    return null;
+}
+/**
+ * Find a balanced `open`...`close` span in content that parses as JSON.
+ * Tries each occurrence of `open`, left to right, and returns the first
+ * balanced span that `JSON.parse` accepts.
+ * Falls back to the first balanced extraction if none parse.
+ *
+ * Every occurrence of `open` is a candidate start, including ones nested
+ * inside an earlier candidate, so an inner span can be returned when the
+ * enclosing one does not parse.
+ *
+ * @param content - Text to search
+ * @param open - Opening character (e.g. "[" or "{")
+ * @param close - Matching closing character
+ * @returns The selected span, or `null` when no balanced span exists
+ */
+function extractBalanced(content, open, close) {
+    let firstMatch = null;
+    let searchFrom = 0;
+    while (searchFrom < content.length) {
+        const idx = content.indexOf(open, searchFrom);
+        if (idx === -1)
+            break;
+        const candidate = extractBalancedFrom(content, open, close, idx);
+        if (!candidate) {
+            searchFrom = idx + 1;
+            continue;
+        }
+        if (!firstMatch)
+            firstMatch = candidate;
+        try {
+            JSON.parse(candidate);
+            return candidate; // Valid JSON — use it
+        }
+        catch {
+            // Not valid JSON, try next occurrence
+        }
+        searchFrom = idx + 1;
+    }
+    return firstMatch;
+}

package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts ADDED Viewed

@@ -0,0 +1,34 @@
+/**
+ * Prompt building utilities for personas, scenarios, and datapoints.
+ *
+ * These are the TypeScript equivalents of:
+ * - Python Persona.to_system_prompt()
+ * - Python Scenario.to_user_context()
+ * - Python Datapoint.generate() / Datapoint._build_system_prompt()
+ */
+import type { Datapoint, Persona, Scenario } from "../types.js";
+/**
+ * Convert a persona to a system prompt for the user simulator.
+ *
+ * Mirrors Python `Persona.to_system_prompt()`.
+ *
+ * Numeric traits (patience, assertiveness, politeness, technical_level)
+ * are turned into descriptive phrases using 0.3 / 0.7 thresholds; name,
+ * communication style and background are wrapped with `delimit()`.
+ *
+ * @param persona - Persona to describe
+ * @returns The system prompt text
+ */
+export declare function buildPersonaSystemPrompt(persona: Persona): string;
+/**
+ * Convert a scenario to context for the user simulator.
+ *
+ * Mirrors Python `Scenario.to_user_context()`.
+ *
+ * The text covers the scenario name, goal, context and emotional state,
+ * followed by optional criteria, conversation-strategy and message-format
+ * sections.
+ *
+ * @param scenario - Scenario to describe
+ * @returns The scenario context text
+ */
+export declare function buildScenarioUserContext(scenario: Scenario): string;
+/**
+ * Build the combined system prompt from persona and scenario.
+ *
+ * Mirrors Python `Datapoint._build_system_prompt()`.
+ *
+ * The persona prompt comes first, then a `---` separator line surrounded
+ * by blank lines, then the scenario context.
+ *
+ * @param persona - Persona passed to `buildPersonaSystemPrompt`
+ * @param scenario - Scenario passed to `buildScenarioUserContext`
+ * @returns The combined prompt text
+ */
+export declare function buildDatapointSystemPrompt(persona: Persona, scenario: Scenario): string;
+/**
+ * Generate a datapoint from persona and scenario.
+ *
+ * Mirrors Python `Datapoint.generate()`.
+ *
+ * The datapoint carries the given persona and scenario, the combined
+ * system prompt, and a generated id of the form `dp_` followed by 12
+ * characters taken from a random UUID.
+ *
+ * @param persona - Persona stored on the datapoint
+ * @param scenario - Scenario stored on the datapoint
+ * @param firstMessage - Stored as `first_message`; defaults to an empty string
+ * @returns The new datapoint
+ */
+export declare function generateDatapoint(persona: Persona, scenario: Scenario, firstMessage?: string): Datapoint;
+//# sourceMappingURL=prompt-builders.d.ts.map

package/dist/lib/integrations/simulation/utils/prompt-builders.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"prompt-builders.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/utils/prompt-builders.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AAEH,OAAO,KAAK,EAAE,SAAS,EAAE,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAahE;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM,CAwDjE;AAcD;;;;GAIG;AACH,wBAAgB,wBAAwB,CAAC,QAAQ,EAAE,QAAQ,GAAG,MAAM,CA6CnE;AAMD;;;;GAIG;AACH,wBAAgB,0BAA0B,CACxC,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,QAAQ,GACjB,MAAM,CAKR;AAED;;;;GAIG;AACH,wBAAgB,iBAAiB,CAC/B,OAAO,EAAE,OAAO,EAChB,QAAQ,EAAE,QAAQ,EAClB,YAAY,SAAK,GAChB,SAAS,CAQX"}

package/dist/lib/integrations/simulation/utils/prompt-builders.js ADDED Viewed

@@ -0,0 +1,147 @@
+/**
+ * Prompt building utilities for personas, scenarios, and datapoints.
+ *
+ * These are the TypeScript equivalents of:
+ * - Python Persona.to_system_prompt()
+ * - Python Scenario.to_user_context()
+ * - Python Datapoint.generate() / Datapoint._build_system_prompt()
+ */
+import { CULTURAL_CONTEXT_INSTRUCTIONS, EMOTIONAL_ARC_INSTRUCTIONS, INPUT_FORMAT_INSTRUCTIONS, STRATEGY_INSTRUCTIONS, } from "../types.js";
+import { delimit } from "./sanitize.js";
+// ---------------------------------------------------------------------------
+// Persona → system prompt
+// ---------------------------------------------------------------------------
+/**
+ * Convert a persona to a system prompt for the user simulator.
+ *
+ * Mirrors Python `Persona.to_system_prompt()`.
+ *
+ * Each numeric trait (patience, assertiveness, politeness,
+ * technical_level) selects one of three phrases: one for values below
+ * 0.3, one for values above 0.7, and a middle phrase otherwise. Both
+ * comparisons are strict, so exactly 0.3 or 0.7 — and any value for which
+ * both comparisons are false, such as `undefined` — selects the middle
+ * phrase.
+ *
+ * `emotional_arc` defaults to "stable" and `cultural_context` to
+ * "neutral". Each adds a section only when its lookup table has a truthy
+ * entry for the key; when neither does, the line they share is left blank.
+ *
+ * `name`, `communication_style` and `background` are wrapped with
+ * `delimit()`.
+ * NOTE(review): `delimit()` calls `.replace` on its argument, so a persona
+ * missing any of those three fields would throw here — confirm they are
+ * guaranteed upstream.
+ *
+ * @param persona - Persona to describe
+ * @returns The system prompt text
+ */
+export function buildPersonaSystemPrompt(persona) {
+    const patienceDesc = persona.patience < 0.3
+        ? "very impatient"
+        : persona.patience > 0.7
+            ? "patient"
+            : "moderately patient";
+    const assertiveDesc = persona.assertiveness > 0.7
+        ? "very assertive and direct"
+        : persona.assertiveness < 0.3
+            ? "passive"
+            : "balanced";
+    const politeDesc = persona.politeness < 0.3
+        ? "rude and curt"
+        : persona.politeness > 0.7
+            ? "very polite"
+            : "neutral in tone";
+    const techDesc = persona.technical_level < 0.3
+        ? "a complete novice"
+        : persona.technical_level > 0.7
+            ? "a technical expert"
+            : "somewhat technical";
+    let arcText = "";
+    const arcKey = persona.emotional_arc ?? "stable";
+    const arcInstruction = EMOTIONAL_ARC_INSTRUCTIONS[arcKey];
+    if (arcInstruction) {
+        arcText = `\n\nEmotional Arc: ${arcInstruction}`;
+    }
+    let culturalText = "";
+    const culturalKey = persona.cultural_context ?? "neutral";
+    const culturalInstruction = CULTURAL_CONTEXT_INSTRUCTIONS[culturalKey];
+    if (culturalInstruction) {
+        culturalText = `\n\nCultural Communication Style: ${culturalInstruction}`;
+    }
+    return `You are simulating a user with the following characteristics:
+Name: ${delimit(persona.name)}
+Patience: You are ${patienceDesc}
+Assertiveness: You are ${assertiveDesc}
+Politeness: You are ${politeDesc}
+Technical Level: You are ${techDesc}
+Communication Style: ${delimit(persona.communication_style)}
+Background: ${delimit(persona.background)}
+${arcText}${culturalText}
+Stay in character throughout the conversation. Your responses should reflect these traits consistently.
+Do not break character or acknowledge that you are a simulation.`;
+}
+// ---------------------------------------------------------------------------
+// Scenario → user context
+// ---------------------------------------------------------------------------
+const EMOTION_INSTRUCTIONS = { // starting_emotion key -> sentence used on the "Emotional State:" line
+    neutral: "You approach this conversation calmly.",
+    frustrated: "You are frustrated and may express irritation.",
+    confused: "You are confused and may ask for clarification.",
+    happy: "You are in a good mood and friendly.",
+    urgent: "This is urgent and you need a quick resolution.",
+};
+/**
+ * Convert a scenario to context for the user simulator.
+ *
+ * Mirrors Python `Scenario.to_user_context()`.
+ *
+ * Criteria are split by `type`: "must_happen" descriptions are listed
+ * after "You expect the agent to:" and "must_not_happen" descriptions
+ * after "You would be dissatisfied if:". Each description is wrapped with
+ * `delimit()` and the list is comma-joined. Criteria of any other type
+ * are ignored, and a heading is emitted only when its list is non-empty.
+ *
+ * Defaults applied to missing fields: `conversation_strategy`
+ * "cooperative", `input_format` "plain_text", `starting_emotion`
+ * "neutral", `context` "". A strategy or format section is added only
+ * when its lookup table has a truthy entry for the key, and an
+ * unrecognised `starting_emotion` leaves the "Emotional State:" line empty.
+ *
+ * `name` and `goal` are passed to `delimit()` without a fallback (unlike
+ * `context`), so they are presumably always present — verify against callers.
+ *
+ * @param scenario - Scenario to describe
+ * @returns The scenario context text
+ */
+export function buildScenarioUserContext(scenario) {
+    let criteriaText = "";
+    if (scenario.criteria && scenario.criteria.length > 0) {
+        const mustHappen = scenario.criteria
+            .filter((c) => c.type === "must_happen")
+            .map((c) => delimit(c.description));
+        const mustNot = scenario.criteria
+            .filter((c) => c.type === "must_not_happen")
+            .map((c) => delimit(c.description));
+        if (mustHappen.length > 0) {
+            criteriaText += `\n\nYou expect the agent to: ${mustHappen.join(", ")}`;
+        }
+        if (mustNot.length > 0) {
+            criteriaText += `\n\nYou would be dissatisfied if: ${mustNot.join(", ")}`;
+        }
+    }
+    let strategyText = "";
+    const strategyKey = scenario.conversation_strategy ?? "cooperative";
+    const strategyInstruction = STRATEGY_INSTRUCTIONS[strategyKey];
+    if (strategyInstruction) {
+        strategyText = `\n\nConversation Strategy: ${strategyInstruction}`;
+    }
+    let formatText = "";
+    const formatKey = scenario.input_format ?? "plain_text";
+    const formatInstruction = INPUT_FORMAT_INSTRUCTIONS[formatKey];
+    if (formatInstruction) {
+        formatText = `\n\nMessage Format: ${formatInstruction}`;
+    }
+    const emotionText = EMOTION_INSTRUCTIONS[scenario.starting_emotion ?? "neutral"] ?? "";
+    return `Scenario: ${delimit(scenario.name)}
+Your Goal: ${delimit(scenario.goal)}
+Context: ${delimit(scenario.context ?? "")}
+Emotional State: ${emotionText}
+${criteriaText}${strategyText}${formatText}
+Work towards your goal naturally. React authentically based on how the agent responds.`;
+}
+// ---------------------------------------------------------------------------
+// Datapoint helpers
+// ---------------------------------------------------------------------------
+/**
+ * Build the combined system prompt from persona and scenario.
+ *
+ * Mirrors Python `Datapoint._build_system_prompt()`.
+ *
+ * The persona prompt comes first, then a `---` separator line surrounded
+ * by blank lines, then the scenario context.
+ *
+ * @param persona - Persona passed to `buildPersonaSystemPrompt`
+ * @param scenario - Scenario passed to `buildScenarioUserContext`
+ * @returns The combined prompt text
+ */
+export function buildDatapointSystemPrompt(persona, scenario) {
+    const personaPrompt = buildPersonaSystemPrompt(persona);
+    const scenarioContext = buildScenarioUserContext(scenario);
+    return `${personaPrompt}\n\n---\n\n${scenarioContext}`;
+}
+/**
+ * Generate a datapoint from persona and scenario.
+ *
+ * Mirrors Python `Datapoint.generate()`.
+ *
+ * The id is `dp_` followed by the first 12 characters of a random UUID
+ * with its hyphens removed. The persona and scenario objects are stored
+ * by reference, not copied.
+ *
+ * NOTE(review): `crypto` is used as a global (this file does not import
+ * it) — confirm every supported runtime exposes `crypto.randomUUID`.
+ *
+ * @param persona - Persona stored on the datapoint
+ * @param scenario - Scenario stored on the datapoint
+ * @param firstMessage - Stored as `first_message`; defaults to an empty string
+ * @returns The new datapoint
+ */
+export function generateDatapoint(persona, scenario, firstMessage = "") {
+    return {
+        id: `dp_${crypto.randomUUID().replace(/-/g, "").slice(0, 12)}`,
+        persona,
+        scenario,
+        user_system_prompt: buildDatapointSystemPrompt(persona, scenario),
+        first_message: firstMessage,
+    };
+}

package/dist/lib/integrations/simulation/utils/sanitize.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * Input sanitization utilities for prompt injection prevention.
+ */
+/**
+ * Wrap user-controlled text in delimiters to prevent prompt injection.
+ *
+ * Uses XML-like data tags to clearly separate user content from
+ * system instructions in LLM prompts. Literal `<data>` and `</data>`
+ * tags in the input, and `&`, are escaped to prevent breakout; other
+ * characters are left unchanged.
+ *
+ * @param text - User-controlled text to wrap
+ * @returns The escaped text wrapped in `<data>`...`</data>`
+ */
+export declare function delimit(text: string): string;
+//# sourceMappingURL=sanitize.d.ts.map

package/dist/lib/integrations/simulation/utils/sanitize.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"sanitize.d.ts","sourceRoot":"","sources":["../../../../../src/lib/integrations/simulation/utils/sanitize.ts"],"names":[],"mappings":"AAAA;;GAEG;AAEH;;;;;;;;;GASG;AACH,wBAAgB,OAAO,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAM5C"}

package/dist/lib/integrations/simulation/utils/sanitize.js ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Input sanitization utilities for prompt injection prevention.
+ */
+/**
+ * Wrap user-controlled text in delimiters to prevent prompt injection.
+ *
+ * Uses XML-like data tags to clearly separate user content from
+ * system instructions in LLM prompts. Both the opening and the closing
+ * tag in the input are escaped to prevent breakout.
+ *
+ * Escaping performed, in order:
+ * 1. every `&` becomes `&amp;` — done first so the entities written by
+ *    the next two steps are not themselves re-escaped;
+ * 2. every literal `<data>` (any letter case) becomes `&lt;data&gt;`;
+ * 3. every literal `</data>` (any letter case) becomes `&lt;/data&gt;`.
+ * All other characters, including other angle brackets, pass through.
+ *
+ * NOTE(review): only the exact spellings `<data>` and `</data>` are
+ * matched; variants such as `</data >` are not rewritten — confirm
+ * whether that matters for the models consuming these prompts.
+ *
+ * @param text - User-controlled text to wrap; must be a string, since
+ *               the body calls `text.replace` and throws otherwise
+ * @returns The escaped text wrapped in `<data>`...`</data>`
+ */
+export function delimit(text) {
+    const sanitized = text
+        .replace(/&/g, "&amp;") // must be first
+        .replace(/<data>/gi, "&lt;data&gt;")
+        .replace(/<\/data>/gi, "&lt;/data&gt;");
+    return `<data>${sanitized}</data>`;
+}

package/dist/lib/integrations/simulation/wrap-agent.d.ts ADDED Viewed

@@ -0,0 +1,65 @@
+/**
+ * Wraps the simulation framework as an evaluatorq Job.
+ *
+ * Follows the same pattern as wrapAISdkAgent() and wrapLangChainAgent().
+ */
+import type { Job } from "../../types.js";
+import type { ChatMessage } from "./types.js";
+/**
+ * Options for creating a simulation job.
+ *
+ * Either `targetCallback` or `agentKey` must be supplied. The check is
+ * made when the job runs, not when it is created, and `targetCallback`
+ * takes precedence when both are given.
+ */
+export interface SimulationJobOptions {
+    /** Display name for this job in results; also passed to the simulation as its evaluation name. Defaults to "simulation". */
+    name?: string;
+    /** Target agent callback — receives messages and returns a response string. */
+    targetCallback?: (messages: ChatMessage[]) => string | Promise<string>;
+    /** Orq deployment key — used if targetCallback is not provided. */
+    agentKey?: string;
+    /** Maximum conversation turns per simulation. Defaults to 10. */
+    maxTurns?: number;
+    /** Model used for user simulator and judge agents. Defaults to "azure/gpt-4o-mini". */
+    model?: string;
+    /** Built-in evaluator names to apply to results. Defaults to ["goal_achieved", "criteria_met"]. */
+    evaluators?: string[];
+}
+/**
+ * Creates an evaluatorq Job that runs agent simulations.
+ *
+ * Each DataPoint should have inputs containing simulation data. The
+ * first matching form, in this order, is used:
+ * - `datapoint` (full Datapoint object), or
+ * - `datapoints` (Datapoint[]), or
+ * - `persona` (Persona object) and `scenario` (Scenario object), or
+ * - `personas` (Persona[]) and `scenarios` (Scenario[]) for batch generation
+ *
+ * The job:
+ * 1. Extracts persona/scenario/datapoint from data.inputs
+ * 2. Runs simulate() with the target agent
+ * 3. Converts the first result to OpenResponses format
+ * 4. Returns { name, output: ResponseResource }
+ *
+ * Only the first simulation result is returned; a warning is logged when
+ * more than one simulation ran.
+ *
+ * @example
+ * ```typescript
+ * import { wrapSimulationAgent } from "@orq-ai/evaluatorq/simulation";
+ *
+ * const job = wrapSimulationAgent({
+ *   targetCallback: async (messages) => {
+ *     // Your agent logic here
+ *     return "Agent response";
+ *   },
+ *   maxTurns: 5,
+ * });
+ *
+ * await evaluatorq("simulation-eval", {
+ *   data: [
+ *     {
+ *       inputs: {
+ *         persona: { name: "Impatient User", patience: 0.2, ... },
+ *         scenario: { name: "Refund Request", goal: "Get a refund", ... },
+ *       },
+ *     },
+ *   ],
+ *   jobs: [job],
+ * });
+ * ```
+ */
+export declare function wrapSimulationAgent(options: SimulationJobOptions): Job;
+//# sourceMappingURL=wrap-agent.d.ts.map

package/dist/lib/integrations/simulation/wrap-agent.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"wrap-agent.d.ts","sourceRoot":"","sources":["../../../../src/lib/integrations/simulation/wrap-agent.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,OAAO,KAAK,EAAa,GAAG,EAAU,MAAM,gBAAgB,CAAC;AAI7D,OAAO,KAAK,EACV,WAAW,EAKZ,MAAM,YAAY,CAAC;AAEpB;;GAEG;AACH,MAAM,WAAW,oBAAoB;IACnC,sEAAsE;IACtE,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,+EAA+E;IAC/E,cAAc,CAAC,EAAE,CAAC,QAAQ,EAAE,WAAW,EAAE,KAAK,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAAC;IACvE,mEAAmE;IACnE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,iEAAiE;IACjE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,uFAAuF;IACvF,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,mGAAmG;IACnG,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;CACvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAgB,mBAAmB,CAAC,OAAO,EAAE,oBAAoB,GAAG,GAAG,CAkGtE"}

package/dist/lib/integrations/simulation/wrap-agent.js ADDED Viewed

@@ -0,0 +1,140 @@
+/**
+ * Wraps the simulation framework as an evaluatorq Job.
+ *
+ * Follows the same pattern as wrapAISdkAgent() and wrapLangChainAgent().
+ */
+import { fromOrqDeployment } from "./adapters.js";
+import { toOpenResponses } from "./convert.js";
+import { simulate } from "./simulation/index.js";
+/**
+ * Creates an evaluatorq Job that runs agent simulations.
+ *
+ * Each DataPoint should have inputs containing simulation data. The
+ * first matching form, in this order, is used:
+ * - `datapoint` (full Datapoint object), or
+ * - `datapoints` (Datapoint[]), or
+ * - `persona` (Persona object) and `scenario` (Scenario object), or
+ * - `personas` (Persona[]) and `scenarios` (Scenario[]) for batch generation
+ *
+ * The job:
+ * 1. Extracts persona/scenario/datapoint from data.inputs
+ * 2. Runs simulate() with the target agent
+ * 3. Converts the first result to OpenResponses format
+ * 4. Returns { name, output: ResponseResource }
+ *
+ * The target is `targetCallback` when given, otherwise a callback built
+ * from `agentKey`. That resolution, and all input validation, happens
+ * each time the returned job runs — not when this function is called —
+ * so a misconfigured job only throws at run time.
+ *
+ * Input validation is shallow: it checks that required keys are present
+ * (datapoints: persona, scenario, first_message; personas: name;
+ * scenarios: name, goal), not the values behind them.
+ *
+ * Only the first simulation result is returned; a warning is logged when
+ * more than one simulation ran.
+ *
+ * @example
+ * ```typescript
+ * import { wrapSimulationAgent } from "@orq-ai/evaluatorq/simulation";
+ *
+ * const job = wrapSimulationAgent({
+ *   targetCallback: async (messages) => {
+ *     // Your agent logic here
+ *     return "Agent response";
+ *   },
+ *   maxTurns: 5,
+ * });
+ *
+ * await evaluatorq("simulation-eval", {
+ *   data: [
+ *     {
+ *       inputs: {
+ *         persona: { name: "Impatient User", patience: 0.2, ... },
+ *         scenario: { name: "Refund Request", goal: "Get a refund", ... },
+ *       },
+ *     },
+ *   ],
+ *   jobs: [job],
+ * });
+ * ```
+ */
+export function wrapSimulationAgent(options) {
+    const { name = "simulation", targetCallback, agentKey, maxTurns = 10, model, evaluators, } = options;
+    return async (data, _row) => {
+        // Resolve the target callback
+        let resolvedCallback = targetCallback;
+        if (!resolvedCallback && agentKey) {
+            resolvedCallback = fromOrqDeployment(agentKey);
+        }
+        if (!resolvedCallback) {
+            throw new Error("wrapSimulationAgent requires either targetCallback or agentKey");
+        }
+        // Extract simulation inputs from DataPoint
+        const inputs = data.inputs;
+        let datapoints;
+        let personas;
+        let scenarios;
+        if (inputs.datapoint) {
+            const dp = inputs.datapoint;
+            validateShape(dp, "datapoint", ["persona", "scenario", "first_message"]);
+            datapoints = [dp];
+        }
+        else if (inputs.datapoints) {
+            const dps = inputs.datapoints;
+            if (!Array.isArray(dps)) {
+                throw new Error("Expected 'datapoints' to be an array");
+            }
+            for (const dp of dps) {
+                validateShape(dp, "datapoints[]", [
+                    "persona",
+                    "scenario",
+                    "first_message",
+                ]);
+            }
+            datapoints = dps;
+        }
+        else if (inputs.persona && inputs.scenario) {
+            validateShape(inputs.persona, "persona", ["name"]);
+            validateShape(inputs.scenario, "scenario", ["name", "goal"]);
+            personas = [inputs.persona];
+            scenarios = [inputs.scenario];
+        }
+        else if (inputs.personas && inputs.scenarios) {
+            if (!Array.isArray(inputs.personas) || !Array.isArray(inputs.scenarios)) {
+                throw new Error("Expected 'personas' and 'scenarios' to be arrays");
+            }
+            for (const p of inputs.personas)
+                validateShape(p, "personas[]", ["name"]);
+            for (const s of inputs.scenarios)
+                validateShape(s, "scenarios[]", ["name", "goal"]);
+            personas = inputs.personas;
+            scenarios = inputs.scenarios;
+        }
+        else {
+            throw new Error("Expected data.inputs to contain 'persona' + 'scenario', 'datapoint', 'datapoints', or 'personas' + 'scenarios'");
+        }
+        // Run simulation; whichever of datapoints / personas / scenarios was not set above is passed as undefined
+        const results = await simulate({
+            evaluationName: name,
+            targetCallback: resolvedCallback,
+            datapoints,
+            personas,
+            scenarios,
+            maxTurns,
+            model,
+            evaluators,
+        });
+        // Convert first result to OpenResponses format
+        const result = results[0];
+        if (!result) {
+            throw new Error("Simulation produced no results");
+        }
+        if (results.length > 1) {
+            console.warn(`wrapSimulationAgent: ${results.length} simulations ran but only the first result is returned. ` +
+                "Use simulate() directly to collect all results.");
+        }
+        const openResponsesOutput = toOpenResponses(result, model);
+        return {
+            name,
+            output: openResponsesOutput,
+        };
+    };
+}
+/**
+ * Lightweight runtime check that an object has the expected keys.
+ *
+ * Presence is tested with the `in` operator, so a key counts as present
+ * even when its value is `undefined` or `null`, and inherited keys count
+ * too. Values are not inspected. For `null` the error message reports
+ * the type as "object", because it uses `typeof`.
+ *
+ * @param value - Value to check
+ * @param label - Name of the value, used in error messages
+ * @param requiredKeys - Keys that must be present on `value`
+ * @throws Error when `value` is not a non-null object, or when a
+ *         required key is missing (the first missing key is reported)
+ */
+function validateShape(value, label, requiredKeys) {
+    if (typeof value !== "object" || value === null) {
+        throw new Error(`Expected '${label}' to be an object, got ${typeof value}`);
+    }
+    for (const key of requiredKeys) {
+        if (!(key in value)) {
+            throw new Error(`Invalid '${label}': missing required field '${key}'`);
+        }
+    }
+}

package/dist/lib/send-results.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"send-results.d.ts","sourceRoot":"","sources":["../../src/lib/send-results.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAGtE,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE;QACL,KAAK,EACD,MAAM,GACN,OAAO,GACP,MAAM,GACN;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;SAAE,CAAC;QACrD,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,wBAAwB,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAGD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,yBAAyB,EAAE,CAAC;CACtC;~~AAWD~~,eAAO,MAAM,sBAAsB,GACjC,QAAQ,MAAM,EACd,gBAAgB,MAAM,EACtB,uBAAuB,MAAM,GAAG,SAAS,EACzC,WAAW,MAAM,GAAG,SAAS,EAC7B,SAAS,gBAAgB,EACzB,WAAW,IAAI,EACf,SAAS,IAAI,EACb,MAAM,MAAM,GAAG,SAAS,KACvB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,~~CAoG~~/B,CAAC"}
1	+ {"version":3,"file":"send-results.d.ts","sourceRoot":"","sources":["../../src/lib/send-results.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAEhC,OAAO,KAAK,EAAE,SAAS,EAAE,gBAAgB,EAAE,MAAM,EAAE,MAAM,YAAY,CAAC;AAGtE,MAAM,WAAW,wBAAwB;IACvC,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE;QACL,KAAK,EACD,MAAM,GACN,OAAO,GACP,MAAM,GACN;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAA;SAAE,CAAC;QACrD,WAAW,CAAC,EAAE,MAAM,CAAC;KACtB,CAAC;IACF,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,mBAAmB;IAClC,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,eAAe,CAAC,EAAE,wBAAwB,EAAE,CAAC;CAC9C;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,UAAU,CAAC,EAAE,mBAAmB,EAAE,CAAC;CACpC;AAGD,MAAM,WAAW,kBAAkB;IACjC,KAAK,EAAE,MAAM,CAAC;IACd,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,UAAU,EAAE,MAAM,CAAC;IACnB,QAAQ,EAAE,MAAM,CAAC;IACjB,mBAAmB,EAAE,MAAM,CAAC;IAC5B,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,OAAO,EAAE,yBAAyB,EAAE,CAAC;CACtC;AAmCD,eAAO,MAAM,sBAAsB,GACjC,QAAQ,MAAM,EACd,gBAAgB,MAAM,EACtB,uBAAuB,MAAM,GAAG,SAAS,EACzC,WAAW,MAAM,GAAG,SAAS,EAC7B,SAAS,gBAAgB,EACzB,WAAW,IAAI,EACf,SAAS,IAAI,EACb,MAAM,MAAM,GAAG,SAAS,KACvB,MAAM,CAAC,MAAM,CAAC,IAAI,EAAE,KAAK,EAAE,KAAK,CA2G/B,CAAC"}

package/dist/lib/send-results.js CHANGED Viewed

@@ -1,4 +1,16 @@
 import { Effect } from "effect";
+const isRecord = (value) => typeof value === "object" && value !== null && !Array.isArray(value); // true for non-null, non-array objects
+const isEvaluationResultCell = (value) => isRecord(value) && typeof value.type === "string" && isRecord(value.value); // record with a string `type` and a record `value`
+const serializeScoreValue = (value) => { // records that are not result cells are sent as JSON text
+    if (isRecord(value) && !isEvaluationResultCell(value)) {
+        return JSON.stringify(value);
+    }
+    return value; // primitives, arrays and result cells pass through unchanged
+};
+const isResponseResource = (value) => isRecord(value) && value.object === "response"; // record whose `object` field is "response"
+const serializeOutput = (output) => isRecord(output) && !isResponseResource(output) // stringify plain records; response resources and non-records pass through
+    ? JSON.stringify(output)
+    : output;
 export const sendResultsToOrqEffect = (apiKey, evaluationName, evaluationDescription, datasetId, results, startTime, endTime, path) => Effect.gen(function* (_) {
     // Convert Error objects to strings for JSON serialization
     const serializedResults = results.map((result) => ({
@@ -6,11 +18,14 @@ export const sendResultsToOrqEffect = (apiKey, evaluationName, evaluationDescrip
         error: result.error ? String(result.error) : undefined,
         jobResults: result.jobResults?.map((jobResult) => ({
             jobName: jobResult.jobName,
-            output: jobResult.output,
+            output: serializeOutput(jobResult.output),
             error: jobResult.error ? String(jobResult.error) : undefined,
             evaluatorScores: jobResult.evaluatorScores?.map((score) => ({
                 evaluatorName: score.evaluatorName,
-                score: score.score,
+                score: {
+                    ...score.score,
+                    value: serializeScoreValue(score.score.value),
+                },
                 error: score.error ? String(score.error) : undefined,
             })),
         })),

package/dist/lib/types.d.ts CHANGED Viewed

@@ -12,7 +12,7 @@ type EvaluationResult<T> = {
 };
 export interface EvaluatorScore {
     evaluatorName: string;
-    score: EvaluationResult<number | boolean | string | EvaluationResultCell>;
+    score: EvaluationResult<number | boolean | string | EvaluationResultCell | Record<string, unknown>>;
     error?: Error;
 }
 export interface JobResult {
@@ -84,6 +84,6 @@ export type ScorerParameter = {
     data: DataPoint;
     output: Output;
 };
-export type Scorer = (params: ScorerParameter) => Promise<EvaluationResult<string | number | boolean | EvaluationResultCell>>;
+export type Scorer = (params: ScorerParameter) => Promise<EvaluationResult<string | number | boolean | EvaluationResultCell | Record<string, unknown>>>;
 export {};
 //# sourceMappingURL=types.d.ts.map

package/dist/lib/types.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uCAAuC,CAAC;AAE9E,MAAM,MAAM,MAAM,GACd,MAAM,GACN,MAAM,GACN,OAAO,GACP,gBAAgB,GAChB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,IAAI,CAAC;AAET,MAAM,MAAM,yBAAyB,GACjC,MAAM,GACN,MAAM,GACN,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC;AAEtE,MAAM,MAAM,oBAAoB,GAAG;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;CAClD,CAAC;AAEF,KAAK,gBAAgB,CAAC,CAAC,IAAI;IACzB,KAAK,EAAE,CAAC,CAAC;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB,CAAC;AAEF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,gBAAgB,~~CAAC~~,MAAM,GAAG,OAAO,GAAG,MAAM,GAAG,oBAAoB,CAAC,CAAC;~~IAC1E~~,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,eAAe,CAAC,EAAE,cAAc,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;CAC1B;AAED,MAAM,MAAM,gBAAgB,GAAG,eAAe,EAAE,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,MAAM,GAAG,GAAG,CAChB,IAAI,EAAE,SAAS,EACf,GAAG,EAAE,MAAM,KACR,OAAO,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC,CAAC;AAEH;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EACA;QACE,SAAS,EAAE,MAAM,CAAC;QAClB,eAAe,CAAC,EAAE,OAAO,CAAC;KAC3B,GACD,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC,EAAE,CAAC;IACvC,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,MAAM,GAAG,CACnB,MAAM,EA
AE,eAAe,KACpB,OAAO,CACV,gBAAgB,~~CAAC~~,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,oBAAoB,CAAC,~~CACnE~~,CAAC"}
1	+ {"version":3,"file":"types.d.ts","sourceRoot":"","sources":["../../src/lib/types.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uCAAuC,CAAC;AAE9E,MAAM,MAAM,MAAM,GACd,MAAM,GACN,MAAM,GACN,OAAO,GACP,gBAAgB,GAChB,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,GACvB,IAAI,CAAC;AAET,MAAM,MAAM,yBAAyB,GACjC,MAAM,GACN,MAAM,GACN,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,GAAG,MAAM,CAAC,MAAM,EAAE,MAAM,GAAG,MAAM,CAAC,CAAC,CAAC;AAEtE,MAAM,MAAM,oBAAoB,GAAG;IACjC,IAAI,EAAE,MAAM,CAAC;IACb,KAAK,EAAE,MAAM,CAAC,MAAM,EAAE,yBAAyB,CAAC,CAAC;CAClD,CAAC;AAEF,KAAK,gBAAgB,CAAC,CAAC,IAAI;IACzB,KAAK,EAAE,CAAC,CAAC;IACT,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,OAAO,CAAC;CAChB,CAAC;AAEF,MAAM,WAAW,cAAc;IAC7B,aAAa,EAAE,MAAM,CAAC;IACtB,KAAK,EAAE,gBAAgB,CACrB,MAAM,GAAG,OAAO,GAAG,MAAM,GAAG,oBAAoB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC3E,CAAC;IACF,KAAK,CAAC,EAAE,KAAK,CAAC;CACf;AAED,MAAM,WAAW,SAAS;IACxB,OAAO,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,eAAe,CAAC,EAAE,cAAc,EAAE,CAAC;CACpC;AAED,MAAM,WAAW,eAAe;IAC9B,SAAS,EAAE,SAAS,CAAC;IACrB,KAAK,CAAC,EAAE,KAAK,CAAC;IACd,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;CAC1B;AAED,MAAM,MAAM,gBAAgB,GAAG,eAAe,EAAE,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,SAAS;IACxB,MAAM,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IAChC,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,MAAM,GAAG,GAAG,CAChB,IAAI,EAAE,SAAS,EACf,GAAG,EAAE,MAAM,KACR,OAAO,CAAC;IACX,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC,CAAC;AAEH;;;;;;;;;;;GAWG;AACH,MAAM,WAAW,eAAe;IAC9B,IAAI,EACA;QACE,SAAS,EAAE,MAAM,CAAC;QAClB,eAAe,CAAC,EAAE,OAAO,CAAC;KAC3B,GACD,CAAC,OAAO,CAAC,SAAS,CAAC,GAAG,SAAS,CAAC,EAAE,CAAC;IACvC,UAAU,CAAC,EAAE,SAAS,EAAE,CAAC;IACzB,IAAI,EAAE,GAAG,EAAE,CAAC;IACZ,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,KAAK,CAAC,EAAE,OAAO,CAAC;IAChB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,MAAM,MAAM,SAAS,GAAG;IACtB,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,MAAM,eAAe,GAAG;IAC5B,IAAI,EAAE,SAAS,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;CAChB,CAAC;AAEF,MAAM,
MAAM,MAAM,GAAG,CACnB,MAAM,EAAE,eAAe,KACpB,OAAO,CACV,gBAAgB,CACd,MAAM,GAAG,MAAM,GAAG,OAAO,GAAG,oBAAoB,GAAG,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAC3E,CACF,CAAC"}