npm - @salesforce/plugin-agent - Versions diffs - 1.30.11 → 1.31.0 - Mend

@salesforce/plugin-agent 1.30.11 → 1.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17) hide show

package/README.md +19 -19
package/lib/commands/agent/test/run-eval.d.ts +33 -0
package/lib/commands/agent/test/run-eval.js +221 -0
package/lib/commands/agent/test/run-eval.js.map +1 -0
package/lib/evalFormatter.d.ts +30 -0
package/lib/evalFormatter.js +263 -0
package/lib/evalFormatter.js.map +1 -0
package/lib/evalNormalizer.d.ts +57 -0
package/lib/evalNormalizer.js +421 -0
package/lib/evalNormalizer.js.map +1 -0
package/lib/yamlSpecTranslator.d.ts +20 -0
package/lib/yamlSpecTranslator.js +217 -0
package/lib/yamlSpecTranslator.js.map +1 -0
package/messages/agent.test.run-eval.md +91 -0
package/oclif.manifest.json +219 -91
package/package.json +5 -5
package/schemas/agent-test-run__eval.json +52 -0

package/lib/evalNormalizer.js ADDED Viewed

@@ -0,0 +1,421 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+// --- Evaluator classification --- (decides which normalizer and which field whitelist an evaluator step gets)
+const SCORING_EVALUATORS = new Set([ // handled by normalizeScoringEvaluator; filtered with SCORING_VALID_FIELDS
+    'evaluator.text_alignment',
+    'evaluator.hallucination_detection',
+    'evaluator.citation_recall',
+    'evaluator.answer_faithfulness',
+]);
+const ASSERTION_EVALUATORS = new Set(['evaluator.string_assertion', 'evaluator.json_assertion']); // handled by normalizeAssertionEvaluator; filtered with ASSERTION_VALID_FIELDS
+const DEFAULT_METRIC_NAMES = { // metric_name set on a scoring evaluator when it is missing or equals the type suffix (e.g. "text_alignment")
+    'evaluator.text_alignment': 'base.cosine_similarity',
+    'evaluator.hallucination_detection': 'hallucination_detection',
+    'evaluator.citation_recall': 'citation_recall',
+    'evaluator.answer_faithfulness': 'answer_faithfulness',
+};
+const SCORING_VALID_FIELDS = new Set([ // keys stripUnrecognizedFields keeps on scoring evaluator steps
+    'type',
+    'id',
+    'generated_output',
+    'reference_answer',
+    'metric_name',
+    'threshold',
+]);
+const ASSERTION_VALID_FIELDS = new Set([ // keys stripUnrecognizedFields keeps on assertion evaluator steps
+    'type',
+    'id',
+    'actual',
+    'expected',
+    'operator',
+    'threshold',
+    'json_path',
+    'json_schema',
+    'metric_name',
+]);
+const VALID_AGENT_FIELDS = { // step type -> keys stripUnrecognizedFields keeps on that agent step
+    'agent.create_session': new Set(['type', 'id', 'agent_id', 'agent_version_id', 'use_agent_api', 'planner_id']),
+    'agent.send_message': new Set(['type', 'id', 'session_id', 'utterance']),
+    'agent.get_state': new Set(['type', 'id', 'session_id']),
+};
+// --- Auto-correction maps --- (wrong key -> correct key; autoCorrectFields renames only when the correct key is absent)
+const AGENT_CORRECTIONS = { // applied to steps whose type starts with "agent."
+    agentId: 'agent_id',
+    agentVersionId: 'agent_version_id',
+    sessionId: 'session_id',
+    text: 'utterance',
+    message: 'utterance',
+    input: 'utterance',
+    prompt: 'utterance',
+    user_message: 'utterance',
+    userMessage: 'utterance',
+};
+const EVALUATOR_CORRECTIONS = { // applied to steps whose type starts with "evaluator."
+    subject: 'actual',
+    expectedValue: 'expected',
+    expected_value: 'expected',
+    actualValue: 'actual',
+    actual_value: 'actual',
+    assertionType: 'operator',
+    assertion_type: 'operator',
+    comparator: 'operator',
+};
+// --- Alias maps for agent.create_session: camelCase names plus snake_case planner_* variants, all folded into canonical keys ---
+const AGENT_FIELD_ALIASES = { // used by normalizeCamelCase; the alias key is always removed, the canonical key wins if both exist
+    useAgentApi: 'use_agent_api',
+    plannerId: 'planner_id',
+    plannerDefinitionId: 'planner_id',
+    planner_definition_id: 'planner_id',
+    planner_version_id: 'planner_id',
+    plannerVersionId: 'planner_id',
+};
+// --- Scoring evaluator field aliases --- (canonical keys are generated_output / reference_answer)
+const SCORING_FIELD_ALIASES = {
+    actual: 'generated_output',
+    expected: 'reference_answer',
+    actual_value: 'generated_output',
+    expected_value: 'reference_answer',
+    actual_output: 'generated_output',
+    expected_output: 'reference_answer',
+    response: 'generated_output',
+    ground_truth: 'reference_answer',
+};
+// --- Assertion evaluator field aliases --- (canonical keys are actual / expected)
+const ASSERTION_FIELD_ALIASES = {
+    actual_value: 'actual',
+    expected_value: 'expected',
+    generated_output: 'actual',
+    reference_answer: 'expected',
+    actual_output: 'actual',
+    expected_output: 'expected',
+    response: 'actual',
+    ground_truth: 'expected',
+};
+// --- MCP shorthand field mapping ---
+// MCP uses `field: "gs1.planner_state.topic"` — map to Eval API `actual` with correct JSONPath
+const MCP_FIELD_MAP = { // keyed by the part of `field` after the first dot (the step id is stripped first); unlisted paths are used unchanged
+    'planner_state.topic': 'response.planner_response.lastExecution.topic',
+    'planner_state.invokedActions': 'response.planner_response.lastExecution.invokedActions',
+    'planner_state.actionsSequence': 'response.planner_response.lastExecution.invokedActions',
+    response: 'response',
+    'response.messages': 'response',
+};
+// --- Main entry point ---
+/**
+ * Apply all normalizations to a test payload.
+ * Passes run in order: mcp-shorthand -> auto-correct -> camelCase -> evaluator fields -> shorthand refs -> defaults -> strip.
+ */
+export function normalizePayload(payload) {
+    const normalized = {
+        tests: payload.tests.map((test) => {
+            let steps = [...test.steps];
+            steps = normalizeMcpShorthand(steps);
+            steps = autoCorrectFields(steps);
+            steps = normalizeCamelCase(steps);
+            steps = normalizeEvaluatorFields(steps);
+            steps = convertShorthandRefs(steps);
+            steps = injectDefaults(steps);
+            steps = stripUnrecognizedFields(steps);
+            return { ...test, steps };
+        }),
+    };
+    return normalized;
+}
+// --- Individual normalization passes ---
+/**
+ * Convert MCP shorthand format to raw Eval API format.
+ * MCP uses type="evaluator" + evaluator_type, raw API uses type="evaluator.xxx".
+ * Also maps `field` to `actual` with proper JSONPath and auto-generates missing `id` fields.
+ */
+export function normalizeMcpShorthand(steps) {
+    let evalCounter = 0;
+    return steps.map((step) => {
+        const evaluator_type = step.evaluator_type;
+        // Only applies to MCP shorthand: type="evaluator" with evaluator_type field
+        if (step.type !== 'evaluator' || !evaluator_type)
+            return step;
+        const normalized = { ...step };
+        // Merge type: "evaluator" + evaluator_type: "xxx" → type: "evaluator.xxx"
+        normalized.type = `evaluator.${evaluator_type}`;
+        delete normalized.evaluator_type;
+        // Convert `field` to `actual` with proper shorthand ref format
+        if ('field' in normalized) {
+            if (!('actual' in normalized)) {
+                const fieldValue = normalized.field;
+                // Parse "gs1.planner_state.topic" → stepId="gs1", fieldPath="planner_state.topic"
+                const dotIdx = fieldValue.indexOf('.');
+                if (dotIdx > 0) {
+                    const stepId = fieldValue.substring(0, dotIdx);
+                    const fieldPath = fieldValue.substring(dotIdx + 1);
+                    const mappedPath = MCP_FIELD_MAP[fieldPath] ?? fieldPath;
+                    normalized.actual = `{${stepId}.${mappedPath}}`;
+                }
+                else {
+                    normalized.actual = fieldValue;
+                }
+            }
+            delete normalized.field;
+        }
+        // Auto-generate id if missing
+        if (!normalized.id || normalized.id === '') {
+            normalized.id = `eval_${evalCounter}`;
+        }
+        evalCounter++;
+        return normalized;
+    });
+}
+/**
+ * Auto-correct common field name mistakes.
+ * Maps wrong field names to correct ones (agentId->agent_id, text->utterance, etc.)
+ */
+export function autoCorrectFields(steps) {
+    return steps.map((step) => {
+        const corrected = { ...step };
+        const stepType = corrected.type ?? '';
+        if (stepType.startsWith('agent.')) {
+            for (const [wrong, correct] of Object.entries(AGENT_CORRECTIONS)) {
+                if (wrong in corrected && !(correct in corrected)) {
+                    corrected[correct] = corrected[wrong];
+                    delete corrected[wrong];
+                }
+            }
+        }
+        else if (stepType.startsWith('evaluator.')) {
+            for (const [wrong, correct] of Object.entries(EVALUATOR_CORRECTIONS)) {
+                if (wrong in corrected && !(correct in corrected)) {
+                    corrected[correct] = corrected[wrong];
+                    delete corrected[wrong];
+                }
+            }
+        }
+        return corrected;
+    });
+}
+/**
+ * Normalize camelCase agent field names to snake_case.
+ * useAgentApi->use_agent_api, plannerDefinitionId->planner_id, etc.
+ */
+export function normalizeCamelCase(steps) {
+    return steps.map((step) => {
+        if (step.type !== 'agent.create_session')
+            return step;
+        const normalized = { ...step };
+        for (const [alias, canonical] of Object.entries(AGENT_FIELD_ALIASES)) {
+            if (alias in normalized) {
+                if (!(canonical in normalized)) {
+                    normalized[canonical] = normalized[alias];
+                }
+                delete normalized[alias];
+            }
+        }
+        return normalized;
+    });
+}
+/**
+ * Apply field aliases: remap alias keys to canonical keys, removing duplicates.
+ */
+function applyFieldAliases(step, aliases) {
+    for (const [alias, canonical] of Object.entries(aliases)) {
+        if (alias in step && !(canonical in step)) {
+            step[canonical] = step[alias];
+            delete step[alias];
+        }
+        else if (alias in step && canonical in step) {
+            delete step[alias];
+        }
+    }
+}
+/**
+ * Normalize a scoring evaluator step (field aliases + metric_name injection).
+ */
+function normalizeScoringEvaluator(normalized, evalType) {
+    applyFieldAliases(normalized, SCORING_FIELD_ALIASES);
+    // Auto-inject or correct metric_name
+    if (!('metric_name' in normalized)) {
+        const defaultMetric = DEFAULT_METRIC_NAMES[evalType];
+        if (defaultMetric) {
+            normalized.metric_name = defaultMetric;
+        }
+    }
+    else if (normalized.metric_name === evalType.split('.')[1]) {
+        const defaultMetric = DEFAULT_METRIC_NAMES[evalType];
+        if (defaultMetric) {
+            normalized.metric_name = defaultMetric;
+        }
+    }
+}
+/**
+ * Normalize an assertion evaluator step (field aliases + operator lowercase + metric_name).
+ */
+function normalizeAssertionEvaluator(normalized, evalType) {
+    applyFieldAliases(normalized, ASSERTION_FIELD_ALIASES);
+    // Auto-lowercase operator
+    if ('operator' in normalized && typeof normalized.operator === 'string') {
+        normalized.operator = normalized.operator.toLowerCase();
+    }
+    // Auto-inject metric_name for assertion evaluators
+    if (!('metric_name' in normalized)) {
+        normalized.metric_name = evalType.split('.')[1];
+    }
+}
+/**
+ * Normalize evaluator field names based on evaluator category.
+ * Maps actual/expected <-> generated_output/reference_answer.
+ * Also auto-lowercases operator values and auto-injects metric_name.
+ */
+export function normalizeEvaluatorFields(steps) {
+    return steps.map((step) => {
+        const evalType = step.type ?? '';
+        if (!evalType.startsWith('evaluator.'))
+            return step;
+        const normalized = { ...step };
+        if (SCORING_EVALUATORS.has(evalType)) {
+            normalizeScoringEvaluator(normalized, evalType);
+        }
+        else if (ASSERTION_EVALUATORS.has(evalType)) {
+            normalizeAssertionEvaluator(normalized, evalType);
+        }
+        // Don't inject metric_name for unknown evaluator types to avoid API validation errors
+        // Unknown evaluators like bot_response_rating and planner_topic_assertion don't use metric_name
+        return normalized;
+    });
+}
+/**
+ * Convert {step_id.field} shorthand references to JSONPath $.outputs[N].field.
+ * Builds step_id->index mapping from non-evaluator steps.
+ */
+export function convertShorthandRefs(steps) {
+    // Build step_id -> output-array index mapping
+    const stepIdToIdx = {};
+    let outputIdx = 0;
+    for (const step of steps) {
+        const sid = step.id;
+        const stype = step.type ?? '';
+        if (sid && !stype.startsWith('evaluator.')) {
+            stepIdToIdx[sid] = outputIdx;
+            outputIdx += 1;
+        }
+    }
+    const refPattern = /\{([^}]+)\}/g;
+    function replaceValue(value) {
+        if (typeof value !== 'string')
+            return value;
+        return value.replace(refPattern, (match, ref) => {
+            const dotIdx = ref.indexOf('.');
+            if (dotIdx < 0)
+                return match;
+            const sid = ref.substring(0, dotIdx);
+            let field = ref.substring(dotIdx + 1);
+            if (!(sid in stepIdToIdx))
+                return match;
+            const idx = stepIdToIdx[sid];
+            // Normalize legacy nested-response path to flat response
+            if (field.startsWith('response.messages')) {
+                field = 'response';
+            }
+            return `$.outputs[${idx}].${field}`;
+        });
+    }
+    return steps.map((step) => {
+        const newStep = {};
+        for (const [key, val] of Object.entries(step)) {
+            if (typeof val === 'string') {
+                newStep[key] = replaceValue(val);
+            }
+            else if (val !== null && typeof val === 'object' && !Array.isArray(val)) {
+                const newObj = {};
+                for (const [k, v] of Object.entries(val)) {
+                    newObj[k] = typeof v === 'string' ? replaceValue(v) : v;
+                }
+                newStep[key] = newObj;
+            }
+            else if (Array.isArray(val)) {
+                newStep[key] = val.map((item) => typeof item === 'string' ? replaceValue(item) : item);
+            }
+            else {
+                newStep[key] = val;
+            }
+        }
+        return newStep;
+    });
+}
+/**
+ * Inject default values:
+ * - use_agent_api=true on agent.create_session if neither use_agent_api nor planner_id present
+ */
+export function injectDefaults(steps) {
+    return steps.map((step) => {
+        if (step.type === 'agent.create_session') {
+            if (!('use_agent_api' in step) && !('planner_id' in step)) {
+                return { ...step, use_agent_api: true };
+            }
+        }
+        return step;
+    });
+}
+/**
+ * Strip unrecognized fields from steps based on type-specific whitelists.
+ */
+export function stripUnrecognizedFields(steps) {
+    return steps.map((step) => {
+        const stepType = step.type ?? '';
+        // Agent steps
+        if (stepType in VALID_AGENT_FIELDS) {
+            const validFields = VALID_AGENT_FIELDS[stepType];
+            const stripped = {};
+            for (const [key, val] of Object.entries(step)) {
+                if (validFields.has(key)) {
+                    stripped[key] = val;
+                }
+            }
+            return stripped;
+        }
+        // Scoring evaluators
+        if (SCORING_EVALUATORS.has(stepType)) {
+            const stripped = {};
+            for (const [key, val] of Object.entries(step)) {
+                if (SCORING_VALID_FIELDS.has(key)) {
+                    stripped[key] = val;
+                }
+            }
+            return stripped;
+        }
+        // Assertion evaluators
+        if (ASSERTION_EVALUATORS.has(stepType)) {
+            const stripped = {};
+            for (const [key, val] of Object.entries(step)) {
+                if (ASSERTION_VALID_FIELDS.has(key)) {
+                    stripped[key] = val;
+                }
+            }
+            return stripped;
+        }
+        // Unknown types: don't strip (to avoid breaking future evaluator types)
+        return step;
+    });
+}
+// --- Batch splitting ---
+/**
+ * Split tests array into chunks of batchSize.
+ */
+export function splitIntoBatches(tests, batchSize) {
+    const batches = [];
+    for (let i = 0; i < tests.length; i += batchSize) {
+        batches.push(tests.slice(i, i + batchSize));
+    }
+    return batches;
+}
+//# sourceMappingURL=evalNormalizer.js.map

package/lib/evalNormalizer.js.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"evalNormalizer.js","sourceRoot":"","sources":["../src/evalNormalizer.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;GAcG;AAqBH,mCAAmC;AAEnC,MAAM,kBAAkB,GAAG,IAAI,GAAG,CAAC;IACjC,0BAA0B;IAC1B,mCAAmC;IACnC,2BAA2B;IAC3B,+BAA+B;CAChC,CAAC,CAAC;AAEH,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC,CAAC,4BAA4B,EAAE,0BAA0B,CAAC,CAAC,CAAC;AAEjG,MAAM,oBAAoB,GAA2B;IACnD,0BAA0B,EAAE,wBAAwB;IACpD,mCAAmC,EAAE,yBAAyB;IAC9D,2BAA2B,EAAE,iBAAiB;IAC9C,+BAA+B,EAAE,qBAAqB;CACvD,CAAC;AAEF,MAAM,oBAAoB,GAAG,IAAI,GAAG,CAAC;IACnC,MAAM;IACN,IAAI;IACJ,kBAAkB;IAClB,kBAAkB;IAClB,aAAa;IACb,WAAW;CACZ,CAAC,CAAC;AAEH,MAAM,sBAAsB,GAAG,IAAI,GAAG,CAAC;IACrC,MAAM;IACN,IAAI;IACJ,QAAQ;IACR,UAAU;IACV,UAAU;IACV,WAAW;IACX,WAAW;IACX,aAAa;IACb,aAAa;CACd,CAAC,CAAC;AAEH,MAAM,kBAAkB,GAAgC;IACtD,sBAAsB,EAAE,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,kBAAkB,EAAE,eAAe,EAAE,YAAY,CAAC,CAAC;IAC9G,oBAAoB,EAAE,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,YAAY,EAAE,WAAW,CAAC,CAAC;IACxE,iBAAiB,EAAE,IAAI,GAAG,CAAC,CAAC,MAAM,EAAE,IAAI,EAAE,YAAY,CAAC,CAAC;CACzD,CAAC;AAEF,+BAA+B;AAE/B,MAAM,iBAAiB,GAA2B;IAChD,OAAO,EAAE,UAAU;IACnB,cAAc,EAAE,kBAAkB;IAClC,SAAS,EAAE,YAAY;IACvB,IAAI,EAAE,WAAW;IACjB,OAAO,EAAE,WAAW;IACpB,KAAK,EAAE,WAAW;IAClB,MAAM,EAAE,WAAW;IACnB,YAAY,EAAE,WAAW;IACzB,WAAW,EAAE,WAAW;CACzB,CAAC;AAEF,MAAM,qBAAqB,GAA2B;IACpD,OAAO,EAAE,QAAQ;IACjB,aAAa,EAAE,UAAU;IACzB,cAAc,EAAE,UAAU;IAC1B,WAAW,EAAE,QAAQ;IACrB,YAAY,EAAE,QAAQ;IACtB,aAAa,EAAE,UAAU;IACzB,cAAc,EAAE,UAAU;IAC1B,UAAU,EAAE,UAAU;CACvB,CAAC;AAEF,wDAAwD;AAExD,MAAM,mBAAmB,GAA2B;IAClD,WAAW,EAAE,eAAe;IAC5B,SAAS,EAAE,YAAY;IACvB,mBAAmB,EAAE,YAAY;IACjC,qBAAqB,EAAE,YAAY;IACnC,kBAAkB,EAAE,YAAY;IAChC,gBAAgB,EAAE,YAAY;CAC/B,CAAC;AAEF,0CAA0C;AAE1C,MAAM,qBAAqB,GAA2B;IACpD,MAAM,EAAE,kBAAkB;IAC1B,QAAQ,EAAE,kBAAkB;IAC5B,YAAY,EAAE,kBAAkB;IAChC,cAAc,EAAE,kBAAkB;IAClC,aAAa,EAAE,kBAAkB;IACjC,eAAe,EAAE,kBAAkB;IACnC,QAAQ,EAAE,kBAAkB;IAC5B,YAAY,EAAE,kBAAkB;CACjC,CAAC;AAEF,4CAA4C;AAE5C,MAAM,uBAAuB,GAA2B;IACtD,YAAY,EAAE,QAAQ;IACtB,cAAc,EAAE,UAAU;IAC1B,gBAAgB,EAAE,QAAQ;IAC1B,gBAAgB,EAAE
,UAAU;IAC5B,aAAa,EAAE,QAAQ;IACvB,eAAe,EAAE,UAAU;IAC3B,QAAQ,EAAE,QAAQ;IAClB,YAAY,EAAE,UAAU;CACzB,CAAC;AAEF,sCAAsC;AAEtC,+FAA+F;AAC/F,MAAM,aAAa,GAA2B;IAC5C,qBAAqB,EAAE,+CAA+C;IACtE,8BAA8B,EAAE,wDAAwD;IACxF,+BAA+B,EAAE,wDAAwD;IACzF,QAAQ,EAAE,UAAU;IACpB,mBAAmB,EAAE,UAAU;CAChC,CAAC;AAEF,2BAA2B;AAE3B;;;GAGG;AACH,MAAM,UAAU,gBAAgB,CAAC,OAAoB;IACnD,MAAM,UAAU,GAAgB;QAC9B,KAAK,EAAE,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;YAChC,IAAI,KAAK,GAAG,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC5B,KAAK,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;YACrC,KAAK,GAAG,iBAAiB,CAAC,KAAK,CAAC,CAAC;YACjC,KAAK,GAAG,kBAAkB,CAAC,KAAK,CAAC,CAAC;YAClC,KAAK,GAAG,wBAAwB,CAAC,KAAK,CAAC,CAAC;YACxC,KAAK,GAAG,oBAAoB,CAAC,KAAK,CAAC,CAAC;YACpC,KAAK,GAAG,cAAc,CAAC,KAAK,CAAC,CAAC;YAC9B,KAAK,GAAG,uBAAuB,CAAC,KAAK,CAAC,CAAC;YACvC,OAAO,EAAE,GAAG,IAAI,EAAE,KAAK,EAAE,CAAC;QAC5B,CAAC,CAAC;KACH,CAAC;IACF,OAAO,UAAU,CAAC;AACpB,CAAC;AAED,0CAA0C;AAE1C;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,KAAiB;IACrD,IAAI,WAAW,GAAG,CAAC,CAAC;IAEpB,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,cAAc,GAAG,IAAI,CAAC,cAAoC,CAAC;QAEjE,4EAA4E;QAC5E,IAAI,IAAI,CAAC,IAAI,KAAK,WAAW,IAAI,CAAC,cAAc;YAAE,OAAO,IAAI,CAAC;QAE9D,MAAM,UAAU,GAAG,EAAE,GAAG,IAAI,EAAE,CAAC;QAE/B,0EAA0E;QAC1E,UAAU,CAAC,IAAI,GAAG,aAAa,cAAc,EAAE,CAAC;QAChD,OAAO,UAAU,CAAC,cAAc,CAAC;QAEjC,+DAA+D;QAC/D,IAAI,OAAO,IAAI,UAAU,EAAE,CAAC;YAC1B,IAAI,CAAC,CAAC,QAAQ,IAAI,UAAU,CAAC,EAAE,CAAC;gBAC9B,MAAM,UAAU,GAAG,UAAU,CAAC,KAAe,CAAC;gBAE9C,kFAAkF;gBAClF,MAAM,MAAM,GAAG,UAAU,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;gBACvC,IAAI,MAAM,GAAG,CAAC,EAAE,CAAC;oBACf,MAAM,MAAM,GAAG,UAAU,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;oBAC/C,MAAM,SAAS,GAAG,UAAU,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;oBACnD,MAAM,UAAU,GAAG,aAAa,CAAC,SAAS,CAAC,IAAI,SAAS,CAAC;oBACzD,UAAU,CAAC,MAAM,GAAG,IAAI,MAAM,IAAI,UAAU,GAAG,CAAC;gBAClD,CAAC;qBAAM,CAAC;oBACN,UAAU,CAAC,MAAM,GAAG,UAAU,CAAC;gBACjC,CAAC;YACH,CAAC;YACD,OAAO,UAAU,CAAC,KAAK,CAAC;QAC1B,CAAC;QAED,8BAA8B;QAC9B,IAAI,CAAC,UAAU,CAAC,EAAE,IAAI,UAAU,CAAC,EAAE,KAAK,EAAE,EAAE,CAAC;YAC3C,UAAU,CA
AC,EAAE,GAAG,QAAQ,WAAW,EAAE,CAAC;QACxC,CAAC;QACD,WAAW,EAAE,CAAC;QAEd,OAAO,UAAsB,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,iBAAiB,CAAC,KAAiB;IACjD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,SAAS,GAAG,EAAE,GAAG,IAAI,EAAE,CAAC;QAC9B,MAAM,QAAQ,GAAG,SAAS,CAAC,IAAI,IAAI,EAAE,CAAC;QAEtC,IAAI,QAAQ,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;YAClC,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,iBAAiB,CAAC,EAAE,CAAC;gBACjE,IAAI,KAAK,IAAI,SAAS,IAAI,CAAC,CAAC,OAAO,IAAI,SAAS,CAAC,EAAE,CAAC;oBAClD,SAAS,CAAC,OAAO,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;oBACtC,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;aAAM,IAAI,QAAQ,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YAC7C,KAAK,MAAM,CAAC,KAAK,EAAE,OAAO,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,qBAAqB,CAAC,EAAE,CAAC;gBACrE,IAAI,KAAK,IAAI,SAAS,IAAI,CAAC,CAAC,OAAO,IAAI,SAAS,CAAC,EAAE,CAAC;oBAClD,SAAS,CAAC,OAAO,CAAC,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC;oBACtC,OAAO,SAAS,CAAC,KAAK,CAAC,CAAC;gBAC1B,CAAC;YACH,CAAC;QACH,CAAC;QAED,OAAO,SAAqB,CAAC;IAC/B,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kBAAkB,CAAC,KAAiB;IAClD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,IAAI,IAAI,CAAC,IAAI,KAAK,sBAAsB;YAAE,OAAO,IAAI,CAAC;QAEtD,MAAM,UAAU,GAAG,EAAE,GAAG,IAAI,EAAE,CAAC;QAC/B,KAAK,MAAM,CAAC,KAAK,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,mBAAmB,CAAC,EAAE,CAAC;YACrE,IAAI,KAAK,IAAI,UAAU,EAAE,CAAC;gBACxB,IAAI,CAAC,CAAC,SAAS,IAAI,UAAU,CAAC,EAAE,CAAC;oBAC/B,UAAU,CAAC,SAAS,CAAC,GAAG,UAAU,CAAC,KAAK,CAAC,CAAC;gBAC5C,CAAC;gBACD,OAAO,UAAU,CAAC,KAAK,CAAC,CAAC;YAC3B,CAAC;QACH,CAAC;QACD,OAAO,UAAsB,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,SAAS,iBAAiB,CAAC,IAAc,EAAE,OAA+B;IACxE,KAAK,MAAM,CAAC,KAAK,EAAE,SAAS,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC;QACzD,IAAI,KAAK,IAAI,IAAI,IAAI,CAAC,CAAC,SAAS,IAAI,IAAI,CAAC,EAAE,CAAC;YAC1C,IAAI,CAAC,SAAS,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC;YAC9B,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;aAAM,IAAI,KAAK,IAAI,IAAI,IAAI,SAAS,IAAI,IAAI,EAAE,CAAC;YAC9C,OAAO,IAAI,CAAC,KAAK,CAAC,CAAC;QACrB,CAAC;I
ACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,yBAAyB,CAAC,UAAoB,EAAE,QAAgB;IACvE,iBAAiB,CAAC,UAAU,EAAE,qBAAqB,CAAC,CAAC;IAErD,qCAAqC;IACrC,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,EAAE,CAAC;QACnC,MAAM,aAAa,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QACrD,IAAI,aAAa,EAAE,CAAC;YAClB,UAAU,CAAC,WAAW,GAAG,aAAa,CAAC;QACzC,CAAC;IACH,CAAC;SAAM,IAAI,UAAU,CAAC,WAAW,KAAK,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7D,MAAM,aAAa,GAAG,oBAAoB,CAAC,QAAQ,CAAC,CAAC;QACrD,IAAI,aAAa,EAAE,CAAC;YAClB,UAAU,CAAC,WAAW,GAAG,aAAa,CAAC;QACzC,CAAC;IACH,CAAC;AACH,CAAC;AAED;;GAEG;AACH,SAAS,2BAA2B,CAAC,UAAoB,EAAE,QAAgB;IACzE,iBAAiB,CAAC,UAAU,EAAE,uBAAuB,CAAC,CAAC;IAEvD,0BAA0B;IAC1B,IAAI,UAAU,IAAI,UAAU,IAAI,OAAO,UAAU,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QACxE,UAAU,CAAC,QAAQ,GAAG,UAAU,CAAC,QAAQ,CAAC,WAAW,EAAE,CAAC;IAC1D,CAAC;IAED,mDAAmD;IACnD,IAAI,CAAC,CAAC,aAAa,IAAI,UAAU,CAAC,EAAE,CAAC;QACnC,UAAU,CAAC,WAAW,GAAG,QAAQ,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,CAAC;IAClD,CAAC;AACH,CAAC;AAED;;;;GAIG;AACH,MAAM,UAAU,wBAAwB,CAAC,KAAiB;IACxD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QACjC,IAAI,CAAC,QAAQ,CAAC,UAAU,CAAC,YAAY,CAAC;YAAE,OAAO,IAAI,CAAC;QAEpD,MAAM,UAAU,GAAG,EAAE,GAAG,IAAI,EAAE,CAAC;QAE/B,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACrC,yBAAyB,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QAClD,CAAC;aAAM,IAAI,oBAAoB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YAC9C,2BAA2B,CAAC,UAAU,EAAE,QAAQ,CAAC,CAAC;QACpD,CAAC;QACD,sFAAsF;QACtF,gGAAgG;QAEhG,OAAO,UAAsB,CAAC;IAChC,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,oBAAoB,CAAC,KAAiB;IACpD,8CAA8C;IAC9C,MAAM,WAAW,GAA2B,EAAE,CAAC;IAC/C,IAAI,SAAS,GAAG,CAAC,CAAC;IAClB,KAAK,MAAM,IAAI,IAAI,KAAK,EAAE,CAAC;QACzB,MAAM,GAAG,GAAG,IAAI,CAAC,EAAE,CAAC;QACpB,MAAM,KAAK,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAC9B,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,UAAU,CAAC,YAAY,CAAC,EAAE,CAAC;YAC3C,WAAW,CAAC,GAAG,CAAC,GAAG,SAAS,CAAC;YAC7B,SAAS,IAAI,CAAC,CAAC;QACjB,CAAC;IACH,CAAC;IAED,MAAM,UAAU,GAAG,cAAc,CAAC;IAElC,SAAS,YAAY,CAAC,KAAc;QAClC,IAAI,OAAO,KAAK,KAAK,QAAQ;YAAE,OAA
O,KAAK,CAAC;QAE5C,OAAO,KAAK,CAAC,OAAO,CAAC,UAAU,EAAE,CAAC,KAAK,EAAE,GAAW,EAAE,EAAE;YACtD,MAAM,MAAM,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,CAAC;YAChC,IAAI,MAAM,GAAG,CAAC;gBAAE,OAAO,KAAK,CAAC;YAE7B,MAAM,GAAG,GAAG,GAAG,CAAC,SAAS,CAAC,CAAC,EAAE,MAAM,CAAC,CAAC;YACrC,IAAI,KAAK,GAAG,GAAG,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;YAEtC,IAAI,CAAC,CAAC,GAAG,IAAI,WAAW,CAAC;gBAAE,OAAO,KAAK,CAAC;YAExC,MAAM,GAAG,GAAG,WAAW,CAAC,GAAG,CAAC,CAAC;YAE7B,yDAAyD;YACzD,IAAI,KAAK,CAAC,UAAU,CAAC,mBAAmB,CAAC,EAAE,CAAC;gBAC1C,KAAK,GAAG,UAAU,CAAC;YACrB,CAAC;YAED,OAAO,aAAa,GAAG,KAAK,KAAK,EAAE,CAAC;QACtC,CAAC,CAAC,CAAC;IACL,CAAC;IAED,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,OAAO,GAA4B,EAAE,CAAC;QAC5C,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;YAC9C,IAAI,OAAO,GAAG,KAAK,QAAQ,EAAE,CAAC;gBAC5B,OAAO,CAAC,GAAG,CAAC,GAAG,YAAY,CAAC,GAAG,CAAC,CAAC;YACnC,CAAC;iBAAM,IAAI,GAAG,KAAK,IAAI,IAAI,OAAO,GAAG,KAAK,QAAQ,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC1E,MAAM,MAAM,GAA4B,EAAE,CAAC;gBAC3C,KAAK,MAAM,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,GAA8B,CAAC,EAAE,CAAC;oBACpE,MAAM,CAAC,CAAC,CAAC,GAAG,OAAO,CAAC,KAAK,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC,CAAC;gBAC1D,CAAC;gBACD,OAAO,CAAC,GAAG,CAAC,GAAG,MAAM,CAAC;YACxB,CAAC;iBAAM,IAAI,KAAK,CAAC,OAAO,CAAC,GAAG,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,GAAG,CAAC,GAAI,GAAiB,CAAC,GAAG,CAAC,CAAC,IAAa,EAAE,EAAE,CACtD,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,YAAY,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,IAAI,CACrD,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,OAAO,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;YACrB,CAAC;QACH,CAAC;QACD,OAAO,OAAmB,CAAC;IAC7B,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAAC,KAAiB;IAC9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,IAAI,IAAI,CAAC,IAAI,KAAK,sBAAsB,EAAE,CAAC;YACzC,IAAI,CAAC,CAAC,eAAe,IAAI,IAAI,CAAC,IAAI,CAAC,CAAC,YAAY,IAAI,IAAI,CAAC,EAAE,CAAC;gBAC1D,OAAO,EAAE,GAAG,IAAI,EAAE,aAAa,EAAE,IAAI,EAAE,CAAC;YAC1C,CAAC;QACH,CAAC;QACD,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED;;GAEG;AACH,M
AAM,UAAU,uBAAuB,CAAC,KAAiB;IACvD,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE;QACxB,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,IAAI,EAAE,CAAC;QAEjC,cAAc;QACd,IAAI,QAAQ,IAAI,kBAAkB,EAAE,CAAC;YACnC,MAAM,WAAW,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;YACjD,MAAM,QAAQ,GAA4B,EAAE,CAAC;YAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC9C,IAAI,WAAW,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACzB,QAAQ,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;gBACtB,CAAC;YACH,CAAC;YACD,OAAO,QAAoB,CAAC;QAC9B,CAAC;QAED,qBAAqB;QACrB,IAAI,kBAAkB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACrC,MAAM,QAAQ,GAA4B,EAAE,CAAC;YAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC9C,IAAI,oBAAoB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBAClC,QAAQ,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;gBACtB,CAAC;YACH,CAAC;YACD,OAAO,QAAoB,CAAC;QAC9B,CAAC;QAED,uBAAuB;QACvB,IAAI,oBAAoB,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC;YACvC,MAAM,QAAQ,GAA4B,EAAE,CAAC;YAC7C,KAAK,MAAM,CAAC,GAAG,EAAE,GAAG,CAAC,IAAI,MAAM,CAAC,OAAO,CAAC,IAAI,CAAC,EAAE,CAAC;gBAC9C,IAAI,sBAAsB,CAAC,GAAG,CAAC,GAAG,CAAC,EAAE,CAAC;oBACpC,QAAQ,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;gBACtB,CAAC;YACH,CAAC;YACD,OAAO,QAAoB,CAAC;QAC9B,CAAC;QAED,wEAAwE;QACxE,OAAO,IAAI,CAAC;IACd,CAAC,CAAC,CAAC;AACL,CAAC;AAED,0BAA0B;AAE1B;;GAEG;AACH,MAAM,UAAU,gBAAgB,CAAC,KAAiB,EAAE,SAAiB;IACnE,MAAM,OAAO,GAAiB,EAAE,CAAC;IACjC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,SAAS,EAAE,CAAC;QACjD,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC;IAC9C,CAAC;IACD,OAAO,OAAO,CAAC;AACjB,CAAC"}

package/lib/yamlSpecTranslator.d.ts ADDED Viewed

@@ -0,0 +1,20 @@
+import type { TestSpec, TestCase } from '@salesforce/agents';
+import type { EvalPayload, EvalTest } from './evalNormalizer.js';
+/**
+ * Returns true if the content looks like a YAML TestSpec (has testCases + subjectName).
+ * Returns false for JSON EvalPayload, invalid content, or YAML missing required fields.
+ *
+ * @param content - text to inspect
+ * @returns whether the text was recognized as a YAML TestSpec
+ */
+export declare function isYamlTestSpec(content: string): boolean;
+/**
+ * Parse a YAML string into a TestSpec.
+ * Throws if the content is not valid YAML or is missing required fields.
+ *
+ * @param content - YAML text of the spec
+ * @returns the parsed TestSpec
+ */
+export declare function parseTestSpec(content: string): TestSpec;
+/**
+ * Translate a full TestSpec into an EvalPayload.
+ *
+ * @param spec - the TestSpec to translate
+ * @returns the resulting EvalPayload
+ */
+export declare function translateTestSpec(spec: TestSpec): EvalPayload;
+/**
+ * Translate a single TestCase into an EvalTest with ordered steps.
+ *
+ * @param testCase - the test case to translate
+ * @param index - numeric index for this test case (presumably its position within the spec; confirm in yamlSpecTranslator.js)
+ * @param specName - optional spec name (how it is used is not visible from this declaration)
+ * @returns the resulting EvalTest
+ */
+export declare function translateTestCase(testCase: TestCase, index: number, specName?: string): EvalTest;

package/lib/yamlSpecTranslator.js ADDED Viewed

@@ -0,0 +1,217 @@
+/*
+ * Copyright 2026, Salesforce, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/* eslint-disable camelcase */
+import { parse as parseYaml } from 'yaml';
+/* --- JSONPath mappings from org model to Eval API refs ---
+ * Keys are the `$.generatedData.*` JSONPaths that may appear as the `actual` parameter of
+ * a custom evaluation; values are the step references substituted for them. The `sm` and
+ * `gs` prefixes are the ids given to the send_message and get_state steps.
+ * `invokedActions` and `actionsSequence` both map to the same invoked-actions reference.
+ */
+const ACTUAL_PATH_MAP = {
+    '$.generatedData.outcome': '{sm.response}',
+    '$.generatedData.topic': '{gs.response.planner_response.lastExecution.topic}',
+    '$.generatedData.invokedActions': '{gs.response.planner_response.lastExecution.invokedActions}',
+    '$.generatedData.actionsSequence': '{gs.response.planner_response.lastExecution.invokedActions}',
+};
+/* --- Custom evaluation name to evaluator type mapping ---
+ * Evaluation names that are not listed here are translated to `evaluator.<name>`.
+ */
+const CUSTOM_EVAL_TYPE_MAP = {
+    string_comparison: 'evaluator.string_assertion',
+    numeric_comparison: 'evaluator.numeric_assertion',
+};
+/* JSONPaths that require the get_state step.
+ * These are the ACTUAL_PATH_MAP keys whose mapped reference starts with `{gs.`, i.e. reads
+ * from the get_state step's response.
+ */
+const PLANNER_PATHS = new Set([
+    '$.generatedData.topic',
+    '$.generatedData.invokedActions',
+    '$.generatedData.actionsSequence',
+]);
+// --- Public API ---
+/**
+ * Returns true if the content looks like a YAML TestSpec (has testCases + subjectName).
+ * Returns false for JSON EvalPayload, invalid content, or YAML missing required fields.
+ */
+export function isYamlTestSpec(content) {
+    try {
+        const parsed = parseYaml(content);
+        if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
+            return false;
+        }
+        const obj = parsed;
+        return Array.isArray(obj.testCases) && typeof obj.subjectName === 'string';
+    }
+    catch {
+        return false;
+    }
+}
+/**
+ * Parse a YAML string into a TestSpec.
+ * Throws if the content is not valid YAML or is missing required fields.
+ */
+export function parseTestSpec(content) {
+    const parsed = parseYaml(content);
+    if (parsed === null || typeof parsed !== 'object' || Array.isArray(parsed)) {
+        throw new Error('Invalid TestSpec: expected a YAML object');
+    }
+    const obj = parsed;
+    if (!Array.isArray(obj.testCases)) {
+        throw new Error('Invalid TestSpec: missing testCases array');
+    }
+    if (typeof obj.subjectName !== 'string') {
+        throw new Error('Invalid TestSpec: missing subjectName');
+    }
+    if (typeof obj.name !== 'string') {
+        throw new Error('Invalid TestSpec: missing name');
+    }
+    return parsed;
+}
+/**
+ * Translate a full TestSpec into an EvalPayload.
+ */
+export function translateTestSpec(spec) {
+    return {
+        tests: spec.testCases.map((tc, idx) => translateTestCase(tc, idx, spec.name)),
+    };
+}
+/**
+ * Translate a single TestCase into an EvalTest with ordered steps.
+ */
+export function translateTestCase(testCase, index, specName) {
+    const id = specName ? `${specName}_case_${index}` : `test_case_${index}`;
+    const steps = [];
+    // 1. agent.create_session
+    steps.push({
+        type: 'agent.create_session',
+        id: 'cs',
+        use_agent_api: true,
+    });
+    // 2. Conversation history — only user messages become send_message steps
+    let historyIdx = 0;
+    if (testCase.conversationHistory) {
+        for (const entry of testCase.conversationHistory) {
+            if (entry.role === 'user') {
+                steps.push({
+                    type: 'agent.send_message',
+                    id: `history_${historyIdx}`,
+                    session_id: '{cs.session_id}',
+                    utterance: entry.message,
+                });
+                historyIdx++;
+            }
+        }
+    }
+    // 3. Test utterance
+    steps.push({
+        type: 'agent.send_message',
+        id: 'sm',
+        session_id: '{cs.session_id}',
+        utterance: testCase.utterance,
+    });
+    // 4. Determine if get_state is needed
+    const needsGetState = needsPlannerState(testCase);
+    if (needsGetState) {
+        steps.push({
+            type: 'agent.get_state',
+            id: 'gs',
+            session_id: '{cs.session_id}',
+        });
+    }
+    // 5. Evaluators
+    if (testCase.expectedTopic !== undefined) {
+        steps.push({
+            type: 'evaluator.planner_topic_assertion',
+            id: 'check_topic',
+            expected: testCase.expectedTopic,
+            actual: '{gs.response.planner_response.lastExecution.topic}',
+            operator: 'contains',
+        });
+    }
+    if (testCase.expectedActions !== undefined && testCase.expectedActions.length > 0) {
+        steps.push({
+            type: 'evaluator.planner_actions_assertion',
+            id: 'check_actions',
+            expected: testCase.expectedActions,
+            actual: '{gs.response.planner_response.lastExecution.invokedActions}',
+            operator: 'includes_items',
+        });
+    }
+    if (testCase.expectedOutcome !== undefined) {
+        steps.push({
+            type: 'evaluator.bot_response_rating',
+            id: 'check_outcome',
+            utterance: testCase.utterance,
+            expected: testCase.expectedOutcome,
+            actual: '{sm.response}',
+            threshold: 3.0,
+        });
+    }
+    if (testCase.customEvaluations) {
+        testCase.customEvaluations.forEach((customEval, customIdx) => {
+            const step = translateCustomEvaluation(customEval, customIdx);
+            steps.push(step);
+        });
+    }
+    return { id, steps };
+}
+// --- Internal helpers ---
+/**
+ * Determine whether the get_state step is needed for this test case.
+ */
+function needsPlannerState(testCase) {
+    if (testCase.expectedTopic !== undefined)
+        return true;
+    if (testCase.expectedActions !== undefined && testCase.expectedActions.length > 0)
+        return true;
+    if (testCase.customEvaluations) {
+        for (const customEval of testCase.customEvaluations) {
+            for (const param of customEval.parameters) {
+                if (param.name === 'actual' && PLANNER_PATHS.has(param.value)) {
+                    return true;
+                }
+            }
+        }
+    }
+    return false;
+}
+/**
+ * Translate a single customEvaluation entry into an EvalStep.
+ */
+function translateCustomEvaluation(customEval, index) {
+    const evalType = CUSTOM_EVAL_TYPE_MAP[customEval.name] ?? `evaluator.${customEval.name}`;
+    let operator = '';
+    let actual = '';
+    let expected = '';
+    for (const param of customEval.parameters) {
+        if (param.name === 'operator') {
+            operator = param.value;
+        }
+        else if (param.name === 'actual') {
+            actual = mapActualPath(param.value);
+        }
+        else if (param.name === 'expected') {
+            expected = param.value;
+        }
+    }
+    return {
+        type: evalType,
+        id: `custom_${index}`,
+        operator,
+        actual,
+        expected,
+    };
+}
+/**
+ * Map an org-model JSONPath to the Eval API shorthand ref.
+ * Unknown paths are returned as-is.
+ */
+function mapActualPath(path) {
+    return ACTUAL_PATH_MAP[path] ?? path;
+}
+//# sourceMappingURL=yamlSpecTranslator.js.map