llm-testrunner-components 1.2.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. package/README.md +9 -5
  2. package/dist/cjs/{app-chips_4.cjs.entry.js → app-chips_5.cjs.entry.js} +38 -6
  3. package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
  4. package/dist/cjs/index.cjs.js +499 -68
  5. package/dist/cjs/index.cjs.js.map +1 -1
  6. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  7. package/dist/cjs/loader.cjs.js +1 -1
  8. package/dist/collection/collection-manifest.json +1 -0
  9. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +22 -12
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js +59 -15
  12. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  13. package/dist/collection/components/llm-test-runner/test-cases/chat-history.css +101 -0
  14. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js +105 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/chat-history.js.map +1 -0
  16. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +45 -5
  17. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +21 -0
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +12 -2
  20. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  22. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  23. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +1 -1
  24. package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -1
  25. package/dist/collection/index.js.map +1 -1
  26. package/dist/collection/lib/evaluation/actual-value-resolver.js +52 -0
  27. package/dist/collection/lib/evaluation/actual-value-resolver.js.map +1 -0
  28. package/dist/collection/lib/evaluation/evaluation-engine.js +1 -1
  29. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  30. package/dist/collection/lib/evaluation/evaluation-service.js +55 -17
  31. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  32. package/dist/collection/lib/evaluation/types.js.map +1 -1
  33. package/dist/collection/lib/form/components/app-chips.js +1 -1
  34. package/dist/collection/lib/form/components/app-select.js +1 -1
  35. package/dist/collection/lib/form/components/app-textarea.js +2 -2
  36. package/dist/collection/lib/import-export/test-suite-exporter.js +4 -0
  37. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  38. package/dist/collection/lib/import-export/test-suite-importer.js +7 -1
  39. package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -1
  40. package/dist/collection/lib/test-cases/test-case-factory.js +7 -0
  41. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  42. package/dist/collection/lib/test-cases/test-case-mutations.js +58 -23
  43. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  44. package/dist/collection/schemas/expected-outcome.js +39 -0
  45. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  46. package/dist/collection/schemas/model-response.js +7 -0
  47. package/dist/collection/schemas/model-response.js.map +1 -0
  48. package/dist/collection/schemas/test-case.js +8 -1
  49. package/dist/collection/schemas/test-case.js.map +1 -1
  50. package/dist/collection/types/expected-outcome.js.map +1 -1
  51. package/dist/collection/types/llm-test-runner.js.map +1 -1
  52. package/dist/collection/types/test-case.js.map +1 -1
  53. package/dist/components/app-chips.js +1 -1
  54. package/dist/components/app-select.js +1 -1
  55. package/dist/components/app-textarea.js +1 -1
  56. package/dist/components/chat-history.d.ts +11 -0
  57. package/dist/components/chat-history.js +2 -0
  58. package/dist/components/chat-history.js.map +1 -0
  59. package/dist/components/index.js +1 -1
  60. package/dist/components/llm-test-runner.js +1 -1
  61. package/dist/components/{p-CVtKFBJl.js → p-D2qDAxFN.js} +2 -2
  62. package/dist/components/{p-Dv7cB5FU.js → p-D4dHUFN9.js} +2 -2
  63. package/dist/components/{p-CE5-1jfZ.js → p-eN2dLrsr.js} +2 -2
  64. package/dist/components/p-kmtfMXcQ.js +2 -0
  65. package/dist/components/p-kmtfMXcQ.js.map +1 -0
  66. package/dist/components/{p-BcygfrMf.js → p-wzA48RFK.js} +3 -3
  67. package/dist/components/p-wzA48RFK.js.map +1 -0
  68. package/dist/esm/{app-chips_4.entry.js → app-chips_5.entry.js} +38 -7
  69. package/dist/esm/app-chips_5.entry.js.map +1 -0
  70. package/dist/esm/index.js +499 -68
  71. package/dist/esm/index.js.map +1 -1
  72. package/dist/esm/llm-testrunner.js +1 -1
  73. package/dist/esm/loader.js +1 -1
  74. package/dist/llm-testrunner/index.esm.js +2 -2
  75. package/dist/llm-testrunner/index.esm.js.map +1 -1
  76. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  77. package/dist/llm-testrunner/p-5bf1fc78.entry.js +2 -0
  78. package/dist/llm-testrunner/p-5bf1fc78.entry.js.map +1 -0
  79. package/dist/react/components.d.ts +6 -1
  80. package/dist/react/components.d.ts.map +1 -1
  81. package/dist/react/components.js +9 -0
  82. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +4 -4
  83. package/dist/types/components/llm-test-runner/test-cases/chat-history.d.ts +14 -0
  84. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +1 -0
  85. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +6 -0
  86. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +3 -0
  87. package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +2 -1
  88. package/dist/types/components.d.ts +55 -2
  89. package/dist/types/index.d.ts +1 -1
  90. package/dist/types/lib/evaluation/actual-value-resolver.d.ts +9 -0
  91. package/dist/types/lib/evaluation/evaluation-service.d.ts +2 -2
  92. package/dist/types/lib/evaluation/types.d.ts +1 -1
  93. package/dist/types/lib/import-export/test-suite-exporter.d.ts +4 -0
  94. package/dist/types/lib/import-export/test-suite-importer.d.ts +1 -1
  95. package/dist/types/lib/test-cases/test-case-mutations.d.ts +10 -1
  96. package/dist/types/schemas/expected-outcome.d.ts +116 -0
  97. package/dist/types/schemas/model-response.d.ts +7 -0
  98. package/dist/types/schemas/test-case.d.ts +93 -1
  99. package/dist/types/types/expected-outcome.d.ts +1 -1
  100. package/dist/types/types/llm-test-runner.d.ts +6 -3
  101. package/dist/types/types/test-case.d.ts +1 -1
  102. package/package.json +1 -1
  103. package/dist/cjs/app-chips_4.cjs.entry.js.map +0 -1
  104. package/dist/components/p-BcygfrMf.js.map +0 -1
  105. package/dist/esm/app-chips_4.entry.js.map +0 -1
  106. package/dist/llm-testrunner/p-5df053b4.entry.js +0 -2
  107. package/dist/llm-testrunner/p-5df053b4.entry.js.map +0 -1
  108. /package/dist/components/{p-CVtKFBJl.js.map → p-D2qDAxFN.js.map} +0 -0
  109. /package/dist/components/{p-Dv7cB5FU.js.map → p-D4dHUFN9.js.map} +0 -0
  110. /package/dist/components/{p-CE5-1jfZ.js.map → p-eN2dLrsr.js.map} +0 -0
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE
,QAAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: request.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: 
FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n 
private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}
1
+ {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,KAAK,CAAC,cAAc;gBACpC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE
,QAAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: field.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: 
FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n 
private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}
@@ -1,5 +1,6 @@
1
1
  import { LLMEvaluationEngine } from "./evaluation-engine";
2
2
  import { normalizeEvaluationParametersForField } from "./field-evaluation-approach";
3
+ import { resolveActualValue } from "./actual-value-resolver";
3
4
  /**
4
5
  * Service for evaluating test case responses
5
6
  */
@@ -13,34 +14,71 @@ export class EvaluationService {
13
14
  * @param testCase - The test case to evaluate
14
15
  * @param onResult - Callback to handle the evaluation result
15
16
  */
16
- async evaluateTestCase(testCase, onResult) {
17
- if (!testCase.output) {
18
- console.warn('⚠️ No output to evaluate for test case:', testCase.id);
19
- return;
20
- }
21
- const fields = (testCase.expectedOutcome || []).flatMap((field, index) => {
17
+ async evaluateTestCase(testCase, onResult, extractors) {
18
+ const fields = [];
19
+ const failedFields = [];
20
+ for (const [index, field] of (testCase.expectedOutcome || []).entries()) {
22
21
  if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {
23
- return [];
22
+ continue;
24
23
  }
25
- return [
26
- {
24
+ const evaluationParameters = normalizeEvaluationParametersForField(field.type, field.evaluationParameters);
25
+ const expectedValue = getFieldExpectedValue(field);
26
+ const resolvedActualValue = await resolveActualValue(field, testCase.output, extractors);
27
+ if (resolvedActualValue.success) {
28
+ fields.push({
27
29
  index,
28
30
  label: field.label,
29
31
  type: field.type,
30
- expectedValue: getFieldExpectedValue(field),
31
- evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
32
- },
33
- ];
34
- });
32
+ expectedValue,
33
+ actualResponse: resolvedActualValue.value,
34
+ evaluationParameters,
35
+ });
36
+ }
37
+ else {
38
+ failedFields.push({
39
+ index,
40
+ label: field.label,
41
+ type: field.type,
42
+ expectedValue,
43
+ passed: false,
44
+ keywordMatches: [],
45
+ evaluationParameters,
46
+ evaluationApproachResult: {
47
+ score: 0,
48
+ approachUsed: evaluationParameters.approach,
49
+ },
50
+ error: 'error' in resolvedActualValue
51
+ ? resolvedActualValue.error
52
+ : 'Failed to resolve actual value.',
53
+ });
54
+ }
55
+ }
56
+ if (fields.length === 0) {
57
+ if (failedFields.length === 0) {
58
+ console.warn('⚠️ No evaluable fields for test case:', testCase.id);
59
+ return;
60
+ }
61
+ onResult({
62
+ testCaseId: testCase.id,
63
+ passed: false,
64
+ keywordMatches: [],
65
+ fieldResults: failedFields,
66
+ timestamp: new Date().toISOString(),
67
+ });
68
+ return;
69
+ }
35
70
  const evaluationRequest = {
36
71
  testCaseId: testCase.id,
37
72
  question: testCase.question,
38
- actualResponse: testCase.output,
39
73
  fields,
40
74
  };
41
75
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
42
- console.log('📊 Evaluation result received:', result);
43
- onResult(result);
76
+ const combinedResults = [...(result.fieldResults || []), ...failedFields].sort((a, b) => a.index - b.index);
77
+ onResult({
78
+ ...result,
79
+ passed: combinedResults.every(field => field.passed && !field.error),
80
+ fieldResults: combinedResults,
81
+ });
44
82
  });
45
83
  }
46
84
  }
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAO1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AAEpF;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAA2B,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,OAAO,CAC7E,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;YACf,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACjE,OAAO,EAAE,CAAC;YACZ,CAAC;YAED,OAAO;gBACL;oBACE,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,aAAa,EAAE,qBAAqB,CAAC,KAAK,CAAC;oBAC3C,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;iBACF;aACF,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,cAAc,EAAE,QAAQ,CAAC,MAAM;YAC/B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n} from './types';\nimport { TestCase, ExpectedOutcomeField } from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\n\n/**\n * Service for evaluating test case responses\n */\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n 
this.engine = new LLMEvaluationEngine();\n }\n\n /**\n * Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const fields: FieldEvaluationInput[] = (testCase.expectedOutcome || []).flatMap(\n (field, index) => {\n if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {\n return [];\n }\n\n return [\n {\n index,\n label: field.label,\n type: field.type,\n expectedValue: getFieldExpectedValue(field),\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n },\n ];\n },\n );\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n actualResponse: testCase.output,\n fields,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}
1
+ {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAY1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AACpF,OAAO,EAAE,kBAAkB,EAAE,MAAM,yBAAyB,CAAC;AAE7D;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C,EAC5C,UAAuC;QAEvC,MAAM,MAAM,GAA2B,EAAE,CAAC;QAC1C,MAAM,YAAY,GAA4B,EAAE,CAAC;QAEjD,KAAK,MAAM,CAAC,KAAK,EAAE,KAAK,CAAC,IAAI,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,OAAO,EAAE,EAAE,CAAC;YACxE,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACjE,SAAS;YACX,CAAC;YAED,MAAM,oBAAoB,GAAG,qCAAqC,CAChE,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B,CAAC;YACF,MAAM,aAAa,GAAG,qBAAqB,CAAC,KAAK,CAAC,CAAC;YACnD,MAAM,mBAAmB,GAAG,MAAM,kBAAkB,CAClD,KAAK,EACL,QAAQ,CAAC,MAAM,EACf,UAAU,CACX,CAAC;YAEF,IAAI,mBAAmB,CAAC,OAAO,EAAE,CAAC;gBAChC,MAAM,CAAC,IAAI,CAAC;oBACV,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,aAAa;oBACb,cAAc,EAAE,mBAAmB,CAAC,KAAK;oBACzC,oBAAoB;iBACrB,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,YAAY,CAAC,IAAI,CAAC;oBAChB,KAAK;oBACL,KAAK,EAAE,KAAK,CAAC,KAAK;oBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;oBAChB,aAAa;oBACb,MAAM,EAAE,KAAK;oBACb,cAAc,EAAE,EAAE;oBAClB,oBAAoB;oBACpB,wBAAwB,EAAE;wBACxB,KAAK,EAAE,CAAC;wBACR,YAAY,EAAE,oBAAoB,CAAC,QAAQ;qBAC5C;oBACD,KAAK,EACH,OAAO,IAAI,mBAAmB;wBAC5B,CAAC,CAAC,mBAAmB,CAAC,KAAK;wBAC3B,CAAC,CAAC,iCAAiC;iBACxC,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QAED,IAAI,MAAM,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YACxB,IAAI,YAAY,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;gBAC9B,OAAO,CAAC,IAAI,CAAC,uCAAuC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;gBACnE,OAAO;YACT,CAAC;YAED,QAAQ,CAAC;gBACP,UAAU,EAAE,QAAQ,CAAC,EAAE;gBACvB,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,YAAY,EAAE,YAAY;gBAC1B,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;aACpC,CAAC,CAAC;YACH,OAAO;QACT,CAAC;QAED,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAA
Q,EAAE,QAAQ,CAAC,QAAQ;YAC3B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAAC,iBAAiB,EAAE,CAAC,MAAwB,EAAE,EAAE;YACjF,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,EAAE,GAAG,YAAY,CAAC,CAAC,IAAI,CAC5E,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,KAAK,CAC5B,CAAC;YACF,QAAQ,CAAC;gBACP,GAAG,MAAM;gBACT,MAAM,EAAE,eAAe,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC;gBACpE,YAAY,EAAE,eAAe;aAC9B,CAAC,CAAC;QACL,CAAC,CAAC,CAAC;IACL,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n FieldEvaluationResult,\n} from './types';\nimport {\n TestCase,\n ExpectedOutcomeField,\n EvaluationSourceExtractors,\n} from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\nimport { resolveActualValue } from './actual-value-resolver';\n\n/**\n * Service for evaluating test case responses\n */\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /**\n * Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n extractors?: EvaluationSourceExtractors,\n ): Promise<void> {\n const fields: FieldEvaluationInput[] = [];\n const failedFields: FieldEvaluationResult[] = [];\n\n for (const [index, field] of (testCase.expectedOutcome || []).entries()) {\n if (field.type === 'textarea' && field.outcomeMode === 'dynamic') {\n continue;\n }\n\n const evaluationParameters = 
normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n );\n const expectedValue = getFieldExpectedValue(field);\n const resolvedActualValue = await resolveActualValue(\n field,\n testCase.output,\n extractors,\n );\n\n if (resolvedActualValue.success) {\n fields.push({\n index,\n label: field.label,\n type: field.type,\n expectedValue,\n actualResponse: resolvedActualValue.value,\n evaluationParameters,\n });\n } else {\n failedFields.push({\n index,\n label: field.label,\n type: field.type,\n expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: evaluationParameters.approach,\n },\n error:\n 'error' in resolvedActualValue\n ? resolvedActualValue.error\n : 'Failed to resolve actual value.',\n });\n }\n }\n\n if (fields.length === 0) {\n if (failedFields.length === 0) {\n console.warn('⚠️ No evaluable fields for test case:', testCase.id);\n return;\n }\n\n onResult({\n testCaseId: testCase.id,\n passed: false,\n keywordMatches: [],\n fieldResults: failedFields,\n timestamp: new Date().toISOString(),\n });\n return;\n }\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n fields,\n };\n\n await this.engine.evaluateResponse(evaluationRequest, (result: EvaluationResult) => {\n const combinedResults = [...(result.fieldResults || []), ...failedFields].sort(\n (a, b) => a.index - b.index,\n );\n onResult({\n ...result,\n passed: combinedResults.every(field => field.passed && !field.error),\n fieldResults: combinedResults,\n });\n });\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}
@@ -1 +1 @@
1
- {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n actualResponse: string;\n fields: FieldEvaluationInput[];\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n fieldResults?: FieldEvaluationResult[];\n timestamp?: string;\n evaluationParameters?: EvaluationParameters;\n evaluationApproachResult?: EvaluationApproachResult;\n}\n\nexport interface FieldEvaluationResult {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n error?: string;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}
1
+ {"version":3,"file":"types.js","sourceRoot":"","sources":["../../../src/lib/evaluation/types.ts"],"names":[],"mappings":"","sourcesContent":["import {\n EvaluationParameters,\n EvaluationApproachResult,\n} from '../../types/evaluation';\nimport type { ExpectedOutcomeFieldType } from '../../types/llm-test-runner';\n\nexport interface EvaluationRequest {\n testCaseId: string;\n question: string;\n expectedOutcome: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface FieldEvaluationInput {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n actualResponse: string;\n evaluationParameters: EvaluationParameters;\n}\n\nexport interface EvaluationRequestV2 {\n testCaseId: string;\n question: string;\n fields: FieldEvaluationInput[];\n}\n\nexport interface EvaluationResult {\n testCaseId: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n fieldResults?: FieldEvaluationResult[];\n timestamp?: string;\n evaluationParameters?: EvaluationParameters;\n evaluationApproachResult?: EvaluationApproachResult;\n}\n\nexport interface FieldEvaluationResult {\n index: number;\n label: string;\n type: ExpectedOutcomeFieldType;\n expectedValue: string;\n passed: boolean;\n keywordMatches: KeywordMatch[];\n evaluationParameters: EvaluationParameters;\n evaluationApproachResult: EvaluationApproachResult;\n error?: string;\n}\n\nexport interface KeywordMatch {\n keyword: string;\n found: boolean;\n evaluationApproachResult: EvaluationApproachResult;\n}\n\nexport type EvaluationCallback = (result: EvaluationResult) => void;\n\nexport interface RougeKeywordDetails {\n rouge1: number;\n rougeL: number;\n scoreUsed: string;\n approach: string;\n}\n\nexport interface Rouge1OverallDetails {\n keywordsPassed: number;\n totalKeywords: number;\n passRate: string;\n thresholdUsed: number;\n approach: string;\n}\n"]}
@@ -29,7 +29,7 @@ export class AppChips {
29
29
  name: c.name,
30
30
  autocomplete: c.autocomplete,
31
31
  };
32
- return (h("div", { key: '4f081007c0b11fb20bd74644eb063a6dd5a45d98', class: "app-chips" }, c.label && (h("label", { key: '3934ec12898c947b163a15494330a9326fe3d6f1', class: "app-chips__label", htmlFor: c.name }, c.label)), h("div", { key: 'cba194df0033a5951f568875e4686a76c9c28438', class: "app-chips__container" }, this.value.map((chip) => (h("span", { class: "app-chips__chip", key: chip }, c.type === 'url' ? (h("a", { href: chip, target: "_blank", rel: "noopener noreferrer", class: "app-chips__link" }, chip)) : (chip), h("button", { class: "app-chips__remove", type: "button", onClick: () => this.emitRemoveChip(chip) }, "\u00D7")))), h("input", { key: '11c9e7f326c37cb3a413e9124926ca88eefb1911', class: "app-chips__input", type: c.type || 'text', ...allowedAttrs, onKeyDown: (e) => {
32
+ return (h("div", { key: 'fb7d4d5444e9c9ac33c56aec88e3e10ed103c8be', class: "app-chips" }, c.label && (h("label", { key: '2d0041b3a137fecddef2273eac3792b5e8de27ab', class: "app-chips__label", htmlFor: c.name }, c.label)), h("div", { key: 'f73b1105e567b233626073e05b9da712689e7b12', class: "app-chips__container" }, this.value.map((chip) => (h("span", { class: "app-chips__chip", key: chip }, c.type === 'url' ? (h("a", { href: chip, target: "_blank", rel: "noopener noreferrer", class: "app-chips__link" }, chip)) : (chip), h("button", { class: "app-chips__remove", type: "button", onClick: () => this.emitRemoveChip(chip) }, "\u00D7")))), h("input", { key: '7676ff95531b34d10cbf9402e72a723e7e123e0e', class: "app-chips__input", type: c.type || 'text', ...allowedAttrs, onKeyDown: (e) => {
33
33
  if (e.key === 'Enter') {
34
34
  const input = e.target;
35
35
  const val = input.value.trim();
@@ -14,7 +14,7 @@ export class AppSelect {
14
14
  placeholder: c.placeholder,
15
15
  autocomplete: c.autocomplete,
16
16
  };
17
- return (h("div", { key: '4d9d250026b51799b073ae828c7d15563ec27c22', class: "app-select" }, c.label && (h("label", { key: '53e101427e78063bd6ed671a31919bbe373c5563', class: "app-select__label", htmlFor: c.name }, c.label)), h("div", { key: '28b18245c261a3766948ed7d151c356b8faeb4cf' }, h("select", { key: 'eed9bcb9fc3e00ae62c681a37e967815d350d5fb', ...allowedAttrs, class: "app-select__select", onInput: (e) => {
17
+ return (h("div", { key: '968f0fffe1eff976ac7e00f02db6fb84aa529de4', class: "app-select" }, c.label && (h("label", { key: 'dac02d2335754ff5d6ce6ba1df5777f8b019cfae', class: "app-select__label", htmlFor: c.name }, c.label)), h("div", { key: 'c92698199479bbdde1cfd559d69fc97e54d2862a' }, h("select", { key: '6e9b27b034d057921f52fbd12653c5ef1b6af1bc', ...allowedAttrs, class: "app-select__select", onInput: (e) => {
18
18
  const raw = e.target.value;
19
19
  const matched = c.optionList.find(opt => String(opt) === raw);
20
20
  this.valueChange.emit({
@@ -21,10 +21,10 @@ export class AppTextarea {
21
21
  name: c.name,
22
22
  autocomplete: c.autocomplete,
23
23
  };
24
- return (h("div", { key: '29bbcd954df6d18e968f6b2670224fda6f03a9f2', class: {
24
+ return (h("div", { key: 'f0749b1f2badf8434272da9fb37b354b42ba988b', class: {
25
25
  'textarea-wrapper': true,
26
26
  'textarea-wrapper--read-only': !!c.readOnly,
27
- } }, c.label && (h("label", { key: 'd95d3cbf01ce66c5bd074ede2eb1f87a0c98f3e4', class: "textarea-label", htmlFor: c.name }, c.label)), h("textarea", { key: 'faeb28c35a543213d9c5847e93779f957e367ff0', ...allowedAttrs, class: "textarea-element", value: this.value, onInput: this.handleChange }), c.helpText && h("p", { key: '558abfe03f7107352a6d40aea78a35b304bc11a6', class: "help-text" }, c.helpText)));
27
+ } }, c.label && (h("label", { key: '3448c838bcf9e962df005eae8fc313d216497c35', class: "textarea-label", htmlFor: c.name }, c.label)), h("textarea", { key: 'b4ee67a24fa71b0fa042625f943b0e226a6d14b7', ...allowedAttrs, class: "textarea-element", value: this.value, onInput: this.handleChange }), c.helpText && h("p", { key: 'fb6263c32e6cc5e36dbc77344c31487d63d51a1c', class: "help-text" }, c.helpText)));
28
28
  }
29
29
  static get is() { return "app-textarea"; }
30
30
  static get encapsulation() { return "shadow"; }
@@ -8,6 +8,10 @@ export function formatTestSuiteAsJson(testCases) {
8
8
  id: testCase.id,
9
9
  question: testCase.question,
10
10
  expectedOutcome: testCase.expectedOutcome,
11
+ chatHistory: {
12
+ enabled: testCase.chatHistory.enabled,
13
+ value: testCase.chatHistory.value,
14
+ },
11
15
  }));
12
16
  return JSON.stringify(exportData, null, 2);
13
17
  }
@@ -1 +1 @@
1
- {"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"AAQA;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAqB;IACzD,MAAM,UAAU,GAA0B,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnE,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,eAAe,EAAE,QAAQ,CAAC,eAAe;KAC1C,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC7C,CAAC","sourcesContent":["import { ExpectedOutcomeField, TestCase } from '../../types/llm-test-runner';\n\nexport interface TestSuiteExportData {\n id: string;\n question: string;\n expectedOutcome: ExpectedOutcomeField[];\n}\n\n/**\n * Formats test cases as a JSON string suitable for saving as a test suite\n * @param testCases - Array of test cases to format\n * @returns JSON string representation of the test suite\n */\nexport function formatTestSuiteAsJson(testCases: TestCase[]): string {\n const exportData: TestSuiteExportData[] = testCases.map(testCase => ({\n id: testCase.id,\n question: testCase.question,\n expectedOutcome: testCase.expectedOutcome,\n }));\n\n return JSON.stringify(exportData, null, 2);\n}\n"]}
1
+ {"version":3,"file":"test-suite-exporter.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-exporter.ts"],"names":[],"mappings":"AAYA;;;;GAIG;AACH,MAAM,UAAU,qBAAqB,CAAC,SAAqB;IACzD,MAAM,UAAU,GAA0B,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;QACnE,EAAE,EAAE,QAAQ,CAAC,EAAE;QACf,QAAQ,EAAE,QAAQ,CAAC,QAAQ;QAC3B,eAAe,EAAE,QAAQ,CAAC,eAAe;QACzC,WAAW,EAAE;YACX,OAAO,EAAE,QAAQ,CAAC,WAAW,CAAC,OAAO;YACrC,KAAK,EAAE,QAAQ,CAAC,WAAW,CAAC,KAAK;SAClC;KACF,CAAC,CAAC,CAAC;IAEJ,OAAO,IAAI,CAAC,SAAS,CAAC,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC,CAAC;AAC7C,CAAC","sourcesContent":["import { ExpectedOutcomeField, TestCase } from '../../types/llm-test-runner';\n\nexport interface TestSuiteExportData {\n id: string;\n question: string;\n expectedOutcome: ExpectedOutcomeField[];\n chatHistory: {\n enabled: boolean;\n value: string;\n };\n}\n\n/**\n * Formats test cases as a JSON string suitable for saving as a test suite\n * @param testCases - Array of test cases to format\n * @returns JSON string representation of the test suite\n */\nexport function formatTestSuiteAsJson(testCases: TestCase[]): string {\n const exportData: TestSuiteExportData[] = testCases.map(testCase => ({\n id: testCase.id,\n question: testCase.question,\n expectedOutcome: testCase.expectedOutcome,\n chatHistory: {\n enabled: testCase.chatHistory.enabled,\n value: testCase.chatHistory.value,\n },\n }));\n\n return JSON.stringify(exportData, null, 2);\n}\n"]}
@@ -1,14 +1,20 @@
1
1
  import { createTestCaseFromInput } from "../test-cases/test-case-factory";
2
2
  import { validateTestCaseInputArray } from "../../schemas/test-case";
3
+ import { validateExpectedOutcomeArrayWithExtractors } from "../../schemas/expected-outcome";
3
4
  /**
4
5
  * Validates and imports test cases from JSON content
5
6
  * @param jsonContent - The JSON string to parse and validate
6
7
  * @returns Validation result with test cases or error message
7
8
  */
8
- export function importTestSuite(jsonContent) {
9
+ export function importTestSuite(jsonContent, allowedExtractorIds = []) {
9
10
  try {
10
11
  const parsed = JSON.parse(jsonContent);
11
12
  validateTestCaseInputArray(parsed);
13
+ if (allowedExtractorIds.length > 0) {
14
+ parsed.forEach((testCase) => {
15
+ validateExpectedOutcomeArrayWithExtractors(testCase.expectedOutcome, allowedExtractorIds);
16
+ });
17
+ }
12
18
  const testCases = parsed.map((item, index) => {
13
19
  try {
14
20
  return createTestCaseFromInput(item);
@@ -1 +1 @@
1
- {"version":3,"file":"test-suite-importer.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-importer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AAQrE;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAAC,WAAmB;IACjD,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACvC,0BAA0B,CAAC,MAAM,CAAC,CAAC;QAEnC,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC3C,IAAI,CAAC;gBACH,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;YACvC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;gBACrE,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,KAAK,OAAO,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,OAAO,EAAE,IAAI;YACb,SAAS;SACV,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EACH,GAAG,YAAY,KAAK;gBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,gEAAgE;SACvE,CAAC;IACJ,CAAC;AACH,CAAC","sourcesContent":["import type { TestCase } from '../../types/llm-test-runner';\nimport { createTestCaseFromInput } from '../test-cases/test-case-factory';\nimport { validateTestCaseInputArray } from '../../schemas/test-case';\n\nexport interface ImportValidationResult {\n success: boolean;\n testCases?: TestCase[];\n error?: string;\n}\n\n/**\n * Validates and imports test cases from JSON content\n * @param jsonContent - The JSON string to parse and validate\n * @returns Validation result with test cases or error message\n */\nexport function importTestSuite(jsonContent: string): ImportValidationResult {\n try {\n const parsed = JSON.parse(jsonContent);\n validateTestCaseInputArray(parsed);\n\n const testCases = parsed.map((item, index) => {\n try {\n return createTestCaseFromInput(item);\n } catch (err) {\n const message = err instanceof Error ? 
err.message : 'Unknown error';\n throw new Error(`Invalid test case at index ${index}: ${message}`);\n }\n });\n\n return {\n success: true,\n testCases,\n };\n } catch (err) {\n return {\n success: false,\n error:\n err instanceof Error\n ? err.message\n : 'Error processing file. Please ensure it is a valid JSON array.',\n };\n }\n}\n\n"]}
1
+ {"version":3,"file":"test-suite-importer.js","sourceRoot":"","sources":["../../../src/lib/import-export/test-suite-importer.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,uBAAuB,EAAE,MAAM,iCAAiC,CAAC;AAC1E,OAAO,EAAE,0BAA0B,EAAE,MAAM,yBAAyB,CAAC;AACrE,OAAO,EAAE,0CAA0C,EAAE,MAAM,gCAAgC,CAAC;AAQ5F;;;;GAIG;AACH,MAAM,UAAU,eAAe,CAC7B,WAAmB,EACnB,sBAAgC,EAAE;IAElC,IAAI,CAAC;QACH,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,WAAW,CAAC,CAAC;QACvC,0BAA0B,CAAC,MAAM,CAAC,CAAC;QACnC,IAAI,mBAAmB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACnC,MAAM,CAAC,OAAO,CAAC,CAAC,QAAQ,EAAE,EAAE;gBAC1B,0CAA0C,CACxC,QAAQ,CAAC,eAAe,EACxB,mBAAmB,CACpB,CAAC;YACJ,CAAC,CAAC,CAAC;QACL,CAAC;QAED,MAAM,SAAS,GAAG,MAAM,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,KAAK,EAAE,EAAE;YAC3C,IAAI,CAAC;gBACH,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC;YACvC,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,MAAM,OAAO,GAAG,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,eAAe,CAAC;gBACrE,MAAM,IAAI,KAAK,CAAC,8BAA8B,KAAK,KAAK,OAAO,EAAE,CAAC,CAAC;YACrE,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,OAAO;YACL,OAAO,EAAE,IAAI;YACb,SAAS;SACV,CAAC;IACJ,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO;YACL,OAAO,EAAE,KAAK;YACd,KAAK,EACH,GAAG,YAAY,KAAK;gBAClB,CAAC,CAAC,GAAG,CAAC,OAAO;gBACb,CAAC,CAAC,gEAAgE;SACvE,CAAC;IACJ,CAAC;AACH,CAAC","sourcesContent":["import type { TestCase } from '../../types/llm-test-runner';\nimport { createTestCaseFromInput } from '../test-cases/test-case-factory';\nimport { validateTestCaseInputArray } from '../../schemas/test-case';\nimport { validateExpectedOutcomeArrayWithExtractors } from '../../schemas/expected-outcome';\n\nexport interface ImportValidationResult {\n success: boolean;\n testCases?: TestCase[];\n error?: string;\n}\n\n/**\n * Validates and imports test cases from JSON content\n * @param jsonContent - The JSON string to parse and validate\n * @returns Validation result with test cases or error message\n */\nexport function importTestSuite(\n jsonContent: string,\n allowedExtractorIds: string[] = [],\n): ImportValidationResult {\n try {\n const parsed = 
JSON.parse(jsonContent);\n validateTestCaseInputArray(parsed);\n if (allowedExtractorIds.length > 0) {\n parsed.forEach((testCase) => {\n validateExpectedOutcomeArrayWithExtractors(\n testCase.expectedOutcome,\n allowedExtractorIds,\n );\n });\n }\n\n const testCases = parsed.map((item, index) => {\n try {\n return createTestCaseFromInput(item);\n } catch (err) {\n const message = err instanceof Error ? err.message : 'Unknown error';\n throw new Error(`Invalid test case at index ${index}: ${message}`);\n }\n });\n\n return {\n success: true,\n testCases,\n };\n } catch (err) {\n return {\n success: false,\n error:\n err instanceof Error\n ? err.message\n : 'Error processing file. Please ensure it is a valid JSON array.',\n };\n }\n}\n\n"]}
@@ -11,6 +11,7 @@ export const DEFAULT_EXPECTED_OUTCOME_SCHEMA = [
11
11
  function normalizeExpectedOutcomeField(field) {
12
12
  return {
13
13
  ...field,
14
+ evaluationSource: field.evaluationSource || { type: 'text' },
14
15
  evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
15
16
  };
16
17
  }
@@ -23,6 +24,7 @@ export function createTestCase(expectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_
23
24
  id: uuidv4(),
24
25
  question: '',
25
26
  expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),
27
+ chatHistory: { enabled: false, value: '' },
26
28
  isRunning: false,
27
29
  };
28
30
  }
@@ -33,6 +35,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
33
35
  type: 'text',
34
36
  label: schemaField.label,
35
37
  placeholder: schemaField.placeholder,
38
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
36
39
  value: '',
37
40
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
38
41
  };
@@ -41,6 +44,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
41
44
  type: 'textarea',
42
45
  label: schemaField.label,
43
46
  placeholder: schemaField.placeholder,
47
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
44
48
  rows: schemaField.rows,
45
49
  value: '',
46
50
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -50,6 +54,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
50
54
  type: 'chips-input',
51
55
  label: schemaField.label,
52
56
  placeholder: schemaField.placeholder,
57
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
53
58
  value: [],
54
59
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
55
60
  };
@@ -58,6 +63,7 @@ function createExpectedOutcomeFieldFromSchema(schemaField) {
58
63
  type: 'select',
59
64
  label: schemaField.label,
60
65
  placeholder: schemaField.placeholder,
66
+ evaluationSource: schemaField.evaluationSource || { type: 'text' },
61
67
  value: schemaField.options[0],
62
68
  options: schemaField.options,
63
69
  evaluationParameters: normalizeEvaluationParametersForField(schemaField.type, schemaField.evaluationParameters),
@@ -82,6 +88,7 @@ export function createExpectedOutcomeFromSchema(expectedOutcomeSchema) {
82
88
  export function createTestCaseFromInput(data) {
83
89
  return {
84
90
  ...data,
91
+ chatHistory: data.chatHistory ?? { enabled: false, value: '' },
85
92
  expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),
86
93
  };
87
94
  }
@@ -1 +1 @@
1
- {"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,KAAK,EAAE,WAAW,CAAC,OAAO,CAAC,CAAC,CAAC;gBAC7B,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB
;IACzD,OAAO;QACL,GAAG,IAAI;QACP,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n 
case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n value: schemaField.options[0],\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
1
+ {"version":3,"file":"test-case-factory.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-factory.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,EAAE,IAAI,MAAM,EAAE,MAAM,MAAM,CAAC;AASpC,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,MAAM,CAAC,MAAM,+BAA+B,GAA0B;IACpE;QACE,IAAI,EAAE,UAAU;QAChB,KAAK,EAAE,kBAAkB;QACzB,WAAW,EAAE,2BAA2B;QACxC,IAAI,EAAE,CAAC;KACR;CACF,CAAC;AAEF,SAAS,6BAA6B,CACpC,KAA2B;IAE3B,OAAO;QACL,GAAG,KAAK;QACR,gBAAgB,EAAE,KAAK,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;QAC5D,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;KACF,CAAC;AACJ,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,cAAc,CAC5B,wBAA+C,+BAA+B;IAE9E,OAAO;QACL,EAAE,EAAE,MAAM,EAAE;QACZ,QAAQ,EAAE,EAAE;QACZ,eAAe,EAAE,+BAA+B,CAAC,qBAAqB,CAAC;QACvE,WAAW,EAAE,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC1C,SAAS,EAAE,KAAK;KACjB,CAAC;AACJ,CAAC;AAED,SAAS,oCAAoC,CAC3C,WAAuC;IAEvC,QAAQ,WAAW,CAAC,IAAI,EAAE,CAAC;QACzB,KAAK,MAAM;YACT,OAAO;gBACL,IAAI,EAAE,MAAM;gBACZ,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,UAAU;YACb,OAAO;gBACL,IAAI,EAAE,UAAU;gBAChB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,IAAI,EAAE,WAAW,CAAC,IAAI;gBACtB,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,aAAa;YAChB,OAAO;gBACL,IAAI,EAAE,aAAa;gBACnB,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,EAAE;gBACT,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CACjC;aACF,CAAC;QAEJ,KAAK,QAAQ;YACX,OAAO;gBACL,IAAI,EAAE,QAAQ;gBACd,KAAK,EAAE,WAAW,CAAC,KAAK;gBACxB,WAAW,EAAE,WAAW,CAAC,WAAW;gBACpC,gBAAgB,EAAE,WAAW,CAAC,gBAAgB,IAAI,EAAE,IAAI,EAAE,MAAM,EAAE;gBAClE,KAAK,EAAE,WAAW,CAAC,OAAO
,CAAC,CAAC,CAAC;gBAC7B,OAAO,EAAE,WAAW,CAAC,OAAO;gBAC5B,oBAAoB,EAAE,qCAAqC,CACzD,WAAW,CAAC,IAAI,EAChB,WAAW,CAAC,oBAAoB,CAC6B;aAChE,CAAC;QAEJ,OAAO,CAAC,CAAC,CAAC;YACR,MAAM,gBAAgB,GAAU,WAAW,CAAC;YAC5C,OAAO,gBAAgB,CAAC;QAC1B,CAAC;IACH,CAAC;AACH,CAAC;AAED,MAAM,UAAU,+BAA+B,CAC7C,qBAA4C;IAE5C,OAAO,qBAAqB,CAAC,GAAG,CAAC,oCAAoC,CAAC,CAAC;AACzE,CAAC;AAED;;;;;;;GAOG;AACH,MAAM,UAAU,uBAAuB,CAAC,IAAmB;IACzD,OAAO;QACL,GAAG,IAAI;QACP,WAAW,EAAE,IAAI,CAAC,WAAW,IAAI,EAAE,OAAO,EAAE,KAAK,EAAE,KAAK,EAAE,EAAE,EAAE;QAC9D,eAAe,EAAE,IAAI,CAAC,eAAe,CAAC,GAAG,CAAC,6BAA6B,CAAC;KACzE,CAAC;AACJ,CAAC","sourcesContent":["import { v4 as uuidv4 } from 'uuid';\nimport {\n ExpectedOutcomeField,\n ExpectedOutcomeSchema,\n ExpectedOutcomeSchemaField,\n TestCase,\n TestCaseInput,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport const DEFAULT_EXPECTED_OUTCOME_SCHEMA: ExpectedOutcomeSchema = [\n {\n type: 'textarea',\n label: 'Expected Outcome',\n placeholder: 'Enter expected outcome...',\n rows: 2,\n },\n];\n\nfunction normalizeExpectedOutcomeField(\n field: ExpectedOutcomeField,\n): ExpectedOutcomeField {\n return {\n ...field,\n evaluationSource: field.evaluationSource || { type: 'text' },\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n };\n}\n\n/**\n * Creates a new test case with default values\n * @returns A new TestCase object with a unique ID\n */\nexport function createTestCase(\n expectedOutcomeSchema: ExpectedOutcomeSchema = DEFAULT_EXPECTED_OUTCOME_SCHEMA,\n): TestCase {\n return {\n id: uuidv4(),\n question: '',\n expectedOutcome: createExpectedOutcomeFromSchema(expectedOutcomeSchema),\n chatHistory: { enabled: false, value: '' },\n isRunning: false,\n };\n}\n\nfunction createExpectedOutcomeFieldFromSchema(\n schemaField: ExpectedOutcomeSchemaField,\n): 
ExpectedOutcomeField {\n switch (schemaField.type) {\n case 'text':\n return {\n type: 'text',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'textarea':\n return {\n type: 'textarea',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n rows: schemaField.rows,\n value: '',\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'chips-input':\n return {\n type: 'chips-input',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: [],\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ),\n };\n\n case 'select':\n return {\n type: 'select',\n label: schemaField.label,\n placeholder: schemaField.placeholder,\n evaluationSource: schemaField.evaluationSource || { type: 'text' },\n value: schemaField.options[0],\n options: schemaField.options,\n evaluationParameters: normalizeEvaluationParametersForField(\n schemaField.type,\n schemaField.evaluationParameters,\n ) as { approach: EvaluationApproach.EXACT; threshold?: number },\n };\n\n default: {\n const _exhaustiveCheck: never = schemaField;\n return _exhaustiveCheck;\n }\n }\n}\n\nexport function createExpectedOutcomeFromSchema(\n expectedOutcomeSchema: ExpectedOutcomeSchema,\n): ExpectedOutcomeField[] {\n return expectedOutcomeSchema.map(createExpectedOutcomeFieldFromSchema);\n}\n\n/**\n * Creates a runtime test case from validated input data.\n * The input is expected to already satisfy `TestCaseInput`,\n * and this function only performs 
normalization/defaulting.\n *\n * @param data - Validated test case input\n * @returns A normalized TestCase object with runtime defaults applied\n */\nexport function createTestCaseFromInput(data: TestCaseInput): TestCase {\n return {\n ...data,\n chatHistory: data.chatHistory ?? { enabled: false, value: '' },\n expectedOutcome: data.expectedOutcome.map(normalizeExpectedOutcomeField),\n };\n}\n"]}
@@ -1,4 +1,13 @@
1
1
  import { normalizeEvaluationParametersForField } from "../evaluation/field-evaluation-approach";
2
+ function isChipsInputField(field) {
3
+ return field.type === 'chips-input';
4
+ }
5
+ function isTextareaField(field) {
6
+ return field.type === 'textarea';
7
+ }
8
+ function isDynamicTextareaField(field) {
9
+ return isTextareaField(field) && field.outcomeMode === 'dynamic';
10
+ }
2
11
  export function applyExpectedOutcomeChange(testCase, change) {
3
12
  const { index } = change;
4
13
  const expectedOutcome = [...(testCase.expectedOutcome || [])];
@@ -6,73 +15,99 @@ export function applyExpectedOutcomeChange(testCase, change) {
6
15
  if (!target) {
7
16
  return testCase;
8
17
  }
18
+ const commit = (updatedField) => {
19
+ expectedOutcome[index] = updatedField;
20
+ return { ...testCase, expectedOutcome };
21
+ };
9
22
  switch (change.operation) {
10
23
  case 'set-value': {
11
- if (target.type === 'chips-input') {
24
+ if (isChipsInputField(target)) {
12
25
  return testCase;
13
26
  }
14
- if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {
27
+ if (isDynamicTextareaField(target)) {
15
28
  return testCase;
16
29
  }
17
- expectedOutcome[index] = {
30
+ return commit({
18
31
  ...target,
19
32
  value: change.value,
20
- };
21
- return { ...testCase, expectedOutcome };
33
+ });
22
34
  }
23
35
  case 'add-chip': {
24
- if (target.type !== 'chips-input') {
36
+ if (!isChipsInputField(target)) {
25
37
  return testCase;
26
38
  }
27
- expectedOutcome[index] = {
39
+ return commit({
28
40
  ...target,
29
41
  value: [...target.value, change.value],
30
- };
31
- return { ...testCase, expectedOutcome };
42
+ });
32
43
  }
33
44
  case 'remove-chip': {
34
- if (target.type !== 'chips-input') {
45
+ if (!isChipsInputField(target)) {
35
46
  return testCase;
36
47
  }
37
- expectedOutcome[index] = {
48
+ return commit({
38
49
  ...target,
39
50
  value: target.value.filter(chip => chip !== change.value),
40
- };
41
- return { ...testCase, expectedOutcome };
51
+ });
42
52
  }
43
53
  case 'set-evaluation-approach':
44
54
  return updateExpectedOutcomeFieldApproach(testCase, index, change.value);
45
55
  case 'set-outcome-mode': {
46
- if (target.type !== 'textarea') {
56
+ if (!isTextareaField(target)) {
47
57
  return testCase;
48
58
  }
49
59
  const mode = change.value;
50
60
  if (mode === 'static') {
51
61
  const { resolutionQuery: _, ...rest } = target;
52
- expectedOutcome[index] = {
62
+ return commit({
53
63
  ...rest,
54
64
  outcomeMode: 'static',
55
65
  value: '',
56
- };
66
+ });
57
67
  }
58
68
  else {
59
- expectedOutcome[index] = {
69
+ return commit({
60
70
  ...target,
61
71
  outcomeMode: 'dynamic',
62
72
  value: '',
63
- };
73
+ });
64
74
  }
65
- return { ...testCase, expectedOutcome };
66
75
  }
67
76
  case 'set-resolution-query': {
68
- if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {
77
+ if (!isDynamicTextareaField(target)) {
69
78
  return testCase;
70
79
  }
71
- expectedOutcome[index] = {
80
+ return commit({
72
81
  ...target,
73
82
  resolutionQuery: change.value,
74
- };
75
- return { ...testCase, expectedOutcome };
83
+ });
84
+ }
85
+ case 'set-evaluation-source-type': {
86
+ if (change.value === 'text') {
87
+ return commit({
88
+ ...target,
89
+ evaluationSource: { type: 'text' },
90
+ });
91
+ }
92
+ const extractorId = target.evaluationSource?.type === 'custom'
93
+ ? target.evaluationSource.extractorId
94
+ : (change.fallbackExtractorId ?? '');
95
+ return commit({
96
+ ...target,
97
+ evaluationSource: {
98
+ type: 'custom',
99
+ extractorId,
100
+ },
101
+ });
102
+ }
103
+ case 'set-evaluation-source-extractor': {
104
+ return commit({
105
+ ...target,
106
+ evaluationSource: {
107
+ type: 'custom',
108
+ extractorId: change.value,
109
+ },
110
+ });
76
111
  }
77
112
  }
78
113
  }
@@ -1 +1 @@
1
- {"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAkChG,MAAM,UAAU,0BAA0B,CACxC,QAAkB,EAClB,MAA6B;IAE7B,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACzB,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,QAAQ,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,IAAI,MAAM,CAAC,IAAI,KAAK,UAAU,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACnE,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACvC,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,MAAM,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;gBAClC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,CAAC;aAC1D,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,yBAAyB;YAC5B,OAAO,kCAAkC,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3E,KAAK,kBAAkB,CAAC,CAAC,CAAC;YACxB,IAAI,MAAM,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,MAAM,EAAE,eAAe,EAAE,CAAC,EAAE,GAAG,IAAI,EAAE,GAAG,MAAM,CAAC;gBAC/C,eAAe,CAAC,KAAK,CAAC,GAAG;oBACvB,GAAG,IAAI;oBACP,WAAW,EAAE,QAAQ;oBACrB,KAAK,EAAE,EAAE;iBACV,CAAC;YACJ,CAAC;iBAAM,CAAC;gBACN,eAAe,CAAC,KAAK,CAAC,GAAG;oBACvB,GAAG,MAAM;oBACT,WAAW,EAA
E,SAAS;oBACtB,KAAK,EAAE,EAAE;iBACV,CAAC;YACJ,CAAC;YACD,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;QACD,KAAK,sBAAsB,CAAC,CAAC,CAAC;YAC5B,IAAI,MAAM,CAAC,IAAI,KAAK,UAAU,IAAI,MAAM,CAAC,WAAW,KAAK,SAAS,EAAE,CAAC;gBACnE,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,eAAe,CAAC,KAAK,CAAC,GAAG;gBACvB,GAAG,MAAM;gBACT,eAAe,EAAE,MAAM,CAAC,KAAK;aAC9B,CAAC;YACF,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;QAC1C,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,QAAkB,EAClB,UAAkB,EAClB,QAA4B;IAE5B,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,2BAA2B,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAChE,eAAe,CAAC,UAAU,CAAC,GAAG;QAC5B,GAAG,MAAM;QACT,oBAAoB,EAAE,qCAAqC,CAAC,MAAM,CAAC,IAAI,EAAE;YACvE,GAAG,2BAA2B;YAC9B,QAAQ;SACT,CAAC;KACH,CAAC;IAEF,OAAO;QACL,GAAG,QAAQ;QACX,eAAe;KAChB,CAAC;AACJ,CAAC","sourcesContent":["import {\n TestCase,\n type ExpectedOutcomeMode,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nexport type ExpectedOutcomeChange =\n | {\n index: number;\n operation: 'set-value';\n value: string;\n }\n | {\n index: number;\n operation: 'add-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'remove-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-approach';\n value: EvaluationApproach;\n }\n | {\n index: number;\n operation: 'set-outcome-mode';\n value: ExpectedOutcomeMode;\n }\n | {\n index: number;\n operation: 'set-resolution-query';\n value: string;\n };\n\nexport function applyExpectedOutcomeChange(\n testCase: TestCase,\n change: ExpectedOutcomeChange,\n): TestCase {\n const { index } = change;\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[index];\n\n if (!target) {\n return testCase;\n }\n\n 
switch (change.operation) {\n case 'set-value': {\n if (target.type === 'chips-input') {\n return testCase;\n }\n if (target.type === 'textarea' && target.outcomeMode === 'dynamic') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: change.value,\n };\n return { ...testCase, expectedOutcome };\n }\n case 'add-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: [...target.value, change.value],\n };\n return { ...testCase, expectedOutcome };\n }\n case 'remove-chip': {\n if (target.type !== 'chips-input') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n value: target.value.filter(chip => chip !== change.value),\n };\n return { ...testCase, expectedOutcome };\n }\n case 'set-evaluation-approach':\n return updateExpectedOutcomeFieldApproach(testCase, index, change.value);\n case 'set-outcome-mode': {\n if (target.type !== 'textarea') {\n return testCase;\n }\n const mode = change.value;\n if (mode === 'static') {\n const { resolutionQuery: _, ...rest } = target;\n expectedOutcome[index] = {\n ...rest,\n outcomeMode: 'static',\n value: '',\n };\n } else {\n expectedOutcome[index] = {\n ...target,\n outcomeMode: 'dynamic',\n value: '',\n };\n }\n return { ...testCase, expectedOutcome };\n }\n case 'set-resolution-query': {\n if (target.type !== 'textarea' || target.outcomeMode !== 'dynamic') {\n return testCase;\n }\n expectedOutcome[index] = {\n ...target,\n resolutionQuery: change.value,\n };\n return { ...testCase, expectedOutcome };\n }\n }\n}\n\n/**\n * Updates the evaluation approach for a specific expected outcome field.\n * Select fields always use exact matching.\n */\nexport function updateExpectedOutcomeFieldApproach(\n testCase: TestCase,\n fieldIndex: number,\n approach: EvaluationApproach,\n): TestCase {\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[fieldIndex];\n\n if (!target) {\n return 
testCase;\n }\n\n const currentEvaluationParameters = target.evaluationParameters;\n expectedOutcome[fieldIndex] = {\n ...target,\n evaluationParameters: normalizeEvaluationParametersForField(target.type, {\n ...currentEvaluationParameters,\n approach,\n }),\n };\n\n return {\n ...testCase,\n expectedOutcome,\n };\n}\n"]}
1
+ {"version":3,"file":"test-case-mutations.js","sourceRoot":"","sources":["../../../src/lib/test-cases/test-case-mutations.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,qCAAqC,EAAE,MAAM,yCAAyC,CAAC;AAEhG,SAAS,iBAAiB,CACxB,KAA2B;IAE3B,OAAO,KAAK,CAAC,IAAI,KAAK,aAAa,CAAC;AACtC,CAAC;AAED,SAAS,eAAe,CACtB,KAA2B;IAE3B,OAAO,KAAK,CAAC,IAAI,KAAK,UAAU,CAAC;AACnC,CAAC;AAED,SAAS,sBAAsB,CAC7B,KAA2B;IAE3B,OAAO,eAAe,CAAC,KAAK,CAAC,IAAI,KAAK,CAAC,WAAW,KAAK,SAAS,CAAC;AACnE,CAAC;AA8CD,MAAM,UAAU,0BAA0B,CACxC,QAAkB,EAClB,MAA6B;IAE7B,MAAM,EAAE,KAAK,EAAE,GAAG,MAAM,CAAC;IACzB,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,KAAK,CAAC,CAAC;IAEtC,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,MAAM,GAAG,CAAC,YAAkC,EAAY,EAAE;QAC9D,eAAe,CAAC,KAAK,CAAC,GAAG,YAAY,CAAC;QACtC,OAAO,EAAE,GAAG,QAAQ,EAAE,eAAe,EAAE,CAAC;IAC1C,CAAC,CAAC;IAEF,QAAQ,MAAM,CAAC,SAAS,EAAE,CAAC;QACzB,KAAK,WAAW,CAAC,CAAC,CAAC;YACjB,IAAI,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC9B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,IAAI,sBAAsB,CAAC,MAAM,CAAC,EAAE,CAAC;gBACnC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK;aACpB,CAAC,CAAC;QACL,CAAC;QACD,KAAK,UAAU,CAAC,CAAC,CAAC;YAChB,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,CAAC,GAAG,MAAM,CAAC,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC;aACvC,CAAC,CAAC;QACL,CAAC;QACD,KAAK,aAAa,CAAC,CAAC,CAAC;YACnB,IAAI,CAAC,iBAAiB,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC/B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,MAAM,CAAC,IAAI,CAAC,EAAE,CAAC,IAAI,KAAK,MAAM,CAAC,KAAK,CAAC;aAC1D,CAAC,CAAC;QACL,CAAC;QACD,KAAK,yBAAyB;YAC5B,OAAO,kCAAkC,CAAC,QAAQ,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,CAAC,CAAC;QAC3E,KAAK,kBAAkB,CAAC,CAAC,CAAC;YACxB,IAAI,CAAC,eAAe,CAAC,MAAM,CAAC,EAAE,CAAC;gBAC7B,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,MAAM,IAAI,GAAG,MAAM,CAAC,KAAK,CAAC;YAC1B,IAAI,IAAI,KAAK,QAAQ,EAAE,CAAC;gBACtB,MAAM,EAAE,eAAe,EAAE,CAAC
,EAAE,GAAG,IAAI,EAAE,GAAG,MAAM,CAAC;gBAC/C,OAAO,MAAM,CAAC;oBACZ,GAAG,IAAI;oBACP,WAAW,EAAE,QAAQ;oBACrB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;iBAAM,CAAC;gBACN,OAAO,MAAM,CAAC;oBACZ,GAAG,MAAM;oBACT,WAAW,EAAE,SAAS;oBACtB,KAAK,EAAE,EAAE;iBACV,CAAC,CAAC;YACL,CAAC;QACH,CAAC;QACD,KAAK,sBAAsB,CAAC,CAAC,CAAC;YAC5B,IAAI,CAAC,sBAAsB,CAAC,MAAM,CAAC,EAAE,CAAC;gBACpC,OAAO,QAAQ,CAAC;YAClB,CAAC;YACD,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,eAAe,EAAE,MAAM,CAAC,KAAK;aAC9B,CAAC,CAAC;QACL,CAAC;QACD,KAAK,4BAA4B,CAAC,CAAC,CAAC;YAClC,IAAI,MAAM,CAAC,KAAK,KAAK,MAAM,EAAE,CAAC;gBAC5B,OAAO,MAAM,CAAC;oBACZ,GAAG,MAAM;oBACT,gBAAgB,EAAE,EAAE,IAAI,EAAE,MAAM,EAAE;iBACnC,CAAC,CAAC;YACL,CAAC;YAED,MAAM,WAAW,GACf,MAAM,CAAC,gBAAgB,EAAE,IAAI,KAAK,QAAQ;gBACxC,CAAC,CAAC,MAAM,CAAC,gBAAgB,CAAC,WAAW;gBACrC,CAAC,CAAC,CAAC,MAAM,CAAC,mBAAmB,IAAI,EAAE,CAAC,CAAC;YACzC,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,gBAAgB,EAAE;oBAChB,IAAI,EAAE,QAAQ;oBACd,WAAW;iBACZ;aACF,CAAC,CAAC;QACL,CAAC;QACD,KAAK,iCAAiC,CAAC,CAAC,CAAC;YACvC,OAAO,MAAM,CAAC;gBACZ,GAAG,MAAM;gBACT,gBAAgB,EAAE;oBAChB,IAAI,EAAE,QAAQ;oBACd,WAAW,EAAE,MAAM,CAAC,KAAK;iBAC1B;aACF,CAAC,CAAC;QACL,CAAC;IACH,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,UAAU,kCAAkC,CAChD,QAAkB,EAClB,UAAkB,EAClB,QAA4B;IAE5B,MAAM,eAAe,GAAG,CAAC,GAAG,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,CAAC;IAC9D,MAAM,MAAM,GAAG,eAAe,CAAC,UAAU,CAAC,CAAC;IAE3C,IAAI,CAAC,MAAM,EAAE,CAAC;QACZ,OAAO,QAAQ,CAAC;IAClB,CAAC;IAED,MAAM,2BAA2B,GAAG,MAAM,CAAC,oBAAoB,CAAC;IAChE,eAAe,CAAC,UAAU,CAAC,GAAG;QAC5B,GAAG,MAAM;QACT,oBAAoB,EAAE,qCAAqC,CAAC,MAAM,CAAC,IAAI,EAAE;YACvE,GAAG,2BAA2B;YAC9B,QAAQ;SACT,CAAC;KACH,CAAC;IAEF,OAAO;QACL,GAAG,QAAQ;QACX,eAAe;KAChB,CAAC;AACJ,CAAC","sourcesContent":["import {\n TestCase,\n type ExpectedOutcomeField,\n type EvaluationSource,\n type ExpectedOutcomeMode,\n} from '../../types/llm-test-runner';\nimport { EvaluationApproach } from '../evaluation/constants';\nimport { normalizeEvaluationParametersForField } from '../evaluation/field-evaluation-approach';\n\nfunction isChipsInputField(\n field: 
ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'chips-input' }> {\n return field.type === 'chips-input';\n}\n\nfunction isTextareaField(\n field: ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'textarea' }> {\n return field.type === 'textarea';\n}\n\nfunction isDynamicTextareaField(\n field: ExpectedOutcomeField,\n): field is Extract<ExpectedOutcomeField, { type: 'textarea' }> {\n return isTextareaField(field) && field.outcomeMode === 'dynamic';\n}\n\n\nexport type ExpectedOutcomeChange =\n | {\n index: number;\n operation: 'set-value';\n value: string;\n }\n | {\n index: number;\n operation: 'add-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'remove-chip';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-approach';\n value: EvaluationApproach;\n }\n | {\n index: number;\n operation: 'set-outcome-mode';\n value: ExpectedOutcomeMode;\n }\n | {\n index: number;\n operation: 'set-resolution-query';\n value: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-source-type';\n value: EvaluationSource['type'];\n fallbackExtractorId?: string;\n }\n | {\n index: number;\n operation: 'set-evaluation-source-extractor';\n value: string;\n };\n\nexport function applyExpectedOutcomeChange(\n testCase: TestCase,\n change: ExpectedOutcomeChange,\n): TestCase {\n const { index } = change;\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[index];\n\n if (!target) {\n return testCase;\n }\n\n const commit = (updatedField: ExpectedOutcomeField): TestCase => {\n expectedOutcome[index] = updatedField;\n return { ...testCase, expectedOutcome };\n };\n\n switch (change.operation) {\n case 'set-value': {\n if (isChipsInputField(target)) {\n return testCase;\n }\n if (isDynamicTextareaField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: change.value,\n });\n }\n case 'add-chip': {\n if 
(!isChipsInputField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: [...target.value, change.value],\n });\n }\n case 'remove-chip': {\n if (!isChipsInputField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n value: target.value.filter(chip => chip !== change.value),\n });\n }\n case 'set-evaluation-approach':\n return updateExpectedOutcomeFieldApproach(testCase, index, change.value);\n case 'set-outcome-mode': {\n if (!isTextareaField(target)) {\n return testCase;\n }\n const mode = change.value;\n if (mode === 'static') {\n const { resolutionQuery: _, ...rest } = target;\n return commit({\n ...rest,\n outcomeMode: 'static',\n value: '',\n });\n } else {\n return commit({\n ...target,\n outcomeMode: 'dynamic',\n value: '',\n });\n }\n }\n case 'set-resolution-query': {\n if (!isDynamicTextareaField(target)) {\n return testCase;\n }\n return commit({\n ...target,\n resolutionQuery: change.value,\n });\n }\n case 'set-evaluation-source-type': {\n if (change.value === 'text') {\n return commit({\n ...target,\n evaluationSource: { type: 'text' },\n });\n }\n\n const extractorId =\n target.evaluationSource?.type === 'custom'\n ? target.evaluationSource.extractorId\n : (change.fallbackExtractorId ?? 
'');\n return commit({\n ...target,\n evaluationSource: {\n type: 'custom',\n extractorId,\n },\n });\n }\n case 'set-evaluation-source-extractor': {\n return commit({\n ...target,\n evaluationSource: {\n type: 'custom',\n extractorId: change.value,\n },\n });\n }\n }\n}\n\n/**\n * Updates the evaluation approach for a specific expected outcome field.\n * Select fields always use exact matching.\n */\nexport function updateExpectedOutcomeFieldApproach(\n testCase: TestCase,\n fieldIndex: number,\n approach: EvaluationApproach,\n): TestCase {\n const expectedOutcome = [...(testCase.expectedOutcome || [])];\n const target = expectedOutcome[fieldIndex];\n\n if (!target) {\n return testCase;\n }\n\n const currentEvaluationParameters = target.evaluationParameters;\n expectedOutcome[fieldIndex] = {\n ...target,\n evaluationParameters: normalizeEvaluationParametersForField(target.type, {\n ...currentEvaluationParameters,\n approach,\n }),\n };\n\n return {\n ...testCase,\n expectedOutcome,\n };\n}\n"]}
@@ -6,6 +6,19 @@ const optionalPositiveInt = z.number().int().positive().optional();
6
6
  const optionalString = z.string().optional();
7
7
  const selectOptionsSchema = z.array(nonEmptyString).min(1);
8
8
  const optionalNumber = z.number().optional();
9
// Evaluation source variant whose `type` is the literal 'text'; it declares no other keys.
+ const textEvaluationSourceSchema = z.object({
10
+ type: z.literal('text'),
11
+ });
12
// Evaluation source variant whose `type` is the literal 'custom', plus the id of the extractor to use.
// NOTE(review): `nonEmptyString` is declared outside this hunk — presumably it rejects empty strings; confirm.
+ const customEvaluationSourceSchema = z.object({
13
+ type: z.literal('custom'),
14
+ extractorId: nonEmptyString,
15
+ });
16
// Accepts any value for which `typeof value === 'function'`; other values fail with 'Extractor must be a function.'.
+ export const evaluationSourceExtractorSchema = z.custom(value => typeof value === 'function', 'Extractor must be a function.');
17
// Record of extractors: keys are strings of length >= 1, values must satisfy evaluationSourceExtractorSchema.
+ export const evaluationSourceExtractorsSchema = z.record(z.string().min(1), evaluationSourceExtractorSchema);
18
// Union of the two variants above, discriminated on the `type` key.
+ export const evaluationSourceSchema = z.discriminatedUnion('type', [
19
+ textEvaluationSourceSchema,
20
+ customEvaluationSourceSchema,
21
+ ]);
9
22
  export const expectedOutcomeModeSchema = z.enum(['static', 'dynamic']);
10
23
  const evaluationParametersSchema = z.object({
11
24
  approach: z.enum(EvaluationApproach),
@@ -23,6 +36,7 @@ const selectEvaluationParametersSchema = evaluationParametersSchema.superRefine(
23
36
  // Base object schema with `label`, an optional string `placeholder`, and an optional `evaluationSource`.
  // NOTE(review): `nonEmptyString` is declared outside this hunk — presumably it makes `label` required and non-empty; confirm.
  const defaultExpectedOutcomeBaseSchema = z.object({
24
37
  label: nonEmptyString,
25
38
  placeholder: optionalString,
39
+ evaluationSource: evaluationSourceSchema.optional(),
26
40
  });
27
41
  const createDefaultExpectedOutcomeFieldSchemas = (baseSchema) => ({
28
42
  text: baseSchema.extend({
@@ -122,4 +136,29 @@ export function validateExpectedOutcomeArray(expectedOutcome) {
122
136
  throw new Error(`Invalid expectedOutcome: ${parsed.error.issues[0].message}`);
123
137
  }
124
138
  }
139
/**
 * Validates an expected-outcome array against `expectedOutcomeArraySchema`
 * and additionally requires every field whose evaluation source is `'custom'`
 * to reference one of the allowed extractor ids.
 *
 * Every failure is reported with the same "Invalid expectedOutcome: " prefix
 * that `validateExpectedOutcomeArray` uses, whether it comes from the base
 * schema or from the extractor check.
 *
 * @param expectedOutcome - Value to validate
 * @param allowedExtractorIds - Iterable of extractor ids that may be referenced
 * @throws {Error} When validation fails; the message describes the first issue
 */
export function validateExpectedOutcomeArrayWithExtractors(expectedOutcome, allowedExtractorIds) {
  const allowed = new Set(allowedExtractorIds);
  const schema = expectedOutcomeArraySchema.superRefine((fields, ctx) => {
    fields.forEach((field, index) => {
      const source = field.evaluationSource;
      // Only custom sources name an extractor; everything else passes.
      if (source?.type !== 'custom' || allowed.has(source.extractorId)) {
        return;
      }
      ctx.addIssue({
        code: 'custom',
        path: [index, 'evaluationSource', 'extractorId'],
        // The shared prefix is added once, where the error is thrown.
        message: `Extractor "${source.extractorId}" is not registered.`,
      });
    });
  });
  const parsed = schema.safeParse(expectedOutcome);
  if (!parsed.success) {
    throw new Error(`Invalid expectedOutcome: ${parsed.error.issues[0].message}`);
  }
}
161
/**
 * Lists the ids under which extractors are registered.
 *
 * @param extractors - Object keyed by extractor id; any falsy value is treated as empty
 * @returns The object's own enumerable string keys, or an empty array for a falsy input
 */
export function getExtractorIds(extractors) {
  if (!extractors) {
    return [];
  }
  return Object.keys(extractors);
}
125
164
  //# sourceMappingURL=expected-outcome.js.map