llm-testrunner-components 1.1.0 → 1.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/LICENSE +1 -1
  2. package/README.md +165 -242
  3. package/dist/cjs/index.cjs.js +305 -237
  4. package/dist/cjs/index.cjs.js.map +1 -1
  5. package/dist/cjs/llm-testrunner.cjs.js +1 -1
  6. package/dist/cjs/loader.cjs.js +1 -1
  7. package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js +2 -2
  8. package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js.map +1 -1
  9. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +25 -54
  10. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  11. package/dist/collection/components/llm-test-runner/llm-test-runner.js +27 -49
  12. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  13. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css +60 -21
  14. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js +3 -1
  15. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map +1 -1
  16. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +31 -11
  17. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +17 -0
  19. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -12
  20. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  21. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  22. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  23. package/dist/collection/lib/evaluation/evaluation-engine.js +63 -42
  24. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  25. package/dist/collection/lib/evaluation/evaluation-service.js +15 -3
  26. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  27. package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} +2 -2
  28. package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map +1 -0
  29. package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +4 -3
  30. package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -1
  31. package/dist/collection/lib/evaluation/field-evaluation-approach.js +24 -0
  32. package/dist/collection/lib/evaluation/field-evaluation-approach.js.map +1 -0
  33. package/dist/collection/lib/evaluation/index.js +0 -4
  34. package/dist/collection/lib/evaluation/index.js.map +1 -1
  35. package/dist/collection/lib/evaluation/types.js.map +1 -1
  36. package/dist/collection/lib/import-export/test-results-csv.js +47 -33
  37. package/dist/collection/lib/import-export/test-results-csv.js.map +1 -1
  38. package/dist/collection/lib/import-export/test-suite-exporter.js +0 -1
  39. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  40. package/dist/collection/lib/test-cases/test-case-factory.js +17 -27
  41. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  42. package/dist/collection/lib/test-cases/test-case-mutations.js +60 -9
  43. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  44. package/dist/collection/schemas/expected-outcome.js +20 -2
  45. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  46. package/dist/collection/schemas/test-case.js +2 -20
  47. package/dist/collection/schemas/test-case.js.map +1 -1
  48. package/dist/collection/types/llm-test-runner.js.map +1 -1
  49. package/dist/collection/types/test-case.js.map +1 -1
  50. package/dist/components/index.js +1 -1
  51. package/dist/components/llm-test-runner.js +1 -1
  52. package/dist/components/p-JPMPoOC8.js +7 -0
  53. package/dist/components/p-JPMPoOC8.js.map +1 -0
  54. package/dist/esm/index.js +305 -237
  55. package/dist/esm/index.js.map +1 -1
  56. package/dist/esm/llm-testrunner.js +1 -1
  57. package/dist/esm/loader.js +1 -1
  58. package/dist/llm-testrunner/index.esm.js +2 -2
  59. package/dist/llm-testrunner/index.esm.js.map +1 -1
  60. package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
  61. package/dist/types/components/llm-test-runner/header/llm-test-runner-header.d.ts +1 -0
  62. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +1 -1
  63. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +3 -6
  64. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +0 -2
  65. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +0 -2
  66. package/dist/types/components.d.ts +9 -0
  67. package/dist/types/lib/evaluation/evaluation-engine.d.ts +4 -2
  68. package/dist/types/lib/evaluation/field-evaluation-approach.d.ts +6 -0
  69. package/dist/types/lib/evaluation/index.d.ts +0 -1
  70. package/dist/types/lib/evaluation/types.d.ts +26 -0
  71. package/dist/types/lib/import-export/test-suite-exporter.d.ts +0 -4
  72. package/dist/types/lib/test-cases/test-case-factory.d.ts +2 -3
  73. package/dist/types/lib/test-cases/test-case-mutations.d.ts +21 -5
  74. package/dist/types/schemas/expected-outcome.d.ts +65 -17
  75. package/dist/types/schemas/test-case.d.ts +51 -95
  76. package/dist/types/types/llm-test-runner.d.ts +1 -1
  77. package/dist/types/types/test-case.d.ts +1 -1
  78. package/package.json +9 -2
  79. package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +0 -1
  80. package/dist/components/p-BF90yb1z.js +0 -7
  81. package/dist/components/p-BF90yb1z.js.map +0 -1
  82. package/dist/types/lib/evaluation/{rouge1-evaluator.test.d.ts → evaluators/rouge1-evaluator.test.d.ts} +0 -0
@@ -7,49 +7,88 @@
7
7
  flex-direction: column;
8
8
  }
9
9
 
10
- .evaluation-summary__details {
10
+ .evaluation-summary__field-results {
11
11
  display: flex;
12
12
  flex-direction: column;
13
13
  gap: var(--spacing-2);
14
+ margin-top: var(--spacing-2);
14
15
  }
15
16
 
16
- .evaluation-summary__placeholder {
17
+ .evaluation-summary__field-result {
18
+ border: var(--border-width) solid var(--border);
19
+ border-radius: var(--radius-md);
20
+ padding: var(--spacing-2);
17
21
  display: flex;
18
- align-items: center;
19
- justify-content: center;
22
+ flex-direction: column;
23
+ gap: var(--spacing-1);
24
+ }
25
+
26
+ .evaluation-summary__field-header {
27
+ display: flex;
28
+ flex-direction: column;
29
+ gap: var(--spacing-1);
30
+ }
31
+
32
+ .evaluation-summary__field-label {
33
+ font-weight: var(--font-weight-semibold);
34
+ font-size: var(--font-size-xs);
35
+ }
36
+
37
+ .evaluation-summary__field-approach {
20
38
  color: var(--muted-foreground);
21
- font-style: italic;
22
- flex: 1;
23
- background: var(--muted);
24
- border: 2px dashed var(--border);
25
- border-radius: var(--radius);
39
+ font-size: 11px;
26
40
  }
27
41
 
28
- /* Evaluation Result Element */
29
- .evaluation-summary__result {
42
+ .evaluation-summary__field-details {
30
43
  display: flex;
31
44
  flex-direction: column;
32
- gap: var(--spacing-2);
45
+ gap: var(--spacing-1);
46
+ font-size: var(--font-size-xs);
33
47
  }
34
48
 
35
- .evaluation-summary__result-status {
49
+ .evaluation-summary__field-status {
50
+ width: fit-content;
51
+ padding: 2px var(--spacing-2);
52
+ border-radius: var(--radius-sm);
53
+ font-size: 11px;
36
54
  font-weight: var(--font-weight-semibold);
37
- font-size: var(--font-size-sm);
38
- padding: var(--spacing-2) var(--spacing-3);
39
- border-radius: var(--radius-md);
40
- text-align: center;
55
+ border: var(--border-width) solid transparent;
41
56
  }
42
57
 
43
- .evaluation-summary__result-status--passed {
58
+ .evaluation-summary__field-status--passed {
44
59
  background: var(--success);
45
60
  color: var(--success-foreground);
46
- border: var(--border-width) solid var(--success);
61
+ border-color: var(--success);
47
62
  }
48
63
 
49
- .evaluation-summary__result-status--failed {
64
+ .evaluation-summary__field-status--failed {
50
65
  background: var(--destructive);
51
66
  color: var(--destructive-foreground);
52
- border: var(--border-width) solid var(--destructive);
67
+ border-color: var(--destructive);
68
+ }
69
+
70
+ .evaluation-summary__error-message {
71
+ color: var(--destructive);
72
+ font-size: var(--font-size-xs);
73
+ }
74
+
75
+ .evaluation-summary__placeholder {
76
+ display: flex;
77
+ align-items: center;
78
+ justify-content: center;
79
+ color: var(--muted-foreground);
80
+ font-style: italic;
81
+ flex: 1;
82
+ background: var(--muted);
83
+ border: 2px dashed var(--border);
84
+ border-radius: var(--radius);
85
+ }
86
+
87
+ /* Evaluation Result Element */
88
+ .evaluation-summary__result {
89
+ display: flex;
90
+ flex-direction: column;
91
+ gap: var(--spacing-2);
53
92
  }
54
93
 
55
94
  /* Responsive Design */
@@ -1,5 +1,7 @@
1
1
  import { h } from "@stencil/core";
2
2
  export const EvaluationSummary = ({ result, isRunning, }) => {
3
- return (h("div", { class: "evaluation-summary" }, result ? (h("div", { class: "evaluation-summary__result" }, h("div", { class: `evaluation-summary__result-status evaluation-summary__result-status--${result.passed ? 'passed' : 'failed'}` }, result.passed ? '✅ PASSED' : '❌ FAILED'), h("div", { class: "evaluation-summary__details" }, "Keywords: ", result.keywordMatches.filter(m => m.found).length, "/", result.keywordMatches.length, " found"))) : (h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
3
+ const fieldResults = result?.fieldResults || [];
4
+ const hasFieldResults = fieldResults.length > 0;
5
+ return (h("div", { class: "evaluation-summary" }, result ? (h("div", { class: "evaluation-summary__result" }, hasFieldResults ? (h("div", { class: "evaluation-summary__field-results" }, fieldResults.map(fieldResult => (h("div", { class: "evaluation-summary__field-result" }, h("div", { class: "evaluation-summary__field-header" }, h("span", { class: "evaluation-summary__field-label" }, fieldResult.label), h("span", { class: "evaluation-summary__field-approach" }, "Strategy: ", fieldResult.evaluationParameters.approach)), h("div", { class: "evaluation-summary__field-details" }, h("span", { class: `evaluation-summary__field-status evaluation-summary__field-status--${fieldResult.passed ? 'passed' : 'failed'}` }, fieldResult.passed ? 'PASSED' : 'FAILED'), fieldResult.error && (h("span", { class: "evaluation-summary__error-message" }, fieldResult.error)), h("span", null, "Score: ", fieldResult.evaluationApproachResult.score.toFixed(2)), h("span", null, "Matches:", ' ', fieldResult.keywordMatches.filter(match => match.found).length, "/", fieldResult.keywordMatches.length))))))) : null)) : (h("div", { class: "evaluation-summary__placeholder" }, isRunning ? 'Evaluating...' : ''))));
4
6
  };
5
7
  //# sourceMappingURL=evaluation-summary.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation-summary.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/evaluation/evaluation-summary.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAQvD,MAAM,CAAC,MAAM,iBAAiB,GAAgD,CAAC,EAC7E,MAAM,EACN,SAAS,GACV,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,oBAAoB,IAC5B,MAAM,CAAC,CAAC,CAAC,CACR,WAAK,KAAK,EAAC,4BAA4B;QACrC,WACE,KAAK,EAAE,wEAAwE,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE,IAEnH,MAAM,CAAC,MAAM,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,UAAU,CACpC;QACN,WAAK,KAAK,EAAC,6BAA6B;;YAC3B,MAAM,CAAC,cAAc,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,MAAM;;YAC3D,MAAM,CAAC,cAAc,CAAC,MAAM;qBACzB,CACF,CACP,CAAC,CAAC,CAAC,CACF,WAAK,KAAK,EAAC,iCAAiC,IACzC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAC7B,CACP,CACG,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { EvaluationResult } from '../../../../lib/evaluation/types';\n\nexport interface EvaluationSummaryProps {\n result?: EvaluationResult;\n isRunning: boolean;\n}\n\nexport const EvaluationSummary: FunctionalComponent<EvaluationSummaryProps> = ({\n result,\n isRunning,\n}) => {\n return (\n <div class=\"evaluation-summary\">\n {result ? (\n <div class=\"evaluation-summary__result\">\n <div\n class={`evaluation-summary__result-status evaluation-summary__result-status--${result.passed ? 'passed' : 'failed'}`}\n >\n {result.passed ? 'PASSED' : 'FAILED'}\n </div>\n <div class=\"evaluation-summary__details\">\n Keywords: {result.keywordMatches.filter(m => m.found).length}/\n {result.keywordMatches.length} found\n </div>\n </div>\n ) : (\n <div class=\"evaluation-summary__placeholder\">\n {isRunning ? 'Evaluating...' : ''}\n </div>\n )}\n </div>\n );\n};\n"]}
1
+ {"version":3,"file":"evaluation-summary.js","sourceRoot":"","sources":["../../../../../src/components/llm-test-runner/test-cases/evaluation/evaluation-summary.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAQvD,MAAM,CAAC,MAAM,iBAAiB,GAAgD,CAAC,EAC7E,MAAM,EACN,SAAS,GACV,EAAE,EAAE;IACH,MAAM,YAAY,GAAG,MAAM,EAAE,YAAY,IAAI,EAAE,CAAC;IAChD,MAAM,eAAe,GAAG,YAAY,CAAC,MAAM,GAAG,CAAC,CAAC;IAEhD,OAAO,CACL,WAAK,KAAK,EAAC,oBAAoB,IAC5B,MAAM,CAAC,CAAC,CAAC,CACR,WAAK,KAAK,EAAC,4BAA4B,IACpC,eAAe,CAAC,CAAC,CAAC,CACjB,WAAK,KAAK,EAAC,mCAAmC,IAC3C,YAAY,CAAC,GAAG,CAAC,WAAW,CAAC,EAAE,CAAC,CAC/B,WAAK,KAAK,EAAC,kCAAkC;QAC3C,WAAK,KAAK,EAAC,kCAAkC;YAC3C,YAAM,KAAK,EAAC,iCAAiC,IAC1C,WAAW,CAAC,KAAK,CACb;YACP,YAAM,KAAK,EAAC,oCAAoC;;gBACnC,WAAW,CAAC,oBAAoB,CAAC,QAAQ,CAC/C,CACH;QACN,WAAK,KAAK,EAAC,mCAAmC;YAC5C,YACE,KAAK,EAAE,sEAAsE,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,EAAE,IAEtH,WAAW,CAAC,MAAM,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,QAAQ,CACpC;YACN,WAAW,CAAC,KAAK,IAAI,CACpB,YAAM,KAAK,EAAC,mCAAmC,IAC5C,WAAW,CAAC,KAAK,CACb,CACR;YACD;;gBACU,WAAW,CAAC,wBAAwB,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,CACxD;YACP;;gBACW,GAAG;gBACX,WAAW,CAAC,cAAc,CAAC,MAAM,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,MAAM;;gBAC9D,WAAW,CAAC,cAAc,CAAC,MAAM,CAC7B,CACH,CACF,CACP,CAAC,CACE,CACP,CAAC,CAAC,CAAC,IAAI,CACJ,CACP,CAAC,CAAC,CAAC,CACF,WAAK,KAAK,EAAC,iCAAiC,IACzC,SAAS,CAAC,CAAC,CAAC,eAAe,CAAC,CAAC,CAAC,EAAE,CAC7B,CACP,CACG,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { EvaluationResult } from '../../../../lib/evaluation/types';\n\nexport interface EvaluationSummaryProps {\n result?: EvaluationResult;\n isRunning: boolean;\n}\n\nexport const EvaluationSummary: FunctionalComponent<EvaluationSummaryProps> = ({\n result,\n isRunning,\n}) => {\n const fieldResults = result?.fieldResults || [];\n const hasFieldResults = fieldResults.length > 0;\n\n return (\n <div class=\"evaluation-summary\">\n {result ? 
(\n <div class=\"evaluation-summary__result\">\n {hasFieldResults ? (\n <div class=\"evaluation-summary__field-results\">\n {fieldResults.map(fieldResult => (\n <div class=\"evaluation-summary__field-result\">\n <div class=\"evaluation-summary__field-header\">\n <span class=\"evaluation-summary__field-label\">\n {fieldResult.label}\n </span>\n <span class=\"evaluation-summary__field-approach\">\n Strategy: {fieldResult.evaluationParameters.approach}\n </span>\n </div>\n <div class=\"evaluation-summary__field-details\">\n <span\n class={`evaluation-summary__field-status evaluation-summary__field-status--${fieldResult.passed ? 'passed' : 'failed'}`}\n >\n {fieldResult.passed ? 'PASSED' : 'FAILED'}\n </span>\n {fieldResult.error && (\n <span class=\"evaluation-summary__error-message\">\n {fieldResult.error}\n </span>\n )}\n <span>\n Score: {fieldResult.evaluationApproachResult.score.toFixed(2)}\n </span>\n <span>\n Matches:{' '}\n {fieldResult.keywordMatches.filter(match => match.found).length}/\n {fieldResult.keywordMatches.length}\n </span>\n </div>\n </div>\n ))}\n </div>\n ) : null}\n </div>\n ) : (\n <div class=\"evaluation-summary__placeholder\">\n {isRunning ? 'Evaluating...' : ''}\n </div>\n )}\n </div>\n );\n};\n"]}
@@ -1,9 +1,29 @@
1
1
  import { h } from "@stencil/core";
2
2
  import { FormFieldType } from "../../../lib/form/schema";
3
+ import { EvaluationApproach, } from "../../../lib/evaluation/constants";
4
+ import { getAllowedApproachesForFieldType } from "../../../lib/evaluation/field-evaluation-approach";
3
5
  export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeChange, }) => {
4
6
  const emit = (detail) => onExpectedOutcomeChange({
5
7
  detail,
6
8
  });
9
+ const buildEvaluationConfig = (index, optionList) => ({
10
+ name: `expectedOutcomeEvaluation-${index}`,
11
+ fieldType: FormFieldType.SELECT,
12
+ label: 'Evaluation Approach',
13
+ placeholder: 'Select evaluation approach…',
14
+ required: true,
15
+ optionList,
16
+ defaultValue: EvaluationApproach.EXACT,
17
+ });
18
+ const renderEvaluationSelector = (field, index) => {
19
+ const optionList = getAllowedApproachesForFieldType(field.type);
20
+ return (h("app-select", { config: buildEvaluationConfig(index, optionList), value: field.evaluationParameters?.approach, onValueChange: (e) => emit({
21
+ testCaseId,
22
+ index,
23
+ operation: 'set-evaluation-approach',
24
+ value: e.detail.value,
25
+ }) }));
26
+ };
7
27
  return (h("div", { class: "expected-outcome-renderer" }, (fields || []).map((field, index) => {
8
28
  if (field.type === 'textarea') {
9
29
  const config = {
@@ -11,15 +31,15 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
11
31
  fieldType: FormFieldType.TEXT_AREA,
12
32
  label: field.label,
13
33
  placeholder: field.placeholder,
14
- required: field.required,
34
+ required: true,
15
35
  rows: field.rows || 2,
16
36
  };
17
- return (h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
37
+ return (h("div", { class: "expected-outcome-renderer__group" }, h("app-textarea", { config: config, value: field.value, onValueChange: (e) => emit({
18
38
  testCaseId,
19
39
  index,
20
40
  operation: 'set-value',
21
41
  value: e.detail.value,
22
- }) }));
42
+ }) }), renderEvaluationSelector(field, index)));
23
43
  }
24
44
  if (field.type === 'chips-input') {
25
45
  const config = {
@@ -27,9 +47,9 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
27
47
  fieldType: FormFieldType.CHIPS,
28
48
  label: field.label,
29
49
  placeholder: field.placeholder,
30
- required: field.required,
50
+ required: true,
31
51
  };
32
- return (h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
52
+ return (h("div", { class: "expected-outcome-renderer__group" }, h("app-chips", { config: config, value: field.value, onAddChip: (e) => emit({
33
53
  testCaseId,
34
54
  index,
35
55
  operation: 'add-chip',
@@ -39,7 +59,7 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
39
59
  index,
40
60
  operation: 'remove-chip',
41
61
  value: e.detail.value,
42
- }) }));
62
+ }) }), renderEvaluationSelector(field, index)));
43
63
  }
44
64
  if (field.type === 'select') {
45
65
  const config = {
@@ -47,22 +67,22 @@ export const ExpectedOutcomeRenderer = ({ testCaseId, fields, onExpectedOutcomeC
47
67
  fieldType: FormFieldType.SELECT,
48
68
  label: field.label,
49
69
  placeholder: field.placeholder,
50
- required: field.required,
70
+ required: true,
51
71
  optionList: field.options,
52
72
  };
53
- return (h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
73
+ return (h("div", { class: "expected-outcome-renderer__group" }, h("app-select", { config: config, value: field.value, onValueChange: (e) => emit({
54
74
  testCaseId,
55
75
  index,
56
76
  operation: 'set-value',
57
77
  value: e.detail.value,
58
- }) }));
78
+ }) }), renderEvaluationSelector(field, index)));
59
79
  }
60
- return (h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
80
+ return (h("div", { class: "expected-outcome-renderer__group" }, h("div", { class: "expected-outcome-renderer__text" }, h("label", null, field.label), h("input", { type: "text", value: field.value, placeholder: field.placeholder, onInput: (e) => emit({
61
81
  testCaseId,
62
82
  index,
63
83
  operation: 'set-value',
64
84
  value: e.target.value,
65
- }) })));
85
+ }) })), renderEvaluationSelector(field, index)));
66
86
  })));
67
87
  };
68
88
  //# sourceMappingURL=expected-outcome-renderer.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;AAsBpG,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;oBACH,UAAU;oBACV,KAAK;oBACL,SAAS,EAAE,WAAW;oBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;iBACtB,CAAC,GAEJ,CACH,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,KAAK,CAAC,QAAQ;aACzB,CAAC;YAEF,OAAO,CACL,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;oBACH,UAAU;oBACV,KAAK;oBACL,SAAS,EAAE,UAAU;oBACrB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;iBACtB,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;oBACH,UAAU;oBACV,KAAK;oBACL,SAAS,EAAE,aAAa;oBACxB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;iBACtB,CAAC,GAEJ,CACH,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,KAAK,CAAC,QAAQ;gBACxB,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL
,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;oBACH,UAAU;oBACV,KAAK;oBACL,SAAS,EAAE,WAAW;oBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;iBACtB,CAAC,GAEJ,CACH,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,iCAAiC;YAC1C,iBAAQ,KAAK,CAAC,KAAK,CAAS;YAC5B,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;oBACH,UAAU;oBACV,KAAK;oBACL,SAAS,EAAE,WAAW;oBACtB,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;iBAC5C,CAAC,GAEJ,CACE,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\n\nexport type ExpectedOutcomeOperation =\n | 'set-value'\n | 'add-chip'\n | 'remove-chip';\n\nexport interface ExpectedOutcomeChangeDetail {\n testCaseId: string;\n index: number;\n operation: ExpectedOutcomeOperation;\n value?: string;\n}\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields || []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: field.required,\n rows: field.rows || 2,\n };\n return (\n <app-textarea\n config={config}\n 
value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: field.required,\n };\n\n return (\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n />\n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: field.required,\n optionList: field.options,\n };\n\n return (\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n );\n }\n\n return (\n <div class=\"expected-outcome-renderer__text\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n );\n })}\n </div>\n );\n};\n"]}
1
+ {"version":3,"file":"expected-outcome-renderer.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/expected-outcome-renderer.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAIvD,OAAO,EAAe,aAAa,EAAgC,MAAM,0BAA0B,CAAC;AACpG,OAAO,EACL,kBAAkB,GACnB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,gCAAgC,EAAE,MAAM,mDAAmD,CAAC;AAerG,MAAM,CAAC,MAAM,uBAAuB,GAAsD,CAAC,EACzF,UAAU,EACV,MAAM,EACN,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,IAAI,GAAG,CAAC,MAAmC,EAAE,EAAE,CACnD,uBAAuB,CAAC;QACtB,MAAM;KACqC,CAAC,CAAC;IAEjD,MAAM,qBAAqB,GAAG,CAC5B,KAAa,EACb,UAAoB,EACN,EAAE,CAAC,CAAC;QAClB,IAAI,EAAE,6BAA6B,KAAK,EAAE;QAC1C,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,qBAAqB;QAC5B,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU;QACV,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC,CAAC;IAEH,MAAM,wBAAwB,GAAG,CAC/B,KAA2B,EAC3B,KAAa,EACb,EAAE;QACF,MAAM,UAAU,GAAG,gCAAgC,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC;QAEhE,OAAO,CACL,kBACE,MAAM,EAAE,qBAAqB,CAAC,KAAK,EAAE,UAAU,CAAC,EAChD,KAAK,EAAE,KAAK,CAAC,oBAAoB,EAAE,QAAQ,EAC3C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;gBACH,UAAU;gBACV,KAAK;gBACL,SAAS,EAAE,yBAAyB;gBACpC,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B;aAC5C,CAAC,GAEJ,CACH,CAAC;IACJ,CAAC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,2BAA2B,IACnC,CAAC,MAAM,IAAI,EAAE,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE;QACnC,IAAI,KAAK,CAAC,IAAI,KAAK,UAAU,EAAE,CAAC;YAC9B,MAAM,MAAM,GAAmB;gBAC7B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,SAAS;gBAClC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE,KAAK,CAAC,IAAI,IAAI,CAAC;aACtB,CAAC;YACF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,oBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;YACjC,MAAM,MAAM,GAAgB;gBAC1B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,
SAAS,EAAE,aAAa,CAAC,KAAK;gBAC9B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;aACf,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,iBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,SAAS,EAAE,CAAC,CAAC,EAAE,EAAE,CACf,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,UAAU;wBACrB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,EAEJ,YAAY,EAAE,CAAC,CAAC,EAAE,EAAE,CAClB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,aAAa;wBACxB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,IAAI,KAAK,CAAC,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC5B,MAAM,MAAM,GAAiB;gBAC3B,IAAI,EAAE,mBAAmB,KAAK,EAAE;gBAChC,SAAS,EAAE,aAAa,CAAC,MAAM;gBAC/B,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,WAAW,EAAE,KAAK,CAAC,WAAW;gBAC9B,QAAQ,EAAE,IAAI;gBACd,UAAU,EAAE,KAAK,CAAC,OAAO;aAC1B,CAAC;YAEF,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;gBAC3C,kBACE,MAAM,EAAE,MAAM,EACd,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB,CAAC,GAEJ;gBACD,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;QACJ,CAAC;QAED,OAAO,CACL,WAAK,KAAK,EAAC,kCAAkC;YAC3C,WAAK,KAAK,EAAC,iCAAiC;gBAC1C,iBAAQ,KAAK,CAAC,KAAK,CAAS;gBAC5B,aACE,IAAI,EAAC,MAAM,EACX,KAAK,EAAE,KAAK,CAAC,KAAK,EAClB,WAAW,EAAE,KAAK,CAAC,WAAW,EAC9B,OAAO,EAAE,CAAC,CAAC,EAAE,EAAE,CACb,IAAI,CAAC;wBACH,UAAU;wBACV,KAAK;wBACL,SAAS,EAAE,WAAW;wBACtB,KAAK,EAAG,CAAC,CAAC,MAA2B,CAAC,KAAK;qBAC5C,CAAC,GAEJ,CACE;YACL,wBAAwB,CAAC,KAAK,EAAE,KAAK,CAAC,CACnC,CACP,CAAC;IACJ,CAAC,CAAC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport {\n ExpectedOutcomeField,\n} from '../../../types/llm-test-runner';\nimport { ChipsConfig, FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\nimport {\n EvaluationApproach,\n} from '../../../lib/evaluation/constants';\nimport { getAllowedApproachesForFieldType } from 
'../../../lib/evaluation/field-evaluation-approach';\nimport { ExpectedOutcomeChange } from '../../../lib/test-cases/test-case-mutations';\n\nexport type ExpectedOutcomeChangeDetail = {\n testCaseId: string;\n} & ExpectedOutcomeChange;\n\ninterface ExpectedOutcomeRendererProps {\n testCaseId: string;\n fields: ExpectedOutcomeField[];\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const ExpectedOutcomeRenderer: FunctionalComponent<ExpectedOutcomeRendererProps> = ({\n testCaseId,\n fields,\n onExpectedOutcomeChange,\n}) => {\n const emit = (detail: ExpectedOutcomeChangeDetail) =>\n onExpectedOutcomeChange({\n detail,\n } as CustomEvent<ExpectedOutcomeChangeDetail>);\n\n const buildEvaluationConfig = (\n index: number,\n optionList: string[],\n ): SelectConfig => ({\n name: `expectedOutcomeEvaluation-${index}`,\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation Approach',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList,\n defaultValue: EvaluationApproach.EXACT,\n });\n\n const renderEvaluationSelector = (\n field: ExpectedOutcomeField,\n index: number,\n ) => {\n const optionList = getAllowedApproachesForFieldType(field.type);\n\n return (\n <app-select\n config={buildEvaluationConfig(index, optionList)}\n value={field.evaluationParameters?.approach}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-evaluation-approach',\n value: e.detail.value as EvaluationApproach,\n })\n }\n />\n );\n };\n\n return (\n <div class=\"expected-outcome-renderer\">\n {(fields || []).map((field, index) => {\n if (field.type === 'textarea') {\n const config: TextAreaConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.TEXT_AREA,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n rows: field.rows || 2,\n };\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-textarea\n config={config}\n value={field.value}\n 
onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'chips-input') {\n const config: ChipsConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.CHIPS,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-chips\n config={config}\n value={field.value}\n onAddChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'add-chip',\n value: e.detail.value,\n })\n }\n onRemoveChip={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'remove-chip',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n if (field.type === 'select') {\n const config: SelectConfig = {\n name: `expectedOutcome-${index}`,\n fieldType: FormFieldType.SELECT,\n label: field.label,\n placeholder: field.placeholder,\n required: true,\n optionList: field.options,\n };\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <app-select\n config={config}\n value={field.value}\n onValueChange={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: e.detail.value,\n })\n }\n />\n {renderEvaluationSelector(field, index)}\n </div>\n );\n }\n\n return (\n <div class=\"expected-outcome-renderer__group\">\n <div class=\"expected-outcome-renderer__text\">\n <label>{field.label}</label>\n <input\n type=\"text\"\n value={field.value}\n placeholder={field.placeholder}\n onInput={(e) =>\n emit({\n testCaseId,\n index,\n operation: 'set-value',\n value: (e.target as HTMLInputElement).value,\n })\n }\n />\n </div>\n {renderEvaluationSelector(field, index)}\n </div>\n );\n })}\n </div>\n );\n};\n"]}
@@ -18,6 +18,23 @@
18
18
  border-right: var(--border-width) solid var(--border);
19
19
  }
20
20
 
21
+ .expected-outcome-renderer {
22
+ display: flex;
23
+ flex-direction: column;
24
+ gap: var(--spacing-4);
25
+ margin-top: var(--spacing-4);
26
+ }
27
+
28
+ .expected-outcome-renderer__group {
29
+ display: flex;
30
+ flex-direction: column;
31
+ gap: var(--spacing-2);
32
+ padding: var(--spacing-3);
33
+ border: var(--border-width) solid var(--border);
34
+ border-radius: var(--radius-md);
35
+ background: var(--background);
36
+ }
37
+
21
38
  /* Responsive Design */
22
39
  @media (max-width: 1200px) {
23
40
  .test-case-row {
@@ -1,11 +1,10 @@
1
1
  import { h } from "@stencil/core";
2
- import { EvaluationApproach, EvaluationApproachValues, } from "../../../lib/evaluation/constants";
3
2
  import { ResponseOutput } from "./output/response-output";
4
3
  import { EvaluationSummary } from "./evaluation/evaluation-summary";
5
4
  import { RowActions } from "./actions/row-actions";
6
5
  import { FormFieldType } from "../../../lib/form/schema";
7
6
  import { ExpectedOutcomeRenderer, } from "./expected-outcome-renderer";
8
- export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, handleTestCaseChange, onExpectedOutcomeChange, }) => {
7
+ export const LLMTestCaseRow = ({ testCase, onRun, onDelete, handleTestCaseChange, onExpectedOutcomeChange, }) => {
9
8
  const questionConfig = {
10
9
  name: 'question',
11
10
  fieldType: FormFieldType.TEXT_AREA,
@@ -15,21 +14,12 @@ export const LLMTestCaseRow = ({ testCase, onRun, onDelete, onUpdateApproach, ha
15
14
  required: true,
16
15
  rows: 3,
17
16
  };
18
- const evaluationConfig = {
19
- name: 'EvaluationApproach',
20
- fieldType: FormFieldType.SELECT,
21
- label: 'Evaluation',
22
- placeholder: 'Select evaluation approach…',
23
- required: true,
24
- optionList: EvaluationApproachValues,
25
- defaultValue: EvaluationApproach.EXACT,
26
- };
27
17
  return (h("div", { class: "test-case-row", key: testCase.id }, h("div", { class: "test-case-row__input-column" }, h("app-textarea", { config: questionConfig, value: testCase.question, onValueChange: (e) => handleTestCaseChange({
28
18
  detail: {
29
19
  testCaseId: testCase.id,
30
20
  key: 'question',
31
21
  value: e.detail.value,
32
22
  },
33
- }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange }), h("app-select", { config: evaluationConfig, value: testCase.evaluationParameters?.approach, onValueChange: (e) => onUpdateApproach(testCase, e.detail.value) })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
23
+ }) }), h(ExpectedOutcomeRenderer, { testCaseId: testCase.id, fields: testCase.expectedOutcome || [], onExpectedOutcomeChange: onExpectedOutcomeChange })), h(ResponseOutput, { output: testCase.output, isRunning: testCase.isRunning }), h(EvaluationSummary, { result: testCase.evaluationResult, isRunning: testCase.isRunning }), h(RowActions, { isRunning: testCase.isRunning, canRun: !!testCase.question.trim(), onRun: () => onRun(testCase), onDelete: () => onDelete(testCase.id) })));
34
24
  };
35
25
  //# sourceMappingURL=llm-test-case-row.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EACL,kBAAkB,EAClB,wBAAwB,GACzB,MAAM,mCAAmC,CAAC;AAC3C,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAgC,MAAM,0BAA0B,CAAC;AACvF,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AAerC,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,gBAAgB,EAChB,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,MAAM,gBAAgB,GAAiB;QACrC,IAAI,EAAE,oBAAoB;QAC1B,SAAS,EAAE,aAAa,CAAC,MAAM;QAC/B,KAAK,EAAE,YAAY;QACnB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,UAAU,EAAE,wBAAwB;QACpC,YAAY,EAAE,kBAAkB,CAAC,KAAK;KACvC,CAAC;IAEF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD;YACF,kBACE,MAAM,EAAE,gBAAgB,EACxB,KAAK,EAAE,QAAQ,CAAC,oBAAoB,EAAE,QAAQ,EAC9C,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,gBAAgB,CAAC,QAAQ,EAAE,CAAC,CAAC,MAAM,CAAC,KAA2B,CAAC,GAElE,CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAA
C,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport {\n EvaluationApproach,\n EvaluationApproachValues,\n} from '../../../lib/evaluation/constants';\nimport { ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, SelectConfig, TextAreaConfig } from '../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n onUpdateApproach,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n const evaluationConfig: SelectConfig = {\n name: 'EvaluationApproach',\n fieldType: FormFieldType.SELECT,\n label: 'Evaluation',\n placeholder: 'Select evaluation approach…',\n required: true,\n optionList: EvaluationApproachValues,\n defaultValue: EvaluationApproach.EXACT,\n };\n\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n value={testCase.question}\n onValueChange={(e) 
=>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome || []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n <app-select\n config={evaluationConfig}\n value={testCase.evaluationParameters?.approach}\n onValueChange={(e) =>\n onUpdateApproach(testCase, e.detail.value as EvaluationApproach)\n }\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}
1
+ {"version":3,"file":"llm-test-case-row.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-case-row.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,0BAA0B,CAAC;AAC1D,OAAO,EAAE,iBAAiB,EAAE,MAAM,iCAAiC,CAAC;AACpE,OAAO,EAAE,UAAU,EAAE,MAAM,uBAAuB,CAAC;AACnD,OAAO,EAAE,aAAa,EAAkB,MAAM,0BAA0B,CAAC;AACzE,OAAO,EAEL,uBAAuB,GACxB,MAAM,6BAA6B,CAAC;AAcrC,MAAM,CAAC,MAAM,cAAc,GAA6C,CAAC,EACvE,QAAQ,EACR,KAAK,EACL,QAAQ,EACR,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,MAAM,cAAc,GAAmB;QACrC,IAAI,EAAE,UAAU;QAChB,SAAS,EAAE,aAAa,CAAC,SAAS;QAClC,IAAI,EAAE,MAAM;QACZ,KAAK,EAAE,UAAU;QACjB,WAAW,EAAE,6BAA6B;QAC1C,QAAQ,EAAE,IAAI;QACd,IAAI,EAAE,CAAC;KACR,CAAC;IACF,OAAO,CACL,WAAK,KAAK,EAAC,eAAe,EAAC,GAAG,EAAE,QAAQ,CAAC,EAAE;QACzC,WAAK,KAAK,EAAC,6BAA6B;YACtC,oBACE,MAAM,EAAE,cAAc,EACtB,KAAK,EAAE,QAAQ,CAAC,QAAQ,EACxB,aAAa,EAAE,CAAC,CAAC,EAAE,EAAE,CACnB,oBAAoB,CAAC;oBACnB,MAAM,EAAE;wBACN,UAAU,EAAE,QAAQ,CAAC,EAAE;wBACvB,GAAG,EAAE,UAAU;wBACf,KAAK,EAAE,CAAC,CAAC,MAAM,CAAC,KAAK;qBACtB;iBACiE,CAAC,GAEvE;YACF,EAAC,uBAAuB,IACtB,UAAU,EAAE,QAAQ,CAAC,EAAE,EACvB,MAAM,EAAE,QAAQ,CAAC,eAAe,IAAI,EAAE,EACtC,uBAAuB,EAAE,uBAAuB,GAChD,CACE;QAEN,EAAC,cAAc,IAAC,MAAM,EAAE,QAAQ,CAAC,MAAM,EAAE,SAAS,EAAE,QAAQ,CAAC,SAAS,GAAI;QAE1E,EAAC,iBAAiB,IAChB,MAAM,EAAE,QAAQ,CAAC,gBAAgB,EACjC,SAAS,EAAE,QAAQ,CAAC,SAAS,GAC7B;QAEF,EAAC,UAAU,IACT,SAAS,EAAE,QAAQ,CAAC,SAAS,EAC7B,MAAM,EAAE,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,EAClC,KAAK,EAAE,GAAG,EAAE,CAAC,KAAK,CAAC,QAAQ,CAAC,EAC5B,QAAQ,EAAE,GAAG,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE,CAAC,GACrC,CACE,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { ResponseOutput } from './output/response-output';\nimport { EvaluationSummary } from './evaluation/evaluation-summary';\nimport { RowActions } from './actions/row-actions';\nimport { FormFieldType, TextAreaConfig } from 
'../../../lib/form/schema';\nimport {\n ExpectedOutcomeChangeDetail,\n ExpectedOutcomeRenderer,\n} from './expected-outcome-renderer';\n\nexport interface LLMTestCaseRowProps {\n testCase: TestCase;\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCaseRow: FunctionalComponent<LLMTestCaseRowProps> = ({\n testCase,\n onRun,\n onDelete,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n const questionConfig: TextAreaConfig = {\n name: 'question',\n fieldType: FormFieldType.TEXT_AREA,\n type: 'text',\n label: 'Question',\n placeholder: 'Enter your question here...',\n required: true,\n rows: 3,\n };\n return (\n <div class=\"test-case-row\" key={testCase.id}>\n <div class=\"test-case-row__input-column\">\n <app-textarea\n config={questionConfig}\n value={testCase.question}\n onValueChange={(e) =>\n handleTestCaseChange({\n detail: {\n testCaseId: testCase.id,\n key: 'question',\n value: e.detail.value,\n },\n } as CustomEvent<{ testCaseId: string; key: string; value: string }>)\n }\n />\n <ExpectedOutcomeRenderer\n testCaseId={testCase.id}\n fields={testCase.expectedOutcome || []}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n </div>\n\n <ResponseOutput output={testCase.output} isRunning={testCase.isRunning} />\n\n <EvaluationSummary\n result={testCase.evaluationResult}\n isRunning={testCase.isRunning}\n />\n\n <RowActions\n isRunning={testCase.isRunning}\n canRun={!!testCase.question.trim()}\n onRun={() => onRun(testCase)}\n onDelete={() => onDelete(testCase.id)}\n />\n </div>\n );\n};\n"]}
@@ -1,7 +1,7 @@
1
1
  import { h } from "@stencil/core";
2
2
  import { LLMTestCaseRow } from "./llm-test-case-row";
3
3
  import { Button } from "../../../lib/ui/button/index";
4
- export const LLMTestCases = ({ testCases, onRun, onDelete, onUpdateApproach, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
5
- return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, onUpdateApproach: onUpdateApproach, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
4
+ export const LLMTestCases = ({ testCases, onRun, onDelete, onAddTestCase, handleTestCaseChange, onExpectedOutcomeChange, }) => {
5
+ return (h("div", { class: "test-cases" }, h("div", { class: "test-cases__column-headers" }, h("div", { class: "test-cases__column-header" }, "Input"), h("div", { class: "test-cases__column-header" }, "Output"), h("div", { class: "test-cases__column-header" }, "Evaluation"), h("div", { class: "test-cases__column-header" }, "Actions")), testCases.map(testCase => (h(LLMTestCaseRow, { testCase: testCase, onRun: onRun, onDelete: onDelete, handleTestCaseChange: handleTestCaseChange, onExpectedOutcomeChange: onExpectedOutcomeChange }))), h("div", { class: "test-cases__add-section" }, h(Button, { variant: "outline", size: "md", onClick: onAddTestCase }, "+ Add Question"))));
6
6
  };
7
7
  //# sourceMappingURL=llm-test-cases.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAGvD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAiBtD,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,gBAAgB,EAChB,aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,gBAAgB,EAAE,gBAAgB,EAClC,oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { EvaluationApproach } from '../../../lib/evaluation/constants';\nimport { LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onUpdateApproach: (testCase: TestCase, approach: EvaluationApproach) => void;\n onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n onUpdateApproach,\n onAddTestCase,\n 
handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n onUpdateApproach={onUpdateApproach}\n handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}
1
+ {"version":3,"file":"llm-test-cases.js","sourceRoot":"","sources":["../../../../src/components/llm-test-runner/test-cases/llm-test-cases.tsx"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAuB,MAAM,eAAe,CAAC;AAEvD,OAAO,EAAE,cAAc,EAAE,MAAM,qBAAqB,CAAC;AACrD,OAAO,EAAE,MAAM,EAAE,MAAM,8BAA8B,CAAC;AAgBtD,MAAM,CAAC,MAAM,YAAY,GAA2C,CAAC,EACnE,SAAS,EACT,KAAK,EACL,QAAQ,EACR,aAAa,EACb,oBAAoB,EACpB,uBAAuB,GACxB,EAAE,EAAE;IACH,OAAO,CACL,WAAK,KAAK,EAAC,YAAY;QACrB,WAAK,KAAK,EAAC,4BAA4B;YACrC,WAAK,KAAK,EAAC,2BAA2B,YAAY;YAClD,WAAK,KAAK,EAAC,2BAA2B,aAAa;YACnD,WAAK,KAAK,EAAC,2BAA2B,iBAAiB;YACvD,WAAK,KAAK,EAAC,2BAA2B,cAAc,CAChD;QAEL,SAAS,CAAC,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CACzB,EAAC,cAAc,IACb,QAAQ,EAAE,QAAQ,EAClB,KAAK,EAAE,KAAK,EACZ,QAAQ,EAAE,QAAQ,EAClB,oBAAoB,EAAE,oBAAoB,EAC1C,uBAAuB,EAAE,uBAAuB,GAChD,CACH,CAAC;QAEF,WAAK,KAAK,EAAC,yBAAyB;YAClC,EAAC,MAAM,IAAC,OAAO,EAAC,SAAS,EAAC,IAAI,EAAC,IAAI,EAAC,OAAO,EAAE,aAAa,qBAEjD,CACL,CACF,CACP,CAAC;AACJ,CAAC,CAAC","sourcesContent":["import { h, FunctionalComponent } from '@stencil/core';\nimport { TestCase } from '../../../types/llm-test-runner';\nimport { LLMTestCaseRow } from './llm-test-case-row';\nimport { Button } from '../../../lib/ui/button/index';\nimport { ExpectedOutcomeChangeDetail } from './expected-outcome-renderer';\n\nexport interface LLMTestCasesProps {\n testCases: TestCase[];\n onRun: (testCase: TestCase) => void;\n onDelete: (id: string) => void;\n onAddTestCase: () => void;\n handleTestCaseChange: (\n e: CustomEvent<{ testCaseId: string; key: string; value: string }>,\n ) => void;\n onExpectedOutcomeChange: (\n e: CustomEvent<ExpectedOutcomeChangeDetail>,\n ) => void;\n}\n\nexport const LLMTestCases: FunctionalComponent<LLMTestCasesProps> = ({\n testCases,\n onRun,\n onDelete,\n onAddTestCase,\n handleTestCaseChange,\n onExpectedOutcomeChange,\n}) => {\n return (\n <div class=\"test-cases\">\n <div class=\"test-cases__column-headers\">\n <div class=\"test-cases__column-header\">Input</div>\n <div 
class=\"test-cases__column-header\">Output</div>\n <div class=\"test-cases__column-header\">Evaluation</div>\n <div class=\"test-cases__column-header\">Actions</div>\n </div>\n\n {testCases.map(testCase => (\n <LLMTestCaseRow\n testCase={testCase}\n onRun={onRun}\n onDelete={onDelete}\n handleTestCaseChange={handleTestCaseChange}\n onExpectedOutcomeChange={onExpectedOutcomeChange}\n />\n ))}\n\n <div class=\"test-cases__add-section\">\n <Button variant=\"outline\" size=\"md\" onClick={onAddTestCase}>\n + Add Question\n </Button>\n </div>\n </div>\n );\n};\n"]}
@@ -6,56 +6,77 @@ import { performRougeLEvaluation } from "./evaluators/rougeL-evaluator";
6
6
  import { performBleuEvaluation } from "./evaluators/bleu/bleu-evaluator";
7
7
  export class LLMEvaluationEngine {
8
8
  async evaluateResponse(request, callback) {
9
- try {
10
- const approach = request.evaluationParameters.approach;
11
- switch (approach) {
12
- case EvaluationApproach.BLEU: {
13
- const bleuResult = performBleuEvaluation(request);
14
- callback(bleuResult);
15
- break;
16
- }
17
- case EvaluationApproach.EXACT: {
18
- const exactResult = await performEvaluation(request);
19
- callback(exactResult);
20
- break;
21
- }
22
- case EvaluationApproach.ROUGE_1: {
23
- const rougeResult = await performRouge1Evaluation(request);
24
- callback(rougeResult);
25
- break;
26
- }
27
- case EvaluationApproach.ROUGE_L: {
28
- const rougeLResult = await performRougeLEvaluation(request);
29
- callback(rougeLResult);
30
- break;
31
- }
32
- case EvaluationApproach.SEMANTIC: {
33
- const semanticResult = await performSemanticEvaluation(request);
34
- callback(semanticResult);
35
- break;
36
- }
37
- default: {
38
- console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
39
- const fallbackResult = await performEvaluation(request);
40
- callback(fallbackResult);
41
- }
42
- }
43
- }
44
- catch (error) {
45
- console.error('Evaluation failed:', error);
46
- const errorResult = {
9
+ const settledResults = await Promise.allSettled(request.fields.map(async (field) => {
10
+ const fieldRequest = {
47
11
  testCaseId: request.testCaseId,
12
+ question: request.question,
13
+ actualResponse: request.actualResponse,
14
+ expectedOutcome: field.expectedValue,
15
+ evaluationParameters: field.evaluationParameters,
16
+ };
17
+ const result = await this.evaluateField(fieldRequest);
18
+ const fieldResult = {
19
+ index: field.index,
20
+ label: field.label,
21
+ type: field.type,
22
+ expectedValue: field.expectedValue,
23
+ passed: result.passed,
24
+ keywordMatches: result.keywordMatches,
25
+ evaluationParameters: result.evaluationParameters,
26
+ evaluationApproachResult: result.evaluationApproachResult,
27
+ };
28
+ return fieldResult;
29
+ }));
30
+ const fieldResults = settledResults.map((settledResult, index) => {
31
+ const field = request.fields[index];
32
+ if (settledResult.status === 'fulfilled') {
33
+ return settledResult.value;
34
+ }
35
+ return {
36
+ index: field.index,
37
+ label: field.label,
38
+ type: field.type,
39
+ expectedValue: field.expectedValue,
48
40
  passed: false,
49
41
  keywordMatches: [],
50
- timestamp: new Date().toISOString(),
51
- evaluationParameters: request.evaluationParameters,
42
+ evaluationParameters: field.evaluationParameters,
52
43
  evaluationApproachResult: {
53
44
  score: 0,
54
- approachUsed: EvaluationApproach.EXACT,
45
+ approachUsed: field.evaluationParameters.approach,
55
46
  },
47
+ error: this.getSafeErrorMessage(settledResult.reason),
56
48
  };
57
- callback(errorResult);
49
+ });
50
+ const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);
51
+ const passed = fieldResults.every(field => field.passed && !field.error);
52
+ callback({
53
+ testCaseId: request.testCaseId,
54
+ passed,
55
+ keywordMatches,
56
+ fieldResults,
57
+ timestamp: new Date().toISOString(),
58
+ });
59
+ }
60
+ async evaluateField(request) {
61
+ const approach = request.evaluationParameters.approach;
62
+ switch (approach) {
63
+ case EvaluationApproach.BLEU:
64
+ return performBleuEvaluation(request);
65
+ case EvaluationApproach.EXACT:
66
+ return performEvaluation(request);
67
+ case EvaluationApproach.ROUGE_1:
68
+ return performRouge1Evaluation(request);
69
+ case EvaluationApproach.ROUGE_L:
70
+ return performRougeLEvaluation(request);
71
+ case EvaluationApproach.SEMANTIC:
72
+ return performSemanticEvaluation(request);
73
+ default:
74
+ console.warn(`Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`);
75
+ return performEvaluation(request);
58
76
  }
59
77
  }
78
+ getSafeErrorMessage(error) {
79
+ return error instanceof Error ? error.message : 'Field evaluation failed.';
80
+ }
60
81
  }
61
82
  //# sourceMappingURL=evaluation-engine.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAKA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA0B,EAC1B,QAA4B;QAE5B,IAAI,CAAC;YACH,MAAM,QAAQ,GACZ,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;YACxC,QAAQ,QAAQ,EAAE,CAAC;gBACjB,KAAK,kBAAkB,CAAC,IAAI,CAAC,CAAC,CAAC;oBAC7B,MAAM,UAAU,GAAG,qBAAqB,CAAC,OAAO,CAAC,CAAC;oBAClD,QAAQ,CAAC,UAAU,CAAC,CAAC;oBACrB,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAkB,CAAC,KAAK,CAAC,CAAC,CAAC;oBAC9B,MAAM,WAAW,GAAG,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;oBACrD,QAAQ,CAAC,WAAW,CAAC,CAAC;oBACtB,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;oBAChC,MAAM,WAAW,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;oBAC3D,QAAQ,CAAC,WAAW,CAAC,CAAC;oBACtB,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAkB,CAAC,OAAO,CAAC,CAAC,CAAC;oBAChC,MAAM,YAAY,GAAG,MAAM,uBAAuB,CAAC,OAAO,CAAC,CAAC;oBAC5D,QAAQ,CAAC,YAAY,CAAC,CAAC;oBACvB,MAAM;gBACR,CAAC;gBAED,KAAK,kBAAkB,CAAC,QAAQ,CAAC,CAAC,CAAC;oBACjC,MAAM,cAAc,GAAG,MAAM,yBAAyB,CAAC,OAAO,CAAC,CAAC;oBAChE,QAAQ,CAAC,cAAc,CAAC,CAAC;oBACzB,MAAM;gBACR,CAAC;gBAED,OAAO,CAAC,CAAC,CAAC;oBACR,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;oBACF,MAAM,cAAc,GAAG,MAAM,iBAAiB,CAAC,OAAO,CAAC,CAAC;oBACxD,QAAQ,CAAC,cAAc,CAAC,CAAC;gBAC3B,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,OAAO,CAAC,KAAK,CAAC,oBAAoB,EAAE,KAAK,CAAC,CAAC;YAE3C,MAAM,WAAW,GAAqB;gBACpC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,oBAAoB,EAAE,OAAO,CAAC,oBAAoB;gBAClD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,kBAAkB,CAAC,KAAK;iBACvC;aACF,CAAC;YAEF,QAAQ,CAAC,WAAW,CAAC,CAAC;QACxB,CAAC;IACH,CAAC;CACF","sourcesContent":["import {\n 
EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequest,\n callback: EvaluationCallback,\n ): Promise<void> {\n try {\n const approach: EvaluationApproach =\n request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU: {\n const bleuResult = performBleuEvaluation(request);\n callback(bleuResult);\n break;\n }\n\n case EvaluationApproach.EXACT: {\n const exactResult = await performEvaluation(request);\n callback(exactResult);\n break;\n }\n\n case EvaluationApproach.ROUGE_1: {\n const rougeResult = await performRouge1Evaluation(request);\n callback(rougeResult);\n break;\n }\n\n case EvaluationApproach.ROUGE_L: {\n const rougeLResult = await performRougeLEvaluation(request);\n callback(rougeLResult);\n break;\n }\n\n case EvaluationApproach.SEMANTIC: {\n const semanticResult = await performSemanticEvaluation(request);\n callback(semanticResult);\n break;\n }\n\n default: {\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n const fallbackResult = await performEvaluation(request);\n callback(fallbackResult);\n }\n }\n } catch (error) {\n console.error('Evaluation failed:', error);\n\n const errorResult: EvaluationResult = {\n testCaseId: request.testCaseId,\n passed: false,\n keywordMatches: [],\n timestamp: new Date().toISOString(),\n evaluationParameters: request.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n 
approachUsed: EvaluationApproach.EXACT,\n },\n };\n\n callback(errorResult);\n }\n }\n}\n"]}
1
+ {"version":3,"file":"evaluation-engine.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-engine.ts"],"names":[],"mappings":"AAOA,OAAO,EAAE,iBAAiB,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,yBAAyB,EAAE,MAAM,6BAA6B,CAAC;AACxE,OAAO,EAAE,uBAAuB,EAAE,MAAM,+BAA+B,CAAC;AACxE,OAAO,EAAE,qBAAqB,EAAE,MAAM,kCAAkC,CAAC;AAEzE,MAAM,OAAO,mBAAmB;IAC9B,KAAK,CAAC,gBAAgB,CACpB,OAA4B,EAC5B,QAA4B;QAE5B,MAAM,cAAc,GAAG,MAAM,OAAO,CAAC,UAAU,CAC7C,OAAO,CAAC,MAAM,CAAC,GAAG,CAAC,KAAK,EAAC,KAAK,EAAC,EAAE;YAC/B,MAAM,YAAY,GAAsB;gBACtC,UAAU,EAAE,OAAO,CAAC,UAAU;gBAC9B,QAAQ,EAAE,OAAO,CAAC,QAAQ;gBAC1B,cAAc,EAAE,OAAO,CAAC,cAAc;gBACtC,eAAe,EAAE,KAAK,CAAC,aAAa;gBACpC,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;aACjD,CAAC;YACF,MAAM,MAAM,GAAG,MAAM,IAAI,CAAC,aAAa,CAAC,YAAY,CAAC,CAAC;YAEtD,MAAM,WAAW,GAA0B;gBACzC,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,MAAM,CAAC,MAAM;gBACrB,cAAc,EAAE,MAAM,CAAC,cAAc;gBACrC,oBAAoB,EAAE,MAAM,CAAC,oBAAqB;gBAClD,wBAAwB,EAAE,MAAM,CAAC,wBAAwB;aAC1D,CAAC;YACF,OAAO,WAAW,CAAC;QACrB,CAAC,CAAC,CACH,CAAC;QAEF,MAAM,YAAY,GAA4B,cAAc,CAAC,GAAG,CAC9D,CAAC,aAAa,EAAE,KAAK,EAAE,EAAE;YACvB,MAAM,KAAK,GAAG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;YACpC,IAAI,aAAa,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACzC,OAAO,aAAa,CAAC,KAAK,CAAC;YAC7B,CAAC;YAED,OAAO;gBACL,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,KAAK,EAAE,KAAK,CAAC,KAAK;gBAClB,IAAI,EAAE,KAAK,CAAC,IAAI;gBAChB,aAAa,EAAE,KAAK,CAAC,aAAa;gBAClC,MAAM,EAAE,KAAK;gBACb,cAAc,EAAE,EAAE;gBAClB,oBAAoB,EAAE,KAAK,CAAC,oBAAoB;gBAChD,wBAAwB,EAAE;oBACxB,KAAK,EAAE,CAAC;oBACR,YAAY,EAAE,KAAK,CAAC,oBAAoB,CAAC,QAAQ;iBAClD;gBACD,KAAK,EAAE,IAAI,CAAC,mBAAmB,CAAC,aAAa,CAAC,MAAM,CAAC;aACtD,CAAC;QACJ,CAAC,CACF,CAAC;QAEF,MAAM,cAAc,GAAG,YAAY,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,cAAc,CAAC,CAAC;QAC3E,MAAM,MAAM,GAAG,YAAY,CAAC,KAAK,CAAC,KAAK,CAAC,EAAE,CAAC,KAAK,CAAC,MAAM,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC;QAEzE
,QAAQ,CAAC;YACP,UAAU,EAAE,OAAO,CAAC,UAAU;YAC9B,MAAM;YACN,cAAc;YACd,YAAY;YACZ,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;SACpC,CAAC,CAAC;IACL,CAAC;IAEO,KAAK,CAAC,aAAa,CAAC,OAA0B;QACpD,MAAM,QAAQ,GAAuB,OAAO,CAAC,oBAAoB,CAAC,QAAQ,CAAC;QAC3E,QAAQ,QAAQ,EAAE,CAAC;YACjB,KAAK,kBAAkB,CAAC,IAAI;gBAC1B,OAAO,qBAAqB,CAAC,OAAO,CAAC,CAAC;YACxC,KAAK,kBAAkB,CAAC,KAAK;gBAC3B,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;YACpC,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,OAAO;gBAC7B,OAAO,uBAAuB,CAAC,OAAO,CAAC,CAAC;YAC1C,KAAK,kBAAkB,CAAC,QAAQ;gBAC9B,OAAO,yBAAyB,CAAC,OAAO,CAAC,CAAC;YAC5C;gBACE,OAAO,CAAC,IAAI,CACV,8BAA8B,OAAO,CAAC,oBAAoB,CAAC,QAAQ,kCAAkC,CACtG,CAAC;gBACF,OAAO,iBAAiB,CAAC,OAAO,CAAC,CAAC;QACtC,CAAC;IACH,CAAC;IAEO,mBAAmB,CAAC,KAAc;QACxC,OAAO,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,0BAA0B,CAAC;IAC7E,CAAC;CACF","sourcesContent":["import {\n EvaluationRequest,\n EvaluationResult,\n EvaluationCallback,\n FieldEvaluationResult,\n EvaluationRequestV2,\n} from './types';\nimport { performEvaluation } from './evaluators/exact/exact';\nimport { EvaluationApproach } from './constants';\nimport { performRouge1Evaluation } from './evaluators/rouge1-evaluator';\nimport { performSemanticEvaluation } from './evaluators/semantic/index';\nimport { performRougeLEvaluation } from './evaluators/rougeL-evaluator';\nimport { performBleuEvaluation } from './evaluators/bleu/bleu-evaluator';\n\nexport class LLMEvaluationEngine {\n async evaluateResponse(\n request: EvaluationRequestV2,\n callback: EvaluationCallback,\n ): Promise<void> {\n const settledResults = await Promise.allSettled(\n request.fields.map(async field => {\n const fieldRequest: EvaluationRequest = {\n testCaseId: request.testCaseId,\n question: request.question,\n actualResponse: request.actualResponse,\n expectedOutcome: field.expectedValue,\n evaluationParameters: field.evaluationParameters,\n };\n const result = await this.evaluateField(fieldRequest);\n\n const fieldResult: 
FieldEvaluationResult = {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: result.passed,\n keywordMatches: result.keywordMatches,\n evaluationParameters: result.evaluationParameters!,\n evaluationApproachResult: result.evaluationApproachResult,\n };\n return fieldResult;\n }),\n );\n\n const fieldResults: FieldEvaluationResult[] = settledResults.map(\n (settledResult, index) => {\n const field = request.fields[index];\n if (settledResult.status === 'fulfilled') {\n return settledResult.value;\n }\n\n return {\n index: field.index,\n label: field.label,\n type: field.type,\n expectedValue: field.expectedValue,\n passed: false,\n keywordMatches: [],\n evaluationParameters: field.evaluationParameters,\n evaluationApproachResult: {\n score: 0,\n approachUsed: field.evaluationParameters.approach,\n },\n error: this.getSafeErrorMessage(settledResult.reason),\n };\n },\n );\n\n const keywordMatches = fieldResults.flatMap(field => field.keywordMatches);\n const passed = fieldResults.every(field => field.passed && !field.error);\n\n callback({\n testCaseId: request.testCaseId,\n passed,\n keywordMatches,\n fieldResults,\n timestamp: new Date().toISOString(),\n });\n }\n\n private async evaluateField(request: EvaluationRequest): Promise<EvaluationResult> {\n const approach: EvaluationApproach = request.evaluationParameters.approach;\n switch (approach) {\n case EvaluationApproach.BLEU:\n return performBleuEvaluation(request);\n case EvaluationApproach.EXACT:\n return performEvaluation(request);\n case EvaluationApproach.ROUGE_1:\n return performRouge1Evaluation(request);\n case EvaluationApproach.ROUGE_L:\n return performRougeLEvaluation(request);\n case EvaluationApproach.SEMANTIC:\n return performSemanticEvaluation(request);\n default:\n console.warn(\n `Unknown matching approach: ${request.evaluationParameters.approach}, falling back to exact matching`,\n );\n return performEvaluation(request);\n }\n }\n\n 
private getSafeErrorMessage(error: unknown): string {\n return error instanceof Error ? error.message : 'Field evaluation failed.';\n }\n}\n"]}
@@ -1,5 +1,5 @@
1
1
  import { LLMEvaluationEngine } from "./evaluation-engine";
2
- import { serializeExpectedOutcome } from "../expected-outcome-serializer";
2
+ import { normalizeEvaluationParametersForField } from "./field-evaluation-approach";
3
3
  /**
4
4
  * Service for evaluating test case responses
5
5
  */
@@ -18,12 +18,18 @@ export class EvaluationService {
18
18
  console.warn('⚠️ No output to evaluate for test case:', testCase.id);
19
19
  return;
20
20
  }
21
+ const fields = (testCase.expectedOutcome || []).map((field, index) => ({
22
+ index,
23
+ label: field.label,
24
+ type: field.type,
25
+ expectedValue: getFieldExpectedValue(field),
26
+ evaluationParameters: normalizeEvaluationParametersForField(field.type, field.evaluationParameters),
27
+ }));
21
28
  const evaluationRequest = {
22
29
  testCaseId: testCase.id,
23
30
  question: testCase.question,
24
- expectedOutcome: serializeExpectedOutcome(testCase.expectedOutcome),
25
31
  actualResponse: testCase.output,
26
- evaluationParameters: testCase.evaluationParameters,
32
+ fields,
27
33
  };
28
34
  await this.engine.evaluateResponse(evaluationRequest, (result) => {
29
35
  console.log('📊 Evaluation result received:', result);
@@ -31,4 +37,10 @@ export class EvaluationService {
31
37
  });
32
38
  }
33
39
  }
40
+ function getFieldExpectedValue(field) {
41
+ if (field.type === 'chips-input') {
42
+ return field.value.join(', ');
43
+ }
44
+ return field.value;
45
+ }
34
46
  //# sourceMappingURL=evaluation-service.js.map
@@ -1 +1 @@
1
- {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAG1D,OAAO,EAAE,wBAAwB,EAAE,MAAM,gCAAgC,CAAC;AAE1E;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,iBAAiB,GAAsB;YAC3C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,eAAe,EAAE,wBAAwB,CAAC,QAAQ,CAAC,eAAe,CAAC;YACnE,cAAc,EAAE,QAAQ,CAAC,MAAM;YAC/B,oBAAoB,EAAE,QAAQ,CAAC,oBAAoB;SACpD,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport { EvaluationRequest, EvaluationResult } from './types';\nimport { TestCase } from '../../types/llm-test-runner';\nimport { serializeExpectedOutcome } from '../expected-outcome-serializer';\n\n/**\n * Service for evaluating test case responses\n */\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /**\n * Evaluates a test case response\n * @param testCase - The test case to evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const evaluationRequest: EvaluationRequest = {\n testCaseId: testCase.id,\n question: testCase.question,\n expectedOutcome: serializeExpectedOutcome(testCase.expectedOutcome),\n 
actualResponse: testCase.output,\n evaluationParameters: testCase.evaluationParameters,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n"]}
1
+ {"version":3,"file":"evaluation-service.js","sourceRoot":"","sources":["../../../src/lib/evaluation/evaluation-service.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,mBAAmB,EAAE,MAAM,qBAAqB,CAAC;AAO1D,OAAO,EAAE,qCAAqC,EAAE,MAAM,6BAA6B,CAAC;AAEpF;;GAEG;AACH,MAAM,OAAO,iBAAiB;IACpB,MAAM,CAAsB;IAEpC;QACE,IAAI,CAAC,MAAM,GAAG,IAAI,mBAAmB,EAAE,CAAC;IAC1C,CAAC;IAED;;;;OAIG;IACH,KAAK,CAAC,gBAAgB,CACpB,QAAkB,EAClB,QAA4C;QAE5C,IAAI,CAAC,QAAQ,CAAC,MAAM,EAAE,CAAC;YACrB,OAAO,CAAC,IAAI,CAAC,yCAAyC,EAAE,QAAQ,CAAC,EAAE,CAAC,CAAC;YACrE,OAAO;QACT,CAAC;QAED,MAAM,MAAM,GAA2B,CAAC,QAAQ,CAAC,eAAe,IAAI,EAAE,CAAC,CAAC,GAAG,CACzE,CAAC,KAAK,EAAE,KAAK,EAAE,EAAE,CAAC,CAAC;YACjB,KAAK;YACL,KAAK,EAAE,KAAK,CAAC,KAAK;YAClB,IAAI,EAAE,KAAK,CAAC,IAAI;YAChB,aAAa,EAAE,qBAAqB,CAAC,KAAK,CAAC;YAC3C,oBAAoB,EAAE,qCAAqC,CACzD,KAAK,CAAC,IAAI,EACV,KAAK,CAAC,oBAAoB,CAC3B;SACF,CAAC,CACH,CAAC;QAEF,MAAM,iBAAiB,GAAwB;YAC7C,UAAU,EAAE,QAAQ,CAAC,EAAE;YACvB,QAAQ,EAAE,QAAQ,CAAC,QAAQ;YAC3B,cAAc,EAAE,QAAQ,CAAC,MAAM;YAC/B,MAAM;SACP,CAAC;QAEF,MAAM,IAAI,CAAC,MAAM,CAAC,gBAAgB,CAChC,iBAAiB,EACjB,CAAC,MAAwB,EAAE,EAAE;YAC3B,OAAO,CAAC,GAAG,CAAC,gCAAgC,EAAE,MAAM,CAAC,CAAC;YACtD,QAAQ,CAAC,MAAM,CAAC,CAAC;QACnB,CAAC,CACF,CAAC;IACJ,CAAC;CACF;AAED,SAAS,qBAAqB,CAAC,KAA2B;IACxD,IAAI,KAAK,CAAC,IAAI,KAAK,aAAa,EAAE,CAAC;QACjC,OAAO,KAAK,CAAC,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;IAChC,CAAC;IACD,OAAO,KAAK,CAAC,KAAK,CAAC;AACrB,CAAC","sourcesContent":["import { LLMEvaluationEngine } from './evaluation-engine';\nimport {\n EvaluationResult,\n FieldEvaluationInput,\n EvaluationRequestV2,\n} from './types';\nimport { TestCase, ExpectedOutcomeField } from '../../types/llm-test-runner';\nimport { normalizeEvaluationParametersForField } from './field-evaluation-approach';\n\n/**\n * Service for evaluating test case responses\n */\nexport class EvaluationService {\n private engine: LLMEvaluationEngine;\n\n constructor() {\n this.engine = new LLMEvaluationEngine();\n }\n\n /**\n * Evaluates a test case response\n * @param testCase - The test case to 
evaluate\n * @param onResult - Callback to handle the evaluation result\n */\n async evaluateTestCase(\n testCase: TestCase,\n onResult: (result: EvaluationResult) => void,\n ): Promise<void> {\n if (!testCase.output) {\n console.warn('⚠️ No output to evaluate for test case:', testCase.id);\n return;\n }\n\n const fields: FieldEvaluationInput[] = (testCase.expectedOutcome || []).map(\n (field, index) => ({\n index,\n label: field.label,\n type: field.type,\n expectedValue: getFieldExpectedValue(field),\n evaluationParameters: normalizeEvaluationParametersForField(\n field.type,\n field.evaluationParameters,\n ),\n }),\n );\n\n const evaluationRequest: EvaluationRequestV2 = {\n testCaseId: testCase.id,\n question: testCase.question,\n actualResponse: testCase.output,\n fields,\n };\n\n await this.engine.evaluateResponse(\n evaluationRequest,\n (result: EvaluationResult) => {\n console.log('📊 Evaluation result received:', result);\n onResult(result);\n },\n );\n }\n}\n\nfunction getFieldExpectedValue(field: ExpectedOutcomeField): string {\n if (field.type === 'chips-input') {\n return field.value.join(', ');\n }\n return field.value;\n}\n"]}