llm-testrunner-components 1.0.6 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -100
- package/dist/cjs/app-chips_5.cjs.entry.js +158 -0
- package/dist/cjs/app-chips_5.cjs.entry.js.map +1 -0
- package/dist/cjs/app-globals-Chb-oJtg.js +34 -0
- package/dist/cjs/app-globals-Chb-oJtg.js.map +1 -0
- package/dist/cjs/index-By1scwl6.js +25542 -0
- package/dist/cjs/index-By1scwl6.js.map +1 -0
- package/dist/cjs/index-CgmLNwZO.js +21460 -0
- package/dist/cjs/index-CgmLNwZO.js.map +1 -0
- package/dist/cjs/index.cjs.js +5 -483
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +6 -4
- package/dist/cjs/llm-testrunner.cjs.js.map +1 -1
- package/dist/cjs/loader.cjs.js +5 -3
- package/dist/collection/collection-manifest.json +8 -3
- package/dist/collection/components/error-message/error-message.css +34 -0
- package/dist/collection/components/error-message/error-message.js +2 -2
- package/dist/collection/components/error-message/error-message.js.map +1 -1
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.css +60 -0
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js +18 -0
- package/dist/collection/components/llm-test-runner/header/llm-test-runner-header.js.map +1 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.css +17 -657
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +253 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -0
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +191 -200
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/components/llm-test-runner/test-cases/actions/row-actions.css +28 -0
- package/dist/collection/components/llm-test-runner/test-cases/actions/row-actions.js +6 -0
- package/dist/collection/components/llm-test-runner/test-cases/actions/row-actions.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css +67 -0
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js +5 -0
- package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +42 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +39 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.css +39 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +7 -0
- package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -0
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.css +51 -0
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js +5 -0
- package/dist/collection/components/llm-test-runner/test-cases/output/response-output.js.map +1 -0
- package/dist/collection/global/env.js +3 -1
- package/dist/collection/global/env.js.map +1 -1
- package/dist/collection/index.js.map +1 -1
- package/dist/collection/lib/evaluation/constants.js +14 -0
- package/dist/collection/lib/evaluation/constants.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluation-engine.js +45 -45
- package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
- package/dist/collection/lib/evaluation/evaluation-service.js +33 -0
- package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/bleu/bleu-evaluator.js +116 -0
- package/dist/collection/lib/evaluation/evaluators/bleu/bleu-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/bleu/tests/bleu.test.js +352 -0
- package/dist/collection/lib/evaluation/evaluators/bleu/tests/bleu.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js +44 -0
- package/dist/collection/lib/evaluation/evaluators/exact/exact.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js +88 -0
- package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js +82 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js +326 -0
- package/dist/collection/lib/evaluation/evaluators/rougeL-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js +69 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/SemanticEvaluator.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js +56 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/evaluate-keywords.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js +7 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/index.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js +19 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/model-loader.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js +16 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/similarity-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js +65 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.js.map +1 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js +5 -0
- package/dist/collection/lib/evaluation/evaluators/semantic/text-utils.js.map +1 -0
- package/dist/collection/lib/evaluation/index.js.map +1 -1
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js +117 -0
- package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +1 -0
- package/dist/collection/lib/evaluation/types.js.map +1 -1
- package/dist/collection/lib/file/file-download.js +18 -0
- package/dist/collection/lib/file/file-download.js.map +1 -0
- package/dist/collection/lib/file/file-reader.js +14 -0
- package/dist/collection/lib/file/file-reader.js.map +1 -0
- package/dist/collection/lib/form/components/app-chips.css +97 -0
- package/dist/collection/lib/form/components/app-chips.js +155 -0
- package/dist/collection/lib/form/components/app-chips.js.map +1 -0
- package/dist/collection/lib/form/components/app-select.css +28 -0
- package/dist/collection/lib/form/components/app-select.js +101 -0
- package/dist/collection/lib/form/components/app-select.js.map +1 -0
- package/dist/collection/lib/form/components/app-textarea.css +38 -0
- package/dist/collection/lib/form/components/app-textarea.js +126 -0
- package/dist/collection/lib/form/components/app-textarea.js.map +1 -0
- package/dist/collection/lib/form/form-builder.js +171 -0
- package/dist/collection/lib/form/form-builder.js.map +1 -0
- package/dist/collection/lib/form/schema/base-input-field-config.js +2 -0
- package/dist/collection/lib/form/schema/base-input-field-config.js.map +1 -0
- package/dist/collection/lib/form/schema/form-control-config.js +2 -0
- package/dist/collection/lib/form/schema/form-control-config.js.map +1 -0
- package/dist/collection/lib/form/schema/index.js +8 -0
- package/dist/collection/lib/form/schema/index.js.map +1 -0
- package/dist/collection/lib/import-export/test-results-csv.js +65 -0
- package/dist/collection/lib/import-export/test-results-csv.js.map +1 -0
- package/dist/collection/lib/import-export/test-suite-exporter.js +15 -0
- package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -0
- package/dist/collection/lib/import-export/test-suite-importer.js +44 -0
- package/dist/collection/lib/import-export/test-suite-importer.js.map +1 -0
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +6 -6
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -1
- package/dist/collection/lib/test-cases/test-case-factory.js +56 -0
- package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -0
- package/dist/collection/lib/test-cases/test-case-mutations.js +16 -0
- package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -0
- package/dist/collection/lib/ui/button/button.css +113 -0
- package/dist/collection/lib/ui/button/button.js +21 -0
- package/dist/collection/lib/ui/button/button.js.map +1 -0
- package/dist/collection/lib/ui/button/index.js +2 -0
- package/dist/collection/lib/ui/button/index.js.map +1 -0
- package/dist/collection/lib/ui/icon-button/icon-button.css +77 -0
- package/dist/collection/lib/ui/icon-button/icon-button.js +19 -0
- package/dist/collection/lib/ui/icon-button/icon-button.js.map +1 -0
- package/dist/collection/lib/ui/icon-button/index.js +2 -0
- package/dist/collection/lib/ui/icon-button/index.js.map +1 -0
- package/dist/collection/services/adapters.js +2 -0
- package/dist/collection/services/adapters.js.map +1 -0
- package/dist/collection/services/models/gemini.js +17 -0
- package/dist/collection/services/models/gemini.js.map +1 -0
- package/dist/collection/styles/tokens.css +180 -0
- package/dist/collection/types/evaluation.js +2 -0
- package/dist/collection/types/evaluation.js.map +1 -0
- package/dist/collection/types/llm-test-runner.js +2 -0
- package/dist/collection/types/llm-test-runner.js.map +1 -0
- package/dist/components/app-chips.d.ts +11 -0
- package/dist/components/app-chips.js +2 -0
- package/dist/components/app-chips.js.map +1 -0
- package/dist/components/app-select.d.ts +11 -0
- package/dist/components/app-select.js +2 -0
- package/dist/components/app-select.js.map +1 -0
- package/dist/components/app-textarea.d.ts +11 -0
- package/dist/components/app-textarea.js +2 -0
- package/dist/components/app-textarea.js.map +1 -0
- package/dist/components/form-builder.d.ts +11 -0
- package/dist/components/form-builder.js +2 -0
- package/dist/components/form-builder.js.map +1 -0
- package/dist/components/index.d.ts +2 -0
- package/dist/components/index.js +1 -13
- package/dist/components/index.js.map +1 -1
- package/dist/components/llm-test-runner.js +1 -8
- package/dist/components/llm-test-runner.js.map +1 -1
- package/dist/components/p--2rdv_J9.js +2 -0
- package/dist/components/p--2rdv_J9.js.map +1 -0
- package/dist/components/p-B7J48VNq.js +2 -0
- package/dist/components/p-B7J48VNq.js.map +1 -0
- package/dist/components/p-BCB1rjPS.js +7 -0
- package/dist/components/p-BCB1rjPS.js.map +1 -0
- package/dist/components/p-BQhb2H_a.js +2 -0
- package/dist/components/p-BQhb2H_a.js.map +1 -0
- package/dist/components/p-D9BrlHdP.js +297 -0
- package/dist/components/p-D9BrlHdP.js.map +1 -0
- package/dist/components/p-DtCkZ1g2.js +2 -0
- package/dist/components/p-DtCkZ1g2.js.map +1 -0
- package/dist/esm/app-chips_5.entry.js +153 -0
- package/dist/esm/app-chips_5.entry.js.map +1 -0
- package/dist/esm/app-globals-DbR5vV7d.js +32 -0
- package/dist/esm/app-globals-DbR5vV7d.js.map +1 -0
- package/dist/esm/index-Bvg6mh1M.js +25539 -0
- package/dist/esm/index-Bvg6mh1M.js.map +1 -0
- package/dist/esm/index-DxzhGhec.js +21450 -0
- package/dist/esm/index-DxzhGhec.js.map +1 -0
- package/dist/esm/index.js +4 -486
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-testrunner.js +7 -5
- package/dist/esm/llm-testrunner.js.map +1 -1
- package/dist/esm/loader.js +6 -4
- package/dist/llm-testrunner/index.esm.js +1 -1
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js.map +1 -1
- package/dist/llm-testrunner/p-3f04b0fb.entry.js +2 -0
- package/dist/llm-testrunner/p-3f04b0fb.entry.js.map +1 -0
- package/dist/llm-testrunner/p-DFds8y01.js +7 -0
- package/dist/llm-testrunner/p-DFds8y01.js.map +1 -0
- package/dist/llm-testrunner/p-DxzhGhec.js +298 -0
- package/dist/llm-testrunner/p-DxzhGhec.js.map +1 -0
- package/dist/llm-testrunner/p-GQwFOmwJ.js +2 -0
- package/dist/llm-testrunner/p-GQwFOmwJ.js.map +1 -0
- package/dist/react/components.d.ts +32 -2
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +44 -2
- package/dist/types/components/llm-test-runner/header/llm-test-runner-header.d.ts +14 -0
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +13 -29
- package/dist/types/components/llm-test-runner/llm-test-runner.import-export.test.d.ts +1 -0
- package/dist/types/components/llm-test-runner/test-cases/actions/row-actions.d.ts +8 -0
- package/dist/types/components/llm-test-runner/test-cases/evaluation/evaluation-summary.d.ts +7 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +25 -0
- package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +26 -0
- package/dist/types/components/llm-test-runner/test-cases/output/response-output.d.ts +6 -0
- package/dist/types/components.d.ts +199 -4
- package/dist/types/global/env.d.ts +2 -0
- package/dist/types/index.d.ts +1 -1
- package/dist/types/lib/evaluation/constants.d.ts +11 -0
- package/dist/types/lib/evaluation/evaluation-engine.d.ts +0 -4
- package/dist/types/lib/evaluation/evaluation-service.d.ts +15 -0
- package/dist/types/lib/evaluation/evaluators/bleu/bleu-evaluator.d.ts +18 -0
- package/dist/types/lib/evaluation/evaluators/bleu/tests/bleu.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/exact/exact.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/rouge1-evaluator.d.ts +17 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/rougeL-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/SemanticEvaluator.d.ts +6 -0
- package/dist/types/lib/evaluation/evaluators/semantic/evaluate-keywords.d.ts +7 -0
- package/dist/types/lib/evaluation/evaluators/semantic/index.d.ts +2 -0
- package/dist/types/lib/evaluation/evaluators/semantic/model-loader.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/similarity-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/tests/evaluate-keywords.test.d.ts +1 -0
- package/dist/types/lib/evaluation/evaluators/semantic/text-utils.d.ts +1 -0
- package/dist/types/lib/evaluation/index.d.ts +2 -2
- package/dist/types/lib/evaluation/rouge1-evaluator.test.d.ts +1 -0
- package/dist/types/lib/evaluation/types.d.ts +19 -7
- package/dist/types/lib/file/file-download.d.ts +7 -0
- package/dist/types/lib/file/file-reader.d.ts +6 -0
- package/dist/types/lib/form/components/app-chips.d.ts +20 -0
- package/dist/types/lib/form/components/app-select.d.ts +7 -0
- package/dist/types/lib/form/components/app-textarea.d.ts +14 -0
- package/dist/types/lib/form/form-builder.d.ts +24 -0
- package/dist/types/lib/form/schema/base-input-field-config.d.ts +37 -0
- package/dist/types/lib/form/schema/form-control-config.d.ts +13 -0
- package/dist/types/lib/form/schema/index.d.ts +9 -0
- package/dist/types/lib/import-export/test-results-csv.d.ts +13 -0
- package/dist/types/lib/import-export/test-suite-exporter.d.ts +16 -0
- package/dist/types/lib/import-export/test-suite-importer.d.ts +12 -0
- package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +1 -1
- package/dist/types/lib/test-cases/test-case-factory.d.ts +12 -0
- package/dist/types/lib/test-cases/test-case-mutations.d.ts +9 -0
- package/dist/types/lib/ui/button/button.d.ts +13 -0
- package/dist/types/lib/ui/button/index.d.ts +2 -0
- package/dist/types/lib/ui/icon-button/icon-button.d.ts +11 -0
- package/dist/types/lib/ui/icon-button/index.d.ts +2 -0
- package/dist/types/services/adapters.d.ts +3 -0
- package/dist/types/services/models/gemini.d.ts +11 -0
- package/dist/types/stencil-public-runtime.d.ts +110 -6
- package/dist/types/types/evaluation.d.ts +9 -0
- package/dist/types/types/llm-test-runner.d.ts +22 -0
- package/package.json +30 -6
- package/dist/cjs/app-globals-CbbEbofA.js +0 -14
- package/dist/cjs/app-globals-CbbEbofA.js.map +0 -1
- package/dist/cjs/index-D-FySkoV.js +0 -1470
- package/dist/cjs/index-D-FySkoV.js.map +0 -1
- package/dist/cjs/llm-test-runner.cjs.entry.js +0 -9
- package/dist/cjs/llm-test-runner.entry.cjs.js.map +0 -1
- package/dist/components/p-CYUbsbxt.js +0 -1770
- package/dist/components/p-CYUbsbxt.js.map +0 -1
- package/dist/esm/app-globals-BOQOUavG.js +0 -12
- package/dist/esm/app-globals-BOQOUavG.js.map +0 -1
- package/dist/esm/index-cncubhtM.js +0 -1463
- package/dist/esm/index-cncubhtM.js.map +0 -1
- package/dist/esm/llm-test-runner.entry.js +0 -3
- package/dist/esm/llm-test-runner.entry.js.map +0 -1
- package/dist/llm-testrunner/llm-test-runner.entry.esm.js.map +0 -1
- package/dist/llm-testrunner/loader.esm.js.map +0 -1
- package/dist/llm-testrunner/p-BOQOUavG.js +0 -2
- package/dist/llm-testrunner/p-BOQOUavG.js.map +0 -1
- package/dist/llm-testrunner/p-cncubhtM.js +0 -3
- package/dist/llm-testrunner/p-cncubhtM.js.map +0 -1
- package/dist/llm-testrunner/p-f68fd660.entry.js +0 -2
- package/dist/llm-testrunner/p-f68fd660.entry.js.map +0 -1
package/dist/cjs/index.cjs.js
CHANGED
|
@@ -1,489 +1,11 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var index = require('./index-D-FySkoV.js');
|
|
3
|
+
var index = require('./index-By1scwl6.js');
|
|
4
|
+
require('./index-CgmLNwZO.js');
|
|
4
5
|
|
|
5
|
-
class LLMEvaluationEngine {
|
|
6
|
-
constructor() { }
|
|
7
|
-
async evaluateResponse(request, callback) {
|
|
8
|
-
try {
|
|
9
|
-
console.log('🔍 Starting evaluation for test case:', request.testCaseId);
|
|
10
|
-
const result = await this.performEvaluation(request);
|
|
11
|
-
console.log('Evaluation completed for test case:', request.testCaseId);
|
|
12
|
-
console.log('Result:', result);
|
|
13
|
-
callback(result);
|
|
14
|
-
}
|
|
15
|
-
catch (error) {
|
|
16
|
-
console.error('Evaluation failed:', error);
|
|
17
|
-
const errorResult = {
|
|
18
|
-
testCaseId: request.testCaseId,
|
|
19
|
-
passed: false,
|
|
20
|
-
keywordMatches: [],
|
|
21
|
-
sourceLinkMatches: [],
|
|
22
|
-
timestamp: new Date().toISOString()
|
|
23
|
-
};
|
|
24
|
-
callback(errorResult);
|
|
25
|
-
}
|
|
26
|
-
}
|
|
27
|
-
async performEvaluation(request) {
|
|
28
|
-
const { testCaseId, expectedKeywords, expectedSourceLinks, actualResponse } = request;
|
|
29
|
-
const keywordMatches = this.evaluateKeywords(expectedKeywords, actualResponse);
|
|
30
|
-
const sourceLinkMatches = this.evaluateSourceLinks(expectedSourceLinks, actualResponse);
|
|
31
|
-
// Test passes only if ALL expected keywords and source links are found
|
|
32
|
-
const totalItems = keywordMatches.length + sourceLinkMatches.length;
|
|
33
|
-
const foundItems = keywordMatches.filter(m => m.found).length + sourceLinkMatches.filter(m => m.found).length;
|
|
34
|
-
const passed = foundItems === totalItems;
|
|
35
|
-
return {
|
|
36
|
-
testCaseId,
|
|
37
|
-
passed,
|
|
38
|
-
keywordMatches,
|
|
39
|
-
sourceLinkMatches,
|
|
40
|
-
timestamp: new Date().toISOString()
|
|
41
|
-
};
|
|
42
|
-
}
|
|
43
|
-
evaluateKeywords(expectedKeywords, actualResponse) {
|
|
44
|
-
// Case-insensitive keyword matching
|
|
45
|
-
const response = actualResponse.toLowerCase();
|
|
46
|
-
return expectedKeywords.map(keyword => {
|
|
47
|
-
const keywordToMatch = keyword.toLowerCase();
|
|
48
|
-
const found = response.includes(keywordToMatch);
|
|
49
|
-
return {
|
|
50
|
-
keyword,
|
|
51
|
-
found
|
|
52
|
-
};
|
|
53
|
-
});
|
|
54
|
-
}
|
|
55
|
-
evaluateSourceLinks(expectedSourceLinks, actualResponse) {
|
|
56
|
-
return expectedSourceLinks.map(link => {
|
|
57
|
-
const found = actualResponse.includes(link);
|
|
58
|
-
return {
|
|
59
|
-
link,
|
|
60
|
-
found
|
|
61
|
-
};
|
|
62
|
-
});
|
|
63
|
-
}
|
|
64
|
-
}
|
|
65
6
|
|
|
66
|
-
/**
|
|
67
|
-
* Convert array of 16 byte values to UUID string format of the form:
|
|
68
|
-
* XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX
|
|
69
|
-
*/
|
|
70
|
-
var byteToHex = [];
|
|
71
|
-
for (var i = 0; i < 256; ++i) {
|
|
72
|
-
byteToHex.push((i + 0x100).toString(16).slice(1));
|
|
73
|
-
}
|
|
74
|
-
function unsafeStringify(arr, offset = 0) {
|
|
75
|
-
// Note: Be careful editing this code! It's been tuned for performance
|
|
76
|
-
// and works in ways you may not expect. See https://github.com/uuidjs/uuid/pull/434
|
|
77
|
-
//
|
|
78
|
-
// Note to future-self: No, you can't remove the `toLowerCase()` call.
|
|
79
|
-
// REF: https://github.com/uuidjs/uuid/pull/677#issuecomment-1757351351
|
|
80
|
-
return (byteToHex[arr[offset + 0]] + byteToHex[arr[offset + 1]] + byteToHex[arr[offset + 2]] + byteToHex[arr[offset + 3]] + '-' + byteToHex[arr[offset + 4]] + byteToHex[arr[offset + 5]] + '-' + byteToHex[arr[offset + 6]] + byteToHex[arr[offset + 7]] + '-' + byteToHex[arr[offset + 8]] + byteToHex[arr[offset + 9]] + '-' + byteToHex[arr[offset + 10]] + byteToHex[arr[offset + 11]] + byteToHex[arr[offset + 12]] + byteToHex[arr[offset + 13]] + byteToHex[arr[offset + 14]] + byteToHex[arr[offset + 15]]).toLowerCase();
|
|
81
|
-
}
|
|
82
7
|
|
|
83
|
-
|
|
84
|
-
// require the crypto API and do not support built-in fallback to lower quality random number
|
|
85
|
-
// generators (like Math.random()).
|
|
86
|
-
|
|
87
|
-
var getRandomValues;
|
|
88
|
-
var rnds8 = new Uint8Array(16);
|
|
89
|
-
function rng() {
|
|
90
|
-
// lazy load so that environments that need to polyfill have a chance to do so
|
|
91
|
-
if (!getRandomValues) {
|
|
92
|
-
// getRandomValues needs to be invoked in a context where "this" is a Crypto implementation.
|
|
93
|
-
getRandomValues = typeof crypto !== 'undefined' && crypto.getRandomValues && crypto.getRandomValues.bind(crypto);
|
|
94
|
-
if (!getRandomValues) {
|
|
95
|
-
throw new Error('crypto.getRandomValues() not supported. See https://github.com/uuidjs/uuid#getrandomvalues-not-supported');
|
|
96
|
-
}
|
|
97
|
-
}
|
|
98
|
-
return getRandomValues(rnds8);
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
var randomUUID = typeof crypto !== 'undefined' && crypto.randomUUID && crypto.randomUUID.bind(crypto);
|
|
102
|
-
var native = {
|
|
103
|
-
randomUUID
|
|
104
|
-
};
|
|
105
|
-
|
|
106
|
-
function v4(options, buf, offset) {
|
|
107
|
-
if (native.randomUUID && true && !options) {
|
|
108
|
-
return native.randomUUID();
|
|
109
|
-
}
|
|
110
|
-
options = options || {};
|
|
111
|
-
var rnds = options.random || (options.rng || rng)();
|
|
112
|
-
|
|
113
|
-
// Per 4.4, set bits for version and `clock_seq_hi_and_reserved`
|
|
114
|
-
rnds[6] = rnds[6] & 0x0f | 0x40;
|
|
115
|
-
rnds[8] = rnds[8] & 0x3f | 0x80;
|
|
116
|
-
return unsafeStringify(rnds);
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
const ErrorMessage = ({ message, onClear }) => {
|
|
120
|
-
if (!message) {
|
|
121
|
-
return null;
|
|
122
|
-
}
|
|
123
|
-
return (index.h("div", { class: "error-message" }, index.h("span", null, message), onClear && (index.h("button", { class: "error-close", title: "Close", onClick: onClear }, "\u00D7"))));
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
class RateLimitedFetcher {
|
|
127
|
-
queue = [];
|
|
128
|
-
delay; // delay in milliseconds
|
|
129
|
-
intervalId;
|
|
130
|
-
constructor(delayMs) {
|
|
131
|
-
this.delay = delayMs;
|
|
132
|
-
}
|
|
133
|
-
startQueue() {
|
|
134
|
-
if (this.intervalId)
|
|
135
|
-
return;
|
|
136
|
-
this.intervalId = setInterval(() => {
|
|
137
|
-
const task = this.queue.shift();
|
|
138
|
-
if (task)
|
|
139
|
-
task();
|
|
140
|
-
if (this.queue.length === 0) {
|
|
141
|
-
this.stop();
|
|
142
|
-
}
|
|
143
|
-
}, this.delay);
|
|
144
|
-
}
|
|
145
|
-
schedule(task) {
|
|
146
|
-
return new Promise((resolve, reject) => {
|
|
147
|
-
this.queue.push(() => {
|
|
148
|
-
task().then(resolve).catch(reject);
|
|
149
|
-
});
|
|
150
|
-
this.startQueue();
|
|
151
|
-
});
|
|
152
|
-
}
|
|
153
|
-
stop() {
|
|
154
|
-
if (this.intervalId) {
|
|
155
|
-
clearInterval(this.intervalId);
|
|
156
|
-
this.intervalId = undefined;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
async runAll(tasks) {
|
|
160
|
-
const promises = tasks.map(task => this.schedule(task));
|
|
161
|
-
return Promise.all(promises);
|
|
162
|
-
}
|
|
163
|
-
}
|
|
164
|
-
|
|
165
|
-
const llmTestRunnerCss = ":host {\n display: block;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;\n background-color: #f8fafc;\n min-height: 100vh;\n}\n\n.test-runner-container {\n padding: 20px;\n background: white;\n border-radius: 8px;\n box-shadow: 0 2px 4px rgba(0,0,0,0.1);\n margin: 20px 0;\n}\n\n.simple-test {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #ddd;\n border-radius: 4px;\n}\n\n.test-cases {\n margin: 20px 0;\n}\n\n.test-case {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #eee;\n border-radius: 4px;\n background: #f9f9f9;\n}\n\n.test-case h3 {\n margin-top: 0;\n color: #333;\n}\n\n.test-case textarea {\n width: 100%;\n padding: 10px;\n border: 1px solid #ddd;\n border-radius: 4px;\n font-family: inherit;\n margin: 10px 0;\n}\n\n.test-case button {\n background: #007bff;\n color: white;\n border: none;\n padding: 10px 20px;\n border-radius: 4px;\n cursor: pointer;\n margin: 10px 5px 10px 0;\n}\n\n.test-case button:disabled {\n background: #ccc;\n cursor: not-allowed;\n}\n\n.output, .error {\n margin: 10px 0;\n padding: 10px;\n border-radius: 4px;\n}\n\n.output {\n background: #d4edda;\n border: 1px solid #c3e6cb;\n color: #155724;\n}\n\n.error {\n background: #f8d7da;\n border: 1px solid #f5c6cb;\n color: #721c24;\n}\n\n.test-runner-container {\n max-width: 1400px;\n margin: 0 auto;\n background: white;\n box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);\n}\n\n/* Header Styles */\n.test-runner-header {\n display: flex;\n justify-content: space-between;\n align-items: center;\n padding: 20px 24px;\n background: /*linear-gradient(135deg, #667eea 0%, #764ba2 100%);*/ white;\n color: white;\n box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);\n}\n\n.header-left, .header-right {\n display: flex;\n gap: 12px;\n align-items: center;\n}\n\n.header-center {\n flex: 1;\n display: flex;\n justify-content: center;\n align-items: center;\n}\n\n.api-status {\n display: flex;\n align-items: 
center;\n gap: 0.5rem;\n}\n\n.api-status-text {\n color: #28a745;\n font-weight: 500;\n font-size: 0.9rem;\n}\n\n/* Button Styles */\n.btn {\n display: inline-flex;\n align-items: center;\n gap: 8px;\n padding: 10px 16px;\n border: none;\n border-radius: 8px;\n font-size: 14px;\n font-weight: 500;\n cursor: pointer;\n transition: all 0.2s ease;\n text-decoration: none;\n position: relative;\n}\n\n.btn:disabled {\n opacity: 0.6;\n cursor: not-allowed;\n transform: none !important;\n}\n\n.btn-primary {\n color: black;\n box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);\n}\n\n.btn-primary:hover:not(:disabled) {\n transform: translateY(-1px);\n box-shadow: 0 4px 8px rgba(59, 130, 246, 0.4);\n}\n\n.btn-secondary {\n background: rgba(255, 255, 255, 0.2);\n color: blue;\n border: 1px solid rgba(255, 255, 255, 0.3);\n}\n\n.btn-secondary:hover:not(:disabled) {\n background: rgba(255, 255, 255, 0.3);\n transform: translateY(-1px);\n}\n\n.btn-outline {\n background: transparent;\n color: #6b7280;\n border: 2px solid #e5e7eb;\n}\n\n.btn-outline:hover {\n background: #f9fafb;\n border-color: #d1d5db;\n transform: translateY(-1px);\n}\n\n.btn-icon {\n padding: 8px;\n min-width: 40px;\n height: 40px;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 16px;\n}\n\n.btn-run {\n color: white;\n}\n\n.btn-run:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.btn-delete {\n color: white;\n}\n\n.btn-delete:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.icon {\n font-size: 16px;\n}\n\n/* Main Content */\n.test-runner-content {\n padding: 0;\n}\n\n/* Column Headers */\n.column-headers {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n background: #e5e7eb;\n border-bottom: 2px solid #d1d5db;\n}\n\n.column-header {\n background: #f8fafc;\n padding: 16px 20px;\n font-weight: 600;\n color: #374151;\n font-size: 14px;\n text-transform: uppercase;\n letter-spacing: 0.05em;\n}\n\n/* Test Cases */\n.test-cases {\n 
background: white;\n}\n\n.test-case-row {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n border-bottom: 1px solid #e5e7eb;\n min-height: 200px;\n}\n\n.test-case-row:hover {\n background: #f9fafb;\n}\n\n/* Input Column */\n.input-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n}\n\n.input-group {\n margin-bottom: 16px;\n}\n\n.input-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.input-group textarea {\n width: 95%;\n padding: 12px;\n border: 2px solid #e5e7eb;\n border-radius: 8px;\n font-size: 14px;\n resize: vertical;\n outline: none;\n transition: border-color 0.2s ease;\n font-family: inherit;\n}\n\n.input-group textarea:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);\n}\n\n/* Keywords and Links */\n.keywords-group, .links-group {\n margin-bottom: 16px;\n}\n\n.keywords-group label, .links-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.tags-container, .links-container {\n display: flex;\n flex-wrap: wrap;\n gap: 8px;\n align-items: center;\n}\n\n.tag {\n display: inline-flex;\n align-items: center;\n gap: 6px;\n background: #dbeafe;\n color: #1e40af;\n padding: 4px 8px;\n border-radius: 16px;\n font-size: 12px;\n font-weight: 500;\n}\n\n.tag-remove {\n background: none;\n border: none;\n color: #1e40af;\n cursor: pointer;\n font-size: 14px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.tag-remove:hover {\n background: rgba(30, 64, 175, 0.1);\n}\n\n.link-item {\n display: flex;\n align-items: center;\n gap: 6px;\n background: #f0f9ff;\n padding: 4px 8px;\n border-radius: 6px;\n font-size: 12px;\n}\n\n.link-item a {\n color: #0369a1;\n text-decoration: none;\n max-width: 200px;\n overflow: hidden;\n text-overflow: ellipsis;\n 
white-space: nowrap;\n}\n\n.link-item a:hover {\n text-decoration: underline;\n}\n\n.link-remove {\n background: none;\n border: none;\n color: #0369a1;\n cursor: pointer;\n font-size: 12px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.link-remove:hover {\n background: rgba(3, 105, 161, 0.1);\n}\n\n.tags-container input, .links-container input {\n border: 1px solid #d1d5db;\n border-radius: 6px;\n padding: 6px 8px;\n font-size: 12px;\n outline: none;\n min-width: 120px;\n}\n\n.tags-container input:focus, .links-container input:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.1);\n}\n\n/* Output Column */\n.output-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.output-content {\n background: #f8fafc;\n border: 1px solid #e5e7eb;\n border-radius: 8px;\n padding: 16px;\n font-size: 14px;\n line-height: 1.6;\n color: #374151;\n white-space: pre-wrap;\n word-wrap: break-word;\n flex: 1;\n overflow-y: auto;\n max-height: 250px;\n overflow-x: scroll;\n}\n\n.output-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* Evaluation Column */\n.evaluation-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.evaluation-content {\n display: flex;\n flex-direction: column;\n gap: 12px;\n flex: 1;\n}\n\n.score-display {\n text-align: center;\n}\n\n.score-number {\n font-size: 24px;\n font-weight: 700;\n color: #111827;\n display: block;\n margin-bottom: 8px;\n}\n\n.score-bar {\n width: 100%;\n height: 8px;\n background: #e5e7eb;\n border-radius: 4px;\n overflow: hidden;\n}\n\n.score-fill {\n height: 100%;\n background: 
linear-gradient(90deg, #ef4444 0%, #f59e0b 50%, #10b981 100%);\n transition: width 0.3s ease;\n}\n\n.evaluation-details {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.detail-item {\n display: flex;\n align-items: center;\n gap: 8px;\n font-size: 14px;\n}\n\n.status {\n width: 20px;\n height: 20px;\n border-radius: 50%;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 12px;\n font-weight: bold;\n}\n\n.status.pass {\n background: #dcfce7;\n color: #166534;\n}\n\n.status.fail {\n background: #fef2f2;\n color: #dc2626;\n}\n\n.evaluation-text {\n font-size: 12px;\n color: #6b7280;\n line-height: 1.4;\n background: #f9fafb;\n padding: 8px;\n border-radius: 6px;\n border: 1px solid #e5e7eb;\n}\n\n.evaluation-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* New evaluation result styles */\n.evaluation-result {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.evaluation-status {\n font-weight: 600;\n font-size: 14px;\n padding: 8px 12px;\n border-radius: 4px;\n text-align: center;\n}\n\n.evaluation-status.passed {\n background: #d4edda;\n color: #155724;\n border: 1px solid #c3e6cb;\n}\n\n.evaluation-status.failed {\n background: #f8d7da;\n color: #721c24;\n border: 1px solid #f5c6cb;\n}\n\n.evaluation-score {\n font-size: 12px;\n color: #495057;\n text-align: center;\n font-weight: 500;\n}\n\n.evaluation-feedback {\n font-size: 12px;\n color: #6c757d;\n background: #f8f9fa;\n padding: 8px;\n border-radius: 4px;\n border: 1px solid #dee2e6;\n}\n\n/* Actions Column */\n.actions-column {\n padding: 20px;\n background: white;\n display: flex;\n flex-direction: column;\n gap: 12px;\n align-items: center;\n justify-content: flex-start;\n align-self: flex-start;\n}\n\n/* Add Test Case */\n.add-test-case {\n padding: 24px;\n text-align: center;\n 
background: #f9fafb;\n border-top: 1px solid #e5e7eb;\n}\n\n.hidden {\n display: none;\n}\n\n.error-message {\n background: #ffeaea;\n color: #b71c1c;\n border: 1px solid #f44336;\n padding: 0.75em 2.5em 0.75em 1em;\n border-radius: 4px;\n margin: 1em 0;\n position: relative;\n font-size: 1em;\n display: flex;\n align-items: center;\n gap: 1em;\n}\n\n.error-close {\n background: none;\n border: none;\n color: #b71c1c;\n font-size: 1.25em;\n font-weight: bold;\n cursor: pointer;\n position: absolute;\n right: 0.75em;\n top: 50%;\n transform: translateY(-50%);\n line-height: 1;\n padding: 0;\n}\n\n/* Responsive Design */\n@media (max-width: 1200px) {\n .test-case-row {\n grid-template-columns: 1fr;\n gap: 0;\n }\n \n .column-headers {\n display: none;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n border-right: none;\n border-bottom: 1px solid #e5e7eb;\n }\n \n .actions-column {\n flex-direction: row;\n justify-content: center;\n }\n}\n\n@media (max-width: 768px) {\n .test-runner-header {\n flex-direction: column;\n gap: 16px;\n padding: 16px;\n }\n \n .header-left, .header-right {\n flex-wrap: wrap;\n justify-content: center;\n }\n \n .btn {\n font-size: 12px;\n padding: 8px 12px;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n padding: 16px;\n }\n \n .test-case-row {\n min-height: auto;\n }\n}";
|
|
166
|
-
|
|
167
|
-
/**
 * Component class for the LLM test runner UI.
 *
 * Holds a list of test cases (question, expected keywords, expected source
 * links), asks the host to answer each question through the `llmRequest`
 * event, evaluates the answers with `LLMEvaluationEngine`, and supports
 * importing/exporting the suite as JSON and exporting results as CSV.
 */
const LLMTestRunner = class {
    constructor(hostRef) {
        index.registerInstance(this, hostRef);
        this.llmRequest = index.createEvent(this, "llmRequest");
    }
    llmRequest;
    delayMs = 500;
    testCases = [
        {
            id: '1',
            question: '',
            expectedKeywords: [],
            expectedSourceLinks: [],
            isRunning: false
        }
    ];
    isRunningAll = false;
    error = '';
    fileInput;
    isExportingTestSuite = false;
    isExportingTestResults = false;
    evaluationEngine;

    /** Creates the evaluation engine before the first render. */
    componentWillLoad() {
        this.evaluationEngine = new LLMEvaluationEngine();
    }
    componentDidLoad() {
    }
    disconnectedCallback() {
    }

    /** Appends an empty test case with a freshly generated id. */
    addNewTestCase() {
        const newTestCase = {
            id: this.generateId(),
            question: '',
            expectedKeywords: [],
            expectedSourceLinks: [],
            isRunning: false
        };
        this.testCases = [...this.testCases, newTestCase];
    }

    /**
     * Replaces `testCases` with a copy in which the matching test case has
     * `updates` merged in. Reassigning the array (rather than mutating) is what
     * the rest of the class relies on to pick up the change.
     *
     * @param {string} id - Id of the test case to update.
     * @param {object} updates - Properties to merge into it.
     */
    updateTestCase(id, updates) {
        this.testCases = this.testCases.map(tc => tc.id === id ? { ...tc, ...updates } : tc);
    }

    /**
     * Turns a rejection value into a displayable message. Plain non-empty
     * strings are kept as-is so host-supplied messages are not lost.
     *
     * @param {*} error - Value the host rejected or threw.
     * @returns {string} Message to store on the test case.
     */
    describeError(error) {
        if (error instanceof Error) {
            return error.message;
        }
        return typeof error === 'string' && error !== '' ? error : 'Unknown error';
    }

    /**
     * Runs one test case: emits `llmRequest` with the question plus
     * `resolve`/`reject` callbacks for the host, records the answer and response
     * time, then evaluates the answer.
     *
     * @param {object} testCase - Snapshot of the test case to run.
     * @returns {Promise<void>} Resolves after evaluation; rejects when the host
     *   rejects, when emitting throws, or when evaluation throws.
     */
    async runSingleTest(testCase) {
        console.log('🚀 Starting test for question:', testCase.question);
        const startTime = Date.now();
        this.updateTestCase(testCase.id, { isRunning: true });
        return new Promise((resolve, reject) => {
            const failRequest = (error) => {
                console.error('❌ Error in runSingleTest:', error);
                this.updateTestCase(testCase.id, {
                    isRunning: false,
                    output: null,
                    error: this.describeError(error)
                });
                reject(error);
            };
            const request = {
                prompt: testCase.question,
                resolve: async (aiResponse) => {
                    console.log('✅ AI call completed for test case:', testCase.id);
                    const responseTime = Date.now() - startTime;
                    this.updateTestCase(testCase.id, {
                        isRunning: false,
                        output: aiResponse,
                        error: null,
                        responseTime: responseTime
                    });
                    try {
                        await this.evaluateResponse({
                            ...testCase,
                            output: aiResponse,
                            responseTime: responseTime
                        });
                    }
                    catch (error) {
                        // Without this the outer promise would never settle and a
                        // "Run All" batch would wait forever. The output is kept; only
                        // the evaluation failure is recorded.
                        console.error('❌ Error in runSingleTest:', error);
                        this.updateTestCase(testCase.id, { error: this.describeError(error) });
                        reject(error);
                        return;
                    }
                    resolve();
                },
                reject: failRequest
            };
            try {
                this.llmRequest.emit(request);
            }
            catch (error) {
                // A listener that throws synchronously must not leave the row
                // stuck in its running state.
                failRequest(error);
            }
        });
    }

    /** Removes the test case with the given id. */
    deleteTestCase(id) {
        this.testCases = this.testCases.filter(tc => tc.id !== id);
    }

    /** Adds a trimmed, non-empty keyword to the given test case. */
    addKeyword(testCaseId, keyword) {
        const trimmed = keyword.trim();
        if (!trimmed) {
            return;
        }
        const testCase = this.testCases.find(tc => tc.id === testCaseId);
        if (testCase) {
            this.updateTestCase(testCaseId, {
                expectedKeywords: [...testCase.expectedKeywords, trimmed]
            });
        }
    }

    /** Removes the keyword at position `index` from the given test case. */
    removeKeyword(testCaseId, index) {
        const testCase = this.testCases.find(tc => tc.id === testCaseId);
        if (testCase) {
            const newKeywords = testCase.expectedKeywords.filter((_, i) => i !== index);
            this.updateTestCase(testCaseId, { expectedKeywords: newKeywords });
        }
    }

    /** Adds a trimmed, non-empty source link to the given test case. */
    addSourceLink(testCaseId, link) {
        const trimmed = link.trim();
        if (!trimmed) {
            return;
        }
        const testCase = this.testCases.find(tc => tc.id === testCaseId);
        if (testCase) {
            this.updateTestCase(testCaseId, {
                expectedSourceLinks: [...testCase.expectedSourceLinks, trimmed]
            });
        }
    }

    /** Removes the source link at position `index` from the given test case. */
    removeSourceLink(testCaseId, index) {
        const testCase = this.testCases.find(tc => tc.id === testCaseId);
        if (testCase) {
            const newLinks = testCase.expectedSourceLinks.filter((_, i) => i !== index);
            this.updateTestCase(testCaseId, { expectedSourceLinks: newLinks });
        }
    }

    /**
     * Sends a test case's output to the evaluation engine and stores the
     * result it reports through the callback. Does nothing when there is no
     * output.
     *
     * @param {object} testCase - Test case including its `output`.
     */
    async evaluateResponse(testCase) {
        if (!testCase.output) {
            console.warn('⚠️ No output to evaluate for test case:', testCase.id);
            return;
        }
        const evaluationRequest = {
            testCaseId: testCase.id,
            question: testCase.question,
            expectedKeywords: testCase.expectedKeywords,
            expectedSourceLinks: testCase.expectedSourceLinks,
            actualResponse: testCase.output
        };
        await this.evaluationEngine.evaluateResponse(evaluationRequest, (result) => {
            console.log('📊 Evaluation result received:', result);
            this.updateTestCase(testCase.id, {
                evaluationResult: result
            });
        });
    }

    /**
     * Runs every idle test case that has a question, starting one every
     * `delayMs` milliseconds. Individual failures are logged and do not stop
     * the batch.
     */
    async runAllTests() {
        this.isRunningAll = true;
        const tasks = this.testCases
            .filter(testCase => !testCase.isRunning && testCase.question.trim())
            .map(testCase => () => this.runSingleTest(testCase).catch(err => {
                console.error(`⚠️ Test case ${testCase.id} failed`, err);
            }));
        try {
            const fetcher = new RateLimitedFetcher(this.delayMs);
            await fetcher.runAll(tasks);
        }
        catch (err) {
            console.error('⚠️ Error running all tests:', err);
        }
        finally {
            this.isRunningAll = false;
        }
    }

    /** Returns a new test case id. */
    generateId() {
        return v4();
    }

    /** Opens the hidden file picker. */
    handleFileSelect() {
        this.fileInput.click();
    }

    /**
     * Keeps imported text only when it is a string; anything else becomes ''.
     * Render code calls `.trim()` on questions, so non-strings must not get in.
     */
    normalizeImportedText(value) {
        return typeof value === 'string' ? value : '';
    }

    /**
     * Keeps the string entries of an imported list (numbers are converted to
     * strings); any other value, or a non-array, yields no entries.
     */
    normalizeImportedList(value) {
        if (!Array.isArray(value)) {
            return [];
        }
        return value
            .filter(entry => typeof entry === 'string' || typeof entry === 'number')
            .map(String);
    }

    /**
     * Imports a test suite from the selected JSON file, replacing the current
     * test cases. Problems are reported through `this.error`; on failure the
     * existing test cases are left untouched.
     *
     * @param {Event} event - Change event from the file input.
     */
    async handleFileChange(event) {
        const target = event.target;
        const file = target.files?.[0];
        // Immediately clear the input value to allow for a new upload.
        target.value = '';
        if (!file) {
            this.error = 'No file selected.';
            return;
        }
        const isJsonType = file.type === 'application/json';
        const isJsonExtension = file.name.toLowerCase().endsWith('.json');
        if (!isJsonType && !isJsonExtension) {
            this.error = 'Invalid file type. Please select a JSON file.';
            return;
        }
        this.error = '';
        try {
            const content = await this.readFileAsync(file);
            const fileContent = JSON.parse(content);
            if (!Array.isArray(fileContent)) {
                throw new Error("Invalid JSON structure. Expected a JSON array.");
            }
            const importedTestCases = fileContent.map((item, position) => {
                if (item === null || typeof item !== 'object' || Array.isArray(item)) {
                    throw new Error(`Invalid JSON structure. Entry ${position + 1} is not a test case object.`);
                }
                return {
                    id: this.generateId(),
                    question: this.normalizeImportedText(item.question),
                    expectedKeywords: this.normalizeImportedList(item.expectedKeywords),
                    expectedSourceLinks: this.normalizeImportedList(item.expectedSourceLinks),
                    isRunning: false
                };
            });
            this.testCases = importedTestCases;
        }
        catch (err) {
            this.error = err?.message || 'Error processing file. Please ensure it is a valid JSON array.';
            console.error('File Processing Error:', err);
        }
    }

    /**
     * Reads a file as text.
     *
     * @param {Blob} file - File to read.
     * @returns {Promise<string>} The file's text; rejects with the reader's error.
     */
    readFileAsync(file) {
        return new Promise((resolve, reject) => {
            const reader = new FileReader();
            reader.onload = () => resolve(reader.result);
            reader.onerror = () => reject(reader.error);
            reader.readAsText(file);
        });
    }

    /**
     * Triggers a browser download of `content` by clicking a temporary link
     * that points at an object URL.
     */
    downloadFile(content, filename, mimeType) {
        const blob = new Blob([content], { type: mimeType });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.href = url;
        link.download = filename;
        document.body.appendChild(link);
        link.click();
        document.body.removeChild(link);
        URL.revokeObjectURL(url);
    }

    /** Downloads the test inputs (no outputs or results) as `test-suite.json`. */
    async handleExportTestSuite() {
        this.isExportingTestSuite = true;
        try {
            // Exporting only input data (question, expected keywords, expected source links)
            const exportData = this.testCases.map(testCase => ({
                id: testCase.id,
                question: testCase.question,
                expectedKeywords: testCase.expectedKeywords,
                expectedSourceLinks: testCase.expectedSourceLinks
            }));
            const jsonContent = JSON.stringify(exportData, null, 2);
            // Added a small delay to show the loading state
            await new Promise(resolve => setTimeout(resolve, 500));
            this.downloadFile(jsonContent, 'test-suite.json', 'application/json');
        }
        finally {
            this.isExportingTestSuite = false;
        }
    }

    /**
     * Builds the results CSV: a header row followed by one row per test case.
     * Match columns are `found/total`, or 'N/A' when nothing was expected;
     * rows without an evaluation leave the generated/match columns empty.
     *
     * @returns {string} CSV text with rows joined by '\n'.
     */
    buildResultsCsv() {
        const headers = [
            'Question',
            'Expected Keywords',
            'Generated Keywords',
            'Keywords Match',
            'Expected Source Links',
            'Generated Source Links',
            'Source Links Match',
            'Response Time (s)'
        ];
        const dataRows = this.testCases.map(testCase => {
            const expectedKeywords = testCase.expectedKeywords.join('; ');
            const expectedSourceLinks = testCase.expectedSourceLinks.join('; ');
            let generatedKeywords = '';
            let generatedSourceLinks = '';
            let keywordsMatch = '';
            let sourceLinksMatch = '';
            const evaluation = testCase.evaluationResult;
            if (evaluation) {
                const foundKeywords = evaluation.keywordMatches
                    .filter(match => match.found)
                    .map(match => match.keyword);
                const foundSourceLinks = evaluation.sourceLinkMatches
                    .filter(match => match.found)
                    .map(match => match.link);
                generatedKeywords = foundKeywords.join('; ');
                generatedSourceLinks = foundSourceLinks.join('; ');
                const totalKeywords = evaluation.keywordMatches.length;
                const totalSourceLinks = evaluation.sourceLinkMatches.length;
                keywordsMatch = totalKeywords > 0 ? `${foundKeywords.length}/${totalKeywords}` : 'N/A';
                sourceLinksMatch = totalSourceLinks > 0 ? `${foundSourceLinks.length}/${totalSourceLinks}` : 'N/A';
            }
            // A type check, not truthiness: a measured 0 ms is a real value.
            const responseTime = typeof testCase.responseTime === 'number'
                ? (testCase.responseTime / 1000).toFixed(3)
                : 'N/A';
            return [
                this.escapeCsvField(testCase.question),
                this.escapeCsvField(expectedKeywords),
                this.escapeCsvField(generatedKeywords),
                keywordsMatch,
                this.escapeCsvField(expectedSourceLinks),
                this.escapeCsvField(generatedSourceLinks),
                sourceLinksMatch,
                responseTime
            ].join(',');
        });
        return [headers.join(','), ...dataRows].join('\n');
    }

    /** Downloads the evaluation results as `test-results.csv`. */
    async handleExportTestResults() {
        this.isExportingTestResults = true;
        try {
            const csvContent = this.buildResultsCsv();
            // Added a small delay to show the loading state
            await new Promise(resolve => setTimeout(resolve, 500));
            this.downloadFile(csvContent, 'test-results.csv', 'text/csv');
        }
        finally {
            this.isExportingTestResults = false;
        }
    }

    /**
     * Quotes a CSV field when it contains a comma, double quote, CR or LF,
     * doubling embedded quotes. `null`/`undefined` become an empty field and
     * other non-strings are converted with `String`.
     *
     * @param {*} field - Raw field value.
     * @returns {string} Field text safe to join with commas.
     */
    escapeCsvField(field) {
        const text = field == null ? '' : String(field);
        if (/[",\r\n]/.test(text)) {
            return `"${text.replace(/"/g, '""')}"`;
        }
        return text;
    }

    /**
     * Returns `link` when it is safe to use as an anchor `href` (http, https,
     * or a relative reference), otherwise `undefined`. Links can come from an
     * imported file, so schemes such as `javascript:` must not become
     * clickable.
     *
     * @param {string} link - Link text entered or imported by the user.
     * @returns {string|undefined} The link, or `undefined` when it should not be navigable.
     */
    getSafeHref(link) {
        try {
            // The base only lets relative references parse; it is never navigated to.
            const { protocol } = new URL(link, 'https://relative.invalid/');
            return protocol === 'http:' || protocol === 'https:' ? link : undefined;
        }
        catch {
            return undefined;
        }
    }

    /** Renders the toolbar: import/export buttons on the left, actions on the right. */
    renderHeader() {
        const headerLeft = index.h("div", { key: '368ef117a1c2cc0be0446e2d373e38e0af1c2040', class: "header-left" },
            index.h("input", { key: '74346a634b36e9d04f75817495834983d970306c', class: "hidden", type: "file", ref: (el) => (this.fileInput = el), onChange: (e) => this.handleFileChange(e), accept: ".json,application/json" }),
            index.h("button", { key: 'e095ff9de105cb281e60f70de02fb0de8a0f6478', class: "btn btn-secondary", onClick: () => this.handleFileSelect() },
                index.h("span", { key: '6f828f297867ae54baf1f89c9dbce611eba2056a', class: "icon" }, "\u2191"),
                "Import Test Suite"),
            index.h("button", { key: '5682dc8f70edac9fdf33b394677a6859555c039b', class: "btn btn-secondary", onClick: () => this.handleExportTestSuite(), disabled: this.isExportingTestSuite },
                index.h("span", { key: 'a1e8d3170ce39e499160e578e3e02d39cd0da9d7', class: "icon" }, this.isExportingTestSuite ? '⏳' : '↓'),
                this.isExportingTestSuite ? 'Exporting...' : 'Export Test Suite'));
        const headerRight = index.h("div", { key: 'af96ea230b04f4f6ce8e3488a1f8f7f2a01e74c0', class: "header-right" },
            index.h("button", { key: 'a90bfb3421f5e2e5112b3642ddf81cf19372e0ad', class: "btn btn-secondary" },
                index.h("span", { key: '79611828e5dc1ad53998a74fca10a1a05c5d2a4e', class: "icon" }, "\u2699\uFE0F"),
                "Prompt Editor"),
            index.h("button", { key: '451367ce26183a58add0902e1de2d29b4933add4', class: "btn btn-secondary", onClick: () => this.handleExportTestResults(), disabled: this.isExportingTestResults },
                index.h("span", { key: '11f791f2071b58e0800d14ac7e6bebba67421449', class: "icon" }, this.isExportingTestResults ? '⏳' : '↓'),
                this.isExportingTestResults ? 'Exporting...' : 'Export Test Results'),
            index.h("button", { key: '3cbf7626ccc5f171cdc6fd7ce3750ea3786218aa', class: "btn btn-primary", onClick: () => this.runAllTests(), disabled: this.isRunningAll }, this.isRunningAll ? 'Running...' : 'Run All'));
        return index.h("header", { key: 'd09b424069227500ffffcab8724b48d91705baab', class: "test-runner-header" }, headerLeft, headerRight);
    }

    /** Renders the static column title row. */
    renderColumnHeaders() {
        return index.h("div", { key: '4be27d07a13d88611b5fc54b05e826c790fd2145', class: "column-headers" },
            index.h("div", { key: 'eeb9ace5e63b80359d2decb1b42461f77cfe98cf', class: "column-header" }, "Input"),
            index.h("div", { key: 'c70cc5af0aaca2636eee298dfaa4da81799df6cf', class: "column-header" }, "Output"),
            index.h("div", { key: '9ca69fddafe7267bf5b8b12601e2cf64d65a58e1', class: "column-header" }, "Evaluation"),
            index.h("div", { key: 'b800841828c22a6aad0345e58b47f71469594792', class: "column-header" }, "Actions"));
    }

    /** Renders the question, keyword and source-link editors for one test case. */
    renderInputColumn(testCase) {
        const questionGroup = index.h("div", { class: "input-group" },
            index.h("label", null, "Question"),
            index.h("textarea", { value: testCase.question, onInput: (e) => this.updateTestCase(testCase.id, {
                    question: e.target.value
                }), placeholder: "Enter your question here...", rows: 3 }));
        const keywordsGroup = index.h("div", { class: "keywords-group" },
            index.h("label", null, "Expected keywords"),
            index.h("div", { class: "tags-container" },
                testCase.expectedKeywords.map((keyword, index$1) => (index.h("span", { class: "tag", key: index$1 },
                    keyword,
                    index.h("button", { class: "tag-remove", onClick: () => this.removeKeyword(testCase.id, index$1) }, "\u00D7")))),
                index.h("input", { type: "text", placeholder: "New item...", onKeyDown: (e) => {
                        if (e.key === 'Enter') {
                            this.addKeyword(testCase.id, e.target.value);
                            e.target.value = '';
                        }
                    } })));
        const linksGroup = index.h("div", { class: "links-group" },
            index.h("label", null, "Expected source links"),
            index.h("div", { class: "links-container" },
                testCase.expectedSourceLinks.map((link, index$1) => (index.h("div", { class: "link-item", key: index$1 },
                    index.h("a", { href: this.getSafeHref(link), target: "_blank", rel: "noopener noreferrer" }, link),
                    index.h("button", { class: "link-remove", onClick: () => this.removeSourceLink(testCase.id, index$1) }, "\u00D7")))),
                index.h("input", { type: "url", placeholder: "New item...", onKeyDown: (e) => {
                        if (e.key === 'Enter') {
                            this.addSourceLink(testCase.id, e.target.value);
                            e.target.value = '';
                        }
                    } })));
        return index.h("div", { class: "input-column" }, questionGroup, keywordsGroup, linksGroup);
    }

    /** Renders one test case row: inputs, output, evaluation and action buttons. */
    renderTestCaseRow(testCase) {
        const evaluation = testCase.evaluationResult;
        const outputColumn = index.h("div", { class: "output-column" }, testCase.output
            ? (index.h("div", { class: "output-content" }, testCase.output))
            : (index.h("div", { class: "output-placeholder" }, testCase.isRunning ? 'Running...' : '')));
        const evaluationColumn = index.h("div", { class: "evaluation-column" }, evaluation
            ? (index.h("div", { class: "evaluation-result" },
                index.h("div", { class: `evaluation-status ${evaluation.passed ? 'passed' : 'failed'}` }, evaluation.passed ? '✅ PASSED' : '❌ FAILED'),
                index.h("div", { class: "evaluation-details" }, "Keywords: ", evaluation.keywordMatches.filter(m => m.found).length, "/", evaluation.keywordMatches.length, " found")))
            : (index.h("div", { class: "evaluation-placeholder" }, testCase.isRunning ? 'Evaluating...' : '')));
        const actionsColumn = index.h("div", { class: "actions-column" },
            index.h("button", { class: "btn btn-icon btn-run", onClick: () => this.runSingleTest(testCase), disabled: testCase.isRunning || !testCase.question.trim(), title: !testCase.question.trim() ? "Enter a question first" : "Run this test" }, testCase.isRunning ? '⏳' : '▶️'),
            index.h("button", { class: "btn btn-icon btn-delete", onClick: () => this.deleteTestCase(testCase.id), title: "Delete this test" }, "\uD83D\uDDD1\uFE0F"));
        return index.h("div", { class: "test-case-row", key: testCase.id }, this.renderInputColumn(testCase), outputColumn, evaluationColumn, actionsColumn);
    }

    /** Renders the whole component: header, error banner, test case table and add button. */
    render() {
        return (index.h("div", { key: 'beb2ab78108fede00c5d759d1ac5c98ae7f037d0', class: "test-runner-container" },
            this.renderHeader(),
            index.h(ErrorMessage, { key: 'ffdd72a1db1f45c5db158d2df0be64dc978fa930', message: this.error, onClear: () => (this.error = '') }),
            index.h("div", { key: '64edd5c84c4b566f90d1860f6d6bf5aeafb7ae0f', class: "test-runner-content" },
                this.renderColumnHeaders(),
                index.h("div", { key: 'da186e25c2111474f65b89a017158bffdbc83e03', class: "test-cases" }, this.testCases.map((testCase) => this.renderTestCaseRow(testCase))),
                index.h("div", { key: '2d4446b15a31adfbaf88b3d46fcbb3cf314f8861', class: "add-test-case" },
                    index.h("button", { key: '737e9d856db0f9090923bef718bb9bfde974b950', class: "btn btn-outline", onClick: () => this.addNewTestCase() }, "+ Add Question")))));
    }
};
|
|
486
|
-
LLMTestRunner.style = llmTestRunnerCss;
|
|
487
|
-
|
|
488
|
-
exports.LLMTestRunner = LLMTestRunner;
|
|
8
|
+
exports.LLMTestRunner = index.LLMTestRunner;
|
|
489
9
|
//# sourceMappingURL=index.cjs.js.map
|
|
10
|
+
|
|
11
|
+
//# sourceMappingURL=index.cjs.js.map
|