llm-testrunner-components 1.0.4 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -10
- package/dist/cjs/app-globals-CbbEbofA.js +14 -0
- package/dist/cjs/app-globals-CbbEbofA.js.map +1 -0
- package/dist/cjs/{index-CY2lQip_.js → index-D-FySkoV.js} +25 -5
- package/dist/cjs/index-D-FySkoV.js.map +1 -0
- package/dist/cjs/index.cjs.js +159 -66
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-test-runner.cjs.entry.js +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +4 -4
- package/dist/cjs/llm-testrunner.cjs.js.map +1 -1
- package/dist/cjs/loader.cjs.js +3 -3
- package/dist/collection/collection-manifest.json +3 -2
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +152 -72
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/global/env.js +6 -0
- package/dist/collection/global/env.js.map +1 -0
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +39 -0
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -0
- package/dist/components/index.js +6 -1650
- package/dist/components/index.js.map +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-CYUbsbxt.js +1770 -0
- package/dist/components/p-CYUbsbxt.js.map +1 -0
- package/dist/esm/app-globals-BOQOUavG.js +12 -0
- package/dist/esm/app-globals-BOQOUavG.js.map +1 -0
- package/dist/esm/{index-DBp-rMPb.js → index-cncubhtM.js} +25 -6
- package/dist/esm/index-cncubhtM.js.map +1 -0
- package/dist/esm/index.js +159 -66
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-test-runner.entry.js +1 -1
- package/dist/esm/llm-testrunner.js +5 -5
- package/dist/esm/llm-testrunner.js.map +1 -1
- package/dist/esm/loader.js +4 -4
- package/dist/llm-testrunner/index.esm.js +1 -1
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js.map +1 -1
- package/dist/llm-testrunner/p-BOQOUavG.js +2 -0
- package/dist/llm-testrunner/p-BOQOUavG.js.map +1 -0
- package/dist/llm-testrunner/p-cncubhtM.js +3 -0
- package/dist/llm-testrunner/p-cncubhtM.js.map +1 -0
- package/dist/llm-testrunner/p-f68fd660.entry.js +2 -0
- package/dist/react/components.d.ts +6 -3
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +2 -2
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +13 -4
- package/dist/types/components.d.ts +26 -2
- package/dist/types/global/env.d.ts +8 -0
- package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +10 -0
- package/dist/types/stencil-public-runtime.d.ts +19 -10
- package/package.json +3 -1
- package/dist/cjs/app-globals-V2Kpy_OQ.js +0 -8
- package/dist/cjs/app-globals-V2Kpy_OQ.js.map +0 -1
- package/dist/cjs/index-CY2lQip_.js.map +0 -1
- package/dist/esm/app-globals-DQuL1Twl.js +0 -6
- package/dist/esm/app-globals-DQuL1Twl.js.map +0 -1
- package/dist/esm/index-DBp-rMPb.js.map +0 -1
- package/dist/llm-testrunner/p-DBp-rMPb.js +0 -3
- package/dist/llm-testrunner/p-DBp-rMPb.js.map +0 -1
- package/dist/llm-testrunner/p-DQuL1Twl.js +0 -2
- package/dist/llm-testrunner/p-DQuL1Twl.js.map +0 -1
- package/dist/llm-testrunner/p-ed2ea423.entry.js +0 -2
- /package/dist/llm-testrunner/{p-ed2ea423.entry.js.map → p-f68fd660.entry.js.map} +0 -0
package/dist/cjs/index.cjs.js
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
'use strict';
|
|
2
2
|
|
|
3
|
-
var index = require('./index-
|
|
3
|
+
var index = require('./index-D-FySkoV.js');
|
|
4
4
|
|
|
5
5
|
class LLMEvaluationEngine {
|
|
6
6
|
constructor() { }
|
|
@@ -123,13 +123,54 @@ const ErrorMessage = ({ message, onClear }) => {
|
|
|
123
123
|
return (index.h("div", { class: "error-message" }, index.h("span", null, message), onClear && (index.h("button", { class: "error-close", title: "Close", onClick: onClear }, "\u00D7"))));
|
|
124
124
|
};
|
|
125
125
|
|
|
126
|
+
/**
 * Starts queued tasks one at a time, at most one per `delay` milliseconds.
 *
 * Tasks are launched from a `setInterval` tick, so the first task begins one
 * full `delay` after it is scheduled. The timer is cleared as soon as the
 * queue drains and is re-created by the next `schedule()` call. Tasks are only
 * *started* at the configured rate; a slow task may still be in flight when
 * the next one starts.
 */
class RateLimitedFetcher {
    /** Pending task launchers, oldest first. */
    queue = [];
    /** Spacing between task starts, in milliseconds. */
    delay;
    /** Handle of the active interval timer; unset while the queue is idle. */
    intervalId;

    /**
     * @param {number} delayMs - Interval between task starts, in milliseconds.
     */
    constructor(delayMs) {
        this.delay = delayMs;
    }

    /** Starts the interval timer unless one is already running. */
    startQueue() {
        if (this.intervalId != null) {
            return;
        }
        this.intervalId = setInterval(() => {
            const launch = this.queue.shift();
            if (launch) {
                launch();
            }
            // Nothing left to start: release the timer so it does not keep firing.
            if (this.queue.length === 0) {
                this.stop();
            }
        }, this.delay);
    }

    /**
     * Queues `task` and returns a promise that settles with its outcome.
     *
     * @param {() => any} task - Function to run; may return a promise or a plain value.
     * @returns {Promise<any>} Resolves with the task's result, rejects with its error.
     */
    schedule(task) {
        return new Promise((resolve, reject) => {
            this.queue.push(() => {
                // Running the task inside a Promise executor converts a synchronous
                // throw into a rejection and accepts non-promise return values, so
                // the timer callback never throws and the caller's promise always
                // settles.
                new Promise((settle) => settle(task())).then(resolve, reject);
            });
            this.startQueue();
        });
    }

    /**
     * Clears the interval timer if one is active. Queued tasks are left in the
     * queue; they resume when a later `schedule()` call restarts the timer.
     */
    stop() {
        if (this.intervalId != null) {
            clearInterval(this.intervalId);
            this.intervalId = undefined;
        }
    }

    /**
     * Schedules every task and waits for all of them.
     *
     * @param {Array<() => any>} tasks - Tasks to run in order.
     * @returns {Promise<any[]>} Results in task order; rejects on the first task failure.
     */
    async runAll(tasks) {
        return Promise.all(tasks.map((task) => this.schedule(task)));
    }
}
|
|
164
|
+
|
|
126
165
|
const llmTestRunnerCss = ":host {\n display: block;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;\n background-color: #f8fafc;\n min-height: 100vh;\n}\n\n.test-runner-container {\n padding: 20px;\n background: white;\n border-radius: 8px;\n box-shadow: 0 2px 4px rgba(0,0,0,0.1);\n margin: 20px 0;\n}\n\n.simple-test {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #ddd;\n border-radius: 4px;\n}\n\n.test-cases {\n margin: 20px 0;\n}\n\n.test-case {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #eee;\n border-radius: 4px;\n background: #f9f9f9;\n}\n\n.test-case h3 {\n margin-top: 0;\n color: #333;\n}\n\n.test-case textarea {\n width: 100%;\n padding: 10px;\n border: 1px solid #ddd;\n border-radius: 4px;\n font-family: inherit;\n margin: 10px 0;\n}\n\n.test-case button {\n background: #007bff;\n color: white;\n border: none;\n padding: 10px 20px;\n border-radius: 4px;\n cursor: pointer;\n margin: 10px 5px 10px 0;\n}\n\n.test-case button:disabled {\n background: #ccc;\n cursor: not-allowed;\n}\n\n.output, .error {\n margin: 10px 0;\n padding: 10px;\n border-radius: 4px;\n}\n\n.output {\n background: #d4edda;\n border: 1px solid #c3e6cb;\n color: #155724;\n}\n\n.error {\n background: #f8d7da;\n border: 1px solid #f5c6cb;\n color: #721c24;\n}\n\n.test-runner-container {\n max-width: 1400px;\n margin: 0 auto;\n background: white;\n box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);\n}\n\n/* Header Styles */\n.test-runner-header {\n display: flex;\n justify-content: space-between;\n align-items: center;\n padding: 20px 24px;\n background: /*linear-gradient(135deg, #667eea 0%, #764ba2 100%);*/ white;\n color: white;\n box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);\n}\n\n.header-left, .header-right {\n display: flex;\n gap: 12px;\n align-items: center;\n}\n\n.header-center {\n flex: 1;\n display: flex;\n justify-content: center;\n align-items: center;\n}\n\n.api-status {\n display: flex;\n align-items: 
center;\n gap: 0.5rem;\n}\n\n.api-status-text {\n color: #28a745;\n font-weight: 500;\n font-size: 0.9rem;\n}\n\n/* Button Styles */\n.btn {\n display: inline-flex;\n align-items: center;\n gap: 8px;\n padding: 10px 16px;\n border: none;\n border-radius: 8px;\n font-size: 14px;\n font-weight: 500;\n cursor: pointer;\n transition: all 0.2s ease;\n text-decoration: none;\n position: relative;\n}\n\n.btn:disabled {\n opacity: 0.6;\n cursor: not-allowed;\n transform: none !important;\n}\n\n.btn-primary {\n color: black;\n box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);\n}\n\n.btn-primary:hover:not(:disabled) {\n transform: translateY(-1px);\n box-shadow: 0 4px 8px rgba(59, 130, 246, 0.4);\n}\n\n.btn-secondary {\n background: rgba(255, 255, 255, 0.2);\n color: blue;\n border: 1px solid rgba(255, 255, 255, 0.3);\n}\n\n.btn-secondary:hover:not(:disabled) {\n background: rgba(255, 255, 255, 0.3);\n transform: translateY(-1px);\n}\n\n.btn-outline {\n background: transparent;\n color: #6b7280;\n border: 2px solid #e5e7eb;\n}\n\n.btn-outline:hover {\n background: #f9fafb;\n border-color: #d1d5db;\n transform: translateY(-1px);\n}\n\n.btn-icon {\n padding: 8px;\n min-width: 40px;\n height: 40px;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 16px;\n}\n\n.btn-run {\n color: white;\n}\n\n.btn-run:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.btn-delete {\n color: white;\n}\n\n.btn-delete:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.icon {\n font-size: 16px;\n}\n\n/* Main Content */\n.test-runner-content {\n padding: 0;\n}\n\n/* Column Headers */\n.column-headers {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n background: #e5e7eb;\n border-bottom: 2px solid #d1d5db;\n}\n\n.column-header {\n background: #f8fafc;\n padding: 16px 20px;\n font-weight: 600;\n color: #374151;\n font-size: 14px;\n text-transform: uppercase;\n letter-spacing: 0.05em;\n}\n\n/* Test Cases */\n.test-cases {\n 
background: white;\n}\n\n.test-case-row {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n border-bottom: 1px solid #e5e7eb;\n min-height: 200px;\n}\n\n.test-case-row:hover {\n background: #f9fafb;\n}\n\n/* Input Column */\n.input-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n}\n\n.input-group {\n margin-bottom: 16px;\n}\n\n.input-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.input-group textarea {\n width: 95%;\n padding: 12px;\n border: 2px solid #e5e7eb;\n border-radius: 8px;\n font-size: 14px;\n resize: vertical;\n outline: none;\n transition: border-color 0.2s ease;\n font-family: inherit;\n}\n\n.input-group textarea:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);\n}\n\n/* Keywords and Links */\n.keywords-group, .links-group {\n margin-bottom: 16px;\n}\n\n.keywords-group label, .links-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.tags-container, .links-container {\n display: flex;\n flex-wrap: wrap;\n gap: 8px;\n align-items: center;\n}\n\n.tag {\n display: inline-flex;\n align-items: center;\n gap: 6px;\n background: #dbeafe;\n color: #1e40af;\n padding: 4px 8px;\n border-radius: 16px;\n font-size: 12px;\n font-weight: 500;\n}\n\n.tag-remove {\n background: none;\n border: none;\n color: #1e40af;\n cursor: pointer;\n font-size: 14px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.tag-remove:hover {\n background: rgba(30, 64, 175, 0.1);\n}\n\n.link-item {\n display: flex;\n align-items: center;\n gap: 6px;\n background: #f0f9ff;\n padding: 4px 8px;\n border-radius: 6px;\n font-size: 12px;\n}\n\n.link-item a {\n color: #0369a1;\n text-decoration: none;\n max-width: 200px;\n overflow: hidden;\n text-overflow: ellipsis;\n 
white-space: nowrap;\n}\n\n.link-item a:hover {\n text-decoration: underline;\n}\n\n.link-remove {\n background: none;\n border: none;\n color: #0369a1;\n cursor: pointer;\n font-size: 12px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.link-remove:hover {\n background: rgba(3, 105, 161, 0.1);\n}\n\n.tags-container input, .links-container input {\n border: 1px solid #d1d5db;\n border-radius: 6px;\n padding: 6px 8px;\n font-size: 12px;\n outline: none;\n min-width: 120px;\n}\n\n.tags-container input:focus, .links-container input:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.1);\n}\n\n/* Output Column */\n.output-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.output-content {\n background: #f8fafc;\n border: 1px solid #e5e7eb;\n border-radius: 8px;\n padding: 16px;\n font-size: 14px;\n line-height: 1.6;\n color: #374151;\n white-space: pre-wrap;\n word-wrap: break-word;\n flex: 1;\n overflow-y: auto;\n max-height: 250px;\n overflow-x: scroll;\n}\n\n.output-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* Evaluation Column */\n.evaluation-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.evaluation-content {\n display: flex;\n flex-direction: column;\n gap: 12px;\n flex: 1;\n}\n\n.score-display {\n text-align: center;\n}\n\n.score-number {\n font-size: 24px;\n font-weight: 700;\n color: #111827;\n display: block;\n margin-bottom: 8px;\n}\n\n.score-bar {\n width: 100%;\n height: 8px;\n background: #e5e7eb;\n border-radius: 4px;\n overflow: hidden;\n}\n\n.score-fill {\n height: 100%;\n background: 
linear-gradient(90deg, #ef4444 0%, #f59e0b 50%, #10b981 100%);\n transition: width 0.3s ease;\n}\n\n.evaluation-details {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.detail-item {\n display: flex;\n align-items: center;\n gap: 8px;\n font-size: 14px;\n}\n\n.status {\n width: 20px;\n height: 20px;\n border-radius: 50%;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 12px;\n font-weight: bold;\n}\n\n.status.pass {\n background: #dcfce7;\n color: #166534;\n}\n\n.status.fail {\n background: #fef2f2;\n color: #dc2626;\n}\n\n.evaluation-text {\n font-size: 12px;\n color: #6b7280;\n line-height: 1.4;\n background: #f9fafb;\n padding: 8px;\n border-radius: 6px;\n border: 1px solid #e5e7eb;\n}\n\n.evaluation-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* New evaluation result styles */\n.evaluation-result {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.evaluation-status {\n font-weight: 600;\n font-size: 14px;\n padding: 8px 12px;\n border-radius: 4px;\n text-align: center;\n}\n\n.evaluation-status.passed {\n background: #d4edda;\n color: #155724;\n border: 1px solid #c3e6cb;\n}\n\n.evaluation-status.failed {\n background: #f8d7da;\n color: #721c24;\n border: 1px solid #f5c6cb;\n}\n\n.evaluation-score {\n font-size: 12px;\n color: #495057;\n text-align: center;\n font-weight: 500;\n}\n\n.evaluation-feedback {\n font-size: 12px;\n color: #6c757d;\n background: #f8f9fa;\n padding: 8px;\n border-radius: 4px;\n border: 1px solid #dee2e6;\n}\n\n/* Actions Column */\n.actions-column {\n padding: 20px;\n background: white;\n display: flex;\n flex-direction: column;\n gap: 12px;\n align-items: center;\n justify-content: flex-start;\n align-self: flex-start;\n}\n\n/* Add Test Case */\n.add-test-case {\n padding: 24px;\n text-align: center;\n 
background: #f9fafb;\n border-top: 1px solid #e5e7eb;\n}\n\n.hidden {\n display: none;\n}\n\n.error-message {\n background: #ffeaea;\n color: #b71c1c;\n border: 1px solid #f44336;\n padding: 0.75em 2.5em 0.75em 1em;\n border-radius: 4px;\n margin: 1em 0;\n position: relative;\n font-size: 1em;\n display: flex;\n align-items: center;\n gap: 1em;\n}\n\n.error-close {\n background: none;\n border: none;\n color: #b71c1c;\n font-size: 1.25em;\n font-weight: bold;\n cursor: pointer;\n position: absolute;\n right: 0.75em;\n top: 50%;\n transform: translateY(-50%);\n line-height: 1;\n padding: 0;\n}\n\n/* Responsive Design */\n@media (max-width: 1200px) {\n .test-case-row {\n grid-template-columns: 1fr;\n gap: 0;\n }\n \n .column-headers {\n display: none;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n border-right: none;\n border-bottom: 1px solid #e5e7eb;\n }\n \n .actions-column {\n flex-direction: row;\n justify-content: center;\n }\n}\n\n@media (max-width: 768px) {\n .test-runner-header {\n flex-direction: column;\n gap: 16px;\n padding: 16px;\n }\n \n .header-left, .header-right {\n flex-wrap: wrap;\n justify-content: center;\n }\n \n .btn {\n font-size: 12px;\n padding: 8px 12px;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n padding: 16px;\n }\n \n .test-case-row {\n min-height: auto;\n }\n}";
|
|
127
166
|
|
|
128
167
|
const LLMTestRunner = class {
|
|
129
168
|
constructor(hostRef) {
|
|
130
169
|
index.registerInstance(this, hostRef);
|
|
170
|
+
this.llmRequest = index.createEvent(this, "llmRequest");
|
|
131
171
|
}
|
|
132
|
-
|
|
172
|
+
llmRequest;
|
|
173
|
+
delayMs = 500;
|
|
133
174
|
testCases = [
|
|
134
175
|
{
|
|
135
176
|
id: '1',
|
|
@@ -143,15 +184,10 @@ const LLMTestRunner = class {
|
|
|
143
184
|
error = '';
|
|
144
185
|
fileInput;
|
|
145
186
|
isExportingTestSuite = false;
|
|
187
|
+
isExportingTestResults = false;
|
|
146
188
|
evaluationEngine;
|
|
147
|
-
|
|
148
|
-
async componentWillLoad() {
|
|
189
|
+
componentWillLoad() {
|
|
149
190
|
this.evaluationEngine = new LLMEvaluationEngine();
|
|
150
|
-
console.log('🔍 componentWillLoad - apiKey:', this.apiKey ? 'SET' : 'NOT SET');
|
|
151
|
-
console.log('🔍 componentWillLoad - apiKey value:', this.apiKey);
|
|
152
|
-
if (!this.apiKey) {
|
|
153
|
-
throw new Error('API key is required. Please provide the apiKey prop: <llm-test-runner apiKey="your-api-key" />');
|
|
154
|
-
}
|
|
155
191
|
}
|
|
156
192
|
componentDidLoad() {
|
|
157
193
|
}
|
|
@@ -172,28 +208,39 @@ const LLMTestRunner = class {
|
|
|
172
208
|
}
|
|
173
209
|
async runSingleTest(testCase) {
|
|
174
210
|
console.log('🚀 Starting test for question:', testCase.question);
|
|
211
|
+
const startTime = Date.now();
|
|
175
212
|
this.updateTestCase(testCase.id, { isRunning: true });
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
213
|
+
return new Promise((resolve, reject) => {
|
|
214
|
+
this.llmRequest.emit({
|
|
215
|
+
prompt: testCase.question,
|
|
216
|
+
resolve: async (aiResponse) => {
|
|
217
|
+
console.log('✅ AI call completed for test case:', testCase.id);
|
|
218
|
+
const endTime = Date.now();
|
|
219
|
+
const responseTime = endTime - startTime;
|
|
220
|
+
this.updateTestCase(testCase.id, {
|
|
221
|
+
isRunning: false,
|
|
222
|
+
output: aiResponse,
|
|
223
|
+
error: null,
|
|
224
|
+
responseTime: responseTime
|
|
225
|
+
});
|
|
226
|
+
await this.evaluateResponse({
|
|
227
|
+
...testCase,
|
|
228
|
+
output: aiResponse,
|
|
229
|
+
responseTime: responseTime
|
|
230
|
+
});
|
|
231
|
+
resolve();
|
|
232
|
+
},
|
|
233
|
+
reject: (error) => {
|
|
234
|
+
console.error('❌ Error in runSingleTest:', error);
|
|
235
|
+
this.updateTestCase(testCase.id, {
|
|
236
|
+
isRunning: false,
|
|
237
|
+
output: null,
|
|
238
|
+
error: error instanceof Error ? error.message : 'Unknown error'
|
|
239
|
+
});
|
|
240
|
+
reject(error);
|
|
241
|
+
}
|
|
195
242
|
});
|
|
196
|
-
}
|
|
243
|
+
});
|
|
197
244
|
}
|
|
198
245
|
deleteTestCase(id) {
|
|
199
246
|
this.testCases = this.testCases.filter(tc => tc.id !== id);
|
|
@@ -232,38 +279,6 @@ const LLMTestRunner = class {
|
|
|
232
279
|
this.updateTestCase(testCaseId, { expectedSourceLinks: newLinks });
|
|
233
280
|
}
|
|
234
281
|
}
|
|
235
|
-
async callGeminiAPI(prompt) {
|
|
236
|
-
console.log('🔍 callGeminiAPI - apiKey:', this.apiKey ? 'SET' : 'NOT SET');
|
|
237
|
-
console.log('🔍 callGeminiAPI - apiKey value:', this.apiKey ? `${this.apiKey.substring(0, 10)}...` : 'undefined');
|
|
238
|
-
if (!this.apiKey) {
|
|
239
|
-
throw new Error('API key is required. Please provide the apiKey prop.');
|
|
240
|
-
}
|
|
241
|
-
const requestBody = {
|
|
242
|
-
contents: [{
|
|
243
|
-
parts: [{
|
|
244
|
-
text: prompt
|
|
245
|
-
}]
|
|
246
|
-
}]
|
|
247
|
-
};
|
|
248
|
-
const response = await fetch(`${this.apiUrl}?key=${this.apiKey}`, {
|
|
249
|
-
method: 'POST',
|
|
250
|
-
headers: {
|
|
251
|
-
'Content-Type': 'application/json',
|
|
252
|
-
},
|
|
253
|
-
body: JSON.stringify(requestBody)
|
|
254
|
-
});
|
|
255
|
-
if (!response.ok) {
|
|
256
|
-
const errorData = await response.json().catch(() => ({}));
|
|
257
|
-
throw new Error(errorData.error?.message || `HTTP error! status: ${response.status}`);
|
|
258
|
-
}
|
|
259
|
-
const data = await response.json();
|
|
260
|
-
if (data.candidates && data.candidates[0] && data.candidates[0].content) {
|
|
261
|
-
return data.candidates[0].content.parts[0].text;
|
|
262
|
-
}
|
|
263
|
-
else {
|
|
264
|
-
throw new Error('Unexpected response format from Gemini API');
|
|
265
|
-
}
|
|
266
|
-
}
|
|
267
282
|
async evaluateResponse(testCase) {
|
|
268
283
|
if (!testCase.output) {
|
|
269
284
|
console.warn('⚠️ No output to evaluate for test case:', testCase.id);
|
|
@@ -285,13 +300,21 @@ const LLMTestRunner = class {
|
|
|
285
300
|
}
|
|
286
301
|
async runAllTests() {
|
|
287
302
|
this.isRunningAll = true;
|
|
303
|
+
const tasks = [];
|
|
288
304
|
for (const testCase of this.testCases) {
|
|
289
305
|
if (!testCase.isRunning && testCase.question.trim()) {
|
|
290
|
-
|
|
291
|
-
|
|
292
|
-
|
|
306
|
+
tasks.push(() => this.runSingleTest(testCase).catch(err => {
|
|
307
|
+
console.error(`⚠️ Test case ${testCase.id} failed`, err);
|
|
308
|
+
}));
|
|
293
309
|
}
|
|
294
310
|
}
|
|
311
|
+
try {
|
|
312
|
+
const fetcher = new RateLimitedFetcher(this.delayMs);
|
|
313
|
+
await fetcher.runAll(tasks);
|
|
314
|
+
}
|
|
315
|
+
catch (err) {
|
|
316
|
+
console.error('⚠️ Error running all tests:', err);
|
|
317
|
+
}
|
|
295
318
|
this.isRunningAll = false;
|
|
296
319
|
}
|
|
297
320
|
generateId() {
|
|
@@ -374,8 +397,78 @@ const LLMTestRunner = class {
|
|
|
374
397
|
this.isExportingTestSuite = false;
|
|
375
398
|
}
|
|
376
399
|
}
|
|
400
|
+
async handleExportTestResults() {
|
|
401
|
+
this.isExportingTestResults = true;
|
|
402
|
+
try {
|
|
403
|
+
// Create CSV content with the required fields
|
|
404
|
+
const csvRows = [];
|
|
405
|
+
// Add header row
|
|
406
|
+
const headers = [
|
|
407
|
+
'Question',
|
|
408
|
+
'Expected Keywords',
|
|
409
|
+
'Generated Keywords',
|
|
410
|
+
'Keywords Match',
|
|
411
|
+
'Expected Source Links',
|
|
412
|
+
'Generated Source Links',
|
|
413
|
+
'Source Links Match',
|
|
414
|
+
'Response Time (s)'
|
|
415
|
+
];
|
|
416
|
+
csvRows.push(headers.join(','));
|
|
417
|
+
// Add data rows
|
|
418
|
+
this.testCases.forEach(testCase => {
|
|
419
|
+
const expectedKeywords = testCase.expectedKeywords.join('; ');
|
|
420
|
+
const expectedSourceLinks = testCase.expectedSourceLinks.join('; ');
|
|
421
|
+
let generatedKeywords = '';
|
|
422
|
+
let generatedSourceLinks = '';
|
|
423
|
+
let keywordsMatch = '';
|
|
424
|
+
let sourceLinksMatch = '';
|
|
425
|
+
if (testCase.evaluationResult) {
|
|
426
|
+
const foundKeywords = testCase.evaluationResult.keywordMatches
|
|
427
|
+
.filter(match => match.found)
|
|
428
|
+
.map(match => match.keyword);
|
|
429
|
+
const foundSourceLinks = testCase.evaluationResult.sourceLinkMatches
|
|
430
|
+
.filter(match => match.found)
|
|
431
|
+
.map(match => match.link);
|
|
432
|
+
generatedKeywords = foundKeywords.join('; ');
|
|
433
|
+
generatedSourceLinks = foundSourceLinks.join('; ');
|
|
434
|
+
// Calculate match percentages
|
|
435
|
+
const keywordMatchCount = testCase.evaluationResult.keywordMatches.filter(m => m.found).length;
|
|
436
|
+
const totalKeywords = testCase.evaluationResult.keywordMatches.length;
|
|
437
|
+
const sourceLinkMatchCount = testCase.evaluationResult.sourceLinkMatches.filter(m => m.found).length;
|
|
438
|
+
const totalSourceLinks = testCase.evaluationResult.sourceLinkMatches.length;
|
|
439
|
+
keywordsMatch = totalKeywords > 0 ? `${keywordMatchCount}/${totalKeywords}` : 'N/A';
|
|
440
|
+
sourceLinksMatch = totalSourceLinks > 0 ? `${sourceLinkMatchCount}/${totalSourceLinks}` : 'N/A';
|
|
441
|
+
}
|
|
442
|
+
const responseTime = testCase.responseTime ? (testCase.responseTime / 1000).toFixed(3) : 'N/A';
|
|
443
|
+
const row = [
|
|
444
|
+
this.escapeCsvField(testCase.question),
|
|
445
|
+
this.escapeCsvField(expectedKeywords),
|
|
446
|
+
this.escapeCsvField(generatedKeywords),
|
|
447
|
+
keywordsMatch,
|
|
448
|
+
this.escapeCsvField(expectedSourceLinks),
|
|
449
|
+
this.escapeCsvField(generatedSourceLinks),
|
|
450
|
+
sourceLinksMatch,
|
|
451
|
+
responseTime
|
|
452
|
+
];
|
|
453
|
+
csvRows.push(row.join(','));
|
|
454
|
+
});
|
|
455
|
+
const csvContent = csvRows.join('\n');
|
|
456
|
+
// Added a small delay to show the loading state
|
|
457
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
458
|
+
this.downloadFile(csvContent, 'test-results.csv', 'text/csv');
|
|
459
|
+
}
|
|
460
|
+
finally {
|
|
461
|
+
this.isExportingTestResults = false;
|
|
462
|
+
}
|
|
463
|
+
}
|
|
464
|
+
escapeCsvField(field) {
|
|
465
|
+
if (field.includes(',') || field.includes('"') || field.includes('\n')) {
|
|
466
|
+
return `"${field.replace(/"/g, '""')}"`;
|
|
467
|
+
}
|
|
468
|
+
return field;
|
|
469
|
+
}
|
|
377
470
|
render() {
|
|
378
|
-
return (index.h("div", { key: '
|
|
471
|
+
return (index.h("div", { key: 'beb2ab78108fede00c5d759d1ac5c98ae7f037d0', class: "test-runner-container" }, index.h("header", { key: 'd09b424069227500ffffcab8724b48d91705baab', class: "test-runner-header" }, index.h("div", { key: '368ef117a1c2cc0be0446e2d373e38e0af1c2040', class: "header-left" }, index.h("input", { key: '74346a634b36e9d04f75817495834983d970306c', class: "hidden", type: "file", ref: (el) => (this.fileInput = el), onChange: (e) => this.handleFileChange(e), accept: ".json,application/json" }), index.h("button", { key: 'e095ff9de105cb281e60f70de02fb0de8a0f6478', class: "btn btn-secondary", onClick: () => this.handleFileSelect() }, index.h("span", { key: '6f828f297867ae54baf1f89c9dbce611eba2056a', class: "icon" }, "\u2191"), "Import Test Suite"), index.h("button", { key: '5682dc8f70edac9fdf33b394677a6859555c039b', class: "btn btn-secondary", onClick: () => this.handleExportTestSuite(), disabled: this.isExportingTestSuite }, index.h("span", { key: 'a1e8d3170ce39e499160e578e3e02d39cd0da9d7', class: "icon" }, this.isExportingTestSuite ? '⏳' : '↓'), this.isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), index.h("div", { key: 'af96ea230b04f4f6ce8e3488a1f8f7f2a01e74c0', class: "header-right" }, index.h("button", { key: 'a90bfb3421f5e2e5112b3642ddf81cf19372e0ad', class: "btn btn-secondary" }, index.h("span", { key: '79611828e5dc1ad53998a74fca10a1a05c5d2a4e', class: "icon" }, "\u2699\uFE0F"), "Prompt Editor"), index.h("button", { key: '451367ce26183a58add0902e1de2d29b4933add4', class: "btn btn-secondary", onClick: () => this.handleExportTestResults(), disabled: this.isExportingTestResults }, index.h("span", { key: '11f791f2071b58e0800d14ac7e6bebba67421449', class: "icon" }, this.isExportingTestResults ? '⏳' : '↓'), this.isExportingTestResults ? 'Exporting...' 
: 'Export Test Results'), index.h("button", { key: '3cbf7626ccc5f171cdc6fd7ce3750ea3786218aa', class: "btn btn-primary", onClick: () => this.runAllTests(), disabled: this.isRunningAll }, this.isRunningAll ? 'Running...' : 'Run All'))), index.h(ErrorMessage, { key: 'ffdd72a1db1f45c5db158d2df0be64dc978fa930', message: this.error, onClear: () => (this.error = '') }), index.h("div", { key: '64edd5c84c4b566f90d1860f6d6bf5aeafb7ae0f', class: "test-runner-content" }, index.h("div", { key: '4be27d07a13d88611b5fc54b05e826c790fd2145', class: "column-headers" }, index.h("div", { key: 'eeb9ace5e63b80359d2decb1b42461f77cfe98cf', class: "column-header" }, "Input"), index.h("div", { key: 'c70cc5af0aaca2636eee298dfaa4da81799df6cf', class: "column-header" }, "Output"), index.h("div", { key: '9ca69fddafe7267bf5b8b12601e2cf64d65a58e1', class: "column-header" }, "Evaluation"), index.h("div", { key: 'b800841828c22a6aad0345e58b47f71469594792', class: "column-header" }, "Actions")), index.h("div", { key: 'da186e25c2111474f65b89a017158bffdbc83e03', class: "test-cases" }, this.testCases.map((testCase) => (index.h("div", { class: "test-case-row", key: testCase.id }, index.h("div", { class: "input-column" }, index.h("div", { class: "input-group" }, index.h("label", null, "Question"), index.h("textarea", { value: testCase.question, onInput: (e) => this.updateTestCase(testCase.id, {
|
|
379
472
|
question: e.target.value
|
|
380
473
|
}), placeholder: "Enter your question here...", rows: 3 })), index.h("div", { class: "keywords-group" }, index.h("label", null, "Expected keywords"), index.h("div", { class: "tags-container" }, testCase.expectedKeywords.map((keyword, index$1) => (index.h("span", { class: "tag", key: index$1 }, keyword, index.h("button", { class: "tag-remove", onClick: () => this.removeKeyword(testCase.id, index$1) }, "\u00D7")))), index.h("input", { type: "text", placeholder: "New item...", onKeyDown: (e) => {
|
|
381
474
|
if (e.key === 'Enter') {
|
|
@@ -387,7 +480,7 @@ const LLMTestRunner = class {
|
|
|
387
480
|
this.addSourceLink(testCase.id, e.target.value);
|
|
388
481
|
e.target.value = '';
|
|
389
482
|
}
|
|
390
|
-
} })))), index.h("div", { class: "output-column" }, testCase.output ? (index.h("div", { class: "output-content" }, testCase.output)) : (index.h("div", { class: "output-placeholder" }, testCase.isRunning ? 'Running...' : ''))), index.h("div", { class: "evaluation-column" }, testCase.evaluationResult ? (index.h("div", { class: "evaluation-result" }, index.h("div", { class: `evaluation-status ${testCase.evaluationResult.passed ? 'passed' : 'failed'}` }, testCase.evaluationResult.passed ? '✅ PASSED' : '❌ FAILED'), index.h("div", { class: "evaluation-details" }, "Keywords: ", testCase.evaluationResult.keywordMatches.filter(m => m.found).length, "/", testCase.evaluationResult.keywordMatches.length, " found"))) : (index.h("div", { class: "evaluation-placeholder" }, testCase.isRunning ? 'Evaluating...' : ''))), index.h("div", { class: "actions-column" }, index.h("button", { class: "btn btn-icon btn-run", onClick: () => this.runSingleTest(testCase), disabled: testCase.isRunning || !testCase.question.trim(), title: !testCase.question.trim() ? "Enter a question first" : "Run this test" }, testCase.isRunning ? '⏳' : '▶️'), index.h("button", { class: "btn btn-icon btn-delete", onClick: () => this.deleteTestCase(testCase.id), title: "Delete this test" }, "\uD83D\uDDD1\uFE0F")))))), index.h("div", { key: '
|
|
483
|
+
} })))), index.h("div", { class: "output-column" }, testCase.output ? (index.h("div", { class: "output-content" }, testCase.output)) : (index.h("div", { class: "output-placeholder" }, testCase.isRunning ? 'Running...' : ''))), index.h("div", { class: "evaluation-column" }, testCase.evaluationResult ? (index.h("div", { class: "evaluation-result" }, index.h("div", { class: `evaluation-status ${testCase.evaluationResult.passed ? 'passed' : 'failed'}` }, testCase.evaluationResult.passed ? '✅ PASSED' : '❌ FAILED'), index.h("div", { class: "evaluation-details" }, "Keywords: ", testCase.evaluationResult.keywordMatches.filter(m => m.found).length, "/", testCase.evaluationResult.keywordMatches.length, " found"))) : (index.h("div", { class: "evaluation-placeholder" }, testCase.isRunning ? 'Evaluating...' : ''))), index.h("div", { class: "actions-column" }, index.h("button", { class: "btn btn-icon btn-run", onClick: () => this.runSingleTest(testCase), disabled: testCase.isRunning || !testCase.question.trim(), title: !testCase.question.trim() ? "Enter a question first" : "Run this test" }, testCase.isRunning ? '⏳' : '▶️'), index.h("button", { class: "btn btn-icon btn-delete", onClick: () => this.deleteTestCase(testCase.id), title: "Delete this test" }, "\uD83D\uDDD1\uFE0F")))))), index.h("div", { key: '2d4446b15a31adfbaf88b3d46fcbb3cf314f8861', class: "add-test-case" }, index.h("button", { key: '737e9d856db0f9090923bef718bb9bfde974b950', class: "btn btn-outline", onClick: () => this.addNewTestCase() }, "+ Add Question")))));
|
|
391
484
|
}
|
|
392
485
|
};
|
|
393
486
|
LLMTestRunner.style = llmTestRunnerCss;
|