llm-testrunner-components 1.0.4 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +113 -10
- package/dist/cjs/app-globals-CbbEbofA.js +14 -0
- package/dist/cjs/app-globals-CbbEbofA.js.map +1 -0
- package/dist/cjs/{index-CY2lQip_.js → index-D-FySkoV.js} +25 -5
- package/dist/cjs/index-D-FySkoV.js.map +1 -0
- package/dist/cjs/index.cjs.js +159 -66
- package/dist/cjs/index.cjs.js.map +1 -1
- package/dist/cjs/llm-test-runner.cjs.entry.js +1 -1
- package/dist/cjs/llm-testrunner.cjs.js +4 -4
- package/dist/cjs/llm-testrunner.cjs.js.map +1 -1
- package/dist/cjs/loader.cjs.js +3 -3
- package/dist/collection/collection-manifest.json +3 -2
- package/dist/collection/components/llm-test-runner/llm-test-runner.js +152 -72
- package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
- package/dist/collection/global/env.js +6 -0
- package/dist/collection/global/env.js.map +1 -0
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js +39 -0
- package/dist/collection/lib/rate-limited-fetcher/rate-limited-fetcher.js.map +1 -0
- package/dist/components/index.js +6 -1650
- package/dist/components/index.js.map +1 -1
- package/dist/components/llm-test-runner.js +1 -1
- package/dist/components/p-CYUbsbxt.js +1770 -0
- package/dist/components/p-CYUbsbxt.js.map +1 -0
- package/dist/esm/app-globals-BOQOUavG.js +12 -0
- package/dist/esm/app-globals-BOQOUavG.js.map +1 -0
- package/dist/esm/{index-DBp-rMPb.js → index-cncubhtM.js} +25 -6
- package/dist/esm/index-cncubhtM.js.map +1 -0
- package/dist/esm/index.js +159 -66
- package/dist/esm/index.js.map +1 -1
- package/dist/esm/llm-test-runner.entry.js +1 -1
- package/dist/esm/llm-testrunner.js +5 -5
- package/dist/esm/llm-testrunner.js.map +1 -1
- package/dist/esm/loader.js +4 -4
- package/dist/llm-testrunner/index.esm.js +1 -1
- package/dist/llm-testrunner/index.esm.js.map +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js +1 -1
- package/dist/llm-testrunner/llm-testrunner.esm.js.map +1 -1
- package/dist/llm-testrunner/p-BOQOUavG.js +2 -0
- package/dist/llm-testrunner/p-BOQOUavG.js.map +1 -0
- package/dist/llm-testrunner/p-cncubhtM.js +3 -0
- package/dist/llm-testrunner/p-cncubhtM.js.map +1 -0
- package/dist/llm-testrunner/p-f68fd660.entry.js +2 -0
- package/dist/react/components.d.ts +6 -3
- package/dist/react/components.d.ts.map +1 -1
- package/dist/react/components.js +2 -2
- package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +13 -4
- package/dist/types/components.d.ts +26 -2
- package/dist/types/global/env.d.ts +8 -0
- package/dist/types/lib/rate-limited-fetcher/rate-limited-fetcher.d.ts +10 -0
- package/dist/types/stencil-public-runtime.d.ts +19 -10
- package/package.json +6 -4
- package/dist/cjs/app-globals-V2Kpy_OQ.js +0 -8
- package/dist/cjs/app-globals-V2Kpy_OQ.js.map +0 -1
- package/dist/cjs/index-CY2lQip_.js.map +0 -1
- package/dist/esm/app-globals-DQuL1Twl.js +0 -6
- package/dist/esm/app-globals-DQuL1Twl.js.map +0 -1
- package/dist/esm/index-DBp-rMPb.js.map +0 -1
- package/dist/llm-testrunner/p-DBp-rMPb.js +0 -3
- package/dist/llm-testrunner/p-DBp-rMPb.js.map +0 -1
- package/dist/llm-testrunner/p-DQuL1Twl.js +0 -2
- package/dist/llm-testrunner/p-DQuL1Twl.js.map +0 -1
- package/dist/llm-testrunner/p-ed2ea423.entry.js +0 -2
- /package/dist/llm-testrunner/{p-ed2ea423.entry.js.map → p-f68fd660.entry.js.map} +0 -0
package/dist/esm/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { h, r as registerInstance } from './index-
|
|
1
|
+
import { h, r as registerInstance, c as createEvent } from './index-cncubhtM.js';
|
|
2
2
|
|
|
3
3
|
class LLMEvaluationEngine {
|
|
4
4
|
constructor() { }
|
|
@@ -121,13 +121,54 @@ const ErrorMessage = ({ message, onClear }) => {
|
|
|
121
121
|
return (h("div", { class: "error-message" }, h("span", null, message), onClear && (h("button", { class: "error-close", title: "Close", onClick: onClear }, "\u00D7"))));
|
|
122
122
|
};
|
|
123
123
|
|
|
124
|
+
/**
 * Starts queued tasks one at a time, spaced `delayMs` milliseconds apart.
 *
 * Each timer tick *starts* at most one task and does not wait for it to
 * finish, so a task that takes longer than the delay can overlap with the
 * next one. The timer only runs while the queue is non-empty.
 */
class RateLimitedFetcher {
    /** Pending task launchers, started in FIFO order. */
    queue = [];
    /** Delay between task starts, in milliseconds. */
    delay;
    /** Handle of the active interval timer; undefined while idle. */
    intervalId;
    /**
     * @param {number} delayMs - Milliseconds to wait between task starts.
     */
    constructor(delayMs) {
        this.delay = delayMs;
    }
    /**
     * Starts the interval timer unless one is already active. The timer
     * stops itself as soon as the queue has been drained.
     */
    startQueue() {
        if (this.intervalId) {
            return;
        }
        this.intervalId = setInterval(() => {
            const launch = this.queue.shift();
            if (launch) {
                launch();
            }
            if (this.queue.length === 0) {
                this.stop();
            }
        }, this.delay);
    }
    /**
     * Queues a task and returns a promise that settles with its outcome.
     *
     * @param {() => any} task - Function to run; may return a promise or a plain value.
     * @returns {Promise<any>} Fulfils with the task's result, or rejects with
     *   whatever the task throws or rejects with.
     */
    schedule(task) {
        return new Promise((resolve, reject) => {
            this.queue.push(() => {
                try {
                    // resolve() adopts a returned promise, so asynchronous
                    // results and rejections are forwarded automatically.
                    resolve(task());
                }
                catch (err) {
                    // A synchronous throw must reject the caller's promise
                    // instead of escaping from the timer callback.
                    reject(err);
                }
            });
            this.startQueue();
        });
    }
    /** Cancels the interval timer if it is running. Queued tasks are kept. */
    stop() {
        if (this.intervalId) {
            clearInterval(this.intervalId);
            this.intervalId = undefined;
        }
    }
    /**
     * Schedules every task and waits for all of them.
     *
     * @param {Array<() => any>} tasks - Tasks to run in order.
     * @returns {Promise<any[]>} Results in task order; rejects on the first failure.
     */
    async runAll(tasks) {
        return Promise.all(tasks.map(task => this.schedule(task)));
    }
}
|
|
162
|
+
|
|
124
163
|
const llmTestRunnerCss = ":host {\n display: block;\n font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;\n background-color: #f8fafc;\n min-height: 100vh;\n}\n\n.test-runner-container {\n padding: 20px;\n background: white;\n border-radius: 8px;\n box-shadow: 0 2px 4px rgba(0,0,0,0.1);\n margin: 20px 0;\n}\n\n.simple-test {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #ddd;\n border-radius: 4px;\n}\n\n.test-cases {\n margin: 20px 0;\n}\n\n.test-case {\n margin: 20px 0;\n padding: 20px;\n border: 1px solid #eee;\n border-radius: 4px;\n background: #f9f9f9;\n}\n\n.test-case h3 {\n margin-top: 0;\n color: #333;\n}\n\n.test-case textarea {\n width: 100%;\n padding: 10px;\n border: 1px solid #ddd;\n border-radius: 4px;\n font-family: inherit;\n margin: 10px 0;\n}\n\n.test-case button {\n background: #007bff;\n color: white;\n border: none;\n padding: 10px 20px;\n border-radius: 4px;\n cursor: pointer;\n margin: 10px 5px 10px 0;\n}\n\n.test-case button:disabled {\n background: #ccc;\n cursor: not-allowed;\n}\n\n.output, .error {\n margin: 10px 0;\n padding: 10px;\n border-radius: 4px;\n}\n\n.output {\n background: #d4edda;\n border: 1px solid #c3e6cb;\n color: #155724;\n}\n\n.error {\n background: #f8d7da;\n border: 1px solid #f5c6cb;\n color: #721c24;\n}\n\n.test-runner-container {\n max-width: 1400px;\n margin: 0 auto;\n background: white;\n box-shadow: 0 0 20px rgba(0, 0, 0, 0.1);\n}\n\n/* Header Styles */\n.test-runner-header {\n display: flex;\n justify-content: space-between;\n align-items: center;\n padding: 20px 24px;\n background: /*linear-gradient(135deg, #667eea 0%, #764ba2 100%);*/ white;\n color: white;\n box-shadow: 0 2px 10px rgba(0, 0, 0, 0.1);\n}\n\n.header-left, .header-right {\n display: flex;\n gap: 12px;\n align-items: center;\n}\n\n.header-center {\n flex: 1;\n display: flex;\n justify-content: center;\n align-items: center;\n}\n\n.api-status {\n display: flex;\n align-items: 
center;\n gap: 0.5rem;\n}\n\n.api-status-text {\n color: #28a745;\n font-weight: 500;\n font-size: 0.9rem;\n}\n\n/* Button Styles */\n.btn {\n display: inline-flex;\n align-items: center;\n gap: 8px;\n padding: 10px 16px;\n border: none;\n border-radius: 8px;\n font-size: 14px;\n font-weight: 500;\n cursor: pointer;\n transition: all 0.2s ease;\n text-decoration: none;\n position: relative;\n}\n\n.btn:disabled {\n opacity: 0.6;\n cursor: not-allowed;\n transform: none !important;\n}\n\n.btn-primary {\n color: black;\n box-shadow: 0 2px 4px rgba(59, 130, 246, 0.3);\n}\n\n.btn-primary:hover:not(:disabled) {\n transform: translateY(-1px);\n box-shadow: 0 4px 8px rgba(59, 130, 246, 0.4);\n}\n\n.btn-secondary {\n background: rgba(255, 255, 255, 0.2);\n color: blue;\n border: 1px solid rgba(255, 255, 255, 0.3);\n}\n\n.btn-secondary:hover:not(:disabled) {\n background: rgba(255, 255, 255, 0.3);\n transform: translateY(-1px);\n}\n\n.btn-outline {\n background: transparent;\n color: #6b7280;\n border: 2px solid #e5e7eb;\n}\n\n.btn-outline:hover {\n background: #f9fafb;\n border-color: #d1d5db;\n transform: translateY(-1px);\n}\n\n.btn-icon {\n padding: 8px;\n min-width: 40px;\n height: 40px;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 16px;\n}\n\n.btn-run {\n color: white;\n}\n\n.btn-run:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.btn-delete {\n color: white;\n}\n\n.btn-delete:hover:not(:disabled) {\n transform: translateY(-1px);\n}\n\n.icon {\n font-size: 16px;\n}\n\n/* Main Content */\n.test-runner-content {\n padding: 0;\n}\n\n/* Column Headers */\n.column-headers {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n background: #e5e7eb;\n border-bottom: 2px solid #d1d5db;\n}\n\n.column-header {\n background: #f8fafc;\n padding: 16px 20px;\n font-weight: 600;\n color: #374151;\n font-size: 14px;\n text-transform: uppercase;\n letter-spacing: 0.05em;\n}\n\n/* Test Cases */\n.test-cases {\n 
background: white;\n}\n\n.test-case-row {\n display: grid;\n grid-template-columns: 1fr 1.5fr 0.5fr 120px;\n gap: 1px;\n border-bottom: 1px solid #e5e7eb;\n min-height: 200px;\n}\n\n.test-case-row:hover {\n background: #f9fafb;\n}\n\n/* Input Column */\n.input-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n}\n\n.input-group {\n margin-bottom: 16px;\n}\n\n.input-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.input-group textarea {\n width: 95%;\n padding: 12px;\n border: 2px solid #e5e7eb;\n border-radius: 8px;\n font-size: 14px;\n resize: vertical;\n outline: none;\n transition: border-color 0.2s ease;\n font-family: inherit;\n}\n\n.input-group textarea:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 3px rgba(59, 130, 246, 0.1);\n}\n\n/* Keywords and Links */\n.keywords-group, .links-group {\n margin-bottom: 16px;\n}\n\n.keywords-group label, .links-group label {\n display: block;\n margin-bottom: 8px;\n font-weight: 500;\n color: #374151;\n font-size: 14px;\n}\n\n.tags-container, .links-container {\n display: flex;\n flex-wrap: wrap;\n gap: 8px;\n align-items: center;\n}\n\n.tag {\n display: inline-flex;\n align-items: center;\n gap: 6px;\n background: #dbeafe;\n color: #1e40af;\n padding: 4px 8px;\n border-radius: 16px;\n font-size: 12px;\n font-weight: 500;\n}\n\n.tag-remove {\n background: none;\n border: none;\n color: #1e40af;\n cursor: pointer;\n font-size: 14px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.tag-remove:hover {\n background: rgba(30, 64, 175, 0.1);\n}\n\n.link-item {\n display: flex;\n align-items: center;\n gap: 6px;\n background: #f0f9ff;\n padding: 4px 8px;\n border-radius: 6px;\n font-size: 12px;\n}\n\n.link-item a {\n color: #0369a1;\n text-decoration: none;\n max-width: 200px;\n overflow: hidden;\n text-overflow: ellipsis;\n 
white-space: nowrap;\n}\n\n.link-item a:hover {\n text-decoration: underline;\n}\n\n.link-remove {\n background: none;\n border: none;\n color: #0369a1;\n cursor: pointer;\n font-size: 12px;\n padding: 0;\n width: 16px;\n height: 16px;\n display: flex;\n align-items: center;\n justify-content: center;\n border-radius: 50%;\n}\n\n.link-remove:hover {\n background: rgba(3, 105, 161, 0.1);\n}\n\n.tags-container input, .links-container input {\n border: 1px solid #d1d5db;\n border-radius: 6px;\n padding: 6px 8px;\n font-size: 12px;\n outline: none;\n min-width: 120px;\n}\n\n.tags-container input:focus, .links-container input:focus {\n border-color: #3b82f6;\n box-shadow: 0 0 0 2px rgba(59, 130, 246, 0.1);\n}\n\n/* Output Column */\n.output-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.output-content {\n background: #f8fafc;\n border: 1px solid #e5e7eb;\n border-radius: 8px;\n padding: 16px;\n font-size: 14px;\n line-height: 1.6;\n color: #374151;\n white-space: pre-wrap;\n word-wrap: break-word;\n flex: 1;\n overflow-y: auto;\n max-height: 250px;\n overflow-x: scroll;\n}\n\n.output-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* Evaluation Column */\n.evaluation-column {\n padding: 20px;\n background: white;\n border-right: 1px solid #e5e7eb;\n display: flex;\n flex-direction: column;\n}\n\n.evaluation-content {\n display: flex;\n flex-direction: column;\n gap: 12px;\n flex: 1;\n}\n\n.score-display {\n text-align: center;\n}\n\n.score-number {\n font-size: 24px;\n font-weight: 700;\n color: #111827;\n display: block;\n margin-bottom: 8px;\n}\n\n.score-bar {\n width: 100%;\n height: 8px;\n background: #e5e7eb;\n border-radius: 4px;\n overflow: hidden;\n}\n\n.score-fill {\n height: 100%;\n background: 
linear-gradient(90deg, #ef4444 0%, #f59e0b 50%, #10b981 100%);\n transition: width 0.3s ease;\n}\n\n.evaluation-details {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.detail-item {\n display: flex;\n align-items: center;\n gap: 8px;\n font-size: 14px;\n}\n\n.status {\n width: 20px;\n height: 20px;\n border-radius: 50%;\n display: flex;\n align-items: center;\n justify-content: center;\n font-size: 12px;\n font-weight: bold;\n}\n\n.status.pass {\n background: #dcfce7;\n color: #166534;\n}\n\n.status.fail {\n background: #fef2f2;\n color: #dc2626;\n}\n\n.evaluation-text {\n font-size: 12px;\n color: #6b7280;\n line-height: 1.4;\n background: #f9fafb;\n padding: 8px;\n border-radius: 6px;\n border: 1px solid #e5e7eb;\n}\n\n.evaluation-placeholder {\n display: flex;\n align-items: center;\n justify-content: center;\n color: #9ca3af;\n font-style: italic;\n flex: 1;\n background: #f9fafb;\n border: 2px dashed #d1d5db;\n border-radius: 8px;\n}\n\n/* New evaluation result styles */\n.evaluation-result {\n display: flex;\n flex-direction: column;\n gap: 8px;\n}\n\n.evaluation-status {\n font-weight: 600;\n font-size: 14px;\n padding: 8px 12px;\n border-radius: 4px;\n text-align: center;\n}\n\n.evaluation-status.passed {\n background: #d4edda;\n color: #155724;\n border: 1px solid #c3e6cb;\n}\n\n.evaluation-status.failed {\n background: #f8d7da;\n color: #721c24;\n border: 1px solid #f5c6cb;\n}\n\n.evaluation-score {\n font-size: 12px;\n color: #495057;\n text-align: center;\n font-weight: 500;\n}\n\n.evaluation-feedback {\n font-size: 12px;\n color: #6c757d;\n background: #f8f9fa;\n padding: 8px;\n border-radius: 4px;\n border: 1px solid #dee2e6;\n}\n\n/* Actions Column */\n.actions-column {\n padding: 20px;\n background: white;\n display: flex;\n flex-direction: column;\n gap: 12px;\n align-items: center;\n justify-content: flex-start;\n align-self: flex-start;\n}\n\n/* Add Test Case */\n.add-test-case {\n padding: 24px;\n text-align: center;\n 
background: #f9fafb;\n border-top: 1px solid #e5e7eb;\n}\n\n.hidden {\n display: none;\n}\n\n.error-message {\n background: #ffeaea;\n color: #b71c1c;\n border: 1px solid #f44336;\n padding: 0.75em 2.5em 0.75em 1em;\n border-radius: 4px;\n margin: 1em 0;\n position: relative;\n font-size: 1em;\n display: flex;\n align-items: center;\n gap: 1em;\n}\n\n.error-close {\n background: none;\n border: none;\n color: #b71c1c;\n font-size: 1.25em;\n font-weight: bold;\n cursor: pointer;\n position: absolute;\n right: 0.75em;\n top: 50%;\n transform: translateY(-50%);\n line-height: 1;\n padding: 0;\n}\n\n/* Responsive Design */\n@media (max-width: 1200px) {\n .test-case-row {\n grid-template-columns: 1fr;\n gap: 0;\n }\n \n .column-headers {\n display: none;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n border-right: none;\n border-bottom: 1px solid #e5e7eb;\n }\n \n .actions-column {\n flex-direction: row;\n justify-content: center;\n }\n}\n\n@media (max-width: 768px) {\n .test-runner-header {\n flex-direction: column;\n gap: 16px;\n padding: 16px;\n }\n \n .header-left, .header-right {\n flex-wrap: wrap;\n justify-content: center;\n }\n \n .btn {\n font-size: 12px;\n padding: 8px 12px;\n }\n \n .input-column, .output-column, .evaluation-column, .actions-column {\n padding: 16px;\n }\n \n .test-case-row {\n min-height: auto;\n }\n}";
|
|
125
164
|
|
|
126
165
|
const LLMTestRunner = class {
|
|
127
166
|
constructor(hostRef) {
|
|
128
167
|
registerInstance(this, hostRef);
|
|
168
|
+
this.llmRequest = createEvent(this, "llmRequest");
|
|
129
169
|
}
|
|
130
|
-
|
|
170
|
+
llmRequest;
|
|
171
|
+
delayMs = 500;
|
|
131
172
|
testCases = [
|
|
132
173
|
{
|
|
133
174
|
id: '1',
|
|
@@ -141,15 +182,10 @@ const LLMTestRunner = class {
|
|
|
141
182
|
error = '';
|
|
142
183
|
fileInput;
|
|
143
184
|
isExportingTestSuite = false;
|
|
185
|
+
isExportingTestResults = false;
|
|
144
186
|
evaluationEngine;
|
|
145
|
-
|
|
146
|
-
async componentWillLoad() {
|
|
187
|
+
componentWillLoad() {
|
|
147
188
|
this.evaluationEngine = new LLMEvaluationEngine();
|
|
148
|
-
console.log('🔍 componentWillLoad - apiKey:', this.apiKey ? 'SET' : 'NOT SET');
|
|
149
|
-
console.log('🔍 componentWillLoad - apiKey value:', this.apiKey);
|
|
150
|
-
if (!this.apiKey) {
|
|
151
|
-
throw new Error('API key is required. Please provide the apiKey prop: <llm-test-runner apiKey="your-api-key" />');
|
|
152
|
-
}
|
|
153
189
|
}
|
|
154
190
|
componentDidLoad() {
|
|
155
191
|
}
|
|
@@ -170,28 +206,39 @@ const LLMTestRunner = class {
|
|
|
170
206
|
}
|
|
171
207
|
async runSingleTest(testCase) {
|
|
172
208
|
console.log('🚀 Starting test for question:', testCase.question);
|
|
209
|
+
const startTime = Date.now();
|
|
173
210
|
this.updateTestCase(testCase.id, { isRunning: true });
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
211
|
+
return new Promise((resolve, reject) => {
|
|
212
|
+
this.llmRequest.emit({
|
|
213
|
+
prompt: testCase.question,
|
|
214
|
+
resolve: async (aiResponse) => {
|
|
215
|
+
console.log('✅ AI call completed for test case:', testCase.id);
|
|
216
|
+
const endTime = Date.now();
|
|
217
|
+
const responseTime = endTime - startTime;
|
|
218
|
+
this.updateTestCase(testCase.id, {
|
|
219
|
+
isRunning: false,
|
|
220
|
+
output: aiResponse,
|
|
221
|
+
error: null,
|
|
222
|
+
responseTime: responseTime
|
|
223
|
+
});
|
|
224
|
+
await this.evaluateResponse({
|
|
225
|
+
...testCase,
|
|
226
|
+
output: aiResponse,
|
|
227
|
+
responseTime: responseTime
|
|
228
|
+
});
|
|
229
|
+
resolve();
|
|
230
|
+
},
|
|
231
|
+
reject: (error) => {
|
|
232
|
+
console.error('❌ Error in runSingleTest:', error);
|
|
233
|
+
this.updateTestCase(testCase.id, {
|
|
234
|
+
isRunning: false,
|
|
235
|
+
output: null,
|
|
236
|
+
error: error instanceof Error ? error.message : 'Unknown error'
|
|
237
|
+
});
|
|
238
|
+
reject(error);
|
|
239
|
+
}
|
|
193
240
|
});
|
|
194
|
-
}
|
|
241
|
+
});
|
|
195
242
|
}
|
|
196
243
|
deleteTestCase(id) {
|
|
197
244
|
this.testCases = this.testCases.filter(tc => tc.id !== id);
|
|
@@ -230,38 +277,6 @@ const LLMTestRunner = class {
|
|
|
230
277
|
this.updateTestCase(testCaseId, { expectedSourceLinks: newLinks });
|
|
231
278
|
}
|
|
232
279
|
}
|
|
233
|
-
async callGeminiAPI(prompt) {
|
|
234
|
-
console.log('🔍 callGeminiAPI - apiKey:', this.apiKey ? 'SET' : 'NOT SET');
|
|
235
|
-
console.log('🔍 callGeminiAPI - apiKey value:', this.apiKey ? `${this.apiKey.substring(0, 10)}...` : 'undefined');
|
|
236
|
-
if (!this.apiKey) {
|
|
237
|
-
throw new Error('API key is required. Please provide the apiKey prop.');
|
|
238
|
-
}
|
|
239
|
-
const requestBody = {
|
|
240
|
-
contents: [{
|
|
241
|
-
parts: [{
|
|
242
|
-
text: prompt
|
|
243
|
-
}]
|
|
244
|
-
}]
|
|
245
|
-
};
|
|
246
|
-
const response = await fetch(`${this.apiUrl}?key=${this.apiKey}`, {
|
|
247
|
-
method: 'POST',
|
|
248
|
-
headers: {
|
|
249
|
-
'Content-Type': 'application/json',
|
|
250
|
-
},
|
|
251
|
-
body: JSON.stringify(requestBody)
|
|
252
|
-
});
|
|
253
|
-
if (!response.ok) {
|
|
254
|
-
const errorData = await response.json().catch(() => ({}));
|
|
255
|
-
throw new Error(errorData.error?.message || `HTTP error! status: ${response.status}`);
|
|
256
|
-
}
|
|
257
|
-
const data = await response.json();
|
|
258
|
-
if (data.candidates && data.candidates[0] && data.candidates[0].content) {
|
|
259
|
-
return data.candidates[0].content.parts[0].text;
|
|
260
|
-
}
|
|
261
|
-
else {
|
|
262
|
-
throw new Error('Unexpected response format from Gemini API');
|
|
263
|
-
}
|
|
264
|
-
}
|
|
265
280
|
async evaluateResponse(testCase) {
|
|
266
281
|
if (!testCase.output) {
|
|
267
282
|
console.warn('⚠️ No output to evaluate for test case:', testCase.id);
|
|
@@ -283,13 +298,21 @@ const LLMTestRunner = class {
|
|
|
283
298
|
}
|
|
284
299
|
async runAllTests() {
|
|
285
300
|
this.isRunningAll = true;
|
|
301
|
+
const tasks = [];
|
|
286
302
|
for (const testCase of this.testCases) {
|
|
287
303
|
if (!testCase.isRunning && testCase.question.trim()) {
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
|
|
304
|
+
tasks.push(() => this.runSingleTest(testCase).catch(err => {
|
|
305
|
+
console.error(`⚠️ Test case ${testCase.id} failed`, err);
|
|
306
|
+
}));
|
|
291
307
|
}
|
|
292
308
|
}
|
|
309
|
+
try {
|
|
310
|
+
const fetcher = new RateLimitedFetcher(this.delayMs);
|
|
311
|
+
await fetcher.runAll(tasks);
|
|
312
|
+
}
|
|
313
|
+
catch (err) {
|
|
314
|
+
console.error('⚠️ Error running all tests:', err);
|
|
315
|
+
}
|
|
293
316
|
this.isRunningAll = false;
|
|
294
317
|
}
|
|
295
318
|
generateId() {
|
|
@@ -372,8 +395,78 @@ const LLMTestRunner = class {
|
|
|
372
395
|
this.isExportingTestSuite = false;
|
|
373
396
|
}
|
|
374
397
|
}
|
|
398
|
+
async handleExportTestResults() {
|
|
399
|
+
this.isExportingTestResults = true;
|
|
400
|
+
try {
|
|
401
|
+
// Create CSV content with the required fields
|
|
402
|
+
const csvRows = [];
|
|
403
|
+
// Add header row
|
|
404
|
+
const headers = [
|
|
405
|
+
'Question',
|
|
406
|
+
'Expected Keywords',
|
|
407
|
+
'Generated Keywords',
|
|
408
|
+
'Keywords Match',
|
|
409
|
+
'Expected Source Links',
|
|
410
|
+
'Generated Source Links',
|
|
411
|
+
'Source Links Match',
|
|
412
|
+
'Response Time (s)'
|
|
413
|
+
];
|
|
414
|
+
csvRows.push(headers.join(','));
|
|
415
|
+
// Add data rows
|
|
416
|
+
this.testCases.forEach(testCase => {
|
|
417
|
+
const expectedKeywords = testCase.expectedKeywords.join('; ');
|
|
418
|
+
const expectedSourceLinks = testCase.expectedSourceLinks.join('; ');
|
|
419
|
+
let generatedKeywords = '';
|
|
420
|
+
let generatedSourceLinks = '';
|
|
421
|
+
let keywordsMatch = '';
|
|
422
|
+
let sourceLinksMatch = '';
|
|
423
|
+
if (testCase.evaluationResult) {
|
|
424
|
+
const foundKeywords = testCase.evaluationResult.keywordMatches
|
|
425
|
+
.filter(match => match.found)
|
|
426
|
+
.map(match => match.keyword);
|
|
427
|
+
const foundSourceLinks = testCase.evaluationResult.sourceLinkMatches
|
|
428
|
+
.filter(match => match.found)
|
|
429
|
+
.map(match => match.link);
|
|
430
|
+
generatedKeywords = foundKeywords.join('; ');
|
|
431
|
+
generatedSourceLinks = foundSourceLinks.join('; ');
|
|
432
|
+
// Calculate match percentages
|
|
433
|
+
const keywordMatchCount = testCase.evaluationResult.keywordMatches.filter(m => m.found).length;
|
|
434
|
+
const totalKeywords = testCase.evaluationResult.keywordMatches.length;
|
|
435
|
+
const sourceLinkMatchCount = testCase.evaluationResult.sourceLinkMatches.filter(m => m.found).length;
|
|
436
|
+
const totalSourceLinks = testCase.evaluationResult.sourceLinkMatches.length;
|
|
437
|
+
keywordsMatch = totalKeywords > 0 ? `${keywordMatchCount}/${totalKeywords}` : 'N/A';
|
|
438
|
+
sourceLinksMatch = totalSourceLinks > 0 ? `${sourceLinkMatchCount}/${totalSourceLinks}` : 'N/A';
|
|
439
|
+
}
|
|
440
|
+
const responseTime = testCase.responseTime ? (testCase.responseTime / 1000).toFixed(3) : 'N/A';
|
|
441
|
+
const row = [
|
|
442
|
+
this.escapeCsvField(testCase.question),
|
|
443
|
+
this.escapeCsvField(expectedKeywords),
|
|
444
|
+
this.escapeCsvField(generatedKeywords),
|
|
445
|
+
keywordsMatch,
|
|
446
|
+
this.escapeCsvField(expectedSourceLinks),
|
|
447
|
+
this.escapeCsvField(generatedSourceLinks),
|
|
448
|
+
sourceLinksMatch,
|
|
449
|
+
responseTime
|
|
450
|
+
];
|
|
451
|
+
csvRows.push(row.join(','));
|
|
452
|
+
});
|
|
453
|
+
const csvContent = csvRows.join('\n');
|
|
454
|
+
// Added a small delay to show the loading state
|
|
455
|
+
await new Promise(resolve => setTimeout(resolve, 500));
|
|
456
|
+
this.downloadFile(csvContent, 'test-results.csv', 'text/csv');
|
|
457
|
+
}
|
|
458
|
+
finally {
|
|
459
|
+
this.isExportingTestResults = false;
|
|
460
|
+
}
|
|
461
|
+
}
|
|
462
|
+
escapeCsvField(field) {
|
|
463
|
+
if (field.includes(',') || field.includes('"') || field.includes('\n')) {
|
|
464
|
+
return `"${field.replace(/"/g, '""')}"`;
|
|
465
|
+
}
|
|
466
|
+
return field;
|
|
467
|
+
}
|
|
375
468
|
render() {
|
|
376
|
-
return (h("div", { key: '
|
|
469
|
+
return (h("div", { key: 'beb2ab78108fede00c5d759d1ac5c98ae7f037d0', class: "test-runner-container" }, h("header", { key: 'd09b424069227500ffffcab8724b48d91705baab', class: "test-runner-header" }, h("div", { key: '368ef117a1c2cc0be0446e2d373e38e0af1c2040', class: "header-left" }, h("input", { key: '74346a634b36e9d04f75817495834983d970306c', class: "hidden", type: "file", ref: (el) => (this.fileInput = el), onChange: (e) => this.handleFileChange(e), accept: ".json,application/json" }), h("button", { key: 'e095ff9de105cb281e60f70de02fb0de8a0f6478', class: "btn btn-secondary", onClick: () => this.handleFileSelect() }, h("span", { key: '6f828f297867ae54baf1f89c9dbce611eba2056a', class: "icon" }, "\u2191"), "Import Test Suite"), h("button", { key: '5682dc8f70edac9fdf33b394677a6859555c039b', class: "btn btn-secondary", onClick: () => this.handleExportTestSuite(), disabled: this.isExportingTestSuite }, h("span", { key: 'a1e8d3170ce39e499160e578e3e02d39cd0da9d7', class: "icon" }, this.isExportingTestSuite ? '⏳' : '↓'), this.isExportingTestSuite ? 'Exporting...' : 'Export Test Suite')), h("div", { key: 'af96ea230b04f4f6ce8e3488a1f8f7f2a01e74c0', class: "header-right" }, h("button", { key: 'a90bfb3421f5e2e5112b3642ddf81cf19372e0ad', class: "btn btn-secondary" }, h("span", { key: '79611828e5dc1ad53998a74fca10a1a05c5d2a4e', class: "icon" }, "\u2699\uFE0F"), "Prompt Editor"), h("button", { key: '451367ce26183a58add0902e1de2d29b4933add4', class: "btn btn-secondary", onClick: () => this.handleExportTestResults(), disabled: this.isExportingTestResults }, h("span", { key: '11f791f2071b58e0800d14ac7e6bebba67421449', class: "icon" }, this.isExportingTestResults ? '⏳' : '↓'), this.isExportingTestResults ? 'Exporting...' : 'Export Test Results'), h("button", { key: '3cbf7626ccc5f171cdc6fd7ce3750ea3786218aa', class: "btn btn-primary", onClick: () => this.runAllTests(), disabled: this.isRunningAll }, this.isRunningAll ? 'Running...' 
: 'Run All'))), h(ErrorMessage, { key: 'ffdd72a1db1f45c5db158d2df0be64dc978fa930', message: this.error, onClear: () => (this.error = '') }), h("div", { key: '64edd5c84c4b566f90d1860f6d6bf5aeafb7ae0f', class: "test-runner-content" }, h("div", { key: '4be27d07a13d88611b5fc54b05e826c790fd2145', class: "column-headers" }, h("div", { key: 'eeb9ace5e63b80359d2decb1b42461f77cfe98cf', class: "column-header" }, "Input"), h("div", { key: 'c70cc5af0aaca2636eee298dfaa4da81799df6cf', class: "column-header" }, "Output"), h("div", { key: '9ca69fddafe7267bf5b8b12601e2cf64d65a58e1', class: "column-header" }, "Evaluation"), h("div", { key: 'b800841828c22a6aad0345e58b47f71469594792', class: "column-header" }, "Actions")), h("div", { key: 'da186e25c2111474f65b89a017158bffdbc83e03', class: "test-cases" }, this.testCases.map((testCase) => (h("div", { class: "test-case-row", key: testCase.id }, h("div", { class: "input-column" }, h("div", { class: "input-group" }, h("label", null, "Question"), h("textarea", { value: testCase.question, onInput: (e) => this.updateTestCase(testCase.id, {
|
|
377
470
|
question: e.target.value
|
|
378
471
|
}), placeholder: "Enter your question here...", rows: 3 })), h("div", { class: "keywords-group" }, h("label", null, "Expected keywords"), h("div", { class: "tags-container" }, testCase.expectedKeywords.map((keyword, index) => (h("span", { class: "tag", key: index }, keyword, h("button", { class: "tag-remove", onClick: () => this.removeKeyword(testCase.id, index) }, "\u00D7")))), h("input", { type: "text", placeholder: "New item...", onKeyDown: (e) => {
|
|
379
472
|
if (e.key === 'Enter') {
|
|
@@ -385,7 +478,7 @@ const LLMTestRunner = class {
|
|
|
385
478
|
this.addSourceLink(testCase.id, e.target.value);
|
|
386
479
|
e.target.value = '';
|
|
387
480
|
}
|
|
388
|
-
} })))), h("div", { class: "output-column" }, testCase.output ? (h("div", { class: "output-content" }, testCase.output)) : (h("div", { class: "output-placeholder" }, testCase.isRunning ? 'Running...' : ''))), h("div", { class: "evaluation-column" }, testCase.evaluationResult ? (h("div", { class: "evaluation-result" }, h("div", { class: `evaluation-status ${testCase.evaluationResult.passed ? 'passed' : 'failed'}` }, testCase.evaluationResult.passed ? '✅ PASSED' : '❌ FAILED'), h("div", { class: "evaluation-details" }, "Keywords: ", testCase.evaluationResult.keywordMatches.filter(m => m.found).length, "/", testCase.evaluationResult.keywordMatches.length, " found"))) : (h("div", { class: "evaluation-placeholder" }, testCase.isRunning ? 'Evaluating...' : ''))), h("div", { class: "actions-column" }, h("button", { class: "btn btn-icon btn-run", onClick: () => this.runSingleTest(testCase), disabled: testCase.isRunning || !testCase.question.trim(), title: !testCase.question.trim() ? "Enter a question first" : "Run this test" }, testCase.isRunning ? '⏳' : '▶️'), h("button", { class: "btn btn-icon btn-delete", onClick: () => this.deleteTestCase(testCase.id), title: "Delete this test" }, "\uD83D\uDDD1\uFE0F")))))), h("div", { key: '
|
|
481
|
+
} })))), h("div", { class: "output-column" }, testCase.output ? (h("div", { class: "output-content" }, testCase.output)) : (h("div", { class: "output-placeholder" }, testCase.isRunning ? 'Running...' : ''))), h("div", { class: "evaluation-column" }, testCase.evaluationResult ? (h("div", { class: "evaluation-result" }, h("div", { class: `evaluation-status ${testCase.evaluationResult.passed ? 'passed' : 'failed'}` }, testCase.evaluationResult.passed ? '✅ PASSED' : '❌ FAILED'), h("div", { class: "evaluation-details" }, "Keywords: ", testCase.evaluationResult.keywordMatches.filter(m => m.found).length, "/", testCase.evaluationResult.keywordMatches.length, " found"))) : (h("div", { class: "evaluation-placeholder" }, testCase.isRunning ? 'Evaluating...' : ''))), h("div", { class: "actions-column" }, h("button", { class: "btn btn-icon btn-run", onClick: () => this.runSingleTest(testCase), disabled: testCase.isRunning || !testCase.question.trim(), title: !testCase.question.trim() ? "Enter a question first" : "Run this test" }, testCase.isRunning ? '⏳' : '▶️'), h("button", { class: "btn btn-icon btn-delete", onClick: () => this.deleteTestCase(testCase.id), title: "Delete this test" }, "\uD83D\uDDD1\uFE0F")))))), h("div", { key: '2d4446b15a31adfbaf88b3d46fcbb3cf314f8861', class: "add-test-case" }, h("button", { key: '737e9d856db0f9090923bef718bb9bfde974b950', class: "btn btn-outline", onClick: () => this.addNewTestCase() }, "+ Add Question")))));
|
|
389
482
|
}
|
|
390
483
|
};
|
|
391
484
|
LLMTestRunner.style = llmTestRunnerCss;
|