llm-testrunner-components 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (71)
  1. package/LICENSE +1 -1
  2. package/README.md +165 -242
  3. package/dist/cjs/index.cjs.js +298 -232
  4. package/dist/cjs/index.cjs.js.map +1 -1
  5. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js +25 -54
  6. package/dist/collection/components/llm-test-runner/llm-test-runner.import-export.test.js.map +1 -1
  7. package/dist/collection/components/llm-test-runner/llm-test-runner.js +6 -49
  8. package/dist/collection/components/llm-test-runner/llm-test-runner.js.map +1 -1
  9. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.css +60 -21
  10. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js +3 -1
  11. package/dist/collection/components/llm-test-runner/test-cases/evaluation/evaluation-summary.js.map +1 -1
  12. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js +31 -11
  13. package/dist/collection/components/llm-test-runner/test-cases/expected-outcome-renderer.js.map +1 -1
  14. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.css +17 -0
  15. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js +2 -12
  16. package/dist/collection/components/llm-test-runner/test-cases/llm-test-case-row.js.map +1 -1
  17. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js +2 -2
  18. package/dist/collection/components/llm-test-runner/test-cases/llm-test-cases.js.map +1 -1
  19. package/dist/collection/lib/evaluation/evaluation-engine.js +63 -42
  20. package/dist/collection/lib/evaluation/evaluation-engine.js.map +1 -1
  21. package/dist/collection/lib/evaluation/evaluation-service.js +15 -3
  22. package/dist/collection/lib/evaluation/evaluation-service.js.map +1 -1
  23. package/dist/collection/lib/evaluation/{rouge1-evaluator.test.js → evaluators/rouge1-evaluator.test.js} +2 -2
  24. package/dist/collection/lib/evaluation/evaluators/rouge1-evaluator.test.js.map +1 -0
  25. package/dist/collection/lib/evaluation/field-evaluation-approach.js +24 -0
  26. package/dist/collection/lib/evaluation/field-evaluation-approach.js.map +1 -0
  27. package/dist/collection/lib/evaluation/index.js +0 -4
  28. package/dist/collection/lib/evaluation/index.js.map +1 -1
  29. package/dist/collection/lib/evaluation/types.js.map +1 -1
  30. package/dist/collection/lib/import-export/test-results-csv.js +47 -33
  31. package/dist/collection/lib/import-export/test-results-csv.js.map +1 -1
  32. package/dist/collection/lib/import-export/test-suite-exporter.js +0 -1
  33. package/dist/collection/lib/import-export/test-suite-exporter.js.map +1 -1
  34. package/dist/collection/lib/test-cases/test-case-factory.js +17 -27
  35. package/dist/collection/lib/test-cases/test-case-factory.js.map +1 -1
  36. package/dist/collection/lib/test-cases/test-case-mutations.js +60 -9
  37. package/dist/collection/lib/test-cases/test-case-mutations.js.map +1 -1
  38. package/dist/collection/schemas/expected-outcome.js +20 -2
  39. package/dist/collection/schemas/expected-outcome.js.map +1 -1
  40. package/dist/collection/schemas/test-case.js +2 -20
  41. package/dist/collection/schemas/test-case.js.map +1 -1
  42. package/dist/collection/types/llm-test-runner.js.map +1 -1
  43. package/dist/collection/types/test-case.js.map +1 -1
  44. package/dist/components/index.js +1 -1
  45. package/dist/components/llm-test-runner.js +1 -1
  46. package/dist/components/p-Bb89MYYu.js +7 -0
  47. package/dist/components/p-Bb89MYYu.js.map +1 -0
  48. package/dist/esm/index.js +298 -232
  49. package/dist/esm/index.js.map +1 -1
  50. package/dist/llm-testrunner/index.esm.js +2 -2
  51. package/dist/llm-testrunner/index.esm.js.map +1 -1
  52. package/dist/types/components/llm-test-runner/llm-test-runner.d.ts +0 -1
  53. package/dist/types/components/llm-test-runner/test-cases/expected-outcome-renderer.d.ts +3 -6
  54. package/dist/types/components/llm-test-runner/test-cases/llm-test-case-row.d.ts +0 -2
  55. package/dist/types/components/llm-test-runner/test-cases/llm-test-cases.d.ts +0 -2
  56. package/dist/types/lib/evaluation/evaluation-engine.d.ts +4 -2
  57. package/dist/types/lib/evaluation/field-evaluation-approach.d.ts +6 -0
  58. package/dist/types/lib/evaluation/index.d.ts +0 -1
  59. package/dist/types/lib/evaluation/types.d.ts +26 -0
  60. package/dist/types/lib/import-export/test-suite-exporter.d.ts +0 -4
  61. package/dist/types/lib/test-cases/test-case-factory.d.ts +2 -3
  62. package/dist/types/lib/test-cases/test-case-mutations.d.ts +21 -5
  63. package/dist/types/schemas/expected-outcome.d.ts +65 -17
  64. package/dist/types/schemas/test-case.d.ts +51 -95
  65. package/dist/types/types/llm-test-runner.d.ts +1 -1
  66. package/dist/types/types/test-case.d.ts +1 -1
  67. package/package.json +9 -2
  68. package/dist/collection/lib/evaluation/rouge1-evaluator.test.js.map +0 -1
  69. package/dist/components/p-BF90yb1z.js +0 -7
  70. package/dist/components/p-BF90yb1z.js.map +0 -1
  71. package/dist/types/lib/evaluation/{rouge1-evaluator.test.d.ts → evaluators/rouge1-evaluator.test.d.ts} +0 -0
package/LICENSE CHANGED
@@ -1,6 +1,6 @@
  MIT License

- Copyright (c) 2024
+ Copyright (c) 2024 Fluxon Apps LLC

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
package/README.md CHANGED
@@ -1,298 +1,221 @@
- # LLM TestRunner Web Components
+ # LLM TestRunner Components

- A Stencil web component library that provides a comprehensive LLM testing solution with automated evaluation capabilities.
+ **A ready-made UI for testing your LLM.** Add questions and expected outcomes, run tests one-by-one or in batch, and get pass/fail results using five evaluation strategies—while you keep full control over which LLM you call (OpenAI, Gemini, Claude, or your own).

- ## Overview
+ [![npm](https://img.shields.io/npm/v/llm-testrunner-components.svg)](https://www.npmjs.com/package/llm-testrunner-components) [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)

- The LLM TestRunner is a tool for testing Large Language Model (LLM) responses against expected criteria. It provides a complete interface for:
+ ---

- - **Question Management**: Add, edit, and organize test questions
- - **AI Integration**: Can be integrated with any LLM provider
- - **Automated Evaluation**: Built-in evaluation engine that checks responses against expected keywords and source links
- - **Batch Testing**: Run multiple tests sequentially
- - **Real-time Results**: Live evaluation results with pass/fail indicators, including details such as:
- - Number of keywords matched.
- - Presence of source links in the response.
+ ## Why use this

- > **Note:** Source-link checking uses _overlap/partial match_.
- > A full URL match is **not required** any overlapping portion of the expected link (for example, matching the domain or path segment) in the response counts as present.
+ - **Test faster** — You get a complete test-runner UI (questions, expected outcomes, run one / run all, pass/fail, response times). No need to build tables, evaluation logic, or import/export from scratch.
+ - **Stay in control** — The library never calls an LLM. You handle one event: we send you the prompt, you call your API and pass back the response (or an error). Works with any provider or local model.
+ - **Match how you think** — Each expected-outcome field can use a different evaluation: exact keywords, semantic similarity (meaning), ROUGE (word overlap / sequence), or BLEU (n-gram precision). Choose per field.
+ - **Fit your stack** — Load test cases from your backend or a JSON file. Optionally persist runs with a Save button that emits the current state so you can store it in Firebase, your API, or anywhere else.

- ## Components
+ ---

- ### `<llm-test-runner>`
+ ## What you get

- The main component that provides a complete LLM testing interface.
+ - **Test case table** — Add, edit, delete test cases. Each test case has a question, configurable expected-outcome fields (single line, paragraph, keyword chips, dropdown), and a per-field evaluation approach (exact, semantic, ROUGE-1, ROUGE-L, BLEU).
+ - **Run one or run all** — Run a single test or batch with a configurable delay between API calls (rate limiting).
+ - **Live results** — Pass/fail, keyword match count (e.g. X/Y found), and response time per test.
+ - **Import / export** — Import a test suite from JSON. Export the current suite as JSON or export run results as CSV.
+ - **Optional save** — When enabled, a Save button emits the current test cases so your app can persist them (e.g. to your backend).

- **Features:**
+ ---

- - Question input with expected keywords and source links
- - Real-time AI response generation any LLM provider
- - Test case management (add, delete, run individual or all tests)
- - Built-in evaluation engine with keyword and source link matching
- - Error handling and loading states
- - Rate limiting for batch operations
+ ## Installation

- **Usage:**
+ ```bash
+ npm install llm-testrunner-components
+ ```

- ```html
- <llm-test-runner delay-ms="1000"></llm-test-runner>
+ ---
+
+ ## Get started (React)
+
+ **Step 1 — Register the custom elements once** (e.g. in your app entry):
+
+ ```tsx
+ // e.g. in main.tsx or App.tsx
+ import { defineCustomElements } from "llm-testrunner-components/loader";
+
+ defineCustomElements();
  ```

- ## 🎯 Usage Modes
+ **Step 2 — Use the component and connect your LLM.** The runner fires an `llmRequest` event whenever it needs a response. You call your API, then either `resolve(responseText)` or `reject(error)`.

- ### 1. Direct HTML Usage
+ ```tsx
+ import { useRef } from "react";
+ import { LlmTestRunner } from "llm-testrunner-components/react";

- Simply include the component in your HTML:
+ function App() {
+ const runnerRef = useRef<any>(null);

- ```html
- <!DOCTYPE html>
- <html>
- <head>
- <script type="module" src="/build/llm-testrunner.esm.js"></script>
- <script nomodule src="/build/llm-testrunner.js"></script>
- </head>
- <body>
- <llm-test-runner id="llm-test-runner" delay-ms="1000"></llm-test-runner>
- </body>
- <script>
- const llmTestRunner = document.getElementById('llm-test-runner');
- // Gemini API
- async function handlellmRequest(event) {
- try {
- const requestBody = {
- contents: [
- {
- parts: [
- {
- text: event.detail.prompt,
- },
- ],
- },
- ],
- };
-
- const response = await fetch(
- `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=your-gemini-api-key-here`,
- {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify(requestBody),
- },
- );
-
- if (!response.ok) {
- const errorData = await response.json().catch(() => ({}));
- throw new Error(
- errorData.error?.message ||
- `HTTP error! status: ${response.status}`,
- );
- }
-
- const data = await response.json();
-
- if (
- data.candidates &&
- data.candidates[0] &&
- data.candidates[0].content
- ) {
- event.detail.resolve(data.candidates[0].content.parts[0].text);
- } else {
- throw new Error('Unexpected response format from Gemini API');
- }
- } catch (err) {
- event.detail.reject(
- err instanceof Error ? err : new Error(String(err)),
- );
- }
+ const handleLlmRequest = async (e) => {
+ try {
+ const response = await yourLLMApi(e.detail.prompt);
+ e.detail.resolve(response);
+ } catch (err) {
+ e.detail.reject(err);
  }
- llmTestRunner.addEventListener('llmRequest', handlellmRequest);
- </script>
- </html>
+ };
+
+ const handleSave = async (e) => {
+ await yourSaveApi(e.detail);
+ await runnerRef.current?.resetSavingState();
+ };
+
+ return (
+ <LlmTestRunner
+ ref={runnerRef}
+ onLlmRequest={handleLlmRequest}
+ onSave={handleSave}
+ delayMs={500}
+ useSave={true}
+ />
+ );
+ }
  ```

- ### 2. Library Integration
+ That’s enough for a working runner. Replace `yourLLMApi` and `yourSaveApi` with your real calls. If you don’t need persistence, omit `useSave`, `onSave`, and `ref` / `resetSavingState`.
+
+ ---

- Import as a module in your application:
+ ## Get started (vanilla HTML)

- ```javascript
- import { LLMTestRunner } from 'llm-testrunner-components';
+ Load the loader and define the custom elements, then listen for `llmRequest` and call `resolve` or `reject`.

- // The component is automatically registered and ready to use
+ ```html
+ <llm-test-runner id="runner" delay-ms="500"></llm-test-runner>
+
+ <script type="module">
+ import { defineCustomElements } from "https://unpkg.com/llm-testrunner-components@1/loader/index.js";
+ defineCustomElements();
+
+ const runner = document.getElementById("runner");
+ runner.addEventListener("llmRequest", async (e) => {
+ try {
+ const response = await yourLLMFetch(e.detail.prompt);
+ e.detail.resolve(response);
+ } catch (err) {
+ e.detail.reject(err);
+ }
+ });
+ </script>
  ```

- ## Configuration
+ ---

- ### 🧠 delayMs Prop — Controlling API Rate Limiting
+ ## Connect your LLM

- The `delayMs` prop allows you to control **how frequently API calls are made** when triggering multiple requests.
- This helps prevent exceeding **API rate limits** by spacing out requests automatically.
+ The library **never** sends requests to an LLM. You do. When a test runs, the component emits an `llmRequest` event with:

- ### ⚙️ Description
+ - `prompt` — the question text for this test case
+ - `resolve(responseText)` — call this with the model’s reply (string)
+ - `reject(error)` — call this if the request fails

- | Prop Name | Type | Default | Description |
- | --------- | -------- | ----------- | -------------------------------------------------------------------------------------------------------------------- |
- | `delayMs` | `number` | `undefined` | Optional delay (in milliseconds) between consecutive API calls. If not provided, all API calls are made in parallel. |
+ How you get the response is up to you: REST, SDK, or local inference. Same pattern for OpenAI, Gemini, Claude, or any other provider.
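For illustration, here is one way the `llmRequest` contract above could be wired to OpenAI's Chat Completions endpoint. This is a sketch, not part of the package: the endpoint, model name, and response parsing are assumptions about OpenAI's API, and how you supply the key is up to you.

```ts
// Sketch only: wiring llmRequest to OpenAI's Chat Completions API.
// The { prompt, resolve, reject } shape comes from the README above;
// everything OpenAI-specific here is an assumption about their API, not this library.
declare const OPENAI_API_KEY: string; // supplied by your app, ideally via a backend proxy

const runner = document.querySelector("llm-test-runner");

runner?.addEventListener("llmRequest", async (event) => {
  const { prompt, resolve, reject } = (event as CustomEvent).detail;
  try {
    const res = await fetch("https://api.openai.com/v1/chat/completions", {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${OPENAI_API_KEY}`,
      },
      body: JSON.stringify({
        model: "gpt-4o-mini",
        messages: [{ role: "user", content: prompt }],
      }),
    });
    if (!res.ok) throw new Error(`HTTP ${res.status}`);
    const data = await res.json();
    // Hand the reply text back to the runner so it can evaluate it.
    resolve(data.choices[0].message.content);
  } catch (err) {
    reject(err instanceof Error ? err : new Error(String(err)));
  }
});
```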

- ```html
- <llm-test-runner delay-ms="2000"></llm-test-runner>
- ```
+ ---

- ### React/JSX Usage
+ ## Loading and saving test cases

- ```jsx
- function App() {
- return (
- <div>
- <llm-test-runner delayMs="1000" />
- </div>
- );
- }
- ```
+ **Loading** — Pass `initialTestCases` with an array of test cases (e.g. from your backend or a file). You can use the full `TestCase` shape or a minimal one: `question` and `expectedOutcome`. The runner will fill in `id` and run state.

- ## Evaluation Engine
+ **Saving** — Set `useSave={true}` to show the Save button. When the user clicks it, the component emits a `save` event with `{ timestamp, testCases }`. Persist that in your backend (e.g. Firebase or your API). After the save completes, call `runnerRef.current.resetSavingState()` so the button leaves the loading state. If you don’t call it, a failsafe resets it after 10 seconds.
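For illustration, a minimal sketch of that save flow outside React; `/api/test-suites` is a placeholder endpoint on your own backend, not something the package provides.

```ts
// Sketch: persist the save payload ({ timestamp, testCases }) described above.
const runner = document.querySelector("llm-test-runner") as any;

runner?.addEventListener("save", async (event: Event) => {
  const { timestamp, testCases } = (event as CustomEvent).detail;
  try {
    await fetch("/api/test-suites", { // placeholder endpoint on your backend
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ timestamp, testCases }),
    });
  } finally {
    // Let the Save button leave its loading state (a 10 s failsafe exists anyway).
    await runner.resetSavingState();
  }
});
```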

- The built-in evaluation engine provides:
+ ---

- - **Keyword Matching**: Case-insensitive matching of expected keywords in AI responses
- - **Source Link Validation**: Checks for presence of expected URLs in responses
- - **Pass/Fail Logic**: Tests pass only when ALL expected items are found
- - **Detailed Results**: Shows which keywords and links were found/missing
+ ## Evaluation: pick the right approach

- ### Evaluation Criteria
+ Each expected-outcome field can use a different evaluation method. All of them compare the **expected** text for that field to the **actual** LLM response. A test **passes only if every field** passes with its selected method.

- - **Keywords**: Must be present in the AI response (case-insensitive)
- - **Source Links**: Must be present as exact URL matches
- - **Pass Condition**: ALL expected keywords AND source links must be found
+ | Approach | What it measures | Good for | Paraphrasing / synonyms | Speed |
+ | --------- | ----------------------------- | --------------------------------------------- | ------------------------ | ------------ |
+ | **Exact** | Literal keyword in response | Strict wording, facts, templates | No | Fast |
+ | **ROUGE-1** | Word overlap (unigram) | Slight paraphrasing, same key words | Moderate | Fast |
+ | **ROUGE-L** | Longest common subsequence | Phrasing and word order matter | Moderate–high | Slightly slower |
+ | **Semantic** | Meaning (embeddings + cosine) | Different words, same meaning | Yes | First run loads model |
+ | **BLEU** | N-gram precision (1–4) | Translation-like or n-gram overlap | Moderate | Fast |

- ## Using in React Applications
+ - Set **per expected-outcome field** via the dropdown in the UI, or via each field’s `evaluationParameters.approach` when you pass `initialTestCases`.
+ - **ROUGE, BLEU, and Semantic** use a fixed threshold (0.7).
+ - **Semantic** uses in-browser embeddings ([Xenova/all-MiniLM-L6-v2](https://huggingface.co/Xenova/all-MiniLM-L6-v2)). The first time you use it, the model is downloaded; later runs are faster.
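To make the word-overlap idea concrete, here is a rough, self-contained illustration of ROUGE-1 recall checked against the 0.7 threshold. It sketches the general technique only and is not the package's own evaluator.

```ts
// Illustration only, not the package's evaluator: a rough ROUGE-1 recall,
// i.e. the fraction of the expected text's words that also appear in the response.
function rouge1Recall(expected: string, actual: string): number {
  const tokenize = (s: string) => s.toLowerCase().match(/[a-z0-9']+/g) ?? [];
  const expectedTokens = tokenize(expected);
  const actualSet = new Set(tokenize(actual));
  if (expectedTokens.length === 0) return 0;
  const matched = expectedTokens.filter((t) => actualSet.has(t)).length;
  return matched / expectedTokens.length;
}

// With a 0.7 threshold this field would pass: every expected word appears in the response.
console.log(rouge1Recall("Paris is the capital of France", "The capital of France is Paris") >= 0.7); // true
```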

- ### Installation
+ ---

- ```bash
- npm install llm-testrunner-components
- ```
+ ## Expected outcome fields

- ### Integration
+ Expected outcomes can be more than a single text block. You can define:

- ```tsx
- import React, { useEffect } from 'react';
- import { defineCustomElements } from 'llm-testrunner-components/loader';
+ - **text** — Single line
+ - **textarea** — Multi-line
+ - **chips-input** — List of keywords (each compared in evaluation)
+ - **select** — Dropdown (value must be one of the options)

- function App() {
- useEffect(() => {
- defineCustomElements();
- }, []);
+ When you pass `initialTestCases`, use an array of objects with `type`, `label`, and `value` (and for `select`, `options`). For **new** test cases, the runner uses `defaultExpectedOutcomeSchema` if you pass it; otherwise it uses a default single textarea.
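For illustration, a sketch of such an array using the field shape described above. The exact `approach` string values are a guess here; check `EvaluationParameters` in the exported types for the real ones.

```ts
// Sketch of preloading test cases; field objects follow the description above
// (type / label / value, plus a per-field evaluationParameters.approach).
// The approach strings below are assumptions, not confirmed values.
const initialTestCases = [
  {
    question: "What is the capital of France?",
    expectedOutcome: [
      {
        type: "chips-input",
        label: "Keywords",
        value: ["Paris", "France"],
        evaluationParameters: { approach: "exact" }, // assumed value
      },
      {
        type: "textarea",
        label: "Reference answer",
        value: "Paris is the capital of France.",
        evaluationParameters: { approach: "semantic" }, // assumed value
      },
    ],
  },
];

// Then pass it to the runner as in the React quickstart:
// <LlmTestRunner initialTestCases={initialTestCases} onLlmRequest={handleLlmRequest} />
```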

- const handlellmRequest = (event: CustomEvent<LLMRequestPayload>) => {
- try {
- console.log('🚀 callGeminiAPI called with prompt:', event.detail.prompt);
- const requestBody = {
- contents: [
- {
- parts: [
- {
- text: event.detail.prompt,
- },
- ],
- },
- ],
- };
-
- const response = await fetch(
- `https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent?key=your-gemini-api-key-here`,
- {
- method: 'POST',
- headers: {
- 'Content-Type': 'application/json',
- },
- body: JSON.stringify(requestBody),
- },
- );
-
- if (!response.ok) {
- const errorData = await response.json().catch(() => ({}));
- throw new Error(
- errorData.error?.message || `HTTP error! status: ${response.status}`,
- );
- }
-
- const data = await response.json();
-
- if (data.candidates && data.candidates[0] && data.candidates[0].content) {
- event.detail.resolve(data.candidates[0].content.parts[0].text);
- } else {
- throw new Error('Unexpected response format from Gemini API');
- }
- } catch (err) {
- event.detail.reject(err instanceof Error ? err : new Error(String(err)));
- }
- };
+ ---

- return (
- <div>
- <h1>LLM Test Runner</h1>
- <llm-test-runner llmRequest={handlellmRequest}></llm-test-runner>
- </div>
- );
- }
- ```
+ ## API reference

- ### TypeScript Support
+ ### Props

- ```tsx
- declare global {
- namespace JSX {
- interface IntrinsicElements {
- 'llm-test-runner': any;
- }
- }
- }
- ```
+ | Prop | Attribute | Type | Default | Description |
+ |------|-----------|------|---------|-------------|
+ | `delayMs` | `delay-ms` | `number` | `500` | Delay (ms) between API calls when running all tests (rate limiting). |
+ | `useSave` | `use-save` | `boolean` | `false` | Show Save button and emit `save` events. |
+ | `initialTestCases` | — | `TestCase[]` | `undefined` | Preload test cases. See [types](#types) below. |
+ | `defaultExpectedOutcomeSchema` | — | `ExpectedOutcomeSchema` | built-in | Schema for new test cases (field types and labels). |

- ## API Reference
+ ### Events

- ### Component Props
+ | Event | Payload | Description |
+ |-------|---------|-------------|
+ | `llmRequest` | `{ prompt, resolve, reject }` | Runner needs an LLM response. Call `resolve(responseText)` or `reject(error)`. |
+ | `save` | `{ timestamp, testCases }` | User clicked Save (only when `useSave` is true). Persist then call `resetSavingState()`. |

- ```typescript
- interface LLMTestRunnerProps {
- apiKey: string; // Required: Your Gemini API key
- }
- ```
+ ### Methods

- ### TestCase Interface
-
- ```typescript
- interface TestCase {
- id: string;
- question: string;
- expectedOutcome: string;
- output?: string;
- isRunning?: boolean;
- error?: string;
- evaluationResult?: EvaluationResult;
- }
- ```
+ | Method | Description |
+ |--------|-------------|
+ | `resetSavingState()` | Call after you finish persisting a save so the Save button leaves loading state. Use a ref in React. |

- ### EvaluationResult Interface
+ ### Types

- ```typescript
- interface EvaluationResult {
- testCaseId: string;
- passed: boolean;
- keywordMatches: KeywordMatch[];
- sourceLinkMatches: SourceLinkMatch[];
- timestamp?: string;
- }
+ Import from `llm-testrunner-components/react/types`:
+
+ ```ts
+ import type {
+ TestCase,
+ LLMRequestPayload,
+ SavePayload,
+ ExpectedOutcomeSchema,
+ ExpectedOutcomeField,
+ EvaluationParameters,
+ } from "llm-testrunner-components/react/types";
  ```
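For illustration, typed versions of the quickstart handlers using these exports; `yourLLMApi` and `yourSaveApi` remain placeholders for your own calls, and typing the `save` event with `SavePayload` is an assumption based on the payload shown above.

```ts
import type { LLMRequestPayload, SavePayload } from "llm-testrunner-components/react/types";

// Placeholders for your own API calls.
declare function yourLLMApi(prompt: string): Promise<string>;
declare function yourSaveApi(payload: SavePayload): Promise<void>;

const handleLlmRequest = async (e: CustomEvent<LLMRequestPayload>) => {
  try {
    e.detail.resolve(await yourLLMApi(e.detail.prompt));
  } catch (err) {
    e.detail.reject(err);
  }
};

const handleSave = async (e: CustomEvent<SavePayload>) => {
  await yourSaveApi(e.detail);
};
```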

- ### LLMRequestPayload Interface
+ ---

- ```typescript
- interface LLMRequestPayload {
- prompt: string;
- resolve: (result: string) => void;
- reject: (err: Error | unknown) => void;
- }
- ```
+ ## Import and export
+
+ - **Import** — Use the UI to load a JSON file. It must be an array of test cases. Invalid or empty files show an error.
+ - **Export test suite** — Downloads a JSON file with the current test cases.
+ - **Export results** — Downloads a CSV of the latest run (includes evaluation score).
+
+ ---
+
+ ## Contributing
+
+ We welcome contributions. See [CONTRIBUTING.md](CONTRIBUTING.md) for how to get started (opening issues, pull request workflow, and code of conduct).
+
+ ---
+
+ ## License
+
+ The project is licensed under the [MIT License](LICENSE).
+
+ Third-party licenses are in `node_modules/<package>/`. This project uses [licensee](https://github.com/jslicense/licensee.js) and the [Blue Oak Council](https://blueoakcouncil.org/list) permissive list; only dependencies with a Blue Oak bronze-or-better license (or an exception in [.licensee.json](.licensee.json)) are allowed. Run `npm run license-check` to verify locally.