llm-mock-server 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/dependabot.yml +11 -0
- package/.github/workflows/test.yml +34 -0
- package/.markdownlint.jsonc +11 -0
- package/.node-version +1 -0
- package/.oxlintrc.json +35 -0
- package/ARCHITECTURE.md +125 -0
- package/LICENCE +21 -0
- package/README.md +448 -0
- package/package.json +55 -0
- package/src/cli-validators.ts +56 -0
- package/src/cli.ts +128 -0
- package/src/formats/anthropic/index.ts +14 -0
- package/src/formats/anthropic/parse.ts +48 -0
- package/src/formats/anthropic/schema.ts +133 -0
- package/src/formats/anthropic/serialize.ts +91 -0
- package/src/formats/openai/index.ts +14 -0
- package/src/formats/openai/parse.ts +34 -0
- package/src/formats/openai/schema.ts +147 -0
- package/src/formats/openai/serialize.ts +92 -0
- package/src/formats/parse-helpers.ts +79 -0
- package/src/formats/responses/index.ts +14 -0
- package/src/formats/responses/parse.ts +56 -0
- package/src/formats/responses/schema.ts +143 -0
- package/src/formats/responses/serialize.ts +129 -0
- package/src/formats/types.ts +17 -0
- package/src/history.ts +66 -0
- package/src/index.ts +44 -0
- package/src/loader.ts +213 -0
- package/src/logger.ts +58 -0
- package/src/mock-server.ts +237 -0
- package/src/route-handler.ts +113 -0
- package/src/rule-engine.ts +119 -0
- package/src/sse-writer.ts +35 -0
- package/src/types/index.ts +4 -0
- package/src/types/reply.ts +49 -0
- package/src/types/request.ts +45 -0
- package/src/types/rule.ts +74 -0
- package/src/types.ts +5 -0
- package/test/cli-validators.test.ts +131 -0
- package/test/formats/anthropic-schema.test.ts +192 -0
- package/test/formats/anthropic.test.ts +260 -0
- package/test/formats/openai-schema.test.ts +105 -0
- package/test/formats/openai.test.ts +243 -0
- package/test/formats/responses-schema.test.ts +114 -0
- package/test/formats/responses.test.ts +299 -0
- package/test/loader.test.ts +314 -0
- package/test/mock-server.test.ts +565 -0
- package/test/rule-engine.test.ts +213 -0
- package/tsconfig.json +26 -0
- package/tsconfig.test.json +11 -0
- package/vitest.config.ts +18 -0
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Test
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
pull_request:
|
|
5
|
+
branches: [main]
|
|
6
|
+
|
|
7
|
+
permissions:
|
|
8
|
+
contents: read
|
|
9
|
+
|
|
10
|
+
jobs:
|
|
11
|
+
test:
|
|
12
|
+
runs-on: ubuntu-latest
|
|
13
|
+
|
|
14
|
+
steps:
|
|
15
|
+
- name: Checkout code
|
|
16
|
+
uses: actions/checkout@v6.0.2
|
|
17
|
+
|
|
18
|
+
- name: Setup Node.js
|
|
19
|
+
uses: actions/setup-node@v6.3.0
|
|
20
|
+
with:
|
|
21
|
+
node-version-file: 'package.json'
|
|
22
|
+
cache: 'npm'
|
|
23
|
+
|
|
24
|
+
- name: Install dependencies
|
|
25
|
+
run: npm ci
|
|
26
|
+
|
|
27
|
+
- name: Type check
|
|
28
|
+
run: tsc --noEmit && tsc --noEmit -p tsconfig.test.json
|
|
29
|
+
|
|
30
|
+
- name: Lint
|
|
31
|
+
run: npm run lint
|
|
32
|
+
|
|
33
|
+
- name: Run tests
|
|
34
|
+
run: npm test
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"default": true,
|
|
3
|
+
// Allow longer lines in tables and code blocks
|
|
4
|
+
"MD013": false,
|
|
5
|
+
// Allow duplicate headings in different sections
|
|
6
|
+
"MD024": { "siblings_only": true },
|
|
7
|
+
// Allow inline HTML
|
|
8
|
+
"MD033": false,
|
|
9
|
+
// Allow bare URLs
|
|
10
|
+
"MD034": false
|
|
11
|
+
}
|
package/.node-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
22
|
package/.oxlintrc.json
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
{
|
|
2
|
+
"plugins": ["typescript", "unicorn", "oxc", "import", "promise", "node"],
|
|
3
|
+
"categories": {
|
|
4
|
+
"correctness": "error",
|
|
5
|
+
"suspicious": "error",
|
|
6
|
+
"perf": "warn"
|
|
7
|
+
},
|
|
8
|
+
"rules": {
|
|
9
|
+
"no-console": "off",
|
|
10
|
+
"no-unused-vars": "off",
|
|
11
|
+
"no-magic-numbers": ["warn", { "ignore": [0, 1, -1, 2, 10] }],
|
|
12
|
+
"no-await-in-loop": "off",
|
|
13
|
+
"typescript/no-unused-vars": "off",
|
|
14
|
+
"typescript/no-non-null-assertion": "off",
|
|
15
|
+
"unicorn/filename-case": "off",
|
|
16
|
+
"unicorn/no-null": "off",
|
|
17
|
+
"unicorn/prevent-abbreviations": "off",
|
|
18
|
+
"unicorn/no-array-for-each": "off",
|
|
19
|
+
"unicorn/consistent-function-scoping": "off",
|
|
20
|
+
"import/no-default-export": "off",
|
|
21
|
+
"import/no-named-export": "off",
|
|
22
|
+
"import/prefer-default-export": "off",
|
|
23
|
+
"import/group-exports": "off",
|
|
24
|
+
"oxc/no-map-spread": "off"
|
|
25
|
+
},
|
|
26
|
+
"overrides": [
|
|
27
|
+
{
|
|
28
|
+
"files": ["test/**/*.ts"],
|
|
29
|
+
"rules": {
|
|
30
|
+
"no-magic-numbers": "off"
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
],
|
|
34
|
+
"ignorePatterns": ["dist/", "node_modules/"]
|
|
35
|
+
}
|
package/ARCHITECTURE.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# llm-mock-server architecture
|
|
2
|
+
|
|
3
|
+
A mock LLM server built on Fastify. Clients send requests in OpenAI, Anthropic, or Responses API format. The server normalises them into a common shape, matches against registered rules, and sends back responses in the right format.
|
|
4
|
+
|
|
5
|
+
```mermaid
|
|
6
|
+
flowchart LR
|
|
7
|
+
Client["Test or SDK client"]
|
|
8
|
+
Server["llm-mock-server<br/>(Fastify)"]
|
|
9
|
+
Rules["Rule engine"]
|
|
10
|
+
|
|
11
|
+
Client <-->|"OpenAI / Anthropic / Responses"| Server
|
|
12
|
+
Server <--> Rules
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
## Table of contents
|
|
16
|
+
|
|
17
|
+
- [Startup](#startup)
|
|
18
|
+
- [Format system](#format-system)
|
|
19
|
+
- [Request lifecycle](#request-lifecycle)
|
|
20
|
+
- [Rule engine](#rule-engine)
|
|
21
|
+
- [Types](#types)
|
|
22
|
+
- [Streaming](#streaming)
|
|
23
|
+
- [File loading](#file-loading)
|
|
24
|
+
- [Logging](#logging)
|
|
25
|
+
- [Security model](#security-model)
|
|
26
|
+
|
|
27
|
+
## Startup
|
|
28
|
+
|
|
29
|
+
[`cli.ts`](src/cli.ts) is the CLI entry point. It parses flags with Commander, validates them through [`cli-validators.ts`](src/cli-validators.ts), creates a `MockServer`, loads any rule files, and handles SIGINT/SIGTERM. With `--watch`, it sets up `fs.watch()` on the rules path and reloads on changes.
|
|
30
|
+
|
|
31
|
+
[`MockServer`](src/mock-server.ts) is the main class. The constructor creates a Fastify instance and registers a route handler for each format. Consumers interact with it through `when()`, `fallback()`, `load()`, and the lifecycle methods.
|
|
32
|
+
|
|
33
|
+
[`createMock()`](src/index.ts) is a convenience that creates a server and starts it in one call.
|
|
34
|
+
|
|
35
|
+
## Format system
|
|
36
|
+
|
|
37
|
+
A format is anything that satisfies the [`Format`](src/formats/types.ts) interface. It needs to be able to parse requests, serialise streaming and non-streaming responses, and produce error bodies.
|
|
38
|
+
|
|
39
|
+
Three formats are included:
|
|
40
|
+
|
|
41
|
+
| Format | Route | Directory |
|
|
42
|
+
| ------ | ----- | --------- |
|
|
43
|
+
| OpenAI | `POST /v1/chat/completions` | [`formats/openai/`](src/formats/openai/) |
|
|
44
|
+
| Anthropic | `POST /v1/messages` | [`formats/anthropic/`](src/formats/anthropic/) |
|
|
45
|
+
| OpenAI Responses | `POST /v1/responses` | [`formats/responses/`](src/formats/responses/) |
|
|
46
|
+
|
|
47
|
+
Each format directory has three files:
|
|
48
|
+
|
|
49
|
+
- `parse.ts` takes the incoming request body (plus HTTP headers and path) and turns it into a normalised [`MockRequest`](src/types/request.ts)
|
|
50
|
+
- `serialize.ts` takes a [`ReplyObject`](src/types/reply.ts) and produces SSE chunks or a JSON response
|
|
51
|
+
- `index.ts` wires parse and serialize together into a `Format` object
|
|
52
|
+
|
|
53
|
+
Shared helpers live in [`parse-helpers.ts`](src/formats/parse-helpers.ts). Functions like `buildMockRequest()`, `genId()`, `splitText()`, `shouldEmitText()`, and `finishReason()` are used by all three formats. `isStreaming()` is also shared because every format checks `body.stream !== false` the same way.
|
|
54
|
+
|
|
55
|
+
To add a new format (say, Gemini), you would create a new directory with those three files and add it to the `formats` array in `mock-server.ts`. Everything else (rule matching, streaming, logging, history) works automatically.
|
|
56
|
+
|
|
57
|
+
## Request lifecycle
|
|
58
|
+
|
|
59
|
+
```mermaid
|
|
60
|
+
flowchart TD
|
|
61
|
+
A[Incoming POST] --> B[Format parses body into MockRequest]
|
|
62
|
+
B --> C[Rule engine finds first matching rule]
|
|
63
|
+
C --> D{Match found?}
|
|
64
|
+
D -->|yes| E[Resolve reply, static or function]
|
|
65
|
+
D -->|no| F[Use fallback reply]
|
|
66
|
+
E --> G{Error reply?}
|
|
67
|
+
F --> G
|
|
68
|
+
G -->|yes| H[Return HTTP error with format-specific body]
|
|
69
|
+
G -->|no| I{Streaming?}
|
|
70
|
+
I -->|yes| J[Serialise to SSE chunks and write with latency]
|
|
71
|
+
I -->|no| K[Serialise to JSON and return]
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
The route handler logic lives in [`route-handler.ts`](src/route-handler.ts). It is generic over the format and gets its dependencies (engine, history, logger, options) through a `RouteHandlerDeps` object.
|
|
75
|
+
|
|
76
|
+
## Rule engine
|
|
77
|
+
|
|
78
|
+
[`RuleEngine`](src/rule-engine.ts) holds rules in an array and evaluates them in order. The first match wins.
|
|
79
|
+
|
|
80
|
+
Each rule has a compiled matcher function, a resolver (either a static value or a function), and an optional `remaining` counter for `.times()`. When a rule's counter hits zero it gets removed from the list.
|
|
81
|
+
|
|
82
|
+
`moveToFront()` is what powers the `.first()` API. Rules that call `.first()` get moved to index 0.
|
|
83
|
+
|
|
84
|
+
The matching itself goes through [`compileMatcher()`](src/rule-engine.ts), which turns a `Match` (string, regex, object, or function) into a predicate. Object matchers check each specified field with AND logic. An optional `predicate` field on `MatchObject` runs last, after all structured fields have passed, so you can combine declarative matching with custom logic.
|
|
85
|
+
|
|
86
|
+
## Types
|
|
87
|
+
|
|
88
|
+
Types are split across three files in [`src/types/`](src/types/):
|
|
89
|
+
|
|
90
|
+
- [`request.ts`](src/types/request.ts) has `MockRequest`, `Message`, `ToolDef`, and `FormatName`
|
|
91
|
+
- [`reply.ts`](src/types/reply.ts) has `Reply`, `ReplyObject`, `ErrorReply`, `ToolCall`, `Resolver`, and `ReplyOptions`
|
|
92
|
+
- [`rule.ts`](src/types/rule.ts) has `Match`, `MatchObject`, `PendingRule`, `RuleHandle`, `RuleSummary`, `Handler`, and `Rule`
|
|
93
|
+
|
|
94
|
+
[`src/types.ts`](src/types.ts) re-exports everything from the barrel so internal files can import from a single place.
|
|
95
|
+
|
|
96
|
+
## Streaming
|
|
97
|
+
|
|
98
|
+
[`writeSSE()`](src/sse-writer.ts) writes an array of `SSEChunk` objects to the raw HTTP response as server-sent events. It adds a delay between chunks if latency is configured.
|
|
99
|
+
|
|
100
|
+
Each format's `serialize()` builds the chunk array. Text gets split by `chunkSize` when it's set. The Responses format also assigns incrementing `sequence_number` values to every event using a closure-based counter.
|
|
101
|
+
|
|
102
|
+
## File loading
|
|
103
|
+
|
|
104
|
+
[`loadRulesFromPath()`](src/loader.ts) loads rules from disk. It takes a `LoadContext` with the rule engine and an optional fallback setter. A `Map` dispatches by file extension:
|
|
105
|
+
|
|
106
|
+
- `.json5` and `.json` files get parsed with Zod validation. Files can be a bare array of rules or an object with optional `templates`, `fallback`, and `rules` fields. Template references (`$name`) are resolved at load time. Rules can use `reply` for a single response or `replies` for a sequence
|
|
107
|
+
- `.ts`, `.js`, and `.mjs` files get dynamically imported. The default export (a `Handler` or array) is registered. A named `fallback` export sets the server fallback
|
|
108
|
+
|
|
109
|
+
Directories are read and processed in sorted order.
|
|
110
|
+
|
|
111
|
+
## Logging
|
|
112
|
+
|
|
113
|
+
[`Logger`](src/logger.ts) is a threshold-based logger. Each level has a numeric priority and messages below the threshold are dropped. Output is timestamped and coloured with picocolors.
|
|
114
|
+
|
|
115
|
+
## Security model
|
|
116
|
+
|
|
117
|
+
The threat model is simple: the server runs locally or in CI, loading files written by the developer. There is no multi-tenant isolation or sandboxing.
|
|
118
|
+
|
|
119
|
+
Handler files (`.ts`, `.js`, `.mjs`) are loaded via `import()` and execute with full process permissions. The trust boundary is the file system path passed to `load()` or `--handler`. If an attacker can write to that path, they already have code execution on the machine. No path restriction is enforced because legitimate setups often load rules from outside the project directory.
|
|
120
|
+
|
|
121
|
+
JSON5 files go through Zod validation and never execute code. The only dynamic construction is `new RegExp()` for regex patterns in rule files, which could hang on pathological backtracking patterns but poses no injection risk.
|
|
122
|
+
|
|
123
|
+
Fastify caps request bodies at 1 MB by default. The server binds to `127.0.0.1` unless explicitly configured otherwise. Responses are serialised through JSON, so reply text cannot break out of SSE framing.
|
|
124
|
+
|
|
125
|
+
CLI inputs (port, host, latency, log level) are validated through [`cli-validators.ts`](src/cli-validators.ts) before use.
|
package/LICENCE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Suyash Srijan
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
# llm-mock-server
|
|
2
|
+
|
|
3
|
+
A mock LLM server for testing. It handles OpenAI `/chat/completions`, Anthropic `/messages`, and OpenAI `/responses` API formats, with both streaming (SSE) and non-streaming responses. Point any client at it and get instant, deterministic replies.
|
|
4
|
+
|
|
5
|
+
## Table of contents
|
|
6
|
+
|
|
7
|
+
- [Quick start](#quick-start)
|
|
8
|
+
- [API endpoints](#api-endpoints)
|
|
9
|
+
- [Basic usage](#basic-usage)
|
|
10
|
+
- [Matching rules](#matching-rules)
|
|
11
|
+
- [Replies](#replies)
|
|
12
|
+
- [Fallback](#fallback)
|
|
13
|
+
- [Request history](#request-history)
|
|
14
|
+
- [Advanced usage](#advanced-usage)
|
|
15
|
+
- [Tool matching](#tool-matching)
|
|
16
|
+
- [Error injection](#error-injection)
|
|
17
|
+
- [Rule lifecycle](#rule-lifecycle)
|
|
18
|
+
- [Rule inspection](#rule-inspection)
|
|
19
|
+
- [Streaming options](#streaming-options)
|
|
20
|
+
- [Advanced patterns](#advanced-patterns)
|
|
21
|
+
- [Loading rules from files](#loading-rules-from-files)
|
|
22
|
+
- [JSON5 format](#json5-format)
|
|
23
|
+
- [Handler files](#handler-files)
|
|
24
|
+
- [Logging](#logging)
|
|
25
|
+
- [CLI](#cli)
|
|
26
|
+
- [Security](#security)
|
|
27
|
+
- [Licence](#licence)
|
|
28
|
+
|
|
29
|
+
## Quick start
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
npm install llm-mock-server
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
```typescript
|
|
36
|
+
import { createMock } from "llm-mock-server";
|
|
37
|
+
|
|
38
|
+
await using server = await createMock();
|
|
39
|
+
server.when("hello").reply("Hi there!");
|
|
40
|
+
|
|
41
|
+
// Point your OpenAI/Anthropic/Codex client at server.url
|
|
42
|
+
const response = await fetch(`${server.url}/v1/chat/completions`, {
|
|
43
|
+
method: "POST",
|
|
44
|
+
headers: { "Content-Type": "application/json" },
|
|
45
|
+
body: JSON.stringify({
|
|
46
|
+
model: "gpt-5.4",
|
|
47
|
+
messages: [{ role: "user", content: "hello" }],
|
|
48
|
+
stream: false,
|
|
49
|
+
}),
|
|
50
|
+
});
|
|
51
|
+
// server.stop() is called automatically when the scope exits
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
The same rule matches on all three endpoints, so you don't need to set up separate mocks per provider. By default it binds to `127.0.0.1`. Pass `host: "0.0.0.0"` if you need it reachable from other machines or containers.
|
|
55
|
+
|
|
56
|
+
## API endpoints
|
|
57
|
+
|
|
58
|
+
| Route | Format |
|
|
59
|
+
| ----- | ------ |
|
|
60
|
+
| `POST /v1/chat/completions` | OpenAI |
|
|
61
|
+
| `POST /v1/messages` | Anthropic |
|
|
62
|
+
| `POST /v1/responses` | OpenAI Responses |
|
|
63
|
+
|
|
64
|
+
All three support streaming and non-streaming responses.
|
|
65
|
+
|
|
66
|
+
## Basic usage
|
|
67
|
+
|
|
68
|
+
### Matching rules
|
|
69
|
+
|
|
70
|
+
Rules are evaluated in order and the first match wins. A string does case-insensitive substring matching on the last user message. You can also use regex, object matchers, or predicate functions.
|
|
71
|
+
|
|
72
|
+
```typescript
|
|
73
|
+
server.when("hello").reply("Hi!");
|
|
74
|
+
server.when(/explain (\w+)/i).reply("Here's an explanation.");
|
|
75
|
+
|
|
76
|
+
// Match on model, system prompt, format, or tool presence
|
|
77
|
+
server.when({ model: /claude/, system: /pirate/ }).reply("Arrr!");
|
|
78
|
+
server.when({ format: "anthropic" }).reply("Anthropic request detected.");
|
|
79
|
+
|
|
80
|
+
// Predicate function for full control
|
|
81
|
+
server.when((req) => req.messages.length > 5).reply("Long conversation!");
|
|
82
|
+
|
|
83
|
+
// Combine structured fields with a predicate
|
|
84
|
+
server.when({
|
|
85
|
+
model: /claude/,
|
|
86
|
+
predicate: (req) => req.headers["x-team"] === "alpha",
|
|
87
|
+
}).reply("Alpha team on Claude!");
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Replies
|
|
91
|
+
|
|
92
|
+
Replies can be strings, objects, or functions.
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
server.when("hello").reply("Hi!");
|
|
96
|
+
|
|
97
|
+
// Extended thinking (works with Anthropic and Responses formats)
|
|
98
|
+
server.when("think").reply({ text: "42", reasoning: "Let me work through this..." });
|
|
99
|
+
|
|
100
|
+
server.when("weather").reply({
|
|
101
|
+
tools: [{ name: "get_weather", args: { location: "London" } }],
|
|
102
|
+
});
|
|
103
|
+
|
|
104
|
+
// Dynamic reply based on the request
|
|
105
|
+
server.when("echo").reply((req) => `You said: ${req.lastMessage}`);
|
|
106
|
+
|
|
107
|
+
// Async resolvers work too
|
|
108
|
+
server.when("slow").reply(async (req) => {
|
|
109
|
+
return { text: "Done thinking." };
|
|
110
|
+
});
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
### Fallback
|
|
114
|
+
|
|
115
|
+
When no rule matches, the server uses a fallback reply. You can change it to whatever you like.
|
|
116
|
+
|
|
117
|
+
```typescript
|
|
118
|
+
server.fallback("I don't understand.");
|
|
119
|
+
server.fallback({ error: { status: 404, message: "No matching rule" } });
|
|
120
|
+
```
|
|
121
|
+
|
|
122
|
+
You can also set the fallback from a JSON5 file or handler file. See [loading rules from files](#loading-rules-from-files).
|
|
123
|
+
|
|
124
|
+
### Request history
|
|
125
|
+
|
|
126
|
+
Every request that hits the server gets recorded. You can query it with fluent methods in your test assertions.
|
|
127
|
+
|
|
128
|
+
```typescript
|
|
129
|
+
server.when("hello").reply("Hi!");
|
|
130
|
+
|
|
131
|
+
await post("/v1/chat/completions", { ... });
|
|
132
|
+
|
|
133
|
+
expect(server.history.count()).toBe(1);
|
|
134
|
+
expect(server.history.last()?.request.lastMessage).toBe("hello");
|
|
135
|
+
expect(server.history.first()?.rule).toBe('"hello"');
|
|
136
|
+
|
|
137
|
+
const matched = server.history.where(r => r.rule !== undefined);
|
|
138
|
+
|
|
139
|
+
for (const entry of server.history) {
|
|
140
|
+
console.log(entry.request.lastMessage);
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
const last = server.history.last();
|
|
144
|
+
console.log(last?.request.headers["authorization"]);
|
|
145
|
+
console.log(last?.request.path);
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Advanced usage
|
|
149
|
+
|
|
150
|
+
### Tool matching
|
|
151
|
+
|
|
152
|
+
```typescript
|
|
153
|
+
server.whenTool("get_weather").reply({
|
|
154
|
+
tools: [{ name: "get_weather", args: { location: "London" } }],
|
|
155
|
+
});
|
|
156
|
+
|
|
157
|
+
server.whenToolResult("call_abc").reply("Got your result!");
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
### Error injection
|
|
161
|
+
|
|
162
|
+
Errors are first-class replies and follow the same rule system as everything else.
|
|
163
|
+
|
|
164
|
+
```typescript
|
|
165
|
+
// One-shot error for the next request, then back to normal
|
|
166
|
+
server.nextError(429, "Rate limited");
|
|
167
|
+
|
|
168
|
+
// Pattern-matched error that fires every time
|
|
169
|
+
server.when("fail").reply({ error: { status: 500, message: "Internal error" } });
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
### Rule lifecycle
|
|
173
|
+
|
|
174
|
+
```typescript
|
|
175
|
+
server.when("hello").reply("Hi!").times(2);
|
|
176
|
+
server.when("catch-all").reply("Fallback.").first();
|
|
177
|
+
server.when("hello").reply("First time only!").times(1).first();
|
|
178
|
+
|
|
179
|
+
server.isDone(); // true when all .times() rules are consumed
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
### Rule inspection
|
|
183
|
+
|
|
184
|
+
You can see what rules are registered and how many matches they have left.
|
|
185
|
+
|
|
186
|
+
```typescript
|
|
187
|
+
server.when("hello").reply("Hi!");
|
|
188
|
+
server.when(/bye/i).reply("Goodbye!").times(3);
|
|
189
|
+
|
|
190
|
+
server.rules;
|
|
191
|
+
// [{ description: '"hello"', remaining: Infinity }, { description: '/bye/i', remaining: 3 }]
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### Streaming options
|
|
195
|
+
|
|
196
|
+
You can control how text gets chunked during SSE streaming, both per-rule and at the server level.
|
|
197
|
+
|
|
198
|
+
```typescript
|
|
199
|
+
server.when("hello").reply("Hello, world!", { latency: 50, chunkSize: 5 });
|
|
200
|
+
|
|
201
|
+
const server = new MockServer({ defaultLatency: 30, defaultChunkSize: 10 });
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
### Advanced patterns
|
|
205
|
+
|
|
206
|
+
These show how to combine the building blocks for more complex scenarios.
|
|
207
|
+
|
|
208
|
+
#### Reply sequences
|
|
209
|
+
|
|
210
|
+
Instead of registering multiple rules for multi-turn conversations, pass an array. Each match advances through the sequence. Once exhausted, the rule stops matching and falls through to the next rule or fallback.
|
|
211
|
+
|
|
212
|
+
```typescript
|
|
213
|
+
server.when("next step").replySequence([
|
|
214
|
+
"Starting the engine.",
|
|
215
|
+
"Engine is running.",
|
|
216
|
+
{ reply: { text: "All done." }, options: { latency: 100 } },
|
|
217
|
+
]);
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
#### Conditional replies
|
|
221
|
+
|
|
222
|
+
Use a function resolver when the reply depends on the request content.
|
|
223
|
+
|
|
224
|
+
```typescript
|
|
225
|
+
server.when("status").reply((req) => {
|
|
226
|
+
const hasTools = req.toolNames.length > 0;
|
|
227
|
+
return hasTools ? "Tools are available." : "No tools configured.";
|
|
228
|
+
});
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
#### Simulating flaky APIs
|
|
232
|
+
|
|
233
|
+
Use a closure to fail every Nth request.
|
|
234
|
+
|
|
235
|
+
```typescript
|
|
236
|
+
let count = 0;
|
|
237
|
+
server.when(() => ++count % 3 === 0)
|
|
238
|
+
.reply({ error: { status: 503, message: "Service unavailable" } })
|
|
239
|
+
.first();
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
#### Async lookups
|
|
243
|
+
|
|
244
|
+
Resolvers can be async if you need to compute the reply.
|
|
245
|
+
|
|
246
|
+
```typescript
|
|
247
|
+
server.when("data").reply(async (req) => {
|
|
248
|
+
const result = await someAsyncOperation(req.lastMessage);
|
|
249
|
+
return { text: result };
|
|
250
|
+
});
|
|
251
|
+
```
|
|
252
|
+
|
|
253
|
+
#### Matching on multiple conditions
|
|
254
|
+
|
|
255
|
+
Structured fields and predicates combine with AND logic.
|
|
256
|
+
|
|
257
|
+
```typescript
|
|
258
|
+
server.when({
|
|
259
|
+
model: /gpt/,
|
|
260
|
+
format: "openai",
|
|
261
|
+
system: /you are a translator/i,
|
|
262
|
+
predicate: (req) => req.messages.length > 2,
|
|
263
|
+
}).reply("Translated output here.");
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
## Loading rules from files
|
|
267
|
+
|
|
268
|
+
Rules can live in JSON5 files or TypeScript handler files. You can load a single file or a whole directory.
|
|
269
|
+
|
|
270
|
+
```typescript
|
|
271
|
+
await server.load("./rules");
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
### JSON5 format
|
|
275
|
+
|
|
276
|
+
`rules/greetings.json5`:
|
|
277
|
+
|
|
278
|
+
```json5
|
|
279
|
+
[
|
|
280
|
+
{
|
|
281
|
+
when: "hello",
|
|
282
|
+
reply: "Hi there!",
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
when: "/explain (\\w+)/i",
|
|
286
|
+
reply: "Here's an explanation.",
|
|
287
|
+
times: 3,
|
|
288
|
+
},
|
|
289
|
+
{
|
|
290
|
+
when: { model: "gpt-5.4", message: "hello" },
|
|
291
|
+
reply: { text: "Hi from GPT!", reasoning: "Simple greeting." },
|
|
292
|
+
},
|
|
293
|
+
]
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
If you have replies that repeat across multiple rules, you can define them once as templates. Use a `$name` reference in the `reply` field to pull from the templates section.
|
|
297
|
+
|
|
298
|
+
`rules/with-templates.json5`:
|
|
299
|
+
|
|
300
|
+
```json5
|
|
301
|
+
{
|
|
302
|
+
templates: {
|
|
303
|
+
weatherTool: { tools: [{ name: "get_weather", args: { location: "London" } }] },
|
|
304
|
+
done: "All done!",
|
|
305
|
+
},
|
|
306
|
+
rules: [
|
|
307
|
+
{ when: "forecast", reply: "$weatherTool" },
|
|
308
|
+
{ when: "weather", reply: "$weatherTool" },
|
|
309
|
+
{ when: "finish", reply: "$done" },
|
|
310
|
+
],
|
|
311
|
+
}
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Sequences work in JSON5 too. Use `replies` instead of `reply` to define a multi-step sequence.
|
|
315
|
+
|
|
316
|
+
`rules/conversation.json5`:
|
|
317
|
+
|
|
318
|
+
```json5
|
|
319
|
+
[
|
|
320
|
+
{
|
|
321
|
+
when: "next step",
|
|
322
|
+
replies: [
|
|
323
|
+
"Starting the engine.",
|
|
324
|
+
{ reply: "Engine is running.", latency: 50 },
|
|
325
|
+
"All done.",
|
|
326
|
+
],
|
|
327
|
+
},
|
|
328
|
+
]
|
|
329
|
+
```
|
|
330
|
+
|
|
331
|
+
You can also set a fallback reply in the object format.
|
|
332
|
+
|
|
333
|
+
`rules/with-fallback.json5`:
|
|
334
|
+
|
|
335
|
+
```json5
|
|
336
|
+
{
|
|
337
|
+
fallback: "Sorry, I don't know about that.",
|
|
338
|
+
rules: [
|
|
339
|
+
{ when: "hello", reply: "Hi!" },
|
|
340
|
+
],
|
|
341
|
+
}
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
Both bare arrays and the object format work. Use bare arrays for simple cases and the object format when you need templates, sequences, or fallbacks.
|
|
345
|
+
|
|
346
|
+
### Handler files
|
|
347
|
+
|
|
348
|
+
`rules/echo.ts`:
|
|
349
|
+
|
|
350
|
+
```typescript
|
|
351
|
+
import type { Handler } from "llm-mock-server";
|
|
352
|
+
|
|
353
|
+
export default {
|
|
354
|
+
match: (req) => req.lastMessage.includes("echo"),
|
|
355
|
+
respond: (req) => `Echo: ${req.lastMessage}`,
|
|
356
|
+
} satisfies Handler;
|
|
357
|
+
```
|
|
358
|
+
|
|
359
|
+
Using `satisfies Handler` catches typos and wrong field names at compile time. The server also validates the shape at load time with Zod, so you get a clear error either way.
|
|
360
|
+
|
|
361
|
+
Handler files can export an array of handlers. To set a fallback, export a named `fallback` alongside the default:
|
|
362
|
+
|
|
363
|
+
```typescript
|
|
364
|
+
import type { Handler } from "llm-mock-server";
|
|
365
|
+
|
|
366
|
+
export const fallback = "I'm not sure about that.";
|
|
367
|
+
export default {
|
|
368
|
+
match: (req) => req.lastMessage.includes("echo"),
|
|
369
|
+
respond: (req) => `Echo: ${req.lastMessage}`,
|
|
370
|
+
} satisfies Handler;
|
|
371
|
+
```
|
|
372
|
+
|
|
373
|
+
## Logging
|
|
374
|
+
|
|
375
|
+
```typescript
|
|
376
|
+
const server = new MockServer({ logLevel: "info" });
|
|
377
|
+
```
|
|
378
|
+
|
|
379
|
+
The available levels are `none`, `error`, `warning`, `info`, `debug`, and `all`. At `info` you get one line per request. At `debug` you also get the parsed request details and reply previews.
|
|
380
|
+
|
|
381
|
+
## CLI
|
|
382
|
+
|
|
383
|
+
```bash
|
|
384
|
+
llm-mock-server [options]
|
|
385
|
+
```
|
|
386
|
+
|
|
387
|
+
| Option | Short | Default | Description |
|
|
388
|
+
| ------ | ----- | ------- | ----------- |
|
|
389
|
+
| `--port` | `-p` | `5555` | Port to listen on |
|
|
390
|
+
| `--host` | `-H` | `127.0.0.1` | Host to bind to |
|
|
391
|
+
| `--rules` | `-r` | | Path to rules file or directory |
|
|
392
|
+
| `--handler` | | | Path to handler file |
|
|
393
|
+
| `--latency` | `-l` | `0` | Ms between SSE chunks |
|
|
394
|
+
| `--chunk-size` | `-c` | `0` | Characters per SSE chunk |
|
|
395
|
+
| `--fallback` | `-f` | | Fallback reply text |
|
|
396
|
+
| `--watch` | `-w` | | Watch rules path and reload on changes |
|
|
397
|
+
| `--log-level` | | `info` | Log verbosity |
|
|
398
|
+
|
|
399
|
+
```bash
|
|
400
|
+
llm-mock-server -p 8080 -r ./rules --log-level debug
|
|
401
|
+
|
|
402
|
+
# Auto-reload rules when files change
|
|
403
|
+
llm-mock-server -r ./rules --watch
|
|
404
|
+
```
|
|
405
|
+
|
|
406
|
+
## Security
|
|
407
|
+
|
|
408
|
+
This is a testing tool, not a production service. It's designed to run locally or in CI, loading rule files that you wrote. Here are a few things to be aware of.
|
|
409
|
+
|
|
410
|
+
### Handler files execute code
|
|
411
|
+
|
|
412
|
+
When you call `server.load()` or pass `--handler` on the CLI, `.ts`/`.js` files are loaded via dynamic `import()`. They run with the same permissions as the rest of your Node.js process. Only load files you trust.
|
|
413
|
+
|
|
414
|
+
### JSON5 rule files are data only
|
|
415
|
+
|
|
416
|
+
They go through Zod validation at load time and never execute code. Regex patterns in rule files are compiled with `new RegExp()`, which is safe but could hang on pathological patterns if you write something like `/^(a+)+$/`. Keep patterns simple.
|
|
417
|
+
|
|
418
|
+
### Network binding
|
|
419
|
+
|
|
420
|
+
The server binds to `127.0.0.1` by default, so it's only reachable from your machine. If you bind to `0.0.0.0`, anything on the network can send requests to it. That's fine for container setups; just be aware of it.
|
|
421
|
+
|
|
422
|
+
### Request limits
|
|
423
|
+
|
|
424
|
+
Request bodies are capped at 1 MB by Fastify's default. Responses are serialised through JSON, so there's no injection risk in the SSE output.
|
|
425
|
+
|
|
426
|
+
## Licence
|
|
427
|
+
|
|
428
|
+
MIT License
|
|
429
|
+
|
|
430
|
+
Copyright (c) 2026 Suyash Srijan
|
|
431
|
+
|
|
432
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
433
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
434
|
+
in the Software without restriction, including without limitation the rights
|
|
435
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
436
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
437
|
+
furnished to do so, subject to the following conditions:
|
|
438
|
+
|
|
439
|
+
The above copyright notice and this permission notice shall be included in all
|
|
440
|
+
copies or substantial portions of the Software.
|
|
441
|
+
|
|
442
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
443
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
444
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
445
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
446
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
447
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
448
|
+
SOFTWARE.
|