inference-gate 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- inference_gate-0.1.0/LICENSE +21 -0
- inference_gate-0.1.0/PKG-INFO +355 -0
- inference_gate-0.1.0/README.md +322 -0
- inference_gate-0.1.0/pyproject.toml +95 -0
- inference_gate-0.1.0/setup.cfg +4 -0
- inference_gate-0.1.0/src/inference_gate/__init__.py +14 -0
- inference_gate-0.1.0/src/inference_gate/cli.py +1227 -0
- inference_gate-0.1.0/src/inference_gate/cli_format.py +323 -0
- inference_gate-0.1.0/src/inference_gate/config.py +267 -0
- inference_gate-0.1.0/src/inference_gate/frontend/__init__.py +5 -0
- inference_gate-0.1.0/src/inference_gate/headers.py +274 -0
- inference_gate-0.1.0/src/inference_gate/inference_gate.py +229 -0
- inference_gate-0.1.0/src/inference_gate/inflow/__init__.py +9 -0
- inference_gate-0.1.0/src/inference_gate/inflow/admin.py +597 -0
- inference_gate-0.1.0/src/inference_gate/inflow/server.py +202 -0
- inference_gate-0.1.0/src/inference_gate/modes.py +23 -0
- inference_gate-0.1.0/src/inference_gate/outflow/__init__.py +10 -0
- inference_gate-0.1.0/src/inference_gate/outflow/client.py +164 -0
- inference_gate-0.1.0/src/inference_gate/outflow/model_router.py +277 -0
- inference_gate-0.1.0/src/inference_gate/pytest_context.py +118 -0
- inference_gate-0.1.0/src/inference_gate/pytest_plugin.py +537 -0
- inference_gate-0.1.0/src/inference_gate/recording/__init__.py +25 -0
- inference_gate-0.1.0/src/inference_gate/recording/atomic_io.py +46 -0
- inference_gate-0.1.0/src/inference_gate/recording/hashing.py +250 -0
- inference_gate-0.1.0/src/inference_gate/recording/models.py +256 -0
- inference_gate-0.1.0/src/inference_gate/recording/reassembly.py +348 -0
- inference_gate-0.1.0/src/inference_gate/recording/storage.py +1090 -0
- inference_gate-0.1.0/src/inference_gate/recording/tape_index.py +337 -0
- inference_gate-0.1.0/src/inference_gate/recording/tape_parser.py +286 -0
- inference_gate-0.1.0/src/inference_gate/recording/tape_writer.py +398 -0
- inference_gate-0.1.0/src/inference_gate/router/__init__.py +9 -0
- inference_gate-0.1.0/src/inference_gate/router/router.py +584 -0
- inference_gate-0.1.0/src/inference_gate/webui/__init__.py +12 -0
- inference_gate-0.1.0/src/inference_gate/webui/api.py +194 -0
- inference_gate-0.1.0/src/inference_gate/webui/server.py +197 -0
- inference_gate-0.1.0/src/inference_gate/webui/static/assets/index-3mjQG4Sn.js +11 -0
- inference_gate-0.1.0/src/inference_gate/webui/static/assets/index-DY74WqvV.css +1 -0
- inference_gate-0.1.0/src/inference_gate/webui/static/index.html +14 -0
- inference_gate-0.1.0/src/inference_gate/webui/static/vite.svg +1 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/PKG-INFO +355 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/SOURCES.txt +57 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/dependency_links.txt +1 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/entry_points.txt +5 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/requires.txt +15 -0
- inference_gate-0.1.0/src/inference_gate.egg-info/top_level.txt +1 -0
- inference_gate-0.1.0/tests/test_admin.py +718 -0
- inference_gate-0.1.0/tests/test_cassette_replay.py +347 -0
- inference_gate-0.1.0/tests/test_cli.py +616 -0
- inference_gate-0.1.0/tests/test_config.py +305 -0
- inference_gate-0.1.0/tests/test_error_recording.py +607 -0
- inference_gate-0.1.0/tests/test_inference_gate.py +105 -0
- inference_gate-0.1.0/tests/test_outflow_router.py +361 -0
- inference_gate-0.1.0/tests/test_pytest_plugin.py +777 -0
- inference_gate-0.1.0/tests/test_reassembly.py +393 -0
- inference_gate-0.1.0/tests/test_router.py +872 -0
- inference_gate-0.1.0/tests/test_server.py +294 -0
- inference_gate-0.1.0/tests/test_storage.py +1201 -0
- inference_gate-0.1.0/tests/test_subprocess_server.py +146 -0
- inference_gate-0.1.0/tests/test_webui.py +253 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025-2026 DanielKluev
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,355 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: inference-gate
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Python library for efficient and convenient AI inference replay in testing, debugging and development
|
|
5
|
+
Author: DanielKluev
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/DanielKluev/InferenceGate
|
|
8
|
+
Project-URL: Repository, https://github.com/DanielKluev/InferenceGate
|
|
9
|
+
Classifier: Development Status :: 3 - Alpha
|
|
10
|
+
Classifier: Intended Audience :: Developers
|
|
11
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
12
|
+
Classifier: Programming Language :: Python :: 3
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: aiohttp>=3.8.0
|
|
20
|
+
Requires-Dist: pydantic>=2.10.0
|
|
21
|
+
Requires-Dist: click>=8.1.0
|
|
22
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
23
|
+
Provides-Extra: dev
|
|
24
|
+
Requires-Dist: pytest>=8.3.0; extra == "dev"
|
|
25
|
+
Requires-Dist: pytest-asyncio>=0.24.0; extra == "dev"
|
|
26
|
+
Requires-Dist: pytest-aiohttp>=1.0.0; extra == "dev"
|
|
27
|
+
Requires-Dist: pytest-xdist>=3.6.0; extra == "dev"
|
|
28
|
+
Requires-Dist: ruff>=0.8.0; extra == "dev"
|
|
29
|
+
Requires-Dist: mypy>=1.13.0; extra == "dev"
|
|
30
|
+
Provides-Extra: test
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
32
|
+
Dynamic: license-file
|
|
33
|
+
|
|
34
|
+
# InferenceGate
|
|
35
|
+
|
|
36
|
+
Python library for efficient and convenient AI inference replay in testing, debugging and development, saving costs and time on repeated prompts.
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
pip install inference-gate
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Features
|
|
45
|
+
|
|
46
|
+
- **Record-and-Replay Mode**: Record new requests to cache, replay from cache when available
|
|
47
|
+
- **Replay-Only Mode**: Only serve cached responses (for unit tests and CI)
|
|
48
|
+
- **Web UI Dashboard**: Optional web-based dashboard for browsing cache entries, viewing statistics, and inspecting request/response details
|
|
49
|
+
- Supports OpenAI Chat Completions API and Responses API
|
|
50
|
+
- Supports streaming responses
|
|
51
|
+
- Preserves prompt, temperature, model, and other metadata
|
|
52
|
+
- YAML configuration file for persistent settings
|
|
53
|
+
- CLI tools for easy management
|
|
54
|
+
|
|
55
|
+
## Quick Start
|
|
56
|
+
|
|
57
|
+
### 1. Initialize Configuration (Optional)
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
inference-gate config init
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
This creates a configuration file at `~/.InferenceGate/config.yaml` (`%USERPROFILE%\.InferenceGate\config.yaml` on Windows).
|
|
64
|
+
|
|
65
|
+
### 2. Test Your Upstream API Connection
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
inference-gate test-upstream --api-key $OPENAI_API_KEY
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
### 3. Start the Proxy
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
inference-gate start --api-key $OPENAI_API_KEY
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### 4. Test the Running Proxy
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
inference-gate test-gate
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
### 5. Point Your Client to the Proxy
|
|
84
|
+
|
|
85
|
+
```python
|
|
86
|
+
from openai import OpenAI
|
|
87
|
+
|
|
88
|
+
client = OpenAI(
|
|
89
|
+
api_key="any-key", # Not needed in replay mode
|
|
90
|
+
base_url="http://localhost:8080/v1"
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
response = client.chat.completions.create(
|
|
94
|
+
model="gpt-4",
|
|
95
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
96
|
+
)
|
|
97
|
+
```
|
|
98
|
+
|
|
99
|
+
## CLI Commands
|
|
100
|
+
|
|
101
|
+
### Server Commands
|
|
102
|
+
|
|
103
|
+
#### `start` - Record-and-Replay Mode (Default)
|
|
104
|
+
|
|
105
|
+
Replays cached inferences when available. On cache miss, forwards to upstream, records the response, and stores it for future replays.
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
inference-gate start [OPTIONS]
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
**Options:**
|
|
112
|
+
|
|
113
|
+
| Option | Description | Default |
|
|
114
|
+
|--------|-------------|---------|
|
|
115
|
+
| `--port, -p` | Server port | 8080 |
|
|
116
|
+
| `--host, -h` | Server host | 127.0.0.1 |
|
|
117
|
+
| `--cache-dir, -c` | Cache directory | .inference_cache |
|
|
118
|
+
| `--upstream, -u` | Upstream API URL | https://api.openai.com |
|
|
119
|
+
| `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
|
|
120
|
+
| `--max-live-requests` | Global limit on live upstream requests | (infinite) |
|
|
121
|
+
| `--web-ui` | Enable web UI dashboard | false |
|
|
122
|
+
| `--web-ui-port` | Web UI server port | 8081 |
|
|
123
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
124
|
+
|
|
125
|
+
#### `replay` - Replay-Only Mode
|
|
126
|
+
|
|
127
|
+
Only returns cached responses. Returns an error if a matching inference is not found in the cache. Useful for unit tests and CI pipelines.
|
|
128
|
+
|
|
129
|
+
```bash
|
|
130
|
+
inference-gate replay [OPTIONS]
|
|
131
|
+
```
|
|
132
|
+
|
|
133
|
+
**Options:**
|
|
134
|
+
|
|
135
|
+
| Option | Description | Default |
|
|
136
|
+
|--------|-------------|---------|
|
|
137
|
+
| `--port, -p` | Server port | 8080 |
|
|
138
|
+
| `--host, -h` | Server host | 127.0.0.1 |
|
|
139
|
+
| `--cache-dir, -c` | Cache directory | .inference_cache |
|
|
140
|
+
| `--web-ui` | Enable web UI dashboard | false |
|
|
141
|
+
| `--web-ui-port` | Web UI server port | 8081 |
|
|
142
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
143
|
+
|
|
144
|
+
### Test Commands
|
|
145
|
+
|
|
146
|
+
#### `test-gate` - Test a Running InferenceGate Instance
|
|
147
|
+
|
|
148
|
+
Sends a test prompt to a running InferenceGate proxy. It uses the host and port from the config, so no API key or extra options are needed.
|
|
149
|
+
|
|
150
|
+
```bash
|
|
151
|
+
inference-gate test-gate [OPTIONS]
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
**Options:**
|
|
155
|
+
|
|
156
|
+
| Option | Description | Default |
|
|
157
|
+
|--------|-------------|--------|
|
|
158
|
+
| `--host, -h` | Host of the running instance | 127.0.0.1 |
|
|
159
|
+
| `--port, -p` | Port of the running instance | 8080 |
|
|
160
|
+
| `--model, -m` | Model to use | gpt-4o-mini |
|
|
161
|
+
| `--prompt` | Custom test prompt | (built-in test prompt) |
|
|
162
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
163
|
+
|
|
164
|
+
#### `test-upstream` - Test Upstream API Directly
|
|
165
|
+
|
|
166
|
+
Sends a test prompt directly to the upstream API (bypassing InferenceGate) to verify the API key and endpoint.
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
inference-gate test-upstream [OPTIONS]
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
**Options:**
|
|
173
|
+
|
|
174
|
+
| Option | Description | Default |
|
|
175
|
+
|--------|-------------|--------|
|
|
176
|
+
| `--upstream, -u` | Upstream API URL | https://api.openai.com |
|
|
177
|
+
| `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
|
|
178
|
+
| `--model, -m` | Model to use | gpt-4o-mini |
|
|
179
|
+
| `--prompt` | Custom test prompt | (built-in test prompt) |
|
|
180
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
181
|
+
|
|
182
|
+
### Cache Management
|
|
183
|
+
|
|
184
|
+
#### `cache list` - List Cached Entries
|
|
185
|
+
|
|
186
|
+
```bash
|
|
187
|
+
inference-gate cache list [--cache-dir PATH]
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
#### `cache info` - Show Cache Statistics
|
|
191
|
+
|
|
192
|
+
```bash
|
|
193
|
+
inference-gate cache info [--cache-dir PATH]
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
#### `cache clear` - Clear All Cached Entries
|
|
197
|
+
|
|
198
|
+
```bash
|
|
199
|
+
inference-gate cache clear [--cache-dir PATH] [--yes]
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Web UI Dashboard
|
|
203
|
+
|
|
204
|
+
InferenceGate includes an optional web-based dashboard for browsing cached inference entries, viewing statistics, and inspecting request/response details.
|
|
205
|
+
|
|
206
|
+
### Enabling the Web UI
|
|
207
|
+
|
|
208
|
+
Add the `--web-ui` flag when starting InferenceGate:
|
|
209
|
+
|
|
210
|
+
```bash
|
|
211
|
+
# Record-and-replay mode with web UI
|
|
212
|
+
inference-gate start --api-key $OPENAI_API_KEY --web-ui
|
|
213
|
+
|
|
214
|
+
# Replay-only mode with web UI
|
|
215
|
+
inference-gate replay --web-ui
|
|
216
|
+
```
|
|
217
|
+
|
|
218
|
+
The web UI will be available at `http://localhost:8081` by default. You can customize the port with `--web-ui-port`:
|
|
219
|
+
|
|
220
|
+
```bash
|
|
221
|
+
inference-gate start --web-ui --web-ui-port 3000
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
### Features
|
|
225
|
+
|
|
226
|
+
- **Dashboard**: View cache statistics, current mode, and configuration at a glance
|
|
227
|
+
- **Cache List**: Browse all cached entries in a sortable, filterable table
|
|
228
|
+
- **Entry Details**: Inspect full request and response details including headers, body, and metadata
|
|
229
|
+
- **Search**: Filter cache entries by ID, model, path, or method
|
|
230
|
+
- **Streaming Support**: View streaming response chunks for SSE endpoints
|
|
231
|
+
|
|
232
|
+
### Screenshots
|
|
233
|
+
|
|
234
|
+
**Dashboard Page**
|
|
235
|
+
|
|
236
|
+

|
|
237
|
+
|
|
238
|
+
**Cache List Page**
|
|
239
|
+
|
|
240
|
+

|
|
241
|
+
|
|
242
|
+
**Entry Detail Page**
|
|
243
|
+
|
|
244
|
+

|
|
245
|
+
|
|
246
|
+
### Building the Frontend (Development Only)
|
|
247
|
+
|
|
248
|
+
The web UI frontend is pre-built and included in the package. You only need to build it if you're developing or modifying the frontend:
|
|
249
|
+
|
|
250
|
+
```bash
|
|
251
|
+
cd webui-frontend
|
|
252
|
+
npm install
|
|
253
|
+
npm run build
|
|
254
|
+
# Output goes to src/inference_gate/webui/static/
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
**Requirements:**
|
|
258
|
+
- Node.js 16+ and npm (only for frontend development)
|
|
259
|
+
- No Node.js or npm dependency at runtime - the built static files are served by the Python backend
|
|
260
|
+
|
|
261
|
+
### Configuration Management
|
|
262
|
+
|
|
263
|
+
#### `config show` - Show Current Configuration
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
inference-gate config show
|
|
267
|
+
```
|
|
268
|
+
|
|
269
|
+
#### `config init` - Initialize Configuration File
|
|
270
|
+
|
|
271
|
+
```bash
|
|
272
|
+
inference-gate config init [--force]
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
#### `config path` - Show Configuration File Path
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
inference-gate config path
|
|
279
|
+
```
|
|
280
|
+
|
|
281
|
+
## Configuration File
|
|
282
|
+
|
|
283
|
+
InferenceGate uses a YAML configuration file to store default settings. The file is located at:
|
|
284
|
+
|
|
285
|
+
- **Windows**: `%USERPROFILE%\.InferenceGate\config.yaml`
|
|
286
|
+
- **macOS/Linux**: `~/.InferenceGate/config.yaml`
|
|
287
|
+
|
|
288
|
+
You can specify a custom path using the `--config` global option:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
inference-gate --config /path/to/config.yaml start
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
### Configuration Options
|
|
295
|
+
|
|
296
|
+
```yaml
|
|
297
|
+
# Server settings
|
|
298
|
+
host: "127.0.0.1"
|
|
299
|
+
port: 8080
|
|
300
|
+
max_live_requests: null # Optional global limit on live upstream requests
|
|
301
|
+
|
|
302
|
+
# Upstream API settings
|
|
303
|
+
upstream: "https://api.openai.com"
|
|
304
|
+
# api_key is not stored in the config file for security
|
|
305
|
+
# Use OPENAI_API_KEY environment variable instead
|
|
306
|
+
|
|
307
|
+
# Storage settings
|
|
308
|
+
cache_dir: ".inference_cache"
|
|
309
|
+
|
|
310
|
+
# Logging settings
|
|
311
|
+
verbose: false
|
|
312
|
+
|
|
313
|
+
# Test command settings
|
|
314
|
+
test_model: "gpt-4o-mini"
|
|
315
|
+
test_prompt: "This is a test prompt. Reply with **ONLY** \"OK.\" to confirm that everything is ok. DO NOT output anything else."
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
### Configuration Priority
|
|
319
|
+
|
|
320
|
+
Settings are loaded in the following order (later overrides earlier):
|
|
321
|
+
|
|
322
|
+
1. Built-in defaults
|
|
323
|
+
2. Configuration file
|
|
324
|
+
3. Environment variables (`OPENAI_API_KEY`)
|
|
325
|
+
4. Command-line options
|
|
326
|
+
|
|
327
|
+
## Environment Variables
|
|
328
|
+
|
|
329
|
+
| Variable | Description |
|
|
330
|
+
|----------|-------------|
|
|
331
|
+
| `OPENAI_API_KEY` | OpenAI API key (used in record/test modes) |
|
|
332
|
+
|
|
333
|
+
## Development
|
|
334
|
+
|
|
335
|
+
Install development dependencies:
|
|
336
|
+
|
|
337
|
+
```bash
|
|
338
|
+
pip install -e ".[dev]"
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
Run tests:
|
|
342
|
+
|
|
343
|
+
```bash
|
|
344
|
+
pytest
|
|
345
|
+
```
|
|
346
|
+
|
|
347
|
+
Run linting:
|
|
348
|
+
|
|
349
|
+
```bash
|
|
350
|
+
ruff check src/ tests/
|
|
351
|
+
```
|
|
352
|
+
|
|
353
|
+
## License
|
|
354
|
+
|
|
355
|
+
MIT License
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
# InferenceGate
|
|
2
|
+
|
|
3
|
+
Python library for efficient and convenient AI inference replay in testing, debugging and development, saving costs and time on repeated prompts.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install inference-gate
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
## Features
|
|
12
|
+
|
|
13
|
+
- **Record-and-Replay Mode**: Record new requests to cache, replay from cache when available
|
|
14
|
+
- **Replay-Only Mode**: Only serve cached responses (for unit tests and CI)
|
|
15
|
+
- **Web UI Dashboard**: Optional web-based dashboard for browsing cache entries, viewing statistics, and inspecting request/response details
|
|
16
|
+
- Supports OpenAI Chat Completions API and Responses API
|
|
17
|
+
- Supports streaming responses
|
|
18
|
+
- Preserves prompt, temperature, model, and other metadata
|
|
19
|
+
- YAML configuration file for persistent settings
|
|
20
|
+
- CLI tools for easy management
|
|
21
|
+
|
|
22
|
+
## Quick Start
|
|
23
|
+
|
|
24
|
+
### 1. Initialize Configuration (Optional)
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
inference-gate config init
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
This creates a configuration file at `~/.InferenceGate/config.yaml` (`%USERPROFILE%\.InferenceGate\config.yaml` on Windows).
|
|
31
|
+
|
|
32
|
+
### 2. Test Your Upstream API Connection
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
inference-gate test-upstream --api-key $OPENAI_API_KEY
|
|
36
|
+
```
|
|
37
|
+
|
|
38
|
+
### 3. Start the Proxy
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
inference-gate start --api-key $OPENAI_API_KEY
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### 4. Test the Running Proxy
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
inference-gate test-gate
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
### 5. Point Your Client to the Proxy
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
from openai import OpenAI
|
|
54
|
+
|
|
55
|
+
client = OpenAI(
|
|
56
|
+
api_key="any-key", # Not needed in replay mode
|
|
57
|
+
base_url="http://localhost:8080/v1"
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
response = client.chat.completions.create(
|
|
61
|
+
model="gpt-4",
|
|
62
|
+
messages=[{"role": "user", "content": "Hello!"}]
|
|
63
|
+
)
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## CLI Commands
|
|
67
|
+
|
|
68
|
+
### Server Commands
|
|
69
|
+
|
|
70
|
+
#### `start` - Record-and-Replay Mode (Default)
|
|
71
|
+
|
|
72
|
+
Replays cached inferences when available. On cache miss, forwards to upstream, records the response, and stores it for future replays.
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
inference-gate start [OPTIONS]
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
**Options:**
|
|
79
|
+
|
|
80
|
+
| Option | Description | Default |
|
|
81
|
+
|--------|-------------|---------|
|
|
82
|
+
| `--port, -p` | Server port | 8080 |
|
|
83
|
+
| `--host, -h` | Server host | 127.0.0.1 |
|
|
84
|
+
| `--cache-dir, -c` | Cache directory | .inference_cache |
|
|
85
|
+
| `--upstream, -u` | Upstream API URL | https://api.openai.com |
|
|
86
|
+
| `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
|
|
87
|
+
| `--max-live-requests` | Global limit on live upstream requests | (infinite) |
|
|
88
|
+
| `--web-ui` | Enable web UI dashboard | false |
|
|
89
|
+
| `--web-ui-port` | Web UI server port | 8081 |
|
|
90
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
91
|
+
|
|
92
|
+
#### `replay` - Replay-Only Mode
|
|
93
|
+
|
|
94
|
+
Only returns cached responses. Returns an error if a matching inference is not found in the cache. Useful for unit tests and CI pipelines.
|
|
95
|
+
|
|
96
|
+
```bash
|
|
97
|
+
inference-gate replay [OPTIONS]
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
**Options:**
|
|
101
|
+
|
|
102
|
+
| Option | Description | Default |
|
|
103
|
+
|--------|-------------|---------|
|
|
104
|
+
| `--port, -p` | Server port | 8080 |
|
|
105
|
+
| `--host, -h` | Server host | 127.0.0.1 |
|
|
106
|
+
| `--cache-dir, -c` | Cache directory | .inference_cache |
|
|
107
|
+
| `--web-ui` | Enable web UI dashboard | false |
|
|
108
|
+
| `--web-ui-port` | Web UI server port | 8081 |
|
|
109
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
110
|
+
|
|
111
|
+
### Test Commands
|
|
112
|
+
|
|
113
|
+
#### `test-gate` - Test a Running InferenceGate Instance
|
|
114
|
+
|
|
115
|
+
Sends a test prompt to a running InferenceGate proxy. It uses the host and port from the config, so no API key or extra options are needed.
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
inference-gate test-gate [OPTIONS]
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
**Options:**
|
|
122
|
+
|
|
123
|
+
| Option | Description | Default |
|
|
124
|
+
|--------|-------------|--------|
|
|
125
|
+
| `--host, -h` | Host of the running instance | 127.0.0.1 |
|
|
126
|
+
| `--port, -p` | Port of the running instance | 8080 |
|
|
127
|
+
| `--model, -m` | Model to use | gpt-4o-mini |
|
|
128
|
+
| `--prompt` | Custom test prompt | (built-in test prompt) |
|
|
129
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
130
|
+
|
|
131
|
+
#### `test-upstream` - Test Upstream API Directly
|
|
132
|
+
|
|
133
|
+
Sends a test prompt directly to the upstream API (bypassing InferenceGate) to verify the API key and endpoint.
|
|
134
|
+
|
|
135
|
+
```bash
|
|
136
|
+
inference-gate test-upstream [OPTIONS]
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
**Options:**
|
|
140
|
+
|
|
141
|
+
| Option | Description | Default |
|
|
142
|
+
|--------|-------------|--------|
|
|
143
|
+
| `--upstream, -u` | Upstream API URL | https://api.openai.com |
|
|
144
|
+
| `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
|
|
145
|
+
| `--model, -m` | Model to use | gpt-4o-mini |
|
|
146
|
+
| `--prompt` | Custom test prompt | (built-in test prompt) |
|
|
147
|
+
| `--verbose, -v` | Enable verbose logging | false |
|
|
148
|
+
|
|
149
|
+
### Cache Management
|
|
150
|
+
|
|
151
|
+
#### `cache list` - List Cached Entries
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
inference-gate cache list [--cache-dir PATH]
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
#### `cache info` - Show Cache Statistics
|
|
158
|
+
|
|
159
|
+
```bash
|
|
160
|
+
inference-gate cache info [--cache-dir PATH]
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
#### `cache clear` - Clear All Cached Entries
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
inference-gate cache clear [--cache-dir PATH] [--yes]
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
## Web UI Dashboard
|
|
170
|
+
|
|
171
|
+
InferenceGate includes an optional web-based dashboard for browsing cached inference entries, viewing statistics, and inspecting request/response details.
|
|
172
|
+
|
|
173
|
+
### Enabling the Web UI
|
|
174
|
+
|
|
175
|
+
Add the `--web-ui` flag when starting InferenceGate:
|
|
176
|
+
|
|
177
|
+
```bash
|
|
178
|
+
# Record-and-replay mode with web UI
|
|
179
|
+
inference-gate start --api-key $OPENAI_API_KEY --web-ui
|
|
180
|
+
|
|
181
|
+
# Replay-only mode with web UI
|
|
182
|
+
inference-gate replay --web-ui
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
The web UI will be available at `http://localhost:8081` by default. You can customize the port with `--web-ui-port`:
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
inference-gate start --web-ui --web-ui-port 3000
|
|
189
|
+
```
|
|
190
|
+
|
|
191
|
+
### Features
|
|
192
|
+
|
|
193
|
+
- **Dashboard**: View cache statistics, current mode, and configuration at a glance
|
|
194
|
+
- **Cache List**: Browse all cached entries in a sortable, filterable table
|
|
195
|
+
- **Entry Details**: Inspect full request and response details including headers, body, and metadata
|
|
196
|
+
- **Search**: Filter cache entries by ID, model, path, or method
|
|
197
|
+
- **Streaming Support**: View streaming response chunks for SSE endpoints
|
|
198
|
+
|
|
199
|
+
### Screenshots
|
|
200
|
+
|
|
201
|
+
**Dashboard Page**
|
|
202
|
+
|
|
203
|
+

|
|
204
|
+
|
|
205
|
+
**Cache List Page**
|
|
206
|
+
|
|
207
|
+

|
|
208
|
+
|
|
209
|
+
**Entry Detail Page**
|
|
210
|
+
|
|
211
|
+

|
|
212
|
+
|
|
213
|
+
### Building the Frontend (Development Only)
|
|
214
|
+
|
|
215
|
+
The web UI frontend is pre-built and included in the package. You only need to build it if you're developing or modifying the frontend:
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
cd webui-frontend
|
|
219
|
+
npm install
|
|
220
|
+
npm run build
|
|
221
|
+
# Output goes to src/inference_gate/webui/static/
|
|
222
|
+
```
|
|
223
|
+
|
|
224
|
+
**Requirements:**
|
|
225
|
+
- Node.js 16+ and npm (only for frontend development)
|
|
226
|
+
- No Node.js or npm dependency at runtime - the built static files are served by the Python backend
|
|
227
|
+
|
|
228
|
+
### Configuration Management
|
|
229
|
+
|
|
230
|
+
#### `config show` - Show Current Configuration
|
|
231
|
+
|
|
232
|
+
```bash
|
|
233
|
+
inference-gate config show
|
|
234
|
+
```
|
|
235
|
+
|
|
236
|
+
#### `config init` - Initialize Configuration File
|
|
237
|
+
|
|
238
|
+
```bash
|
|
239
|
+
inference-gate config init [--force]
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
#### `config path` - Show Configuration File Path
|
|
243
|
+
|
|
244
|
+
```bash
|
|
245
|
+
inference-gate config path
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## Configuration File
|
|
249
|
+
|
|
250
|
+
InferenceGate uses a YAML configuration file to store default settings. The file is located at:
|
|
251
|
+
|
|
252
|
+
- **Windows**: `%USERPROFILE%\.InferenceGate\config.yaml`
|
|
253
|
+
- **macOS/Linux**: `~/.InferenceGate/config.yaml`
|
|
254
|
+
|
|
255
|
+
You can specify a custom path using the `--config` global option:
|
|
256
|
+
|
|
257
|
+
```bash
|
|
258
|
+
inference-gate --config /path/to/config.yaml start
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
### Configuration Options
|
|
262
|
+
|
|
263
|
+
```yaml
|
|
264
|
+
# Server settings
|
|
265
|
+
host: "127.0.0.1"
|
|
266
|
+
port: 8080
|
|
267
|
+
max_live_requests: null # Optional global limit on live upstream requests
|
|
268
|
+
|
|
269
|
+
# Upstream API settings
|
|
270
|
+
upstream: "https://api.openai.com"
|
|
271
|
+
# api_key is not stored in the config file for security
|
|
272
|
+
# Use OPENAI_API_KEY environment variable instead
|
|
273
|
+
|
|
274
|
+
# Storage settings
|
|
275
|
+
cache_dir: ".inference_cache"
|
|
276
|
+
|
|
277
|
+
# Logging settings
|
|
278
|
+
verbose: false
|
|
279
|
+
|
|
280
|
+
# Test command settings
|
|
281
|
+
test_model: "gpt-4o-mini"
|
|
282
|
+
test_prompt: "This is a test prompt. Reply with **ONLY** \"OK.\" to confirm that everything is ok. DO NOT output anything else."
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
### Configuration Priority
|
|
286
|
+
|
|
287
|
+
Settings are loaded in the following order (later overrides earlier):
|
|
288
|
+
|
|
289
|
+
1. Built-in defaults
|
|
290
|
+
2. Configuration file
|
|
291
|
+
3. Environment variables (`OPENAI_API_KEY`)
|
|
292
|
+
4. Command-line options
|
|
293
|
+
|
|
294
|
+
## Environment Variables
|
|
295
|
+
|
|
296
|
+
| Variable | Description |
|
|
297
|
+
|----------|-------------|
|
|
298
|
+
| `OPENAI_API_KEY` | OpenAI API key (used in record/test modes) |
|
|
299
|
+
|
|
300
|
+
## Development
|
|
301
|
+
|
|
302
|
+
Install development dependencies:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
pip install -e ".[dev]"
|
|
306
|
+
```
|
|
307
|
+
|
|
308
|
+
Run tests:
|
|
309
|
+
|
|
310
|
+
```bash
|
|
311
|
+
pytest
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
Run linting:
|
|
315
|
+
|
|
316
|
+
```bash
|
|
317
|
+
ruff check src/ tests/
|
|
318
|
+
```
|
|
319
|
+
|
|
320
|
+
## License
|
|
321
|
+
|
|
322
|
+
MIT License
|