inference-gate 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. inference_gate-0.1.0/LICENSE +21 -0
  2. inference_gate-0.1.0/PKG-INFO +355 -0
  3. inference_gate-0.1.0/README.md +322 -0
  4. inference_gate-0.1.0/pyproject.toml +95 -0
  5. inference_gate-0.1.0/setup.cfg +4 -0
  6. inference_gate-0.1.0/src/inference_gate/__init__.py +14 -0
  7. inference_gate-0.1.0/src/inference_gate/cli.py +1227 -0
  8. inference_gate-0.1.0/src/inference_gate/cli_format.py +323 -0
  9. inference_gate-0.1.0/src/inference_gate/config.py +267 -0
  10. inference_gate-0.1.0/src/inference_gate/frontend/__init__.py +5 -0
  11. inference_gate-0.1.0/src/inference_gate/headers.py +274 -0
  12. inference_gate-0.1.0/src/inference_gate/inference_gate.py +229 -0
  13. inference_gate-0.1.0/src/inference_gate/inflow/__init__.py +9 -0
  14. inference_gate-0.1.0/src/inference_gate/inflow/admin.py +597 -0
  15. inference_gate-0.1.0/src/inference_gate/inflow/server.py +202 -0
  16. inference_gate-0.1.0/src/inference_gate/modes.py +23 -0
  17. inference_gate-0.1.0/src/inference_gate/outflow/__init__.py +10 -0
  18. inference_gate-0.1.0/src/inference_gate/outflow/client.py +164 -0
  19. inference_gate-0.1.0/src/inference_gate/outflow/model_router.py +277 -0
  20. inference_gate-0.1.0/src/inference_gate/pytest_context.py +118 -0
  21. inference_gate-0.1.0/src/inference_gate/pytest_plugin.py +537 -0
  22. inference_gate-0.1.0/src/inference_gate/recording/__init__.py +25 -0
  23. inference_gate-0.1.0/src/inference_gate/recording/atomic_io.py +46 -0
  24. inference_gate-0.1.0/src/inference_gate/recording/hashing.py +250 -0
  25. inference_gate-0.1.0/src/inference_gate/recording/models.py +256 -0
  26. inference_gate-0.1.0/src/inference_gate/recording/reassembly.py +348 -0
  27. inference_gate-0.1.0/src/inference_gate/recording/storage.py +1090 -0
  28. inference_gate-0.1.0/src/inference_gate/recording/tape_index.py +337 -0
  29. inference_gate-0.1.0/src/inference_gate/recording/tape_parser.py +286 -0
  30. inference_gate-0.1.0/src/inference_gate/recording/tape_writer.py +398 -0
  31. inference_gate-0.1.0/src/inference_gate/router/__init__.py +9 -0
  32. inference_gate-0.1.0/src/inference_gate/router/router.py +584 -0
  33. inference_gate-0.1.0/src/inference_gate/webui/__init__.py +12 -0
  34. inference_gate-0.1.0/src/inference_gate/webui/api.py +194 -0
  35. inference_gate-0.1.0/src/inference_gate/webui/server.py +197 -0
  36. inference_gate-0.1.0/src/inference_gate/webui/static/assets/index-3mjQG4Sn.js +11 -0
  37. inference_gate-0.1.0/src/inference_gate/webui/static/assets/index-DY74WqvV.css +1 -0
  38. inference_gate-0.1.0/src/inference_gate/webui/static/index.html +14 -0
  39. inference_gate-0.1.0/src/inference_gate/webui/static/vite.svg +1 -0
  40. inference_gate-0.1.0/src/inference_gate.egg-info/PKG-INFO +355 -0
  41. inference_gate-0.1.0/src/inference_gate.egg-info/SOURCES.txt +57 -0
  42. inference_gate-0.1.0/src/inference_gate.egg-info/dependency_links.txt +1 -0
  43. inference_gate-0.1.0/src/inference_gate.egg-info/entry_points.txt +5 -0
  44. inference_gate-0.1.0/src/inference_gate.egg-info/requires.txt +15 -0
  45. inference_gate-0.1.0/src/inference_gate.egg-info/top_level.txt +1 -0
  46. inference_gate-0.1.0/tests/test_admin.py +718 -0
  47. inference_gate-0.1.0/tests/test_cassette_replay.py +347 -0
  48. inference_gate-0.1.0/tests/test_cli.py +616 -0
  49. inference_gate-0.1.0/tests/test_config.py +305 -0
  50. inference_gate-0.1.0/tests/test_error_recording.py +607 -0
  51. inference_gate-0.1.0/tests/test_inference_gate.py +105 -0
  52. inference_gate-0.1.0/tests/test_outflow_router.py +361 -0
  53. inference_gate-0.1.0/tests/test_pytest_plugin.py +777 -0
  54. inference_gate-0.1.0/tests/test_reassembly.py +393 -0
  55. inference_gate-0.1.0/tests/test_router.py +872 -0
  56. inference_gate-0.1.0/tests/test_server.py +294 -0
  57. inference_gate-0.1.0/tests/test_storage.py +1201 -0
  58. inference_gate-0.1.0/tests/test_subprocess_server.py +146 -0
  59. inference_gate-0.1.0/tests/test_webui.py +253 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025-2026 DanielKluev
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,355 @@
1
+ Metadata-Version: 2.4
2
+ Name: inference-gate
3
+ Version: 0.1.0
4
+ Summary: Python library for efficient and convenient AI inference replay in testing, debugging and development
5
+ Author: DanielKluev
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/DanielKluev/InferenceGate
8
+ Project-URL: Repository, https://github.com/DanielKluev/InferenceGate
9
+ Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Intended Audience :: Developers
11
+ Classifier: License :: OSI Approved :: MIT License
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.10
14
+ Classifier: Programming Language :: Python :: 3.11
15
+ Classifier: Programming Language :: Python :: 3.12
16
+ Requires-Python: >=3.10
17
+ Description-Content-Type: text/markdown
18
+ License-File: LICENSE
19
+ Requires-Dist: aiohttp>=3.8.0
20
+ Requires-Dist: pydantic>=2.10.0
21
+ Requires-Dist: click>=8.1.0
22
+ Requires-Dist: pyyaml>=6.0.0
23
+ Provides-Extra: dev
24
+ Requires-Dist: pytest>=8.3.0; extra == "dev"
25
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == "dev"
26
+ Requires-Dist: pytest-aiohttp>=1.0.0; extra == "dev"
27
+ Requires-Dist: pytest-xdist>=3.6.0; extra == "dev"
28
+ Requires-Dist: ruff>=0.8.0; extra == "dev"
29
+ Requires-Dist: mypy>=1.13.0; extra == "dev"
30
+ Provides-Extra: test
31
+ Requires-Dist: pytest>=7.0; extra == "test"
32
+ Dynamic: license-file
33
+
34
+ # InferenceGate
35
+
36
+ Python library for efficient and convenient AI inference replay in testing, debugging and development, saving costs and time on repeated prompts.
37
+
38
+ ## Installation
39
+
40
+ ```bash
41
+ pip install inference-gate
42
+ ```
43
+
44
+ ## Features
45
+
46
+ - **Record-and-Replay Mode**: Record new requests to cache, replay from cache when available
47
+ - **Replay-Only Mode**: Only serve cached responses (for unit tests and CI)
48
+ - **Web UI Dashboard**: Optional web-based dashboard for browsing cache entries, viewing statistics, and inspecting request/response details
49
+ - Supports OpenAI Chat Completions API and Responses API
50
+ - Supports streaming responses
51
+ - Preserves prompt, temperature, model, and other metadata
52
+ - YAML configuration file for persistent settings
53
+ - CLI tools for easy management
54
+
55
+ ## Quick Start
56
+
57
+ ### 1. Initialize Configuration (Optional)
58
+
59
+ ```bash
60
+ inference-gate config init
61
+ ```
62
+
63
+ This creates a configuration file at `~/.InferenceGate/config.yaml` (`%USERPROFILE%\.InferenceGate\config.yaml` on Windows).
64
+
65
+ ### 2. Test Your Upstream API Connection
66
+
67
+ ```bash
68
+ inference-gate test-upstream --api-key $OPENAI_API_KEY
69
+ ```
70
+
71
+ ### 3. Start the Proxy
72
+
73
+ ```bash
74
+ inference-gate start --api-key $OPENAI_API_KEY
75
+ ```
76
+
77
+ ### 4. Test the Running Proxy
78
+
79
+ ```bash
80
+ inference-gate test-gate
81
+ ```
82
+
83
+ ### 5. Point Your Client to the Proxy
84
+
85
+ ```python
86
+ from openai import OpenAI
87
+
88
+ client = OpenAI(
89
+ api_key="any-key", # Not needed in replay mode
90
+ base_url="http://localhost:8080/v1"
91
+ )
92
+
93
+ response = client.chat.completions.create(
94
+ model="gpt-4",
95
+ messages=[{"role": "user", "content": "Hello!"}]
96
+ )
97
+ ```
98
+
99
+ ## CLI Commands
100
+
101
+ ### Server Commands
102
+
103
+ #### `start` - Record-and-Replay Mode (Default)
104
+
105
+ Replays cached inferences when available. On cache miss, forwards to upstream, records the response, and stores it for future replays.
106
+
107
+ ```bash
108
+ inference-gate start [OPTIONS]
109
+ ```
110
+
111
+ **Options:**
112
+
113
+ | Option | Description | Default |
114
+ |--------|-------------|---------|
115
+ | `--port, -p` | Server port | 8080 |
116
+ | `--host, -h` | Server host | 127.0.0.1 |
117
+ | `--cache-dir, -c` | Cache directory | .inference_cache |
118
+ | `--upstream, -u` | Upstream API URL | https://api.openai.com |
119
+ | `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
120
+ | `--max-live-requests` | Global limit on live upstream requests | (infinite) |
121
+ | `--web-ui` | Enable web UI dashboard | false |
122
+ | `--web-ui-port` | Web UI server port | 8081 |
123
+ | `--verbose, -v` | Enable verbose logging | false |
124
+
125
+ #### `replay` - Replay-Only Mode
126
+
127
+ Only returns cached responses. Returns an error if a matching inference is not found in the cache. Useful for unit tests and CI pipelines.
128
+
129
+ ```bash
130
+ inference-gate replay [OPTIONS]
131
+ ```
132
+
133
+ **Options:**
134
+
135
+ | Option | Description | Default |
136
+ |--------|-------------|---------|
137
+ | `--port, -p` | Server port | 8080 |
138
+ | `--host, -h` | Server host | 127.0.0.1 |
139
+ | `--cache-dir, -c` | Cache directory | .inference_cache |
140
+ | `--web-ui` | Enable web UI dashboard | false |
141
+ | `--web-ui-port` | Web UI server port | 8081 |
142
+ | `--verbose, -v` | Enable verbose logging | false |
143
+
144
+ ### Test Commands
145
+
146
+ #### `test-gate` - Test a Running InferenceGate Instance
147
+
148
+ Sends a test prompt to a running InferenceGate proxy. It uses the same host and port as the config, so no API key or extra options are needed.
149
+
150
+ ```bash
151
+ inference-gate test-gate [OPTIONS]
152
+ ```
153
+
154
+ **Options:**
155
+
156
+ | Option | Description | Default |
157
+ |--------|-------------|--------|
158
+ | `--host, -h` | Host of the running instance | 127.0.0.1 |
159
+ | `--port, -p` | Port of the running instance | 8080 |
160
+ | `--model, -m` | Model to use | gpt-4o-mini |
161
+ | `--prompt` | Custom test prompt | (built-in test prompt) |
162
+ | `--verbose, -v` | Enable verbose logging | false |
163
+
164
+ #### `test-upstream` - Test Upstream API Directly
165
+
166
+ Sends a test prompt directly to the upstream API (bypassing InferenceGate) to verify the API key and endpoint.
167
+
168
+ ```bash
169
+ inference-gate test-upstream [OPTIONS]
170
+ ```
171
+
172
+ **Options:**
173
+
174
+ | Option | Description | Default |
175
+ |--------|-------------|--------|
176
+ | `--upstream, -u` | Upstream API URL | https://api.openai.com |
177
+ | `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
178
+ | `--model, -m` | Model to use | gpt-4o-mini |
179
+ | `--prompt` | Custom test prompt | (built-in test prompt) |
180
+ | `--verbose, -v` | Enable verbose logging | false |
181
+
182
+ ### Cache Management
183
+
184
+ #### `cache list` - List Cached Entries
185
+
186
+ ```bash
187
+ inference-gate cache list [--cache-dir PATH]
188
+ ```
189
+
190
+ #### `cache info` - Show Cache Statistics
191
+
192
+ ```bash
193
+ inference-gate cache info [--cache-dir PATH]
194
+ ```
195
+
196
+ #### `cache clear` - Clear All Cached Entries
197
+
198
+ ```bash
199
+ inference-gate cache clear [--cache-dir PATH] [--yes]
200
+ ```
201
+
202
+ ## Web UI Dashboard
203
+
204
+ InferenceGate includes an optional web-based dashboard for browsing cached inference entries, viewing statistics, and inspecting request/response details.
205
+
206
+ ### Enabling the Web UI
207
+
208
+ Add the `--web-ui` flag when starting InferenceGate:
209
+
210
+ ```bash
211
+ # Record-and-replay mode with web UI
212
+ inference-gate start --api-key $OPENAI_API_KEY --web-ui
213
+
214
+ # Replay-only mode with web UI
215
+ inference-gate replay --web-ui
216
+ ```
217
+
218
+ The web UI will be available at `http://localhost:8081` by default. You can customize the port with `--web-ui-port`:
219
+
220
+ ```bash
221
+ inference-gate start --web-ui --web-ui-port 3000
222
+ ```
223
+
224
+ ### Features
225
+
226
+ - **Dashboard**: View cache statistics, current mode, and configuration at a glance
227
+ - **Cache List**: Browse all cached entries in a sortable, filterable table
228
+ - **Entry Details**: Inspect full request and response details including headers, body, and metadata
229
+ - **Search**: Filter cache entries by ID, model, path, or method
230
+ - **Streaming Support**: View streaming response chunks for SSE endpoints
231
+
232
+ ### Screenshots
233
+
234
+ **Dashboard Page**
235
+
236
+ ![Dashboard](https://github.com/user-attachments/assets/6ec5916c-6e0e-40a7-a9e8-1289af7ed2e8)
237
+
238
+ **Cache List Page**
239
+
240
+ ![Cache List](https://github.com/user-attachments/assets/01fe025c-7922-4f64-bf20-b2ea6158060e)
241
+
242
+ **Entry Detail Page**
243
+
244
+ ![Entry Detail](https://github.com/user-attachments/assets/3a858019-b978-4893-9c04-ceb466dea67c)
245
+
246
+ ### Building the Frontend (Development Only)
247
+
248
+ The web UI frontend is pre-built and included in the package. You only need to build it if you're developing or modifying the frontend:
249
+
250
+ ```bash
251
+ cd webui-frontend
252
+ npm install
253
+ npm run build
254
+ # Output goes to src/inference_gate/webui/static/
255
+ ```
256
+
257
+ **Requirements:**
258
+ - Node.js 16+ and npm (only for frontend development)
259
+ - No Node.js or npm dependency at runtime - the built static files are served by the Python backend
260
+
261
+ ### Configuration Management
262
+
263
+ #### `config show` - Show Current Configuration
264
+
265
+ ```bash
266
+ inference-gate config show
267
+ ```
268
+
269
+ #### `config init` - Initialize Configuration File
270
+
271
+ ```bash
272
+ inference-gate config init [--force]
273
+ ```
274
+
275
+ #### `config path` - Show Configuration File Path
276
+
277
+ ```bash
278
+ inference-gate config path
279
+ ```
280
+
281
+ ## Configuration File
282
+
283
+ InferenceGate uses a YAML configuration file to store default settings. The file is located at:
284
+
285
+ - **Windows**: `%USERPROFILE%\.InferenceGate\config.yaml`
286
+ - **macOS/Linux**: `~/.InferenceGate/config.yaml`
287
+
288
+ You can specify a custom path using the `--config` global option:
289
+
290
+ ```bash
291
+ inference-gate --config /path/to/config.yaml start
292
+ ```
293
+
294
+ ### Configuration Options
295
+
296
+ ```yaml
297
+ # Server settings
298
+ host: "127.0.0.1"
299
+ port: 8080
300
+ max_live_requests: null # Optional global limit on live upstream requests
301
+
302
+ # Upstream API settings
303
+ upstream: "https://api.openai.com"
304
+ # api_key is not stored in the config file for security
305
+ # Use OPENAI_API_KEY environment variable instead
306
+
307
+ # Storage settings
308
+ cache_dir: ".inference_cache"
309
+
310
+ # Logging settings
311
+ verbose: false
312
+
313
+ # Test command settings
314
+ test_model: "gpt-4o-mini"
315
+ test_prompt: "This is a test prompt. Reply with **ONLY** \"OK.\" to confirm that everything is ok. DO NOT output anything else."
316
+ ```
317
+
318
+ ### Configuration Priority
319
+
320
+ Settings are loaded in the following order (later overrides earlier):
321
+
322
+ 1. Built-in defaults
323
+ 2. Configuration file
324
+ 3. Environment variables (`OPENAI_API_KEY`)
325
+ 4. Command-line options
326
+
327
+ ## Environment Variables
328
+
329
+ | Variable | Description |
330
+ |----------|-------------|
331
+ | `OPENAI_API_KEY` | OpenAI API key (used in record/test modes) |
332
+
333
+ ## Development
334
+
335
+ Install development dependencies:
336
+
337
+ ```bash
338
+ pip install -e ".[dev]"
339
+ ```
340
+
341
+ Run tests:
342
+
343
+ ```bash
344
+ pytest
345
+ ```
346
+
347
+ Run linting:
348
+
349
+ ```bash
350
+ ruff check src/ tests/
351
+ ```
352
+
353
+ ## License
354
+
355
+ MIT License
@@ -0,0 +1,322 @@
1
+ # InferenceGate
2
+
3
+ Python library for efficient and convenient AI inference replay in testing, debugging and development, saving costs and time on repeated prompts.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install inference-gate
9
+ ```
10
+
11
+ ## Features
12
+
13
+ - **Record-and-Replay Mode**: Record new requests to cache, replay from cache when available
14
+ - **Replay-Only Mode**: Only serve cached responses (for unit tests and CI)
15
+ - **Web UI Dashboard**: Optional web-based dashboard for browsing cache entries, viewing statistics, and inspecting request/response details
16
+ - Supports OpenAI Chat Completions API and Responses API
17
+ - Supports streaming responses
18
+ - Preserves prompt, temperature, model, and other metadata
19
+ - YAML configuration file for persistent settings
20
+ - CLI tools for easy management
21
+
22
+ ## Quick Start
23
+
24
+ ### 1. Initialize Configuration (Optional)
25
+
26
+ ```bash
27
+ inference-gate config init
28
+ ```
29
+
30
+ This creates a configuration file at `~/.InferenceGate/config.yaml` (`%USERPROFILE%\.InferenceGate\config.yaml` on Windows).
31
+
32
+ ### 2. Test Your Upstream API Connection
33
+
34
+ ```bash
35
+ inference-gate test-upstream --api-key $OPENAI_API_KEY
36
+ ```
37
+
38
+ ### 3. Start the Proxy
39
+
40
+ ```bash
41
+ inference-gate start --api-key $OPENAI_API_KEY
42
+ ```
43
+
44
+ ### 4. Test the Running Proxy
45
+
46
+ ```bash
47
+ inference-gate test-gate
48
+ ```
49
+
50
+ ### 5. Point Your Client to the Proxy
51
+
52
+ ```python
53
+ from openai import OpenAI
54
+
55
+ client = OpenAI(
56
+ api_key="any-key", # Not needed in replay mode
57
+ base_url="http://localhost:8080/v1"
58
+ )
59
+
60
+ response = client.chat.completions.create(
61
+ model="gpt-4",
62
+ messages=[{"role": "user", "content": "Hello!"}]
63
+ )
64
+ ```
65
+
66
+ ## CLI Commands
67
+
68
+ ### Server Commands
69
+
70
+ #### `start` - Record-and-Replay Mode (Default)
71
+
72
+ Replays cached inferences when available. On cache miss, forwards to upstream, records the response, and stores it for future replays.
73
+
74
+ ```bash
75
+ inference-gate start [OPTIONS]
76
+ ```
77
+
78
+ **Options:**
79
+
80
+ | Option | Description | Default |
81
+ |--------|-------------|---------|
82
+ | `--port, -p` | Server port | 8080 |
83
+ | `--host, -h` | Server host | 127.0.0.1 |
84
+ | `--cache-dir, -c` | Cache directory | .inference_cache |
85
+ | `--upstream, -u` | Upstream API URL | https://api.openai.com |
86
+ | `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
87
+ | `--max-live-requests` | Global limit on live upstream requests | (infinite) |
88
+ | `--web-ui` | Enable web UI dashboard | false |
89
+ | `--web-ui-port` | Web UI server port | 8081 |
90
+ | `--verbose, -v` | Enable verbose logging | false |
91
+
92
+ #### `replay` - Replay-Only Mode
93
+
94
+ Only returns cached responses. Returns an error if a matching inference is not found in the cache. Useful for unit tests and CI pipelines.
95
+
96
+ ```bash
97
+ inference-gate replay [OPTIONS]
98
+ ```
99
+
100
+ **Options:**
101
+
102
+ | Option | Description | Default |
103
+ |--------|-------------|---------|
104
+ | `--port, -p` | Server port | 8080 |
105
+ | `--host, -h` | Server host | 127.0.0.1 |
106
+ | `--cache-dir, -c` | Cache directory | .inference_cache |
107
+ | `--web-ui` | Enable web UI dashboard | false |
108
+ | `--web-ui-port` | Web UI server port | 8081 |
109
+ | `--verbose, -v` | Enable verbose logging | false |
110
+
111
+ ### Test Commands
112
+
113
+ #### `test-gate` - Test a Running InferenceGate Instance
114
+
115
+ Sends a test prompt to a running InferenceGate proxy. It uses the same host and port as the config, so no API key or extra options are needed.
116
+
117
+ ```bash
118
+ inference-gate test-gate [OPTIONS]
119
+ ```
120
+
121
+ **Options:**
122
+
123
+ | Option | Description | Default |
124
+ |--------|-------------|--------|
125
+ | `--host, -h` | Host of the running instance | 127.0.0.1 |
126
+ | `--port, -p` | Port of the running instance | 8080 |
127
+ | `--model, -m` | Model to use | gpt-4o-mini |
128
+ | `--prompt` | Custom test prompt | (built-in test prompt) |
129
+ | `--verbose, -v` | Enable verbose logging | false |
130
+
131
+ #### `test-upstream` - Test Upstream API Directly
132
+
133
+ Sends a test prompt directly to the upstream API (bypassing InferenceGate) to verify the API key and endpoint.
134
+
135
+ ```bash
136
+ inference-gate test-upstream [OPTIONS]
137
+ ```
138
+
139
+ **Options:**
140
+
141
+ | Option | Description | Default |
142
+ |--------|-------------|--------|
143
+ | `--upstream, -u` | Upstream API URL | https://api.openai.com |
144
+ | `--api-key, -k` | OpenAI API key | $OPENAI_API_KEY |
145
+ | `--model, -m` | Model to use | gpt-4o-mini |
146
+ | `--prompt` | Custom test prompt | (built-in test prompt) |
147
+ | `--verbose, -v` | Enable verbose logging | false |
148
+
149
+ ### Cache Management
150
+
151
+ #### `cache list` - List Cached Entries
152
+
153
+ ```bash
154
+ inference-gate cache list [--cache-dir PATH]
155
+ ```
156
+
157
+ #### `cache info` - Show Cache Statistics
158
+
159
+ ```bash
160
+ inference-gate cache info [--cache-dir PATH]
161
+ ```
162
+
163
+ #### `cache clear` - Clear All Cached Entries
164
+
165
+ ```bash
166
+ inference-gate cache clear [--cache-dir PATH] [--yes]
167
+ ```
168
+
169
+ ## Web UI Dashboard
170
+
171
+ InferenceGate includes an optional web-based dashboard for browsing cached inference entries, viewing statistics, and inspecting request/response details.
172
+
173
+ ### Enabling the Web UI
174
+
175
+ Add the `--web-ui` flag when starting InferenceGate:
176
+
177
+ ```bash
178
+ # Record-and-replay mode with web UI
179
+ inference-gate start --api-key $OPENAI_API_KEY --web-ui
180
+
181
+ # Replay-only mode with web UI
182
+ inference-gate replay --web-ui
183
+ ```
184
+
185
+ The web UI will be available at `http://localhost:8081` by default. You can customize the port with `--web-ui-port`:
186
+
187
+ ```bash
188
+ inference-gate start --web-ui --web-ui-port 3000
189
+ ```
190
+
191
+ ### Features
192
+
193
+ - **Dashboard**: View cache statistics, current mode, and configuration at a glance
194
+ - **Cache List**: Browse all cached entries in a sortable, filterable table
195
+ - **Entry Details**: Inspect full request and response details including headers, body, and metadata
196
+ - **Search**: Filter cache entries by ID, model, path, or method
197
+ - **Streaming Support**: View streaming response chunks for SSE endpoints
198
+
199
+ ### Screenshots
200
+
201
+ **Dashboard Page**
202
+
203
+ ![Dashboard](https://github.com/user-attachments/assets/6ec5916c-6e0e-40a7-a9e8-1289af7ed2e8)
204
+
205
+ **Cache List Page**
206
+
207
+ ![Cache List](https://github.com/user-attachments/assets/01fe025c-7922-4f64-bf20-b2ea6158060e)
208
+
209
+ **Entry Detail Page**
210
+
211
+ ![Entry Detail](https://github.com/user-attachments/assets/3a858019-b978-4893-9c04-ceb466dea67c)
212
+
213
+ ### Building the Frontend (Development Only)
214
+
215
+ The web UI frontend is pre-built and included in the package. You only need to build it if you're developing or modifying the frontend:
216
+
217
+ ```bash
218
+ cd webui-frontend
219
+ npm install
220
+ npm run build
221
+ # Output goes to src/inference_gate/webui/static/
222
+ ```
223
+
224
+ **Requirements:**
225
+ - Node.js 16+ and npm (only for frontend development)
226
+ - No Node.js or npm dependency at runtime - the built static files are served by the Python backend
227
+
228
+ ### Configuration Management
229
+
230
+ #### `config show` - Show Current Configuration
231
+
232
+ ```bash
233
+ inference-gate config show
234
+ ```
235
+
236
+ #### `config init` - Initialize Configuration File
237
+
238
+ ```bash
239
+ inference-gate config init [--force]
240
+ ```
241
+
242
+ #### `config path` - Show Configuration File Path
243
+
244
+ ```bash
245
+ inference-gate config path
246
+ ```
247
+
248
+ ## Configuration File
249
+
250
+ InferenceGate uses a YAML configuration file to store default settings. The file is located at:
251
+
252
+ - **Windows**: `%USERPROFILE%\.InferenceGate\config.yaml`
253
+ - **macOS/Linux**: `~/.InferenceGate/config.yaml`
254
+
255
+ You can specify a custom path using the `--config` global option:
256
+
257
+ ```bash
258
+ inference-gate --config /path/to/config.yaml start
259
+ ```
260
+
261
+ ### Configuration Options
262
+
263
+ ```yaml
264
+ # Server settings
265
+ host: "127.0.0.1"
266
+ port: 8080
267
+ max_live_requests: null # Optional global limit on live upstream requests
268
+
269
+ # Upstream API settings
270
+ upstream: "https://api.openai.com"
271
+ # api_key is not stored in the config file for security
272
+ # Use OPENAI_API_KEY environment variable instead
273
+
274
+ # Storage settings
275
+ cache_dir: ".inference_cache"
276
+
277
+ # Logging settings
278
+ verbose: false
279
+
280
+ # Test command settings
281
+ test_model: "gpt-4o-mini"
282
+ test_prompt: "This is a test prompt. Reply with **ONLY** \"OK.\" to confirm that everything is ok. DO NOT output anything else."
283
+ ```
284
+
285
+ ### Configuration Priority
286
+
287
+ Settings are loaded in the following order (later overrides earlier):
288
+
289
+ 1. Built-in defaults
290
+ 2. Configuration file
291
+ 3. Environment variables (`OPENAI_API_KEY`)
292
+ 4. Command-line options
293
+
294
+ ## Environment Variables
295
+
296
+ | Variable | Description |
297
+ |----------|-------------|
298
+ | `OPENAI_API_KEY` | OpenAI API key (used in record/test modes) |
299
+
300
+ ## Development
301
+
302
+ Install development dependencies:
303
+
304
+ ```bash
305
+ pip install -e ".[dev]"
306
+ ```
307
+
308
+ Run tests:
309
+
310
+ ```bash
311
+ pytest
312
+ ```
313
+
314
+ Run linting:
315
+
316
+ ```bash
317
+ ruff check src/ tests/
318
+ ```
319
+
320
+ ## License
321
+
322
+ MIT License