auto-gateway 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. auto_gateway-0.1.0/.gitignore +7 -0
  2. auto_gateway-0.1.0/LICENSE +21 -0
  3. auto_gateway-0.1.0/PKG-INFO +603 -0
  4. auto_gateway-0.1.0/README.md +590 -0
  5. auto_gateway-0.1.0/auto_gateway/__init__.py +2 -0
  6. auto_gateway-0.1.0/auto_gateway/cli/main.py +207 -0
  7. auto_gateway-0.1.0/auto_gateway/config/manager.py +35 -0
  8. auto_gateway-0.1.0/auto_gateway/config/schema.py +47 -0
  9. auto_gateway-0.1.0/auto_gateway/core/exceptions.py +22 -0
  10. auto_gateway-0.1.0/auto_gateway/core/models.py +69 -0
  11. auto_gateway-0.1.0/auto_gateway/core/router.py +349 -0
  12. auto_gateway-0.1.0/auto_gateway/core/router_tool_calls_helpers.py +22 -0
  13. auto_gateway-0.1.0/auto_gateway/core/router_toolcalls_patch.py +6 -0
  14. auto_gateway-0.1.0/auto_gateway/core/server.py +146 -0
  15. auto_gateway-0.1.0/auto_gateway/network/__init__.py +4 -0
  16. auto_gateway-0.1.0/auto_gateway/network/hosting.py +105 -0
  17. auto_gateway-0.1.0/auto_gateway/network/hosting_test_utils.py +19 -0
  18. auto_gateway-0.1.0/auto_gateway/network/tunnels.py +8 -0
  19. auto_gateway-0.1.0/auto_gateway/network/uvicorn_runner.py +36 -0
  20. auto_gateway-0.1.0/auto_gateway/providers/base.py +95 -0
  21. auto_gateway-0.1.0/auto_gateway/providers/google.py +174 -0
  22. auto_gateway-0.1.0/auto_gateway/providers/openai_compatible.py +185 -0
  23. auto_gateway-0.1.0/auto_gateway/providers/registry.py +27 -0
  24. auto_gateway-0.1.0/auto_gateway/strategies/adaptive.py +603 -0
  25. auto_gateway-0.1.0/auto_gateway/strategies/bandit.py +111 -0
  26. auto_gateway-0.1.0/auto_gateway/strategies/base.py +49 -0
  27. auto_gateway-0.1.0/auto_gateway/strategies/sequential.py +127 -0
  28. auto_gateway-0.1.0/config.json.example +49 -0
  29. auto_gateway-0.1.0/pyproject.toml +35 -0
  30. auto_gateway-0.1.0/tests/test_comprehensive_api.py +505 -0
  31. auto_gateway-0.1.0/tests/test_openai_streaming_delta_shapes.py +123 -0
  32. auto_gateway-0.1.0/tests/test_simple_smoke.py +48 -0
  33. auto_gateway-0.1.0/tests/test_smoke_server.py +41 -0
  34. auto_gateway-0.1.0/tests/test_streaming_and_failover.py +119 -0
  35. auto_gateway-0.1.0/tests/test_tunnel_url_parsing.py +46 -0
@@ -0,0 +1,7 @@
1
+ config.json
2
+ TODO.md
3
+ __pycache__
4
+ .pytest_cache
5
+ .env
6
+ scrpts
7
+ .vscode
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 nguyenv
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,603 @@
1
+ Metadata-Version: 2.4
2
+ Name: auto-gateway
3
+ Version: 0.1.0
4
+ Summary: Standalone async auto-gateway with provider routing
5
+ License-File: LICENSE
6
+ Requires-Python: >=3.10
7
+ Provides-Extra: testing
8
+ Requires-Dist: pyngrok>=7.2.0; extra == 'testing'
9
+ Requires-Dist: pytest-asyncio>=0.24.0; extra == 'testing'
10
+ Requires-Dist: pytest>=8.0.0; extra == 'testing'
11
+ Requires-Dist: respx>=0.21.0; extra == 'testing'
12
+ Description-Content-Type: text/markdown
13
+
14
+ # auto-gateway
15
+
16
+ **OpenAI-compatible API gateway with intelligent provider routing, failover, and tunneling.**
17
+
18
+ `auto-gateway` exposes a single `POST /v1/chat/completions` endpoint that transparently routes requests to multiple AI providers (OpenAI-compatible, Google Gemini, etc.) using configurable strategies. It supports streaming (SSE), tool calls, vision/media filtering, automatic failover, and public URL tunneling via ngrok or cloudflared.
19
+
20
+ ---
21
+
22
+ ## Table of Contents
23
+
24
+ - [Why auto-gateway?](#why-auto-gateway)
25
+ - [Quick Start](#quick-start)
26
+ - [Architecture](#architecture)
27
+ - [Configuration](#configuration)
28
+ - [API Reference](#api-reference)
29
+ - [Routing Strategies](#routing-strategies)
30
+ - [Provider Architecture](#provider-architecture)
31
+ - [Network & Tunneling](#network--tunneling)
32
+ - [CLI Reference](#cli-reference)
33
+ - [Development](#development)
34
+ - [Testing](#testing)
35
+ - [Extending](#extending)
36
+
37
+ ---
38
+
39
+ ## Why auto-gateway?
40
+
41
+ - **Single OpenAI-compatible endpoint** — Drop-in replacement for OpenAI clients. No SDK changes needed.
42
+ - **Provider failover** — If one provider fails, automatically try the next.
43
+ - **Adaptive routing** — Latency-aware routing with circuit breakers and health tracking (optional).
44
+ - **Tunneling built-in** — Expose your local gateway publicly via ngrok or cloudflared with zero config.
45
+ - **Async everything** — Fully async stack (FastAPI + httpx) for high concurrency.
46
+ - **Extensible** — Add custom providers or routing strategies in minutes.
47
+
48
+ ---
49
+
50
+ ## Quick Start
51
+
52
+ ```bash
53
+ # Install
54
+ pip install auto-gateway
55
+
56
+ # Create a config file
57
+ cp config.json.example config.json
58
+ # Edit config.json with your API keys
59
+
60
+ # Start the gateway
61
+ auto-gateway start --config config.json --port 8000
62
+
63
+ # Test it
64
+ curl http://localhost:8000/v1/chat/completions \
65
+ -H "Content-Type: application/json" \
66
+ -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hello"}],"stream":false}'
67
+ ```
68
+
69
+ ### Development install
70
+
71
+ ```bash
72
+ git clone <repo>
73
+ cd auto-gateway
74
+ pip install -e ".[testing]"
75
+ ```
76
+
77
+ ---
78
+
79
+ ## Architecture
80
+
81
+ ```
82
+ ┌─────────────────────────────────────────────────────────┐
83
+ │ Client (curl, SDK) │
84
+ │ POST /v1/chat/completions │
85
+ └─────────────────────────┬───────────────────────────────┘
86
+
87
+ ┌─────────────────────────▼───────────────────────────────┐
88
+ │ FastAPI Server │
89
+ │ core/server.py + core/models.py │
90
+ │ ┌──────────────────────────────────────────────────┐ │
91
+ │ │ ProviderRouter (core/router.py) │ │
92
+ │ │ - routes to provider via Strategy │ │
93
+ │ │ - message filtering (vision/media/video) │ │
94
+ │ │ - tool call SSE chunking │ │
95
+ │ │ - failover on exception │ │
96
+ │ └─────────────────────────┬────────────────────────┘ │
97
+ │ │ │
98
+ │ ┌──────────▼───────┐ │
99
+ │ │ Strategy: │ │
100
+ │ │ * Sequential │ │
101
+ │ │ * Adaptive │ │
102
+ │ │ * Bandit/UCB1 │ │
103
+ │ └──────────┬───────┘ │
104
+ │ │ │
105
+ └────────────────────────────┼────────────────────────────┘
106
+
107
+ ┌────────────────────────────▼─────────────────────────────┐
108
+ │ Providers │
109
+ │ ┌─────────────────┐ ┌─────────────────┐ │
110
+ │ │ OpenAICompatible│ │ Google │ │
111
+ │ │ (httpx.Async) │ │ (genai thread) │ │
112
+ │ └─────────────────┘ └─────────────────┘ │
113
+ └──────────────────────────────────────────────────────────┘
114
+ ```
115
+
116
+ ### Request flow
117
+
118
+ 1. **Client** sends OpenAI-compatible JSON to `POST /v1/chat/completions`
119
+ 2. **FastAPI server** validates the payload via Pydantic models
120
+ 3. **ProviderRouter** delegates to the configured **Strategy** to obtain an ordered list of `(provider, model, key, features)` tuples
121
+ 4. Router tries each target in order:
122
+ - Calls `provider.call()` (non-streaming) or `provider.call_stream()` (streaming)
123
+ - On success: records metrics and returns response
124
+ - On failure: records error, tries next target
125
+ 5. **Response** is formatted as an OpenAI-compatible JSON or SSE stream with `[DONE]` terminator
126
+
127
+ ---
128
+
129
+ ## Configuration
130
+
131
+ ### config.json schema
132
+
133
+ ```jsonc
134
+ {
135
+ "server": {
136
+ "host": "127.0.0.1", // Bind address
137
+ "port": 8000, // Port number
138
+ "api_key": "my-awesome-api-key", // Server auth key (via `Authorization: Bearer`)
139
+ "socket_path": null, // UNIX socket path (optional, overrides host:port)
140
+ "tunnel": "none" // "none" | "ngrok" | "cloudflared"
141
+ },
142
+ "router": {
143
+ "strategy": "adaptive", // "sequential" | "adaptive" | "bandit"
144
+ "retries": 1 // Retries per key-provider-model pair
145
+ },
146
+ "providers": [
147
+ {
148
+ "type": "openai_compatible", // Provider type
149
+ "name": "local_openai", // Unique name for routing
150
+ "base_url": "http://localhost:8001/v1", // API base URL
151
+ "api_key": null, // API key (or env var reference)
152
+ "models": { // Model name -> features
153
+ "gpt-4o-mini": ["vision", "tool_calls"], // `vision` -> supports images; `tool_calls` -> supports tool calling
154
+ "gpt-4o": []
155
+ },
156
+ "extra_body": {} // Extra params sent with every request
157
+ },
158
+ {
159
+ "type": "google",
160
+ "name": "gemini",
161
+ "api_key": ["GOOGLE_API_KEY_1", "GOOGLE_API_KEY_2", ...],
162
+ "models": {
163
+ "gemini-1.5-flash": ["vision"]
164
+ }
165
+ }
166
+ ],
167
+ "extra": {
168
+ "tunnels": { // Tunnel-specific config (optional)
169
+ "ngrok_authtoken": "YOUR_NGROK_AUTHTOKEN",
170
+ "cloudflared_binary": "cloudflared"
171
+ }
172
+ }
173
+ }
174
+ ```
175
+
176
+ ### Provider types
177
+
178
+ | Type | Class | Description |
179
+ |------|-------|-------------|
180
+ | `openai_compatible` | `OpenAICompatibleProvider` | Any OpenAI-compatible API (OpenAI, Anthropic via proxy, local vLLM, etc.) |
181
+ | `google` | `GoogleProvider` | Google Gemini via `google-genai` SDK |
182
+
183
+ ### Model features
184
+
185
+ Features are strings that enable message filtering in the router:
186
+
187
+ | Feature | Effect |
188
+ |---------|--------|
189
+ | `vision` | Image content (`image_url`) is forwarded to provider |
190
+ | `media` | Media content is forwarded for google (Built-in Coming Soon) |
191
+ | `video_vision` | Video content is forwarded (Built-in Coming Soon) |
192
+ | `tool_calls` | Specifies that this model supports tool calling |
193
+ | *(none)* | Image/media/video content is stripped from messages. No tool calling. |
194
+
195
+ ---
196
+
197
+ ## API Reference
198
+
199
+ ### `POST /v1/chat/completions`
200
+
201
+ OpenAI-compatible chat completions endpoint.
202
+
203
+ #### Request
204
+
205
+ ```json
206
+ {
207
+ "model": "gpt-4o-mini",
208
+ "messages": [{"role": "user", "content": "Hello!"}],
209
+ "temperature": 0.0,
210
+ "stream": false,
211
+ "tools": null,
212
+ "tool_choice": null,
213
+ "extra_body": {}
214
+ }
215
+ ```
216
+
217
+ #### Response (non-streaming)
218
+
219
+ ```json
220
+ {
221
+ "id": "chatcmpl_abc123",
222
+ "object": "chat.completion",
223
+ "created": 1700000000,
224
+ "model": "gpt-4o-mini",
225
+ "choices": [
226
+ {
227
+ "index": 0,
228
+ "message": {
229
+ "role": "assistant",
230
+ "content": "Hello! How can I help you today?"
231
+ },
232
+ "finish_reason": "stop"
233
+ }
234
+ ],
235
+ "usage": {
236
+ "prompt_tokens": 10,
237
+ "completion_tokens": 5,
238
+ "total_tokens": 15
239
+ }
240
+ }
241
+ ```
242
+
243
+ #### Response (streaming)
244
+
245
+ Server-Sent Events stream:
246
+
247
+ ```
248
+ data: {"id":"chatcmpl_xyz","object":"chat.completion.chunk","created":1700000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"role":"assistant","content":""},"finish_reason":null}]}
249
+
250
+ data: {"id":"chatcmpl_xyz","object":"chat.completion.chunk","created":1700000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{"content":"Hello"},"finish_reason":null}]}
251
+
252
+ data: {"id":"chatcmpl_xyz","object":"chat.completion.chunk","created":1700000000,"model":"gpt-4o-mini","choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
253
+
254
+ data: [DONE]
255
+ ```
256
+
257
+ #### Error handling
258
+
259
+ | Scenario | Status | Behavior |
260
+ |----------|--------|----------|
261
+ | All providers fail | 200 | Returns empty content `""` with `finish_reason: "stop"` |
262
+ | Invalid payload | 422 | FastAPI validation error |
263
+ | Provider timeout | — | Falls through to next provider automatically |
264
+
265
+ ---
266
+
267
+ ## Routing Strategies
268
+
269
+ ### Sequential Strategy
270
+
271
+ `auto_gateway/strategies/sequential.py`
272
+
273
+ Simple ordered rotation. Providers are tried in the order they appear in `all_models`. If a provider fails, the next one in sequence is attempted.
274
+
275
+ Configuration: `"strategy": "sequential"`
276
+
277
+ ### Adaptive Strategy
278
+
279
+ `auto_gateway/strategies/adaptive.py`
280
+
281
+ Health-aware routing with:
282
+
283
+ - **Health scoring**: Combines success rate (40%), average latency (30%), and stability (20%) for a `health_score`
284
+ - **Circuit breakers**: After `circuit_threshold` consecutive failures, a provider is temporarily skipped
285
+ - **Per-error backoff**: Rate limits, auth errors, and quotas have independent backoff timers with configurable delays and multipliers
286
+ - **Latency tracking**: Rolling window of latency samples for scoring
287
+ - **Persistence**: Health state can be persisted to disk (optional, via `persistence_path`)
288
+ - **Small model preference**: Models in `_SMALL_MODELS` list get a routing bonus
289
+
290
+ Configuration: `"strategy": "adaptive"`
291
+
292
+ > **Note**: Adaptive strategy is ported from the `callai` project and may have additional configuration knobs exposed in the future.
293
+
294
+ ---
295
+
296
+ ## Provider Architecture
297
+
298
+ ### Built-in providers
299
+
300
+ #### `OpenAICompatibleProvider` (`providers/openai_compatible.py`)
301
+
302
+ - Uses `httpx.AsyncClient` for async HTTP
303
+ - Supports both `call()` and `call_stream()`
304
+ - Passes headers, tools, tool_choice, and extra_body
305
+ - Subclass `OpenAIProvider` preconfigured for `https://api.openai.com/v1`
306
+
307
+ #### `GoogleProvider` (`providers/google.py`)
308
+
309
+ - Runs the synchronous `google-genai` SDK in a worker thread via `asyncio.to_thread()` so calls do not block the event loop
310
+ - Supports system instructions, multimodal content (images), function calling
311
+ - Returns normalized `ProviderCallResult` with text, reasoning, tool_calls, usage
312
+
313
+ ### Provider interface
314
+
315
+ All providers extend `BaseProvider` (`providers/base.py`):
316
+
317
+ ```python
318
+ class BaseProvider(ABC):
319
+ def __init__(self, name: str, keys: list[str] | None, models: dict[str, list[str]]):
320
+ ...
321
+
322
+ @abstractmethod
323
+ async def call(self, *, key: str, model: str, messages: list[ChatMessage], timeout: float, tools: Optional[list[dict[str, Any]]] = None, tool_choice: str, extra_body: dict[str, Any] =None) -> ProviderCallResult:
324
+ """Non-streaming call. Returns ProviderCallResult TypedDict."""
325
+
326
+ async def call_stream(self, *, key, model, messages, timeout, tools, tool_choice, extra_body=None) -> AsyncIterator[BaseProviderDelta]:
327
+ """Streaming call. Yields delta dicts with type/content/finish_reason/tool_calls fields."""
328
+ ```
329
+
330
+ ### Provider registry (`providers/registry.py`)
331
+
332
+ ```python
333
+ from auto_gateway.providers.registry import register_provider, get_provider_factory
334
+
335
+ @register_provider("my_custom")
336
+ def create_my_provider(config) -> BaseProvider:
337
+ ...
338
+ ```
339
+
340
+ ---
341
+
342
+ ## Network & Tunneling
343
+
344
+ ### Local server
345
+
346
+ Default: `http://127.0.0.1:8000`
347
+
348
+ The gateway supports binding to a **UNIX domain socket** instead of TCP:
349
+
350
+ ```json
351
+ {
352
+ "server": {
353
+ "socket_path": "/tmp/gateway.sock",
354
+ "host": "127.0.0.1",
355
+ "port": 8000
356
+ }
357
+ }
358
+ ```
359
+
360
+ If `socket_path` is provided, the server binds to the socket instead of TCP.
361
+
362
+ ### ngrok tunnel
363
+
364
+ ```bash
365
+ auto-gateway start --config config.json --tunnel ngrok
366
+ ```
367
+
368
+ Requires `NGROK_AUTHTOKEN` environment variable or configured in `config.json` under `extra.tunnels.ngrok_authtoken`.
369
+
370
+ ### cloudflared tunnel
371
+
372
+ ```bash
373
+ auto-gateway start --config config.json --tunnel cloudflared
374
+ ```
375
+
376
+ Requires `cloudflared` binary on `PATH` (or configured in `config.json` under `extra.tunnels.cloudflared_binary`).
377
+
378
+ The public URL is extracted from the `*.trycloudflare.com` output and logged at startup.
379
+
380
+ ### Tunnel info
381
+
382
+ ```python
383
+ from auto_gateway.network.hosting import TunnelInfo
384
+
385
+ info = TunnelInfo(public_url="https://abc123.ngrok.io", backend="ngrok")
386
+ ```
387
+
388
+ ---
389
+
390
+ ## CLI Reference
391
+
392
+ ```bash
393
+ auto-gateway [OPTIONS] COMMAND [ARGS]
394
+ ```
395
+
396
+ ### `start`
397
+
398
+ Start the gateway server.
399
+
400
+ ```bash
401
+ auto-gateway start --config config.json [--host 0.0.0.0] [--port 8000] [--tunnel none]
402
+ ```
403
+
404
+ | Option | Default | Description |
405
+ |--------|---------|-------------|
406
+ | `--config` | (required unless a global config was saved with `save-global`) | Path to config.json |
407
+ | `--host` | `127.0.0.1` | Bind address |
408
+ | `--port` | `8000` | Port number |
409
+ | `--tunnel` | `none` | Tunnel backend: `none`, `ngrok`, or `cloudflared` |
410
+
411
+ ### `check`
412
+
413
+ Validate configuration and print provider summary.
414
+
415
+ ```bash
416
+ auto-gateway check --config config.json
417
+ # Output:
418
+ # OK: providers=2 strategy=adaptive tunnel=none
419
+ # - local_openai: type=openai_compatible, models=['gpt-4o-mini']
420
+ # - gemini: type=google, models=['gemini-1.5-flash']
421
+ ```
422
+
423
+ ### `save-global`
424
+
425
+ Save your specified configuration to ~/.auto-gateway/config.json.
426
+
427
+ ```bash
428
+ auto-gateway save-global --config config.json
429
+
430
+ ```
431
+
432
+ Afterward, you can start without specifying `--config`, i.e. `auto-gateway start`.
433
+
434
+
435
+ ### `version`
436
+
437
+ Print version.
438
+
439
+ ```bash
440
+ auto-gateway version
441
+ # auto-gateway 0.1.0
442
+ ```
443
+
444
+ ---
445
+
446
+ ## Development
447
+
448
+ ### Project structure
449
+
450
+ ```
451
+ auto-gateway/
452
+ ├── auto_gateway/
453
+ │ ├── __init__.py
454
+ │ ├── cli/
455
+ │ │ └── main.py # Typer CLI commands
456
+ │ ├── config/
457
+ │ │ ├── manager.py # Config file loading
458
+ │ │ └── schema.py # Pydantic config models
459
+ │ ├── core/
460
+ │ │ ├── models.py # OpenAI API request/response models
461
+ │ │ ├── router.py # ProviderRouter with route/route_stream
462
+ │ │ ├── router_tool_calls_helpers.py # Tool call SSE chunking
463
+ │ │ ├── router_toolcalls_patch.py # Re-exports
464
+ │ │ └── server.py # FastAPI application setup
465
+ │ ├── network/
466
+ │ │ ├── hosting.py # start_ngrok, start_cloudflared, start_tunnel
467
+ │ │ ├── hosting_test_utils.py
468
+ │ │ ├── tunnels.py
469
+ │ │ └── uvicorn_runner.py # UDS/TCP app runner
470
+ │ ├── providers/
471
+ │ │ ├── base.py # BaseProvider ABC
472
+ │ │ ├── google.py # Google provider
473
+ │ │ ├── openai_compatible.py # OpenAI-compatible provider
474
+ │ │ └── registry.py # Provider factory registry
475
+ │ └── strategies/
476
+ │ ├── adaptive.py # Health-aware routing
477
+ │ ├── base.py # BaseStrategy ABC
478
+ │ └── sequential.py # Ordered rotation
479
+ ├── tests/
480
+ │ └── test_smoke_server.py # End-to-end smoke test
481
+ ├── auto_gateway/
482
+ │ └── tests/
483
+ │ ├── test_comprehensive_api.py # 19 comprehensive tests
484
+ │ ├── test_openai_streaming_delta_shapes.py # SSE delta validation
485
+ │ ├── test_streaming_and_failover.py # Streaming + failover
486
+ │ └── test_tunnel_url_parsing.py # Cloudflared URL parsing
487
+ ├── config.json.example
488
+ ├── pyproject.toml
489
+ └── README.md
490
+ ```
491
+
492
+
493
+ ### Adding a new provider
494
+
495
+ 1. Create `auto_gateway/providers/my_provider.py`:
496
+
497
+ ```python
498
+ from .base import BaseProvider, ProviderCallResult
499
+
500
+ class MyProvider(BaseProvider):
501
+ def __init__(self, keys, models, **kwargs):
502
+ super().__init__(name="my", keys=keys, models=models)
503
+ # Custom init
504
+
505
+ async def call(self, *, key, model, messages, timeout, tools, tool_choice, extra_body=None):
506
+ # Implement async call
507
+ return ProviderCallResult(text=..., reasoning=..., tool_calls=..., usage=...)
508
+
509
+ async def call_stream(self, *, key, model, messages, timeout, tools, tool_choice, extra_body=None):
510
+ # Yield BaseProviderDelta dicts
511
+ yield {"type": "content", "content": "..."}
512
+ yield {"type": "finish", "finish_reason": "stop"}
513
+ ```
514
+
515
+ 2. Register in the provider factory:
516
+
517
+ ```python
518
+ from .registry import register_provider
519
+
520
+ @register_provider("my")
521
+ def create_my_provider(config):
522
+ return MyProvider(
523
+ keys=[config.api_key],
524
+ models=config.models,
525
+ )
526
+ ```
527
+
528
+ 3. Add to `config/schema.py` as a new `ProviderBaseConfig` variant if needed.
529
+
530
+ ### Adding a new strategy
531
+
532
+ 1. Create `auto_gateway/strategies/my_strategy.py` extending `BaseStrategy`:
533
+
534
+ ```python
535
+ from .base import BaseStrategy
536
+
537
+ class MyStrategy(BaseStrategy):
538
+ def __init__(self, providers, all_models):
539
+ self.providers = providers
540
+ self.all_models = all_models
541
+
542
+ def generate_targets(self, provider, models, shuffle, message_hash=None, is_new_session=False):
543
+ # Yield (provider_name, model_name, api_key, features)
544
+ ...
545
+ ```
546
+
547
+ 2. Wire it in `cli/main.py` and `config/schema.py`.
548
+
549
+ ### Streaming delta protocol
550
+
551
+ Providers communicate streaming events to the router via `BaseProviderDelta` dicts:
552
+
553
+ ```python
554
+ # Text content delta
555
+ {"type": "content", "content": "Hello"}
556
+
557
+ # Tool call delta (OpenAI-compatible)
558
+ {"type": "tool_calls", "index": 0, "id": "call_1", "function": {"name": "get_weather", "arguments": "{}"}}
559
+
560
+ # Finish signal
561
+ {"type": "finish", "finish_reason": "stop"}
562
+ ```
563
+
564
+ The router translates these into OpenAI SSE `data: {...}\n\n` chunks with `[DONE]` termination.
565
+
566
+ ---
567
+
568
+ ## Extending
569
+
570
+ ### Custom tunnel backends
571
+
572
+ Implement in `auto_gateway/network/hosting.py`:
573
+
574
+ ```python
575
+ @dataclass
576
+ class TunnelInfo:
577
+ public_url: str
578
+ backend: str
579
+
580
+ async def start_my_tunnel(port: int, config: dict) -> TunnelInfo:
581
+ ...
582
+ ```
583
+
584
+ Wire in `start_tunnel()` and the CLI `--tunnel` option.
585
+
586
+ ### Custom config formats
587
+
588
+ The `config/manager.py` loads JSON. For YAML or TOML support, add a format detector and parser there.
589
+
590
+ ### Middleware / hooks
591
+
592
+ FastAPI middleware can be added directly in `core/server.py`:
593
+
594
+ ```python
595
+ app = FastAPI()
596
+ app.add_middleware(MyMiddleware, ...)
597
+ ```
598
+
599
+ ---
600
+
601
+ ## License
602
+
603
+ MIT