aoai_proxy 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Rio Fujita
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,255 @@
1
+ Metadata-Version: 2.3
2
+ Name: aoai_proxy
3
+ Version: 0.1.3
4
+ Summary: OpenAI-compatible proxy for Azure OpenAI using Entra ID authentication
5
+ Author: Rio Fujita
6
+ Author-email: Rio Fujita <rio_github@rio.st>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 Rio Fujita
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+ Requires-Dist: azure-identity>=1.17.1
29
+ Requires-Dist: fastapi>=0.115.0
30
+ Requires-Dist: httpx>=0.27.0
31
+ Requires-Dist: pydantic-settings>=2.4.0
32
+ Requires-Dist: uvicorn[standard]>=0.30.0
33
+ Requires-Dist: pytest>=9 ; extra == 'test'
34
+ Requires-Dist: pytest-cov>=7 ; extra == 'test'
35
+ Requires-Python: >=3.12
36
+ Project-URL: Homepage, https://github.com/rioriost/aoai_proxy
37
+ Project-URL: Issues, https://github.com/rioriost/aoai_proxy/issues
38
+ Project-URL: Repository, https://github.com/rioriost/aoai_proxy
39
+ Provides-Extra: test
40
+ Description-Content-Type: text/markdown
41
+
42
+ # aoai_proxy
43
+
44
+ Zed からは OpenAI 互換 API サーバとして見えつつ、Azure OpenAI には Entra ID 認証で接続するための軽量プロキシです。
45
+
46
+ Azure OpenAI 側で API キー認証を無効化していても、ローカルで `az login` 済みであれば、Zed の OpenAI compatible provider から `gpt-5.4` を利用できます。
47
+
48
+ ## これは何?
49
+
50
+ このプロキシは、次のような用途を想定しています。
51
+
52
+ - Azure OpenAI 側では API キー認証を無効化している
53
+ - Azure OpenAI には Entra ID 認証でアクセスしたい
54
+ - Zed からは OpenAI compatible provider として使いたい
55
+ - Azure OpenAI 上の `gpt-5.4` deployment を Zed の AI Agent として使いたい
56
+
57
+ このプロキシは **responses-first** の方針で実装しています。
58
+ 主に `POST /v1/responses` を Zed から受け取り、Azure OpenAI の `Responses API` に中継します。
59
+
60
+ ## 事前準備
61
+
62
+ 以下を準備してください。
63
+
64
+ - Azure OpenAI resource
65
+ - Azure OpenAI 上の `gpt-5.4` deployment
66
+ - Azure CLI
67
+ - `az login` 済みのローカル環境
68
+ - Zed
69
+ - Python 3.12+ または Docker / Docker Compose
70
+ - Docker Desktop
71
+
72
+ また、Azure OpenAI の endpoint と deployment 名が必要です。
73
+
74
+ 例:
75
+
76
+ - endpoint: `https://your-resource.cognitiveservices.azure.com`
77
+ - deployment: `gpt-5.4`
78
+
79
+ ## クイックスタート
80
+
81
+ ### 1. 設定ファイルを作る
82
+
83
+ `.env.example` をコピーして `.env` を作ります。
84
+
85
+ ```/dev/null/sh#L1-1
86
+ cp .env.example .env
87
+ ```
88
+
89
+ `.env` に最低限以下を設定してください。
90
+
91
+ ```/dev/null/text#L1-3
92
+ AOAI_PROXY_AZURE_OPENAI_ENDPOINT=https://your-resource.cognitiveservices.azure.com
93
+ AOAI_PROXY_AZURE_OPENAI_DEPLOYMENT=gpt-5.4
94
+ AOAI_PROXY_AZURE_OPENAI_API_VERSION=preview
95
+ ```
96
+
97
+ ### 2. Docker で起動する
98
+
99
+ ```/dev/null/sh#L1-1
100
+ docker compose up --build
101
+ ```
102
+
103
+ バックグラウンドで起動する場合:
104
+
105
+ ```/dev/null/sh#L1-1
106
+ docker compose up -d --build
107
+ ```
108
+
109
+ ### 3. 動作確認
110
+
111
+ ヘルスチェック:
112
+
113
+ ```/dev/null/sh#L1-1
114
+ curl http://127.0.0.1:8000/healthz
115
+ ```
116
+
117
+ モデル一覧:
118
+
119
+ ```/dev/null/sh#L1-1
120
+ curl http://127.0.0.1:8000/v1/models
121
+ ```
122
+
123
+ Responses API の簡単な確認:
124
+
125
+ ```/dev/null/sh#L1-7
126
+ curl http://127.0.0.1:8000/v1/responses \
127
+ -H "Content-Type: application/json" \
128
+ -d '{
129
+ "model": "gpt-5.4",
130
+ "input": "こんにちは。1文で返答してください。"
131
+ }'
132
+ ```
133
+
134
+ ### 4. ローカルで直接起動したい場合
135
+
136
+ 依存を入れて起動します。
137
+
138
+ ```/dev/null/sh#L1-2
139
+ pip install .
140
+ python -m aoai_proxy.main
141
+ ```
142
+
143
+ または:
144
+
145
+ ```/dev/null/sh#L1-1
146
+ aoai_proxy
147
+ ```
148
+
149
+ ## Zed の設定方法
150
+
151
+ Zed では OpenAI compatible provider としてこのプロキシを追加します。
152
+
153
+ ### 重要なポイント
154
+
155
+ - Base URL は `http://localhost:8000/v1`
156
+ - API Key はダミー値でよい
157
+ - Model は Azure 側の deployment 名を使う
158
+ - **`chat_completions` は `false` にする**
159
+
160
+ このプロキシは `responses-first` なので、Zed には `/v1/responses` を使わせる構成を推奨します。
161
+
162
+ ### 設定例
163
+
164
+ ```/dev/null/json#L1-20
165
+ {
166
+ "language_models": {
167
+ "openai_compatible": {
168
+ "aoai_proxy": {
169
+ "api_url": "http://localhost:8000/v1",
170
+ "available_models": [
171
+ {
172
+ "name": "gpt-5.4",
173
+ "max_tokens": 200000,
174
+ "max_output_tokens": 32000,
175
+ "max_completion_tokens": 200000,
176
+ "capabilities": {
177
+ "tools": true,
178
+ "images": false,
179
+ "parallel_tool_calls": false,
180
+ "prompt_cache_key": false,
181
+ "chat_completions": false
182
+ }
183
+ }
184
+ ]
185
+ }
186
+ }
187
+ }
188
+ }
189
+ ```
190
+
191
+ ### Zed での確認ポイント
192
+
193
+ - モデル: `gpt-5.4`
194
+ - OpenAI compatible provider の接続先: `http://localhost:8000/v1`
195
+ - `chat_completions: false`
196
+
197
+ ## 補足
198
+
199
+ ### なぜ `/responses` を使うのか
200
+
201
+ 今回の Azure OpenAI `gpt-5.4` deployment では、`/chat/completions` よりも `Responses API` を使う構成の方が安定していました。
202
+
203
+ そのため、このプロキシは `/v1/responses` を正規ルートとして扱います。
204
+
205
+ ### どこまで動作確認できているか
206
+
207
+ 少なくとも以下は確認済みです。
208
+
209
+ - Zed の OpenAI compatible provider から接続できる
210
+ - Azure OpenAI へ Entra ID 認証で接続できる
211
+ - `POST /v1/responses` が成功する
212
+ - Zed の AI Agent で通常応答が表示される
213
+ - terminal tool 呼び出しと、その結果を踏まえた応答が返る
214
+
215
+ ### 注意点
216
+
217
+ - 問題切り分け時は、長い既存 session ではなく **新しい clean session / thread** で確認してください
218
+ - 長い session では `function_call` / `function_call_output` の履歴が大量に蓄積し、不安定化の原因になることがあります
219
+ - 対象ファイルに未保存変更がある場合、Zed の edit 系 tool は安全のため停止することがあります
220
+ - Azure CLI 認証を使うため、Docker 利用時は `~/.azure` をコンテナにマウントする必要があります
221
+
222
+ ### 主な環境変数
223
+
224
+ 必須:
225
+
226
+ - `AOAI_PROXY_AZURE_OPENAI_ENDPOINT`
227
+ - `AOAI_PROXY_AZURE_OPENAI_DEPLOYMENT`
228
+
229
+ 任意:
230
+
231
+ - `AOAI_PROXY_AZURE_OPENAI_API_VERSION`
232
+ - `AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN`
233
+ - `AOAI_PROXY_HOST`
234
+ - `AOAI_PROXY_PORT`
235
+ - `AOAI_PROXY_LOG_LEVEL`
236
+ - `AOAI_PROXY_REQUEST_TIMEOUT_SECONDS`
237
+ - `AOAI_PROXY_TOKEN_SCOPE`
238
+
239
+ ## テスト
240
+
241
+ テスト依存を入れる:
242
+
243
+ ```/dev/null/sh#L1-1
244
+ uv sync --extra test
245
+ ```
246
+
247
+ テスト実行:
248
+
249
+ ```/dev/null/sh#L1-1
250
+ uv run pytest -q
251
+ ```
252
+
253
+ ## ライセンス
254
+
255
+ MIT
@@ -0,0 +1,214 @@
1
+ # aoai_proxy
2
+
3
+ Zed からは OpenAI 互換 API サーバとして見えつつ、Azure OpenAI には Entra ID 認証で接続するための軽量プロキシです。
4
+
5
+ Azure OpenAI 側で API キー認証を無効化していても、ローカルで `az login` 済みであれば、Zed の OpenAI compatible provider から `gpt-5.4` を利用できます。
6
+
7
+ ## これは何?
8
+
9
+ このプロキシは、次のような用途を想定しています。
10
+
11
+ - Azure OpenAI 側では API キー認証を無効化している
12
+ - Azure OpenAI には Entra ID 認証でアクセスしたい
13
+ - Zed からは OpenAI compatible provider として使いたい
14
+ - Azure OpenAI 上の `gpt-5.4` deployment を Zed の AI Agent として使いたい
15
+
16
+ このプロキシは **responses-first** の方針で実装しています。
17
+ 主に `POST /v1/responses` を Zed から受け取り、Azure OpenAI の `Responses API` に中継します。
18
+
19
+ ## 事前準備
20
+
21
+ 以下を準備してください。
22
+
23
+ - Azure OpenAI resource
24
+ - Azure OpenAI 上の `gpt-5.4` deployment
25
+ - Azure CLI
26
+ - `az login` 済みのローカル環境
27
+ - Zed
28
+ - Python 3.12+ または Docker / Docker Compose
29
+ - Docker Desktop
30
+
31
+ また、Azure OpenAI の endpoint と deployment 名が必要です。
32
+
33
+ 例:
34
+
35
+ - endpoint: `https://your-resource.cognitiveservices.azure.com`
36
+ - deployment: `gpt-5.4`
37
+
38
+ ## クイックスタート
39
+
40
+ ### 1. 設定ファイルを作る
41
+
42
+ `.env.example` をコピーして `.env` を作ります。
43
+
44
+ ```/dev/null/sh#L1-1
45
+ cp .env.example .env
46
+ ```
47
+
48
+ `.env` に最低限以下を設定してください。
49
+
50
+ ```/dev/null/text#L1-3
51
+ AOAI_PROXY_AZURE_OPENAI_ENDPOINT=https://your-resource.cognitiveservices.azure.com
52
+ AOAI_PROXY_AZURE_OPENAI_DEPLOYMENT=gpt-5.4
53
+ AOAI_PROXY_AZURE_OPENAI_API_VERSION=preview
54
+ ```
55
+
56
+ ### 2. Docker で起動する
57
+
58
+ ```/dev/null/sh#L1-1
59
+ docker compose up --build
60
+ ```
61
+
62
+ バックグラウンドで起動する場合:
63
+
64
+ ```/dev/null/sh#L1-1
65
+ docker compose up -d --build
66
+ ```
67
+
68
+ ### 3. 動作確認
69
+
70
+ ヘルスチェック:
71
+
72
+ ```/dev/null/sh#L1-1
73
+ curl http://127.0.0.1:8000/healthz
74
+ ```
75
+
76
+ モデル一覧:
77
+
78
+ ```/dev/null/sh#L1-1
79
+ curl http://127.0.0.1:8000/v1/models
80
+ ```
81
+
82
+ Responses API の簡単な確認:
83
+
84
+ ```/dev/null/sh#L1-7
85
+ curl http://127.0.0.1:8000/v1/responses \
86
+ -H "Content-Type: application/json" \
87
+ -d '{
88
+ "model": "gpt-5.4",
89
+ "input": "こんにちは。1文で返答してください。"
90
+ }'
91
+ ```
92
+
93
+ ### 4. ローカルで直接起動したい場合
94
+
95
+ 依存を入れて起動します。
96
+
97
+ ```/dev/null/sh#L1-2
98
+ pip install .
99
+ python -m aoai_proxy.main
100
+ ```
101
+
102
+ または:
103
+
104
+ ```/dev/null/sh#L1-1
105
+ aoai_proxy
106
+ ```
107
+
108
+ ## Zed の設定方法
109
+
110
+ Zed では OpenAI compatible provider としてこのプロキシを追加します。
111
+
112
+ ### 重要なポイント
113
+
114
+ - Base URL は `http://localhost:8000/v1`
115
+ - API Key はダミー値でよい
116
+ - Model は Azure 側の deployment 名を使う
117
+ - **`chat_completions` は `false` にする**
118
+
119
+ このプロキシは `responses-first` なので、Zed には `/v1/responses` を使わせる構成を推奨します。
120
+
121
+ ### 設定例
122
+
123
+ ```/dev/null/json#L1-20
124
+ {
125
+ "language_models": {
126
+ "openai_compatible": {
127
+ "aoai_proxy": {
128
+ "api_url": "http://localhost:8000/v1",
129
+ "available_models": [
130
+ {
131
+ "name": "gpt-5.4",
132
+ "max_tokens": 200000,
133
+ "max_output_tokens": 32000,
134
+ "max_completion_tokens": 200000,
135
+ "capabilities": {
136
+ "tools": true,
137
+ "images": false,
138
+ "parallel_tool_calls": false,
139
+ "prompt_cache_key": false,
140
+ "chat_completions": false
141
+ }
142
+ }
143
+ ]
144
+ }
145
+ }
146
+ }
147
+ }
148
+ ```
149
+
150
+ ### Zed での確認ポイント
151
+
152
+ - モデル: `gpt-5.4`
153
+ - OpenAI compatible provider の接続先: `http://localhost:8000/v1`
154
+ - `chat_completions: false`
155
+
156
+ ## 補足
157
+
158
+ ### なぜ `/responses` を使うのか
159
+
160
+ 今回の Azure OpenAI `gpt-5.4` deployment では、`/chat/completions` よりも `Responses API` を使う構成の方が安定していました。
161
+
162
+ そのため、このプロキシは `/v1/responses` を正規ルートとして扱います。
163
+
164
+ ### どこまで動作確認できているか
165
+
166
+ 少なくとも以下は確認済みです。
167
+
168
+ - Zed の OpenAI compatible provider から接続できる
169
+ - Azure OpenAI へ Entra ID 認証で接続できる
170
+ - `POST /v1/responses` が成功する
171
+ - Zed の AI Agent で通常応答が表示される
172
+ - terminal tool 呼び出しと、その結果を踏まえた応答が返る
173
+
174
+ ### 注意点
175
+
176
+ - 問題切り分け時は、長い既存 session ではなく **新しい clean session / thread** で確認してください
177
+ - 長い session では `function_call` / `function_call_output` の履歴が大量に蓄積し、不安定化の原因になることがあります
178
+ - 対象ファイルに未保存変更がある場合、Zed の edit 系 tool は安全のため停止することがあります
179
+ - Azure CLI 認証を使うため、Docker 利用時は `~/.azure` をコンテナにマウントする必要があります
180
+
181
+ ### 主な環境変数
182
+
183
+ 必須:
184
+
185
+ - `AOAI_PROXY_AZURE_OPENAI_ENDPOINT`
186
+ - `AOAI_PROXY_AZURE_OPENAI_DEPLOYMENT`
187
+
188
+ 任意:
189
+
190
+ - `AOAI_PROXY_AZURE_OPENAI_API_VERSION`
191
+ - `AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN`
192
+ - `AOAI_PROXY_HOST`
193
+ - `AOAI_PROXY_PORT`
194
+ - `AOAI_PROXY_LOG_LEVEL`
195
+ - `AOAI_PROXY_REQUEST_TIMEOUT_SECONDS`
196
+ - `AOAI_PROXY_TOKEN_SCOPE`
197
+
198
+ ## テスト
199
+
200
+ テスト依存を入れる:
201
+
202
+ ```/dev/null/sh#L1-1
203
+ uv sync --extra test
204
+ ```
205
+
206
+ テスト実行:
207
+
208
+ ```/dev/null/sh#L1-1
209
+ uv run pytest -q
210
+ ```
211
+
212
+ ## ライセンス
213
+
214
+ MIT
@@ -0,0 +1,54 @@
1
+ [project]
2
+ name = "aoai_proxy"
3
+ version = "0.1.3"
4
+ description = "OpenAI-compatible proxy for Azure OpenAI using Entra ID authentication"
5
+ readme = { file = "README.md", content-type = "text/markdown" }
6
+ requires-python = ">=3.12"
7
+ authors = [
8
+ { name = "Rio Fujita", email = "rio_github@rio.st" }
9
+ ]
10
+ license = { file = "LICENSE" }
11
+
12
+ dependencies = [
13
+ "azure-identity>=1.17.1",
14
+ "fastapi>=0.115.0",
15
+ "httpx>=0.27.0",
16
+ "pydantic-settings>=2.4.0",
17
+ "uvicorn[standard]>=0.30.0",
18
+ ]
19
+
20
+ [project.optional-dependencies]
21
+ test = [
22
+ "pytest>=9",
23
+ "pytest-cov>=7",
24
+ ]
25
+
26
+ [project.scripts]
27
+ aoai_proxy = "aoai_proxy.main:main"
28
+
29
+ [project.urls]
30
+ Homepage = "https://github.com/rioriost/aoai_proxy"
31
+ Issues = "https://github.com/rioriost/aoai_proxy/issues"
32
+ Repository = "https://github.com/rioriost/aoai_proxy"
33
+
34
+ [tool.pytest.ini_options]
35
+ testpaths = ["tests"]
36
+ python_files = ["test_*.py"]
37
+
38
+ [dependency-groups]
39
+ dev = [
40
+ "build>=1.2.2",
41
+ "twine>=6.1.0",
42
+ "ruff>=0.12.0",
43
+ ]
44
+
45
+ [build-system]
46
+ requires = ["uv_build>=0.9.27,<0.10.0"]
47
+ build-backend = "uv_build"
48
+
49
+ [tool.ruff]
50
+ line-length = 100
51
+ target-version = "py312"
52
+
53
+ [tool.ruff.lint]
54
+ select = ["E", "F", "I", "B", "UP"]
@@ -0,0 +1,2 @@
1
+ def main() -> None:
2
+ print("Hello from aoai_proxy!")
@@ -0,0 +1,619 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import logging
5
+ import os
6
+ import shutil
7
+ from contextlib import asynccontextmanager
8
+ from typing import Any, AsyncIterator
9
+ from urllib.parse import urlencode
10
+
11
+ import httpx
12
+ from azure.core.exceptions import ClientAuthenticationError
13
+ from azure.identity import CredentialUnavailableError
14
+ from azure.identity.aio import AzureCliCredential
15
+ from fastapi import FastAPI, HTTPException, Request, Response
16
+ from fastapi.responses import JSONResponse, PlainTextResponse, StreamingResponse
17
+ from pydantic import Field
18
+ from pydantic_settings import BaseSettings, SettingsConfigDict
19
+
20
+ logger = logging.getLogger("aoai_proxy")
21
+ logging.basicConfig(
22
+ level=os.getenv("LOG_LEVEL", "INFO").upper(),
23
+ format="%(asctime)s %(levelname)s %(name)s %(message)s",
24
+ )
25
+
26
+
27
+ class Settings(BaseSettings):
28
+ model_config = SettingsConfigDict(
29
+ env_prefix="AOAI_PROXY_",
30
+ case_sensitive=False,
31
+ extra="ignore",
32
+ )
33
+
34
+ azure_openai_endpoint: str = Field(
35
+ ...,
36
+ description="Azure OpenAI endpoint, e.g. https://your-resource.cognitiveservices.azure.com",
37
+ )
38
+ azure_openai_api_version: str = Field(
39
+ default="preview",
40
+ description="API version used when proxying Azure OpenAI requests",
41
+ )
42
+ azure_openai_deployment: str = Field(
43
+ ...,
44
+ description="Azure OpenAI deployment name, e.g. gpt-5.4",
45
+ )
46
+ azure_openai_bearer_token: str | None = Field(
47
+ default=None,
48
+ description="Optional bearer token to use instead of AzureCliCredential",
49
+ )
50
+ host: str = Field(default="0.0.0.0")
51
+ port: int = Field(default=8000)
52
+ log_level: str = Field(default="INFO")
53
+ request_timeout_seconds: float = Field(default=600.0)
54
+ token_scope: str = Field(
55
+ default="https://cognitiveservices.azure.com/.default",
56
+ )
57
+
58
+ @property
59
+ def normalized_endpoint(self) -> str:
60
+ return self.azure_openai_endpoint.rstrip("/")
61
+
62
+
63
# Module-level singleton; instantiation reads AOAI_PROXY_* environment
# variables and raises a validation error if required settings are missing.
settings = Settings()
64
+
65
+
66
+ def _json_loads(payload: bytes) -> dict[str, Any] | None:
67
+ if not payload:
68
+ return None
69
+ try:
70
+ parsed = json.loads(payload)
71
+ except json.JSONDecodeError:
72
+ return None
73
+ return parsed if isinstance(parsed, dict) else None
74
+
75
+
76
+ def _is_json_content_type(content_type: str | None) -> bool:
77
+ if not content_type:
78
+ return False
79
+ return "application/json" in content_type.lower()
80
+
81
+
82
+ def _is_streaming_request(payload: bytes, content_type: str | None) -> bool:
83
+ if not _is_json_content_type(content_type):
84
+ return False
85
+ parsed = _json_loads(payload)
86
+ return bool(parsed and parsed.get("stream") is True)
87
+
88
+
89
+ def _truncate_middle(text: str, max_length: int = 12000) -> str:
90
+ if len(text) <= max_length:
91
+ return text
92
+
93
+ keep = max_length // 2
94
+ return f"{text[:keep]} ... {text[-keep:]}"
95
+
96
+
97
+ def _looks_like_tool_error(value: Any) -> bool:
98
+ if isinstance(value, str):
99
+ lowered = value.lower()
100
+ return any(
101
+ marker in lowered
102
+ for marker in (
103
+ "error",
104
+ "failed",
105
+ "exception",
106
+ "traceback",
107
+ "denied",
108
+ "unsaved changes",
109
+ "permission",
110
+ )
111
+ )
112
+
113
+ if isinstance(value, dict):
114
+ if any(key in value for key in ("error", "errors", "message", "detail", "code")):
115
+ return True
116
+
117
+ return any(key in value for key in ("trace", "traceback", "stack", "exception", "stderr"))
118
+
119
+ return False
120
+
121
+
122
+ def _summarize_tool_error_value(value: Any) -> str:
123
+ if isinstance(value, str):
124
+ text = value.strip()
125
+ if not text:
126
+ return "Tool error: <empty error message>"
127
+
128
+ first_line = text.splitlines()[0].strip()
129
+ first_sentence = first_line.split(". ")[0].strip()
130
+ summary = first_sentence if first_sentence else first_line
131
+ return _truncate_middle(f"Tool error: {summary}", max_length=2000)
132
+
133
+ if isinstance(value, dict):
134
+ summary: dict[str, Any] = {}
135
+ for key in ("error", "message", "detail", "code", "path", "tool", "tool_name"):
136
+ if key in value:
137
+ summary[key] = value[key]
138
+
139
+ if not summary:
140
+ summary = {"error": "Tool execution failed"}
141
+
142
+ serialized = json.dumps(summary, ensure_ascii=False, separators=(",", ":"))
143
+ return _truncate_middle(serialized, max_length=2000)
144
+
145
+ return "Tool error: Tool execution failed"
146
+
147
+
148
+ def _sanitize_function_call_output_value(value: Any) -> str:
149
+ if _looks_like_tool_error(value):
150
+ return _summarize_tool_error_value(value)
151
+
152
+ if isinstance(value, str):
153
+ return value if value else "<Tool returned an empty string>"
154
+
155
+ if value is None:
156
+ return "<Tool returned no output>"
157
+
158
+ try:
159
+ serialized = json.dumps(value, ensure_ascii=False, separators=(",", ":"))
160
+ except (TypeError, ValueError):
161
+ serialized = str(value)
162
+
163
+ if not serialized:
164
+ return "<Tool returned an empty string>"
165
+
166
+ return _truncate_middle(serialized)
167
+
168
+
169
+ def sanitize_responses_request(payload: dict[str, Any]) -> dict[str, Any]:
170
+ sanitized = json.loads(json.dumps(payload))
171
+ input_items = sanitized.get("input")
172
+ if not isinstance(input_items, list):
173
+ return sanitized
174
+
175
+ for item in input_items:
176
+ if not isinstance(item, dict):
177
+ continue
178
+ if item.get("type") != "function_call_output":
179
+ continue
180
+
181
+ item["output"] = _sanitize_function_call_output_value(item.get("output"))
182
+
183
+ return sanitized
184
+
185
+
186
+ class AzureOpenAIProxy:
187
+ def __init__(self, config: Settings) -> None:
188
+ self.config = config
189
+ self.credential = AzureCliCredential()
190
+ self.client = httpx.AsyncClient(
191
+ timeout=httpx.Timeout(config.request_timeout_seconds),
192
+ follow_redirects=True,
193
+ )
194
+
195
+ async def startup_diagnostics(self) -> None:
196
+ az_path = shutil.which("az")
197
+ if az_path:
198
+ logger.info("Azure CLI detected at path=%s", az_path)
199
+ else:
200
+ logger.warning(
201
+ "Azure CLI executable `az` was not found on PATH. "
202
+ "Set AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN or install Azure CLI in the runtime."
203
+ )
204
+
205
+ async def close(self) -> None:
206
+ await self.client.aclose()
207
+ await self.credential.close()
208
+
209
+ async def bearer_token(self) -> str:
210
+ if self.config.azure_openai_bearer_token:
211
+ return self.config.azure_openai_bearer_token
212
+
213
+ try:
214
+ token = await self.credential.get_token(self.config.token_scope)
215
+ except ClientAuthenticationError as exc:
216
+ logger.warning("Azure CLI authentication failed: %s", exc)
217
+ raise HTTPException(
218
+ status_code=503,
219
+ detail=(
220
+ "Azure CLI authentication failed. Ensure `az` is installed and "
221
+ "`az login` has been completed, or set "
222
+ "`AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN`."
223
+ ),
224
+ ) from exc
225
+ except CredentialUnavailableError as exc:
226
+ logger.warning("Azure CLI credential unavailable: %s", exc)
227
+ raise HTTPException(
228
+ status_code=503,
229
+ detail=(
230
+ "Azure CLI credential unavailable. Ensure `az` is installed and "
231
+ "available on PATH inside the runtime container, or set "
232
+ "`AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN`."
233
+ ),
234
+ ) from exc
235
+ except Exception as exc:
236
+ logger.warning("Unable to acquire Azure OpenAI bearer token: %s", exc)
237
+ raise HTTPException(
238
+ status_code=503,
239
+ detail=(
240
+ "Unable to acquire Azure OpenAI bearer token. Ensure `az` is "
241
+ "installed and available on PATH inside the runtime container, "
242
+ "or set `AOAI_PROXY_AZURE_OPENAI_BEARER_TOKEN`."
243
+ ),
244
+ ) from exc
245
+
246
+ return token.token
247
+
248
+ def upstream_url(self, path: str, query_params: dict[str, str]) -> str:
249
+ normalized_path = path.lstrip("/")
250
+
251
+ if normalized_path.startswith("openai/"):
252
+ query = query_params.copy()
253
+ if "api-version" not in query:
254
+ query["api-version"] = self.config.azure_openai_api_version
255
+ suffix = f"?{urlencode(query)}" if query else ""
256
+ return f"{self.config.normalized_endpoint}/{normalized_path}{suffix}"
257
+
258
+ if normalized_path == "responses":
259
+ query = query_params.copy()
260
+ if "api-version" not in query:
261
+ query["api-version"] = self.config.azure_openai_api_version
262
+ return f"{self.config.normalized_endpoint}/openai/v1/responses?{urlencode(query)}"
263
+
264
+ if normalized_path == "embeddings":
265
+ query = query_params.copy()
266
+ if "api-version" not in query:
267
+ query["api-version"] = self.config.azure_openai_api_version
268
+ return (
269
+ f"{self.config.normalized_endpoint}/openai/deployments/"
270
+ f"{self.config.azure_openai_deployment}/embeddings"
271
+ f"?{urlencode(query)}"
272
+ )
273
+
274
+ query = query_params.copy()
275
+ if "api-version" not in query and normalized_path.startswith("openai/"):
276
+ query["api-version"] = self.config.azure_openai_api_version
277
+ suffix = f"?{urlencode(query)}" if query else ""
278
+ return f"{self.config.normalized_endpoint}/{normalized_path}{suffix}"
279
+
280
+ def models_payload(self) -> dict[str, object]:
281
+ model_id = self.config.azure_openai_deployment
282
+ return {
283
+ "object": "list",
284
+ "data": [
285
+ {
286
+ "id": model_id,
287
+ "object": "model",
288
+ "created": 0,
289
+ "owned_by": "azure-openai",
290
+ }
291
+ ],
292
+ }
293
+
294
+ async def forward(self, request: Request, path: str) -> Response:
295
+ normalized_path = path.lstrip("/")
296
+
297
+ if normalized_path == "models":
298
+ return JSONResponse(content=self.models_payload())
299
+
300
+ body = await request.body()
301
+ headers = await self._build_headers(request)
302
+ request_json = (
303
+ _json_loads(body)
304
+ if _is_json_content_type(request.headers.get("content-type"))
305
+ else None
306
+ )
307
+
308
+ if normalized_path == "chat/completions":
309
+ raise HTTPException(
310
+ status_code=404,
311
+ detail=(
312
+ "This proxy is responses-first. Configure your client to use "
313
+ "`/v1/responses` instead of `/v1/chat/completions`."
314
+ ),
315
+ )
316
+
317
+ upstream = self.upstream_url(normalized_path, dict(request.query_params))
318
+ is_stream = _is_streaming_request(body, request.headers.get("content-type"))
319
+
320
+ logger.info(
321
+ "Forwarding request path=%s deployment=%s upstream=%s stream=%s",
322
+ normalized_path,
323
+ self.config.azure_openai_deployment,
324
+ upstream,
325
+ is_stream,
326
+ )
327
+ if normalized_path == "responses" and request_json is not None:
328
+ request_json = sanitize_responses_request(request_json)
329
+ body = json.dumps(request_json).encode("utf-8")
330
+
331
+ input_items = request_json.get("input")
332
+ input_count = len(input_items) if isinstance(input_items, list) else 0
333
+ tools_count = (
334
+ len(request_json.get("tools", []))
335
+ if isinstance(request_json.get("tools"), list)
336
+ else 0
337
+ )
338
+
339
+ item_type_counts: dict[str, int] = {}
340
+ message_role_counts: dict[str, int] = {}
341
+ content_type_counts: dict[str, int] = {}
342
+
343
+ if isinstance(input_items, list):
344
+ for item in input_items:
345
+ if not isinstance(item, dict):
346
+ item_type_counts["<non-dict>"] = item_type_counts.get("<non-dict>", 0) + 1
347
+ continue
348
+
349
+ item_type = item.get("type")
350
+ item_type_key = item_type if isinstance(item_type, str) else "<missing>"
351
+ item_type_counts[item_type_key] = item_type_counts.get(item_type_key, 0) + 1
352
+
353
+ if item_type == "message":
354
+ role = item.get("role")
355
+ role_key = role if isinstance(role, str) else "<missing>"
356
+ message_role_counts[role_key] = message_role_counts.get(role_key, 0) + 1
357
+
358
+ content = item.get("content")
359
+ if isinstance(content, list):
360
+ for part in content:
361
+ if not isinstance(part, dict):
362
+ content_type_counts["<non-dict>"] = (
363
+ content_type_counts.get("<non-dict>", 0) + 1
364
+ )
365
+ continue
366
+
367
+ part_type = part.get("type")
368
+ part_type_key = (
369
+ part_type if isinstance(part_type, str) else "<missing>"
370
+ )
371
+ content_type_counts[part_type_key] = (
372
+ content_type_counts.get(part_type_key, 0) + 1
373
+ )
374
+
375
+ logger.info(
376
+ "Incoming responses shape: input_items=%s tools=%s tool_choice=%s stream=%s item_types=%s message_roles=%s content_types=%s",
377
+ input_count,
378
+ tools_count,
379
+ request_json.get("tool_choice"),
380
+ request_json.get("stream"),
381
+ item_type_counts,
382
+ message_role_counts,
383
+ content_type_counts,
384
+ )
385
+
386
+ if is_stream:
387
+ return await self._forward_streaming(
388
+ request=request,
389
+ upstream=upstream,
390
+ headers=headers,
391
+ body=body,
392
+ )
393
+
394
+ upstream_response = await self._request_upstream(
395
+ method=request.method,
396
+ url=upstream,
397
+ headers=headers,
398
+ body=body,
399
+ )
400
+
401
+ response_headers = self._filter_response_headers(upstream_response.headers)
402
+ return Response(
403
+ content=upstream_response.content,
404
+ status_code=upstream_response.status_code,
405
+ headers=response_headers,
406
+ media_type=upstream_response.headers.get("content-type"),
407
+ )
408
+
409
+ async def _request_upstream(
410
+ self,
411
+ method: str,
412
+ url: str,
413
+ headers: dict[str, str],
414
+ body: bytes,
415
+ ) -> httpx.Response:
416
+ try:
417
+ return await self.client.request(
418
+ method=method,
419
+ url=url,
420
+ headers=headers,
421
+ content=body,
422
+ )
423
+ except HTTPException:
424
+ raise
425
+ except Exception as exc:
426
+ logger.exception("Proxy request failed: %s", exc)
427
+ raise HTTPException(
428
+ status_code=502,
429
+ detail="Upstream request failed",
430
+ ) from exc
431
+
432
+ async def _forward_streaming(
433
+ self,
434
+ request: Request,
435
+ upstream: str,
436
+ headers: dict[str, str],
437
+ body: bytes,
438
+ ) -> Response:
439
+ try:
440
+ upstream_request = self.client.build_request(
441
+ method=request.method,
442
+ url=upstream,
443
+ headers=headers,
444
+ content=body,
445
+ )
446
+ upstream_response = await self.client.send(
447
+ upstream_request,
448
+ stream=True,
449
+ )
450
+ except Exception as exc:
451
+ logger.exception("Streaming proxy request failed: %s", exc)
452
+ raise HTTPException(
453
+ status_code=502,
454
+ detail="Upstream streaming request failed",
455
+ ) from exc
456
+
457
+ if upstream_response.status_code >= 400:
458
+ content = await upstream_response.aread()
459
+ await upstream_response.aclose()
460
+ logger.error(
461
+ "Upstream streaming request failed: status=%s body=%s",
462
+ upstream_response.status_code,
463
+ content.decode("utf-8", errors="replace"),
464
+ )
465
+ return Response(
466
+ content=content,
467
+ status_code=upstream_response.status_code,
468
+ media_type=upstream_response.headers.get("content-type", "application/json"),
469
+ )
470
+
471
+ async def iterator() -> AsyncIterator[bytes]:
472
+ try:
473
+ async for chunk in upstream_response.aiter_text():
474
+ if chunk:
475
+ yield chunk.encode("utf-8")
476
+ finally:
477
+ await upstream_response.aclose()
478
+
479
+ response_headers = self._filter_response_headers(upstream_response.headers)
480
+ response_headers["Cache-Control"] = "no-cache"
481
+ response_headers["Connection"] = "keep-alive"
482
+ response_headers["X-Accel-Buffering"] = "no"
483
+ return StreamingResponse(
484
+ iterator(),
485
+ status_code=upstream_response.status_code,
486
+ headers=response_headers,
487
+ media_type="text/event-stream; charset=utf-8",
488
+ )
489
+
490
+ async def _build_headers(self, request: Request) -> dict[str, str]:
491
+ incoming = request.headers
492
+ token = await self.bearer_token()
493
+
494
+ headers: dict[str, str] = {
495
+ "authorization": f"Bearer {token}",
496
+ }
497
+
498
+ for header_name in (
499
+ "content-type",
500
+ "accept",
501
+ "openai-beta",
502
+ "user-agent",
503
+ "x-request-id",
504
+ ):
505
+ header_value = incoming.get(header_name)
506
+ if header_value:
507
+ headers[header_name] = header_value
508
+
509
+ return headers
510
+
511
+ @staticmethod
512
+ def _filter_response_headers(headers: httpx.Headers) -> dict[str, str]:
513
+ excluded = {
514
+ "content-length",
515
+ "content-encoding",
516
+ "transfer-encoding",
517
+ "connection",
518
+ "keep-alive",
519
+ "proxy-authenticate",
520
+ "proxy-authorization",
521
+ "te",
522
+ "trailers",
523
+ "upgrade",
524
+ }
525
+ return {key: value for key, value in headers.items() if key.lower() not in excluded}
526
+
527
+ @staticmethod
528
+ def _decode_json_response(response: httpx.Response) -> dict[str, Any]:
529
+ try:
530
+ parsed = response.json()
531
+ except json.JSONDecodeError as exc:
532
+ raise HTTPException(
533
+ status_code=502,
534
+ detail="Upstream returned non-JSON response",
535
+ ) from exc
536
+
537
+ if not isinstance(parsed, dict):
538
+ raise HTTPException(
539
+ status_code=502,
540
+ detail="Upstream returned unexpected JSON shape",
541
+ )
542
+
543
+ return parsed
544
+
545
+
546
+ @asynccontextmanager
547
+ async def lifespan(app: FastAPI):
548
+ proxy = AzureOpenAIProxy(settings)
549
+ app.state.proxy = proxy
550
+ logger.info(
551
+ "Starting Azure OpenAI proxy for endpoint=%s deployment=%s",
552
+ settings.normalized_endpoint,
553
+ settings.azure_openai_deployment,
554
+ )
555
+ await proxy.startup_diagnostics()
556
+ try:
557
+ yield
558
+ finally:
559
+ await proxy.close()
560
+
561
+
562
+ app = FastAPI(
563
+ title="Azure OpenAI OpenAI-Compatible Proxy",
564
+ version="0.2.0",
565
+ lifespan=lifespan,
566
+ )
567
+
568
+
569
+ @app.get("/healthz")
570
+ async def healthz() -> dict[str, str]:
571
+ return {"status": "ok"}
572
+
573
+
574
+ @app.get("/")
575
+ async def root() -> dict[str, str]:
576
+ return {
577
+ "service": "aoai_proxy",
578
+ "status": "ok",
579
+ "deployment": settings.azure_openai_deployment,
580
+ }
581
+
582
+
583
+ @app.get("/v1/models")
584
+ async def list_models() -> Response:
585
+ return JSONResponse(content=app.state.proxy.models_payload())
586
+
587
+
588
+ @app.api_route(
589
+ "/v1/{path:path}",
590
+ methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
591
+ )
592
+ async def proxy_v1(path: str, request: Request) -> Response:
593
+ return await app.state.proxy.forward(request, path)
594
+
595
+
596
+ @app.api_route(
597
+ "/{path:path}",
598
+ methods=["GET", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
599
+ )
600
+ async def proxy_root(path: str, request: Request) -> Response:
601
+ normalized_path = path.lstrip("/")
602
+ if normalized_path == "":
603
+ return PlainTextResponse("aoai_proxy", status_code=200)
604
+ return await app.state.proxy.forward(request, normalized_path)
605
+
606
+
607
+ def main() -> None:
608
+ import uvicorn
609
+
610
+ uvicorn.run(
611
+ "aoai_proxy.main:app",
612
+ host=settings.host,
613
+ port=settings.port,
614
+ log_level=settings.log_level.lower(),
615
+ )
616
+
617
+
618
+ if __name__ == "__main__":
619
+ main()