argo-proxy 3.0.2__tar.gz → 3.0.4__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. {argo_proxy-3.0.2/src/argo_proxy.egg-info → argo_proxy-3.0.4}/PKG-INFO +1 -1
  2. {argo_proxy-3.0.2 → argo_proxy-3.0.4/src/argo_proxy.egg-info}/PKG-INFO +1 -1
  3. argo_proxy-3.0.4/src/argoproxy/__init__.py +1 -0
  4. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/endpoints/dispatch.py +3 -0
  5. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/transports.py +98 -55
  6. argo_proxy-3.0.2/src/argoproxy/__init__.py +0 -1
  7. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/LICENSE +0 -0
  8. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/README.md +0 -0
  9. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/pyproject.toml +0 -0
  10. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/setup.cfg +0 -0
  11. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argo_proxy.egg-info/SOURCES.txt +0 -0
  12. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argo_proxy.egg-info/dependency_links.txt +0 -0
  13. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argo_proxy.egg-info/entry_points.txt +0 -0
  14. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argo_proxy.egg-info/requires.txt +0 -0
  15. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argo_proxy.egg-info/top_level.txt +0 -0
  16. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/__init__.py +0 -0
  17. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/__init__.py +0 -0
  18. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/chat.py +0 -0
  19. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/completions.py +0 -0
  20. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/embed.py +0 -0
  21. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/native_anthropic.py +0 -0
  22. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/native_openai.py +0 -0
  23. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/endpoints/responses.py +0 -0
  24. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/deprecated.py +0 -0
  25. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/google_helpers.py +0 -0
  26. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/handler.py +0 -0
  27. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/input_handle.py +0 -0
  28. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/leaked_tool_parser.py +0 -0
  29. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/output_handle.py +0 -0
  30. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/tool_calls/tool_prompts.py +0 -0
  31. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/__init__.py +0 -0
  32. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/chat_completion.py +0 -0
  33. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/completions.py +0 -0
  34. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/embedding.py +0 -0
  35. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/function_call.py +0 -0
  36. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/types/responses.py +0 -0
  37. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/__init__.py +0 -0
  38. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/input_handle.py +0 -0
  39. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/models.py +0 -0
  40. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/stream_decoder.py +0 -0
  41. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/tokens.py +0 -0
  42. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_legacy/utils/usage.py +0 -0
  43. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_vendor/__init__.py +0 -0
  44. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_vendor/semver.py +0 -0
  45. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/_vendor/yaml.py +0 -0
  46. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/app.py +0 -0
  47. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/cli/__init__.py +0 -0
  48. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/cli/display.py +0 -0
  49. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/cli/handlers.py +0 -0
  50. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/cli/parser.py +0 -0
  51. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/config/__init__.py +0 -0
  52. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/config/interactive.py +0 -0
  53. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/config/io.py +0 -0
  54. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/config/model.py +0 -0
  55. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/config/validation.py +0 -0
  56. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/endpoints/dev_proxy.py +0 -0
  57. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/endpoints/extras.py +0 -0
  58. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/endpoints/passthrough.py +0 -0
  59. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/models.py +0 -0
  60. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/performance.py +0 -0
  61. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/py.typed +0 -0
  62. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/attack_logger.py +0 -0
  63. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/image_processing.py +0 -0
  64. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/logging.py +0 -0
  65. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/misc.py +0 -0
  66. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/models.py +0 -0
  67. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/src/argoproxy/utils/tool_calls.py +0 -0
  68. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_chat_completions.py +0 -0
  69. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_embeddings.py +0 -0
  70. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_function_calling_multiple.py +0 -0
  71. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_function_calling_single.py +0 -0
  72. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_leaked_tool_parser.py +0 -0
  73. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_legacy_completions.py +0 -0
  74. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/test/test_model_resolution.py +0 -0
  75. {argo_proxy-3.0.2 → argo_proxy-3.0.4}/tests/test_config_migrate.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argo-proxy
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: Proxy server to Argo API, OpenAI format compatible
5
5
  Author-email: Peng Ding <oaklight@gmx.com>
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: argo-proxy
3
- Version: 3.0.2
3
+ Version: 3.0.4
4
4
  Summary: Proxy server to Argo API, OpenAI format compatible
5
5
  Author-email: Peng Ding <oaklight@gmx.com>
6
6
  License-Expression: MIT
@@ -0,0 +1 @@
1
+ __version__ = "3.0.4"
@@ -1025,6 +1025,7 @@ async def _convert_non_streaming(
1025
1025
  _ensure_user_field(target_body, config.user)
1026
1026
  _downgrade_developer_role(target_body)
1027
1027
  _normalize_null_content(target_body)
1028
+ _normalize_thinking_for_upstream(target_body)
1028
1029
  _debug_dump("2_request_converted", target_body, config)
1029
1030
 
1030
1031
  # Log the converted body
@@ -1146,6 +1147,7 @@ async def _convert_buffered_streaming(
1146
1147
  _ensure_user_field(target_body, config.user)
1147
1148
  _downgrade_developer_role(target_body)
1148
1149
  _normalize_null_content(target_body)
1150
+ _normalize_thinking_for_upstream(target_body)
1149
1151
 
1150
1152
  # 3. Inject stream flags and update headers for streaming
1151
1153
  target_body = _inject_stream_flags(target_body, target_provider)
@@ -1317,6 +1319,7 @@ async def _convert_streaming(
1317
1319
  _ensure_user_field(target_body, config.user)
1318
1320
  _downgrade_developer_role(target_body)
1319
1321
  _normalize_null_content(target_body)
1322
+ _normalize_thinking_for_upstream(target_body)
1320
1323
 
1321
1324
  format_sse = _SSE_FORMATTERS[source_provider]
1322
1325
 
@@ -131,15 +131,20 @@ async def validate_api_async(
131
131
  raise ValueError("API validation failed after all attempts")
132
132
 
133
133
 
134
- async def _fetch_first_model(
134
+ async def _fetch_validation_models(
135
135
  models_url: str,
136
136
  timeout: int = 5,
137
137
  resolver_overrides: dict[str, str] | None = None,
138
- ) -> str | None:
139
- """Fetch the first available model ID from an OpenAI-compatible ``/models`` endpoint.
138
+ ) -> list[str]:
139
+ """Fetch candidate model IDs for validation from an OpenAI-compatible
140
+ ``/models`` endpoint.
141
+
142
+ Returns a list of model ID strings sorted by preference: lightweight
143
+ models (nano, mini) come first to minimise token cost during validation.
144
+ Embedding-only models are excluded because they cannot serve chat requests.
140
145
 
141
146
  Returns:
142
- A model ID string, or None if the request fails.
147
+ Sorted list of model IDs, or empty list if the request fails.
143
148
  """
144
149
  from ..performance import StaticOverrideResolver
145
150
 
@@ -155,15 +160,39 @@ async def _fetch_first_model(
155
160
  ) as session:
156
161
  async with session.get(models_url) as resp:
157
162
  if resp.status != 200:
158
- return None
163
+ return []
159
164
  data = await resp.json()
160
165
  models = data.get("data", [])
161
- if models:
162
- m = models[0]
163
- return m.get("internal_id") or m.get("id")
164
166
  except Exception:
165
- pass
166
- return None
167
+ return []
168
+
169
+ # Filter out embedding models (they can't serve chat completions)
170
+ _EMBEDDING_KEYWORDS = {"embedding", "ada", "v3small", "v3large"}
171
+ chat_models = []
172
+ for m in models:
173
+ iid = (m.get("internal_id") or m.get("id") or "").lower()
174
+ display_id = (m.get("id") or "").lower()
175
+ if any(kw in iid or kw in display_id for kw in _EMBEDDING_KEYWORDS):
176
+ continue
177
+ chat_models.append(m)
178
+
179
+ # Sort: nano first (cheapest), then mini, then others
180
+ def _sort_key(m: dict) -> int:
181
+ iid = (m.get("internal_id") or m.get("id") or "").lower()
182
+ if "nano" in iid:
183
+ return 0
184
+ if "mini" in iid:
185
+ return 1
186
+ return 2
187
+
188
+ chat_models.sort(key=_sort_key)
189
+
190
+ result: list[str] = []
191
+ for m in chat_models:
192
+ model_id = m.get("internal_id") or m.get("id")
193
+ if model_id:
194
+ result.append(model_id)
195
+ return result
167
196
 
168
197
 
169
198
  async def validate_user_async(
@@ -196,57 +225,71 @@ async def validate_user_async(
196
225
  from ..performance import StaticOverrideResolver
197
226
  from .misc import contains_argo_auth_warning, extract_text_from_response
198
227
 
199
- # Auto-detect a valid model name from the upstream
228
+ # Auto-detect valid model names from the upstream, sorted by preference
200
229
  models_url = chat_url.rsplit("/chat/completions", 1)[0] + "/models"
201
- model = await _fetch_first_model(
230
+ candidate_models = await _fetch_validation_models(
202
231
  models_url, timeout=timeout, resolver_overrides=resolver_overrides
203
232
  )
204
- if not model:
205
- model = "gpt-4o-latest" # fallback
206
-
207
- payload = {
208
- "model": model,
209
- "messages": [{"role": "user", "content": "say ok"}],
210
- "user": user,
211
- "max_tokens": 5,
212
- }
213
-
214
- connector = None
215
- if resolver_overrides:
216
- resolver = StaticOverrideResolver(resolver_overrides)
217
- connector = aiohttp.TCPConnector(resolver=resolver)
233
+ if not candidate_models:
234
+ candidate_models = ["gpt41nano"] # lightweight fallback
218
235
 
219
236
  client_timeout = aiohttp.ClientTimeout(total=timeout)
220
-
221
237
  last_err: Exception | None = None
222
- for attempt in range(attempts + 1):
223
- try:
224
- async with aiohttp.ClientSession(
225
- connector=connector,
226
- timeout=client_timeout,
227
- ) as session:
228
- async with session.post(
229
- chat_url,
230
- json=payload,
231
- headers={
232
- "Content-Type": "application/json",
233
- "Authorization": f"Bearer {user}",
234
- },
235
- ) as response:
236
- if response.status != 200:
237
- raise ValueError(f"API returned status code {response.status}")
238
- data = await response.json()
239
- text = extract_text_from_response(data, "openai")
240
- return not contains_argo_auth_warning(text)
241
- except Exception as e:
242
- last_err = e
243
- if attempt < attempts:
244
- await asyncio.sleep(0.5)
245
- if resolver_overrides and attempt < attempts:
246
- resolver = StaticOverrideResolver(resolver_overrides)
247
- connector = aiohttp.TCPConnector(resolver=resolver)
248
- else:
249
- connector = None
238
+
239
+ for model in candidate_models:
240
+ payload = {
241
+ "model": model,
242
+ "messages": [{"role": "user", "content": "say ok"}],
243
+ "user": user,
244
+ "max_tokens": 5,
245
+ }
246
+
247
+ connector = None
248
+ if resolver_overrides:
249
+ resolver = StaticOverrideResolver(resolver_overrides)
250
+ connector = aiohttp.TCPConnector(resolver=resolver)
251
+
252
+ for attempt in range(attempts + 1):
253
+ try:
254
+ async with aiohttp.ClientSession(
255
+ connector=connector,
256
+ timeout=client_timeout,
257
+ ) as session:
258
+ async with session.post(
259
+ chat_url,
260
+ json=payload,
261
+ headers={
262
+ "Content-Type": "application/json",
263
+ "Authorization": f"Bearer {user}",
264
+ },
265
+ ) as response:
266
+ if response.status == 400:
267
+ # Model rejected — try the next candidate
268
+ body = await response.json()
269
+ err_code = (
270
+ body.get("error", {}).get("code", "")
271
+ if isinstance(body, dict)
272
+ else ""
273
+ )
274
+ if err_code == "model_not_found":
275
+ last_err = ValueError(f"Model '{model}' not accepted")
276
+ break # skip to next model
277
+ if response.status != 200:
278
+ raise ValueError(
279
+ f"API returned status code {response.status}"
280
+ )
281
+ data = await response.json()
282
+ text = extract_text_from_response(data, "openai")
283
+ return not contains_argo_auth_warning(text)
284
+ except Exception as e:
285
+ last_err = e
286
+ if attempt < attempts:
287
+ await asyncio.sleep(0.5)
288
+ if resolver_overrides and attempt < attempts:
289
+ resolver = StaticOverrideResolver(resolver_overrides)
290
+ connector = aiohttp.TCPConnector(resolver=resolver)
291
+ else:
292
+ connector = None
250
293
 
251
294
  if last_err is not None:
252
295
  raise last_err
@@ -1 +0,0 @@
1
- __version__ = "3.0.2"
File without changes
File without changes
File without changes
File without changes