licos-dev-sdk 0.2.4__tar.gz → 0.2.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/.gitignore +4 -3
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/PKG-INFO +2 -2
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/pyproject.toml +3 -3
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/__init__.py +6 -6
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/model.py +285 -285
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/tests/test_model.py +74 -74
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/_utils.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/archive.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/chart.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/data.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/diagram.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/document.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/image.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/observability.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/presentation.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/spreadsheet.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/src/licos_dev_sdk/web.py +0 -0
- {licos_dev_sdk-0.2.4 → licos_dev_sdk-0.2.5}/tests/test_observability.py +0 -0
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: licos-dev-sdk
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.5
|
|
4
4
|
Summary: LICOS Dev SDK - file generation and model capability clients
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Requires-Dist: graphviz>=0.20
|
|
7
7
|
Requires-Dist: jinja2>=3.1
|
|
8
|
-
Requires-Dist: licos-platform-sdk>=0.2.
|
|
8
|
+
Requires-Dist: licos-platform-sdk>=0.2.8
|
|
9
9
|
Requires-Dist: matplotlib>=3.9
|
|
10
10
|
Requires-Dist: mistune>=3.0
|
|
11
11
|
Requires-Dist: openpyxl>=3.1
|
|
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "licos-dev-sdk"
|
|
7
|
-
version = "0.2.4"
|
|
7
|
+
version = "0.2.5"
|
|
8
8
|
description = "LICOS Dev SDK - file generation and model capability clients"
|
|
9
9
|
requires-python = ">=3.10"
|
|
10
10
|
dependencies = [
|
|
11
|
-
"licos-platform-sdk>=0.2.
|
|
11
|
+
"licos-platform-sdk>=0.2.8",
|
|
12
12
|
"weasyprint>=62.0",
|
|
13
13
|
"python-docx>=1.1",
|
|
14
14
|
"openpyxl>=3.1",
|
|
@@ -24,4 +24,4 @@ dependencies = [
|
|
|
24
24
|
]
|
|
25
25
|
|
|
26
26
|
[tool.hatch.build.targets.wheel]
|
|
27
|
-
packages = ["src/licos_dev_sdk"]
|
|
27
|
+
packages = ["src/licos_dev_sdk"]
|
|
@@ -47,10 +47,10 @@ def __getattr__(name: str):
|
|
|
47
47
|
"ImageGenerationClient": ("model", "ImageGenerationClient"),
|
|
48
48
|
"VideoGenerationClient": ("model", "VideoGenerationClient"),
|
|
49
49
|
"SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
|
|
50
|
-
"ASRClient": ("model", "ASRClient"),
|
|
51
|
-
"fetch_model_catalogs": ("model", "fetch_model_catalogs"),
|
|
52
|
-
"fetch_model_detail": ("model", "fetch_model_detail"),
|
|
53
|
-
"resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
|
|
50
|
+
"ASRClient": ("model", "ASRClient"),
|
|
51
|
+
"fetch_model_catalogs": ("model", "fetch_model_catalogs"),
|
|
52
|
+
"fetch_model_detail": ("model", "fetch_model_detail"),
|
|
53
|
+
"resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
|
|
54
54
|
"resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
|
|
55
55
|
"resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
|
|
56
56
|
"resolve_video_generation_endpoint": ("model", "resolve_video_generation_endpoint"),
|
|
@@ -93,8 +93,8 @@ __all__ = [
|
|
|
93
93
|
"ModelRuntime", "ModelEndpoint", "ModelResult",
|
|
94
94
|
"ApiError", "ConfigurationError",
|
|
95
95
|
"LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
|
|
96
|
-
"SpeechRecognitionClient", "ASRClient",
|
|
97
|
-
"fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
|
|
96
|
+
"SpeechRecognitionClient", "ASRClient",
|
|
97
|
+
"fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
|
|
98
98
|
"resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
|
|
99
99
|
"resolve_speech_recognition_endpoint",
|
|
100
100
|
"invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
|
|
@@ -18,15 +18,15 @@ from licos_platform_sdk._runtime import (
|
|
|
18
18
|
)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
|
|
22
|
-
MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
|
|
23
|
-
DEFAULT_REQUEST_TIMEOUT_SECS = 120
|
|
24
|
-
DEFAULT_ASYNC_TIMEOUT_SECS = 600
|
|
25
|
-
DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
|
|
26
|
-
DEFAULT_CATALOG_CACHE_TTL_SECS = 300
|
|
27
|
-
|
|
28
|
-
_CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
|
|
29
|
-
_DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
|
|
21
|
+
MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
|
|
22
|
+
MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
|
|
23
|
+
DEFAULT_REQUEST_TIMEOUT_SECS = 120
|
|
24
|
+
DEFAULT_ASYNC_TIMEOUT_SECS = 600
|
|
25
|
+
DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
|
|
26
|
+
DEFAULT_CATALOG_CACHE_TTL_SECS = 300
|
|
27
|
+
|
|
28
|
+
_CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
|
|
29
|
+
_DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
@dataclass(frozen=True)
|
|
@@ -37,18 +37,18 @@ class ModelRuntime:
|
|
|
37
37
|
|
|
38
38
|
|
|
39
39
|
@dataclass(frozen=True)
|
|
40
|
-
class ModelEndpoint:
|
|
41
|
-
provider: str
|
|
42
|
-
capability: str
|
|
43
|
-
base_url: str
|
|
44
|
-
model: str
|
|
45
|
-
required_headers: dict[str, str] = field(default_factory=dict)
|
|
46
|
-
endpoint: dict[str, Any] = field(default_factory=dict)
|
|
47
|
-
response_url: str | None = None
|
|
48
|
-
cache_context: bool = False
|
|
49
|
-
context_length: int | None = None
|
|
50
|
-
max_input_length: int | None = None
|
|
51
|
-
max_output_length: int | None = None
|
|
40
|
+
class ModelEndpoint:
|
|
41
|
+
provider: str
|
|
42
|
+
capability: str
|
|
43
|
+
base_url: str
|
|
44
|
+
model: str
|
|
45
|
+
required_headers: dict[str, str] = field(default_factory=dict)
|
|
46
|
+
endpoint: dict[str, Any] = field(default_factory=dict)
|
|
47
|
+
response_url: str | None = None
|
|
48
|
+
cache_context: bool = False
|
|
49
|
+
context_length: int | None = None
|
|
50
|
+
max_input_length: int | None = None
|
|
51
|
+
max_output_length: int | None = None
|
|
52
52
|
|
|
53
53
|
@property
|
|
54
54
|
def async_task(self) -> bool:
|
|
@@ -96,7 +96,7 @@ class ModelResult:
|
|
|
96
96
|
return result
|
|
97
97
|
|
|
98
98
|
|
|
99
|
-
def fetch_model_catalogs(
|
|
99
|
+
def fetch_model_catalogs(
|
|
100
100
|
*,
|
|
101
101
|
base_url: str | None = None,
|
|
102
102
|
user_token: str | None = None,
|
|
@@ -104,33 +104,33 @@ def fetch_model_catalogs(
|
|
|
104
104
|
refresh: bool = False,
|
|
105
105
|
) -> list[dict[str, Any]]:
|
|
106
106
|
"""Fetch platform model capability catalog using the project owner token."""
|
|
107
|
-
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
108
|
-
return _fetch_model_catalogs(runtime, refresh=refresh)
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
def fetch_model_detail(
|
|
112
|
-
model_code: str,
|
|
113
|
-
*,
|
|
114
|
-
base_url: str | None = None,
|
|
115
|
-
user_token: str | None = None,
|
|
116
|
-
user_id: str | None = None,
|
|
117
|
-
workspace_id: str | None = None,
|
|
118
|
-
refresh: bool = False,
|
|
119
|
-
) -> dict[str, Any] | None:
|
|
120
|
-
"""Fetch model detail, including base URL, Response API support and token limits."""
|
|
121
|
-
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
122
|
-
return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
def resolve_llm_endpoint(
|
|
107
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
108
|
+
return _fetch_model_catalogs(runtime, refresh=refresh)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def fetch_model_detail(
|
|
112
|
+
model_code: str,
|
|
113
|
+
*,
|
|
114
|
+
base_url: str | None = None,
|
|
115
|
+
user_token: str | None = None,
|
|
116
|
+
user_id: str | None = None,
|
|
117
|
+
workspace_id: str | None = None,
|
|
118
|
+
refresh: bool = False,
|
|
119
|
+
) -> dict[str, Any] | None:
|
|
120
|
+
"""Fetch model detail, including base URL, Response API support and token limits."""
|
|
121
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
122
|
+
return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def resolve_llm_endpoint(
|
|
126
126
|
*,
|
|
127
127
|
model_group: str = "text",
|
|
128
128
|
base_url: str | None = None,
|
|
129
129
|
user_token: str | None = None,
|
|
130
130
|
user_id: str | None = None,
|
|
131
|
-
) -> ModelEndpoint:
|
|
132
|
-
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
133
|
-
return _resolve_chat_endpoint(runtime, model_group=model_group)
|
|
131
|
+
) -> ModelEndpoint:
|
|
132
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
133
|
+
return _resolve_chat_endpoint(runtime, model_group=model_group)
|
|
134
134
|
|
|
135
135
|
|
|
136
136
|
def resolve_vision_endpoint(
|
|
@@ -185,7 +185,7 @@ class LLMClient:
|
|
|
185
185
|
self.ctx = ctx
|
|
186
186
|
self.runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
187
187
|
|
|
188
|
-
def invoke(
|
|
188
|
+
def invoke(
|
|
189
189
|
self,
|
|
190
190
|
messages: Sequence[Any] | str,
|
|
191
191
|
*,
|
|
@@ -194,22 +194,22 @@ class LLMClient:
|
|
|
194
194
|
max_completion_tokens: int | None = None,
|
|
195
195
|
timeout: int | None = None,
|
|
196
196
|
**extra: Any,
|
|
197
|
-
) -> ModelResult:
|
|
198
|
-
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
199
|
-
selected_model = endpoint.model
|
|
200
|
-
body = {
|
|
201
|
-
"model": selected_model,
|
|
202
|
-
"messages": _normalize_messages(messages),
|
|
203
|
-
**_not_none(
|
|
204
|
-
{
|
|
205
|
-
"temperature": temperature,
|
|
206
|
-
"max_completion_tokens": _effective_max_completion_tokens(
|
|
207
|
-
max_completion_tokens,
|
|
208
|
-
endpoint,
|
|
209
|
-
),
|
|
210
|
-
**extra,
|
|
211
|
-
}
|
|
212
|
-
),
|
|
197
|
+
) -> ModelResult:
|
|
198
|
+
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
199
|
+
selected_model = endpoint.model
|
|
200
|
+
body = {
|
|
201
|
+
"model": selected_model,
|
|
202
|
+
"messages": _normalize_messages(messages),
|
|
203
|
+
**_not_none(
|
|
204
|
+
{
|
|
205
|
+
"temperature": temperature,
|
|
206
|
+
"max_completion_tokens": _effective_max_completion_tokens(
|
|
207
|
+
max_completion_tokens,
|
|
208
|
+
endpoint,
|
|
209
|
+
),
|
|
210
|
+
**extra,
|
|
211
|
+
}
|
|
212
|
+
),
|
|
213
213
|
}
|
|
214
214
|
response = _post_model_json(endpoint, self.runtime, body, timeout=timeout)
|
|
215
215
|
texts = _extract_chat_texts(response) or _collect_texts(response)
|
|
@@ -231,23 +231,23 @@ class LLMClient:
|
|
|
231
231
|
max_completion_tokens: int | None = None,
|
|
232
232
|
timeout: int | None = None,
|
|
233
233
|
**extra: Any,
|
|
234
|
-
) -> Iterator[str]:
|
|
235
|
-
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
236
|
-
selected_model = endpoint.model
|
|
237
|
-
body = {
|
|
238
|
-
"model": selected_model,
|
|
239
|
-
"messages": _normalize_messages(messages),
|
|
240
|
-
"stream": True,
|
|
241
|
-
**_not_none(
|
|
242
|
-
{
|
|
243
|
-
"temperature": temperature,
|
|
244
|
-
"max_completion_tokens": _effective_max_completion_tokens(
|
|
245
|
-
max_completion_tokens,
|
|
246
|
-
endpoint,
|
|
247
|
-
),
|
|
248
|
-
**extra,
|
|
249
|
-
}
|
|
250
|
-
),
|
|
234
|
+
) -> Iterator[str]:
|
|
235
|
+
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
236
|
+
selected_model = endpoint.model
|
|
237
|
+
body = {
|
|
238
|
+
"model": selected_model,
|
|
239
|
+
"messages": _normalize_messages(messages),
|
|
240
|
+
"stream": True,
|
|
241
|
+
**_not_none(
|
|
242
|
+
{
|
|
243
|
+
"temperature": temperature,
|
|
244
|
+
"max_completion_tokens": _effective_max_completion_tokens(
|
|
245
|
+
max_completion_tokens,
|
|
246
|
+
endpoint,
|
|
247
|
+
),
|
|
248
|
+
**extra,
|
|
249
|
+
}
|
|
250
|
+
),
|
|
251
251
|
}
|
|
252
252
|
yield from _stream_model_json(endpoint, self.runtime, body, timeout=timeout)
|
|
253
253
|
|
|
@@ -478,9 +478,9 @@ def understand_image(**kwargs: Any) -> ModelResult:
|
|
|
478
478
|
return VisionClient().understand(**kwargs)
|
|
479
479
|
|
|
480
480
|
|
|
481
|
-
def clear_model_catalog_cache_for_tests() -> None:
|
|
482
|
-
_CATALOG_CACHE.clear()
|
|
483
|
-
_DETAIL_CACHE.clear()
|
|
481
|
+
def clear_model_catalog_cache_for_tests() -> None:
|
|
482
|
+
_CATALOG_CACHE.clear()
|
|
483
|
+
_DETAIL_CACHE.clear()
|
|
484
484
|
|
|
485
485
|
|
|
486
486
|
def _model_runtime(
|
|
@@ -500,7 +500,7 @@ def _refresh_model_runtime(runtime: ModelRuntime) -> ModelRuntime:
|
|
|
500
500
|
return replace(runtime, token=token)
|
|
501
501
|
|
|
502
502
|
|
|
503
|
-
def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
|
|
503
|
+
def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
|
|
504
504
|
cache_key = (runtime.base_url, runtime.token)
|
|
505
505
|
ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
|
|
506
506
|
cached = _CATALOG_CACHE.get(cache_key)
|
|
@@ -521,81 +521,81 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
|
|
|
521
521
|
catalogs = _catalogs_from_payload(payload)
|
|
522
522
|
if not catalogs:
|
|
523
523
|
raise ApiError("model catalog has no provider entries", details=payload)
|
|
524
|
-
_CATALOG_CACHE[cache_key] = (time.time(), catalogs)
|
|
525
|
-
return catalogs
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
def _workspace_id(workspace_id: str | None = None) -> str | None:
|
|
529
|
-
value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
|
|
530
|
-
if value is None:
|
|
531
|
-
return None
|
|
532
|
-
value = str(value).strip()
|
|
533
|
-
return value or None
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
def _fetch_model_detail(
|
|
537
|
-
runtime: ModelRuntime,
|
|
538
|
-
model_code: str,
|
|
539
|
-
*,
|
|
540
|
-
workspace_id: str | None = None,
|
|
541
|
-
refresh: bool = False,
|
|
542
|
-
) -> dict[str, Any] | None:
|
|
543
|
-
model_code = str(model_code or "").strip()
|
|
544
|
-
if not model_code:
|
|
545
|
-
return None
|
|
546
|
-
resolved_workspace_id = _workspace_id(workspace_id) or ""
|
|
547
|
-
cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
|
|
548
|
-
ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
|
|
549
|
-
cached = _DETAIL_CACHE.get(cache_key)
|
|
550
|
-
if cached and not refresh and time.time() - cached[0] <= ttl:
|
|
551
|
-
return cached[1]
|
|
552
|
-
|
|
553
|
-
query = {"code": model_code}
|
|
554
|
-
if resolved_workspace_id:
|
|
555
|
-
query["workspaceId"] = resolved_workspace_id
|
|
556
|
-
url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
|
|
557
|
-
headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
|
|
558
|
-
try:
|
|
559
|
-
payload = _request_json(
|
|
560
|
-
"GET",
|
|
561
|
-
url,
|
|
562
|
-
token=runtime.token,
|
|
563
|
-
headers=headers,
|
|
564
|
-
timeout=30,
|
|
565
|
-
)
|
|
566
|
-
except ApiError as exc:
|
|
567
|
-
if not refresh and should_refresh_user_token(exc):
|
|
568
|
-
return _fetch_model_detail(
|
|
569
|
-
_refresh_model_runtime(runtime),
|
|
570
|
-
model_code,
|
|
571
|
-
workspace_id=workspace_id,
|
|
572
|
-
refresh=True,
|
|
573
|
-
)
|
|
574
|
-
raise
|
|
575
|
-
detail = _model_detail_from_payload(payload)
|
|
576
|
-
_DETAIL_CACHE[cache_key] = (time.time(), detail)
|
|
577
|
-
return detail
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
|
|
581
|
-
if not isinstance(payload, dict):
|
|
582
|
-
raise ApiError("model detail response is not an object", details=payload)
|
|
583
|
-
code = payload.get("code")
|
|
584
|
-
if code not in (None, 0) or payload.get("success") is False:
|
|
585
|
-
raise ApiError(
|
|
586
|
-
str(payload.get("message") or "model detail API failed"),
|
|
587
|
-
code=code if isinstance(code, int) else None,
|
|
588
|
-
details=payload,
|
|
589
|
-
)
|
|
590
|
-
data = payload.get("data")
|
|
591
|
-
if data is None:
|
|
592
|
-
return None
|
|
593
|
-
if not isinstance(data, dict):
|
|
594
|
-
raise ApiError("model detail data is not an object", details=payload)
|
|
595
|
-
return data
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
524
|
+
_CATALOG_CACHE[cache_key] = (time.time(), catalogs)
|
|
525
|
+
return catalogs
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _workspace_id(workspace_id: str | None = None) -> str | None:
|
|
529
|
+
value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
|
|
530
|
+
if value is None:
|
|
531
|
+
return None
|
|
532
|
+
value = str(value).strip()
|
|
533
|
+
return value or None
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _fetch_model_detail(
|
|
537
|
+
runtime: ModelRuntime,
|
|
538
|
+
model_code: str,
|
|
539
|
+
*,
|
|
540
|
+
workspace_id: str | None = None,
|
|
541
|
+
refresh: bool = False,
|
|
542
|
+
) -> dict[str, Any] | None:
|
|
543
|
+
model_code = str(model_code or "").strip()
|
|
544
|
+
if not model_code:
|
|
545
|
+
return None
|
|
546
|
+
resolved_workspace_id = _workspace_id(workspace_id) or ""
|
|
547
|
+
cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
|
|
548
|
+
ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
|
|
549
|
+
cached = _DETAIL_CACHE.get(cache_key)
|
|
550
|
+
if cached and not refresh and time.time() - cached[0] <= ttl:
|
|
551
|
+
return cached[1]
|
|
552
|
+
|
|
553
|
+
query = {"code": model_code}
|
|
554
|
+
if resolved_workspace_id:
|
|
555
|
+
query["workspaceId"] = resolved_workspace_id
|
|
556
|
+
url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
|
|
557
|
+
headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
|
|
558
|
+
try:
|
|
559
|
+
payload = _request_json(
|
|
560
|
+
"GET",
|
|
561
|
+
url,
|
|
562
|
+
token=runtime.token,
|
|
563
|
+
headers=headers,
|
|
564
|
+
timeout=30,
|
|
565
|
+
)
|
|
566
|
+
except ApiError as exc:
|
|
567
|
+
if not refresh and should_refresh_user_token(exc):
|
|
568
|
+
return _fetch_model_detail(
|
|
569
|
+
_refresh_model_runtime(runtime),
|
|
570
|
+
model_code,
|
|
571
|
+
workspace_id=workspace_id,
|
|
572
|
+
refresh=True,
|
|
573
|
+
)
|
|
574
|
+
raise
|
|
575
|
+
detail = _model_detail_from_payload(payload)
|
|
576
|
+
_DETAIL_CACHE[cache_key] = (time.time(), detail)
|
|
577
|
+
return detail
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
|
|
581
|
+
if not isinstance(payload, dict):
|
|
582
|
+
raise ApiError("model detail response is not an object", details=payload)
|
|
583
|
+
code = payload.get("code")
|
|
584
|
+
if code not in (None, 0) or payload.get("success") is False:
|
|
585
|
+
raise ApiError(
|
|
586
|
+
str(payload.get("message") or "model detail API failed"),
|
|
587
|
+
code=code if isinstance(code, int) else None,
|
|
588
|
+
details=payload,
|
|
589
|
+
)
|
|
590
|
+
data = payload.get("data")
|
|
591
|
+
if data is None:
|
|
592
|
+
return None
|
|
593
|
+
if not isinstance(data, dict):
|
|
594
|
+
raise ApiError("model detail data is not an object", details=payload)
|
|
595
|
+
return data
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
599
599
|
if not isinstance(payload, dict):
|
|
600
600
|
raise ApiError("model catalog response is not an object", details=payload)
|
|
601
601
|
code = payload.get("code")
|
|
@@ -614,26 +614,26 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
|
614
614
|
items = []
|
|
615
615
|
if not isinstance(items, list):
|
|
616
616
|
raise ApiError("model catalog data is not a list", details=payload)
|
|
617
|
-
return [item for item in items if isinstance(item, dict)]
|
|
618
|
-
|
|
619
|
-
|
|
620
|
-
def _resolve_chat_endpoint(
|
|
621
|
-
runtime: ModelRuntime,
|
|
622
|
-
*,
|
|
623
|
-
model_group: str,
|
|
624
|
-
requested_model: str | None = None,
|
|
625
|
-
) -> ModelEndpoint:
|
|
626
|
-
endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
|
|
627
|
-
selected_model = _selected_model(requested_model, endpoint.model)
|
|
628
|
-
endpoint = replace(endpoint, model=selected_model)
|
|
629
|
-
try:
|
|
630
|
-
detail = _fetch_model_detail(runtime, selected_model)
|
|
631
|
-
except ApiError:
|
|
632
|
-
return endpoint
|
|
633
|
-
return _apply_model_detail(endpoint, detail)
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
def _resolve_endpoint(
|
|
617
|
+
return [item for item in items if isinstance(item, dict)]
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _resolve_chat_endpoint(
|
|
621
|
+
runtime: ModelRuntime,
|
|
622
|
+
*,
|
|
623
|
+
model_group: str,
|
|
624
|
+
requested_model: str | None = None,
|
|
625
|
+
) -> ModelEndpoint:
|
|
626
|
+
endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
|
|
627
|
+
selected_model = _selected_model(requested_model, endpoint.model)
|
|
628
|
+
endpoint = replace(endpoint, model=selected_model)
|
|
629
|
+
try:
|
|
630
|
+
detail = _fetch_model_detail(runtime, selected_model)
|
|
631
|
+
except ApiError:
|
|
632
|
+
return endpoint
|
|
633
|
+
return _apply_model_detail(endpoint, detail)
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _resolve_endpoint(
|
|
637
637
|
runtime: ModelRuntime,
|
|
638
638
|
capability_key: str,
|
|
639
639
|
*,
|
|
@@ -666,59 +666,59 @@ def _resolve_endpoint(
|
|
|
666
666
|
required_headers=_parse_required_headers(capability.get("requiredHeaders")),
|
|
667
667
|
endpoint=capability,
|
|
668
668
|
)
|
|
669
|
-
raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
|
|
673
|
-
if not detail:
|
|
674
|
-
return endpoint
|
|
675
|
-
base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
|
|
676
|
-
response_url = _first_non_empty_field(
|
|
677
|
-
detail,
|
|
678
|
-
["responseUrl", "response_url", "responsesUrl", "responses_url"],
|
|
679
|
-
)
|
|
680
|
-
cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
|
|
681
|
-
return replace(
|
|
682
|
-
endpoint,
|
|
683
|
-
base_url=base_url,
|
|
684
|
-
response_url=response_url or endpoint.response_url,
|
|
685
|
-
cache_context=endpoint.cache_context if cache_context is None else cache_context,
|
|
686
|
-
context_length=_int_field(
|
|
687
|
-
detail,
|
|
688
|
-
[
|
|
689
|
-
"contextLength",
|
|
690
|
-
"context_length",
|
|
691
|
-
"contextWindow",
|
|
692
|
-
"context_window",
|
|
693
|
-
"maxContextTokens",
|
|
694
|
-
"max_context_tokens",
|
|
695
|
-
],
|
|
696
|
-
),
|
|
697
|
-
max_input_length=_int_field(
|
|
698
|
-
detail,
|
|
699
|
-
[
|
|
700
|
-
"maxInputLength",
|
|
701
|
-
"max_input_length",
|
|
702
|
-
"maxInputTokens",
|
|
703
|
-
"max_input_tokens",
|
|
704
|
-
"inputTokenLimit",
|
|
705
|
-
"input_token_limit",
|
|
706
|
-
],
|
|
707
|
-
),
|
|
708
|
-
max_output_length=_int_field(
|
|
709
|
-
detail,
|
|
710
|
-
[
|
|
711
|
-
"maxOutputLength",
|
|
712
|
-
"max_output_length",
|
|
713
|
-
"maxOutputTokens",
|
|
714
|
-
"max_output_tokens",
|
|
715
|
-
"outputTokenLimit",
|
|
716
|
-
"output_token_limit",
|
|
717
|
-
"maxCompletionTokens",
|
|
718
|
-
"max_completion_tokens",
|
|
719
|
-
],
|
|
720
|
-
),
|
|
721
|
-
)
|
|
669
|
+
raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
|
|
673
|
+
if not detail:
|
|
674
|
+
return endpoint
|
|
675
|
+
base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
|
|
676
|
+
response_url = _first_non_empty_field(
|
|
677
|
+
detail,
|
|
678
|
+
["responseUrl", "response_url", "responsesUrl", "responses_url"],
|
|
679
|
+
)
|
|
680
|
+
cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
|
|
681
|
+
return replace(
|
|
682
|
+
endpoint,
|
|
683
|
+
base_url=base_url,
|
|
684
|
+
response_url=response_url or endpoint.response_url,
|
|
685
|
+
cache_context=endpoint.cache_context if cache_context is None else cache_context,
|
|
686
|
+
context_length=_int_field(
|
|
687
|
+
detail,
|
|
688
|
+
[
|
|
689
|
+
"contextLength",
|
|
690
|
+
"context_length",
|
|
691
|
+
"contextWindow",
|
|
692
|
+
"context_window",
|
|
693
|
+
"maxContextTokens",
|
|
694
|
+
"max_context_tokens",
|
|
695
|
+
],
|
|
696
|
+
),
|
|
697
|
+
max_input_length=_int_field(
|
|
698
|
+
detail,
|
|
699
|
+
[
|
|
700
|
+
"maxInputLength",
|
|
701
|
+
"max_input_length",
|
|
702
|
+
"maxInputTokens",
|
|
703
|
+
"max_input_tokens",
|
|
704
|
+
"inputTokenLimit",
|
|
705
|
+
"input_token_limit",
|
|
706
|
+
],
|
|
707
|
+
),
|
|
708
|
+
max_output_length=_int_field(
|
|
709
|
+
detail,
|
|
710
|
+
[
|
|
711
|
+
"maxOutputLength",
|
|
712
|
+
"max_output_length",
|
|
713
|
+
"maxOutputTokens",
|
|
714
|
+
"max_output_tokens",
|
|
715
|
+
"outputTokenLimit",
|
|
716
|
+
"output_token_limit",
|
|
717
|
+
"maxCompletionTokens",
|
|
718
|
+
"max_completion_tokens",
|
|
719
|
+
],
|
|
720
|
+
),
|
|
721
|
+
)
|
|
722
722
|
|
|
723
723
|
|
|
724
724
|
def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
|
|
@@ -764,7 +764,7 @@ def _first_array_model(models: Any) -> str | None:
|
|
|
764
764
|
return _first_string(models)
|
|
765
765
|
|
|
766
766
|
|
|
767
|
-
def _first_string(value: Any) -> str | None:
|
|
767
|
+
def _first_string(value: Any) -> str | None:
|
|
768
768
|
if isinstance(value, str):
|
|
769
769
|
trimmed = value.strip()
|
|
770
770
|
return trimmed or None
|
|
@@ -772,60 +772,60 @@ def _first_string(value: Any) -> str | None:
|
|
|
772
772
|
for item in value:
|
|
773
773
|
if isinstance(item, str) and item.strip():
|
|
774
774
|
return item.strip()
|
|
775
|
-
return None
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
|
|
779
|
-
for name in names:
|
|
780
|
-
value = data.get(name)
|
|
781
|
-
if isinstance(value, str) and value.strip():
|
|
782
|
-
return value.strip()
|
|
783
|
-
return None
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
|
|
787
|
-
for name in names:
|
|
788
|
-
value = data.get(name)
|
|
789
|
-
if isinstance(value, bool):
|
|
790
|
-
return value
|
|
791
|
-
if isinstance(value, str):
|
|
792
|
-
normalized = value.strip().lower()
|
|
793
|
-
if normalized in {"true", "1", "yes", "y"}:
|
|
794
|
-
return True
|
|
795
|
-
if normalized in {"false", "0", "no", "n"}:
|
|
796
|
-
return False
|
|
797
|
-
return None
|
|
798
|
-
|
|
799
|
-
|
|
800
|
-
def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
|
|
801
|
-
for name in names:
|
|
802
|
-
value = data.get(name)
|
|
803
|
-
if isinstance(value, bool):
|
|
804
|
-
continue
|
|
805
|
-
if isinstance(value, int):
|
|
806
|
-
return value if value > 0 else None
|
|
807
|
-
if isinstance(value, float):
|
|
808
|
-
parsed = int(value)
|
|
809
|
-
return parsed if parsed > 0 else None
|
|
810
|
-
if isinstance(value, str):
|
|
811
|
-
try:
|
|
812
|
-
parsed = int(value.strip())
|
|
813
|
-
except ValueError:
|
|
814
|
-
continue
|
|
815
|
-
return parsed if parsed > 0 else None
|
|
816
|
-
return None
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
|
|
820
|
-
limit = endpoint.max_output_length
|
|
821
|
-
if requested is None:
|
|
822
|
-
return limit
|
|
823
|
-
if limit is None:
|
|
824
|
-
return requested
|
|
825
|
-
return min(requested, limit)
|
|
826
|
-
|
|
827
|
-
|
|
828
|
-
def _post_model_json(
|
|
775
|
+
return None
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
|
|
779
|
+
for name in names:
|
|
780
|
+
value = data.get(name)
|
|
781
|
+
if isinstance(value, str) and value.strip():
|
|
782
|
+
return value.strip()
|
|
783
|
+
return None
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
|
|
787
|
+
for name in names:
|
|
788
|
+
value = data.get(name)
|
|
789
|
+
if isinstance(value, bool):
|
|
790
|
+
return value
|
|
791
|
+
if isinstance(value, str):
|
|
792
|
+
normalized = value.strip().lower()
|
|
793
|
+
if normalized in {"true", "1", "yes", "y"}:
|
|
794
|
+
return True
|
|
795
|
+
if normalized in {"false", "0", "no", "n"}:
|
|
796
|
+
return False
|
|
797
|
+
return None
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
|
|
801
|
+
for name in names:
|
|
802
|
+
value = data.get(name)
|
|
803
|
+
if isinstance(value, bool):
|
|
804
|
+
continue
|
|
805
|
+
if isinstance(value, int):
|
|
806
|
+
return value if value > 0 else None
|
|
807
|
+
if isinstance(value, float):
|
|
808
|
+
parsed = int(value)
|
|
809
|
+
return parsed if parsed > 0 else None
|
|
810
|
+
if isinstance(value, str):
|
|
811
|
+
try:
|
|
812
|
+
parsed = int(value.strip())
|
|
813
|
+
except ValueError:
|
|
814
|
+
continue
|
|
815
|
+
return parsed if parsed > 0 else None
|
|
816
|
+
return None
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
|
|
820
|
+
limit = endpoint.max_output_length
|
|
821
|
+
if requested is None:
|
|
822
|
+
return limit
|
|
823
|
+
if limit is None:
|
|
824
|
+
return requested
|
|
825
|
+
return min(requested, limit)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _post_model_json(
|
|
829
829
|
endpoint: ModelEndpoint,
|
|
830
830
|
runtime: ModelRuntime,
|
|
831
831
|
body: dict[str, Any],
|
|
@@ -40,7 +40,7 @@ class _FakeErrorBody:
|
|
|
40
40
|
return json.dumps(self._payload).encode("utf-8")
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
def _catalog_payload() -> dict[str, Any]:
|
|
43
|
+
def _catalog_payload() -> dict[str, Any]:
|
|
44
44
|
return {
|
|
45
45
|
"code": 0,
|
|
46
46
|
"success": True,
|
|
@@ -69,26 +69,26 @@ def _catalog_payload() -> dict[str, Any]:
|
|
|
69
69
|
}
|
|
70
70
|
]
|
|
71
71
|
},
|
|
72
|
-
}
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
|
|
76
|
-
return {
|
|
77
|
-
"code": 0,
|
|
78
|
-
"success": True,
|
|
79
|
-
"data": {
|
|
80
|
-
"code": model_code,
|
|
81
|
-
"baseUrl": "http://detail.example/v1/chat/completions",
|
|
82
|
-
"responseUrl": "http://detail.example/v1/responses",
|
|
83
|
-
"cacheContext": True,
|
|
84
|
-
"contextLength": 192000,
|
|
85
|
-
"maxInputLength": 128000,
|
|
86
|
-
"maxOutputLength": 64000,
|
|
87
|
-
},
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
class ModelSdkTests(unittest.TestCase):
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
|
|
76
|
+
return {
|
|
77
|
+
"code": 0,
|
|
78
|
+
"success": True,
|
|
79
|
+
"data": {
|
|
80
|
+
"code": model_code,
|
|
81
|
+
"baseUrl": "http://detail.example/v1/chat/completions",
|
|
82
|
+
"responseUrl": "http://detail.example/v1/responses",
|
|
83
|
+
"cacheContext": True,
|
|
84
|
+
"contextLength": 192000,
|
|
85
|
+
"maxInputLength": 128000,
|
|
86
|
+
"maxOutputLength": 64000,
|
|
87
|
+
},
|
|
88
|
+
}
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class ModelSdkTests(unittest.TestCase):
|
|
92
92
|
def setUp(self) -> None:
|
|
93
93
|
self.env = mock.patch.dict(
|
|
94
94
|
os.environ,
|
|
@@ -112,16 +112,16 @@ class ModelSdkTests(unittest.TestCase):
|
|
|
112
112
|
captured["exchange_headers"] = dict(req.header_items())
|
|
113
113
|
captured["exchange_body"] = json.loads(req.data.decode("utf-8"))
|
|
114
114
|
return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
|
|
115
|
-
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
116
|
-
captured["catalog_headers"] = dict(req.header_items())
|
|
117
|
-
return _FakeResponse(_catalog_payload())
|
|
118
|
-
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
119
|
-
captured["detail_headers"] = dict(req.header_items())
|
|
120
|
-
return _FakeResponse(_model_detail_payload("chat-text"))
|
|
121
|
-
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
122
|
-
captured["chat_headers"] = dict(req.header_items())
|
|
123
|
-
captured["chat_body"] = json.loads(req.data.decode("utf-8"))
|
|
124
|
-
return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
|
|
115
|
+
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
116
|
+
captured["catalog_headers"] = dict(req.header_items())
|
|
117
|
+
return _FakeResponse(_catalog_payload())
|
|
118
|
+
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
119
|
+
captured["detail_headers"] = dict(req.header_items())
|
|
120
|
+
return _FakeResponse(_model_detail_payload("chat-text"))
|
|
121
|
+
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
122
|
+
captured["chat_headers"] = dict(req.header_items())
|
|
123
|
+
captured["chat_body"] = json.loads(req.data.decode("utf-8"))
|
|
124
|
+
return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
|
|
125
125
|
raise AssertionError(req.full_url)
|
|
126
126
|
|
|
127
127
|
with mock.patch.object(model.request, "urlopen", fake_urlopen):
|
|
@@ -129,12 +129,12 @@ class ModelSdkTests(unittest.TestCase):
|
|
|
129
129
|
|
|
130
130
|
self.assertEqual(result.text, "hello")
|
|
131
131
|
self.assertEqual(captured["exchange_headers"]["Authorization"], "Bearer ai-agent-token")
|
|
132
|
-
self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
|
|
133
|
-
self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
|
|
134
|
-
self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
|
|
135
|
-
self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
|
|
136
|
-
self.assertEqual(captured["chat_body"]["model"], "chat-text")
|
|
137
|
-
self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
|
|
132
|
+
self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
|
|
133
|
+
self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
|
|
134
|
+
self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
|
|
135
|
+
self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
|
|
136
|
+
self.assertEqual(captured["chat_body"]["model"], "chat-text")
|
|
137
|
+
self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
|
|
138
138
|
|
|
139
139
|
def test_llm_explicit_model_overrides_catalog_default(self) -> None:
|
|
140
140
|
captured: dict[str, Any] = {}
|
|
@@ -142,13 +142,13 @@ class ModelSdkTests(unittest.TestCase):
|
|
|
142
142
|
def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
|
|
143
143
|
if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
|
|
144
144
|
return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
|
|
145
|
-
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
146
|
-
return _FakeResponse(_catalog_payload())
|
|
147
|
-
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
148
|
-
return _FakeResponse(_model_detail_payload("custom-chat-model"))
|
|
149
|
-
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
150
|
-
captured["chat_body"] = json.loads(req.data.decode("utf-8"))
|
|
151
|
-
return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
|
|
145
|
+
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
146
|
+
return _FakeResponse(_catalog_payload())
|
|
147
|
+
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
148
|
+
return _FakeResponse(_model_detail_payload("custom-chat-model"))
|
|
149
|
+
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
150
|
+
captured["chat_body"] = json.loads(req.data.decode("utf-8"))
|
|
151
|
+
return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
|
|
152
152
|
raise AssertionError(req.full_url)
|
|
153
153
|
|
|
154
154
|
with mock.patch.object(model.request, "urlopen", fake_urlopen):
|
|
@@ -165,14 +165,14 @@ class ModelSdkTests(unittest.TestCase):
|
|
|
165
165
|
def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
|
|
166
166
|
if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
|
|
167
167
|
return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": next(tokens)}})
|
|
168
|
-
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
169
|
-
catalog_tokens.append(dict(req.header_items())["Authorization"])
|
|
170
|
-
return _FakeResponse(_catalog_payload())
|
|
171
|
-
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
172
|
-
return _FakeResponse(_model_detail_payload("chat-text"))
|
|
173
|
-
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
174
|
-
chat_tokens.append(dict(req.header_items())["Authorization"])
|
|
175
|
-
if len(chat_tokens) == 1:
|
|
168
|
+
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
169
|
+
catalog_tokens.append(dict(req.header_items())["Authorization"])
|
|
170
|
+
return _FakeResponse(_catalog_payload())
|
|
171
|
+
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
172
|
+
return _FakeResponse(_model_detail_payload("chat-text"))
|
|
173
|
+
if req.full_url == "http://detail.example/v1/chat/completions":
|
|
174
|
+
chat_tokens.append(dict(req.header_items())["Authorization"])
|
|
175
|
+
if len(chat_tokens) == 1:
|
|
176
176
|
raise urlerror.HTTPError(
|
|
177
177
|
req.full_url,
|
|
178
178
|
401,
|
|
@@ -187,28 +187,28 @@ class ModelSdkTests(unittest.TestCase):
|
|
|
187
187
|
result = model.LLMClient().invoke("Say hello", model="auto")
|
|
188
188
|
|
|
189
189
|
self.assertEqual(result.text, "hello")
|
|
190
|
-
self.assertEqual(catalog_tokens, ["Bearer old-token"])
|
|
191
|
-
self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
|
|
192
|
-
|
|
193
|
-
def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
|
|
194
|
-
def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
|
|
195
|
-
if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
|
|
196
|
-
return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
|
|
197
|
-
if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
|
|
198
|
-
return _FakeResponse(_catalog_payload())
|
|
199
|
-
if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
|
|
200
|
-
return _FakeResponse(_model_detail_payload())
|
|
201
|
-
raise AssertionError(req.full_url)
|
|
202
|
-
|
|
203
|
-
with mock.patch.object(model.request, "urlopen", fake_urlopen):
|
|
204
|
-
endpoint = model.resolve_llm_endpoint()
|
|
205
|
-
|
|
206
|
-
self.assertEqual(endpoint.base_url, "http://detail.example/v1/chat/completions")
|
|
207
|
-
self.assertEqual(endpoint.response_url, "http://detail.example/v1/responses")
|
|
208
|
-
self.assertTrue(endpoint.cache_context)
|
|
209
|
-
self.assertEqual(endpoint.context_length, 192000)
|
|
210
|
-
self.assertEqual(endpoint.max_input_length, 128000)
|
|
211
|
-
self.assertEqual(endpoint.max_output_length, 64000)
|
|
190
|
+
self.assertEqual(catalog_tokens, ["Bearer old-token"])
|
|
191
|
+
self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
|
|
192
|
+
|
|
193
|
+
def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
    """The resolved endpoint carries the URLs and limits from the model-detail response."""
    token_url = "http://platform.example/api/v1/internal/auth/ai-user-token"
    catalog_url = "http://platform.example/api/v1/llm-gateway/ai/model-catalog"
    detail_prefix = "http://platform.example/api/v1/admin/workspaces/models/detail?"

    def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
        # Serve canned payloads for the three expected calls; anything else fails the test.
        url = req.full_url
        if url == token_url:
            payload = {"code": 0, "success": True, "data": {"accessToken": "user-token"}}
        elif url == catalog_url:
            payload = _catalog_payload()
        elif url.startswith(detail_prefix):
            payload = _model_detail_payload()
        else:
            raise AssertionError(url)
        return _FakeResponse(payload)

    with mock.patch.object(model.request, "urlopen", fake_urlopen):
        resolved = model.resolve_llm_endpoint()

    # Values below mirror the defaults produced by _model_detail_payload().
    self.assertEqual(resolved.base_url, "http://detail.example/v1/chat/completions")
    self.assertEqual(resolved.response_url, "http://detail.example/v1/responses")
    self.assertTrue(resolved.cache_context)
    self.assertEqual(resolved.context_length, 192000)
    self.assertEqual(resolved.max_input_length, 128000)
    self.assertEqual(resolved.max_output_length, 64000)
|
|
212
212
|
|
|
213
213
|
def test_image_generation_defaults_to_one_image(self) -> None:
|
|
214
214
|
captured: dict[str, Any] = {}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|