licos-dev-sdk 0.2.2__tar.gz → 0.2.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/PKG-INFO +1 -1
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/pyproject.toml +1 -1
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/__init__.py +21 -5
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/model.py +285 -69
- licos_dev_sdk-0.2.4/src/licos_dev_sdk/observability.py +527 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/tests/test_model.py +74 -29
- licos_dev_sdk-0.2.4/tests/test_observability.py +150 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/.gitignore +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/_utils.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/archive.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/chart.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/data.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/diagram.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/document.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/image.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/presentation.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/spreadsheet.py +0 -0
- {licos_dev_sdk-0.2.2 → licos_dev_sdk-0.2.4}/src/licos_dev_sdk/web.py +0 -0
|
@@ -47,9 +47,10 @@ def __getattr__(name: str):
|
|
|
47
47
|
"ImageGenerationClient": ("model", "ImageGenerationClient"),
|
|
48
48
|
"VideoGenerationClient": ("model", "VideoGenerationClient"),
|
|
49
49
|
"SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
|
|
50
|
-
"ASRClient": ("model", "ASRClient"),
|
|
51
|
-
"fetch_model_catalogs": ("model", "fetch_model_catalogs"),
|
|
52
|
-
"resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
|
|
50
|
+
"ASRClient": ("model", "ASRClient"),
|
|
51
|
+
"fetch_model_catalogs": ("model", "fetch_model_catalogs"),
|
|
52
|
+
"fetch_model_detail": ("model", "fetch_model_detail"),
|
|
53
|
+
"resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
|
|
53
54
|
"resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
|
|
54
55
|
"resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
|
|
55
56
|
"resolve_video_generation_endpoint": ("model", "resolve_video_generation_endpoint"),
|
|
@@ -59,6 +60,17 @@ def __getattr__(name: str):
|
|
|
59
60
|
"generate_video": ("model", "generate_video"),
|
|
60
61
|
"recognize_speech": ("model", "recognize_speech"),
|
|
61
62
|
"understand_image": ("model", "understand_image"),
|
|
63
|
+
# observability
|
|
64
|
+
"ObservabilityClient": ("observability", "ObservabilityClient"),
|
|
65
|
+
"ObservabilityRuntime": ("observability", "ObservabilityRuntime"),
|
|
66
|
+
"ensure_observability_database": ("observability", "ensure_observability_database"),
|
|
67
|
+
"log": ("observability", "log"),
|
|
68
|
+
"log_info": ("observability", "log_info"),
|
|
69
|
+
"log_warning": ("observability", "log_warning"),
|
|
70
|
+
"log_error": ("observability", "log_error"),
|
|
71
|
+
"record_trace": ("observability", "record_trace"),
|
|
72
|
+
"record_metric": ("observability", "record_metric"),
|
|
73
|
+
"record_error": ("observability", "record_error"),
|
|
62
74
|
}
|
|
63
75
|
if name in _map:
|
|
64
76
|
mod_name, attr = _map[name]
|
|
@@ -81,9 +93,13 @@ __all__ = [
|
|
|
81
93
|
"ModelRuntime", "ModelEndpoint", "ModelResult",
|
|
82
94
|
"ApiError", "ConfigurationError",
|
|
83
95
|
"LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
|
|
84
|
-
"SpeechRecognitionClient", "ASRClient",
|
|
85
|
-
"fetch_model_catalogs", "resolve_llm_endpoint", "resolve_vision_endpoint",
|
|
96
|
+
"SpeechRecognitionClient", "ASRClient",
|
|
97
|
+
"fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
|
|
86
98
|
"resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
|
|
87
99
|
"resolve_speech_recognition_endpoint",
|
|
88
100
|
"invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
|
|
101
|
+
"ObservabilityClient", "ObservabilityRuntime",
|
|
102
|
+
"ensure_observability_database",
|
|
103
|
+
"log", "log_info", "log_warning", "log_error",
|
|
104
|
+
"record_trace", "record_metric", "record_error",
|
|
89
105
|
]
|
|
@@ -18,13 +18,15 @@ from licos_platform_sdk._runtime import (
|
|
|
18
18
|
)
|
|
19
19
|
|
|
20
20
|
|
|
21
|
-
MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
21
|
+
MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
|
|
22
|
+
MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
|
|
23
|
+
DEFAULT_REQUEST_TIMEOUT_SECS = 120
|
|
24
|
+
DEFAULT_ASYNC_TIMEOUT_SECS = 600
|
|
25
|
+
DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
|
|
26
|
+
DEFAULT_CATALOG_CACHE_TTL_SECS = 300
|
|
27
|
+
|
|
28
|
+
_CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
|
|
29
|
+
_DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
|
|
28
30
|
|
|
29
31
|
|
|
30
32
|
@dataclass(frozen=True)
|
|
@@ -35,13 +37,18 @@ class ModelRuntime:
|
|
|
35
37
|
|
|
36
38
|
|
|
37
39
|
@dataclass(frozen=True)
|
|
38
|
-
class ModelEndpoint:
|
|
39
|
-
provider: str
|
|
40
|
-
capability: str
|
|
41
|
-
base_url: str
|
|
42
|
-
model: str
|
|
43
|
-
required_headers: dict[str, str] = field(default_factory=dict)
|
|
44
|
-
endpoint: dict[str, Any] = field(default_factory=dict)
|
|
40
|
+
class ModelEndpoint:
|
|
41
|
+
provider: str
|
|
42
|
+
capability: str
|
|
43
|
+
base_url: str
|
|
44
|
+
model: str
|
|
45
|
+
required_headers: dict[str, str] = field(default_factory=dict)
|
|
46
|
+
endpoint: dict[str, Any] = field(default_factory=dict)
|
|
47
|
+
response_url: str | None = None
|
|
48
|
+
cache_context: bool = False
|
|
49
|
+
context_length: int | None = None
|
|
50
|
+
max_input_length: int | None = None
|
|
51
|
+
max_output_length: int | None = None
|
|
45
52
|
|
|
46
53
|
@property
|
|
47
54
|
def async_task(self) -> bool:
|
|
@@ -89,7 +96,7 @@ class ModelResult:
|
|
|
89
96
|
return result
|
|
90
97
|
|
|
91
98
|
|
|
92
|
-
def fetch_model_catalogs(
|
|
99
|
+
def fetch_model_catalogs(
|
|
93
100
|
*,
|
|
94
101
|
base_url: str | None = None,
|
|
95
102
|
user_token: str | None = None,
|
|
@@ -97,19 +104,33 @@ def fetch_model_catalogs(
|
|
|
97
104
|
refresh: bool = False,
|
|
98
105
|
) -> list[dict[str, Any]]:
|
|
99
106
|
"""Fetch platform model capability catalog using the project owner token."""
|
|
100
|
-
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
101
|
-
return _fetch_model_catalogs(runtime, refresh=refresh)
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
def resolve_llm_endpoint(
|
|
107
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
108
|
+
return _fetch_model_catalogs(runtime, refresh=refresh)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def fetch_model_detail(
|
|
112
|
+
model_code: str,
|
|
113
|
+
*,
|
|
114
|
+
base_url: str | None = None,
|
|
115
|
+
user_token: str | None = None,
|
|
116
|
+
user_id: str | None = None,
|
|
117
|
+
workspace_id: str | None = None,
|
|
118
|
+
refresh: bool = False,
|
|
119
|
+
) -> dict[str, Any] | None:
|
|
120
|
+
"""Fetch model detail, including base URL, Response API support and token limits."""
|
|
121
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
122
|
+
return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def resolve_llm_endpoint(
|
|
105
126
|
*,
|
|
106
127
|
model_group: str = "text",
|
|
107
128
|
base_url: str | None = None,
|
|
108
129
|
user_token: str | None = None,
|
|
109
130
|
user_id: str | None = None,
|
|
110
|
-
) -> ModelEndpoint:
|
|
111
|
-
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
112
|
-
return
|
|
131
|
+
) -> ModelEndpoint:
|
|
132
|
+
runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
133
|
+
return _resolve_chat_endpoint(runtime, model_group=model_group)
|
|
113
134
|
|
|
114
135
|
|
|
115
136
|
def resolve_vision_endpoint(
|
|
@@ -164,7 +185,7 @@ class LLMClient:
|
|
|
164
185
|
self.ctx = ctx
|
|
165
186
|
self.runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
|
|
166
187
|
|
|
167
|
-
def invoke(
|
|
188
|
+
def invoke(
|
|
168
189
|
self,
|
|
169
190
|
messages: Sequence[Any] | str,
|
|
170
191
|
*,
|
|
@@ -173,19 +194,22 @@ class LLMClient:
|
|
|
173
194
|
max_completion_tokens: int | None = None,
|
|
174
195
|
timeout: int | None = None,
|
|
175
196
|
**extra: Any,
|
|
176
|
-
) -> ModelResult:
|
|
177
|
-
endpoint =
|
|
178
|
-
selected_model =
|
|
179
|
-
body = {
|
|
180
|
-
"model": selected_model,
|
|
181
|
-
"messages": _normalize_messages(messages),
|
|
182
|
-
**_not_none(
|
|
183
|
-
{
|
|
184
|
-
"temperature": temperature,
|
|
185
|
-
"max_completion_tokens":
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
197
|
+
) -> ModelResult:
|
|
198
|
+
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
199
|
+
selected_model = endpoint.model
|
|
200
|
+
body = {
|
|
201
|
+
"model": selected_model,
|
|
202
|
+
"messages": _normalize_messages(messages),
|
|
203
|
+
**_not_none(
|
|
204
|
+
{
|
|
205
|
+
"temperature": temperature,
|
|
206
|
+
"max_completion_tokens": _effective_max_completion_tokens(
|
|
207
|
+
max_completion_tokens,
|
|
208
|
+
endpoint,
|
|
209
|
+
),
|
|
210
|
+
**extra,
|
|
211
|
+
}
|
|
212
|
+
),
|
|
189
213
|
}
|
|
190
214
|
response = _post_model_json(endpoint, self.runtime, body, timeout=timeout)
|
|
191
215
|
texts = _extract_chat_texts(response) or _collect_texts(response)
|
|
@@ -207,20 +231,23 @@ class LLMClient:
|
|
|
207
231
|
max_completion_tokens: int | None = None,
|
|
208
232
|
timeout: int | None = None,
|
|
209
233
|
**extra: Any,
|
|
210
|
-
) -> Iterator[str]:
|
|
211
|
-
endpoint =
|
|
212
|
-
selected_model =
|
|
213
|
-
body = {
|
|
214
|
-
"model": selected_model,
|
|
215
|
-
"messages": _normalize_messages(messages),
|
|
216
|
-
"stream": True,
|
|
217
|
-
**_not_none(
|
|
218
|
-
{
|
|
219
|
-
"temperature": temperature,
|
|
220
|
-
"max_completion_tokens":
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
234
|
+
) -> Iterator[str]:
|
|
235
|
+
endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
|
|
236
|
+
selected_model = endpoint.model
|
|
237
|
+
body = {
|
|
238
|
+
"model": selected_model,
|
|
239
|
+
"messages": _normalize_messages(messages),
|
|
240
|
+
"stream": True,
|
|
241
|
+
**_not_none(
|
|
242
|
+
{
|
|
243
|
+
"temperature": temperature,
|
|
244
|
+
"max_completion_tokens": _effective_max_completion_tokens(
|
|
245
|
+
max_completion_tokens,
|
|
246
|
+
endpoint,
|
|
247
|
+
),
|
|
248
|
+
**extra,
|
|
249
|
+
}
|
|
250
|
+
),
|
|
224
251
|
}
|
|
225
252
|
yield from _stream_model_json(endpoint, self.runtime, body, timeout=timeout)
|
|
226
253
|
|
|
@@ -451,8 +478,9 @@ def understand_image(**kwargs: Any) -> ModelResult:
|
|
|
451
478
|
return VisionClient().understand(**kwargs)
|
|
452
479
|
|
|
453
480
|
|
|
454
|
-
def clear_model_catalog_cache_for_tests() -> None:
|
|
455
|
-
_CATALOG_CACHE.clear()
|
|
481
|
+
def clear_model_catalog_cache_for_tests() -> None:
|
|
482
|
+
_CATALOG_CACHE.clear()
|
|
483
|
+
_DETAIL_CACHE.clear()
|
|
456
484
|
|
|
457
485
|
|
|
458
486
|
def _model_runtime(
|
|
@@ -472,7 +500,7 @@ def _refresh_model_runtime(runtime: ModelRuntime) -> ModelRuntime:
|
|
|
472
500
|
return replace(runtime, token=token)
|
|
473
501
|
|
|
474
502
|
|
|
475
|
-
def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
|
|
503
|
+
def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
|
|
476
504
|
cache_key = (runtime.base_url, runtime.token)
|
|
477
505
|
ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
|
|
478
506
|
cached = _CATALOG_CACHE.get(cache_key)
|
|
@@ -493,11 +521,81 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
|
|
|
493
521
|
catalogs = _catalogs_from_payload(payload)
|
|
494
522
|
if not catalogs:
|
|
495
523
|
raise ApiError("model catalog has no provider entries", details=payload)
|
|
496
|
-
_CATALOG_CACHE[cache_key] = (time.time(), catalogs)
|
|
497
|
-
return catalogs
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
524
|
+
_CATALOG_CACHE[cache_key] = (time.time(), catalogs)
|
|
525
|
+
return catalogs
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def _workspace_id(workspace_id: str | None = None) -> str | None:
|
|
529
|
+
value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
|
|
530
|
+
if value is None:
|
|
531
|
+
return None
|
|
532
|
+
value = str(value).strip()
|
|
533
|
+
return value or None
|
|
534
|
+
|
|
535
|
+
|
|
536
|
+
def _fetch_model_detail(
|
|
537
|
+
runtime: ModelRuntime,
|
|
538
|
+
model_code: str,
|
|
539
|
+
*,
|
|
540
|
+
workspace_id: str | None = None,
|
|
541
|
+
refresh: bool = False,
|
|
542
|
+
) -> dict[str, Any] | None:
|
|
543
|
+
model_code = str(model_code or "").strip()
|
|
544
|
+
if not model_code:
|
|
545
|
+
return None
|
|
546
|
+
resolved_workspace_id = _workspace_id(workspace_id) or ""
|
|
547
|
+
cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
|
|
548
|
+
ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
|
|
549
|
+
cached = _DETAIL_CACHE.get(cache_key)
|
|
550
|
+
if cached and not refresh and time.time() - cached[0] <= ttl:
|
|
551
|
+
return cached[1]
|
|
552
|
+
|
|
553
|
+
query = {"code": model_code}
|
|
554
|
+
if resolved_workspace_id:
|
|
555
|
+
query["workspaceId"] = resolved_workspace_id
|
|
556
|
+
url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
|
|
557
|
+
headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
|
|
558
|
+
try:
|
|
559
|
+
payload = _request_json(
|
|
560
|
+
"GET",
|
|
561
|
+
url,
|
|
562
|
+
token=runtime.token,
|
|
563
|
+
headers=headers,
|
|
564
|
+
timeout=30,
|
|
565
|
+
)
|
|
566
|
+
except ApiError as exc:
|
|
567
|
+
if not refresh and should_refresh_user_token(exc):
|
|
568
|
+
return _fetch_model_detail(
|
|
569
|
+
_refresh_model_runtime(runtime),
|
|
570
|
+
model_code,
|
|
571
|
+
workspace_id=workspace_id,
|
|
572
|
+
refresh=True,
|
|
573
|
+
)
|
|
574
|
+
raise
|
|
575
|
+
detail = _model_detail_from_payload(payload)
|
|
576
|
+
_DETAIL_CACHE[cache_key] = (time.time(), detail)
|
|
577
|
+
return detail
|
|
578
|
+
|
|
579
|
+
|
|
580
|
+
def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
|
|
581
|
+
if not isinstance(payload, dict):
|
|
582
|
+
raise ApiError("model detail response is not an object", details=payload)
|
|
583
|
+
code = payload.get("code")
|
|
584
|
+
if code not in (None, 0) or payload.get("success") is False:
|
|
585
|
+
raise ApiError(
|
|
586
|
+
str(payload.get("message") or "model detail API failed"),
|
|
587
|
+
code=code if isinstance(code, int) else None,
|
|
588
|
+
details=payload,
|
|
589
|
+
)
|
|
590
|
+
data = payload.get("data")
|
|
591
|
+
if data is None:
|
|
592
|
+
return None
|
|
593
|
+
if not isinstance(data, dict):
|
|
594
|
+
raise ApiError("model detail data is not an object", details=payload)
|
|
595
|
+
return data
|
|
596
|
+
|
|
597
|
+
|
|
598
|
+
def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
501
599
|
if not isinstance(payload, dict):
|
|
502
600
|
raise ApiError("model catalog response is not an object", details=payload)
|
|
503
601
|
code = payload.get("code")
|
|
@@ -516,10 +614,26 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
|
|
|
516
614
|
items = []
|
|
517
615
|
if not isinstance(items, list):
|
|
518
616
|
raise ApiError("model catalog data is not a list", details=payload)
|
|
519
|
-
return [item for item in items if isinstance(item, dict)]
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
def _resolve_endpoint(
|
|
617
|
+
return [item for item in items if isinstance(item, dict)]
|
|
618
|
+
|
|
619
|
+
|
|
620
|
+
def _resolve_chat_endpoint(
|
|
621
|
+
runtime: ModelRuntime,
|
|
622
|
+
*,
|
|
623
|
+
model_group: str,
|
|
624
|
+
requested_model: str | None = None,
|
|
625
|
+
) -> ModelEndpoint:
|
|
626
|
+
endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
|
|
627
|
+
selected_model = _selected_model(requested_model, endpoint.model)
|
|
628
|
+
endpoint = replace(endpoint, model=selected_model)
|
|
629
|
+
try:
|
|
630
|
+
detail = _fetch_model_detail(runtime, selected_model)
|
|
631
|
+
except ApiError:
|
|
632
|
+
return endpoint
|
|
633
|
+
return _apply_model_detail(endpoint, detail)
|
|
634
|
+
|
|
635
|
+
|
|
636
|
+
def _resolve_endpoint(
|
|
523
637
|
runtime: ModelRuntime,
|
|
524
638
|
capability_key: str,
|
|
525
639
|
*,
|
|
@@ -552,7 +666,59 @@ def _resolve_endpoint(
|
|
|
552
666
|
required_headers=_parse_required_headers(capability.get("requiredHeaders")),
|
|
553
667
|
endpoint=capability,
|
|
554
668
|
)
|
|
555
|
-
raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
|
|
669
|
+
raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
|
|
670
|
+
|
|
671
|
+
|
|
672
|
+
def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
|
|
673
|
+
if not detail:
|
|
674
|
+
return endpoint
|
|
675
|
+
base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
|
|
676
|
+
response_url = _first_non_empty_field(
|
|
677
|
+
detail,
|
|
678
|
+
["responseUrl", "response_url", "responsesUrl", "responses_url"],
|
|
679
|
+
)
|
|
680
|
+
cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
|
|
681
|
+
return replace(
|
|
682
|
+
endpoint,
|
|
683
|
+
base_url=base_url,
|
|
684
|
+
response_url=response_url or endpoint.response_url,
|
|
685
|
+
cache_context=endpoint.cache_context if cache_context is None else cache_context,
|
|
686
|
+
context_length=_int_field(
|
|
687
|
+
detail,
|
|
688
|
+
[
|
|
689
|
+
"contextLength",
|
|
690
|
+
"context_length",
|
|
691
|
+
"contextWindow",
|
|
692
|
+
"context_window",
|
|
693
|
+
"maxContextTokens",
|
|
694
|
+
"max_context_tokens",
|
|
695
|
+
],
|
|
696
|
+
),
|
|
697
|
+
max_input_length=_int_field(
|
|
698
|
+
detail,
|
|
699
|
+
[
|
|
700
|
+
"maxInputLength",
|
|
701
|
+
"max_input_length",
|
|
702
|
+
"maxInputTokens",
|
|
703
|
+
"max_input_tokens",
|
|
704
|
+
"inputTokenLimit",
|
|
705
|
+
"input_token_limit",
|
|
706
|
+
],
|
|
707
|
+
),
|
|
708
|
+
max_output_length=_int_field(
|
|
709
|
+
detail,
|
|
710
|
+
[
|
|
711
|
+
"maxOutputLength",
|
|
712
|
+
"max_output_length",
|
|
713
|
+
"maxOutputTokens",
|
|
714
|
+
"max_output_tokens",
|
|
715
|
+
"outputTokenLimit",
|
|
716
|
+
"output_token_limit",
|
|
717
|
+
"maxCompletionTokens",
|
|
718
|
+
"max_completion_tokens",
|
|
719
|
+
],
|
|
720
|
+
),
|
|
721
|
+
)
|
|
556
722
|
|
|
557
723
|
|
|
558
724
|
def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
|
|
@@ -598,7 +764,7 @@ def _first_array_model(models: Any) -> str | None:
|
|
|
598
764
|
return _first_string(models)
|
|
599
765
|
|
|
600
766
|
|
|
601
|
-
def _first_string(value: Any) -> str | None:
|
|
767
|
+
def _first_string(value: Any) -> str | None:
|
|
602
768
|
if isinstance(value, str):
|
|
603
769
|
trimmed = value.strip()
|
|
604
770
|
return trimmed or None
|
|
@@ -606,10 +772,60 @@ def _first_string(value: Any) -> str | None:
|
|
|
606
772
|
for item in value:
|
|
607
773
|
if isinstance(item, str) and item.strip():
|
|
608
774
|
return item.strip()
|
|
609
|
-
return None
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
def _post_model_json(
|
|
775
|
+
return None
|
|
776
|
+
|
|
777
|
+
|
|
778
|
+
def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
|
|
779
|
+
for name in names:
|
|
780
|
+
value = data.get(name)
|
|
781
|
+
if isinstance(value, str) and value.strip():
|
|
782
|
+
return value.strip()
|
|
783
|
+
return None
|
|
784
|
+
|
|
785
|
+
|
|
786
|
+
def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
|
|
787
|
+
for name in names:
|
|
788
|
+
value = data.get(name)
|
|
789
|
+
if isinstance(value, bool):
|
|
790
|
+
return value
|
|
791
|
+
if isinstance(value, str):
|
|
792
|
+
normalized = value.strip().lower()
|
|
793
|
+
if normalized in {"true", "1", "yes", "y"}:
|
|
794
|
+
return True
|
|
795
|
+
if normalized in {"false", "0", "no", "n"}:
|
|
796
|
+
return False
|
|
797
|
+
return None
|
|
798
|
+
|
|
799
|
+
|
|
800
|
+
def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
|
|
801
|
+
for name in names:
|
|
802
|
+
value = data.get(name)
|
|
803
|
+
if isinstance(value, bool):
|
|
804
|
+
continue
|
|
805
|
+
if isinstance(value, int):
|
|
806
|
+
return value if value > 0 else None
|
|
807
|
+
if isinstance(value, float):
|
|
808
|
+
parsed = int(value)
|
|
809
|
+
return parsed if parsed > 0 else None
|
|
810
|
+
if isinstance(value, str):
|
|
811
|
+
try:
|
|
812
|
+
parsed = int(value.strip())
|
|
813
|
+
except ValueError:
|
|
814
|
+
continue
|
|
815
|
+
return parsed if parsed > 0 else None
|
|
816
|
+
return None
|
|
817
|
+
|
|
818
|
+
|
|
819
|
+
def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
|
|
820
|
+
limit = endpoint.max_output_length
|
|
821
|
+
if requested is None:
|
|
822
|
+
return limit
|
|
823
|
+
if limit is None:
|
|
824
|
+
return requested
|
|
825
|
+
return min(requested, limit)
|
|
826
|
+
|
|
827
|
+
|
|
828
|
+
def _post_model_json(
|
|
613
829
|
endpoint: ModelEndpoint,
|
|
614
830
|
runtime: ModelRuntime,
|
|
615
831
|
body: dict[str, Any],
|