licos-dev-sdk 0.2.4__tar.gz → 0.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -17,9 +17,10 @@ packages/*/dist/
17
17
  .DS_Store
18
18
  Thumbs.db
19
19
 
20
- # Environment
21
- .env
22
- .env.local
20
+ # Environment
21
+ .env
22
+ .env.local
23
+ crates/industrial/industrial-stack.env
23
24
 
24
25
  # Workspace
25
26
  /workspace/
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: licos-dev-sdk
3
- Version: 0.2.4
3
+ Version: 0.2.5
4
4
  Summary: LICOS Dev SDK - file generation and model capability clients
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: graphviz>=0.20
7
7
  Requires-Dist: jinja2>=3.1
8
- Requires-Dist: licos-platform-sdk>=0.2.6
8
+ Requires-Dist: licos-platform-sdk>=0.2.8
9
9
  Requires-Dist: matplotlib>=3.9
10
10
  Requires-Dist: mistune>=3.0
11
11
  Requires-Dist: openpyxl>=3.1
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "licos-dev-sdk"
7
- version = "0.2.4"
7
+ version = "0.2.5"
8
8
  description = "LICOS Dev SDK - file generation and model capability clients"
9
9
  requires-python = ">=3.10"
10
10
  dependencies = [
11
- "licos-platform-sdk>=0.2.6",
11
+ "licos-platform-sdk>=0.2.8",
12
12
  "weasyprint>=62.0",
13
13
  "python-docx>=1.1",
14
14
  "openpyxl>=3.1",
@@ -24,4 +24,4 @@ dependencies = [
24
24
  ]
25
25
 
26
26
  [tool.hatch.build.targets.wheel]
27
- packages = ["src/licos_dev_sdk"]
27
+ packages = ["src/licos_dev_sdk"]
@@ -47,10 +47,10 @@ def __getattr__(name: str):
47
47
  "ImageGenerationClient": ("model", "ImageGenerationClient"),
48
48
  "VideoGenerationClient": ("model", "VideoGenerationClient"),
49
49
  "SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
50
- "ASRClient": ("model", "ASRClient"),
51
- "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
- "fetch_model_detail": ("model", "fetch_model_detail"),
53
- "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
50
+ "ASRClient": ("model", "ASRClient"),
51
+ "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
+ "fetch_model_detail": ("model", "fetch_model_detail"),
53
+ "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
54
54
  "resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
55
55
  "resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
56
56
  "resolve_video_generation_endpoint": ("model", "resolve_video_generation_endpoint"),
@@ -93,8 +93,8 @@ __all__ = [
93
93
  "ModelRuntime", "ModelEndpoint", "ModelResult",
94
94
  "ApiError", "ConfigurationError",
95
95
  "LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
96
- "SpeechRecognitionClient", "ASRClient",
97
- "fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
96
+ "SpeechRecognitionClient", "ASRClient",
97
+ "fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
98
98
  "resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
99
99
  "resolve_speech_recognition_endpoint",
100
100
  "invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
@@ -18,15 +18,15 @@ from licos_platform_sdk._runtime import (
18
18
  )
19
19
 
20
20
 
21
- MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
- MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
23
- DEFAULT_REQUEST_TIMEOUT_SECS = 120
24
- DEFAULT_ASYNC_TIMEOUT_SECS = 600
25
- DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
26
- DEFAULT_CATALOG_CACHE_TTL_SECS = 300
27
-
28
- _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
29
- _DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
21
+ MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
+ MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
23
+ DEFAULT_REQUEST_TIMEOUT_SECS = 120
24
+ DEFAULT_ASYNC_TIMEOUT_SECS = 600
25
+ DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
26
+ DEFAULT_CATALOG_CACHE_TTL_SECS = 300
27
+
28
+ _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
29
+ _DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
30
30
 
31
31
 
32
32
  @dataclass(frozen=True)
@@ -37,18 +37,18 @@ class ModelRuntime:
37
37
 
38
38
 
39
39
  @dataclass(frozen=True)
40
- class ModelEndpoint:
41
- provider: str
42
- capability: str
43
- base_url: str
44
- model: str
45
- required_headers: dict[str, str] = field(default_factory=dict)
46
- endpoint: dict[str, Any] = field(default_factory=dict)
47
- response_url: str | None = None
48
- cache_context: bool = False
49
- context_length: int | None = None
50
- max_input_length: int | None = None
51
- max_output_length: int | None = None
40
+ class ModelEndpoint:
41
+ provider: str
42
+ capability: str
43
+ base_url: str
44
+ model: str
45
+ required_headers: dict[str, str] = field(default_factory=dict)
46
+ endpoint: dict[str, Any] = field(default_factory=dict)
47
+ response_url: str | None = None
48
+ cache_context: bool = False
49
+ context_length: int | None = None
50
+ max_input_length: int | None = None
51
+ max_output_length: int | None = None
52
52
 
53
53
  @property
54
54
  def async_task(self) -> bool:
@@ -96,7 +96,7 @@ class ModelResult:
96
96
  return result
97
97
 
98
98
 
99
- def fetch_model_catalogs(
99
+ def fetch_model_catalogs(
100
100
  *,
101
101
  base_url: str | None = None,
102
102
  user_token: str | None = None,
@@ -104,33 +104,33 @@ def fetch_model_catalogs(
104
104
  refresh: bool = False,
105
105
  ) -> list[dict[str, Any]]:
106
106
  """Fetch platform model capability catalog using the project owner token."""
107
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
108
- return _fetch_model_catalogs(runtime, refresh=refresh)
109
-
110
-
111
- def fetch_model_detail(
112
- model_code: str,
113
- *,
114
- base_url: str | None = None,
115
- user_token: str | None = None,
116
- user_id: str | None = None,
117
- workspace_id: str | None = None,
118
- refresh: bool = False,
119
- ) -> dict[str, Any] | None:
120
- """Fetch model detail, including base URL, Response API support and token limits."""
121
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
122
- return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
123
-
124
-
125
- def resolve_llm_endpoint(
107
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
108
+ return _fetch_model_catalogs(runtime, refresh=refresh)
109
+
110
+
111
+ def fetch_model_detail(
112
+ model_code: str,
113
+ *,
114
+ base_url: str | None = None,
115
+ user_token: str | None = None,
116
+ user_id: str | None = None,
117
+ workspace_id: str | None = None,
118
+ refresh: bool = False,
119
+ ) -> dict[str, Any] | None:
120
+ """Fetch model detail, including base URL, Response API support and token limits."""
121
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
122
+ return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
123
+
124
+
125
+ def resolve_llm_endpoint(
126
126
  *,
127
127
  model_group: str = "text",
128
128
  base_url: str | None = None,
129
129
  user_token: str | None = None,
130
130
  user_id: str | None = None,
131
- ) -> ModelEndpoint:
132
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
133
- return _resolve_chat_endpoint(runtime, model_group=model_group)
131
+ ) -> ModelEndpoint:
132
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
133
+ return _resolve_chat_endpoint(runtime, model_group=model_group)
134
134
 
135
135
 
136
136
  def resolve_vision_endpoint(
@@ -185,7 +185,7 @@ class LLMClient:
185
185
  self.ctx = ctx
186
186
  self.runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
187
187
 
188
- def invoke(
188
+ def invoke(
189
189
  self,
190
190
  messages: Sequence[Any] | str,
191
191
  *,
@@ -194,22 +194,22 @@ class LLMClient:
194
194
  max_completion_tokens: int | None = None,
195
195
  timeout: int | None = None,
196
196
  **extra: Any,
197
- ) -> ModelResult:
198
- endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
199
- selected_model = endpoint.model
200
- body = {
201
- "model": selected_model,
202
- "messages": _normalize_messages(messages),
203
- **_not_none(
204
- {
205
- "temperature": temperature,
206
- "max_completion_tokens": _effective_max_completion_tokens(
207
- max_completion_tokens,
208
- endpoint,
209
- ),
210
- **extra,
211
- }
212
- ),
197
+ ) -> ModelResult:
198
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
199
+ selected_model = endpoint.model
200
+ body = {
201
+ "model": selected_model,
202
+ "messages": _normalize_messages(messages),
203
+ **_not_none(
204
+ {
205
+ "temperature": temperature,
206
+ "max_completion_tokens": _effective_max_completion_tokens(
207
+ max_completion_tokens,
208
+ endpoint,
209
+ ),
210
+ **extra,
211
+ }
212
+ ),
213
213
  }
214
214
  response = _post_model_json(endpoint, self.runtime, body, timeout=timeout)
215
215
  texts = _extract_chat_texts(response) or _collect_texts(response)
@@ -231,23 +231,23 @@ class LLMClient:
231
231
  max_completion_tokens: int | None = None,
232
232
  timeout: int | None = None,
233
233
  **extra: Any,
234
- ) -> Iterator[str]:
235
- endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
236
- selected_model = endpoint.model
237
- body = {
238
- "model": selected_model,
239
- "messages": _normalize_messages(messages),
240
- "stream": True,
241
- **_not_none(
242
- {
243
- "temperature": temperature,
244
- "max_completion_tokens": _effective_max_completion_tokens(
245
- max_completion_tokens,
246
- endpoint,
247
- ),
248
- **extra,
249
- }
250
- ),
234
+ ) -> Iterator[str]:
235
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
236
+ selected_model = endpoint.model
237
+ body = {
238
+ "model": selected_model,
239
+ "messages": _normalize_messages(messages),
240
+ "stream": True,
241
+ **_not_none(
242
+ {
243
+ "temperature": temperature,
244
+ "max_completion_tokens": _effective_max_completion_tokens(
245
+ max_completion_tokens,
246
+ endpoint,
247
+ ),
248
+ **extra,
249
+ }
250
+ ),
251
251
  }
252
252
  yield from _stream_model_json(endpoint, self.runtime, body, timeout=timeout)
253
253
 
@@ -478,9 +478,9 @@ def understand_image(**kwargs: Any) -> ModelResult:
478
478
  return VisionClient().understand(**kwargs)
479
479
 
480
480
 
481
- def clear_model_catalog_cache_for_tests() -> None:
482
- _CATALOG_CACHE.clear()
483
- _DETAIL_CACHE.clear()
481
+ def clear_model_catalog_cache_for_tests() -> None:
482
+ _CATALOG_CACHE.clear()
483
+ _DETAIL_CACHE.clear()
484
484
 
485
485
 
486
486
  def _model_runtime(
@@ -500,7 +500,7 @@ def _refresh_model_runtime(runtime: ModelRuntime) -> ModelRuntime:
500
500
  return replace(runtime, token=token)
501
501
 
502
502
 
503
- def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
503
+ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
504
504
  cache_key = (runtime.base_url, runtime.token)
505
505
  ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
506
506
  cached = _CATALOG_CACHE.get(cache_key)
@@ -521,81 +521,81 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
521
521
  catalogs = _catalogs_from_payload(payload)
522
522
  if not catalogs:
523
523
  raise ApiError("model catalog has no provider entries", details=payload)
524
- _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
525
- return catalogs
526
-
527
-
528
- def _workspace_id(workspace_id: str | None = None) -> str | None:
529
- value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
530
- if value is None:
531
- return None
532
- value = str(value).strip()
533
- return value or None
534
-
535
-
536
- def _fetch_model_detail(
537
- runtime: ModelRuntime,
538
- model_code: str,
539
- *,
540
- workspace_id: str | None = None,
541
- refresh: bool = False,
542
- ) -> dict[str, Any] | None:
543
- model_code = str(model_code or "").strip()
544
- if not model_code:
545
- return None
546
- resolved_workspace_id = _workspace_id(workspace_id) or ""
547
- cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
548
- ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
549
- cached = _DETAIL_CACHE.get(cache_key)
550
- if cached and not refresh and time.time() - cached[0] <= ttl:
551
- return cached[1]
552
-
553
- query = {"code": model_code}
554
- if resolved_workspace_id:
555
- query["workspaceId"] = resolved_workspace_id
556
- url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
557
- headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
558
- try:
559
- payload = _request_json(
560
- "GET",
561
- url,
562
- token=runtime.token,
563
- headers=headers,
564
- timeout=30,
565
- )
566
- except ApiError as exc:
567
- if not refresh and should_refresh_user_token(exc):
568
- return _fetch_model_detail(
569
- _refresh_model_runtime(runtime),
570
- model_code,
571
- workspace_id=workspace_id,
572
- refresh=True,
573
- )
574
- raise
575
- detail = _model_detail_from_payload(payload)
576
- _DETAIL_CACHE[cache_key] = (time.time(), detail)
577
- return detail
578
-
579
-
580
- def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
581
- if not isinstance(payload, dict):
582
- raise ApiError("model detail response is not an object", details=payload)
583
- code = payload.get("code")
584
- if code not in (None, 0) or payload.get("success") is False:
585
- raise ApiError(
586
- str(payload.get("message") or "model detail API failed"),
587
- code=code if isinstance(code, int) else None,
588
- details=payload,
589
- )
590
- data = payload.get("data")
591
- if data is None:
592
- return None
593
- if not isinstance(data, dict):
594
- raise ApiError("model detail data is not an object", details=payload)
595
- return data
596
-
597
-
598
- def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
524
+ _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
525
+ return catalogs
526
+
527
+
528
+ def _workspace_id(workspace_id: str | None = None) -> str | None:
529
+ value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
530
+ if value is None:
531
+ return None
532
+ value = str(value).strip()
533
+ return value or None
534
+
535
+
536
+ def _fetch_model_detail(
537
+ runtime: ModelRuntime,
538
+ model_code: str,
539
+ *,
540
+ workspace_id: str | None = None,
541
+ refresh: bool = False,
542
+ ) -> dict[str, Any] | None:
543
+ model_code = str(model_code or "").strip()
544
+ if not model_code:
545
+ return None
546
+ resolved_workspace_id = _workspace_id(workspace_id) or ""
547
+ cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
548
+ ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
549
+ cached = _DETAIL_CACHE.get(cache_key)
550
+ if cached and not refresh and time.time() - cached[0] <= ttl:
551
+ return cached[1]
552
+
553
+ query = {"code": model_code}
554
+ if resolved_workspace_id:
555
+ query["workspaceId"] = resolved_workspace_id
556
+ url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
557
+ headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
558
+ try:
559
+ payload = _request_json(
560
+ "GET",
561
+ url,
562
+ token=runtime.token,
563
+ headers=headers,
564
+ timeout=30,
565
+ )
566
+ except ApiError as exc:
567
+ if not refresh and should_refresh_user_token(exc):
568
+ return _fetch_model_detail(
569
+ _refresh_model_runtime(runtime),
570
+ model_code,
571
+ workspace_id=workspace_id,
572
+ refresh=True,
573
+ )
574
+ raise
575
+ detail = _model_detail_from_payload(payload)
576
+ _DETAIL_CACHE[cache_key] = (time.time(), detail)
577
+ return detail
578
+
579
+
580
+ def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
581
+ if not isinstance(payload, dict):
582
+ raise ApiError("model detail response is not an object", details=payload)
583
+ code = payload.get("code")
584
+ if code not in (None, 0) or payload.get("success") is False:
585
+ raise ApiError(
586
+ str(payload.get("message") or "model detail API failed"),
587
+ code=code if isinstance(code, int) else None,
588
+ details=payload,
589
+ )
590
+ data = payload.get("data")
591
+ if data is None:
592
+ return None
593
+ if not isinstance(data, dict):
594
+ raise ApiError("model detail data is not an object", details=payload)
595
+ return data
596
+
597
+
598
+ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
599
599
  if not isinstance(payload, dict):
600
600
  raise ApiError("model catalog response is not an object", details=payload)
601
601
  code = payload.get("code")
@@ -614,26 +614,26 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
614
614
  items = []
615
615
  if not isinstance(items, list):
616
616
  raise ApiError("model catalog data is not a list", details=payload)
617
- return [item for item in items if isinstance(item, dict)]
618
-
619
-
620
- def _resolve_chat_endpoint(
621
- runtime: ModelRuntime,
622
- *,
623
- model_group: str,
624
- requested_model: str | None = None,
625
- ) -> ModelEndpoint:
626
- endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
627
- selected_model = _selected_model(requested_model, endpoint.model)
628
- endpoint = replace(endpoint, model=selected_model)
629
- try:
630
- detail = _fetch_model_detail(runtime, selected_model)
631
- except ApiError:
632
- return endpoint
633
- return _apply_model_detail(endpoint, detail)
634
-
635
-
636
- def _resolve_endpoint(
617
+ return [item for item in items if isinstance(item, dict)]
618
+
619
+
620
+ def _resolve_chat_endpoint(
621
+ runtime: ModelRuntime,
622
+ *,
623
+ model_group: str,
624
+ requested_model: str | None = None,
625
+ ) -> ModelEndpoint:
626
+ endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
627
+ selected_model = _selected_model(requested_model, endpoint.model)
628
+ endpoint = replace(endpoint, model=selected_model)
629
+ try:
630
+ detail = _fetch_model_detail(runtime, selected_model)
631
+ except ApiError:
632
+ return endpoint
633
+ return _apply_model_detail(endpoint, detail)
634
+
635
+
636
+ def _resolve_endpoint(
637
637
  runtime: ModelRuntime,
638
638
  capability_key: str,
639
639
  *,
@@ -666,59 +666,59 @@ def _resolve_endpoint(
666
666
  required_headers=_parse_required_headers(capability.get("requiredHeaders")),
667
667
  endpoint=capability,
668
668
  )
669
- raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
670
-
671
-
672
- def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
673
- if not detail:
674
- return endpoint
675
- base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
676
- response_url = _first_non_empty_field(
677
- detail,
678
- ["responseUrl", "response_url", "responsesUrl", "responses_url"],
679
- )
680
- cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
681
- return replace(
682
- endpoint,
683
- base_url=base_url,
684
- response_url=response_url or endpoint.response_url,
685
- cache_context=endpoint.cache_context if cache_context is None else cache_context,
686
- context_length=_int_field(
687
- detail,
688
- [
689
- "contextLength",
690
- "context_length",
691
- "contextWindow",
692
- "context_window",
693
- "maxContextTokens",
694
- "max_context_tokens",
695
- ],
696
- ),
697
- max_input_length=_int_field(
698
- detail,
699
- [
700
- "maxInputLength",
701
- "max_input_length",
702
- "maxInputTokens",
703
- "max_input_tokens",
704
- "inputTokenLimit",
705
- "input_token_limit",
706
- ],
707
- ),
708
- max_output_length=_int_field(
709
- detail,
710
- [
711
- "maxOutputLength",
712
- "max_output_length",
713
- "maxOutputTokens",
714
- "max_output_tokens",
715
- "outputTokenLimit",
716
- "output_token_limit",
717
- "maxCompletionTokens",
718
- "max_completion_tokens",
719
- ],
720
- ),
721
- )
669
+ raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
670
+
671
+
672
+ def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
673
+ if not detail:
674
+ return endpoint
675
+ base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
676
+ response_url = _first_non_empty_field(
677
+ detail,
678
+ ["responseUrl", "response_url", "responsesUrl", "responses_url"],
679
+ )
680
+ cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
681
+ return replace(
682
+ endpoint,
683
+ base_url=base_url,
684
+ response_url=response_url or endpoint.response_url,
685
+ cache_context=endpoint.cache_context if cache_context is None else cache_context,
686
+ context_length=_int_field(
687
+ detail,
688
+ [
689
+ "contextLength",
690
+ "context_length",
691
+ "contextWindow",
692
+ "context_window",
693
+ "maxContextTokens",
694
+ "max_context_tokens",
695
+ ],
696
+ ),
697
+ max_input_length=_int_field(
698
+ detail,
699
+ [
700
+ "maxInputLength",
701
+ "max_input_length",
702
+ "maxInputTokens",
703
+ "max_input_tokens",
704
+ "inputTokenLimit",
705
+ "input_token_limit",
706
+ ],
707
+ ),
708
+ max_output_length=_int_field(
709
+ detail,
710
+ [
711
+ "maxOutputLength",
712
+ "max_output_length",
713
+ "maxOutputTokens",
714
+ "max_output_tokens",
715
+ "outputTokenLimit",
716
+ "output_token_limit",
717
+ "maxCompletionTokens",
718
+ "max_completion_tokens",
719
+ ],
720
+ ),
721
+ )
722
722
 
723
723
 
724
724
  def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
@@ -764,7 +764,7 @@ def _first_array_model(models: Any) -> str | None:
764
764
  return _first_string(models)
765
765
 
766
766
 
767
- def _first_string(value: Any) -> str | None:
767
+ def _first_string(value: Any) -> str | None:
768
768
  if isinstance(value, str):
769
769
  trimmed = value.strip()
770
770
  return trimmed or None
@@ -772,60 +772,60 @@ def _first_string(value: Any) -> str | None:
772
772
  for item in value:
773
773
  if isinstance(item, str) and item.strip():
774
774
  return item.strip()
775
- return None
776
-
777
-
778
- def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
779
- for name in names:
780
- value = data.get(name)
781
- if isinstance(value, str) and value.strip():
782
- return value.strip()
783
- return None
784
-
785
-
786
- def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
787
- for name in names:
788
- value = data.get(name)
789
- if isinstance(value, bool):
790
- return value
791
- if isinstance(value, str):
792
- normalized = value.strip().lower()
793
- if normalized in {"true", "1", "yes", "y"}:
794
- return True
795
- if normalized in {"false", "0", "no", "n"}:
796
- return False
797
- return None
798
-
799
-
800
- def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
801
- for name in names:
802
- value = data.get(name)
803
- if isinstance(value, bool):
804
- continue
805
- if isinstance(value, int):
806
- return value if value > 0 else None
807
- if isinstance(value, float):
808
- parsed = int(value)
809
- return parsed if parsed > 0 else None
810
- if isinstance(value, str):
811
- try:
812
- parsed = int(value.strip())
813
- except ValueError:
814
- continue
815
- return parsed if parsed > 0 else None
816
- return None
817
-
818
-
819
- def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
820
- limit = endpoint.max_output_length
821
- if requested is None:
822
- return limit
823
- if limit is None:
824
- return requested
825
- return min(requested, limit)
826
-
827
-
828
- def _post_model_json(
775
+ return None
776
+
777
+
778
+ def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
779
+ for name in names:
780
+ value = data.get(name)
781
+ if isinstance(value, str) and value.strip():
782
+ return value.strip()
783
+ return None
784
+
785
+
786
+ def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
787
+ for name in names:
788
+ value = data.get(name)
789
+ if isinstance(value, bool):
790
+ return value
791
+ if isinstance(value, str):
792
+ normalized = value.strip().lower()
793
+ if normalized in {"true", "1", "yes", "y"}:
794
+ return True
795
+ if normalized in {"false", "0", "no", "n"}:
796
+ return False
797
+ return None
798
+
799
+
800
+ def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
801
+ for name in names:
802
+ value = data.get(name)
803
+ if isinstance(value, bool):
804
+ continue
805
+ if isinstance(value, int):
806
+ return value if value > 0 else None
807
+ if isinstance(value, float):
808
+ parsed = int(value)
809
+ return parsed if parsed > 0 else None
810
+ if isinstance(value, str):
811
+ try:
812
+ parsed = int(value.strip())
813
+ except ValueError:
814
+ continue
815
+ return parsed if parsed > 0 else None
816
+ return None
817
+
818
+
819
+ def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
820
+ limit = endpoint.max_output_length
821
+ if requested is None:
822
+ return limit
823
+ if limit is None:
824
+ return requested
825
+ return min(requested, limit)
826
+
827
+
828
+ def _post_model_json(
829
829
  endpoint: ModelEndpoint,
830
830
  runtime: ModelRuntime,
831
831
  body: dict[str, Any],
@@ -40,7 +40,7 @@ class _FakeErrorBody:
40
40
  return json.dumps(self._payload).encode("utf-8")
41
41
 
42
42
 
43
- def _catalog_payload() -> dict[str, Any]:
43
+ def _catalog_payload() -> dict[str, Any]:
44
44
  return {
45
45
  "code": 0,
46
46
  "success": True,
@@ -69,26 +69,26 @@ def _catalog_payload() -> dict[str, Any]:
69
69
  }
70
70
  ]
71
71
  },
72
- }
73
-
74
-
75
- def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
76
- return {
77
- "code": 0,
78
- "success": True,
79
- "data": {
80
- "code": model_code,
81
- "baseUrl": "http://detail.example/v1/chat/completions",
82
- "responseUrl": "http://detail.example/v1/responses",
83
- "cacheContext": True,
84
- "contextLength": 192000,
85
- "maxInputLength": 128000,
86
- "maxOutputLength": 64000,
87
- },
88
- }
89
-
90
-
91
- class ModelSdkTests(unittest.TestCase):
72
+ }
73
+
74
+
75
+ def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
76
+ return {
77
+ "code": 0,
78
+ "success": True,
79
+ "data": {
80
+ "code": model_code,
81
+ "baseUrl": "http://detail.example/v1/chat/completions",
82
+ "responseUrl": "http://detail.example/v1/responses",
83
+ "cacheContext": True,
84
+ "contextLength": 192000,
85
+ "maxInputLength": 128000,
86
+ "maxOutputLength": 64000,
87
+ },
88
+ }
89
+
90
+
91
+ class ModelSdkTests(unittest.TestCase):
92
92
  def setUp(self) -> None:
93
93
  self.env = mock.patch.dict(
94
94
  os.environ,
@@ -112,16 +112,16 @@ class ModelSdkTests(unittest.TestCase):
112
112
  captured["exchange_headers"] = dict(req.header_items())
113
113
  captured["exchange_body"] = json.loads(req.data.decode("utf-8"))
114
114
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
115
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
116
- captured["catalog_headers"] = dict(req.header_items())
117
- return _FakeResponse(_catalog_payload())
118
- if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
119
- captured["detail_headers"] = dict(req.header_items())
120
- return _FakeResponse(_model_detail_payload("chat-text"))
121
- if req.full_url == "http://detail.example/v1/chat/completions":
122
- captured["chat_headers"] = dict(req.header_items())
123
- captured["chat_body"] = json.loads(req.data.decode("utf-8"))
124
- return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
115
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
116
+ captured["catalog_headers"] = dict(req.header_items())
117
+ return _FakeResponse(_catalog_payload())
118
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
119
+ captured["detail_headers"] = dict(req.header_items())
120
+ return _FakeResponse(_model_detail_payload("chat-text"))
121
+ if req.full_url == "http://detail.example/v1/chat/completions":
122
+ captured["chat_headers"] = dict(req.header_items())
123
+ captured["chat_body"] = json.loads(req.data.decode("utf-8"))
124
+ return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
125
125
  raise AssertionError(req.full_url)
126
126
 
127
127
  with mock.patch.object(model.request, "urlopen", fake_urlopen):
@@ -129,12 +129,12 @@ class ModelSdkTests(unittest.TestCase):
129
129
 
130
130
  self.assertEqual(result.text, "hello")
131
131
  self.assertEqual(captured["exchange_headers"]["Authorization"], "Bearer ai-agent-token")
132
- self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
133
- self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
134
- self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
135
- self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
136
- self.assertEqual(captured["chat_body"]["model"], "chat-text")
137
- self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
132
+ self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
133
+ self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
134
+ self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
135
+ self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
136
+ self.assertEqual(captured["chat_body"]["model"], "chat-text")
137
+ self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
138
138
 
139
139
  def test_llm_explicit_model_overrides_catalog_default(self) -> None:
140
140
  captured: dict[str, Any] = {}
@@ -142,13 +142,13 @@ class ModelSdkTests(unittest.TestCase):
142
142
  def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
143
143
  if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
144
144
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
145
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
146
- return _FakeResponse(_catalog_payload())
147
- if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
148
- return _FakeResponse(_model_detail_payload("custom-chat-model"))
149
- if req.full_url == "http://detail.example/v1/chat/completions":
150
- captured["chat_body"] = json.loads(req.data.decode("utf-8"))
151
- return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
145
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
146
+ return _FakeResponse(_catalog_payload())
147
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
148
+ return _FakeResponse(_model_detail_payload("custom-chat-model"))
149
+ if req.full_url == "http://detail.example/v1/chat/completions":
150
+ captured["chat_body"] = json.loads(req.data.decode("utf-8"))
151
+ return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
152
152
  raise AssertionError(req.full_url)
153
153
 
154
154
  with mock.patch.object(model.request, "urlopen", fake_urlopen):
@@ -165,14 +165,14 @@ class ModelSdkTests(unittest.TestCase):
165
165
  def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
166
166
  if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
167
167
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": next(tokens)}})
168
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
169
- catalog_tokens.append(dict(req.header_items())["Authorization"])
170
- return _FakeResponse(_catalog_payload())
171
- if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
172
- return _FakeResponse(_model_detail_payload("chat-text"))
173
- if req.full_url == "http://detail.example/v1/chat/completions":
174
- chat_tokens.append(dict(req.header_items())["Authorization"])
175
- if len(chat_tokens) == 1:
168
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
169
+ catalog_tokens.append(dict(req.header_items())["Authorization"])
170
+ return _FakeResponse(_catalog_payload())
171
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
172
+ return _FakeResponse(_model_detail_payload("chat-text"))
173
+ if req.full_url == "http://detail.example/v1/chat/completions":
174
+ chat_tokens.append(dict(req.header_items())["Authorization"])
175
+ if len(chat_tokens) == 1:
176
176
  raise urlerror.HTTPError(
177
177
  req.full_url,
178
178
  401,
@@ -187,28 +187,28 @@ class ModelSdkTests(unittest.TestCase):
187
187
  result = model.LLMClient().invoke("Say hello", model="auto")
188
188
 
189
189
  self.assertEqual(result.text, "hello")
190
- self.assertEqual(catalog_tokens, ["Bearer old-token"])
191
- self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
192
-
193
- def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
194
- def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
195
- if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
196
- return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
197
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
198
- return _FakeResponse(_catalog_payload())
199
- if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
200
- return _FakeResponse(_model_detail_payload())
201
- raise AssertionError(req.full_url)
202
-
203
- with mock.patch.object(model.request, "urlopen", fake_urlopen):
204
- endpoint = model.resolve_llm_endpoint()
205
-
206
- self.assertEqual(endpoint.base_url, "http://detail.example/v1/chat/completions")
207
- self.assertEqual(endpoint.response_url, "http://detail.example/v1/responses")
208
- self.assertTrue(endpoint.cache_context)
209
- self.assertEqual(endpoint.context_length, 192000)
210
- self.assertEqual(endpoint.max_input_length, 128000)
211
- self.assertEqual(endpoint.max_output_length, 64000)
190
+ self.assertEqual(catalog_tokens, ["Bearer old-token"])
191
+ self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
192
+
193
+ def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
194
+ def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
195
+ if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
196
+ return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
197
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
198
+ return _FakeResponse(_catalog_payload())
199
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
200
+ return _FakeResponse(_model_detail_payload())
201
+ raise AssertionError(req.full_url)
202
+
203
+ with mock.patch.object(model.request, "urlopen", fake_urlopen):
204
+ endpoint = model.resolve_llm_endpoint()
205
+
206
+ self.assertEqual(endpoint.base_url, "http://detail.example/v1/chat/completions")
207
+ self.assertEqual(endpoint.response_url, "http://detail.example/v1/responses")
208
+ self.assertTrue(endpoint.cache_context)
209
+ self.assertEqual(endpoint.context_length, 192000)
210
+ self.assertEqual(endpoint.max_input_length, 128000)
211
+ self.assertEqual(endpoint.max_output_length, 64000)
212
212
 
213
213
  def test_image_generation_defaults_to_one_image(self) -> None:
214
214
  captured: dict[str, Any] = {}