licos-dev-sdk 0.2.3__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: licos-dev-sdk
3
- Version: 0.2.3
3
+ Version: 0.2.4
4
4
  Summary: LICOS Dev SDK - file generation and model capability clients
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: graphviz>=0.20
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "licos-dev-sdk"
7
- version = "0.2.3"
7
+ version = "0.2.4"
8
8
  description = "LICOS Dev SDK - file generation and model capability clients"
9
9
  requires-python = ">=3.10"
10
10
  dependencies = [
@@ -47,9 +47,10 @@ def __getattr__(name: str):
47
47
  "ImageGenerationClient": ("model", "ImageGenerationClient"),
48
48
  "VideoGenerationClient": ("model", "VideoGenerationClient"),
49
49
  "SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
50
- "ASRClient": ("model", "ASRClient"),
51
- "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
- "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
50
+ "ASRClient": ("model", "ASRClient"),
51
+ "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
+ "fetch_model_detail": ("model", "fetch_model_detail"),
53
+ "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
53
54
  "resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
54
55
  "resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
55
56
  "resolve_video_generation_endpoint": ("model", "resolve_video_generation_endpoint"),
@@ -92,8 +93,8 @@ __all__ = [
92
93
  "ModelRuntime", "ModelEndpoint", "ModelResult",
93
94
  "ApiError", "ConfigurationError",
94
95
  "LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
95
- "SpeechRecognitionClient", "ASRClient",
96
- "fetch_model_catalogs", "resolve_llm_endpoint", "resolve_vision_endpoint",
96
+ "SpeechRecognitionClient", "ASRClient",
97
+ "fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
97
98
  "resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
98
99
  "resolve_speech_recognition_endpoint",
99
100
  "invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
@@ -18,13 +18,15 @@ from licos_platform_sdk._runtime import (
18
18
  )
19
19
 
20
20
 
21
- MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
- DEFAULT_REQUEST_TIMEOUT_SECS = 120
23
- DEFAULT_ASYNC_TIMEOUT_SECS = 600
24
- DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
25
- DEFAULT_CATALOG_CACHE_TTL_SECS = 300
26
-
27
- _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
21
+ MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
+ MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
23
+ DEFAULT_REQUEST_TIMEOUT_SECS = 120
24
+ DEFAULT_ASYNC_TIMEOUT_SECS = 600
25
+ DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
26
+ DEFAULT_CATALOG_CACHE_TTL_SECS = 300
27
+
28
+ _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
29
+ _DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
28
30
 
29
31
 
30
32
  @dataclass(frozen=True)
@@ -35,13 +37,18 @@ class ModelRuntime:
35
37
 
36
38
 
37
39
  @dataclass(frozen=True)
38
- class ModelEndpoint:
39
- provider: str
40
- capability: str
41
- base_url: str
42
- model: str
43
- required_headers: dict[str, str] = field(default_factory=dict)
44
- endpoint: dict[str, Any] = field(default_factory=dict)
40
+ class ModelEndpoint:
41
+ provider: str
42
+ capability: str
43
+ base_url: str
44
+ model: str
45
+ required_headers: dict[str, str] = field(default_factory=dict)
46
+ endpoint: dict[str, Any] = field(default_factory=dict)
47
+ response_url: str | None = None
48
+ cache_context: bool = False
49
+ context_length: int | None = None
50
+ max_input_length: int | None = None
51
+ max_output_length: int | None = None
45
52
 
46
53
  @property
47
54
  def async_task(self) -> bool:
@@ -89,7 +96,7 @@ class ModelResult:
89
96
  return result
90
97
 
91
98
 
92
- def fetch_model_catalogs(
99
+ def fetch_model_catalogs(
93
100
  *,
94
101
  base_url: str | None = None,
95
102
  user_token: str | None = None,
@@ -97,19 +104,33 @@ def fetch_model_catalogs(
97
104
  refresh: bool = False,
98
105
  ) -> list[dict[str, Any]]:
99
106
  """Fetch platform model capability catalog using the project owner token."""
100
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
101
- return _fetch_model_catalogs(runtime, refresh=refresh)
102
-
103
-
104
- def resolve_llm_endpoint(
107
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
108
+ return _fetch_model_catalogs(runtime, refresh=refresh)
109
+
110
+
111
def fetch_model_detail(
    model_code: str,
    *,
    base_url: str | None = None,
    user_token: str | None = None,
    user_id: str | None = None,
    workspace_id: str | None = None,
    refresh: bool = False,
) -> dict[str, Any] | None:
    """Look up the platform's detail record for a single model.

    Builds a model runtime from ``base_url`` / ``user_token`` / ``user_id``
    and delegates the lookup (including caching) to ``_fetch_model_detail``.

    Args:
        model_code: Code of the model to look up.
        base_url: Forwarded to ``_model_runtime``.
        user_token: Forwarded to ``_model_runtime``.
        user_id: Forwarded to ``_model_runtime``.
        workspace_id: Optional workspace the lookup is scoped to.
        refresh: When true, skip any cached record and query the platform.

    Returns:
        The detail record, or ``None`` when the lookup yields no record.
    """
    owner_runtime = _model_runtime(
        base_url=base_url,
        user_token=user_token,
        user_id=user_id,
    )
    return _fetch_model_detail(
        owner_runtime,
        model_code,
        workspace_id=workspace_id,
        refresh=refresh,
    )
123
+
124
+
125
+ def resolve_llm_endpoint(
105
126
  *,
106
127
  model_group: str = "text",
107
128
  base_url: str | None = None,
108
129
  user_token: str | None = None,
109
130
  user_id: str | None = None,
110
- ) -> ModelEndpoint:
111
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
112
- return _resolve_endpoint(runtime, "chat", model_group=model_group)
131
+ ) -> ModelEndpoint:
132
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
133
+ return _resolve_chat_endpoint(runtime, model_group=model_group)
113
134
 
114
135
 
115
136
  def resolve_vision_endpoint(
@@ -164,7 +185,7 @@ class LLMClient:
164
185
  self.ctx = ctx
165
186
  self.runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
166
187
 
167
- def invoke(
188
+ def invoke(
168
189
  self,
169
190
  messages: Sequence[Any] | str,
170
191
  *,
@@ -173,19 +194,22 @@ class LLMClient:
173
194
  max_completion_tokens: int | None = None,
174
195
  timeout: int | None = None,
175
196
  **extra: Any,
176
- ) -> ModelResult:
177
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
178
- selected_model = _selected_model(model, endpoint.model)
179
- body = {
180
- "model": selected_model,
181
- "messages": _normalize_messages(messages),
182
- **_not_none(
183
- {
184
- "temperature": temperature,
185
- "max_completion_tokens": max_completion_tokens,
186
- **extra,
187
- }
188
- ),
197
+ ) -> ModelResult:
198
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
199
+ selected_model = endpoint.model
200
+ body = {
201
+ "model": selected_model,
202
+ "messages": _normalize_messages(messages),
203
+ **_not_none(
204
+ {
205
+ "temperature": temperature,
206
+ "max_completion_tokens": _effective_max_completion_tokens(
207
+ max_completion_tokens,
208
+ endpoint,
209
+ ),
210
+ **extra,
211
+ }
212
+ ),
189
213
  }
190
214
  response = _post_model_json(endpoint, self.runtime, body, timeout=timeout)
191
215
  texts = _extract_chat_texts(response) or _collect_texts(response)
@@ -207,20 +231,23 @@ class LLMClient:
207
231
  max_completion_tokens: int | None = None,
208
232
  timeout: int | None = None,
209
233
  **extra: Any,
210
- ) -> Iterator[str]:
211
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
212
- selected_model = _selected_model(model, endpoint.model)
213
- body = {
214
- "model": selected_model,
215
- "messages": _normalize_messages(messages),
216
- "stream": True,
217
- **_not_none(
218
- {
219
- "temperature": temperature,
220
- "max_completion_tokens": max_completion_tokens,
221
- **extra,
222
- }
223
- ),
234
+ ) -> Iterator[str]:
235
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
236
+ selected_model = endpoint.model
237
+ body = {
238
+ "model": selected_model,
239
+ "messages": _normalize_messages(messages),
240
+ "stream": True,
241
+ **_not_none(
242
+ {
243
+ "temperature": temperature,
244
+ "max_completion_tokens": _effective_max_completion_tokens(
245
+ max_completion_tokens,
246
+ endpoint,
247
+ ),
248
+ **extra,
249
+ }
250
+ ),
224
251
  }
225
252
  yield from _stream_model_json(endpoint, self.runtime, body, timeout=timeout)
226
253
 
@@ -451,8 +478,9 @@ def understand_image(**kwargs: Any) -> ModelResult:
451
478
  return VisionClient().understand(**kwargs)
452
479
 
453
480
 
454
- def clear_model_catalog_cache_for_tests() -> None:
455
- _CATALOG_CACHE.clear()
481
def clear_model_catalog_cache_for_tests() -> None:
    """Empty the module-level catalog and model-detail caches."""
    for cache in (_CATALOG_CACHE, _DETAIL_CACHE):
        cache.clear()
456
484
 
457
485
 
458
486
  def _model_runtime(
@@ -472,7 +500,7 @@ def _refresh_model_runtime(runtime: ModelRuntime) -> ModelRuntime:
472
500
  return replace(runtime, token=token)
473
501
 
474
502
 
475
- def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
503
+ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
476
504
  cache_key = (runtime.base_url, runtime.token)
477
505
  ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
478
506
  cached = _CATALOG_CACHE.get(cache_key)
@@ -493,11 +521,81 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
493
521
  catalogs = _catalogs_from_payload(payload)
494
522
  if not catalogs:
495
523
  raise ApiError("model catalog has no provider entries", details=payload)
496
- _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
497
- return catalogs
498
-
499
-
500
- def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
524
+ _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
525
+ return catalogs
526
+
527
+
528
+ def _workspace_id(workspace_id: str | None = None) -> str | None:
529
+ value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
530
+ if value is None:
531
+ return None
532
+ value = str(value).strip()
533
+ return value or None
534
+
535
+
536
def _fetch_model_detail(
    runtime: ModelRuntime,
    model_code: str,
    *,
    workspace_id: str | None = None,
    refresh: bool = False,
) -> dict[str, Any] | None:
    """Fetch one model's detail record, using the module-level detail cache.

    Cache entries are keyed by base URL, token, model code and resolved
    workspace id. Their maximum age comes from
    ``LICOS_MODEL_CATALOG_CACHE_TTL_SECS``, defaulting to
    ``DEFAULT_CATALOG_CACHE_TTL_SECS``.

    Args:
        runtime: Runtime providing the platform base URL and bearer token.
        model_code: Model code to query; a blank code short-circuits to None.
        workspace_id: Optional workspace id, resolved via ``_workspace_id``.
        refresh: Skip the cache. It also marks the call as the retry, so a
            second token refresh is never attempted.

    Returns:
        The detail record, or ``None`` for a blank code or an empty response.

    Raises:
        ApiError: When the request fails and no token-refresh retry applies.
    """
    code = str(model_code or "").strip()
    if not code:
        return None

    workspace = _workspace_id(workspace_id) or ""
    key = (runtime.base_url, runtime.token, code, workspace)
    max_age = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
    entry = _DETAIL_CACHE.get(key)
    if entry and not refresh:
        stored_at, stored_detail = entry
        if time.time() - stored_at <= max_age:
            return stored_detail

    # The workspace is sent both as a query parameter and as a header.
    params = {"code": code}
    extra_headers = None
    if workspace:
        params["workspaceId"] = workspace
        extra_headers = {"X-Workspace-Id": workspace}
    target = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(params)}"

    try:
        payload = _request_json(
            "GET",
            target,
            token=runtime.token,
            headers=extra_headers,
            timeout=30,
        )
    except ApiError as exc:
        if refresh or not should_refresh_user_token(exc):
            raise
        # Retry once with a refreshed token.
        return _fetch_model_detail(
            _refresh_model_runtime(runtime),
            code,
            workspace_id=workspace_id,
            refresh=True,
        )

    record = _model_detail_from_payload(payload)
    _DETAIL_CACHE[key] = (time.time(), record)
    return record
578
+
579
+
580
+ def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
581
+ if not isinstance(payload, dict):
582
+ raise ApiError("model detail response is not an object", details=payload)
583
+ code = payload.get("code")
584
+ if code not in (None, 0) or payload.get("success") is False:
585
+ raise ApiError(
586
+ str(payload.get("message") or "model detail API failed"),
587
+ code=code if isinstance(code, int) else None,
588
+ details=payload,
589
+ )
590
+ data = payload.get("data")
591
+ if data is None:
592
+ return None
593
+ if not isinstance(data, dict):
594
+ raise ApiError("model detail data is not an object", details=payload)
595
+ return data
596
+
597
+
598
+ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
501
599
  if not isinstance(payload, dict):
502
600
  raise ApiError("model catalog response is not an object", details=payload)
503
601
  code = payload.get("code")
@@ -516,10 +614,26 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
516
614
  items = []
517
615
  if not isinstance(items, list):
518
616
  raise ApiError("model catalog data is not a list", details=payload)
519
- return [item for item in items if isinstance(item, dict)]
520
-
521
-
522
- def _resolve_endpoint(
617
+ return [item for item in items if isinstance(item, dict)]
618
+
619
+
620
def _resolve_chat_endpoint(
    runtime: ModelRuntime,
    *,
    model_group: str,
    requested_model: str | None = None,
) -> ModelEndpoint:
    """Resolve the chat endpoint and enrich it with model-detail data.

    The catalog endpoint for the ``chat`` capability is resolved first. Its
    model is then replaced by the outcome of ``_selected_model`` for
    ``requested_model``, and the detail record for that model is applied on
    top.

    Args:
        runtime: Runtime used for the catalog and detail lookups.
        model_group: Model group forwarded to ``_resolve_endpoint``.
        requested_model: Caller-requested model, if any.

    Returns:
        The endpoint with detail fields applied. If the detail lookup raises
        ``ApiError``, the endpoint is returned with only the model replaced.
    """
    catalog_endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
    chosen_model = _selected_model(requested_model, catalog_endpoint.model)
    chat_endpoint = replace(catalog_endpoint, model=chosen_model)
    try:
        model_detail = _fetch_model_detail(runtime, chosen_model)
    except ApiError:
        # Detail enrichment is best-effort; the catalog endpoint still works.
        return chat_endpoint
    return _apply_model_detail(chat_endpoint, model_detail)
634
+
635
+
636
+ def _resolve_endpoint(
523
637
  runtime: ModelRuntime,
524
638
  capability_key: str,
525
639
  *,
@@ -552,7 +666,59 @@ def _resolve_endpoint(
552
666
  required_headers=_parse_required_headers(capability.get("requiredHeaders")),
553
667
  endpoint=capability,
554
668
  )
555
- raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
669
+ raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
670
+
671
+
672
def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
    """Overlay values from a model-detail record onto ``endpoint``.

    Every overlaid field keeps the endpoint's current value when the detail
    record has no usable value for it. Previously only ``base_url``,
    ``response_url`` and ``cache_context`` did this, and the three token
    limits were reset to ``None``.

    Args:
        endpoint: Endpoint to enrich; it is not modified.
        detail: Detail record, or ``None``/empty to leave the endpoint as is.

    Returns:
        ``endpoint`` itself when ``detail`` is empty, otherwise a copy with
        the base URL, response URL, cache-context flag and token limits
        taken from ``detail``.
    """
    if not detail:
        return endpoint

    def _limit(current: int | None, aliases: list[str]) -> int | None:
        # Keep the endpoint's existing limit when the detail gives none.
        parsed = _int_field(detail, aliases)
        return current if parsed is None else parsed

    base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
    response_url = _first_non_empty_field(
        detail,
        ["responseUrl", "response_url", "responsesUrl", "responses_url"],
    )
    cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
    return replace(
        endpoint,
        base_url=base_url,
        response_url=response_url or endpoint.response_url,
        cache_context=endpoint.cache_context if cache_context is None else cache_context,
        context_length=_limit(
            endpoint.context_length,
            [
                "contextLength",
                "context_length",
                "contextWindow",
                "context_window",
                "maxContextTokens",
                "max_context_tokens",
            ],
        ),
        max_input_length=_limit(
            endpoint.max_input_length,
            [
                "maxInputLength",
                "max_input_length",
                "maxInputTokens",
                "max_input_tokens",
                "inputTokenLimit",
                "input_token_limit",
            ],
        ),
        max_output_length=_limit(
            endpoint.max_output_length,
            [
                "maxOutputLength",
                "max_output_length",
                "maxOutputTokens",
                "max_output_tokens",
                "outputTokenLimit",
                "output_token_limit",
                "maxCompletionTokens",
                "max_completion_tokens",
            ],
        ),
    )
556
722
 
557
723
 
558
724
  def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
@@ -598,7 +764,7 @@ def _first_array_model(models: Any) -> str | None:
598
764
  return _first_string(models)
599
765
 
600
766
 
601
- def _first_string(value: Any) -> str | None:
767
+ def _first_string(value: Any) -> str | None:
602
768
  if isinstance(value, str):
603
769
  trimmed = value.strip()
604
770
  return trimmed or None
@@ -606,10 +772,60 @@ def _first_string(value: Any) -> str | None:
606
772
  for item in value:
607
773
  if isinstance(item, str) and item.strip():
608
774
  return item.strip()
609
- return None
610
-
611
-
612
- def _post_model_json(
775
+ return None
776
+
777
+
778
+ def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
779
+ for name in names:
780
+ value = data.get(name)
781
+ if isinstance(value, str) and value.strip():
782
+ return value.strip()
783
+ return None
784
+
785
+
786
+ def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
787
+ for name in names:
788
+ value = data.get(name)
789
+ if isinstance(value, bool):
790
+ return value
791
+ if isinstance(value, str):
792
+ normalized = value.strip().lower()
793
+ if normalized in {"true", "1", "yes", "y"}:
794
+ return True
795
+ if normalized in {"false", "0", "no", "n"}:
796
+ return False
797
+ return None
798
+
799
+
800
+ def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
801
+ for name in names:
802
+ value = data.get(name)
803
+ if isinstance(value, bool):
804
+ continue
805
+ if isinstance(value, int):
806
+ return value if value > 0 else None
807
+ if isinstance(value, float):
808
+ parsed = int(value)
809
+ return parsed if parsed > 0 else None
810
+ if isinstance(value, str):
811
+ try:
812
+ parsed = int(value.strip())
813
+ except ValueError:
814
+ continue
815
+ return parsed if parsed > 0 else None
816
+ return None
817
+
818
+
819
+ def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
820
+ limit = endpoint.max_output_length
821
+ if requested is None:
822
+ return limit
823
+ if limit is None:
824
+ return requested
825
+ return min(requested, limit)
826
+
827
+
828
+ def _post_model_json(
613
829
  endpoint: ModelEndpoint,
614
830
  runtime: ModelRuntime,
615
831
  body: dict[str, Any],
@@ -40,7 +40,7 @@ class _FakeErrorBody:
40
40
  return json.dumps(self._payload).encode("utf-8")
41
41
 
42
42
 
43
- def _catalog_payload() -> dict[str, Any]:
43
+ def _catalog_payload() -> dict[str, Any]:
44
44
  return {
45
45
  "code": 0,
46
46
  "success": True,
@@ -69,10 +69,26 @@ def _catalog_payload() -> dict[str, Any]:
69
69
  }
70
70
  ]
71
71
  },
72
- }
73
-
74
-
75
- class ModelSdkTests(unittest.TestCase):
72
+ }
73
+
74
+
75
+ def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
76
+ return {
77
+ "code": 0,
78
+ "success": True,
79
+ "data": {
80
+ "code": model_code,
81
+ "baseUrl": "http://detail.example/v1/chat/completions",
82
+ "responseUrl": "http://detail.example/v1/responses",
83
+ "cacheContext": True,
84
+ "contextLength": 192000,
85
+ "maxInputLength": 128000,
86
+ "maxOutputLength": 64000,
87
+ },
88
+ }
89
+
90
+
91
+ class ModelSdkTests(unittest.TestCase):
76
92
  def setUp(self) -> None:
77
93
  self.env = mock.patch.dict(
78
94
  os.environ,
@@ -96,13 +112,16 @@ class ModelSdkTests(unittest.TestCase):
96
112
  captured["exchange_headers"] = dict(req.header_items())
97
113
  captured["exchange_body"] = json.loads(req.data.decode("utf-8"))
98
114
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
99
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
100
- captured["catalog_headers"] = dict(req.header_items())
101
- return _FakeResponse(_catalog_payload())
102
- if req.full_url == "http://gateway.example/v1/chat/completions":
103
- captured["chat_headers"] = dict(req.header_items())
104
- captured["chat_body"] = json.loads(req.data.decode("utf-8"))
105
- return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
115
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
116
+ captured["catalog_headers"] = dict(req.header_items())
117
+ return _FakeResponse(_catalog_payload())
118
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
119
+ captured["detail_headers"] = dict(req.header_items())
120
+ return _FakeResponse(_model_detail_payload("chat-text"))
121
+ if req.full_url == "http://detail.example/v1/chat/completions":
122
+ captured["chat_headers"] = dict(req.header_items())
123
+ captured["chat_body"] = json.loads(req.data.decode("utf-8"))
124
+ return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
106
125
  raise AssertionError(req.full_url)
107
126
 
108
127
  with mock.patch.object(model.request, "urlopen", fake_urlopen):
@@ -110,10 +129,12 @@ class ModelSdkTests(unittest.TestCase):
110
129
 
111
130
  self.assertEqual(result.text, "hello")
112
131
  self.assertEqual(captured["exchange_headers"]["Authorization"], "Bearer ai-agent-token")
113
- self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
114
- self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
115
- self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
116
- self.assertEqual(captured["chat_body"]["model"], "chat-text")
132
+ self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
133
+ self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
134
+ self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
135
+ self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
136
+ self.assertEqual(captured["chat_body"]["model"], "chat-text")
137
+ self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
117
138
 
118
139
  def test_llm_explicit_model_overrides_catalog_default(self) -> None:
119
140
  captured: dict[str, Any] = {}
@@ -121,11 +142,13 @@ class ModelSdkTests(unittest.TestCase):
121
142
  def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
122
143
  if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
123
144
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
124
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
125
- return _FakeResponse(_catalog_payload())
126
- if req.full_url == "http://gateway.example/v1/chat/completions":
127
- captured["chat_body"] = json.loads(req.data.decode("utf-8"))
128
- return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
145
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
146
+ return _FakeResponse(_catalog_payload())
147
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
148
+ return _FakeResponse(_model_detail_payload("custom-chat-model"))
149
+ if req.full_url == "http://detail.example/v1/chat/completions":
150
+ captured["chat_body"] = json.loads(req.data.decode("utf-8"))
151
+ return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
129
152
  raise AssertionError(req.full_url)
130
153
 
131
154
  with mock.patch.object(model.request, "urlopen", fake_urlopen):
@@ -142,12 +165,14 @@ class ModelSdkTests(unittest.TestCase):
142
165
  def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
143
166
  if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
144
167
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": next(tokens)}})
145
- if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
146
- catalog_tokens.append(dict(req.header_items())["Authorization"])
147
- return _FakeResponse(_catalog_payload())
148
- if req.full_url == "http://gateway.example/v1/chat/completions":
149
- chat_tokens.append(dict(req.header_items())["Authorization"])
150
- if len(chat_tokens) == 1:
168
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
169
+ catalog_tokens.append(dict(req.header_items())["Authorization"])
170
+ return _FakeResponse(_catalog_payload())
171
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
172
+ return _FakeResponse(_model_detail_payload("chat-text"))
173
+ if req.full_url == "http://detail.example/v1/chat/completions":
174
+ chat_tokens.append(dict(req.header_items())["Authorization"])
175
+ if len(chat_tokens) == 1:
151
176
  raise urlerror.HTTPError(
152
177
  req.full_url,
153
178
  401,
@@ -162,8 +187,28 @@ class ModelSdkTests(unittest.TestCase):
162
187
  result = model.LLMClient().invoke("Say hello", model="auto")
163
188
 
164
189
  self.assertEqual(result.text, "hello")
165
- self.assertEqual(catalog_tokens, ["Bearer old-token"])
166
- self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
190
+ self.assertEqual(catalog_tokens, ["Bearer old-token"])
191
+ self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
192
+
193
+ def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
194
+ def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
195
+ if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
196
+ return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
197
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
198
+ return _FakeResponse(_catalog_payload())
199
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
200
+ return _FakeResponse(_model_detail_payload())
201
+ raise AssertionError(req.full_url)
202
+
203
+ with mock.patch.object(model.request, "urlopen", fake_urlopen):
204
+ endpoint = model.resolve_llm_endpoint()
205
+
206
+ self.assertEqual(endpoint.base_url, "http://detail.example/v1/chat/completions")
207
+ self.assertEqual(endpoint.response_url, "http://detail.example/v1/responses")
208
+ self.assertTrue(endpoint.cache_context)
209
+ self.assertEqual(endpoint.context_length, 192000)
210
+ self.assertEqual(endpoint.max_input_length, 128000)
211
+ self.assertEqual(endpoint.max_output_length, 64000)
167
212
 
168
213
  def test_image_generation_defaults_to_one_image(self) -> None:
169
214
  captured: dict[str, Any] = {}
File without changes