licos-dev-sdk 0.2.3__tar.gz → 0.2.5__tar.gz

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the contents of each version as they appear in their respective public registries.
@@ -17,9 +17,10 @@ packages/*/dist/
17
17
  .DS_Store
18
18
  Thumbs.db
19
19
 
20
- # Environment
21
- .env
22
- .env.local
20
+ # Environment
21
+ .env
22
+ .env.local
23
+ crates/industrial/industrial-stack.env
23
24
 
24
25
  # Workspace
25
26
  /workspace/
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: licos-dev-sdk
3
- Version: 0.2.3
3
+ Version: 0.2.5
4
4
  Summary: LICOS Dev SDK - file generation and model capability clients
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: graphviz>=0.20
7
7
  Requires-Dist: jinja2>=3.1
8
- Requires-Dist: licos-platform-sdk>=0.2.6
8
+ Requires-Dist: licos-platform-sdk>=0.2.8
9
9
  Requires-Dist: matplotlib>=3.9
10
10
  Requires-Dist: mistune>=3.0
11
11
  Requires-Dist: openpyxl>=3.1
@@ -4,11 +4,11 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "licos-dev-sdk"
7
- version = "0.2.3"
7
+ version = "0.2.5"
8
8
  description = "LICOS Dev SDK - file generation and model capability clients"
9
9
  requires-python = ">=3.10"
10
10
  dependencies = [
11
- "licos-platform-sdk>=0.2.6",
11
+ "licos-platform-sdk>=0.2.8",
12
12
  "weasyprint>=62.0",
13
13
  "python-docx>=1.1",
14
14
  "openpyxl>=3.1",
@@ -24,4 +24,4 @@ dependencies = [
24
24
  ]
25
25
 
26
26
  [tool.hatch.build.targets.wheel]
27
- packages = ["src/licos_dev_sdk"]
27
+ packages = ["src/licos_dev_sdk"]
@@ -49,6 +49,7 @@ def __getattr__(name: str):
49
49
  "SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
50
50
  "ASRClient": ("model", "ASRClient"),
51
51
  "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
+ "fetch_model_detail": ("model", "fetch_model_detail"),
52
53
  "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
53
54
  "resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
54
55
  "resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
@@ -93,7 +94,7 @@ __all__ = [
93
94
  "ApiError", "ConfigurationError",
94
95
  "LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
95
96
  "SpeechRecognitionClient", "ASRClient",
96
- "fetch_model_catalogs", "resolve_llm_endpoint", "resolve_vision_endpoint",
97
+ "fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
97
98
  "resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
98
99
  "resolve_speech_recognition_endpoint",
99
100
  "invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
@@ -19,12 +19,14 @@ from licos_platform_sdk._runtime import (
19
19
 
20
20
 
21
21
  MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
+ MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
22
23
  DEFAULT_REQUEST_TIMEOUT_SECS = 120
23
24
  DEFAULT_ASYNC_TIMEOUT_SECS = 600
24
25
  DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
25
26
  DEFAULT_CATALOG_CACHE_TTL_SECS = 300
26
27
 
27
28
  _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
29
+ _DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
28
30
 
29
31
 
30
32
  @dataclass(frozen=True)
@@ -42,6 +44,11 @@ class ModelEndpoint:
42
44
  model: str
43
45
  required_headers: dict[str, str] = field(default_factory=dict)
44
46
  endpoint: dict[str, Any] = field(default_factory=dict)
47
+ response_url: str | None = None
48
+ cache_context: bool = False
49
+ context_length: int | None = None
50
+ max_input_length: int | None = None
51
+ max_output_length: int | None = None
45
52
 
46
53
  @property
47
54
  def async_task(self) -> bool:
@@ -101,6 +108,20 @@ def fetch_model_catalogs(
101
108
  return _fetch_model_catalogs(runtime, refresh=refresh)
102
109
 
103
110
 
111
+ def fetch_model_detail(
112
+ model_code: str,
113
+ *,
114
+ base_url: str | None = None,
115
+ user_token: str | None = None,
116
+ user_id: str | None = None,
117
+ workspace_id: str | None = None,
118
+ refresh: bool = False,
119
+ ) -> dict[str, Any] | None:
120
+ """Fetch model detail, including base URL, Response API support and token limits."""
121
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
122
+ return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
123
+
124
+
104
125
  def resolve_llm_endpoint(
105
126
  *,
106
127
  model_group: str = "text",
@@ -109,7 +130,7 @@ def resolve_llm_endpoint(
109
130
  user_id: str | None = None,
110
131
  ) -> ModelEndpoint:
111
132
  runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
112
- return _resolve_endpoint(runtime, "chat", model_group=model_group)
133
+ return _resolve_chat_endpoint(runtime, model_group=model_group)
113
134
 
114
135
 
115
136
  def resolve_vision_endpoint(
@@ -174,15 +195,18 @@ class LLMClient:
174
195
  timeout: int | None = None,
175
196
  **extra: Any,
176
197
  ) -> ModelResult:
177
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
178
- selected_model = _selected_model(model, endpoint.model)
198
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
199
+ selected_model = endpoint.model
179
200
  body = {
180
201
  "model": selected_model,
181
202
  "messages": _normalize_messages(messages),
182
203
  **_not_none(
183
204
  {
184
205
  "temperature": temperature,
185
- "max_completion_tokens": max_completion_tokens,
206
+ "max_completion_tokens": _effective_max_completion_tokens(
207
+ max_completion_tokens,
208
+ endpoint,
209
+ ),
186
210
  **extra,
187
211
  }
188
212
  ),
@@ -208,8 +232,8 @@ class LLMClient:
208
232
  timeout: int | None = None,
209
233
  **extra: Any,
210
234
  ) -> Iterator[str]:
211
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
212
- selected_model = _selected_model(model, endpoint.model)
235
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
236
+ selected_model = endpoint.model
213
237
  body = {
214
238
  "model": selected_model,
215
239
  "messages": _normalize_messages(messages),
@@ -217,7 +241,10 @@ class LLMClient:
217
241
  **_not_none(
218
242
  {
219
243
  "temperature": temperature,
220
- "max_completion_tokens": max_completion_tokens,
244
+ "max_completion_tokens": _effective_max_completion_tokens(
245
+ max_completion_tokens,
246
+ endpoint,
247
+ ),
221
248
  **extra,
222
249
  }
223
250
  ),
@@ -453,6 +480,7 @@ def understand_image(**kwargs: Any) -> ModelResult:
453
480
 
454
481
  def clear_model_catalog_cache_for_tests() -> None:
455
482
  _CATALOG_CACHE.clear()
483
+ _DETAIL_CACHE.clear()
456
484
 
457
485
 
458
486
  def _model_runtime(
@@ -497,6 +525,76 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
497
525
  return catalogs
498
526
 
499
527
 
528
+ def _workspace_id(workspace_id: str | None = None) -> str | None:
529
+ value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
530
+ if value is None:
531
+ return None
532
+ value = str(value).strip()
533
+ return value or None
534
+
535
+
536
+ def _fetch_model_detail(
537
+ runtime: ModelRuntime,
538
+ model_code: str,
539
+ *,
540
+ workspace_id: str | None = None,
541
+ refresh: bool = False,
542
+ ) -> dict[str, Any] | None:
543
+ model_code = str(model_code or "").strip()
544
+ if not model_code:
545
+ return None
546
+ resolved_workspace_id = _workspace_id(workspace_id) or ""
547
+ cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
548
+ ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
549
+ cached = _DETAIL_CACHE.get(cache_key)
550
+ if cached and not refresh and time.time() - cached[0] <= ttl:
551
+ return cached[1]
552
+
553
+ query = {"code": model_code}
554
+ if resolved_workspace_id:
555
+ query["workspaceId"] = resolved_workspace_id
556
+ url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
557
+ headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
558
+ try:
559
+ payload = _request_json(
560
+ "GET",
561
+ url,
562
+ token=runtime.token,
563
+ headers=headers,
564
+ timeout=30,
565
+ )
566
+ except ApiError as exc:
567
+ if not refresh and should_refresh_user_token(exc):
568
+ return _fetch_model_detail(
569
+ _refresh_model_runtime(runtime),
570
+ model_code,
571
+ workspace_id=workspace_id,
572
+ refresh=True,
573
+ )
574
+ raise
575
+ detail = _model_detail_from_payload(payload)
576
+ _DETAIL_CACHE[cache_key] = (time.time(), detail)
577
+ return detail
578
+
579
+
580
+ def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
581
+ if not isinstance(payload, dict):
582
+ raise ApiError("model detail response is not an object", details=payload)
583
+ code = payload.get("code")
584
+ if code not in (None, 0) or payload.get("success") is False:
585
+ raise ApiError(
586
+ str(payload.get("message") or "model detail API failed"),
587
+ code=code if isinstance(code, int) else None,
588
+ details=payload,
589
+ )
590
+ data = payload.get("data")
591
+ if data is None:
592
+ return None
593
+ if not isinstance(data, dict):
594
+ raise ApiError("model detail data is not an object", details=payload)
595
+ return data
596
+
597
+
500
598
  def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
501
599
  if not isinstance(payload, dict):
502
600
  raise ApiError("model catalog response is not an object", details=payload)
@@ -519,6 +617,22 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
519
617
  return [item for item in items if isinstance(item, dict)]
520
618
 
521
619
 
620
+ def _resolve_chat_endpoint(
621
+ runtime: ModelRuntime,
622
+ *,
623
+ model_group: str,
624
+ requested_model: str | None = None,
625
+ ) -> ModelEndpoint:
626
+ endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
627
+ selected_model = _selected_model(requested_model, endpoint.model)
628
+ endpoint = replace(endpoint, model=selected_model)
629
+ try:
630
+ detail = _fetch_model_detail(runtime, selected_model)
631
+ except ApiError:
632
+ return endpoint
633
+ return _apply_model_detail(endpoint, detail)
634
+
635
+
522
636
  def _resolve_endpoint(
523
637
  runtime: ModelRuntime,
524
638
  capability_key: str,
@@ -555,6 +669,58 @@ def _resolve_endpoint(
555
669
  raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
556
670
 
557
671
 
672
+ def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
673
+ if not detail:
674
+ return endpoint
675
+ base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
676
+ response_url = _first_non_empty_field(
677
+ detail,
678
+ ["responseUrl", "response_url", "responsesUrl", "responses_url"],
679
+ )
680
+ cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
681
+ return replace(
682
+ endpoint,
683
+ base_url=base_url,
684
+ response_url=response_url or endpoint.response_url,
685
+ cache_context=endpoint.cache_context if cache_context is None else cache_context,
686
+ context_length=_int_field(
687
+ detail,
688
+ [
689
+ "contextLength",
690
+ "context_length",
691
+ "contextWindow",
692
+ "context_window",
693
+ "maxContextTokens",
694
+ "max_context_tokens",
695
+ ],
696
+ ),
697
+ max_input_length=_int_field(
698
+ detail,
699
+ [
700
+ "maxInputLength",
701
+ "max_input_length",
702
+ "maxInputTokens",
703
+ "max_input_tokens",
704
+ "inputTokenLimit",
705
+ "input_token_limit",
706
+ ],
707
+ ),
708
+ max_output_length=_int_field(
709
+ detail,
710
+ [
711
+ "maxOutputLength",
712
+ "max_output_length",
713
+ "maxOutputTokens",
714
+ "max_output_tokens",
715
+ "outputTokenLimit",
716
+ "output_token_limit",
717
+ "maxCompletionTokens",
718
+ "max_completion_tokens",
719
+ ],
720
+ ),
721
+ )
722
+
723
+
558
724
  def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
559
725
  value = capability.get("baseUrl") or capability.get("base_url")
560
726
  if isinstance(value, str) and value.strip():
@@ -609,6 +775,56 @@ def _first_string(value: Any) -> str | None:
609
775
  return None
610
776
 
611
777
 
778
+ def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
779
+ for name in names:
780
+ value = data.get(name)
781
+ if isinstance(value, str) and value.strip():
782
+ return value.strip()
783
+ return None
784
+
785
+
786
+ def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
787
+ for name in names:
788
+ value = data.get(name)
789
+ if isinstance(value, bool):
790
+ return value
791
+ if isinstance(value, str):
792
+ normalized = value.strip().lower()
793
+ if normalized in {"true", "1", "yes", "y"}:
794
+ return True
795
+ if normalized in {"false", "0", "no", "n"}:
796
+ return False
797
+ return None
798
+
799
+
800
+ def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
801
+ for name in names:
802
+ value = data.get(name)
803
+ if isinstance(value, bool):
804
+ continue
805
+ if isinstance(value, int):
806
+ return value if value > 0 else None
807
+ if isinstance(value, float):
808
+ parsed = int(value)
809
+ return parsed if parsed > 0 else None
810
+ if isinstance(value, str):
811
+ try:
812
+ parsed = int(value.strip())
813
+ except ValueError:
814
+ continue
815
+ return parsed if parsed > 0 else None
816
+ return None
817
+
818
+
819
+ def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
820
+ limit = endpoint.max_output_length
821
+ if requested is None:
822
+ return limit
823
+ if limit is None:
824
+ return requested
825
+ return min(requested, limit)
826
+
827
+
612
828
  def _post_model_json(
613
829
  endpoint: ModelEndpoint,
614
830
  runtime: ModelRuntime,
@@ -72,6 +72,22 @@ def _catalog_payload() -> dict[str, Any]:
72
72
  }
73
73
 
74
74
 
75
+ def _model_detail_payload(model_code: str = "chat-text") -> dict[str, Any]:
76
+ return {
77
+ "code": 0,
78
+ "success": True,
79
+ "data": {
80
+ "code": model_code,
81
+ "baseUrl": "http://detail.example/v1/chat/completions",
82
+ "responseUrl": "http://detail.example/v1/responses",
83
+ "cacheContext": True,
84
+ "contextLength": 192000,
85
+ "maxInputLength": 128000,
86
+ "maxOutputLength": 64000,
87
+ },
88
+ }
89
+
90
+
75
91
  class ModelSdkTests(unittest.TestCase):
76
92
  def setUp(self) -> None:
77
93
  self.env = mock.patch.dict(
@@ -99,7 +115,10 @@ class ModelSdkTests(unittest.TestCase):
99
115
  if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
100
116
  captured["catalog_headers"] = dict(req.header_items())
101
117
  return _FakeResponse(_catalog_payload())
102
- if req.full_url == "http://gateway.example/v1/chat/completions":
118
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
119
+ captured["detail_headers"] = dict(req.header_items())
120
+ return _FakeResponse(_model_detail_payload("chat-text"))
121
+ if req.full_url == "http://detail.example/v1/chat/completions":
103
122
  captured["chat_headers"] = dict(req.header_items())
104
123
  captured["chat_body"] = json.loads(req.data.decode("utf-8"))
105
124
  return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
@@ -112,8 +131,10 @@ class ModelSdkTests(unittest.TestCase):
112
131
  self.assertEqual(captured["exchange_headers"]["Authorization"], "Bearer ai-agent-token")
113
132
  self.assertEqual(captured["exchange_body"], {"userId": "user-1"})
114
133
  self.assertEqual(captured["catalog_headers"]["Authorization"], "Bearer user-token")
134
+ self.assertEqual(captured["detail_headers"]["Authorization"], "Bearer user-token")
115
135
  self.assertEqual(captured["chat_headers"]["Authorization"], "Bearer user-token")
116
136
  self.assertEqual(captured["chat_body"]["model"], "chat-text")
137
+ self.assertEqual(captured["chat_body"]["max_completion_tokens"], 64000)
117
138
 
118
139
  def test_llm_explicit_model_overrides_catalog_default(self) -> None:
119
140
  captured: dict[str, Any] = {}
@@ -123,7 +144,9 @@ class ModelSdkTests(unittest.TestCase):
123
144
  return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
124
145
  if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
125
146
  return _FakeResponse(_catalog_payload())
126
- if req.full_url == "http://gateway.example/v1/chat/completions":
147
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
148
+ return _FakeResponse(_model_detail_payload("custom-chat-model"))
149
+ if req.full_url == "http://detail.example/v1/chat/completions":
127
150
  captured["chat_body"] = json.loads(req.data.decode("utf-8"))
128
151
  return _FakeResponse({"choices": [{"message": {"content": "hello"}}]})
129
152
  raise AssertionError(req.full_url)
@@ -145,7 +168,9 @@ class ModelSdkTests(unittest.TestCase):
145
168
  if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
146
169
  catalog_tokens.append(dict(req.header_items())["Authorization"])
147
170
  return _FakeResponse(_catalog_payload())
148
- if req.full_url == "http://gateway.example/v1/chat/completions":
171
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
172
+ return _FakeResponse(_model_detail_payload("chat-text"))
173
+ if req.full_url == "http://detail.example/v1/chat/completions":
149
174
  chat_tokens.append(dict(req.header_items())["Authorization"])
150
175
  if len(chat_tokens) == 1:
151
176
  raise urlerror.HTTPError(
@@ -165,6 +190,26 @@ class ModelSdkTests(unittest.TestCase):
165
190
  self.assertEqual(catalog_tokens, ["Bearer old-token"])
166
191
  self.assertEqual(chat_tokens, ["Bearer old-token", "Bearer new-token"])
167
192
 
193
+ def test_resolve_llm_endpoint_applies_model_detail_limits(self) -> None:
194
+ def fake_urlopen(req: Any, timeout: int = 0) -> _FakeResponse:
195
+ if req.full_url == "http://platform.example/api/v1/internal/auth/ai-user-token":
196
+ return _FakeResponse({"code": 0, "success": True, "data": {"accessToken": "user-token"}})
197
+ if req.full_url == "http://platform.example/api/v1/llm-gateway/ai/model-catalog":
198
+ return _FakeResponse(_catalog_payload())
199
+ if req.full_url.startswith("http://platform.example/api/v1/admin/workspaces/models/detail?"):
200
+ return _FakeResponse(_model_detail_payload())
201
+ raise AssertionError(req.full_url)
202
+
203
+ with mock.patch.object(model.request, "urlopen", fake_urlopen):
204
+ endpoint = model.resolve_llm_endpoint()
205
+
206
+ self.assertEqual(endpoint.base_url, "http://detail.example/v1/chat/completions")
207
+ self.assertEqual(endpoint.response_url, "http://detail.example/v1/responses")
208
+ self.assertTrue(endpoint.cache_context)
209
+ self.assertEqual(endpoint.context_length, 192000)
210
+ self.assertEqual(endpoint.max_input_length, 128000)
211
+ self.assertEqual(endpoint.max_output_length, 64000)
212
+
168
213
  def test_image_generation_defaults_to_one_image(self) -> None:
169
214
  captured: dict[str, Any] = {}
170
215