licos-dev-sdk 0.2.2__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: licos-dev-sdk
3
- Version: 0.2.2
3
+ Version: 0.2.4
4
4
  Summary: LICOS Dev SDK - file generation and model capability clients
5
5
  Requires-Python: >=3.10
6
6
  Requires-Dist: graphviz>=0.20
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "licos-dev-sdk"
7
- version = "0.2.2"
7
+ version = "0.2.4"
8
8
  description = "LICOS Dev SDK - file generation and model capability clients"
9
9
  requires-python = ">=3.10"
10
10
  dependencies = [
@@ -47,9 +47,10 @@ def __getattr__(name: str):
47
47
  "ImageGenerationClient": ("model", "ImageGenerationClient"),
48
48
  "VideoGenerationClient": ("model", "VideoGenerationClient"),
49
49
  "SpeechRecognitionClient": ("model", "SpeechRecognitionClient"),
50
- "ASRClient": ("model", "ASRClient"),
51
- "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
- "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
50
+ "ASRClient": ("model", "ASRClient"),
51
+ "fetch_model_catalogs": ("model", "fetch_model_catalogs"),
52
+ "fetch_model_detail": ("model", "fetch_model_detail"),
53
+ "resolve_llm_endpoint": ("model", "resolve_llm_endpoint"),
53
54
  "resolve_vision_endpoint": ("model", "resolve_vision_endpoint"),
54
55
  "resolve_image_generation_endpoint": ("model", "resolve_image_generation_endpoint"),
55
56
  "resolve_video_generation_endpoint": ("model", "resolve_video_generation_endpoint"),
@@ -59,6 +60,17 @@ def __getattr__(name: str):
59
60
  "generate_video": ("model", "generate_video"),
60
61
  "recognize_speech": ("model", "recognize_speech"),
61
62
  "understand_image": ("model", "understand_image"),
63
+ # observability
64
+ "ObservabilityClient": ("observability", "ObservabilityClient"),
65
+ "ObservabilityRuntime": ("observability", "ObservabilityRuntime"),
66
+ "ensure_observability_database": ("observability", "ensure_observability_database"),
67
+ "log": ("observability", "log"),
68
+ "log_info": ("observability", "log_info"),
69
+ "log_warning": ("observability", "log_warning"),
70
+ "log_error": ("observability", "log_error"),
71
+ "record_trace": ("observability", "record_trace"),
72
+ "record_metric": ("observability", "record_metric"),
73
+ "record_error": ("observability", "record_error"),
62
74
  }
63
75
  if name in _map:
64
76
  mod_name, attr = _map[name]
@@ -81,9 +93,13 @@ __all__ = [
81
93
  "ModelRuntime", "ModelEndpoint", "ModelResult",
82
94
  "ApiError", "ConfigurationError",
83
95
  "LLMClient", "VisionClient", "VisionUnderstandingClient", "ImageGenerationClient", "VideoGenerationClient",
84
- "SpeechRecognitionClient", "ASRClient",
85
- "fetch_model_catalogs", "resolve_llm_endpoint", "resolve_vision_endpoint",
96
+ "SpeechRecognitionClient", "ASRClient",
97
+ "fetch_model_catalogs", "fetch_model_detail", "resolve_llm_endpoint", "resolve_vision_endpoint",
86
98
  "resolve_image_generation_endpoint", "resolve_video_generation_endpoint",
87
99
  "resolve_speech_recognition_endpoint",
88
100
  "invoke_llm", "generate_image", "generate_video", "recognize_speech", "understand_image",
101
+ "ObservabilityClient", "ObservabilityRuntime",
102
+ "ensure_observability_database",
103
+ "log", "log_info", "log_warning", "log_error",
104
+ "record_trace", "record_metric", "record_error",
89
105
  ]
@@ -18,13 +18,15 @@ from licos_platform_sdk._runtime import (
18
18
  )
19
19
 
20
20
 
21
- MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
- DEFAULT_REQUEST_TIMEOUT_SECS = 120
23
- DEFAULT_ASYNC_TIMEOUT_SECS = 600
24
- DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
25
- DEFAULT_CATALOG_CACHE_TTL_SECS = 300
26
-
27
- _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
21
+ MODEL_CATALOG_PATH = "/api/v1/llm-gateway/ai/model-catalog"
22
+ MODEL_DETAIL_PATH = "/api/v1/admin/workspaces/models/detail"
23
+ DEFAULT_REQUEST_TIMEOUT_SECS = 120
24
+ DEFAULT_ASYNC_TIMEOUT_SECS = 600
25
+ DEFAULT_ASYNC_POLL_INTERVAL_SECS = 2.0
26
+ DEFAULT_CATALOG_CACHE_TTL_SECS = 300
27
+
28
+ _CATALOG_CACHE: dict[tuple[str, str], tuple[float, list[dict[str, Any]]]] = {}
29
+ _DETAIL_CACHE: dict[tuple[str, str, str, str], tuple[float, dict[str, Any] | None]] = {}
28
30
 
29
31
 
30
32
  @dataclass(frozen=True)
@@ -35,13 +37,18 @@ class ModelRuntime:
35
37
 
36
38
 
37
39
  @dataclass(frozen=True)
38
- class ModelEndpoint:
39
- provider: str
40
- capability: str
41
- base_url: str
42
- model: str
43
- required_headers: dict[str, str] = field(default_factory=dict)
44
- endpoint: dict[str, Any] = field(default_factory=dict)
40
+ class ModelEndpoint:
41
+ provider: str
42
+ capability: str
43
+ base_url: str
44
+ model: str
45
+ required_headers: dict[str, str] = field(default_factory=dict)
46
+ endpoint: dict[str, Any] = field(default_factory=dict)
47
+ response_url: str | None = None
48
+ cache_context: bool = False
49
+ context_length: int | None = None
50
+ max_input_length: int | None = None
51
+ max_output_length: int | None = None
45
52
 
46
53
  @property
47
54
  def async_task(self) -> bool:
@@ -89,7 +96,7 @@ class ModelResult:
89
96
  return result
90
97
 
91
98
 
92
- def fetch_model_catalogs(
99
+ def fetch_model_catalogs(
93
100
  *,
94
101
  base_url: str | None = None,
95
102
  user_token: str | None = None,
@@ -97,19 +104,33 @@ def fetch_model_catalogs(
97
104
  refresh: bool = False,
98
105
  ) -> list[dict[str, Any]]:
99
106
  """Fetch platform model capability catalog using the project owner token."""
100
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
101
- return _fetch_model_catalogs(runtime, refresh=refresh)
102
-
103
-
104
- def resolve_llm_endpoint(
107
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
108
+ return _fetch_model_catalogs(runtime, refresh=refresh)
109
+
110
+
111
+ def fetch_model_detail(
112
+ model_code: str,
113
+ *,
114
+ base_url: str | None = None,
115
+ user_token: str | None = None,
116
+ user_id: str | None = None,
117
+ workspace_id: str | None = None,
118
+ refresh: bool = False,
119
+ ) -> dict[str, Any] | None:
120
+ """Fetch model detail, including base URL, Response API support and token limits."""
121
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
122
+ return _fetch_model_detail(runtime, model_code, workspace_id=workspace_id, refresh=refresh)
123
+
124
+
125
+ def resolve_llm_endpoint(
105
126
  *,
106
127
  model_group: str = "text",
107
128
  base_url: str | None = None,
108
129
  user_token: str | None = None,
109
130
  user_id: str | None = None,
110
- ) -> ModelEndpoint:
111
- runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
112
- return _resolve_endpoint(runtime, "chat", model_group=model_group)
131
+ ) -> ModelEndpoint:
132
+ runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
133
+ return _resolve_chat_endpoint(runtime, model_group=model_group)
113
134
 
114
135
 
115
136
  def resolve_vision_endpoint(
@@ -164,7 +185,7 @@ class LLMClient:
164
185
  self.ctx = ctx
165
186
  self.runtime = _model_runtime(base_url=base_url, user_token=user_token, user_id=user_id)
166
187
 
167
- def invoke(
188
+ def invoke(
168
189
  self,
169
190
  messages: Sequence[Any] | str,
170
191
  *,
@@ -173,19 +194,22 @@ class LLMClient:
173
194
  max_completion_tokens: int | None = None,
174
195
  timeout: int | None = None,
175
196
  **extra: Any,
176
- ) -> ModelResult:
177
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
178
- selected_model = _selected_model(model, endpoint.model)
179
- body = {
180
- "model": selected_model,
181
- "messages": _normalize_messages(messages),
182
- **_not_none(
183
- {
184
- "temperature": temperature,
185
- "max_completion_tokens": max_completion_tokens,
186
- **extra,
187
- }
188
- ),
197
+ ) -> ModelResult:
198
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
199
+ selected_model = endpoint.model
200
+ body = {
201
+ "model": selected_model,
202
+ "messages": _normalize_messages(messages),
203
+ **_not_none(
204
+ {
205
+ "temperature": temperature,
206
+ "max_completion_tokens": _effective_max_completion_tokens(
207
+ max_completion_tokens,
208
+ endpoint,
209
+ ),
210
+ **extra,
211
+ }
212
+ ),
189
213
  }
190
214
  response = _post_model_json(endpoint, self.runtime, body, timeout=timeout)
191
215
  texts = _extract_chat_texts(response) or _collect_texts(response)
@@ -207,20 +231,23 @@ class LLMClient:
207
231
  max_completion_tokens: int | None = None,
208
232
  timeout: int | None = None,
209
233
  **extra: Any,
210
- ) -> Iterator[str]:
211
- endpoint = _resolve_endpoint(self.runtime, "chat", model_group="text")
212
- selected_model = _selected_model(model, endpoint.model)
213
- body = {
214
- "model": selected_model,
215
- "messages": _normalize_messages(messages),
216
- "stream": True,
217
- **_not_none(
218
- {
219
- "temperature": temperature,
220
- "max_completion_tokens": max_completion_tokens,
221
- **extra,
222
- }
223
- ),
234
+ ) -> Iterator[str]:
235
+ endpoint = _resolve_chat_endpoint(self.runtime, model_group="text", requested_model=model)
236
+ selected_model = endpoint.model
237
+ body = {
238
+ "model": selected_model,
239
+ "messages": _normalize_messages(messages),
240
+ "stream": True,
241
+ **_not_none(
242
+ {
243
+ "temperature": temperature,
244
+ "max_completion_tokens": _effective_max_completion_tokens(
245
+ max_completion_tokens,
246
+ endpoint,
247
+ ),
248
+ **extra,
249
+ }
250
+ ),
224
251
  }
225
252
  yield from _stream_model_json(endpoint, self.runtime, body, timeout=timeout)
226
253
 
@@ -451,8 +478,9 @@ def understand_image(**kwargs: Any) -> ModelResult:
451
478
  return VisionClient().understand(**kwargs)
452
479
 
453
480
 
454
- def clear_model_catalog_cache_for_tests() -> None:
455
- _CATALOG_CACHE.clear()
481
+ def clear_model_catalog_cache_for_tests() -> None:
482
+ _CATALOG_CACHE.clear()
483
+ _DETAIL_CACHE.clear()
456
484
 
457
485
 
458
486
  def _model_runtime(
@@ -472,7 +500,7 @@ def _refresh_model_runtime(runtime: ModelRuntime) -> ModelRuntime:
472
500
  return replace(runtime, token=token)
473
501
 
474
502
 
475
- def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
503
+ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> list[dict[str, Any]]:
476
504
  cache_key = (runtime.base_url, runtime.token)
477
505
  ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
478
506
  cached = _CATALOG_CACHE.get(cache_key)
@@ -493,11 +521,81 @@ def _fetch_model_catalogs(runtime: ModelRuntime, *, refresh: bool = False) -> li
493
521
  catalogs = _catalogs_from_payload(payload)
494
522
  if not catalogs:
495
523
  raise ApiError("model catalog has no provider entries", details=payload)
496
- _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
497
- return catalogs
498
-
499
-
500
- def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
524
+ _CATALOG_CACHE[cache_key] = (time.time(), catalogs)
525
+ return catalogs
526
+
527
+
528
+ def _workspace_id(workspace_id: str | None = None) -> str | None:
529
+ value = workspace_id or env("AGENT_WORKSPACE_ID") or env("LICOS_WORKSPACE_ID")
530
+ if value is None:
531
+ return None
532
+ value = str(value).strip()
533
+ return value or None
534
+
535
+
536
+ def _fetch_model_detail(
537
+ runtime: ModelRuntime,
538
+ model_code: str,
539
+ *,
540
+ workspace_id: str | None = None,
541
+ refresh: bool = False,
542
+ ) -> dict[str, Any] | None:
543
+ model_code = str(model_code or "").strip()
544
+ if not model_code:
545
+ return None
546
+ resolved_workspace_id = _workspace_id(workspace_id) or ""
547
+ cache_key = (runtime.base_url, runtime.token, model_code, resolved_workspace_id)
548
+ ttl = _int_env("LICOS_MODEL_CATALOG_CACHE_TTL_SECS", DEFAULT_CATALOG_CACHE_TTL_SECS)
549
+ cached = _DETAIL_CACHE.get(cache_key)
550
+ if cached and not refresh and time.time() - cached[0] <= ttl:
551
+ return cached[1]
552
+
553
+ query = {"code": model_code}
554
+ if resolved_workspace_id:
555
+ query["workspaceId"] = resolved_workspace_id
556
+ url = f"{runtime.base_url}{MODEL_DETAIL_PATH}?{parse.urlencode(query)}"
557
+ headers = {"X-Workspace-Id": resolved_workspace_id} if resolved_workspace_id else None
558
+ try:
559
+ payload = _request_json(
560
+ "GET",
561
+ url,
562
+ token=runtime.token,
563
+ headers=headers,
564
+ timeout=30,
565
+ )
566
+ except ApiError as exc:
567
+ if not refresh and should_refresh_user_token(exc):
568
+ return _fetch_model_detail(
569
+ _refresh_model_runtime(runtime),
570
+ model_code,
571
+ workspace_id=workspace_id,
572
+ refresh=True,
573
+ )
574
+ raise
575
+ detail = _model_detail_from_payload(payload)
576
+ _DETAIL_CACHE[cache_key] = (time.time(), detail)
577
+ return detail
578
+
579
+
580
+ def _model_detail_from_payload(payload: Any) -> dict[str, Any] | None:
581
+ if not isinstance(payload, dict):
582
+ raise ApiError("model detail response is not an object", details=payload)
583
+ code = payload.get("code")
584
+ if code not in (None, 0) or payload.get("success") is False:
585
+ raise ApiError(
586
+ str(payload.get("message") or "model detail API failed"),
587
+ code=code if isinstance(code, int) else None,
588
+ details=payload,
589
+ )
590
+ data = payload.get("data")
591
+ if data is None:
592
+ return None
593
+ if not isinstance(data, dict):
594
+ raise ApiError("model detail data is not an object", details=payload)
595
+ return data
596
+
597
+
598
+ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
501
599
  if not isinstance(payload, dict):
502
600
  raise ApiError("model catalog response is not an object", details=payload)
503
601
  code = payload.get("code")
@@ -516,10 +614,26 @@ def _catalogs_from_payload(payload: Any) -> list[dict[str, Any]]:
516
614
  items = []
517
615
  if not isinstance(items, list):
518
616
  raise ApiError("model catalog data is not a list", details=payload)
519
- return [item for item in items if isinstance(item, dict)]
520
-
521
-
522
- def _resolve_endpoint(
617
+ return [item for item in items if isinstance(item, dict)]
618
+
619
+
620
+ def _resolve_chat_endpoint(
621
+ runtime: ModelRuntime,
622
+ *,
623
+ model_group: str,
624
+ requested_model: str | None = None,
625
+ ) -> ModelEndpoint:
626
+ endpoint = _resolve_endpoint(runtime, "chat", model_group=model_group)
627
+ selected_model = _selected_model(requested_model, endpoint.model)
628
+ endpoint = replace(endpoint, model=selected_model)
629
+ try:
630
+ detail = _fetch_model_detail(runtime, selected_model)
631
+ except ApiError:
632
+ return endpoint
633
+ return _apply_model_detail(endpoint, detail)
634
+
635
+
636
+ def _resolve_endpoint(
523
637
  runtime: ModelRuntime,
524
638
  capability_key: str,
525
639
  *,
@@ -552,7 +666,59 @@ def _resolve_endpoint(
552
666
  required_headers=_parse_required_headers(capability.get("requiredHeaders")),
553
667
  endpoint=capability,
554
668
  )
555
- raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
669
+ raise ApiError(f"capability `{capability_key}` is not available in model catalog", details=catalogs)
670
+
671
+
672
+ def _apply_model_detail(endpoint: ModelEndpoint, detail: dict[str, Any] | None) -> ModelEndpoint:
673
+ if not detail:
674
+ return endpoint
675
+ base_url = _first_non_empty_field(detail, ["baseUrl", "base_url", "url"]) or endpoint.base_url
676
+ response_url = _first_non_empty_field(
677
+ detail,
678
+ ["responseUrl", "response_url", "responsesUrl", "responses_url"],
679
+ )
680
+ cache_context = _bool_field(detail, ["cacheContext", "cache_context"])
681
+ return replace(
682
+ endpoint,
683
+ base_url=base_url,
684
+ response_url=response_url or endpoint.response_url,
685
+ cache_context=endpoint.cache_context if cache_context is None else cache_context,
686
+ context_length=_int_field(
687
+ detail,
688
+ [
689
+ "contextLength",
690
+ "context_length",
691
+ "contextWindow",
692
+ "context_window",
693
+ "maxContextTokens",
694
+ "max_context_tokens",
695
+ ],
696
+ ),
697
+ max_input_length=_int_field(
698
+ detail,
699
+ [
700
+ "maxInputLength",
701
+ "max_input_length",
702
+ "maxInputTokens",
703
+ "max_input_tokens",
704
+ "inputTokenLimit",
705
+ "input_token_limit",
706
+ ],
707
+ ),
708
+ max_output_length=_int_field(
709
+ detail,
710
+ [
711
+ "maxOutputLength",
712
+ "max_output_length",
713
+ "maxOutputTokens",
714
+ "max_output_tokens",
715
+ "outputTokenLimit",
716
+ "output_token_limit",
717
+ "maxCompletionTokens",
718
+ "max_completion_tokens",
719
+ ],
720
+ ),
721
+ )
556
722
 
557
723
 
558
724
  def _capability_base_url(item: dict[str, Any], capability: dict[str, Any]) -> str | None:
@@ -598,7 +764,7 @@ def _first_array_model(models: Any) -> str | None:
598
764
  return _first_string(models)
599
765
 
600
766
 
601
- def _first_string(value: Any) -> str | None:
767
+ def _first_string(value: Any) -> str | None:
602
768
  if isinstance(value, str):
603
769
  trimmed = value.strip()
604
770
  return trimmed or None
@@ -606,10 +772,60 @@ def _first_string(value: Any) -> str | None:
606
772
  for item in value:
607
773
  if isinstance(item, str) and item.strip():
608
774
  return item.strip()
609
- return None
610
-
611
-
612
- def _post_model_json(
775
+ return None
776
+
777
+
778
+ def _first_non_empty_field(data: dict[str, Any], names: Sequence[str]) -> str | None:
779
+ for name in names:
780
+ value = data.get(name)
781
+ if isinstance(value, str) and value.strip():
782
+ return value.strip()
783
+ return None
784
+
785
+
786
+ def _bool_field(data: dict[str, Any], names: Sequence[str]) -> bool | None:
787
+ for name in names:
788
+ value = data.get(name)
789
+ if isinstance(value, bool):
790
+ return value
791
+ if isinstance(value, str):
792
+ normalized = value.strip().lower()
793
+ if normalized in {"true", "1", "yes", "y"}:
794
+ return True
795
+ if normalized in {"false", "0", "no", "n"}:
796
+ return False
797
+ return None
798
+
799
+
800
+ def _int_field(data: dict[str, Any], names: Sequence[str]) -> int | None:
801
+ for name in names:
802
+ value = data.get(name)
803
+ if isinstance(value, bool):
804
+ continue
805
+ if isinstance(value, int):
806
+ return value if value > 0 else None
807
+ if isinstance(value, float):
808
+ parsed = int(value)
809
+ return parsed if parsed > 0 else None
810
+ if isinstance(value, str):
811
+ try:
812
+ parsed = int(value.strip())
813
+ except ValueError:
814
+ continue
815
+ return parsed if parsed > 0 else None
816
+ return None
817
+
818
+
819
+ def _effective_max_completion_tokens(requested: int | None, endpoint: ModelEndpoint) -> int | None:
820
+ limit = endpoint.max_output_length
821
+ if requested is None:
822
+ return limit
823
+ if limit is None:
824
+ return requested
825
+ return min(requested, limit)
826
+
827
+
828
+ def _post_model_json(
613
829
  endpoint: ModelEndpoint,
614
830
  runtime: ModelRuntime,
615
831
  body: dict[str, Any],