ltcai 0.3.0 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,450 @@
1
+ """Lattice AI Model Compatibility Layer.
2
+
3
+ 피드백 #3 (lattice_ai_model_compat_fast_path.txt) 반영.
4
+
5
+ 핵심 원칙:
6
+ - 무거운 호환성 검사는 모델 로드 시 1회만 (Slow Path).
7
+ - 실제 채팅 중에는 캐시된 profile을 사용하는 Fast Path.
8
+ - 답변이 깨졌을 때만 1회 retry하는 Recovery Path.
9
+
10
+ 모든 함수는 안전한 디폴트로 동작하므로 기존 코드를 깨뜨리지 않는다.
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import logging
16
+ import re
17
+ import threading
18
+ import time
19
+ from dataclasses import dataclass, field, asdict
20
+ from typing import Any, Dict, List, Optional, Tuple
21
+
22
+ logger = logging.getLogger(__name__)
23
+
24
+
25
+ # ── Model family detection ────────────────────────────────────────────────────
26
+
27
# Ordered (family, regex source) pairs. detect_model_family() returns the
# family of the first pattern that matches, so the order is significant.
_FAMILY_REGEX_SOURCES: Tuple[Tuple[str, str], ...] = (
    ("gpt-oss", r"gpt[-_]?oss"),
    ("gemma", r"gemma"),
    ("qwen", r"qwen"),
    ("llama", r"\bllama|meta[-_]?llama"),
    ("mistral", r"mistral|mixtral"),
    ("phi", r"\bphi[-_]?\d"),
    ("deepseek", r"deepseek"),
    ("yi", r"\byi[-_]?\d"),
    ("claude", r"claude"),
    ("gpt-4", r"gpt[-_]?4"),
    ("gpt-3.5", r"gpt[-_]?3\.?5"),
    ("o1", r"\bo1[-_]?"),
)

# Every pattern is compiled case-insensitively.
FAMILY_PATTERNS: List[Tuple[str, re.Pattern]] = [
    (family, re.compile(source, re.I)) for family, source in _FAMILY_REGEX_SOURCES
]
41
+
42
+
43
def detect_model_family(model_id: str) -> str:
    """Infer the model family code from a model identifier string.

    The text after the first ":" is tried first, which drops a provider
    prefix such as ``"ollama:"``. If that part matches no family, the full
    identifier is tried as well: ``name:tag`` style ids (``"gpt-oss:20b"``,
    ``"gemma:7b"``) carry the family name *before* the colon and would
    otherwise be reported as ``"unknown"``.

    Args:
        model_id: Model identifier, optionally prefixed with ``provider:``.

    Returns:
        The first family in ``FAMILY_PATTERNS`` whose pattern matches, or
        ``"unknown"`` when the id is empty or nothing matches.
    """
    if not model_id:
        return "unknown"
    raw = str(model_id)
    candidates = [raw]
    if ":" in raw:
        # Prefer the part after the prefix so existing results are unchanged
        # whenever that part already identifies a family.
        candidates.insert(0, raw.split(":", 1)[1])
    for candidate in candidates:
        for family, pattern in FAMILY_PATTERNS:
            if pattern.search(candidate):
                return family
    return "unknown"
55
+
56
+
57
+ # ── Family profiles ───────────────────────────────────────────────────────────
58
+
59
# Stop sequences used when a profile supplies none of its own.
DEFAULT_STOP = ["<|im_end|>", "<|endoftext|>", "</s>", "<|user|>", "<|assistant|>"]


def _family_profile(
    family: str,
    *,
    preferred_engines: List[str],
    stop_sequences: List[str],
    chat_template: str = "tokenizer_default",
    supports_system: bool = True,
    supports_vision: bool = False,
    temperature: float = 0.2,
    top_p: float = 0.9,
    max_tokens: int = 4096,
    disable_draft: bool = False,
    postprocess: Tuple[str, ...] = ("strip_role_tokens",),
) -> Dict[str, Any]:
    """Build one family profile dict with its keys in a fixed order."""
    return {
        "family": family,
        "supports_system": supports_system,
        "supports_vision": supports_vision,
        "chat_template": chat_template,
        "preferred_engines": list(preferred_engines),
        "temperature": temperature,
        "top_p": top_p,
        "max_tokens": max_tokens,
        "stop_sequences": list(stop_sequences),
        "disable_draft": disable_draft,
        "postprocess": list(postprocess),
    }


# Per-family defaults, keyed by the family code from detect_model_family().
FAMILY_PROFILES: Dict[str, Dict[str, Any]] = {
    profile["family"]: profile
    for profile in (
        _family_profile(
            "gpt-oss",
            chat_template="gpt_oss",
            preferred_engines=["ollama", "llamacpp", "vllm", "local_mlx"],
            temperature=0.1,
            max_tokens=2048,
            stop_sequences=["<|im_end|>", "<|end|>", "</s>", "<|user|>", "<|assistant|>"],
            disable_draft=True,
            # trim_after_user_marker needs <|user|> to still be present, so it
            # has to run before strip_role_tokens removes that marker.
            postprocess=("trim_after_user_marker", "strip_role_tokens"),
        ),
        _family_profile(
            "gemma",
            supports_vision=True,
            chat_template="tokenizer_default_or_gemma",
            preferred_engines=["local_mlx", "ollama", "llamacpp"],
            top_p=0.95,
            stop_sequences=["<end_of_turn>", "</s>"],
        ),
        _family_profile(
            "qwen",
            chat_template="qwen_chatml",
            preferred_engines=["ollama", "local_mlx", "vllm"],
            stop_sequences=["<|im_end|>", "<|endoftext|>"],
        ),
        _family_profile(
            "llama",
            preferred_engines=["ollama", "local_mlx", "llamacpp", "vllm"],
            stop_sequences=["</s>", "[INST]", "[/INST]"],
        ),
        _family_profile(
            "mistral",
            supports_system=False,
            preferred_engines=["ollama", "local_mlx", "llamacpp"],
            stop_sequences=["</s>", "[INST]", "[/INST]"],
        ),
        _family_profile(
            "phi",
            preferred_engines=["ollama", "local_mlx"],
            max_tokens=2048,
            stop_sequences=["<|end|>", "<|endoftext|>"],
        ),
        _family_profile(
            "deepseek",
            preferred_engines=["ollama", "local_mlx", "vllm"],
            stop_sequences=["<|EOT|>", "</s>"],
        ),
        _family_profile(
            "unknown",
            preferred_engines=[],
            max_tokens=2048,
            stop_sequences=DEFAULT_STOP,
        ),
    )
}
168
+
169
+
170
def get_model_profile(model_id: str, engine: Optional[str] = None) -> Dict[str, Any]:
    """Return the default compatibility profile for a model/engine pair.

    The family is detected from ``model_id``; families without an entry in
    ``FAMILY_PROFILES`` use the ``"unknown"`` profile.

    Args:
        model_id: Model identifier used for family detection; stored as-is
            under ``"model_id"``.
        engine: Optional engine name; stored stripped and lower-cased under
            ``"engine"``, or ``None`` when empty.

    Returns:
        A new dict. List values are copied so that callers can mutate the
        result without altering the shared ``FAMILY_PROFILES`` table.
    """
    family = detect_model_family(model_id)
    template = FAMILY_PROFILES.get(family) or FAMILY_PROFILES["unknown"]
    # dict(template) alone would alias the lists held by the global table.
    base: Dict[str, Any] = {
        key: list(value) if isinstance(value, list) else value
        for key, value in template.items()
    }
    base["engine"] = (engine or "").strip().lower() or None
    base["model_id"] = model_id
    base.setdefault("stop_sequences", list(DEFAULT_STOP))
    return base
178
+
179
+
180
+ # ── Postprocessing ────────────────────────────────────────────────────────────
181
+
182
# Role / special tokens that must not appear in user-visible output.
# The order is kept stable: it decides which marker is reported first.
BAD_MARKERS = (
    # ChatML-style tokens
    ["<|im_start|>", "<|im_end|>", "<|user|>", "<|assistant|>", "<|endoftext|>"]
    # Instruction-template headers
    + ["### Instruction:", "### Response:"]
    # [INST]-style markers and the sequence-start token
    + ["[/INST]", "[INST]", "<s>"]
)
194
+
195
+
196
def strip_role_tokens(text: str) -> str:
    """Remove every ``BAD_MARKERS`` token and a leading ``role:`` prefix.

    Falsy input is returned unchanged; otherwise the result is stripped of
    surrounding whitespace.
    """
    if not text:
        return text
    result = text
    for token in BAD_MARKERS:
        if token in result:
            result = result.replace(token, "")
    # Drop one leading "user:" / "assistant:" / "system:" prefix, if present.
    prefix = re.match(r"^\s*(?:user|assistant|system)\s*:\s*", result, flags=re.I)
    if prefix:
        result = result[prefix.end():]
    return result.strip()
205
+
206
+
207
def trim_after_user_marker(text: str) -> str:
    """Cut the text where the model started emitting the next user turn.

    Each marker is checked in turn against the already-trimmed text. A
    marker at position 0 is left alone, so the result is never emptied by
    the cut itself. Falsy input is returned unchanged.
    """
    if not text:
        return text
    kept = text
    for marker in ("<|user|>", "\nuser:", "\nUser:", "### Instruction:"):
        head, found, _ = kept.partition(marker)
        # Only cut when the marker exists and something precedes it.
        if found and head:
            kept = head
    return kept.strip()
216
+
217
+
218
# Registry of post-processing steps, keyed by the names used in a profile's
# "postprocess" list (each key is the function's own name).
POSTPROCESSORS = {
    step.__name__: step for step in (strip_role_tokens, trim_after_user_marker)
}
222
+
223
+
224
def fast_postprocess(text: str, profile: Dict[str, Any]) -> str:
    """Run the profile's post-processing steps on ``text`` (Fast Path).

    Steps are looked up by name in ``POSTPROCESSORS`` and applied in the
    order listed under ``profile["postprocess"]``. Unknown names are
    skipped. A step that raises is logged at debug level and skipped, and
    the output of the previous step is kept.
    """
    if not text:
        return text
    result = text
    step_names = profile.get("postprocess") or []
    for step_name in step_names:
        step = POSTPROCESSORS.get(step_name)
        if not step:
            continue
        try:
            result = step(result)
        except Exception:
            logger.debug("postprocessor %s failed", step_name, exc_info=True)
    return result
237
+
238
+
239
+ # ── Smoke test validation ─────────────────────────────────────────────────────
240
+
241
# Korean smoke-test prompt: "Answer in Korean with exactly one sentence. What is 2+2?"
SMOKE_PROMPT = "한국어로 한 문장만 답해. 2+2는?"
242
+
243
+
244
def classify_smoke_response(text: str) -> Tuple[str, str]:
    """Classify a smoke-test reply as ``ok``, ``degraded`` or ``failed``.

    - failed: unusable for chat (empty reply, leaked special/role tokens,
      severe repetition, excessively long output).
    - degraded: chat works but quality is uneven (mild repetition, expected
      answer missing, somewhat long output).
    - ok: format, answer and length all look normal.

    Returns:
        ``(status, reason)``. For ``degraded`` the reason joins every
        triggered signal with ``"; "``.
    """
    raw = "" if text is None else str(text).strip()
    if not raw:
        return "failed", "empty response"

    # 1. Leaked role / special tokens mean the chat format is broken.
    leaked = next((marker for marker in BAD_MARKERS if marker in raw), None)
    if leaked is not None:
        return "failed", f"role token leakage ({leaked})"
    if re.search(r"<\|[^|]{0,40}\|>", raw):
        return "failed", "special token leakage"
    # Reply that begins with a role marker line such as "assistant:".
    if re.match(r"^\s*(?:assistant|system|user)\s*:", raw, flags=re.I):
        return "failed", "role marker leakage"

    # 2. Repetition: count identical sentence fragments of 3+ characters.
    occurrences: Dict[str, int] = {}
    for piece in re.split(r"[.!?\n]+", raw):
        sentence = piece.strip()
        if len(sentence) >= 3:
            occurrences[sentence] = occurrences.get(sentence, 0) + 1
    max_rep = max(occurrences.values(), default=0)
    if max_rep >= 5:
        return "failed", "severe repetition"
    # Runaway repetition of a short character run (1-20 chars, 7+ times in a row).
    if re.search(r"(.{1,20}?)\1{6,}", raw):
        return "failed", "runaway repetition"

    # 3. Excessively long output.
    if len(raw) > 4000:
        return "failed", "response too long"

    # 4. Chat is usable from here on; collect the degraded signals in order.
    answer_missing = not any(token in raw for token in ("4", "네", "사"))
    checks = (
        (max_rep >= 3, "mild repetition"),
        (len(raw) > 600, "response longer than expected"),
        (answer_missing, "answer did not contain expected result"),
    )
    notes = [label for triggered, label in checks if triggered]
    if notes:
        return "degraded", "; ".join(notes)
    return "ok", "ok"
299
+
300
+
301
def validate_smoke_response(text: str) -> Tuple[bool, str]:
    """Backward-compatible wrapper around ``classify_smoke_response``.

    Returns:
        ``(chat_usable, reason)`` where ``chat_usable`` is True for the
        ``ok`` and ``degraded`` statuses and False for ``failed``.
    """
    verdict, reason = classify_smoke_response(text)
    chat_usable = verdict != "failed"
    return chat_usable, reason
308
+
309
+
310
+ # ── Compat cache (Slow Path) ──────────────────────────────────────────────────
311
+
312
+
313
@dataclass
class CompatProfile:
    """Cached compatibility profile for one model/engine combination."""

    # Identity
    model_id: str
    engine: Optional[str]
    family: str

    # Generation settings
    template: str
    stop: List[str]
    temperature: float
    top_p: float
    max_tokens: int
    disable_draft: bool
    postprocess: List[str]

    # Validation state
    loaded: bool = False
    chat_compatible: bool = False
    quality_status: str = "unknown"  # ok / degraded / failed / unknown
    last_test_error: Optional[str] = None
    validated_at: Optional[float] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the profile as a plain dict (see ``dataclasses.asdict``)."""
        snapshot = asdict(self)
        return snapshot
333
+
334
+
335
# In-process profile cache; keys come from cache_key(model_id, engine).
_COMPAT_CACHE: Dict[str, CompatProfile] = {}

# Re-entrant lock taken by the cache helpers whenever they touch _COMPAT_CACHE.
_CACHE_LOCK = threading.RLock()
337
+
338
+
339
def cache_key(model_id: str, engine: Optional[str] = None) -> str:
    """Build the cache key for a model/engine pair.

    The engine is stripped and lower-cased. With an engine the key is
    ``"<engine>:<model_id>"``; without one it is ``str(model_id)``.
    """
    normalized_engine = (engine or "").strip().lower()
    if not normalized_engine:
        return str(model_id)
    return f"{normalized_engine}:{model_id}"
342
+
343
+
344
def remember_profile(profile: CompatProfile) -> None:
    """Store ``profile`` in the cache under its model/engine key."""
    key = cache_key(profile.model_id, profile.engine)
    with _CACHE_LOCK:
        _COMPAT_CACHE[key] = profile
347
+
348
+
349
def lookup_profile(model_id: str, engine: Optional[str] = None) -> Optional[CompatProfile]:
    """Return the cached profile for the model/engine pair, or ``None``."""
    key = cache_key(model_id, engine)
    with _CACHE_LOCK:
        found = _COMPAT_CACHE.get(key)
    return found
352
+
353
+
354
def ensure_profile(model_id: str, engine: Optional[str] = None) -> CompatProfile:
    """Return the cached profile, creating one from family defaults if absent.

    Args:
        model_id: Model identifier.
        engine: Optional engine name; stored stripped and lower-cased, or
            ``None`` when empty.

    Returns:
        The cached ``CompatProfile`` for this pair. Repeated calls return
        the same object.
    """
    # _CACHE_LOCK is an RLock, so the nested lookup/remember calls can
    # re-acquire it. Holding it across the whole check-create-store sequence
    # prevents two threads from both missing the cache and storing two
    # different profile objects for the same key.
    with _CACHE_LOCK:
        cached = lookup_profile(model_id, engine)
        if cached is not None:
            return cached
        base = get_model_profile(model_id, engine)
        profile = CompatProfile(
            model_id=model_id,
            engine=(engine or "").strip().lower() or None,
            family=base["family"],
            template=base["chat_template"],
            stop=list(base["stop_sequences"]),
            temperature=float(base["temperature"]),
            top_p=float(base["top_p"]),
            max_tokens=int(base["max_tokens"]),
            disable_draft=bool(base.get("disable_draft", False)),
            postprocess=list(base.get("postprocess") or []),
        )
        remember_profile(profile)
        return profile
374
+
375
+
376
def record_smoke_result(
    model_id: str,
    engine: Optional[str],
    ok: bool,
    reason: str,
    *,
    status: Optional[str] = None,
) -> CompatProfile:
    """Record a smoke-test outcome on the cached profile.

    Args:
        model_id: Model identifier.
        engine: Optional engine name.
        ok: Whether the model is usable for chat.
        reason: Explanation; stored as ``last_test_error`` only when ``ok``
            is false.
        status: Optional ``"ok"`` / ``"degraded"`` / ``"failed"`` value
            stored verbatim. For backward compatibility, any other value
            (including ``None``) derives the status from ``ok``:
            True -> ``"ok"``, False -> ``"degraded"``.

    Returns:
        The updated cached ``CompatProfile``.
    """
    # Apply all field updates under the re-entrant cache lock so that
    # list_cached_profiles(), which snapshots under the same lock, never
    # observes a half-updated profile.
    with _CACHE_LOCK:
        profile = ensure_profile(model_id, engine)
        profile.loaded = True
        profile.chat_compatible = bool(ok)
        if status in ("ok", "degraded", "failed"):
            profile.quality_status = status
        else:
            profile.quality_status = "ok" if ok else "degraded"
        profile.last_test_error = None if ok else reason
        profile.validated_at = time.time()
        remember_profile(profile)
        return profile
400
+
401
+
402
def list_cached_profiles() -> List[Dict[str, Any]]:
    """Return a dict snapshot of every cached profile, taken under the lock."""
    with _CACHE_LOCK:
        snapshot = [entry.to_dict() for entry in _COMPAT_CACHE.values()]
    return snapshot
405
+
406
+
407
+ # ── Public helpers ────────────────────────────────────────────────────────────
408
+
409
+
410
def normalize_generation_params(
    profile: Dict[str, Any],
    overrides: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Derive generation parameters from a family profile.

    Starts from the profile's ``temperature``, ``top_p``, ``max_tokens`` and
    ``stop_sequences`` (with fallbacks of 0.2, 0.9, 2048 and
    ``DEFAULT_STOP``), then applies every override whose value is not
    ``None``. Override keys are not restricted to those four.
    """
    stop_source = profile.get("stop_sequences") or DEFAULT_STOP
    params: Dict[str, Any] = {
        "temperature": profile.get("temperature", 0.2),
        "top_p": profile.get("top_p", 0.9),
        "max_tokens": profile.get("max_tokens", 2048),
        "stop": list(stop_source),
    }
    for name, value in (overrides or {}).items():
        if value is None:
            continue
        params[name] = value
    return params
426
+
427
+
428
def get_stop_sequences(model_id: str, engine: Optional[str] = None) -> List[str]:
    """Return a copy of the stop sequences from the model's cached profile."""
    return list(ensure_profile(model_id, engine).stop)
431
+
432
+
433
# Public API of the compatibility layer.
__all__ = [
    # family detection and profiles
    "FAMILY_PROFILES",
    "CompatProfile",
    "detect_model_family",
    "get_model_profile",
    # post-processing and smoke-test classification
    "fast_postprocess",
    "validate_smoke_response",
    "classify_smoke_response",
    # profile cache
    "ensure_profile",
    "lookup_profile",
    "remember_profile",
    "record_smoke_result",
    "list_cached_profiles",
    # helpers
    "normalize_generation_params",
    "get_stop_sequences",
    "strip_role_tokens",
    "SMOKE_PROMPT",
]
@@ -0,0 +1,227 @@
1
+ """Lattice AI Model Resolution + Prepare State Machine.
2
+
3
+ 피드백 #1 (lattice_ai_model_recommend_download_load_issue.txt)
4
+ 피드백 #2 (lattice_ai_manual_model_select_auto_download_load_fix.txt)
5
+
6
+ 핵심 문제:
7
+ - 추천 카드 ID, 다운로드 ID, 로드 ID, router cache key,
8
+ 프론트가 current로 쓰는 ID가 단계마다 달라질 수 있음.
9
+ - /models/load 와 /engines/prepare-model/stream 로직이 중복.
10
+ - 다운로드 성공과 채팅 가능 상태가 다름.
11
+
12
+ 해결:
13
+ 1. ModelResolution: input_id → engine/resolved_model/download_id/load_id/expected_current.
14
+ 2. PrepareState: RESOLVING → ENGINE_CHECK → DOWNLOADING → SERVER_STARTING
15
+ → MODEL_LOADING → SMOKE_TEST → READY (또는 DEGRADED/FAILED).
16
+ 3. PrepareReport: 로드 직후 smoke test 결과까지 포함한 최종 응답 객체.
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import logging
22
+ import re
23
+ from dataclasses import dataclass, field, asdict
24
+ from enum import Enum
25
+ from typing import Any, Dict, List, Optional
26
+
27
+ logger = logging.getLogger(__name__)
28
+
29
+
30
+ # ── State enum ────────────────────────────────────────────────────────────────
31
+
32
+
33
class PrepareState(str, Enum):
    """Stages of the model prepare flow.

    Each member's value equals its name, and members are also ``str``
    instances because of the ``str`` mixin.
    """

    # In-progress stages
    RESOLVING = "RESOLVING"
    ENGINE_CHECK = "ENGINE_CHECK"
    ENGINE_INSTALL = "ENGINE_INSTALL"
    DOWNLOADING = "DOWNLOADING"
    SERVER_STARTING = "SERVER_STARTING"
    MODEL_SERVING = "MODEL_SERVING"
    MODEL_LOADING = "MODEL_LOADING"
    SMOKE_TEST = "SMOKE_TEST"

    # Final outcomes
    READY = "READY"
    DEGRADED = "DEGRADED"
    FAILED = "FAILED"
45
+
46
+
47
# Canonical names of the engines treated as local.
LOCAL_ENGINES = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}

# Alternative spellings (lower-case) mapped to their canonical engine name.
ENGINE_ALIASES = dict(
    [
        ("mlx", "local_mlx"),
        ("local-mlx", "local_mlx"),
        ("llama.cpp", "llamacpp"),
        ("llama-cpp", "llamacpp"),
        ("lm-studio", "lmstudio"),
        ("lmstudio:openai-compatible", "lmstudio"),
    ]
)
56
+
57
+
58
def _canonical_engine(engine: Optional[str]) -> Optional[str]:
    """Normalize an engine name: strip, lower-case, then resolve aliases.

    Returns ``None`` for falsy input or when nothing remains after
    normalization.
    """
    if not engine:
        return None
    key = str(engine).strip().lower()
    return ENGINE_ALIASES.get(key, key) or None
64
+
65
+
66
+ # ── ModelResolution dataclass ─────────────────────────────────────────────────
67
+
68
+
69
# Hosted providers accepted as an id prefix in addition to LOCAL_ENGINES.
_REMOTE_PROVIDERS = frozenset({"openai", "anthropic", "openrouter", "groq", "together"})


def _known_provider(prefix: str) -> Optional[str]:
    """Return the canonical provider for ``prefix``, or None if unrecognized."""
    canon = _canonical_engine(prefix)
    if canon and (canon in LOCAL_ENGINES or canon in _REMOTE_PROVIDERS):
        return canon
    return None


@dataclass
class ModelResolution:
    """Canonical model identity shared by every prepare stage."""

    input_id: str
    engine: str
    provider: str
    resolved_model: str
    download_id: str
    load_id: str
    expected_current: str
    display_name: str = ""
    metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        """Return the resolution as a plain dict."""
        return asdict(self)

    # ──────────────────────────────────────────────────────────────────────

    @classmethod
    def from_request(
        cls,
        input_id: str,
        engine: Optional[str] = None,
        *,
        display_name: Optional[str] = None,
        user_email: Optional[str] = None,
        alias_resolver=None,
        engine_aliases: Optional[Dict[str, Dict[str, str]]] = None,
    ) -> "ModelResolution":
        """Build a ModelResolution from the clicked ``input_id`` and an engine hint.

        Args:
            input_id: Identifier chosen by the user, optionally prefixed with
                ``provider:``.
            engine: Optional engine hint, used when ``input_id`` carries no
                recognized provider prefix. Without either, the provider
                defaults to ``"local_mlx"``.
            display_name: Optional label; defaults to the stripped ``input_id``.
            user_email: When given and the provider is not ``local_mlx``,
                appended to ``expected_current`` as ``"<load_id>::<email>"``.
            alias_resolver: Optional callable
                ``(model_id, engine) -> resolved_model_id``. Exceptions it
                raises are logged at debug level and otherwise ignored.
            engine_aliases: Optional table ``{short_name: {engine: real_id}}``,
                looked up by the lower-cased model name.

        Raises:
            ValueError: If ``input_id`` is empty, or contains nothing after a
                recognized provider prefix (for example ``"ollama:"``).
        """
        raw = str(input_id or "").strip()
        if not raw:
            raise ValueError("모델 식별자가 비어 있습니다.")

        engine_hint = _canonical_engine(engine)

        # A recognized provider prefix takes precedence over the engine hint.
        provider: Optional[str] = None
        model_name = raw
        if ":" in raw:
            prefix, rest = raw.split(":", 1)
            prefix_canon = _known_provider(prefix)
            if prefix_canon:
                provider = prefix_canon
                model_name = rest.strip()
                if not model_name:
                    # "provider:" alone names no model; treat it like empty input.
                    raise ValueError("모델 식별자가 비어 있습니다.")

        if not provider:
            provider = engine_hint or "local_mlx"

        # Alias table, e.g.
        # {"gpt-oss-20b": {"local_mlx": "mlx-community/...", "ollama": "gpt-oss:20b"}}
        resolved_model = model_name
        if engine_aliases:
            aliases = engine_aliases.get(model_name.lower())
            if aliases:
                mapped = aliases.get(provider)
                if mapped:
                    resolved_model = mapped

        # An externally supplied alias_resolver gets the final say.
        if alias_resolver:
            try:
                maybe = alias_resolver(resolved_model, provider)
                if maybe:
                    if ":" in maybe and maybe.split(":", 1)[0] in LOCAL_ENGINES:
                        provider2, resolved_model = maybe.split(":", 1)
                        provider = provider2
                    else:
                        resolved_model = maybe
            except Exception:
                logger.debug("alias_resolver failed for %s", resolved_model, exc_info=True)

        download_id = resolved_model
        if provider == "local_mlx":
            load_id = resolved_model
        else:
            load_id = f"{provider}:{resolved_model}"

        expected_current = load_id
        if user_email and provider != "local_mlx":
            expected_current = f"{load_id}::{user_email}"

        return cls(
            input_id=raw,
            engine=provider,
            provider=provider,
            resolved_model=resolved_model,
            download_id=download_id,
            load_id=load_id,
            expected_current=expected_current,
            display_name=(display_name or raw),
            metadata={"engine_hint": engine_hint or ""},
        )

    # ──────────────────────────────────────────────────────────────────────

    def update_after_load(self, *, actual_current: Optional[str]) -> None:
        """Sync ids with the identifier reported after the model was loaded.

        Used when loading assigns a new identifier (for example an LM Studio
        instance id). ``expected_current`` always becomes ``actual_current``.
        When ``actual_current`` contains a ":", ``load_id`` becomes the part
        before any ``"::"`` suffix. ``resolved_model`` is then derived from
        that head: the text after a *recognized* provider prefix, or the whole
        head when the text before ":" is not a known provider (so an
        unprefixed ``"gpt-oss:20b"`` is not reduced to ``"20b"``).
        """
        if not actual_current:
            return
        self.expected_current = actual_current
        if ":" in actual_current:
            head = actual_current.split("::", 1)[0]
            self.load_id = head
            if ":" in head:
                prefix, rest = head.split(":", 1)
                # Strip the prefix only when it really names a provider.
                self.resolved_model = rest if _known_provider(prefix) else head
180
+
181
+
182
+ # ── PrepareReport ─────────────────────────────────────────────────────────────
183
+
184
+
185
@dataclass
class PrepareReport:
    """Result object returned in the same shape by prepare_model_core and the SSE flow."""

    # Outcome
    status: str  # "ok" | "degraded" | "failed"
    state: PrepareState
    resolution: ModelResolution
    current: Optional[str]
    message: Optional[str] = None

    # Progress flags
    downloaded: bool = False
    loaded: bool = False
    ready_to_chat: bool = False

    # Compatibility / smoke-test details
    compatibility_status: str = "unknown"  # ok / degraded / failed / unknown
    smoke_test: Optional[Dict[str, Any]] = None

    # Diagnostics
    stage_logs: List[Dict[str, Any]] = field(default_factory=list)
    error: Optional[Dict[str, Any]] = None
    install_result: Dict[str, Any] = field(default_factory=dict)
    download_result: Optional[Dict[str, Any]] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the report as a dict with ``state`` reduced to a plain string."""
        payload = asdict(self)
        current_state = self.state
        if isinstance(current_state, PrepareState):
            payload["state"] = current_state.value
        else:
            payload["state"] = str(current_state)
        payload["resolution"] = self.resolution.to_dict()
        return payload
209
+
210
+
211
+ # ── State machine helpers ─────────────────────────────────────────────────────
212
+
213
+
214
def transition_log(state: PrepareState, message: str, extra: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Build one stage-log entry for a state transition.

    The entry always has ``"state"`` (taken from ``state.value``) and
    ``"message"``; ``"extra"`` is added only when ``extra`` is truthy.
    """
    entry: Dict[str, Any] = {"state": state.value, "message": message}
    return {**entry, "extra": extra} if extra else entry
219
+
220
+
221
# Public API of the model-resolution module.
__all__ = [
    # data types
    "ModelResolution",
    "PrepareState",
    "PrepareReport",
    # helpers and constants
    "transition_log",
    "LOCAL_ENGINES",
]