ltcai 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/llm_router.py CHANGED
@@ -626,3 +626,150 @@ class LLMRouter:
626
626
  except Exception as e:
627
627
  print(f"⚠️ VLM image decode failed: {e}")
628
628
  return None
629
+
630
+ # ── Document Generation Pipeline ──────────────────────────────────────
631
+
632
async def generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> str:
    """Generate a complete document using a caller-supplied system prompt.

    Args:
        message: The user request that the document should answer.
        system_prompt: System prompt sent ahead of ``message``.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The generated text passed through ``normalize_branding``, or the
        literal ``"No model loaded."`` when no model is current.
    """
    if not self._current:
        return "No model loaded."
    self._touch()
    # Snapshot the id once: the worker thread below must not re-read
    # ``self._current``, which may change while the job waits in the executor.
    model_id = self._current
    cached = self._cache[model_id]

    if isinstance(cached, CloudModel):
        return await self._cloud_generate_document(
            cached, message, system_prompt, max_tokens, temperature
        )

    model, tokenizer, draft_model = cached

    # ChatML-style prompt used when the tokenizer has no chat template or
    # applying the template fails.
    chatml_prompt = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    prompt = chatml_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(
                msgs, tokenize=False, add_generation_prompt=True
            )
        except Exception:
            prompt = chatml_prompt

    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()

    def _gen():
        import mlx.core as mx

        mx.set_default_device(mx.gpu)
        lowered_id = model_id.lower()
        is_gemma4 = "gemma-4" in lowered_id or "gemma4" in lowered_id
        if is_gemma4 and VLM_AVAILABLE:
            from mlx_vlm import generate as vlm_gen

            return vlm_gen(
                model,
                tokenizer,
                prompt=prompt,
                image=None,
                max_tokens=max_tokens,
                temp=temperature,
                draft_model=draft_model,
                draft_kind="mtp",
            )
        from mlx_lm import generate as lm_gen

        return lm_gen(
            model,
            tokenizer,
            prompt=prompt,
            max_tokens=max_tokens,
            temp=temperature,
            draft_model=draft_model,
        )

    # Generation is blocking, so it runs on the shared executor.
    result = await loop.run_in_executor(executor, _gen)
    if hasattr(result, "text"):
        return normalize_branding(result.text)
    return normalize_branding(str(result))
677
+
678
async def _cloud_generate_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> str:
    """Generate a document through an OpenAI-compatible chat completion call.

    Args:
        cloud: Cloud model handle providing ``client`` and ``model``.
        message: User request text.
        system_prompt: System prompt sent as the first message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The first choice's content passed through ``normalize_branding``;
        empty input to ``normalize_branding`` when the response carries no
        choices or no content.

    Raises:
        RuntimeError: When the completion request fails; the message comes
            from ``self._local_server_error_hint`` and the original
            exception is chained.
    """
    try:
        response = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        raise RuntimeError(self._local_server_error_hint(cloud, e)) from e
    # The streaming sibling tolerates events without choices; do the same
    # here instead of raising IndexError on an empty ``choices`` list.
    choices = response.choices
    content = choices[0].message.content if choices else None
    return normalize_branding(content or "")
692
+
693
async def stream_generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> AsyncIterator[str]:
    """Stream a document as text chunks using a caller-supplied system prompt.

    Args:
        message: The user request that the document should answer.
        system_prompt: System prompt sent ahead of ``message``.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the backend.

    Yields:
        Text chunks passed through ``normalize_branding``. Yields the single
        chunk ``"No model loaded."`` when no model is current, and a
        ``"⚠️ Error: ..."`` chunk when local generation raises.
    """
    if not self._current:
        yield "No model loaded."
        return
    self._touch()
    # Snapshot the id once: the worker thread below must not re-read
    # ``self._current``, which may change while the job waits in the executor.
    model_id = self._current
    cached = self._cache[model_id]

    if isinstance(cached, CloudModel):
        async for chunk in self._cloud_stream_document(cached, message, system_prompt, max_tokens, temperature):
            yield chunk
        return

    model, tokenizer, draft_model = cached

    # ChatML-style prompt used when the tokenizer has no chat template or
    # applying the template fails.
    chatml_prompt = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    prompt = chatml_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(
                msgs, tokenize=False, add_generation_prompt=True
            )
        except Exception:
            prompt = chatml_prompt

    import threading

    # We are inside a coroutine, so a running loop is guaranteed to exist.
    loop = asyncio.get_running_loop()
    queue: asyncio.Queue = asyncio.Queue()
    # Set when the consumer stops iterating so the worker can stop early
    # instead of generating up to ``max_tokens`` for nobody.
    abandoned = threading.Event()

    def _stream():
        try:
            # Import and device selection live inside the ``try`` so that a
            # failure here still reaches ``finally`` and enqueues the
            # sentinel; otherwise the consumer would wait forever.
            import mlx.core as mx

            mx.set_default_device(mx.gpu)
            lowered_id = model_id.lower()
            is_gemma4 = "gemma-4" in lowered_id or "gemma4" in lowered_id
            if is_gemma4 and VLM_AVAILABLE:
                from mlx_vlm import stream_generate as vlm_stream

                gen = vlm_stream(
                    model,
                    tokenizer,
                    prompt=prompt,
                    image=None,
                    max_tokens=max_tokens,
                    temp=temperature,
                    draft_model=draft_model,
                    draft_kind="mtp",
                )
            else:
                from mlx_lm import stream_generate as lm_stream

                gen = lm_stream(
                    model,
                    tokenizer,
                    prompt=prompt,
                    max_tokens=max_tokens,
                    temp=temperature,
                    draft_model=draft_model,
                )
            for chunk in gen:
                if abandoned.is_set():
                    break
                text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
                loop.call_soon_threadsafe(queue.put_nowait, text)
        except Exception as e:
            loop.call_soon_threadsafe(queue.put_nowait, f"⚠️ Error: {e}")
        finally:
            # ``None`` is the end-of-stream sentinel for the consumer loop.
            loop.call_soon_threadsafe(queue.put_nowait, None)

    loop.run_in_executor(executor, _stream)
    try:
        while True:
            chunk = await queue.get()
            if chunk is None:
                break
            yield normalize_branding(chunk)
    finally:
        # Runs on normal completion, cancellation, and generator close.
        abandoned.set()
754
+
755
async def _cloud_stream_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> AsyncIterator[str]:
    """Stream a document through an OpenAI-compatible streaming completion.

    Args:
        cloud: Cloud model handle providing ``client`` and ``model``.
        message: User request text.
        system_prompt: System prompt sent as the first message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Yields:
        Non-empty content deltas passed through ``normalize_branding``. If
        the request fails, either when it is created or while the stream is
        being consumed, a single ``"⚠️ ..."`` chunk built from
        ``self._local_server_error_hint`` is yielded and the stream ends.
    """
    try:
        stream = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
        async for event in stream:
            if not event.choices:
                continue
            # Tolerate events whose ``delta`` is missing or has no content.
            delta = getattr(event.choices[0].delta, "content", None)
            if delta:
                yield normalize_branding(delta)
    except Exception as e:
        # Covers both request creation and mid-stream failures, so a dropped
        # connection is reported the same way as a failed request.
        yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "0.2.2",
3
+ "version": "0.3.1",
4
4
  "description": "Lattice AI local MLX/cloud LLM workspace server",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {
package/server.py CHANGED
@@ -46,8 +46,10 @@ from pydantic import BaseModel
46
46
  from PIL import Image
47
47
 
48
48
  from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
49
- from knowledge_graph import KnowledgeGraphStore
49
+ from knowledge_graph import KnowledgeGraphStore, set_llm_router
50
50
  from knowledge_graph_api import create_knowledge_graph_router
51
+ from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
52
+ from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
51
53
  from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
52
54
  from latticeai.core.security import (
53
55
  hash_password as _hash_password,
@@ -70,6 +72,24 @@ from latticeai.core.audit import (
70
72
  )
71
73
  from latticeai.api.auth import create_auth_router
72
74
  from latticeai.api.admin import create_admin_router
75
+ from latticeai.api.security_dashboard import create_security_router as _create_security_router
76
+ from latticeai.core.model_compat import (
77
+ ensure_profile as _ensure_compat_profile,
78
+ record_smoke_result as _record_smoke_result,
79
+ fast_postprocess as _compat_fast_postprocess,
80
+ validate_smoke_response as _validate_smoke_response,
81
+ list_cached_profiles as _list_compat_profiles,
82
+ SMOKE_PROMPT as _SMOKE_PROMPT,
83
+ )
84
+ from latticeai.core.model_resolution import (
85
+ ModelResolution as _ModelResolution,
86
+ PrepareState as _PrepareState,
87
+ PrepareReport as _PrepareReport,
88
+ )
89
+ from latticeai.core.graph_curator import (
90
+ auto_build_graph_overlay as _auto_build_graph_overlay,
91
+ mask_secrets as _curator_mask_secrets,
92
+ )
73
93
  import mcp_registry
74
94
  from mcp_registry import (
75
95
  MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
@@ -1001,7 +1021,9 @@ def build_admin_audit_report(users: Dict) -> Dict:
1001
1021
  )
1002
1022
 
1003
1023
  router = LLMRouter()
1024
+ set_llm_router(router)
1004
1025
  gardener = PReinforceGardener()
1026
+ _doc_gen_sessions: dict = {} # conversation_id → DocumentGenerationSession
1005
1027
 
1006
1028
  async def autoload_default_model() -> None:
1007
1029
  if not AUTOLOAD_MODELS:
@@ -1103,7 +1125,7 @@ async def lifespan(app: FastAPI):
1103
1125
  except Exception:
1104
1126
  pass
1105
1127
 
1106
- app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.2.2", lifespan=lifespan)
1128
+ app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.3.0", lifespan=lifespan)
1107
1129
 
1108
1130
  CORS_ALLOWED_ORIGINS = [
1109
1131
  f"http://localhost:{DEFAULT_PORT}",
@@ -1171,19 +1193,64 @@ app.include_router(create_admin_router(
1171
1193
  default_port=DEFAULT_PORT,
1172
1194
  ))
1173
1195
 
1196
+ # ── Security & Audit Command Center (피드백 #5) ──────────────────────────────
1197
def _security_audit_events_safe() -> List[Dict]:
    """Return the audit log for ``AUDIT_FILE``, or an empty list on failure.

    Any exception raised while loading is logged as a warning and swallowed,
    so callers always receive a list.
    """
    try:
        events = _get_audit_log(AUDIT_FILE)
    except Exception as load_error:
        logging.warning("security audit events load failed: %s", load_error)
        events = []
    return events
1203
+
1204
def _security_list_uploaded_files() -> List[Dict]:
    """Expose ``document_upload`` audit events as a list of file records.

    ``file_id`` is the event's filename when present; otherwise it is the
    event's position in the full audit log, not in the filtered list.
    """
    return [
        {
            "file_id": str(event.get("filename") or position),
            "filename": event.get("filename"),
            "user_email": event.get("user_email"),
            "user_nickname": event.get("user_nickname"),
            "uploaded_at": event.get("timestamp"),
            "ext": event.get("ext"),
            "bytes": event.get("bytes"),
            "sensitivity": event.get("sensitivity") or "none",
            "sensitive_labels": event.get("sensitive_labels") or [],
            "content_preview": event.get("content_preview"),
        }
        for position, event in enumerate(_security_audit_events_safe())
        if event.get("event_type") == "document_upload"
    ]
1223
+
1224
+ app.include_router(_create_security_router(
1225
+ require_admin=require_admin,
1226
+ get_history=get_history,
1227
+ get_audit_events=_security_audit_events_safe,
1228
+ classify_sensitive_message=classify_sensitive_message,
1229
+ build_sensitivity_report=build_sensitivity_report,
1230
+ list_uploaded_files=_security_list_uploaded_files,
1231
+ append_audit_event=append_audit_event,
1232
+ ))
1233
+
1234
def ui_file_response(path: Path) -> FileResponse:
    """Build a ``FileResponse`` for ``path`` with caching disabled via headers."""
    no_cache_headers = (
        ("Cache-Control", "no-cache, no-store, must-revalidate"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    )
    file_response = FileResponse(path)
    for header_name, header_value in no_cache_headers:
        file_response.headers[header_name] = header_value
    return file_response
1240
+
1174
1241
  @app.get("/")
1175
1242
  async def root(request: Request, code: Optional[str] = None, authorized: Optional[str] = Cookie(None)):
1176
1243
  """로그인/회원가입 페이지. 초대 게이트 활성화 시 코드 검증 후 진입."""
1177
1244
  if not INVITE_GATE_ENABLED:
1178
- return FileResponse(STATIC_DIR / "account.html")
1245
+ return ui_file_response(STATIC_DIR / "account.html")
1179
1246
 
1180
1247
  # 1. 이미 쿠키로 인증된 경우
1181
1248
  if authorized == "true":
1182
- return FileResponse(STATIC_DIR / "account.html")
1249
+ return ui_file_response(STATIC_DIR / "account.html")
1183
1250
 
1184
1251
  # 2. 초대 코드가 일치하는 경우 (최초 진입)
1185
1252
  if code == INVITE_CODE:
1186
- response = FileResponse(STATIC_DIR / "account.html")
1253
+ response = ui_file_response(STATIC_DIR / "account.html")
1187
1254
  response.set_cookie(key="authorized", value="true", httponly=True, samesite="lax", max_age=60*60*24*7)
1188
1255
  return response
1189
1256
 
@@ -1203,7 +1270,7 @@ async def root(request: Request, code: Optional[str] = None, authorized: Optiona
1203
1270
  @app.get("/account")
1204
1271
  async def account_page():
1205
1272
  """Direct login/register page route used by logout and manual navigation."""
1206
- return FileResponse(STATIC_DIR / "account.html")
1273
+ return ui_file_response(STATIC_DIR / "account.html")
1207
1274
 
1208
1275
 
1209
1276
  @app.get("/manifest.json")
@@ -1226,7 +1293,7 @@ async def service_worker():
1226
1293
 
1227
1294
  @app.get("/chat")
1228
1295
  async def chat_page(request: Request):
1229
- return FileResponse(STATIC_DIR / "chat.html")
1296
+ return ui_file_response(STATIC_DIR / "chat.html")
1230
1297
 
1231
1298
 
1232
1299
  @app.get("/admin")
@@ -1959,15 +2026,11 @@ def get_lmstudio_models(*, force: bool = False) -> List[Dict[str, object]]:
1959
2026
  global _LMSTUDIO_MODELS_CACHE, _LMSTUDIO_MODELS_CACHE_TS
1960
2027
  if not force and time.monotonic() - _LMSTUDIO_MODELS_CACHE_TS < _LMSTUDIO_MODELS_CACHE_TTL:
1961
2028
  return _LMSTUDIO_MODELS_CACHE
1962
- try:
1963
- ensure_lmstudio_server()
1964
- except HTTPException:
1965
- return _LMSTUDIO_MODELS_CACHE
1966
2029
  try:
1967
2030
  payload = _json_request(
1968
2031
  f"{lmstudio_native_api_base()}/api/v1/models",
1969
2032
  headers={"Authorization": f"Bearer {os.getenv('LMSTUDIO_API_KEY') or 'lmstudio'}"},
1970
- timeout=5,
2033
+ timeout=2.5,
1971
2034
  )
1972
2035
  except Exception:
1973
2036
  return _LMSTUDIO_MODELS_CACHE
@@ -2935,6 +2998,82 @@ def ensure_engine_ready(engine: str) -> Dict[str, object]:
2935
2998
  return {"engine": engine, "installed": True, "installed_now": True, "install": result}
2936
2999
 
2937
3000
 
3001
def build_model_resolution(
    input_id: str,
    engine: Optional[str],
    *,
    user_email: Optional[str] = None,
    display_name: Optional[str] = None,
) -> _ModelResolution:
    """Shared ``ModelResolution`` factory (feedback items #1/#2).

    Takes the ``input_id`` the user clicked plus an engine hint and builds
    the canonical identity that every later stage shares.

    Args:
        input_id: Model identifier as supplied by the caller.
        engine: Optional engine hint.
        user_email: Optional owner forwarded to the resolution.
        display_name: Optional label; falls back to ``input_id`` when falsy.

    Returns:
        The resolution built by ``_ModelResolution.from_request`` from the
        normalized identifier.
    """
    canonical_id = normalize_local_model_request(input_id, engine)
    label = display_name or input_id
    resolution = _ModelResolution.from_request(
        canonical_id,
        engine=engine,
        user_email=user_email,
        display_name=label,
        engine_aliases=MODEL_ENGINE_ALIASES,
    )
    return resolution
3021
+
3022
+
3023
+ _LOCAL_SMOKE_ENGINES = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
3024
+
3025
+
3026
async def _smoke_test_loaded_model(
    resolution: _ModelResolution,
    *,
    api_key_override: Optional[str] = None,
    timeout: float = 30,
) -> Dict[str, object]:
    """Run a short chat test right after loading to decide ``ready_to_chat``.

    Engines outside ``_LOCAL_SMOKE_ENGINES`` are treated as cloud models and
    skipped, because the test could cost the user money. Generation failures
    are reported in the result rather than raised, and the outcome is
    recorded through ``_record_smoke_result``.

    Args:
        resolution: Resolution of the model that was just loaded.
        api_key_override: Accepted for call-site compatibility; not used by
            this function.
        timeout: Seconds to wait for the smoke generation before giving up.

    Returns:
        A dict with ``ok``, ``reason``, ``answer`` and ``profile`` keys, plus
        ``skipped: True`` when the test was not run.
    """
    engine_name = (resolution.engine or "").lower()
    if engine_name not in _LOCAL_SMOKE_ENGINES:
        profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
        return {
            "ok": True,
            "reason": "skipped (cloud model — smoke test would incur cost)",
            "answer": None,
            "profile": profile.to_dict(),
            "skipped": True,
        }
    try:
        text = await asyncio.wait_for(
            router.generate(
                _SMOKE_PROMPT,
                context=None,
                max_tokens=128,
                temperature=0.1,
            ),
            timeout=timeout,
        )
    except Exception as exc:  # pragma: no cover - generator may not exist on all engines
        if isinstance(exc, asyncio.TimeoutError):
            # TimeoutError stringifies to "", which would otherwise be
            # reported as the uninformative "generation_failed".
            reason = f"timeout after {timeout:g}s"
        else:
            reason = str(exc)[:200] or "generation_failed"
        profile = _record_smoke_result(resolution.load_id, resolution.engine, False, reason)
        return {
            "ok": False,
            "reason": reason,
            "answer": None,
            "profile": profile.to_dict(),
        }

    # Post-process with the model's compatibility profile before validating.
    profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
    cleaned = _compat_fast_postprocess(str(text or ""), profile.to_dict())
    ok, reason = _validate_smoke_response(cleaned)
    profile = _record_smoke_result(resolution.load_id, resolution.engine, ok, reason)
    return {
        "ok": ok,
        "reason": reason,
        "answer": cleaned,
        "profile": profile.to_dict(),
    }
3075
+
3076
+
2938
3077
  async def prepare_and_load_model(
2939
3078
  model_id: str,
2940
3079
  request: Request,
@@ -2947,6 +3086,14 @@ async def prepare_and_load_model(
2947
3086
  if not model_id:
2948
3087
  raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")
2949
3088
 
3089
+ # 피드백 #1: ModelResolution을 모든 단계가 공유한다.
3090
+ resolution = _ModelResolution.from_request(
3091
+ model_id,
3092
+ engine=engine,
3093
+ user_email=user_email or get_current_user(request),
3094
+ engine_aliases=MODEL_ENGINE_ALIASES,
3095
+ )
3096
+
2950
3097
  parsed_provider, parsed_model = parse_model_ref(model_id)
2951
3098
  if parsed_provider == "mlx":
2952
3099
  parsed_provider = "local_mlx"
@@ -3004,6 +3151,18 @@ async def prepare_and_load_model(
3004
3151
  api_key_override=user_api_key,
3005
3152
  owner=effective_email or None,
3006
3153
  )
3154
+ # 피드백 #1/#2: 로드 직후 ModelResolution을 실제 current로 동기화하고 smoke test 수행.
3155
+ resolution.update_after_load(actual_current=router.current_model_id)
3156
+ smoke_result: Dict[str, object] = {}
3157
+ ready_to_chat = True
3158
+ compat_status = "ok"
3159
+ try:
3160
+ smoke_result = await _smoke_test_loaded_model(resolution, api_key_override=user_api_key)
3161
+ ready_to_chat = bool(smoke_result.get("ok"))
3162
+ compat_status = "ok" if ready_to_chat else "degraded"
3163
+ except Exception as exc: # never break load on smoke test failures
3164
+ logging.warning("smoke test failed for %s: %s", resolution.load_id, exc)
3165
+ compat_status = "unknown"
3007
3166
  return {
3008
3167
  "status": "ok",
3009
3168
  "message": msg,
@@ -3012,6 +3171,12 @@ async def prepare_and_load_model(
3012
3171
  "engine": parsed_provider,
3013
3172
  "installed_now": bool(install_result.get("installed_now")),
3014
3173
  "download": download_result,
3174
+ "resolution": resolution.to_dict(),
3175
+ "downloaded": True,
3176
+ "loaded": True,
3177
+ "ready_to_chat": ready_to_chat,
3178
+ "compatibility_status": compat_status,
3179
+ "smoke_test": smoke_result,
3015
3180
  }
3016
3181
 
3017
3182
 
@@ -3217,6 +3382,30 @@ async def prepare_and_load_model_stream(
3217
3382
  api_key_override=user_api_key,
3218
3383
  owner=effective_email or None,
3219
3384
  )
3385
+ # 피드백 #1/#2: SSE에도 ModelResolution과 smoke test 결과를 같이 내려준다.
3386
+ resolution_stream = _ModelResolution.from_request(
3387
+ prepared_model_id,
3388
+ engine=prepared_provider,
3389
+ user_email=effective_email or None,
3390
+ engine_aliases=MODEL_ENGINE_ALIASES,
3391
+ )
3392
+ resolution_stream.update_after_load(actual_current=router.current_model_id)
3393
+ yield sse_event("progress", model_download_progress_payload(
3394
+ "smoke_test",
3395
+ "채팅 호환성 테스트 중입니다.",
3396
+ percent=98,
3397
+ indeterminate=True,
3398
+ ))
3399
+ smoke_result: Dict[str, object] = {}
3400
+ ready_to_chat = True
3401
+ compat_status = "ok"
3402
+ try:
3403
+ smoke_result = await _smoke_test_loaded_model(resolution_stream, api_key_override=user_api_key)
3404
+ ready_to_chat = bool(smoke_result.get("ok"))
3405
+ compat_status = "ok" if ready_to_chat else "degraded"
3406
+ except Exception as exc:
3407
+ logging.warning("smoke test (stream) failed for %s: %s", resolution_stream.load_id, exc)
3408
+ compat_status = "unknown"
3220
3409
  result = {
3221
3410
  "status": "ok",
3222
3411
  "message": msg,
@@ -3225,6 +3414,12 @@ async def prepare_and_load_model_stream(
3225
3414
  "engine": prepared_provider,
3226
3415
  "installed_now": bool(isinstance(install_result, dict) and install_result.get("installed_now")),
3227
3416
  "download": download_result,
3417
+ "resolution": resolution_stream.to_dict(),
3418
+ "downloaded": True,
3419
+ "loaded": True,
3420
+ "ready_to_chat": ready_to_chat,
3421
+ "compatibility_status": compat_status,
3422
+ "smoke_test": smoke_result,
3228
3423
  }
3229
3424
  yield sse_event("progress", model_download_progress_payload(
3230
3425
  "done",
@@ -3296,7 +3491,7 @@ async def verify_cloud_models(force: bool = False, provider_filter: Optional[str
3296
3491
 
3297
3492
  @app.get("/health")
3298
3493
  async def health(request: Request):
3299
- base = {"status": "ok", "version": "0.2.2", "mode": APP_MODE}
3494
+ base = {"status": "ok", "version": "0.3.0", "mode": APP_MODE}
3300
3495
  if not get_current_user(request) and REQUIRE_AUTH:
3301
3496
  return base
3302
3497
  engines = await asyncio.to_thread(engine_status)
@@ -3451,22 +3646,69 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
3451
3646
  return {"ok": True, "provider": req.provider, "user_email": target_email, "scope": "user"}
3452
3647
 
3453
3648
 
3649
def _recommended_with_engine_options(items: List[Dict[str, object]]) -> List[Dict[str, object]]:
    """Attach per-engine choices (``engine_options``) to recommended models.

    Feedback #1: by the time a recommendation card is clicked in the
    frontend, the engine and concrete model to download/load are already
    fixed.

    Args:
        items: Catalog entries; each must provide ``id``, ``name``, ``tag``
            and ``size``.

    Returns:
        One dict per entry with the catalog fields, ``display_name``,
        ``engine_options`` and ``recommended_engine`` (the engine of the
        first option).
    """
    engine_order = ("local_mlx", "ollama", "lmstudio", "llamacpp", "vllm")
    enriched: List[Dict[str, object]] = []
    for entry in items:
        card = {
            "id": entry["id"],
            "name": entry["name"],
            "tag": entry["tag"],
            "size": entry["size"],
            "display_name": entry.get("name") or entry.get("id"),
        }
        alias_map = MODEL_ENGINE_ALIASES.get(str(entry["id"]).lower()) or {}
        resolved = ((engine, alias_map.get(engine)) for engine in engine_order)
        choices: List[Dict[str, str]] = [
            {
                "engine": engine,
                "model_id": target,
                "load_id": target if engine == "local_mlx" else f"{engine}:{target}",
            }
            for engine, target in resolved
            if target
        ]
        # If no engine has an alias, use the local_mlx catalog entry itself.
        if not choices:
            choices = [
                {
                    "engine": "local_mlx",
                    "model_id": entry["id"],
                    "load_id": entry["id"],
                }
            ]
        card["engine_options"] = choices
        card["recommended_engine"] = choices[0]["engine"]
        enriched.append(card)
    return enriched
3687
+
3688
+
3454
3689
  @app.get("/models")
3455
3690
  async def list_models():
3456
3691
  """HuggingFace 추천 모델 목록 및 로드 상태 반환"""
3457
- recommended = [
3458
- {"id": item["id"], "name": item["name"], "tag": item["tag"], "size": item["size"]}
3459
- for item in filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", []))
3460
- ]
3692
+ recommended = _recommended_with_engine_options(
3693
+ list(filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", [])))
3694
+ )
3461
3695
  return {
3462
3696
  "recommended": recommended,
3463
3697
  "cloud": router.detected_cloud_models(),
3464
3698
  "engines": await asyncio.to_thread(engine_status),
3465
3699
  "loaded": router.loaded_model_ids,
3466
3700
  "current": router.current_model_id,
3701
+ "compat_profiles": _list_compat_profiles(),
3467
3702
  }
3468
3703
 
3469
3704
 
3705
@app.get("/models/compat-profiles")
async def list_model_compat_profiles(request: Request):
    """Feedback #3: report the cached Model Compatibility Layer profiles.

    ``require_user`` is called with the request before the profiles are read.
    """
    require_user(request)
    cached_profiles = _list_compat_profiles()
    return {"profiles": cached_profiles}
3710
+
3711
+
3470
3712
  # ── Model Management ───────────────────────────────────────────────────────────
3471
3713
 
3472
3714
  @app.post("/models/load")
@@ -3636,12 +3878,24 @@ async def chat(req: ChatRequest, request: Request):
3636
3878
  except Exception as e:
3637
3879
  logging.warning("Knowledge reinforcement skipped: %s", e)
3638
3880
 
3881
+ is_doc_gen = detect_document_intent(req.message)
3882
+ doc_gen_context_result = None
3883
+
3639
3884
  try:
3640
3885
  if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3641
- graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
3642
- if graph_context:
3643
- context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
3644
- print("🕸️ Context reinforced with knowledge graph.")
3886
+ if is_doc_gen:
3887
+ doc_gen_context_result = retrieve_context_for_generation(
3888
+ KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
3889
+ )
3890
+ graph_md = doc_gen_context_result.get("context_markdown", "")
3891
+ if graph_md:
3892
+ context += f"\n\n[KNOWLEDGE GRAPH — Document Generation Context]\n{graph_md}"
3893
+ print("📝 Document generation context retrieved from knowledge graph.")
3894
+ else:
3895
+ graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
3896
+ if graph_context:
3897
+ context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
3898
+ print("🕸️ Context reinforced with knowledge graph.")
3645
3899
  except Exception as e:
3646
3900
  logging.warning("Knowledge graph reinforcement skipped: %s", e)
3647
3901
 
@@ -3651,7 +3905,6 @@ async def chat(req: ChatRequest, request: Request):
3651
3905
  context += f"\n\n{screenshot_context}"
3652
3906
 
3653
3907
  if env_bool("LATTICEAI_AUTO_READ_CHAT_PATHS", default=False):
3654
- # Off by default: automatic local-file injection can leak files to cloud models.
3655
3908
  _file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
3656
3909
  for _m in _file_path_re.finditer(req.message or ""):
3657
3910
  _fpath = _m.group(1).strip()
@@ -3669,6 +3922,55 @@ async def chat(req: ChatRequest, request: Request):
3669
3922
  if req.source != "telegram":
3670
3923
  asyncio.create_task(broadcast_web_chat("user", req.message))
3671
3924
 
3925
+ if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3926
+ conv_key = req.conversation_id or "default"
3927
+ session = _doc_gen_sessions.get(conv_key)
3928
+ if session is None:
3929
+ session = DocumentGenerationSession()
3930
+ _doc_gen_sessions[conv_key] = session
3931
+ graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
3932
+ system_prompt = session.get_system_prompt(graph_md)
3933
+ sources = (doc_gen_context_result or {}).get("sources", [])
3934
+ footnote = format_sources_footnote(sources)
3935
+
3936
+ if req.stream:
3937
+ async def _stream_doc_gen():
3938
+ collected = []
3939
+ async for chunk in router.stream_generate_document(
3940
+ req.message, system_prompt,
3941
+ max_tokens=req.max_tokens or 8192,
3942
+ temperature=req.temperature or 0.3,
3943
+ ):
3944
+ collected.append(chunk)
3945
+ yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
3946
+ full_text = "".join(collected)
3947
+ if footnote:
3948
+ yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
3949
+ full_text += footnote
3950
+ session.update(graph_md, full_text, req.conversation_id)
3951
+ save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3952
+ if req.source != "telegram":
3953
+ asyncio.create_task(broadcast_web_chat("assistant", full_text))
3954
+ yield "data: [DONE]\n\n"
3955
+ return StreamingResponse(
3956
+ _stream_doc_gen(),
3957
+ media_type="text/event-stream",
3958
+ headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
3959
+ )
3960
+ else:
3961
+ result = await router.generate_document(
3962
+ req.message, system_prompt,
3963
+ max_tokens=req.max_tokens or 8192,
3964
+ temperature=req.temperature or 0.3,
3965
+ )
3966
+ if footnote:
3967
+ result += footnote
3968
+ session.update(graph_md, result, req.conversation_id)
3969
+ save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3970
+ if req.source != "telegram":
3971
+ asyncio.create_task(broadcast_web_chat("assistant", str(result)))
3972
+ return JSONResponse(content={"response": str(result)})
3973
+
3672
3974
  if req.stream:
3673
3975
  recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
3674
3976
  stream_context = context
@@ -13,7 +13,7 @@
13
13
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
14
14
  <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap">
15
15
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@tabler/icons-webfont@latest/tabler-icons.min.css">
16
- <link rel="stylesheet" href="/static/lattice-reference.css">
16
+ <link rel="stylesheet" href="/static/lattice-reference.css?v=0.3.3">
17
17
  </head>
18
18
  <body class="lattice-ref-auth">
19
19
  <div class="orb orb-1"></div>
@@ -103,6 +103,6 @@
103
103
  <a href="#" onclick="return false;" id="privacy-link">개인정보 처리방침</a>
104
104
  </footer>
105
105
 
106
- <script src="/static/scripts/account.js"></script>
106
+ <script src="/static/scripts/account.js?v=0.3.3"></script>
107
107
  </body>
108
108
  </html>