npm - ltcai - Versions diffs - 0.2.2 → 0.3.1 - Mend

ltcai 0.2.2 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26)

package/README.md +24 -0
package/docs/CHANGELOG.md +125 -0
package/kg_schema.py +64 -15
package/knowledge_graph.py +299 -2
package/knowledge_graph_api.py +10 -2
package/latticeai/api/security_dashboard.py +580 -0
package/latticeai/core/__init__.py +1 -1
package/latticeai/core/context_builder.py +191 -0
package/latticeai/core/document_generator.py +103 -0
package/latticeai/core/graph_curator.py +417 -0
package/latticeai/core/model_compat.py +407 -0
package/latticeai/core/model_resolution.py +227 -0
package/llm_router.py +147 -0
package/package.json +1 -1
package/server.py +324 -22
package/static/account.html +2 -2
package/static/admin.html +75 -1
package/static/chat.html +2 -2
package/static/css/tokens.css +26 -0
package/static/graph.html +2 -2
package/static/lattice-reference.css +372 -414
package/static/scripts/account.js +10 -2
package/static/scripts/admin.js +296 -0
package/static/scripts/chat.js +82 -9
package/static/scripts/graph.js +6 -2
package/static/sw.js +1 -1

package/llm_router.py CHANGED

@@ -626,3 +626,150 @@ class LLMRouter:
         except Exception as e:
             print(f"⚠️ VLM image decode failed: {e}")
             return None
+    # ── Document Generation Pipeline ──────────────────────────────────────
+    async def generate_document(
+        self,
+        message: str,
+        system_prompt: str,
+        *,
+        max_tokens: int = 8192,
+        temperature: float = 0.3,
+    ) -> str:
+        """Generate a document using a specialized system prompt with graph context."""
+        if not self._current:
+            return "No model loaded."
+        self._touch()
+        cached = self._cache[self._current]
+        if isinstance(cached, CloudModel):
+            return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)
+        model, tokenizer, draft_model = cached
+        if hasattr(tokenizer, "apply_chat_template"):
+            try:
+                msgs = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": message},
+                ]
+                prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
+            except Exception:
+                prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+        else:
+            prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+        loop = asyncio.get_event_loop()
+        def _gen():
+            import mlx.core as mx
+            mx.set_default_device(mx.gpu)
+            is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
+            if is_gemma4 and VLM_AVAILABLE:
+                from mlx_vlm import generate as vlm_gen
+                return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
+            else:
+                from mlx_lm import generate as lm_gen
+                return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
+        result = await loop.run_in_executor(executor, _gen)
+        if hasattr(result, "text"):
+            return normalize_branding(result.text)
+        return normalize_branding(str(result))
+    async def _cloud_generate_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> str:
+        try:
+            response = await cloud.client.chat.completions.create(
+                model=cloud.model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": message},
+                ],
+                max_tokens=max_tokens,
+                temperature=temperature,
+            )
+        except Exception as e:
+            raise RuntimeError(self._local_server_error_hint(cloud, e)) from e
+        return normalize_branding(response.choices[0].message.content or "")
+    async def stream_generate_document(
+        self,
+        message: str,
+        system_prompt: str,
+        *,
+        max_tokens: int = 8192,
+        temperature: float = 0.3,
+    ) -> AsyncIterator[str]:
+        """Stream document generation with specialized system prompt."""
+        if not self._current:
+            yield "No model loaded."
+            return
+        self._touch()
+        cached = self._cache[self._current]
+        if isinstance(cached, CloudModel):
+            async for chunk in self._cloud_stream_document(cached, message, system_prompt, max_tokens, temperature):
+                yield chunk
+            return
+        model, tokenizer, draft_model = cached
+        if hasattr(tokenizer, "apply_chat_template"):
+            try:
+                msgs = [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": message},
+                ]
+                prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
+            except Exception:
+                prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+        else:
+            prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
+        loop = asyncio.get_event_loop()
+        queue = asyncio.Queue()
+        def _stream():
+            import mlx.core as mx
+            mx.set_default_device(mx.gpu)
+            try:
+                is_gemma4 = "gemma-4" in self._current.lower() or "gemma4" in self._current.lower()
+                if is_gemma4 and VLM_AVAILABLE:
+                    from mlx_vlm import stream_generate as vlm_stream
+                    gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
+                else:
+                    from mlx_lm import stream_generate as lm_stream
+                    gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
+                for chunk in gen:
+                    text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
+                    loop.call_soon_threadsafe(queue.put_nowait, text)
+            except Exception as e:
+                loop.call_soon_threadsafe(queue.put_nowait, f"⚠️ Error: {e}")
+            finally:
+                loop.call_soon_threadsafe(queue.put_nowait, None)
+        loop.run_in_executor(executor, _stream)
+        while True:
+            chunk = await queue.get()
+            if chunk is None:
+                break
+            yield normalize_branding(chunk)
+    async def _cloud_stream_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> AsyncIterator[str]:
+        try:
+            stream = await cloud.client.chat.completions.create(
+                model=cloud.model,
+                messages=[
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": message},
+                ],
+                max_tokens=max_tokens,
+                temperature=temperature,
+                stream=True,
+            )
+        except Exception as e:
+            yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
+            return
+        async for event in stream:
+            if not event.choices:
+                continue
+            delta = event.choices[0].delta.content
+            if delta:
+                yield normalize_branding(delta)

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "ltcai",
-  "version": "0.2.2",
+  "version": "0.3.1",
   "description": "Lattice AI local MLX/cloud LLM workspace server",
   "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
   "repository": {

package/server.py CHANGED

@@ -46,8 +46,10 @@ from pydantic import BaseModel
 from PIL import Image
 from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
-from knowledge_graph import KnowledgeGraphStore
+from knowledge_graph import KnowledgeGraphStore, set_llm_router
 from knowledge_graph_api import create_knowledge_graph_router
+from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
+from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
 from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
 from latticeai.core.security import (
     hash_password as _hash_password,
@@ -70,6 +72,24 @@ from latticeai.core.audit import (
 )
 from latticeai.api.auth import create_auth_router
 from latticeai.api.admin import create_admin_router
+from latticeai.api.security_dashboard import create_security_router as _create_security_router
+from latticeai.core.model_compat import (
+    ensure_profile as _ensure_compat_profile,
+    record_smoke_result as _record_smoke_result,
+    fast_postprocess as _compat_fast_postprocess,
+    validate_smoke_response as _validate_smoke_response,
+    list_cached_profiles as _list_compat_profiles,
+    SMOKE_PROMPT as _SMOKE_PROMPT,
+)
+from latticeai.core.model_resolution import (
+    ModelResolution as _ModelResolution,
+    PrepareState as _PrepareState,
+    PrepareReport as _PrepareReport,
+)
+from latticeai.core.graph_curator import (
+    auto_build_graph_overlay as _auto_build_graph_overlay,
+    mask_secrets as _curator_mask_secrets,
+)
 import mcp_registry
 from mcp_registry import (
     MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
@@ -1001,7 +1021,9 @@ def build_admin_audit_report(users: Dict) -> Dict:
     )
 router = LLMRouter()
+set_llm_router(router)
 gardener = PReinforceGardener()
+_doc_gen_sessions: dict = {}  # conversation_id → DocumentGenerationSession
 async def autoload_default_model() -> None:
     if not AUTOLOAD_MODELS:
@@ -1103,7 +1125,7 @@ async def lifespan(app: FastAPI):
             except Exception:
                 pass
-app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.2.2", lifespan=lifespan)
+app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.3.0", lifespan=lifespan)
 CORS_ALLOWED_ORIGINS = [
     f"http://localhost:{DEFAULT_PORT}",
@@ -1171,19 +1193,64 @@ app.include_router(create_admin_router(
     default_port=DEFAULT_PORT,
 ))
+# ── Security & Audit Command Center (피드백 #5) ──────────────────────────────
+def _security_audit_events_safe() -> List[Dict]:
+    try:
+        return _get_audit_log(AUDIT_FILE)
+    except Exception as e:
+        logging.warning("security audit events load failed: %s", e)
+        return []
+def _security_list_uploaded_files() -> List[Dict]:
+    """Audit log에서 document_upload 이벤트를 가공해서 file 목록으로 노출."""
+    files: List[Dict] = []
+    for idx, e in enumerate(_security_audit_events_safe()):
+        if e.get("event_type") != "document_upload":
+            continue
+        files.append({
+            "file_id": str(e.get("filename") or idx),
+            "filename": e.get("filename"),
+            "user_email": e.get("user_email"),
+            "user_nickname": e.get("user_nickname"),
+            "uploaded_at": e.get("timestamp"),
+            "ext": e.get("ext"),
+            "bytes": e.get("bytes"),
+            "sensitivity": e.get("sensitivity") or "none",
+            "sensitive_labels": e.get("sensitive_labels") or [],
+            "content_preview": e.get("content_preview"),
+        })
+    return files
+app.include_router(_create_security_router(
+    require_admin=require_admin,
+    get_history=get_history,
+    get_audit_events=_security_audit_events_safe,
+    classify_sensitive_message=classify_sensitive_message,
+    build_sensitivity_report=build_sensitivity_report,
+    list_uploaded_files=_security_list_uploaded_files,
+    append_audit_event=append_audit_event,
+))
+def ui_file_response(path: Path) -> FileResponse:
+    response = FileResponse(path)
+    response.headers["Cache-Control"] = "no-cache, no-store, must-revalidate"
+    response.headers["Pragma"] = "no-cache"
+    response.headers["Expires"] = "0"
+    return response
 @app.get("/")
 async def root(request: Request, code: Optional[str] = None, authorized: Optional[str] = Cookie(None)):
     """로그인/회원가입 페이지. 초대 게이트 활성화 시 코드 검증 후 진입."""
     if not INVITE_GATE_ENABLED:
-        return FileResponse(STATIC_DIR / "account.html")
+        return ui_file_response(STATIC_DIR / "account.html")
     # 1. 이미 쿠키로 인증된 경우
     if authorized == "true":
-        return FileResponse(STATIC_DIR / "account.html")
+        return ui_file_response(STATIC_DIR / "account.html")
     # 2. 초대 코드가 일치하는 경우 (최초 진입)
     if code == INVITE_CODE:
-        response = FileResponse(STATIC_DIR / "account.html")
+        response = ui_file_response(STATIC_DIR / "account.html")
         response.set_cookie(key="authorized", value="true", httponly=True, samesite="lax", max_age=60*60*24*7)
         return response
@@ -1203,7 +1270,7 @@ async def root(request: Request, code: Optional[str] = None, authorized: Optiona
 @app.get("/account")
 async def account_page():
     """Direct login/register page route used by logout and manual navigation."""
-    return FileResponse(STATIC_DIR / "account.html")
+    return ui_file_response(STATIC_DIR / "account.html")
 @app.get("/manifest.json")
@@ -1226,7 +1293,7 @@ async def service_worker():
 @app.get("/chat")
 async def chat_page(request: Request):
-    return FileResponse(STATIC_DIR / "chat.html")
+    return ui_file_response(STATIC_DIR / "chat.html")
 @app.get("/admin")
@@ -1959,15 +2026,11 @@ def get_lmstudio_models(*, force: bool = False) -> List[Dict[str, object]]:
     global _LMSTUDIO_MODELS_CACHE, _LMSTUDIO_MODELS_CACHE_TS
     if not force and time.monotonic() - _LMSTUDIO_MODELS_CACHE_TS < _LMSTUDIO_MODELS_CACHE_TTL:
         return _LMSTUDIO_MODELS_CACHE
-    try:
-        ensure_lmstudio_server()
-    except HTTPException:
-        return _LMSTUDIO_MODELS_CACHE
     try:
         payload = _json_request(
             f"{lmstudio_native_api_base()}/api/v1/models",
             headers={"Authorization": f"Bearer {os.getenv('LMSTUDIO_API_KEY') or 'lmstudio'}"},
-            timeout=5,
+            timeout=2.5,
         )
     except Exception:
         return _LMSTUDIO_MODELS_CACHE
@@ -2935,6 +2998,82 @@ def ensure_engine_ready(engine: str) -> Dict[str, object]:
     return {"engine": engine, "installed": True, "installed_now": True, "install": result}
+def build_model_resolution(
+    input_id: str,
+    engine: Optional[str],
+    *,
+    user_email: Optional[str] = None,
+    display_name: Optional[str] = None,
+) -> _ModelResolution:
+    """피드백 #1/#2 공용 ModelResolution 생성기.
+    사용자가 클릭한 input_id + engine 힌트를 받아 모든 단계가 공유할
+    canonical identity를 만든다.
+    """
+    normalized = normalize_local_model_request(input_id, engine)
+    return _ModelResolution.from_request(
+        normalized,
+        engine=engine,
+        user_email=user_email,
+        display_name=display_name or input_id,
+        engine_aliases=MODEL_ENGINE_ALIASES,
+    )
+_LOCAL_SMOKE_ENGINES = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
+async def _smoke_test_loaded_model(
+    resolution: _ModelResolution,
+    *,
+    api_key_override: Optional[str] = None,
+) -> Dict[str, object]:
+    """로드 직후 짧은 채팅 테스트를 돌려 ready_to_chat 여부를 판정한다.
+    Cloud(OpenAI/Anthropic/OpenRouter 등) 모델은 사용자 비용 발생 가능성 때문에 skip.
+    실패해도 예외를 던지지 않는다. 결과는 compat_cache에도 기록된다.
+    """
+    if (resolution.engine or "").lower() not in _LOCAL_SMOKE_ENGINES:
+        profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
+        return {
+            "ok": True,
+            "reason": "skipped (cloud model — smoke test would incur cost)",
+            "answer": None,
+            "profile": profile.to_dict(),
+            "skipped": True,
+        }
+    try:
+        text = await asyncio.wait_for(
+            router.generate(
+                _SMOKE_PROMPT,
+                context=None,
+                max_tokens=128,
+                temperature=0.1,
+            ),
+            timeout=30,
+        )
+    except Exception as exc:  # pragma: no cover - generator may not exist on all engines
+        reason = str(exc)[:200] or "generation_failed"
+        profile = _record_smoke_result(resolution.load_id, resolution.engine, False, reason)
+        return {
+            "ok": False,
+            "reason": reason,
+            "answer": None,
+            "profile": profile.to_dict(),
+        }
+    profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
+    cleaned = _compat_fast_postprocess(str(text or ""), profile.to_dict())
+    ok, reason = _validate_smoke_response(cleaned)
+    profile = _record_smoke_result(resolution.load_id, resolution.engine, ok, reason)
+    return {
+        "ok": ok,
+        "reason": reason,
+        "answer": cleaned,
+        "profile": profile.to_dict(),
+    }
 async def prepare_and_load_model(
     model_id: str,
     request: Request,
@@ -2947,6 +3086,14 @@ async def prepare_and_load_model(
     if not model_id:
         raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")
+    # 피드백 #1: ModelResolution을 모든 단계가 공유한다.
+    resolution = _ModelResolution.from_request(
+        model_id,
+        engine=engine,
+        user_email=user_email or get_current_user(request),
+        engine_aliases=MODEL_ENGINE_ALIASES,
+    )
     parsed_provider, parsed_model = parse_model_ref(model_id)
     if parsed_provider == "mlx":
         parsed_provider = "local_mlx"
@@ -3004,6 +3151,18 @@ async def prepare_and_load_model(
         api_key_override=user_api_key,
         owner=effective_email or None,
     )
+    # 피드백 #1/#2: 로드 직후 ModelResolution을 실제 current로 동기화하고 smoke test 수행.
+    resolution.update_after_load(actual_current=router.current_model_id)
+    smoke_result: Dict[str, object] = {}
+    ready_to_chat = True
+    compat_status = "ok"
+    try:
+        smoke_result = await _smoke_test_loaded_model(resolution, api_key_override=user_api_key)
+        ready_to_chat = bool(smoke_result.get("ok"))
+        compat_status = "ok" if ready_to_chat else "degraded"
+    except Exception as exc:  # never break load on smoke test failures
+        logging.warning("smoke test failed for %s: %s", resolution.load_id, exc)
+        compat_status = "unknown"
     return {
         "status": "ok",
         "message": msg,
@@ -3012,6 +3171,12 @@ async def prepare_and_load_model(
         "engine": parsed_provider,
         "installed_now": bool(install_result.get("installed_now")),
         "download": download_result,
+        "resolution": resolution.to_dict(),
+        "downloaded": True,
+        "loaded": True,
+        "ready_to_chat": ready_to_chat,
+        "compatibility_status": compat_status,
+        "smoke_test": smoke_result,
     }
@@ -3217,6 +3382,30 @@ async def prepare_and_load_model_stream(
         api_key_override=user_api_key,
         owner=effective_email or None,
     )
+    # 피드백 #1/#2: SSE에도 ModelResolution과 smoke test 결과를 같이 내려준다.
+    resolution_stream = _ModelResolution.from_request(
+        prepared_model_id,
+        engine=prepared_provider,
+        user_email=effective_email or None,
+        engine_aliases=MODEL_ENGINE_ALIASES,
+    )
+    resolution_stream.update_after_load(actual_current=router.current_model_id)
+    yield sse_event("progress", model_download_progress_payload(
+        "smoke_test",
+        "채팅 호환성 테스트 중입니다.",
+        percent=98,
+        indeterminate=True,
+    ))
+    smoke_result: Dict[str, object] = {}
+    ready_to_chat = True
+    compat_status = "ok"
+    try:
+        smoke_result = await _smoke_test_loaded_model(resolution_stream, api_key_override=user_api_key)
+        ready_to_chat = bool(smoke_result.get("ok"))
+        compat_status = "ok" if ready_to_chat else "degraded"
+    except Exception as exc:
+        logging.warning("smoke test (stream) failed for %s: %s", resolution_stream.load_id, exc)
+        compat_status = "unknown"
     result = {
         "status": "ok",
         "message": msg,
@@ -3225,6 +3414,12 @@ async def prepare_and_load_model_stream(
         "engine": prepared_provider,
         "installed_now": bool(isinstance(install_result, dict) and install_result.get("installed_now")),
         "download": download_result,
+        "resolution": resolution_stream.to_dict(),
+        "downloaded": True,
+        "loaded": True,
+        "ready_to_chat": ready_to_chat,
+        "compatibility_status": compat_status,
+        "smoke_test": smoke_result,
     }
     yield sse_event("progress", model_download_progress_payload(
         "done",
@@ -3296,7 +3491,7 @@ async def verify_cloud_models(force: bool = False, provider_filter: Optional[str
 @app.get("/health")
 async def health(request: Request):
-    base = {"status": "ok", "version": "0.2.2", "mode": APP_MODE}
+    base = {"status": "ok", "version": "0.3.0", "mode": APP_MODE}
     if not get_current_user(request) and REQUIRE_AUTH:
         return base
     engines = await asyncio.to_thread(engine_status)
@@ -3451,22 +3646,69 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
     return {"ok": True, "provider": req.provider, "user_email": target_email, "scope": "user"}
+def _recommended_with_engine_options(items: List[Dict[str, object]]) -> List[Dict[str, object]]:
+    """피드백 #1: 추천 모델에 엔진별 선택지(engine_options)를 붙여 내려준다.
+    프론트에서 추천 카드를 누르는 순간 어느 엔진/실제 모델로 다운로드/로드할지가
+    이미 확정되도록 한다.
+    """
+    out: List[Dict[str, object]] = []
+    for item in items:
+        base = {
+            "id": item["id"],
+            "name": item["name"],
+            "tag": item["tag"],
+            "size": item["size"],
+            "display_name": item.get("name") or item.get("id"),
+        }
+        short_id = str(item["id"]).lower()
+        aliases = MODEL_ENGINE_ALIASES.get(short_id) or {}
+        options: List[Dict[str, str]] = []
+        for engine_name in ("local_mlx", "ollama", "lmstudio", "llamacpp", "vllm"):
+            real = aliases.get(engine_name)
+            if not real:
+                continue
+            options.append({
+                "engine": engine_name,
+                "model_id": real,
+                "load_id": real if engine_name == "local_mlx" else f"{engine_name}:{real}",
+            })
+        # 어느 엔진도 alias가 없으면 local_mlx 카탈로그 자체를 사용한다.
+        if not options:
+            options.append({
+                "engine": "local_mlx",
+                "model_id": item["id"],
+                "load_id": item["id"],
+            })
+        base["engine_options"] = options
+        base["recommended_engine"] = options[0]["engine"]
+        out.append(base)
+    return out
 @app.get("/models")
 async def list_models():
     """HuggingFace 추천 모델 목록 및 로드 상태 반환"""
-    recommended = [
-        {"id": item["id"], "name": item["name"], "tag": item["tag"], "size": item["size"]}
-        for item in filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", []))
-    ]
+    recommended = _recommended_with_engine_options(
+        list(filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", [])))
+    )
     return {
         "recommended": recommended,
         "cloud": router.detected_cloud_models(),
         "engines": await asyncio.to_thread(engine_status),
         "loaded": router.loaded_model_ids,
         "current": router.current_model_id,
+        "compat_profiles": _list_compat_profiles(),
     }
+@app.get("/models/compat-profiles")
+async def list_model_compat_profiles(request: Request):
+    """피드백 #3: Model Compatibility Layer 캐시 상태를 조회한다."""
+    require_user(request)
+    return {"profiles": _list_compat_profiles()}
 # ── Model Management ───────────────────────────────────────────────────────────
 @app.post("/models/load")
@@ -3636,12 +3878,24 @@ async def chat(req: ChatRequest, request: Request):
     except Exception as e:
         logging.warning("Knowledge reinforcement skipped: %s", e)
+    is_doc_gen = detect_document_intent(req.message)
+    doc_gen_context_result = None
     try:
         if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
-            graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
-            if graph_context:
-                context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
-                print("🕸️ Context reinforced with knowledge graph.")
+            if is_doc_gen:
+                doc_gen_context_result = retrieve_context_for_generation(
+                    KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
+                )
+                graph_md = doc_gen_context_result.get("context_markdown", "")
+                if graph_md:
+                    context += f"\n\n[KNOWLEDGE GRAPH — Document Generation Context]\n{graph_md}"
+                    print("📝 Document generation context retrieved from knowledge graph.")
+            else:
+                graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
+                if graph_context:
+                    context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
+                    print("🕸️ Context reinforced with knowledge graph.")
     except Exception as e:
         logging.warning("Knowledge graph reinforcement skipped: %s", e)
@@ -3651,7 +3905,6 @@ async def chat(req: ChatRequest, request: Request):
             context += f"\n\n{screenshot_context}"
     if env_bool("LATTICEAI_AUTO_READ_CHAT_PATHS", default=False):
-        # Off by default: automatic local-file injection can leak files to cloud models.
         _file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
         for _m in _file_path_re.finditer(req.message or ""):
             _fpath = _m.group(1).strip()
@@ -3669,6 +3922,55 @@ async def chat(req: ChatRequest, request: Request):
     if req.source != "telegram":
         asyncio.create_task(broadcast_web_chat("user", req.message))
+    if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
+        conv_key = req.conversation_id or "default"
+        session = _doc_gen_sessions.get(conv_key)
+        if session is None:
+            session = DocumentGenerationSession()
+            _doc_gen_sessions[conv_key] = session
+        graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
+        system_prompt = session.get_system_prompt(graph_md)
+        sources = (doc_gen_context_result or {}).get("sources", [])
+        footnote = format_sources_footnote(sources)
+        if req.stream:
+            async def _stream_doc_gen():
+                collected = []
+                async for chunk in router.stream_generate_document(
+                    req.message, system_prompt,
+                    max_tokens=req.max_tokens or 8192,
+                    temperature=req.temperature or 0.3,
+                ):
+                    collected.append(chunk)
+                    yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
+                full_text = "".join(collected)
+                if footnote:
+                    yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
+                    full_text += footnote
+                session.update(graph_md, full_text, req.conversation_id)
+                save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
+                if req.source != "telegram":
+                    asyncio.create_task(broadcast_web_chat("assistant", full_text))
+                yield "data: [DONE]\n\n"
+            return StreamingResponse(
+                _stream_doc_gen(),
+                media_type="text/event-stream",
+                headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
+            )
+        else:
+            result = await router.generate_document(
+                req.message, system_prompt,
+                max_tokens=req.max_tokens or 8192,
+                temperature=req.temperature or 0.3,
+            )
+            if footnote:
+                result += footnote
+            session.update(graph_md, result, req.conversation_id)
+            save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
+            if req.source != "telegram":
+                asyncio.create_task(broadcast_web_chat("assistant", str(result)))
+            return JSONResponse(content={"response": str(result)})
     if req.stream:
         recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
         stream_context = context

package/static/account.html CHANGED

@@ -13,7 +13,7 @@
     <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
     <link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap">
     <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@tabler/icons-webfont@latest/tabler-icons.min.css">
-    <link rel="stylesheet" href="/static/lattice-reference.css">
+    <link rel="stylesheet" href="/static/lattice-reference.css?v=0.3.3">
 </head>
 <body class="lattice-ref-auth">
     <div class="orb orb-1"></div>
@@ -103,6 +103,6 @@
         <a href="#" onclick="return false;" id="privacy-link">개인정보 처리방침</a>
     </footer>
-    <script src="/static/scripts/account.js"></script>
+    <script src="/static/scripts/account.js?v=0.3.3"></script>
 </body>
 </html>