ltcai 0.2.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -0
- package/docs/CHANGELOG.md +125 -0
- package/kg_schema.py +64 -15
- package/knowledge_graph.py +299 -2
- package/knowledge_graph_api.py +10 -2
- package/latticeai/api/security_dashboard.py +580 -0
- package/latticeai/core/__init__.py +1 -1
- package/latticeai/core/context_builder.py +191 -0
- package/latticeai/core/document_generator.py +103 -0
- package/latticeai/core/graph_curator.py +417 -0
- package/latticeai/core/model_compat.py +407 -0
- package/latticeai/core/model_resolution.py +227 -0
- package/llm_router.py +147 -0
- package/package.json +1 -1
- package/server.py +324 -22
- package/static/account.html +2 -2
- package/static/admin.html +75 -1
- package/static/chat.html +2 -2
- package/static/css/tokens.css +26 -0
- package/static/graph.html +2 -2
- package/static/lattice-reference.css +372 -414
- package/static/scripts/account.js +10 -2
- package/static/scripts/admin.js +296 -0
- package/static/scripts/chat.js +82 -9
- package/static/scripts/graph.js +6 -2
- package/static/sw.js +1 -1
package/llm_router.py
CHANGED
|
@@ -626,3 +626,150 @@ class LLMRouter:
|
|
|
626
626
|
except Exception as e:
|
|
627
627
|
print(f"⚠️ VLM image decode failed: {e}")
|
|
628
628
|
return None
|
|
629
|
+
|
|
630
|
+
# ── Document Generation Pipeline ──────────────────────────────────────
|
|
631
|
+
|
|
632
|
+
async def generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> str:
    """Generate a document using a specialized system prompt with graph context.

    Args:
        message: The user request that the document should answer.
        system_prompt: System prompt sent ahead of the user message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The generated text passed through ``normalize_branding``, or the
        literal ``"No model loaded."`` when no model is currently selected.
    """
    if not self._current:
        return "No model loaded."
    self._touch()
    # Snapshot the model id once: the worker below runs later on another
    # thread and must not observe a concurrent model switch (or ``None``).
    current = self._current
    cached = self._cache[current]

    if isinstance(cached, CloudModel):
        return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)

    model, tokenizer, draft_model = cached

    # ChatML-style prompt used when the tokenizer has no chat template or
    # when applying the template fails.
    fallback_prompt = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    prompt = fallback_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            prompt = fallback_prompt

    # We are inside a coroutine, so the running loop is the one we want;
    # get_event_loop() is deprecated for this use.
    loop = asyncio.get_running_loop()

    def _gen():
        # Runs in the executor so blocking generation stays off the event loop.
        import mlx.core as mx
        mx.set_default_device(mx.gpu)
        model_key = current.lower()
        is_gemma4 = "gemma-4" in model_key or "gemma4" in model_key
        if is_gemma4 and VLM_AVAILABLE:
            from mlx_vlm import generate as vlm_gen
            return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
        from mlx_lm import generate as lm_gen
        return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)

    result = await loop.run_in_executor(executor, _gen)
    # Some backends return an object carrying ``.text``; others a plain value.
    if hasattr(result, "text"):
        return normalize_branding(result.text)
    return normalize_branding(str(result))
|
|
677
|
+
|
|
678
|
+
async def _cloud_generate_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> str:
    """Generate a document through a chat-completions client (non-streaming).

    Args:
        cloud: Model handle exposing ``client`` and ``model``.
        message: User message.
        system_prompt: System prompt sent as the first message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Returns:
        The first choice's content passed through ``normalize_branding``;
        an empty string is normalized when the backend returns no choices
        or no content.

    Raises:
        RuntimeError: If the completion request fails; the message comes from
            ``self._local_server_error_hint`` and the original error is chained.
    """
    try:
        response = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        raise RuntimeError(self._local_server_error_hint(cloud, e)) from e

    # A backend may answer with an empty ``choices`` list; indexing it would
    # raise IndexError. The streaming variant already skips such events.
    choices = getattr(response, "choices", None)
    if not choices:
        return normalize_branding("")
    return normalize_branding(choices[0].message.content or "")
|
|
692
|
+
|
|
693
|
+
async def stream_generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> AsyncIterator[str]:
    """Stream document generation with specialized system prompt.

    Args:
        message: The user request that the document should answer.
        system_prompt: System prompt sent ahead of the user message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature forwarded to the backend.

    Yields:
        Text chunks passed through ``normalize_branding``. When no model is
        selected a single ``"No model loaded."`` chunk is yielded. A failure
        in the local worker is yielded as a ``"⚠️ Error: ..."`` chunk.
    """
    if not self._current:
        yield "No model loaded."
        return
    self._touch()
    # Snapshot the model id once so the worker thread cannot observe a
    # concurrent model switch (or ``None``).
    current = self._current
    cached = self._cache[current]

    if isinstance(cached, CloudModel):
        async for chunk in self._cloud_stream_document(cached, message, system_prompt, max_tokens, temperature):
            yield chunk
        return

    model, tokenizer, draft_model = cached

    # ChatML-style prompt used when the tokenizer has no chat template or
    # when applying the template fails.
    fallback_prompt = (
        f"<|im_start|>system\n{system_prompt}<|im_end|>\n"
        f"<|im_start|>user\n{message}<|im_end|>\n"
        f"<|im_start|>assistant\n"
    )
    prompt = fallback_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            prompt = fallback_prompt

    loop = asyncio.get_running_loop()
    queue = asyncio.Queue()

    def _stream():
        # Everything, including the MLX import and device selection, sits
        # inside the try so that ``finally`` always queues the ``None``
        # sentinel. Otherwise an import failure would leave the consumer
        # below waiting on ``queue.get()`` forever.
        try:
            import mlx.core as mx
            mx.set_default_device(mx.gpu)
            model_key = current.lower()
            is_gemma4 = "gemma-4" in model_key or "gemma4" in model_key
            if is_gemma4 and VLM_AVAILABLE:
                from mlx_vlm import stream_generate as vlm_stream
                gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
            else:
                from mlx_lm import stream_generate as lm_stream
                gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
            for chunk in gen:
                # Chunks may be objects with ``.text``, tuples whose first
                # item is the text, or plain values.
                text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
                # The worker is off the loop thread, so hand items over
                # through the loop's thread-safe scheduling call.
                loop.call_soon_threadsafe(queue.put_nowait, text)
        except Exception as e:
            loop.call_soon_threadsafe(queue.put_nowait, f"⚠️ Error: {e}")
        finally:
            loop.call_soon_threadsafe(queue.put_nowait, None)

    loop.run_in_executor(executor, _stream)
    while True:
        chunk = await queue.get()
        if chunk is None:
            break
        yield normalize_branding(chunk)
|
|
754
|
+
|
|
755
|
+
async def _cloud_stream_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> AsyncIterator[str]:
    """Stream a document through a chat-completions client.

    Args:
        cloud: Model handle exposing ``client`` and ``model``.
        message: User message.
        system_prompt: System prompt sent as the first message.
        max_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature.

    Yields:
        Non-empty content deltas passed through ``normalize_branding``. A
        failure, whether opening the stream or while reading it, is yielded
        as one ``"⚠️ ..."`` chunk built by ``self._local_server_error_hint``
        and ends the stream.
    """
    try:
        stream = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ],
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
    except Exception as e:
        yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
        return

    # A connection can also fail mid-stream. Report it the same way as a
    # creation failure so the caller still receives a terminal chunk
    # instead of an exception escaping the generator.
    try:
        async for event in stream:
            if not event.choices:
                continue
            delta = event.choices[0].delta.content
            if delta:
                yield normalize_branding(delta)
    except Exception as e:
        yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
|
package/package.json
CHANGED
package/server.py
CHANGED
|
@@ -46,8 +46,10 @@ from pydantic import BaseModel
|
|
|
46
46
|
from PIL import Image
|
|
47
47
|
|
|
48
48
|
from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
|
|
49
|
-
from knowledge_graph import KnowledgeGraphStore
|
|
49
|
+
from knowledge_graph import KnowledgeGraphStore, set_llm_router
|
|
50
50
|
from knowledge_graph_api import create_knowledge_graph_router
|
|
51
|
+
from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
|
|
52
|
+
from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
|
|
51
53
|
from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
|
|
52
54
|
from latticeai.core.security import (
|
|
53
55
|
hash_password as _hash_password,
|
|
@@ -70,6 +72,24 @@ from latticeai.core.audit import (
|
|
|
70
72
|
)
|
|
71
73
|
from latticeai.api.auth import create_auth_router
|
|
72
74
|
from latticeai.api.admin import create_admin_router
|
|
75
|
+
from latticeai.api.security_dashboard import create_security_router as _create_security_router
|
|
76
|
+
from latticeai.core.model_compat import (
|
|
77
|
+
ensure_profile as _ensure_compat_profile,
|
|
78
|
+
record_smoke_result as _record_smoke_result,
|
|
79
|
+
fast_postprocess as _compat_fast_postprocess,
|
|
80
|
+
validate_smoke_response as _validate_smoke_response,
|
|
81
|
+
list_cached_profiles as _list_compat_profiles,
|
|
82
|
+
SMOKE_PROMPT as _SMOKE_PROMPT,
|
|
83
|
+
)
|
|
84
|
+
from latticeai.core.model_resolution import (
|
|
85
|
+
ModelResolution as _ModelResolution,
|
|
86
|
+
PrepareState as _PrepareState,
|
|
87
|
+
PrepareReport as _PrepareReport,
|
|
88
|
+
)
|
|
89
|
+
from latticeai.core.graph_curator import (
|
|
90
|
+
auto_build_graph_overlay as _auto_build_graph_overlay,
|
|
91
|
+
mask_secrets as _curator_mask_secrets,
|
|
92
|
+
)
|
|
73
93
|
import mcp_registry
|
|
74
94
|
from mcp_registry import (
|
|
75
95
|
MCP_REGISTRY, _THIRD_PARTY_SKILL_SOURCES, _KNOWN_REPO_LICENSES,
|
|
@@ -1001,7 +1021,9 @@ def build_admin_audit_report(users: Dict) -> Dict:
|
|
|
1001
1021
|
)
|
|
1002
1022
|
|
|
1003
1023
|
router = LLMRouter()
|
|
1024
|
+
set_llm_router(router)
|
|
1004
1025
|
gardener = PReinforceGardener()
|
|
1026
|
+
_doc_gen_sessions: dict = {} # conversation_id → DocumentGenerationSession
|
|
1005
1027
|
|
|
1006
1028
|
async def autoload_default_model() -> None:
|
|
1007
1029
|
if not AUTOLOAD_MODELS:
|
|
@@ -1103,7 +1125,7 @@ async def lifespan(app: FastAPI):
|
|
|
1103
1125
|
except Exception:
|
|
1104
1126
|
pass
|
|
1105
1127
|
|
|
1106
|
-
app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.
|
|
1128
|
+
app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.3.0", lifespan=lifespan)
|
|
1107
1129
|
|
|
1108
1130
|
CORS_ALLOWED_ORIGINS = [
|
|
1109
1131
|
f"http://localhost:{DEFAULT_PORT}",
|
|
@@ -1171,19 +1193,64 @@ app.include_router(create_admin_router(
|
|
|
1171
1193
|
default_port=DEFAULT_PORT,
|
|
1172
1194
|
))
|
|
1173
1195
|
|
|
1196
|
+
# ── Security & Audit Command Center (피드백 #5) ──────────────────────────────
|
|
1197
|
+
def _security_audit_events_safe() -> List[Dict]:
    """Return the audit-log events, or an empty list when loading fails.

    Any exception raised by ``_get_audit_log(AUDIT_FILE)`` is logged as a
    warning and swallowed, so callers always receive a list.
    """
    events: List[Dict] = []
    try:
        events = _get_audit_log(AUDIT_FILE)
    except Exception as load_error:
        logging.warning("security audit events load failed: %s", load_error)
    return events
|
|
1203
|
+
|
|
1204
|
+
def _security_list_uploaded_files() -> List[Dict]:
    """Turn the ``document_upload`` events of the audit log into a file list.

    Each entry copies the upload metadata from the event. ``file_id`` is the
    event's filename, or the event's position in the audit log when the
    filename is missing or empty.
    """
    return [
        {
            "file_id": str(event.get("filename") or position),
            "filename": event.get("filename"),
            "user_email": event.get("user_email"),
            "user_nickname": event.get("user_nickname"),
            "uploaded_at": event.get("timestamp"),
            "ext": event.get("ext"),
            "bytes": event.get("bytes"),
            "sensitivity": event.get("sensitivity") or "none",
            "sensitive_labels": event.get("sensitive_labels") or [],
            "content_preview": event.get("content_preview"),
        }
        for position, event in enumerate(_security_audit_events_safe())
        if event.get("event_type") == "document_upload"
    ]
|
|
1223
|
+
|
|
1224
|
+
app.include_router(_create_security_router(
|
|
1225
|
+
require_admin=require_admin,
|
|
1226
|
+
get_history=get_history,
|
|
1227
|
+
get_audit_events=_security_audit_events_safe,
|
|
1228
|
+
classify_sensitive_message=classify_sensitive_message,
|
|
1229
|
+
build_sensitivity_report=build_sensitivity_report,
|
|
1230
|
+
list_uploaded_files=_security_list_uploaded_files,
|
|
1231
|
+
append_audit_event=append_audit_event,
|
|
1232
|
+
))
|
|
1233
|
+
|
|
1234
|
+
def ui_file_response(path: Path) -> FileResponse:
    """Serve a UI file with headers that tell clients not to cache it."""
    no_cache_headers = (
        ("Cache-Control", "no-cache, no-store, must-revalidate"),
        ("Pragma", "no-cache"),
        ("Expires", "0"),
    )
    page = FileResponse(path)
    for header_name, header_value in no_cache_headers:
        page.headers[header_name] = header_value
    return page
|
|
1240
|
+
|
|
1174
1241
|
@app.get("/")
|
|
1175
1242
|
async def root(request: Request, code: Optional[str] = None, authorized: Optional[str] = Cookie(None)):
|
|
1176
1243
|
"""로그인/회원가입 페이지. 초대 게이트 활성화 시 코드 검증 후 진입."""
|
|
1177
1244
|
if not INVITE_GATE_ENABLED:
|
|
1178
|
-
return
|
|
1245
|
+
return ui_file_response(STATIC_DIR / "account.html")
|
|
1179
1246
|
|
|
1180
1247
|
# 1. 이미 쿠키로 인증된 경우
|
|
1181
1248
|
if authorized == "true":
|
|
1182
|
-
return
|
|
1249
|
+
return ui_file_response(STATIC_DIR / "account.html")
|
|
1183
1250
|
|
|
1184
1251
|
# 2. 초대 코드가 일치하는 경우 (최초 진입)
|
|
1185
1252
|
if code == INVITE_CODE:
|
|
1186
|
-
response =
|
|
1253
|
+
response = ui_file_response(STATIC_DIR / "account.html")
|
|
1187
1254
|
response.set_cookie(key="authorized", value="true", httponly=True, samesite="lax", max_age=60*60*24*7)
|
|
1188
1255
|
return response
|
|
1189
1256
|
|
|
@@ -1203,7 +1270,7 @@ async def root(request: Request, code: Optional[str] = None, authorized: Optiona
|
|
|
1203
1270
|
@app.get("/account")
async def account_page():
    """Serve the login/register page directly (used by logout and manual navigation)."""
    account_html = STATIC_DIR / "account.html"
    return ui_file_response(account_html)
|
|
1207
1274
|
|
|
1208
1275
|
|
|
1209
1276
|
@app.get("/manifest.json")
|
|
@@ -1226,7 +1293,7 @@ async def service_worker():
|
|
|
1226
1293
|
|
|
1227
1294
|
@app.get("/chat")
async def chat_page(request: Request):
    """Serve the chat UI page; ``request`` is accepted but not inspected here."""
    chat_html = STATIC_DIR / "chat.html"
    return ui_file_response(chat_html)
|
|
1230
1297
|
|
|
1231
1298
|
|
|
1232
1299
|
@app.get("/admin")
|
|
@@ -1959,15 +2026,11 @@ def get_lmstudio_models(*, force: bool = False) -> List[Dict[str, object]]:
|
|
|
1959
2026
|
global _LMSTUDIO_MODELS_CACHE, _LMSTUDIO_MODELS_CACHE_TS
|
|
1960
2027
|
if not force and time.monotonic() - _LMSTUDIO_MODELS_CACHE_TS < _LMSTUDIO_MODELS_CACHE_TTL:
|
|
1961
2028
|
return _LMSTUDIO_MODELS_CACHE
|
|
1962
|
-
try:
|
|
1963
|
-
ensure_lmstudio_server()
|
|
1964
|
-
except HTTPException:
|
|
1965
|
-
return _LMSTUDIO_MODELS_CACHE
|
|
1966
2029
|
try:
|
|
1967
2030
|
payload = _json_request(
|
|
1968
2031
|
f"{lmstudio_native_api_base()}/api/v1/models",
|
|
1969
2032
|
headers={"Authorization": f"Bearer {os.getenv('LMSTUDIO_API_KEY') or 'lmstudio'}"},
|
|
1970
|
-
timeout=5,
|
|
2033
|
+
timeout=2.5,
|
|
1971
2034
|
)
|
|
1972
2035
|
except Exception:
|
|
1973
2036
|
return _LMSTUDIO_MODELS_CACHE
|
|
@@ -2935,6 +2998,82 @@ def ensure_engine_ready(engine: str) -> Dict[str, object]:
|
|
|
2935
2998
|
return {"engine": engine, "installed": True, "installed_now": True, "install": result}
|
|
2936
2999
|
|
|
2937
3000
|
|
|
3001
|
+
def build_model_resolution(
    input_id: str,
    engine: Optional[str],
    *,
    user_email: Optional[str] = None,
    display_name: Optional[str] = None,
) -> _ModelResolution:
    """Shared ModelResolution factory for feedback items #1/#2.

    Takes the ``input_id`` the user clicked plus an engine hint and builds
    the canonical identity that every later stage shares. ``display_name``
    falls back to ``input_id`` when it is not given (or is empty).
    """
    canonical_id = normalize_local_model_request(input_id, engine)
    label = display_name if display_name else input_id
    return _ModelResolution.from_request(
        canonical_id,
        engine=engine,
        user_email=user_email,
        display_name=label,
        engine_aliases=MODEL_ENGINE_ALIASES,
    )
|
|
3021
|
+
|
|
3022
|
+
|
|
3023
|
+
_LOCAL_SMOKE_ENGINES = {"local_mlx", "ollama", "vllm", "lmstudio", "llamacpp"}
|
|
3024
|
+
|
|
3025
|
+
|
|
3026
|
+
async def _smoke_test_loaded_model(
    resolution: _ModelResolution,
    *,
    api_key_override: Optional[str] = None,
) -> Dict[str, object]:
    """Run a short chat right after loading to decide whether the model is ready to chat.

    Engines outside ``_LOCAL_SMOKE_ENGINES`` (cloud providers such as
    OpenAI/Anthropic/OpenRouter) are skipped because the test could incur
    cost for the user; they are reported as ``ok`` with ``skipped: True``.

    A generation failure or timeout does not raise: it is recorded through
    ``_record_smoke_result`` and returned as ``ok: False``.

    Args:
        resolution: Identity of the loaded model; ``engine`` and ``load_id``
            are read.
        api_key_override: Accepted for call-site compatibility; not used by
            this function.

    Returns:
        A dict with ``ok``, ``reason``, ``answer`` and ``profile`` keys, plus
        ``skipped`` for skipped engines.
    """
    if (resolution.engine or "").lower() not in _LOCAL_SMOKE_ENGINES:
        profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
        return {
            "ok": True,
            "reason": "skipped (cloud model — smoke test would incur cost)",
            "answer": None,
            "profile": profile.to_dict(),
            "skipped": True,
        }

    timeout_seconds = 30
    try:
        text = await asyncio.wait_for(
            router.generate(
                _SMOKE_PROMPT,
                context=None,
                max_tokens=128,
                temperature=0.1,
            ),
            timeout=timeout_seconds,
        )
    except Exception as exc:  # pragma: no cover - generator may not exist on all engines
        if isinstance(exc, asyncio.TimeoutError):
            # TimeoutError carries no message, so name it explicitly rather
            # than recording the generic fallback reason.
            reason = f"timeout after {timeout_seconds}s"
        else:
            reason = str(exc)[:200] or "generation_failed"
        profile = _record_smoke_result(resolution.load_id, resolution.engine, False, reason)
        return {
            "ok": False,
            "reason": reason,
            "answer": None,
            "profile": profile.to_dict(),
        }

    # Clean the raw answer with the model's compat profile before validating.
    profile = _ensure_compat_profile(resolution.load_id, resolution.engine)
    cleaned = _compat_fast_postprocess(str(text or ""), profile.to_dict())
    ok, reason = _validate_smoke_response(cleaned)
    profile = _record_smoke_result(resolution.load_id, resolution.engine, ok, reason)
    return {
        "ok": ok,
        "reason": reason,
        "answer": cleaned,
        "profile": profile.to_dict(),
    }
|
|
3075
|
+
|
|
3076
|
+
|
|
2938
3077
|
async def prepare_and_load_model(
|
|
2939
3078
|
model_id: str,
|
|
2940
3079
|
request: Request,
|
|
@@ -2947,6 +3086,14 @@ async def prepare_and_load_model(
|
|
|
2947
3086
|
if not model_id:
|
|
2948
3087
|
raise HTTPException(status_code=400, detail="모델 식별자가 비어 있습니다.")
|
|
2949
3088
|
|
|
3089
|
+
# 피드백 #1: ModelResolution을 모든 단계가 공유한다.
|
|
3090
|
+
resolution = _ModelResolution.from_request(
|
|
3091
|
+
model_id,
|
|
3092
|
+
engine=engine,
|
|
3093
|
+
user_email=user_email or get_current_user(request),
|
|
3094
|
+
engine_aliases=MODEL_ENGINE_ALIASES,
|
|
3095
|
+
)
|
|
3096
|
+
|
|
2950
3097
|
parsed_provider, parsed_model = parse_model_ref(model_id)
|
|
2951
3098
|
if parsed_provider == "mlx":
|
|
2952
3099
|
parsed_provider = "local_mlx"
|
|
@@ -3004,6 +3151,18 @@ async def prepare_and_load_model(
|
|
|
3004
3151
|
api_key_override=user_api_key,
|
|
3005
3152
|
owner=effective_email or None,
|
|
3006
3153
|
)
|
|
3154
|
+
# 피드백 #1/#2: 로드 직후 ModelResolution을 실제 current로 동기화하고 smoke test 수행.
|
|
3155
|
+
resolution.update_after_load(actual_current=router.current_model_id)
|
|
3156
|
+
smoke_result: Dict[str, object] = {}
|
|
3157
|
+
ready_to_chat = True
|
|
3158
|
+
compat_status = "ok"
|
|
3159
|
+
try:
|
|
3160
|
+
smoke_result = await _smoke_test_loaded_model(resolution, api_key_override=user_api_key)
|
|
3161
|
+
ready_to_chat = bool(smoke_result.get("ok"))
|
|
3162
|
+
compat_status = "ok" if ready_to_chat else "degraded"
|
|
3163
|
+
except Exception as exc: # never break load on smoke test failures
|
|
3164
|
+
logging.warning("smoke test failed for %s: %s", resolution.load_id, exc)
|
|
3165
|
+
compat_status = "unknown"
|
|
3007
3166
|
return {
|
|
3008
3167
|
"status": "ok",
|
|
3009
3168
|
"message": msg,
|
|
@@ -3012,6 +3171,12 @@ async def prepare_and_load_model(
|
|
|
3012
3171
|
"engine": parsed_provider,
|
|
3013
3172
|
"installed_now": bool(install_result.get("installed_now")),
|
|
3014
3173
|
"download": download_result,
|
|
3174
|
+
"resolution": resolution.to_dict(),
|
|
3175
|
+
"downloaded": True,
|
|
3176
|
+
"loaded": True,
|
|
3177
|
+
"ready_to_chat": ready_to_chat,
|
|
3178
|
+
"compatibility_status": compat_status,
|
|
3179
|
+
"smoke_test": smoke_result,
|
|
3015
3180
|
}
|
|
3016
3181
|
|
|
3017
3182
|
|
|
@@ -3217,6 +3382,30 @@ async def prepare_and_load_model_stream(
|
|
|
3217
3382
|
api_key_override=user_api_key,
|
|
3218
3383
|
owner=effective_email or None,
|
|
3219
3384
|
)
|
|
3385
|
+
# 피드백 #1/#2: SSE에도 ModelResolution과 smoke test 결과를 같이 내려준다.
|
|
3386
|
+
resolution_stream = _ModelResolution.from_request(
|
|
3387
|
+
prepared_model_id,
|
|
3388
|
+
engine=prepared_provider,
|
|
3389
|
+
user_email=effective_email or None,
|
|
3390
|
+
engine_aliases=MODEL_ENGINE_ALIASES,
|
|
3391
|
+
)
|
|
3392
|
+
resolution_stream.update_after_load(actual_current=router.current_model_id)
|
|
3393
|
+
yield sse_event("progress", model_download_progress_payload(
|
|
3394
|
+
"smoke_test",
|
|
3395
|
+
"채팅 호환성 테스트 중입니다.",
|
|
3396
|
+
percent=98,
|
|
3397
|
+
indeterminate=True,
|
|
3398
|
+
))
|
|
3399
|
+
smoke_result: Dict[str, object] = {}
|
|
3400
|
+
ready_to_chat = True
|
|
3401
|
+
compat_status = "ok"
|
|
3402
|
+
try:
|
|
3403
|
+
smoke_result = await _smoke_test_loaded_model(resolution_stream, api_key_override=user_api_key)
|
|
3404
|
+
ready_to_chat = bool(smoke_result.get("ok"))
|
|
3405
|
+
compat_status = "ok" if ready_to_chat else "degraded"
|
|
3406
|
+
except Exception as exc:
|
|
3407
|
+
logging.warning("smoke test (stream) failed for %s: %s", resolution_stream.load_id, exc)
|
|
3408
|
+
compat_status = "unknown"
|
|
3220
3409
|
result = {
|
|
3221
3410
|
"status": "ok",
|
|
3222
3411
|
"message": msg,
|
|
@@ -3225,6 +3414,12 @@ async def prepare_and_load_model_stream(
|
|
|
3225
3414
|
"engine": prepared_provider,
|
|
3226
3415
|
"installed_now": bool(isinstance(install_result, dict) and install_result.get("installed_now")),
|
|
3227
3416
|
"download": download_result,
|
|
3417
|
+
"resolution": resolution_stream.to_dict(),
|
|
3418
|
+
"downloaded": True,
|
|
3419
|
+
"loaded": True,
|
|
3420
|
+
"ready_to_chat": ready_to_chat,
|
|
3421
|
+
"compatibility_status": compat_status,
|
|
3422
|
+
"smoke_test": smoke_result,
|
|
3228
3423
|
}
|
|
3229
3424
|
yield sse_event("progress", model_download_progress_payload(
|
|
3230
3425
|
"done",
|
|
@@ -3296,7 +3491,7 @@ async def verify_cloud_models(force: bool = False, provider_filter: Optional[str
|
|
|
3296
3491
|
|
|
3297
3492
|
@app.get("/health")
|
|
3298
3493
|
async def health(request: Request):
|
|
3299
|
-
base = {"status": "ok", "version": "0.
|
|
3494
|
+
base = {"status": "ok", "version": "0.3.0", "mode": APP_MODE}
|
|
3300
3495
|
if not get_current_user(request) and REQUIRE_AUTH:
|
|
3301
3496
|
return base
|
|
3302
3497
|
engines = await asyncio.to_thread(engine_status)
|
|
@@ -3451,22 +3646,69 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
|
|
|
3451
3646
|
return {"ok": True, "provider": req.provider, "user_email": target_email, "scope": "user"}
|
|
3452
3647
|
|
|
3453
3648
|
|
|
3649
|
+
def _recommended_with_engine_options(items: List[Dict[str, object]]) -> List[Dict[str, object]]:
    """Feedback #1: attach per-engine choices (``engine_options``) to recommended models.

    This lets the front end know, the moment a recommendation card is
    clicked, which engine and which real model id will be downloaded/loaded.
    Engines are considered in a fixed order and the first available one
    becomes ``recommended_engine``.
    """
    engine_order = ("local_mlx", "ollama", "lmstudio", "llamacpp", "vllm")
    enriched: List[Dict[str, object]] = []
    for entry in items:
        card = {
            "id": entry["id"],
            "name": entry["name"],
            "tag": entry["tag"],
            "size": entry["size"],
            "display_name": entry.get("name") or entry.get("id"),
        }
        alias_map = MODEL_ENGINE_ALIASES.get(str(entry["id"]).lower()) or {}
        candidates = ((name, alias_map.get(name)) for name in engine_order)
        choices: List[Dict[str, str]] = [
            {
                "engine": name,
                "model_id": real_id,
                # local_mlx loads by bare id; other engines use an "engine:id" reference.
                "load_id": real_id if name == "local_mlx" else f"{name}:{real_id}",
            }
            for name, real_id in candidates
            if real_id
        ]
        if not choices:
            # No engine has an alias: fall back to the local_mlx catalog entry itself.
            choices.append({
                "engine": "local_mlx",
                "model_id": entry["id"],
                "load_id": entry["id"],
            })
        card["engine_options"] = choices
        card["recommended_engine"] = choices[0]["engine"]
        enriched.append(card)
    return enriched
|
|
3687
|
+
|
|
3688
|
+
|
|
3454
3689
|
@app.get("/models")
async def list_models():
    """Return the recommended HuggingFace model list together with load status."""
    mlx_catalog = ENGINE_MODEL_CATALOG.get("local_mlx", [])
    recommended = _recommended_with_engine_options(
        list(filter_lower_family_versions(mlx_catalog))
    )
    # Gather the remaining fields in the same order the response lists them.
    cloud_models = router.detected_cloud_models()
    engines = await asyncio.to_thread(engine_status)
    loaded_ids = router.loaded_model_ids
    current_id = router.current_model_id
    profiles = _list_compat_profiles()
    return {
        "recommended": recommended,
        "cloud": cloud_models,
        "engines": engines,
        "loaded": loaded_ids,
        "current": current_id,
        "compat_profiles": profiles,
    }
|
|
3468
3703
|
|
|
3469
3704
|
|
|
3705
|
+
@app.get("/models/compat-profiles")
async def list_model_compat_profiles(request: Request):
    """Feedback #3: report the cached Model Compatibility Layer profiles.

    ``require_user`` is called first, before the profiles are read.
    """
    require_user(request)
    profiles = _list_compat_profiles()
    return {"profiles": profiles}
|
|
3710
|
+
|
|
3711
|
+
|
|
3470
3712
|
# ── Model Management ───────────────────────────────────────────────────────────
|
|
3471
3713
|
|
|
3472
3714
|
@app.post("/models/load")
|
|
@@ -3636,12 +3878,24 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3636
3878
|
except Exception as e:
|
|
3637
3879
|
logging.warning("Knowledge reinforcement skipped: %s", e)
|
|
3638
3880
|
|
|
3881
|
+
is_doc_gen = detect_document_intent(req.message)
|
|
3882
|
+
doc_gen_context_result = None
|
|
3883
|
+
|
|
3639
3884
|
try:
|
|
3640
3885
|
if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
|
|
3641
|
-
|
|
3642
|
-
|
|
3643
|
-
|
|
3644
|
-
|
|
3886
|
+
if is_doc_gen:
|
|
3887
|
+
doc_gen_context_result = retrieve_context_for_generation(
|
|
3888
|
+
KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
|
|
3889
|
+
)
|
|
3890
|
+
graph_md = doc_gen_context_result.get("context_markdown", "")
|
|
3891
|
+
if graph_md:
|
|
3892
|
+
context += f"\n\n[KNOWLEDGE GRAPH — Document Generation Context]\n{graph_md}"
|
|
3893
|
+
print("📝 Document generation context retrieved from knowledge graph.")
|
|
3894
|
+
else:
|
|
3895
|
+
graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
|
|
3896
|
+
if graph_context:
|
|
3897
|
+
context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
|
|
3898
|
+
print("🕸️ Context reinforced with knowledge graph.")
|
|
3645
3899
|
except Exception as e:
|
|
3646
3900
|
logging.warning("Knowledge graph reinforcement skipped: %s", e)
|
|
3647
3901
|
|
|
@@ -3651,7 +3905,6 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3651
3905
|
context += f"\n\n{screenshot_context}"
|
|
3652
3906
|
|
|
3653
3907
|
if env_bool("LATTICEAI_AUTO_READ_CHAT_PATHS", default=False):
|
|
3654
|
-
# Off by default: automatic local-file injection can leak files to cloud models.
|
|
3655
3908
|
_file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
|
|
3656
3909
|
for _m in _file_path_re.finditer(req.message or ""):
|
|
3657
3910
|
_fpath = _m.group(1).strip()
|
|
@@ -3669,6 +3922,55 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3669
3922
|
if req.source != "telegram":
|
|
3670
3923
|
asyncio.create_task(broadcast_web_chat("user", req.message))
|
|
3671
3924
|
|
|
3925
|
+
if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
|
|
3926
|
+
conv_key = req.conversation_id or "default"
|
|
3927
|
+
session = _doc_gen_sessions.get(conv_key)
|
|
3928
|
+
if session is None:
|
|
3929
|
+
session = DocumentGenerationSession()
|
|
3930
|
+
_doc_gen_sessions[conv_key] = session
|
|
3931
|
+
graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
|
|
3932
|
+
system_prompt = session.get_system_prompt(graph_md)
|
|
3933
|
+
sources = (doc_gen_context_result or {}).get("sources", [])
|
|
3934
|
+
footnote = format_sources_footnote(sources)
|
|
3935
|
+
|
|
3936
|
+
if req.stream:
|
|
3937
|
+
async def _stream_doc_gen():
|
|
3938
|
+
collected = []
|
|
3939
|
+
async for chunk in router.stream_generate_document(
|
|
3940
|
+
req.message, system_prompt,
|
|
3941
|
+
max_tokens=req.max_tokens or 8192,
|
|
3942
|
+
temperature=req.temperature or 0.3,
|
|
3943
|
+
):
|
|
3944
|
+
collected.append(chunk)
|
|
3945
|
+
yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
|
|
3946
|
+
full_text = "".join(collected)
|
|
3947
|
+
if footnote:
|
|
3948
|
+
yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
|
|
3949
|
+
full_text += footnote
|
|
3950
|
+
session.update(graph_md, full_text, req.conversation_id)
|
|
3951
|
+
save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
|
|
3952
|
+
if req.source != "telegram":
|
|
3953
|
+
asyncio.create_task(broadcast_web_chat("assistant", full_text))
|
|
3954
|
+
yield "data: [DONE]\n\n"
|
|
3955
|
+
return StreamingResponse(
|
|
3956
|
+
_stream_doc_gen(),
|
|
3957
|
+
media_type="text/event-stream",
|
|
3958
|
+
headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
|
|
3959
|
+
)
|
|
3960
|
+
else:
|
|
3961
|
+
result = await router.generate_document(
|
|
3962
|
+
req.message, system_prompt,
|
|
3963
|
+
max_tokens=req.max_tokens or 8192,
|
|
3964
|
+
temperature=req.temperature or 0.3,
|
|
3965
|
+
)
|
|
3966
|
+
if footnote:
|
|
3967
|
+
result += footnote
|
|
3968
|
+
session.update(graph_md, result, req.conversation_id)
|
|
3969
|
+
save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
|
|
3970
|
+
if req.source != "telegram":
|
|
3971
|
+
asyncio.create_task(broadcast_web_chat("assistant", str(result)))
|
|
3972
|
+
return JSONResponse(content={"response": str(result)})
|
|
3973
|
+
|
|
3672
3974
|
if req.stream:
|
|
3673
3975
|
recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
|
|
3674
3976
|
stream_context = context
|
package/static/account.html
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
|
14
14
|
<link rel="stylesheet" href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800&display=swap">
|
|
15
15
|
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@tabler/icons-webfont@latest/tabler-icons.min.css">
|
|
16
|
-
<link rel="stylesheet" href="/static/lattice-reference.css">
|
|
16
|
+
<link rel="stylesheet" href="/static/lattice-reference.css?v=0.3.3">
|
|
17
17
|
</head>
|
|
18
18
|
<body class="lattice-ref-auth">
|
|
19
19
|
<div class="orb orb-1"></div>
|
|
@@ -103,6 +103,6 @@
|
|
|
103
103
|
<a href="#" onclick="return false;" id="privacy-link">개인정보 처리방침</a>
|
|
104
104
|
</footer>
|
|
105
105
|
|
|
106
|
-
<script src="/static/scripts/account.js"></script>
|
|
106
|
+
<script src="/static/scripts/account.js?v=0.3.3"></script>
|
|
107
107
|
</body>
|
|
108
108
|
</html>
|