ltcai 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/auto_setup.py +15 -1
- package/docs/CHANGELOG.md +67 -0
- package/kg_schema.py +64 -15
- package/knowledge_graph.py +499 -31
- package/latticeai/core/__init__.py +1 -1
- package/latticeai/core/context_builder.py +191 -0
- package/latticeai/core/document_generator.py +103 -0
- package/llm_router.py +148 -1
- package/package.json +2 -2
- package/server.py +207 -27
- package/static/css/tokens.css +26 -0
- package/static/lattice-reference.css +390 -375
- package/latticeai/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/api/__pycache__/admin.cpython-314.pyc +0 -0
- package/latticeai/api/__pycache__/auth.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/audit.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/security.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/sessions.cpython-314.pyc +0 -0
package/server.py
CHANGED
|
@@ -46,8 +46,10 @@ from pydantic import BaseModel
|
|
|
46
46
|
from PIL import Image
|
|
47
47
|
|
|
48
48
|
from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
|
|
49
|
-
from knowledge_graph import KnowledgeGraphStore
|
|
49
|
+
from knowledge_graph import KnowledgeGraphStore, set_llm_router
|
|
50
50
|
from knowledge_graph_api import create_knowledge_graph_router
|
|
51
|
+
from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
|
|
52
|
+
from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
|
|
51
53
|
from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
|
|
52
54
|
from latticeai.core.security import (
|
|
53
55
|
hash_password as _hash_password,
|
|
@@ -1001,7 +1003,9 @@ def build_admin_audit_report(users: Dict) -> Dict:
|
|
|
1001
1003
|
)
|
|
1002
1004
|
|
|
1003
1005
|
router = LLMRouter()
|
|
1006
|
+
set_llm_router(router)
|
|
1004
1007
|
gardener = PReinforceGardener()
|
|
1008
|
+
_doc_gen_sessions: dict = {} # conversation_id → DocumentGenerationSession
|
|
1005
1009
|
|
|
1006
1010
|
async def autoload_default_model() -> None:
|
|
1007
1011
|
if not AUTOLOAD_MODELS:
|
|
@@ -1103,7 +1107,7 @@ async def lifespan(app: FastAPI):
|
|
|
1103
1107
|
except Exception:
|
|
1104
1108
|
pass
|
|
1105
1109
|
|
|
1106
|
-
app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="2.
|
|
1110
|
+
app = FastAPI(title=f"Lattice AI Server ({APP_MODE})", version="0.2.2", lifespan=lifespan)
|
|
1107
1111
|
|
|
1108
1112
|
CORS_ALLOWED_ORIGINS = [
|
|
1109
1113
|
f"http://localhost:{DEFAULT_PORT}",
|
|
@@ -1620,6 +1624,9 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1620
1624
|
{"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B", "family": "Qwen3-VL", "tag": "local-vlm", "size": "18GB", "pullable": True},
|
|
1621
1625
|
{"id": "mlx-community/gemma-3-27b-it-4bit", "name": "Gemma 3 27B", "family": "Gemma 3", "tag": "local-vlm", "size": "17GB", "pullable": True},
|
|
1622
1626
|
{"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "15.6GB", "pullable": True},
|
|
1627
|
+
{"id": "mlx-community/gemma-4-31b-it-4bit", "name": "Gemma 4 31B Instruct", "family": "Gemma 4", "tag": "local-vlm", "size": "18.4GB", "pullable": True},
|
|
1628
|
+
{"id": "mlx-community/gpt-oss-20b-MXFP4-Q8", "name": "GPT-OSS 20B", "family": "GPT-OSS", "tag": "local-reasoning", "size": "12.1GB", "pullable": True},
|
|
1629
|
+
{"id": "mlx-community/gpt-oss-120b-MXFP4-Q4", "name": "GPT-OSS 120B", "family": "GPT-OSS", "tag": "local-large", "size": "62.3GB", "pullable": True},
|
|
1623
1630
|
{"id": "mlx-community/Llama-3.3-70B-Instruct-4bit", "name": "Llama 3.3 70B", "family": "Llama 3.x", "tag": "local-general", "size": "40GB+", "pullable": True},
|
|
1624
1631
|
{"id": "mlx-community/Llama-3.1-70B-Instruct-4bit", "name": "Llama 3.1 70B", "family": "Llama 3.1", "tag": "local-general", "size": "40GB+", "pullable": True},
|
|
1625
1632
|
],
|
|
@@ -1627,6 +1634,9 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1627
1634
|
{"id": "ollama:qwen3-vl:4b", "name": "Qwen3-VL 4B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
|
|
1628
1635
|
{"id": "ollama:qwen3-vl:8b", "name": "Qwen3-VL 8B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
|
|
1629
1636
|
{"id": "ollama:qwen3-vl:30b", "name": "Qwen3-VL 30B via Ollama", "family": "Qwen3-VL", "tag": "local-vlm", "size": "pull required", "pullable": True},
|
|
1637
|
+
{"id": "ollama:gpt-oss:20b", "name": "GPT-OSS 20B via Ollama", "family": "GPT-OSS", "tag": "local-reasoning", "size": "pull required", "pullable": True},
|
|
1638
|
+
{"id": "ollama:gpt-oss:120b", "name": "GPT-OSS 120B via Ollama", "family": "GPT-OSS", "tag": "local-large", "size": "pull required", "pullable": True},
|
|
1639
|
+
{"id": "ollama:hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M", "name": "Gemma 4 31B Q4 via Ollama", "family": "Gemma 4", "tag": "local-vlm", "size": "18.7GB", "pullable": True},
|
|
1630
1640
|
{"id": "ollama:qwen3:8b", "name": "Qwen3 8B via Ollama", "family": "Qwen", "tag": "local-server", "size": "pull required", "pullable": True},
|
|
1631
1641
|
{"id": "ollama:qwen2.5-coder:14b", "name": "Qwen2.5 Coder 14B via Ollama", "family": "Qwen", "tag": "local-coding", "size": "pull required", "pullable": True},
|
|
1632
1642
|
{"id": "ollama:gemma3:1b", "name": "Gemma 3 1B via Ollama", "family": "Gemma", "tag": "local-light", "size": "pull required", "pullable": True},
|
|
@@ -1649,6 +1659,8 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1649
1659
|
{"id": "ollama:smollm2:1.7b", "name": "SmolLM2 1.7B via Ollama", "family": "SmolLM", "tag": "local-light", "size": "pull required", "pullable": True},
|
|
1650
1660
|
],
|
|
1651
1661
|
"vllm": [
|
|
1662
|
+
{"id": "vllm:openai/gpt-oss-20b", "name": "GPT-OSS 20B via vLLM", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
|
|
1663
|
+
{"id": "vllm:openai/gpt-oss-120b", "name": "GPT-OSS 120B via vLLM", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
|
|
1652
1664
|
{"id": "vllm:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
1653
1665
|
{"id": "vllm:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
1654
1666
|
{"id": "vllm:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via vLLM", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
@@ -1671,6 +1683,9 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1671
1683
|
{"id": "vllm:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via vLLM", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
|
|
1672
1684
|
],
|
|
1673
1685
|
"lmstudio": [
|
|
1686
|
+
{"id": "lmstudio:openai/gpt-oss-20b", "name": "GPT-OSS 20B via LM Studio", "family": "GPT-OSS", "tag": "local-reasoning", "size": "server model", "pullable": True},
|
|
1687
|
+
{"id": "lmstudio:openai/gpt-oss-120b", "name": "GPT-OSS 120B via LM Studio", "family": "GPT-OSS", "tag": "local-large", "size": "server model", "pullable": True},
|
|
1688
|
+
{"id": "lmstudio:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B 4-bit via LM Studio", "family": "Gemma 4", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
1674
1689
|
{"id": "lmstudio:Qwen/Qwen3-VL-4B-Instruct", "name": "Qwen3-VL 4B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
1675
1690
|
{"id": "lmstudio:Qwen/Qwen3-VL-8B-Instruct", "name": "Qwen3-VL 8B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
1676
1691
|
{"id": "lmstudio:Qwen/Qwen3-VL-30B-A3B-Instruct", "name": "Qwen3-VL 30B A3B via LM Studio", "family": "Qwen3-VL", "tag": "local-vlm", "size": "server model", "pullable": True},
|
|
@@ -1691,6 +1706,9 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1691
1706
|
{"id": "lmstudio:meta-llama/Llama-3.1-70B-Instruct", "name": "Llama 3.1 70B via LM Studio", "family": "Llama 3.1", "tag": "local-server", "size": "server model", "pullable": True},
|
|
1692
1707
|
],
|
|
1693
1708
|
"llamacpp": [
|
|
1709
|
+
{"id": "llamacpp:ggml-org/gpt-oss-20b-GGUF", "name": "GPT-OSS 20B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
|
|
1710
|
+
{"id": "llamacpp:ggml-org/gpt-oss-120b-GGUF", "name": "GPT-OSS 120B GGUF via llama.cpp", "family": "GPT-OSS", "tag": "gguf-q4", "size": "gguf", "pullable": True},
|
|
1711
|
+
{"id": "llamacpp:ggml-org/gemma-4-31B-it-GGUF", "name": "Gemma 4 31B GGUF via llama.cpp", "family": "Gemma 4", "tag": "gguf-q4", "size": "gguf", "pullable": True},
|
|
1694
1712
|
{"id": "llamacpp:Qwen/Qwen3-VL-4B-Instruct-GGUF", "name": "Qwen3-VL 4B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
|
|
1695
1713
|
{"id": "llamacpp:Qwen/Qwen3-VL-8B-Instruct-GGUF", "name": "Qwen3-VL 8B GGUF via llama.cpp", "family": "Qwen3-VL", "tag": "gguf-vlm", "size": "gguf", "pullable": True},
|
|
1696
1714
|
{"id": "llamacpp:unsloth/gemma-2-2b-it-GGUF", "name": "Gemma 2 2B GGUF via llama.cpp", "family": "Gemma", "tag": "gguf-q4", "size": "gguf", "pullable": True},
|
|
@@ -1706,6 +1724,97 @@ ENGINE_MODEL_CATALOG = {
|
|
|
1706
1724
|
],
|
|
1707
1725
|
}
|
|
1708
1726
|
|
|
1727
|
+
MODEL_ENGINE_ALIASES = {
|
|
1728
|
+
"gpt-oss-20b": {
|
|
1729
|
+
"local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
|
|
1730
|
+
"ollama": "gpt-oss:20b",
|
|
1731
|
+
"vllm": "openai/gpt-oss-20b",
|
|
1732
|
+
"lmstudio": "openai/gpt-oss-20b",
|
|
1733
|
+
"llamacpp": "ggml-org/gpt-oss-20b-GGUF",
|
|
1734
|
+
},
|
|
1735
|
+
"openai/gpt-oss-20b": {
|
|
1736
|
+
"local_mlx": "mlx-community/gpt-oss-20b-MXFP4-Q8",
|
|
1737
|
+
"ollama": "gpt-oss:20b",
|
|
1738
|
+
"vllm": "openai/gpt-oss-20b",
|
|
1739
|
+
"lmstudio": "openai/gpt-oss-20b",
|
|
1740
|
+
"llamacpp": "ggml-org/gpt-oss-20b-GGUF",
|
|
1741
|
+
},
|
|
1742
|
+
"gpt-oss-120b": {
|
|
1743
|
+
"local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
|
|
1744
|
+
"ollama": "gpt-oss:120b",
|
|
1745
|
+
"vllm": "openai/gpt-oss-120b",
|
|
1746
|
+
"lmstudio": "openai/gpt-oss-120b",
|
|
1747
|
+
"llamacpp": "ggml-org/gpt-oss-120b-GGUF",
|
|
1748
|
+
},
|
|
1749
|
+
"openai/gpt-oss-120b": {
|
|
1750
|
+
"local_mlx": "mlx-community/gpt-oss-120b-MXFP4-Q4",
|
|
1751
|
+
"ollama": "gpt-oss:120b",
|
|
1752
|
+
"vllm": "openai/gpt-oss-120b",
|
|
1753
|
+
"lmstudio": "openai/gpt-oss-120b",
|
|
1754
|
+
"llamacpp": "ggml-org/gpt-oss-120b-GGUF",
|
|
1755
|
+
},
|
|
1756
|
+
"gemma-4-31b-it-4bit": {
|
|
1757
|
+
"local_mlx": "mlx-community/gemma-4-31b-it-4bit",
|
|
1758
|
+
"ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
|
|
1759
|
+
"vllm": "suitch/gemma-4-31B-it-4bit",
|
|
1760
|
+
"lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1761
|
+
"llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1762
|
+
},
|
|
1763
|
+
"suitch/gemma-4-31b-it-4bit": {
|
|
1764
|
+
"local_mlx": "mlx-community/gemma-4-31b-it-4bit",
|
|
1765
|
+
"ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
|
|
1766
|
+
"vllm": "suitch/gemma-4-31B-it-4bit",
|
|
1767
|
+
"lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1768
|
+
"llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1769
|
+
},
|
|
1770
|
+
"mlx-community/gemma-4-31b-it-4bit": {
|
|
1771
|
+
"local_mlx": "mlx-community/gemma-4-31b-it-4bit",
|
|
1772
|
+
"ollama": "hf.co/ggml-org/gemma-4-31B-it-GGUF:Q4_K_M",
|
|
1773
|
+
"vllm": "suitch/gemma-4-31B-it-4bit",
|
|
1774
|
+
"lmstudio": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1775
|
+
"llamacpp": "ggml-org/gemma-4-31B-it-GGUF",
|
|
1776
|
+
},
|
|
1777
|
+
}
|
|
1778
|
+
|
|
1779
|
+
_VERSIONED_MODEL_PATTERNS = (
|
|
1780
|
+
("gemma", re.compile(r"\bgemma[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
1781
|
+
("qwen", re.compile(r"\bqwen[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
1782
|
+
("llama", re.compile(r"\bllama[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
1783
|
+
("phi", re.compile(r"\bphi[-\s]?(\d+(?:\.\d+)?)", re.IGNORECASE)),
|
|
1784
|
+
)
|
|
1785
|
+
|
|
1786
|
+
|
|
1787
|
+
def _version_tuple(raw: str) -> tuple[int, ...]:
|
|
1788
|
+
return tuple(int(part) for part in raw.split(".") if part.isdigit())
|
|
1789
|
+
|
|
1790
|
+
|
|
1791
|
+
def _model_family_version(model: Dict[str, object]) -> Optional[tuple[str, tuple[int, ...]]]:
|
|
1792
|
+
text = " ".join(str(model.get(key) or "") for key in ("family", "name", "id"))
|
|
1793
|
+
for family, pattern in _VERSIONED_MODEL_PATTERNS:
|
|
1794
|
+
match = pattern.search(text)
|
|
1795
|
+
if match:
|
|
1796
|
+
version = _version_tuple(match.group(1))
|
|
1797
|
+
if version:
|
|
1798
|
+
return family, version
|
|
1799
|
+
return None
|
|
1800
|
+
|
|
1801
|
+
|
|
1802
|
+
def filter_lower_family_versions(models: List[Dict[str, object]]) -> List[Dict[str, object]]:
|
|
1803
|
+
max_versions: Dict[str, tuple[int, ...]] = {}
|
|
1804
|
+
detected: List[tuple[Dict[str, object], Optional[tuple[str, tuple[int, ...]]]]] = []
|
|
1805
|
+
for model in models:
|
|
1806
|
+
version_info = _model_family_version(model)
|
|
1807
|
+
detected.append((model, version_info))
|
|
1808
|
+
if not version_info:
|
|
1809
|
+
continue
|
|
1810
|
+
family, version = version_info
|
|
1811
|
+
if version > max_versions.get(family, (0,)):
|
|
1812
|
+
max_versions[family] = version
|
|
1813
|
+
return [
|
|
1814
|
+
model for model, version_info in detected
|
|
1815
|
+
if not version_info or version_info[1] >= max_versions.get(version_info[0], version_info[1])
|
|
1816
|
+
]
|
|
1817
|
+
|
|
1709
1818
|
def _update_env_file(env_file: Path, key: str, value: str) -> None:
|
|
1710
1819
|
lines = []
|
|
1711
1820
|
found = False
|
|
@@ -2525,17 +2634,20 @@ def engine_status() -> List[Dict]:
|
|
|
2525
2634
|
for m in ENGINE_MODEL_CATALOG["ollama"]:
|
|
2526
2635
|
pull_name = m["id"].removeprefix("ollama:")
|
|
2527
2636
|
ollama_models.append({**m, "pulled": pull_name in pulled})
|
|
2637
|
+
ollama_models = filter_lower_family_versions(ollama_models)
|
|
2528
2638
|
|
|
2529
2639
|
HF_MODELS_ROOT.mkdir(parents=True, exist_ok=True)
|
|
2530
2640
|
mlx_models = []
|
|
2531
2641
|
for m in ENGINE_MODEL_CATALOG.get("local_mlx", []):
|
|
2532
2642
|
repo_id = m["id"]
|
|
2533
2643
|
mlx_models.append({**m, "pulled": hf_model_ready(repo_id, "local_mlx")})
|
|
2644
|
+
mlx_models = filter_lower_family_versions(mlx_models)
|
|
2534
2645
|
|
|
2535
2646
|
vllm_models = []
|
|
2536
2647
|
for m in ENGINE_MODEL_CATALOG.get("vllm", []):
|
|
2537
2648
|
repo_id = m["id"].removeprefix("vllm:")
|
|
2538
2649
|
vllm_models.append({**m, "pulled": hf_model_ready(repo_id, "vllm")})
|
|
2650
|
+
vllm_models = filter_lower_family_versions(vllm_models)
|
|
2539
2651
|
|
|
2540
2652
|
lmstudio_models = []
|
|
2541
2653
|
downloaded_lmstudio = get_lmstudio_models()
|
|
@@ -2567,11 +2679,13 @@ def engine_status() -> List[Dict]:
|
|
|
2567
2679
|
repo_id = m["id"].removeprefix("lmstudio:")
|
|
2568
2680
|
if f"lmstudio:{repo_id}" not in known_ids and repo_id not in downloaded_by_key:
|
|
2569
2681
|
lmstudio_models.append({**m, "pulled": False})
|
|
2682
|
+
lmstudio_models = filter_lower_family_versions(lmstudio_models)
|
|
2570
2683
|
|
|
2571
2684
|
llamacpp_models = []
|
|
2572
2685
|
for m in ENGINE_MODEL_CATALOG.get("llamacpp", []):
|
|
2573
2686
|
repo_id = m["id"].removeprefix("llamacpp:")
|
|
2574
2687
|
llamacpp_models.append({**m, "pulled": hf_model_ready(repo_id, "llamacpp")})
|
|
2688
|
+
llamacpp_models = filter_lower_family_versions(llamacpp_models)
|
|
2575
2689
|
|
|
2576
2690
|
local_server_specs = [
|
|
2577
2691
|
{
|
|
@@ -2768,8 +2882,29 @@ def install_engine(engine: str) -> Dict:
|
|
|
2768
2882
|
return result
|
|
2769
2883
|
|
|
2770
2884
|
|
|
2885
|
+
def _resolve_model_alias(model_id: str, engine: Optional[str] = None) -> str:
|
|
2886
|
+
raw = model_id.strip()
|
|
2887
|
+
engine_hint = (engine or "").strip().lower()
|
|
2888
|
+
provider: Optional[str] = None
|
|
2889
|
+
model_name = raw
|
|
2890
|
+
if ":" in raw:
|
|
2891
|
+
prefix, rest = raw.split(":", 1)
|
|
2892
|
+
prefix = prefix.strip().lower()
|
|
2893
|
+
if prefix in {"ollama", "vllm", "lmstudio", "llamacpp", "local_mlx", "mlx"}:
|
|
2894
|
+
provider = "local_mlx" if prefix in {"local_mlx", "mlx"} else prefix
|
|
2895
|
+
model_name = rest.strip()
|
|
2896
|
+
provider = provider or ("local_mlx" if engine_hint in {"", "local_mlx", "mlx"} else engine_hint)
|
|
2897
|
+
aliases = MODEL_ENGINE_ALIASES.get(model_name.lower())
|
|
2898
|
+
if not aliases:
|
|
2899
|
+
return raw
|
|
2900
|
+
mapped = aliases.get(provider)
|
|
2901
|
+
if not mapped:
|
|
2902
|
+
return raw
|
|
2903
|
+
return mapped if provider == "local_mlx" else f"{provider}:{mapped}"
|
|
2904
|
+
|
|
2905
|
+
|
|
2771
2906
|
def normalize_local_model_request(model_id: str, engine: Optional[str] = None) -> str:
|
|
2772
|
-
model_id = model_id
|
|
2907
|
+
model_id = _resolve_model_alias(model_id, engine)
|
|
2773
2908
|
engine = (engine or "").strip().lower()
|
|
2774
2909
|
if engine in {"local_mlx", "mlx"} and model_id.startswith(("local_mlx:", "mlx:")):
|
|
2775
2910
|
return model_id.split(":", 1)[1].strip()
|
|
@@ -3165,7 +3300,7 @@ async def verify_cloud_models(force: bool = False, provider_filter: Optional[str
|
|
|
3165
3300
|
|
|
3166
3301
|
@app.get("/health")
|
|
3167
3302
|
async def health(request: Request):
|
|
3168
|
-
base = {"status": "ok", "version": "2.
|
|
3303
|
+
base = {"status": "ok", "version": "0.2.2", "mode": APP_MODE}
|
|
3169
3304
|
if not get_current_user(request) and REQUIRE_AUTH:
|
|
3170
3305
|
return base
|
|
3171
3306
|
engines = await asyncio.to_thread(engine_status)
|
|
@@ -3206,7 +3341,7 @@ async def engines_verify_cloud(req: VerifyCloudRequest, request: Request):
|
|
|
3206
3341
|
@app.post("/engines/pull-model")
|
|
3207
3342
|
async def pull_ollama_model(req: PullModelRequest, request: Request):
|
|
3208
3343
|
require_user(request)
|
|
3209
|
-
model_ref = req.model
|
|
3344
|
+
model_ref = normalize_local_model_request(req.model, None)
|
|
3210
3345
|
if not model_ref:
|
|
3211
3346
|
raise HTTPException(status_code=400, detail="λͺ¨λΈ μλ³μκ° λΉμ΄ μμ΅λλ€.")
|
|
3212
3347
|
|
|
@@ -3324,23 +3459,8 @@ async def set_api_key(req: SetApiKeyRequest, request: Request):
|
|
|
3324
3459
|
async def list_models():
|
|
3325
3460
|
"""HuggingFace μΆμ² λͺ¨λΈ λͺ©λ‘ λ° λ‘λ μν λ°ν"""
|
|
3326
3461
|
recommended = [
|
|
3327
|
-
{"id": "
|
|
3328
|
-
|
|
3329
|
-
{"id": "mlx-community/Qwen3-VL-30B-A3B-Instruct-4bit", "name": "Qwen3-VL 30B A3B","tag": "multimodal", "size": "18GB"},
|
|
3330
|
-
{"id": "mlx-community/SmolLM-1.7B-Instruct-4bit", "name": "SmolLM 1.7B", "tag": "ultra-light", "size": "963MB"},
|
|
3331
|
-
{"id": "mlx-community/gemma-3-1b-it-4bit", "name": "Gemma 3 1B", "tag": "ultra-light", "size": "733MB"},
|
|
3332
|
-
{"id": "mlx-community/Llama-3.2-1B-Instruct-4bit", "name": "Llama 3.2 1B", "tag": "light", "size": "1.3GB"},
|
|
3333
|
-
{"id": "mlx-community/Llama-3.2-3B-Instruct-4bit", "name": "Llama 3.2 3B", "tag": "light", "size": "2.0GB"},
|
|
3334
|
-
{"id": "mlx-community/Phi-4-mini-instruct-4bit", "name": "Phi 4 Mini", "tag": "coding", "size": "2.2GB"},
|
|
3335
|
-
{"id": "mlx-community/Qwen2.5-VL-7B-Instruct-4bit", "name": "Qwen2.5-VL 7B", "tag": "multimodal", "size": "4.4GB"},
|
|
3336
|
-
{"id": "mlx-community/Mistral-7B-Instruct-v0.3-4bit", "name": "Mistral 7B v0.3", "tag": "general", "size": "4.1GB"},
|
|
3337
|
-
{"id": "mlx-community/Llama-3.1-8B-Instruct-4bit", "name": "Llama 3.1 8B", "tag": "general", "size": "4.7GB"},
|
|
3338
|
-
{"id": "mlx-community/gemma-4-e4b-it-4bit", "name": "Gemma 4 E4B", "tag": "multimodal", "size": "5.2GB"},
|
|
3339
|
-
{"id": "mlx-community/gemma-3-12b-it-4bit", "name": "Gemma 3 12B", "tag": "balanced", "size": "8.0GB"},
|
|
3340
|
-
{"id": "mlx-community/phi-4-4bit", "name": "Phi 4", "tag": "coding", "size": "8.3GB"},
|
|
3341
|
-
{"id": "mlx-community/Mistral-Small-24B-Instruct-2501-4bit", "name": "Mistral Small 24B", "tag": "large", "size": "13.3GB"},
|
|
3342
|
-
{"id": "mlx-community/Qwen2.5-Coder-32B-Instruct-4bit", "name": "Qwen2.5 Coder 32B","tag": "coding", "size": "18.5GB"},
|
|
3343
|
-
{"id": "mlx-community/gemma-4-26b-a4b-it-4bit", "name": "Gemma 4 26B A4B", "tag": "multimodal", "size": "15.6GB"},
|
|
3462
|
+
{"id": item["id"], "name": item["name"], "tag": item["tag"], "size": item["size"]}
|
|
3463
|
+
for item in filter_lower_family_versions(ENGINE_MODEL_CATALOG.get("local_mlx", []))
|
|
3344
3464
|
]
|
|
3345
3465
|
return {
|
|
3346
3466
|
"recommended": recommended,
|
|
@@ -3520,12 +3640,24 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3520
3640
|
except Exception as e:
|
|
3521
3641
|
logging.warning("Knowledge reinforcement skipped: %s", e)
|
|
3522
3642
|
|
|
3643
|
+
is_doc_gen = detect_document_intent(req.message)
|
|
3644
|
+
doc_gen_context_result = None
|
|
3645
|
+
|
|
3523
3646
|
try:
|
|
3524
3647
|
if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
|
|
3525
|
-
|
|
3526
|
-
|
|
3527
|
-
|
|
3528
|
-
|
|
3648
|
+
if is_doc_gen:
|
|
3649
|
+
doc_gen_context_result = retrieve_context_for_generation(
|
|
3650
|
+
KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
|
|
3651
|
+
)
|
|
3652
|
+
graph_md = doc_gen_context_result.get("context_markdown", "")
|
|
3653
|
+
if graph_md:
|
|
3654
|
+
context += f"\n\n[KNOWLEDGE GRAPH β Document Generation Context]\n{graph_md}"
|
|
3655
|
+
print("π Document generation context retrieved from knowledge graph.")
|
|
3656
|
+
else:
|
|
3657
|
+
graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
|
|
3658
|
+
if graph_context:
|
|
3659
|
+
context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
|
|
3660
|
+
print("πΈοΈ Context reinforced with knowledge graph.")
|
|
3529
3661
|
except Exception as e:
|
|
3530
3662
|
logging.warning("Knowledge graph reinforcement skipped: %s", e)
|
|
3531
3663
|
|
|
@@ -3535,7 +3667,6 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3535
3667
|
context += f"\n\n{screenshot_context}"
|
|
3536
3668
|
|
|
3537
3669
|
if env_bool("LATTICEAI_AUTO_READ_CHAT_PATHS", default=False):
|
|
3538
|
-
# Off by default: automatic local-file injection can leak files to cloud models.
|
|
3539
3670
|
_file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
|
|
3540
3671
|
for _m in _file_path_re.finditer(req.message or ""):
|
|
3541
3672
|
_fpath = _m.group(1).strip()
|
|
@@ -3553,6 +3684,55 @@ async def chat(req: ChatRequest, request: Request):
|
|
|
3553
3684
|
if req.source != "telegram":
|
|
3554
3685
|
asyncio.create_task(broadcast_web_chat("user", req.message))
|
|
3555
3686
|
|
|
3687
|
+
if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
|
|
3688
|
+
conv_key = req.conversation_id or "default"
|
|
3689
|
+
session = _doc_gen_sessions.get(conv_key)
|
|
3690
|
+
if session is None:
|
|
3691
|
+
session = DocumentGenerationSession()
|
|
3692
|
+
_doc_gen_sessions[conv_key] = session
|
|
3693
|
+
graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
|
|
3694
|
+
system_prompt = session.get_system_prompt(graph_md)
|
|
3695
|
+
sources = (doc_gen_context_result or {}).get("sources", [])
|
|
3696
|
+
footnote = format_sources_footnote(sources)
|
|
3697
|
+
|
|
3698
|
+
if req.stream:
|
|
3699
|
+
async def _stream_doc_gen():
|
|
3700
|
+
collected = []
|
|
3701
|
+
async for chunk in router.stream_generate_document(
|
|
3702
|
+
req.message, system_prompt,
|
|
3703
|
+
max_tokens=req.max_tokens or 8192,
|
|
3704
|
+
temperature=req.temperature or 0.3,
|
|
3705
|
+
):
|
|
3706
|
+
collected.append(chunk)
|
|
3707
|
+
yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
|
|
3708
|
+
full_text = "".join(collected)
|
|
3709
|
+
if footnote:
|
|
3710
|
+
yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
|
|
3711
|
+
full_text += footnote
|
|
3712
|
+
session.update(graph_md, full_text, req.conversation_id)
|
|
3713
|
+
save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
|
|
3714
|
+
if req.source != "telegram":
|
|
3715
|
+
asyncio.create_task(broadcast_web_chat("assistant", full_text))
|
|
3716
|
+
yield "data: [DONE]\n\n"
|
|
3717
|
+
return StreamingResponse(
|
|
3718
|
+
_stream_doc_gen(),
|
|
3719
|
+
media_type="text/event-stream",
|
|
3720
|
+
headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
|
|
3721
|
+
)
|
|
3722
|
+
else:
|
|
3723
|
+
result = await router.generate_document(
|
|
3724
|
+
req.message, system_prompt,
|
|
3725
|
+
max_tokens=req.max_tokens or 8192,
|
|
3726
|
+
temperature=req.temperature or 0.3,
|
|
3727
|
+
)
|
|
3728
|
+
if footnote:
|
|
3729
|
+
result += footnote
|
|
3730
|
+
session.update(graph_md, result, req.conversation_id)
|
|
3731
|
+
save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
|
|
3732
|
+
if req.source != "telegram":
|
|
3733
|
+
asyncio.create_task(broadcast_web_chat("assistant", str(result)))
|
|
3734
|
+
return JSONResponse(content={"response": str(result)})
|
|
3735
|
+
|
|
3556
3736
|
if req.stream:
|
|
3557
3737
|
recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
|
|
3558
3738
|
stream_context = context
|
package/static/css/tokens.css
CHANGED
|
@@ -159,6 +159,32 @@
|
|
|
159
159
|
--radius-sm: var(--lt-radius-sm);
|
|
160
160
|
}
|
|
161
161
|
|
|
162
|
+
/* ── Global polish ─────────────────────────────────────────── */
|
|
163
|
+
::selection {
|
|
164
|
+
background: rgba(110, 74, 230, 0.18);
|
|
165
|
+
color: var(--lt-color-ink-900);
|
|
166
|
+
}
|
|
167
|
+
|
|
168
|
+
:focus-visible {
|
|
169
|
+
outline: 2px solid rgba(110, 74, 230, 0.40);
|
|
170
|
+
outline-offset: 2px;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
::-webkit-scrollbar {
|
|
174
|
+
width: 6px;
|
|
175
|
+
height: 6px;
|
|
176
|
+
}
|
|
177
|
+
::-webkit-scrollbar-track {
|
|
178
|
+
background: transparent;
|
|
179
|
+
}
|
|
180
|
+
::-webkit-scrollbar-thumb {
|
|
181
|
+
background: rgba(110, 74, 230, 0.16);
|
|
182
|
+
border-radius: 99px;
|
|
183
|
+
}
|
|
184
|
+
::-webkit-scrollbar-thumb:hover {
|
|
185
|
+
background: rgba(110, 74, 230, 0.28);
|
|
186
|
+
}
|
|
187
|
+
|
|
162
188
|
/* ── Reduced motion (a11y) ─────────────────────────────────── */
|
|
163
189
|
@media (prefers-reduced-motion: reduce) {
|
|
164
190
|
:root {
|