ltcai 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -2
- package/auto_setup.py +15 -1
- package/docs/CHANGELOG.md +67 -0
- package/kg_schema.py +64 -15
- package/knowledge_graph.py +499 -31
- package/latticeai/core/__init__.py +1 -1
- package/latticeai/core/context_builder.py +191 -0
- package/latticeai/core/document_generator.py +103 -0
- package/llm_router.py +148 -1
- package/package.json +2 -2
- package/server.py +207 -27
- package/static/css/tokens.css +26 -0
- package/static/lattice-reference.css +390 -375
- package/latticeai/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/api/__pycache__/admin.cpython-314.pyc +0 -0
- package/latticeai/api/__pycache__/auth.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/__init__.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/audit.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/security.cpython-314.pyc +0 -0
- package/latticeai/core/__pycache__/sessions.cpython-314.pyc +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
"""Core utilities: security, sessions, audit."""
|
|
1
|
+
"""Core utilities: security, sessions, audit, context_builder, document_generator."""
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Context Builder — Knowledge Graph 기반 문서 생성용 컨텍스트 조합 모듈.
|
|
3
|
+
|
|
4
|
+
retrieve_context_for_generation() 파이프라인:
|
|
5
|
+
Step 1: Query → Hybrid Search (text + graph + recency)
|
|
6
|
+
Step 2: Seed nodes → Multi-hop traversal (Document → Project → Concept)
|
|
7
|
+
Step 3: Top-K 결과를 구조화된 Markdown Context로 변환
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import logging
|
|
11
|
+
import re
|
|
12
|
+
from typing import Any, Dict, List, Optional
|
|
13
|
+
|
|
14
|
+
# Any run of whitespace (spaces, tabs, newlines) is collapsed to one space.
_CLEAN_RE = re.compile(r"\s+")


def _clean(text: str, max_len: int = 700) -> str:
    """Collapse whitespace in *text* and cap the result at *max_len* characters.

    Args:
        text: Value to normalise. Falsy values (``None``, ``""``, ``0``) become
            the empty string; anything else is passed through ``str()``.
        max_len: Maximum number of characters kept (plain slice semantics).

    Returns:
        The single-spaced, stripped text, truncated to *max_len* characters
        with no trailing whitespace.
    """
    collapsed = _CLEAN_RE.sub(" ", str(text or "")).strip()
    # The cut can land right after a word separator; trim again so the
    # truncated text never ends in a dangling space.
    return collapsed[:max_len].rstrip()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def retrieve_context_for_generation(
    kg_store,
    query: str,
    *,
    max_results: int = 10,
    max_hops: int = 2,
) -> Dict[str, Any]:
    """Search the knowledge graph and assemble context for document generation.

    The function calls ``kg_store.search_for_document_generation`` for the
    primary hits, expands them with ``kg_store.multi_hop_context``, groups the
    nodes into sections and renders those sections as Markdown.  When the
    primary search returns nothing it falls back to
    ``kg_store.context_for_query``.

    Args:
        kg_store: Knowledge-graph store exposing the three methods named above.
            A falsy store yields an empty result.
        query: Free-text query; it is stripped before use.
        max_results: Passed as ``limit`` to the store's search methods.
        max_hops: Passed as ``max_hops`` to ``multi_hop_context``.

    Returns:
        A dict with the keys:
            ``query``: the stripped query string.
            ``context_markdown``: Markdown to inject into the LLM prompt.
            ``sources``: list of referenced sources (empty on fallback).
            ``stats``: retrieval statistics (empty when nothing was searched).
    """
    query = str(query or "").strip()
    if not query or not kg_store:
        return {"query": query, "context_markdown": "", "sources": [], "stats": {}}

    results = kg_store.search_for_document_generation(query, limit=max_results)
    if not results:
        fallback_ctx = kg_store.context_for_query(query, limit=max_results)
        return {
            "query": query,
            "context_markdown": fallback_ctx,
            "sources": [],
            "stats": {"method": "fallback", "matches": 0},
        }

    seed_ids = [r["id"] for r in results]
    hop_data = kg_store.multi_hop_context(seed_ids, max_hops=max_hops)
    hop_nodes = hop_data.get("nodes", [])
    hop_edges = hop_data.get("edges", [])

    # Build the membership set once instead of once per traversed node.
    seed_id_set = set(seed_ids)
    extra_nodes_by_id = {
        node["id"]: node for node in hop_nodes if node["id"] not in seed_id_set
    }

    sections = _build_context_sections(results, extra_nodes_by_id, hop_edges)
    context_md = _render_markdown(query, sections)
    sources = _extract_sources(results)

    return {
        "query": query,
        "context_markdown": context_md,
        "sources": sources,
        "stats": {
            "method": "hybrid",
            "primary_matches": len(results),
            "graph_nodes": len(hop_nodes),
            "graph_edges": len(hop_edges),
        },
    }
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _build_context_sections(
    primary_results: List[Dict[str, Any]],
    extra_nodes: Dict[str, Dict[str, Any]],
    edges: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Group search hits into display sections keyed by node type.

    Args:
        primary_results: Primary search hits; each is a dict that may carry a
            ``"type"`` key.  Hits whose type belongs to none of the groups
            below (or that have no type) are left out of every section.
        extra_nodes: Additional nodes keyed by id; only those typed
            ``Concept``/``Feature`` are used, and at most eight of them.
        edges: Accepted for interface compatibility; not used here.

    Returns:
        A list of ``{"title", "items", "icon"}`` dicts in fixed order
        (documents, decisions/tasks, conversations, concepts), omitting any
        group that ended up empty.
    """

    def of_type(nodes, wanted):
        # .get() keeps nodes without a "type" key from raising KeyError.
        return [node for node in nodes if node.get("type") in wanted]

    document_types = (
        "Document", "File", "SlideDeck", "Spreadsheet", "CodeFile", "Image", "ImageText",
    )
    concept_types = ("Concept", "Feature")

    concepts = of_type(primary_results, concept_types)
    extra_concepts = of_type(extra_nodes.values(), concept_types)

    groups = [
        ("관련 문서/파일", "📄", of_type(primary_results, document_types)),
        ("관련 결정사항/작업", "✅", of_type(primary_results, ("Decision", "Task"))),
        ("관련 대화", "💬", of_type(primary_results, ("Chat",))),
        ("관련 개념/기술", "🔗", concepts + extra_concepts[:8]),
    ]
    return [
        {"title": title, "items": items, "icon": icon}
        for title, icon, items in groups
        if items
    ]
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def _render_markdown(query: str, sections: List[Dict[str, Any]]) -> str:
    """Render grouped context sections as Markdown for prompt injection.

    Every non-empty section becomes a ``###`` heading followed by at most
    eight bullet items.  An item line shows its type, title and (when
    present and numeric) its ``hybrid_score``; optional sub-bullets carry the
    source, the cleaned summary and up to five related-concept titles.

    Args:
        query: Accepted for interface compatibility; not used by the renderer.
        sections: Dicts with ``"title"``, ``"icon"`` and ``"items"`` keys.

    Returns:
        The rendered Markdown with surrounding whitespace stripped; an empty
        string when there is nothing to render.
    """
    lines: List[str] = []
    for section in sections:
        if not section["items"]:
            continue
        lines.append(f"### {section['icon']} {section['title']}")
        lines.append("")
        for item in section["items"][:8]:
            title = item.get("title", "")
            summary = _clean(item.get("summary", ""))
            item_type = item.get("type", "")

            score_info = ""
            if "hybrid_score" in item:
                # A None or non-numeric score must not abort the whole render.
                try:
                    score_info = f" (relevance: {float(item['hybrid_score']):.2f})"
                except (TypeError, ValueError):
                    score_info = ""

            meta = item.get("metadata") or {}
            item_id = item.get("id", "")
            source = (
                meta.get("relative_path")
                or meta.get("filename")
                or meta.get("conversation_id")
                or meta.get("source")
                or item_id
            )

            lines.append(f"- **[{item_type}] {title}**{score_info}")
            # NOTE(review): sub-bullet prefixes are reproduced exactly as shown
            # in the reviewed diff (one leading space) — confirm against the file.
            # The source is only shown when it says more than the node id.
            if source and source != item_id:
                lines.append(f" - 출처: {source}")
            if summary:
                lines.append(f" - {summary}")

            related = item.get("related_concepts", [])
            if related:
                # Accept both {"title": ...} dicts and bare values.
                names = [
                    str(c.get("title", "")) if isinstance(c, dict) else str(c)
                    for c in related[:5]
                ]
                tags = ", ".join(names)
                lines.append(f" - 관련: {tags}")

        lines.append("")

    return "\n".join(lines).strip()
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
def _extract_sources(results: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """Build a de-duplicated list of source descriptors from search hits.

    The source key of a hit is the first truthy value among its metadata's
    ``relative_path``, ``filename``, ``conversation_id`` and finally the hit's
    own ``id``.  Hits without any key, or whose key was already emitted, are
    skipped, so the first hit per key wins.

    Args:
        results: Search hits (dicts); ``metadata`` may be missing or ``None``.

    Returns:
        Dicts with ``id``, ``type``, ``title`` and ``source``, in input order.
    """
    collected = []
    emitted_keys = set()
    for hit in results:
        meta = hit.get("metadata") or {}
        hit_id = hit.get("id", "")
        key = (
            meta.get("relative_path")
            or meta.get("filename")
            or meta.get("conversation_id")
            or hit_id
        )
        if not key or key in emitted_keys:
            continue
        emitted_keys.add(key)
        collected.append({
            "id": hit_id,
            "type": hit.get("type", ""),
            "title": hit.get("title", ""),
            "source": key,
        })
    return collected
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def format_sources_footnote(sources: List[Dict[str, str]]) -> str:
    """Format up to ten sources as a numbered Markdown footnote.

    Args:
        sources: Source descriptors carrying ``type``, ``title`` and
            ``source`` keys.

    Returns:
        An empty string when *sources* is empty; otherwise a horizontal rule,
        a bold heading and one numbered line per source (first ten only).
    """
    if not sources:
        return ""
    header = "\n---\n**참조된 지식 그래프 노드:**"
    entries = [
        f"{position}. [{entry['type']}] {entry['title']} ({entry['source']})"
        for position, entry in enumerate(sources[:10], 1)
    ]
    return "\n".join([header, *entries])
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Document Generator — 지식 그래프 기반 고품질 문서 자동 생성 모듈.
|
|
3
|
+
|
|
4
|
+
사용자가 "Q3 마케팅 전략 보고서 작성해줘" 같은 일반 채팅을 하면,
|
|
5
|
+
Knowledge Graph에서 관련 과거 문서/개념/관계를 자동으로 찾아
|
|
6
|
+
LLM이 자연스럽고 일관성 있게 새로운 문서를 생성한다.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import re
|
|
10
|
+
from typing import Optional
|
|
11
|
+
|
|
12
|
+
# Weak signals: a message needs hits from at least two of these patterns
# (Korean document nouns, request verbs, English document nouns).
_DOCUMENT_INTENT_PATTERNS = [
    re.compile(r"(보고서|계획서|기획서|제안서|문서|리포트|요약서|분석서|전략서|매뉴얼|가이드)", re.IGNORECASE),
    re.compile(r"(작성|만들어|생성|써|줘|write|create|generate|draft|compose|prepare)", re.IGNORECASE),
    re.compile(r"(report|proposal|plan|document|summary|analysis|strategy|guide|manual|brief)", re.IGNORECASE),
]

# Strong signals: a single hit is enough.
_STRONG_INTENT_PATTERNS = [
    # The English verbs are anchored with \b so that "write about",
    # "create method" or "overwrite the" no longer count as a request.
    re.compile(r"(작성해|만들어\s*줘|써\s*줘|생성해|\b(?:write|create|draft)\s+(?:a|an|me|the)\b)", re.IGNORECASE),
    re.compile(r"(보고서|계획서|기획서|제안서|전략서|매뉴얼).*(작성|만들|생성|써)", re.IGNORECASE),
    re.compile(r"(작성|만들|생성|써).*(보고서|계획서|기획서|제안서|전략서|매뉴얼)", re.IGNORECASE),
]


def detect_document_intent(message: str) -> bool:
    """Detect whether the user's message is requesting document generation.

    Args:
        message: Raw chat message.  Empty values and messages shorter than
            five characters are never treated as document requests.

    Returns:
        ``True`` when any strong pattern matches, or when at least two of the
        weak patterns match; ``False`` otherwise.
    """
    if not message or len(message) < 5:
        return False
    if any(pattern.search(message) for pattern in _STRONG_INTENT_PATTERNS):
        return True
    # Count matching patterns (not matches): two distinct kinds of hint needed.
    hit_count = sum(1 for pattern in _DOCUMENT_INTENT_PATTERNS if pattern.search(message))
    return hit_count >= 2
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# System prompt for a first-time document request; {graph_context} is filled in
# by build_document_system_prompt().
DOCUMENT_GENERATION_SYSTEM_PROMPT = """당신은 사용자의 개인 AI 지식 어시스턴트 Lattice AI입니다.
사용자의 기존 지식 기반을 활용하여 고품질 문서를 생성합니다.

## 지침
1. 아래 제공된 지식 그래프 컨텍스트를 최대한 활용하세요.
2. 이전 문서의 스타일과 톤을 유지하면서 최신적이고 전문적인 문서를 작성하세요.
3. 출처는 자연스럽게 본문이나 각주에 포함하세요.
4. 사용자의 언어(한국어/영어)에 맞춰 작성하세요.
5. 구조화된 포맷(제목, 소제목, 목록 등)을 사용하세요.

## 사용자의 지식 기반

{graph_context}"""

# System prompt for revising an earlier document; both placeholders are filled
# in by build_followup_system_prompt().
DOCUMENT_GENERATION_FOLLOWUP_PROMPT = """당신은 사용자의 개인 AI 지식 어시스턴트 Lattice AI입니다.
이전에 생성한 문서를 사용자의 요청에 따라 수정/보완합니다.

## 이전 생성 컨텍스트

{graph_context}

## 이전 문서
{previous_document}

위 문서를 사용자의 요청에 따라 수정하세요. 기존 스타일과 톤을 유지하세요."""

# The only placeholders the templates above use.
_PROMPT_FIELD_RE = re.compile(r"\{(graph_context|previous_document)\}")


def _fill_prompt(template: str, **fields: str) -> str:
    """Fill ``{name}`` placeholders in *template* in one pass.

    Substituted values are never re-scanned, so placeholder-looking text
    inside a value stays literal.  Placeholders without a supplied value are
    left untouched.
    """
    return _PROMPT_FIELD_RE.sub(
        lambda match: fields.get(match.group(1), match.group(0)), template
    )


def build_document_system_prompt(graph_context: str) -> str:
    """Return the first-request system prompt with *graph_context* embedded.

    An empty context is replaced by a notice that no knowledge base is
    available and general knowledge will be used.
    """
    if not graph_context:
        graph_context = "(사용 가능한 지식 기반이 없습니다. 일반 지식을 활용하여 작성합니다.)"
    return _fill_prompt(DOCUMENT_GENERATION_SYSTEM_PROMPT, graph_context=graph_context)


def build_followup_system_prompt(graph_context: str, previous_document: str) -> str:
    """Return the revision system prompt for an earlier generated document.

    Args:
        graph_context: Knowledge-graph context; ``"(없음)"`` is used when empty.
        previous_document: The document being revised; ``"(없음)"`` when empty.

    Returns:
        The follow-up template with both placeholders substituted in a single
        pass, so a literal ``{previous_document}`` occurring inside
        *graph_context* is not overwritten by the document text.
    """
    return _fill_prompt(
        DOCUMENT_GENERATION_FOLLOWUP_PROMPT,
        graph_context=graph_context or "(없음)",
        previous_document=previous_document or "(없음)",
    )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
class DocumentGenerationSession:
    """Remember the last generated document so follow-up requests can revise it."""

    def __init__(self):
        # Context used for the most recent generation, if any.
        self._last_context: Optional[str] = None
        # Most recently generated document; None until update() is called.
        self._last_document: Optional[str] = None
        # Conversation the stored document belongs to, when one was supplied.
        self._conversation_id: Optional[str] = None

    @property
    def has_previous(self) -> bool:
        """Whether a document has been stored (an empty string still counts)."""
        return self._last_document is not None

    def update(self, context: str, document: str, conversation_id: Optional[str] = None) -> None:
        """Store the latest context/document pair.

        A falsy *conversation_id* leaves the previously stored id unchanged.
        """
        self._last_context, self._last_document = context, document
        if not conversation_id:
            return
        self._conversation_id = conversation_id

    def get_system_prompt(self, graph_context: str) -> str:
        """Return the follow-up prompt if a document is stored, else the initial one.

        For the follow-up prompt an empty *graph_context* falls back to the
        stored context, then to an empty string.
        """
        if not self.has_previous:
            return build_document_system_prompt(graph_context)
        effective_context = graph_context or self._last_context or ""
        prior_document = self._last_document or ""
        return build_followup_system_prompt(effective_context, prior_document)

    def clear(self) -> None:
        """Forget the stored context, document and conversation id."""
        self._last_context = None
        self._last_document = None
        self._conversation_id = None
|
package/llm_router.py
CHANGED
|
@@ -329,7 +329,7 @@ class LLMRouter:
|
|
|
329
329
|
return f"Cached: {cache_key}"
|
|
330
330
|
|
|
331
331
|
self._enforce_local_model_limit(cache_key)
|
|
332
|
-
print(f"⏳ Loading
|
|
332
|
+
print(f"⏳ Loading local model stack: {cache_key}...")
|
|
333
333
|
loop = asyncio.get_event_loop()
|
|
334
334
|
target_model_id = _resolve_local_hf_model(model_id)
|
|
335
335
|
target_draft_model_id = _resolve_local_hf_model(draft_model_id) if draft_model_id else None
|
|
@@ -626,3 +626,150 @@ class LLMRouter:
|
|
|
626
626
|
except Exception as e:
|
|
627
627
|
print(f"⚠️ VLM image decode failed: {e}")
|
|
628
628
|
return None
|
|
629
|
+
|
|
630
|
+
# ── Document Generation Pipeline ──────────────────────────────────────
|
|
631
|
+
|
|
632
|
+
async def generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> str:
    """Generate a document using a specialized system prompt with graph context.

    Args:
        message: The user's request, sent as the ``user`` turn.
        system_prompt: Prompt sent as the ``system`` turn.
        max_tokens: Generation limit forwarded to the backend.
        temperature: Sampling temperature forwarded to the backend.

    Returns:
        The generated text passed through ``normalize_branding``, or the
        literal ``"No model loaded."`` when no model is active.
    """
    # Snapshot the active model key once.  The worker thread below must
    # describe the same model that `cached` was read for; re-reading
    # self._current there could see None or a different model.
    current = self._current
    if not current:
        return "No model loaded."
    self._touch()
    cached = self._cache[current]

    if isinstance(cached, CloudModel):
        return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)

    model, tokenizer, draft_model = cached

    prompt = None
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            # Best effort: any template failure falls through to the
            # hand-built ChatML prompt below.
            prompt = None
    if prompt is None:
        prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    loop = asyncio.get_event_loop()

    def _gen():
        # Runs in the executor thread so generation does not block the loop.
        import mlx.core as mx
        mx.set_default_device(mx.gpu)
        model_key = current.lower()
        is_gemma4 = "gemma-4" in model_key or "gemma4" in model_key
        if is_gemma4 and VLM_AVAILABLE:
            from mlx_vlm import generate as vlm_gen
            return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
        from mlx_lm import generate as lm_gen
        return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)

    result = await loop.run_in_executor(executor, _gen)
    # Some backends return an object with a .text attribute, others a string.
    if hasattr(result, "text"):
        return normalize_branding(result.text)
    return normalize_branding(str(result))
|
|
677
|
+
|
|
678
|
+
async def _cloud_generate_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> str:
    """Run a non-streaming chat completion against a cloud-style model.

    Any exception raised while issuing the request is re-raised as
    ``RuntimeError`` carrying the text from ``_local_server_error_hint``,
    with the original exception chained as its cause.

    Returns:
        The first choice's message content (empty string when it is falsy),
        passed through ``normalize_branding``.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    try:
        response = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as err:
        raise RuntimeError(self._local_server_error_hint(cloud, err)) from err
    content = response.choices[0].message.content
    return normalize_branding(content or "")
|
|
692
|
+
|
|
693
|
+
async def stream_generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> AsyncIterator[str]:
    """Stream document generation with specialized system prompt.

    Args:
        message: The user's request, sent as the ``user`` turn.
        system_prompt: Prompt sent as the ``system`` turn.
        max_tokens: Generation limit forwarded to the backend.
        temperature: Sampling temperature forwarded to the backend.

    Yields:
        Text chunks passed through ``normalize_branding``.  When no model is
        active the single chunk ``"No model loaded."`` is yielded.  A failure
        inside the local generation thread is reported as one
        ``"⚠️ Error: ..."`` chunk, after which the stream ends.
    """
    # Snapshot the active model key so the worker thread inspects the same
    # model that `cached` was read for.
    current = self._current
    if not current:
        yield "No model loaded."
        return
    self._touch()
    cached = self._cache[current]

    if isinstance(cached, CloudModel):
        async for chunk in self._cloud_stream_document(cached, message, system_prompt, max_tokens, temperature):
            yield chunk
        return

    model, tokenizer, draft_model = cached

    prompt = None
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            # Best effort: fall through to the hand-built ChatML prompt.
            prompt = None
    if prompt is None:
        prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"

    loop = asyncio.get_event_loop()
    queue = asyncio.Queue()

    def _stream():
        # Producer running in the executor thread; hands chunks to the event
        # loop through the queue.  All setup lives inside the try so that the
        # None sentinel is always enqueued and the consumer can never be left
        # waiting after a setup failure.
        try:
            import mlx.core as mx
            mx.set_default_device(mx.gpu)
            model_key = current.lower()
            is_gemma4 = "gemma-4" in model_key or "gemma4" in model_key
            if is_gemma4 and VLM_AVAILABLE:
                from mlx_vlm import stream_generate as vlm_stream
                gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
            else:
                from mlx_lm import stream_generate as lm_stream
                gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
            for chunk in gen:
                # Chunks may be objects with .text, tuples, or plain values.
                text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
                loop.call_soon_threadsafe(queue.put_nowait, text)
        except Exception as e:
            loop.call_soon_threadsafe(queue.put_nowait, f"⚠️ Error: {e}")
        finally:
            # None marks the end of the stream for the consumer loop below.
            loop.call_soon_threadsafe(queue.put_nowait, None)

    loop.run_in_executor(executor, _stream)
    while True:
        chunk = await queue.get()
        if chunk is None:
            break
        yield normalize_branding(chunk)
|
|
754
|
+
|
|
755
|
+
async def _cloud_stream_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> AsyncIterator[str]:
    """Stream a chat completion from a cloud-style model.

    A failure while opening the stream is not raised; it is reported as a
    single ``"⚠️ ..."`` chunk built from ``_local_server_error_hint`` and the
    stream then ends.

    Yields:
        Each non-empty content delta, passed through ``normalize_branding``.
        Events without choices are skipped.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    try:
        stream = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
    except Exception as err:
        yield f"⚠️ {self._local_server_error_hint(cloud, err)}"
        return
    async for event in stream:
        piece = event.choices[0].delta.content if event.choices else None
        if piece:
            yield normalize_branding(piece)
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "ltcai",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "Lattice AI local MLX/cloud LLM workspace server",
|
|
5
5
|
"homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
|
|
6
6
|
"repository": {
|
|
@@ -54,7 +54,7 @@
|
|
|
54
54
|
"tools.py",
|
|
55
55
|
"codex_telegram_bot.py",
|
|
56
56
|
"mcp_registry.py",
|
|
57
|
-
"latticeai
|
|
57
|
+
"latticeai/**/*.py",
|
|
58
58
|
"skills/",
|
|
59
59
|
"static/account.html",
|
|
60
60
|
"static/chat.html",
|