ltcai 0.2.2 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,191 @@
1
+ """
2
+ Context Builder — Knowledge Graph 기반 문서 생성용 컨텍스트 조합 모듈.
3
+
4
+ retrieve_context_for_generation() 파이프라인:
5
+ Step 1: Query → Hybrid Search (text + graph + recency)
6
+ Step 2: Seed nodes → Multi-hop traversal (Document → Project → Concept)
7
+ Step 3: Top-K 결과를 구조화된 Markdown Context로 변환
8
+ """
9
+
10
+ import logging
11
+ import re
12
+ from typing import Any, Dict, List, Optional
13
+
14
_CLEAN_RE = re.compile(r"\s+")


def _clean(text: str, max_len: int = 700) -> str:
    """Collapse whitespace runs in *text* and cap the result at *max_len* characters.

    Falsy input (``None``, ``""``, ``0``) is treated as an empty string; any
    other value is coerced with ``str()`` before cleaning.
    """
    raw = str(text or "")
    collapsed = _CLEAN_RE.sub(" ", raw)
    trimmed = collapsed.strip()
    return trimmed[:max_len]
19
+
20
+
21
def retrieve_context_for_generation(
    kg_store,
    query: str,
    *,
    max_results: int = 10,
    max_hops: int = 2,
) -> Dict[str, Any]:
    """Search the knowledge graph and assemble context for document generation.

    Pipeline:
      1. ``kg_store.search_for_document_generation`` yields the primary matches.
      2. ``kg_store.multi_hop_context`` expands the matches into neighbouring
         nodes and edges.
      3. The matches plus the extra nodes are grouped into sections and
         rendered as Markdown.

    When the primary search returns nothing, the plain
    ``kg_store.context_for_query`` text is returned instead (method
    ``"fallback"``).

    Args:
        kg_store: Knowledge-graph store exposing the three methods above.
        query: Free-text user request; coerced to ``str`` and stripped.
        max_results: Limit passed to the search calls.
        max_hops: Traversal depth passed to ``multi_hop_context``.

    Returns:
        A dict with the keys ``"query"`` (str), ``"context_markdown"`` (str,
        ready to inject into an LLM prompt), ``"sources"`` (list of referenced
        sources) and ``"stats"`` (search statistics; empty when the query or
        store is missing).
    """
    query = str(query or "").strip()
    if not query or not kg_store:
        return {"query": query, "context_markdown": "", "sources": [], "stats": {}}

    results = kg_store.search_for_document_generation(query, limit=max_results)
    if not results:
        fallback_ctx = kg_store.context_for_query(query, limit=max_results)
        return {
            "query": query,
            # Keep the documented ``str`` contract even if the store returns None.
            "context_markdown": fallback_ctx or "",
            "sources": [],
            "stats": {"method": "fallback", "matches": 0},
        }

    seed_ids = [r["id"] for r in results]
    # Tolerate a store that returns None (or omits keys) for the traversal.
    hop_data = kg_store.multi_hop_context(seed_ids, max_hops=max_hops) or {}
    hop_nodes = hop_data.get("nodes") or []
    hop_edges = hop_data.get("edges") or []

    # Build the membership set once instead of once per traversed node.
    seed_id_set = set(seed_ids)
    extra_nodes_by_id = {
        node["id"]: node for node in hop_nodes if node["id"] not in seed_id_set
    }

    sections = _build_context_sections(results, extra_nodes_by_id, hop_edges)
    context_md = _render_markdown(query, sections)
    sources = _extract_sources(results)

    return {
        "query": query,
        "context_markdown": context_md,
        "sources": sources,
        "stats": {
            "method": "hybrid",
            "primary_matches": len(results),
            "graph_nodes": len(hop_nodes),
            "graph_edges": len(hop_edges),
        },
    }
75
+
76
+
77
def _build_context_sections(
    primary_results: List[Dict[str, Any]],
    extra_nodes: Dict[str, Dict[str, Any]],
    edges: List[Dict[str, Any]],
) -> List[Dict[str, Any]]:
    """Group search hits into titled sections by node type.

    Sections are emitted in a fixed order (documents/files, decisions/tasks,
    conversations, concepts/features) and empty sections are omitted. The
    concept section additionally includes up to eight concept/feature nodes
    discovered by graph traversal.

    Args:
        primary_results: Nodes returned by the primary search.
        extra_nodes: Traversal-only nodes keyed by node id.
        edges: Traversed edges. Accepted for interface compatibility; not
            used when building sections.

    Returns:
        A list of ``{"title", "items", "icon"}`` dicts.
    """
    document_types = (
        "Document", "File", "SlideDeck", "Spreadsheet", "CodeFile", "Image", "ImageText",
    )
    decision_types = ("Decision", "Task")
    conversation_types = ("Chat",)
    concept_types = ("Concept", "Feature")

    def _of_type(nodes, wanted):
        # ``.get`` so a node without a "type" key is skipped rather than
        # aborting the whole context build with KeyError.
        return [node for node in nodes if node.get("type") in wanted]

    concept_items = (
        _of_type(primary_results, concept_types)
        + _of_type(extra_nodes.values(), concept_types)[:8]
    )
    candidates = [
        ("관련 문서/파일", "📄", _of_type(primary_results, document_types)),
        ("관련 결정사항/작업", "✅", _of_type(primary_results, decision_types)),
        ("관련 대화", "💬", _of_type(primary_results, conversation_types)),
        ("관련 개념/기술", "🔗", concept_items),
    ]
    return [
        {"title": title, "items": items, "icon": icon}
        for title, icon, items in candidates
        if items
    ]
121
+
122
+
123
def _render_markdown(query: str, sections: List[Dict[str, Any]]) -> str:
    """Render context sections as a Markdown block for prompt injection.

    Each non-empty section becomes a ``###`` heading followed by at most eight
    bullet items. An item shows its type and title, an optional relevance
    score, an optional source line (only when it differs from the node id), a
    cleaned summary, and up to five related concept titles.

    Args:
        query: The user query. Currently unused; kept for interface stability.
        sections: Section dicts with ``"title"``, ``"icon"`` and ``"items"``.

    Returns:
        The Markdown text, stripped of leading and trailing whitespace.
    """
    lines: List[str] = []
    for section in sections:
        items = section["items"]
        if not items:
            continue
        lines.append(f"### {section['icon']} {section['title']}")
        lines.append("")
        for item in items[:8]:
            title = item.get("title") or ""
            summary = _clean(item.get("summary", ""))
            item_type = item.get("type", "")

            score_info = ""
            if "hybrid_score" in item:
                # A missing or non-numeric score must not abort the whole
                # rendering; drop the relevance suffix instead.
                try:
                    score_info = f" (relevance: {float(item['hybrid_score']):.2f})"
                except (TypeError, ValueError):
                    score_info = ""

            meta = item.get("metadata") or {}
            item_id = item.get("id", "")
            source = (
                meta.get("relative_path")
                or meta.get("filename")
                or meta.get("conversation_id")
                or meta.get("source")
                or item_id
            )

            lines.append(f"- **[{item_type}] {title}**{score_info}")
            if source and source != item_id:
                lines.append(f" - 출처: {source}")
            if summary:
                lines.append(f" - {summary}")

            related = item.get("related_concepts") or []
            # Accept both node dicts and bare strings; skip entries without a title.
            names = [
                str(concept.get("title") or "") if isinstance(concept, dict) else str(concept)
                for concept in related[:5]
            ]
            tags = ", ".join(name for name in names if name)
            if tags:
                lines.append(f" - 관련: {tags}")

        lines.append("")

    return "\n".join(lines).strip()
161
+
162
+
163
def _extract_sources(results: List[Dict[str, Any]]) -> List[Dict[str, str]]:
    """Return one source record per distinct source key, in first-seen order.

    The source key is the first truthy value among the node metadata's
    ``relative_path``, ``filename`` and ``conversation_id``, falling back to
    the node id. Nodes with no usable key are skipped.
    """
    collected: List[Dict[str, str]] = []
    seen_keys = set()
    for node in results:
        metadata = node.get("metadata") or {}
        node_id = node.get("id", "")
        key = (
            metadata.get("relative_path")
            or metadata.get("filename")
            or metadata.get("conversation_id")
            or node_id
        )
        if not key or key in seen_keys:
            continue
        seen_keys.add(key)
        collected.append(
            {
                "id": node_id,
                "type": node.get("type", ""),
                "title": node.get("title", ""),
                "source": key,
            }
        )
    return collected
183
+
184
+
185
def format_sources_footnote(sources: List[Dict[str, str]]) -> str:
    """Format up to ten sources as a numbered Markdown footnote.

    Returns an empty string when *sources* is empty. Otherwise the result
    starts with a horizontal rule and a heading, followed by one numbered line
    per source built from its ``type``, ``title`` and ``source`` entries.
    """
    if not sources:
        return ""
    header = "\n---\n**참조된 지식 그래프 노드:**"
    entries = [
        f"{rank}. [{src['type']}] {src['title']} ({src['source']})"
        for rank, src in enumerate(sources[:10], 1)
    ]
    return "\n".join([header, *entries])
@@ -0,0 +1,103 @@
1
+ """
2
+ Document Generator — 지식 그래프 기반 고품질 문서 자동 생성 모듈.
3
+
4
+ 사용자가 "Q3 마케팅 전략 보고서 작성해줘" 같은 일반 채팅을 하면,
5
+ Knowledge Graph에서 관련 과거 문서/개념/관계를 자동으로 찾아
6
+ LLM이 자연스럽고 일관성 있게 새로운 문서를 생성한다.
7
+ """
8
+
9
+ import re
10
+ from typing import Optional
11
+
12
# Weak signals: a message must hit at least two of these pattern groups
# (document noun in Korean, authoring verb, document noun in English).
_DOCUMENT_INTENT_PATTERNS = [
    re.compile(r"(보고서|계획서|기획서|제안서|문서|리포트|요약서|분석서|전략서|매뉴얼|가이드)", re.IGNORECASE),
    re.compile(r"(작성|만들어|생성|써|줘|write|create|generate|draft|compose|prepare)", re.IGNORECASE),
    # Leading word boundary so nouns embedded in other words ("plan" inside
    # "explain") do not count as a document noun. No trailing boundary, so
    # plurals and Korean particles attached to the noun still match.
    re.compile(r"\b(report|proposal|plan|document|summary|analysis|strategy|guide|manual|brief)", re.IGNORECASE),
]

# Strong signals: a single match is enough to classify the message.
_STRONG_INTENT_PATTERNS = [
    re.compile(r"(작성해|만들어\s*줘|써\s*줘|생성해|write\s+(?:a|me|the)|create\s+(?:a|me|the)|draft\s+(?:a|me|the))", re.IGNORECASE),
    re.compile(r"(보고서|계획서|기획서|제안서|전략서|매뉴얼).*(작성|만들|생성|써)", re.IGNORECASE),
    re.compile(r"(작성|만들|생성|써).*(보고서|계획서|기획서|제안서|전략서|매뉴얼)", re.IGNORECASE),
]


def detect_document_intent(message: str) -> bool:
    """Detect whether the user's message is requesting document generation.

    Messages that are empty or shorter than five characters are rejected.
    Otherwise the message qualifies when any strong pattern matches, or when
    at least two of the weak pattern groups match.
    """
    if not message or len(message) < 5:
        return False
    if any(pattern.search(message) for pattern in _STRONG_INTENT_PATTERNS):
        return True
    hit_count = sum(1 for pattern in _DOCUMENT_INTENT_PATTERNS if pattern.search(message))
    return hit_count >= 2
34
+
35
+
36
# System prompt for a first-time document request; ``{graph_context}`` is
# substituted by build_document_system_prompt().
DOCUMENT_GENERATION_SYSTEM_PROMPT = """당신은 사용자의 개인 AI 지식 어시스턴트 Lattice AI입니다.
사용자의 기존 지식 기반을 활용하여 고품질 문서를 생성합니다.

## 지침
1. 아래 제공된 지식 그래프 컨텍스트를 최대한 활용하세요.
2. 이전 문서의 스타일과 톤을 유지하면서 최신적이고 전문적인 문서를 작성하세요.
3. 출처는 자연스럽게 본문이나 각주에 포함하세요.
4. 사용자의 언어(한국어/영어)에 맞춰 작성하세요.
5. 구조화된 포맷(제목, 소제목, 목록 등)을 사용하세요.

## 사용자의 지식 기반

{graph_context}"""

# System prompt for revising a previously generated document;
# ``{graph_context}`` and ``{previous_document}`` are substituted by
# build_followup_system_prompt().
DOCUMENT_GENERATION_FOLLOWUP_PROMPT = """당신은 사용자의 개인 AI 지식 어시스턴트 Lattice AI입니다.
이전에 생성한 문서를 사용자의 요청에 따라 수정/보완합니다.

## 이전 생성 컨텍스트

{graph_context}

## 이전 문서
{previous_document}

위 문서를 사용자의 요청에 따라 수정하세요. 기존 스타일과 톤을 유지하세요."""

_FOLLOWUP_PLACEHOLDER_RE = re.compile(r"\{graph_context\}|\{previous_document\}")


def build_document_system_prompt(graph_context: str) -> str:
    """Return the first-pass system prompt with *graph_context* filled in.

    An empty context is replaced by a fixed notice telling the model that no
    knowledge base is available.
    """
    if not graph_context:
        return DOCUMENT_GENERATION_SYSTEM_PROMPT.replace("{graph_context}", "(사용 가능한 지식 기반이 없습니다. 일반 지식을 활용하여 작성합니다.)")
    return DOCUMENT_GENERATION_SYSTEM_PROMPT.replace("{graph_context}", graph_context)


def build_followup_system_prompt(graph_context: str, previous_document: str) -> str:
    """Return the follow-up system prompt with both placeholders filled in.

    Empty values are rendered as ``(없음)``. Both placeholders are substituted
    in a single pass over the template, so placeholder-looking text inside
    *graph_context* or *previous_document* is kept verbatim instead of being
    substituted a second time.
    """
    values = {
        "{graph_context}": graph_context or "(없음)",
        "{previous_document}": previous_document or "(없음)",
    }
    return _FOLLOWUP_PLACEHOLDER_RE.sub(
        lambda match: values[match.group(0)],
        DOCUMENT_GENERATION_FOLLOWUP_PROMPT,
    )
72
+
73
+
74
class DocumentGenerationSession:
    """Remembers the last generated document so follow-up requests can refine it."""

    def __init__(self):
        # Graph context used for the most recent generation, if any.
        self._last_context: Optional[str] = None
        # Most recently generated document text, if any.
        self._last_document: Optional[str] = None
        # Conversation this session was last updated for, if known.
        self._conversation_id: Optional[str] = None

    @property
    def has_previous(self) -> bool:
        """True once a document has been recorded via :meth:`update`."""
        return self._last_document is not None

    def update(self, context: str, document: str, conversation_id: Optional[str] = None) -> None:
        """Record the latest context and document.

        The stored conversation id is overwritten only when a truthy
        *conversation_id* is supplied.
        """
        self._last_context, self._last_document = context, document
        if conversation_id:
            self._conversation_id = conversation_id

    def get_system_prompt(self, graph_context: str) -> str:
        """Return the follow-up prompt if a document exists, else the first-pass prompt.

        For follow-ups, an empty *graph_context* falls back to the context
        stored by the previous :meth:`update`.
        """
        if not self.has_previous:
            return build_document_system_prompt(graph_context)
        effective_context = graph_context or self._last_context or ""
        return build_followup_system_prompt(effective_context, self._last_document or "")

    def clear(self) -> None:
        """Forget the stored context, document and conversation id."""
        self._last_context = None
        self._last_document = None
        self._conversation_id = None
package/llm_router.py CHANGED
@@ -626,3 +626,150 @@ class LLMRouter:
626
626
  except Exception as e:
627
627
  print(f"⚠️ VLM image decode failed: {e}")
628
628
  return None
629
+
630
+ # ── Document Generation Pipeline ──────────────────────────────────────
631
+
632
async def generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> str:
    """Generate a document using a specialized system prompt with graph context.

    Cloud models are delegated to ``_cloud_generate_document``. Local models
    are prompted through the tokenizer's chat template when available (with a
    ChatML-style fallback) and run on the module-level ``executor`` so the
    event loop is not blocked.

    Args:
        message: The user's request.
        system_prompt: System prompt carrying the knowledge-graph context.
        max_tokens: Generation budget passed to the backend.
        temperature: Sampling temperature passed to the backend.

    Returns:
        The generated text passed through ``normalize_branding``, or the
        literal ``"No model loaded."`` when no model is selected.
    """
    if not self._current:
        return "No model loaded."
    self._touch()
    cached = self._cache[self._current]

    if isinstance(cached, CloudModel):
        return await self._cloud_generate_document(cached, message, system_prompt, max_tokens, temperature)

    model, tokenizer, draft_model = cached
    # ChatML-style prompt used when the tokenizer has no usable chat template.
    fallback_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    prompt = fallback_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            prompt = fallback_prompt

    # Resolve the model name here rather than inside the worker thread, so a
    # concurrent change of ``self._current`` cannot affect this request.
    model_name = self._current.lower()
    is_gemma4 = "gemma-4" in model_name or "gemma4" in model_name

    # Inside a coroutine the running loop is the one to schedule on.
    loop = asyncio.get_running_loop()

    def _gen():
        import mlx.core as mx
        mx.set_default_device(mx.gpu)
        if is_gemma4 and VLM_AVAILABLE:
            from mlx_vlm import generate as vlm_gen
            return vlm_gen(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
        from mlx_lm import generate as lm_gen
        return lm_gen(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)

    result = await loop.run_in_executor(executor, _gen)
    # Some backends return an object with a ``.text`` attribute, others a plain value.
    text = result.text if hasattr(result, "text") else str(result)
    return normalize_branding(text)
677
+
678
async def _cloud_generate_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> str:
    """Run a single non-streaming chat completion against a cloud model.

    Any exception raised by the completion call is re-raised as
    ``RuntimeError`` whose message comes from ``_local_server_error_hint``,
    with the original exception chained as its cause. Missing message content
    is treated as an empty string before ``normalize_branding`` is applied.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    try:
        response = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
        )
    except Exception as e:
        raise RuntimeError(self._local_server_error_hint(cloud, e)) from e
    content = response.choices[0].message.content
    return normalize_branding(content or "")
692
+
693
async def stream_generate_document(
    self,
    message: str,
    system_prompt: str,
    *,
    max_tokens: int = 8192,
    temperature: float = 0.3,
) -> AsyncIterator[str]:
    """Stream document generation with specialized system prompt.

    Cloud models are delegated to ``_cloud_stream_document``. For local
    models, generation runs on the module-level ``executor`` and chunks are
    handed back to the event loop through an ``asyncio.Queue``; ``None``
    marks the end of the stream.

    Args:
        message: The user's request.
        system_prompt: System prompt carrying the knowledge-graph context.
        max_tokens: Generation budget passed to the backend.
        temperature: Sampling temperature passed to the backend.

    Yields:
        Text chunks passed through ``normalize_branding``. Yields the single
        chunk ``"No model loaded."`` when no model is selected, and a
        ``"⚠️ Error: ..."`` chunk when local generation fails.
    """
    if not self._current:
        yield "No model loaded."
        return
    self._touch()
    cached = self._cache[self._current]

    if isinstance(cached, CloudModel):
        async for chunk in self._cloud_stream_document(cached, message, system_prompt, max_tokens, temperature):
            yield chunk
        return

    model, tokenizer, draft_model = cached
    # ChatML-style prompt used when the tokenizer has no usable chat template.
    fallback_prompt = f"<|im_start|>system\n{system_prompt}<|im_end|>\n<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n"
    prompt = fallback_prompt
    if hasattr(tokenizer, "apply_chat_template"):
        try:
            msgs = [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": message},
            ]
            prompt = tokenizer.apply_chat_template(msgs, tokenize=False, add_generation_prompt=True)
        except Exception:
            prompt = fallback_prompt

    # Resolve the model name on the event-loop side, not in the worker thread.
    model_name = self._current.lower()
    is_gemma4 = "gemma-4" in model_name or "gemma4" in model_name

    loop = asyncio.get_running_loop()
    queue = asyncio.Queue()

    def _stream():
        # Everything, including the mlx import and device setup, runs inside
        # the try so the ``None`` sentinel is always enqueued; otherwise a
        # failure here would leave the consumer waiting on the queue forever.
        try:
            import mlx.core as mx
            mx.set_default_device(mx.gpu)
            if is_gemma4 and VLM_AVAILABLE:
                from mlx_vlm import stream_generate as vlm_stream
                gen = vlm_stream(model, tokenizer, prompt=prompt, image=None, max_tokens=max_tokens, temp=temperature, draft_model=draft_model, draft_kind="mtp")
            else:
                from mlx_lm import stream_generate as lm_stream
                gen = lm_stream(model, tokenizer, prompt=prompt, max_tokens=max_tokens, temp=temperature, draft_model=draft_model)
            for chunk in gen:
                # Chunks may be objects with ``.text``, tuples, or plain values.
                text = chunk.text if hasattr(chunk, "text") else (chunk[0] if isinstance(chunk, tuple) else str(chunk))
                loop.call_soon_threadsafe(queue.put_nowait, text)
        except Exception as e:
            loop.call_soon_threadsafe(queue.put_nowait, f"⚠️ Error: {e}")
        finally:
            loop.call_soon_threadsafe(queue.put_nowait, None)

    loop.run_in_executor(executor, _stream)
    while True:
        chunk = await queue.get()
        if chunk is None:
            break
        yield normalize_branding(chunk)
754
+
755
async def _cloud_stream_document(self, cloud: CloudModel, message: str, system_prompt: str, max_tokens: int, temperature: float) -> AsyncIterator[str]:
    """Stream a chat completion from a cloud model, yielding text deltas.

    If opening the stream fails, a single ``"⚠️ ..."`` chunk built from
    ``_local_server_error_hint`` is yielded and the generator ends. Events
    without choices and empty deltas are skipped; every emitted delta is
    passed through ``normalize_branding``.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": message},
    ]
    try:
        stream = await cloud.client.chat.completions.create(
            model=cloud.model,
            messages=chat_messages,
            max_tokens=max_tokens,
            temperature=temperature,
            stream=True,
        )
    except Exception as e:
        yield f"⚠️ {self._local_server_error_hint(cloud, e)}"
        return

    async for event in stream:
        if event.choices:
            piece = event.choices[0].delta.content
            if piece:
                yield normalize_branding(piece)
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "0.2.2",
3
+ "version": "0.3.0",
4
4
  "description": "Lattice AI local MLX/cloud LLM workspace server",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {
package/server.py CHANGED
@@ -46,8 +46,10 @@ from pydantic import BaseModel
46
46
  from PIL import Image
47
47
 
48
48
  from llm_router import AsyncOpenAI, LLMRouter, OPENAI_COMPATIBLE_PROVIDERS, HF_MODELS_ROOT, ensure_mlx_runtime, hf_model_dir, parse_model_ref, mx, normalize_branding
49
- from knowledge_graph import KnowledgeGraphStore
49
+ from knowledge_graph import KnowledgeGraphStore, set_llm_router
50
50
  from knowledge_graph_api import create_knowledge_graph_router
51
+ from latticeai.core.context_builder import retrieve_context_for_generation, format_sources_footnote
52
+ from latticeai.core.document_generator import detect_document_intent, DocumentGenerationSession
51
53
  from local_knowledge_api import LocalKnowledgeWatcher, create_local_knowledge_router
52
54
  from latticeai.core.security import (
53
55
  hash_password as _hash_password,
@@ -1001,7 +1003,9 @@ def build_admin_audit_report(users: Dict) -> Dict:
1001
1003
  )
1002
1004
 
1003
1005
  router = LLMRouter()
1006
+ set_llm_router(router)
1004
1007
  gardener = PReinforceGardener()
1008
+ _doc_gen_sessions: dict = {} # conversation_id → DocumentGenerationSession
1005
1009
 
1006
1010
  async def autoload_default_model() -> None:
1007
1011
  if not AUTOLOAD_MODELS:
@@ -3636,12 +3640,24 @@ async def chat(req: ChatRequest, request: Request):
3636
3640
  except Exception as e:
3637
3641
  logging.warning("Knowledge reinforcement skipped: %s", e)
3638
3642
 
3643
+ is_doc_gen = detect_document_intent(req.message)
3644
+ doc_gen_context_result = None
3645
+
3639
3646
  try:
3640
3647
  if ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3641
- graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
3642
- if graph_context:
3643
- context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
3644
- print("🕸️ Context reinforced with knowledge graph.")
3648
+ if is_doc_gen:
3649
+ doc_gen_context_result = retrieve_context_for_generation(
3650
+ KNOWLEDGE_GRAPH, req.message, max_results=10, max_hops=2,
3651
+ )
3652
+ graph_md = doc_gen_context_result.get("context_markdown", "")
3653
+ if graph_md:
3654
+ context += f"\n\n[KNOWLEDGE GRAPH — Document Generation Context]\n{graph_md}"
3655
+ print("📝 Document generation context retrieved from knowledge graph.")
3656
+ else:
3657
+ graph_context = KNOWLEDGE_GRAPH.context_for_query(req.message)
3658
+ if graph_context:
3659
+ context += f"\n\n[KNOWLEDGE GRAPH]\n{graph_context}"
3660
+ print("🕸️ Context reinforced with knowledge graph.")
3645
3661
  except Exception as e:
3646
3662
  logging.warning("Knowledge graph reinforcement skipped: %s", e)
3647
3663
 
@@ -3651,7 +3667,6 @@ async def chat(req: ChatRequest, request: Request):
3651
3667
  context += f"\n\n{screenshot_context}"
3652
3668
 
3653
3669
  if env_bool("LATTICEAI_AUTO_READ_CHAT_PATHS", default=False):
3654
- # Off by default: automatic local-file injection can leak files to cloud models.
3655
3670
  _file_path_re = re.compile(r'(?:^|[\s\'\"(])((~|/[\w.])[^\s\'")\]]*)', re.MULTILINE)
3656
3671
  for _m in _file_path_re.finditer(req.message or ""):
3657
3672
  _fpath = _m.group(1).strip()
@@ -3669,6 +3684,55 @@ async def chat(req: ChatRequest, request: Request):
3669
3684
  if req.source != "telegram":
3670
3685
  asyncio.create_task(broadcast_web_chat("user", req.message))
3671
3686
 
3687
+ if is_doc_gen and ENABLE_GRAPH and KNOWLEDGE_GRAPH:
3688
+ conv_key = req.conversation_id or "default"
3689
+ session = _doc_gen_sessions.get(conv_key)
3690
+ if session is None:
3691
+ session = DocumentGenerationSession()
3692
+ _doc_gen_sessions[conv_key] = session
3693
+ graph_md = (doc_gen_context_result or {}).get("context_markdown", "")
3694
+ system_prompt = session.get_system_prompt(graph_md)
3695
+ sources = (doc_gen_context_result or {}).get("sources", [])
3696
+ footnote = format_sources_footnote(sources)
3697
+
3698
+ if req.stream:
3699
+ async def _stream_doc_gen():
3700
+ collected = []
3701
+ async for chunk in router.stream_generate_document(
3702
+ req.message, system_prompt,
3703
+ max_tokens=req.max_tokens or 8192,
3704
+ temperature=req.temperature or 0.3,
3705
+ ):
3706
+ collected.append(chunk)
3707
+ yield f"data: {json.dumps({'text': chunk}, ensure_ascii=False)}\n\n"
3708
+ full_text = "".join(collected)
3709
+ if footnote:
3710
+ yield f"data: {json.dumps({'text': footnote}, ensure_ascii=False)}\n\n"
3711
+ full_text += footnote
3712
+ session.update(graph_md, full_text, req.conversation_id)
3713
+ save_to_history("assistant", full_text, source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3714
+ if req.source != "telegram":
3715
+ asyncio.create_task(broadcast_web_chat("assistant", full_text))
3716
+ yield "data: [DONE]\n\n"
3717
+ return StreamingResponse(
3718
+ _stream_doc_gen(),
3719
+ media_type="text/event-stream",
3720
+ headers={"X-Model": router.current_model_id, "X-Doc-Gen": "true"},
3721
+ )
3722
+ else:
3723
+ result = await router.generate_document(
3724
+ req.message, system_prompt,
3725
+ max_tokens=req.max_tokens or 8192,
3726
+ temperature=req.temperature or 0.3,
3727
+ )
3728
+ if footnote:
3729
+ result += footnote
3730
+ session.update(graph_md, result, req.conversation_id)
3731
+ save_to_history("assistant", str(result), source=req.source or "web", conversation_id=req.conversation_id, **history_user)
3732
+ if req.source != "telegram":
3733
+ asyncio.create_task(broadcast_web_chat("assistant", str(result)))
3734
+ return JSONResponse(content={"response": str(result)})
3735
+
3672
3736
  if req.stream:
3673
3737
  recent_context = build_recent_chat_context(user_email=effective_email, conversation_id=req.conversation_id)
3674
3738
  stream_context = context
@@ -159,6 +159,32 @@
159
159
  --radius-sm: var(--lt-radius-sm);
160
160
  }
161
161
 
162
+ /* ── Global polish ─────────────────────────────────────────── */
163
+ ::selection {
164
+ background: rgba(110, 74, 230, 0.18);
165
+ color: var(--lt-color-ink-900);
166
+ }
167
+
168
+ :focus-visible {
169
+ outline: 2px solid rgba(110, 74, 230, 0.40);
170
+ outline-offset: 2px;
171
+ }
172
+
173
+ ::-webkit-scrollbar {
174
+ width: 6px;
175
+ height: 6px;
176
+ }
177
+ ::-webkit-scrollbar-track {
178
+ background: transparent;
179
+ }
180
+ ::-webkit-scrollbar-thumb {
181
+ background: rgba(110, 74, 230, 0.16);
182
+ border-radius: 99px;
183
+ }
184
+ ::-webkit-scrollbar-thumb:hover {
185
+ background: rgba(110, 74, 230, 0.28);
186
+ }
187
+
162
188
  /* ── Reduced motion (a11y) ─────────────────────────────────── */
163
189
  @media (prefers-reduced-motion: reduce) {
164
190
  :root {