ltcai 0.2.2 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/CHANGELOG.md +43 -0
- package/kg_schema.py +64 -15
- package/knowledge_graph.py +299 -2
- package/latticeai/core/__init__.py +1 -1
- package/latticeai/core/context_builder.py +191 -0
- package/latticeai/core/document_generator.py +103 -0
- package/llm_router.py +147 -0
- package/package.json +1 -1
- package/server.py +70 -6
- package/static/css/tokens.css +26 -0
- package/static/lattice-reference.css +296 -370
package/docs/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,48 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.3.0] - 2026-05-27
|
|
4
|
+
|
|
5
|
+
### Knowledge Graph — LLM Structured Output Extraction
|
|
6
|
+
|
|
7
|
+
- `_extract_concepts()` / `_extract_triples()`를 LLM 기반으로 전환 (rule-based 폴백 유지)
|
|
8
|
+
- LLM Router 참조를 knowledge_graph에 주입하는 `set_llm_router()` 함수 추가
|
|
9
|
+
- `LATTICEAI_LLM_EXTRACTION` 환경변수로 LLM extraction on/off 제어
|
|
10
|
+
|
|
11
|
+
### Knowledge Graph — Hybrid Retrieval & Document Generation
|
|
12
|
+
|
|
13
|
+
- `search_for_document_generation()` 추가 — Hybrid Score (0.5×text + 0.3×graph + 0.2×recency) 기반 검색
|
|
14
|
+
- `multi_hop_context()` 추가 — Seed nodes에서 N-hop 그래프 탐색
|
|
15
|
+
- `DOCUMENT` NodeType, `USED_IN` / `INSPIRED_BY` / `CONTRADICTS` / `EVOLVES_FROM` EdgeType 추가
|
|
16
|
+
- Node에 `style`, `tone`, `importance_score`, `last_used` 필드 추가 (SQLite v2 스키마 반영)
|
|
17
|
+
|
|
18
|
+
### 문서 자동 생성 파이프라인
|
|
19
|
+
|
|
20
|
+
- `latticeai/core/context_builder.py` 신규 — Knowledge Graph → 구조화 Markdown Context 변환
|
|
21
|
+
- `latticeai/core/document_generator.py` 신규 — Intent detection + 전용 System Prompt + Session 관리
|
|
22
|
+
- `llm_router.py`에 `generate_document()` / `stream_generate_document()` 추가
|
|
23
|
+
- `/chat` 엔드포인트에서 "보고서 작성해줘" 같은 문서 생성 의도 자동 감지 → 전용 파이프라인 활성화
|
|
24
|
+
- 생성 문서에 참조 Knowledge Graph 노드 각주 자동 첨부
|
|
25
|
+
- 대화별 `DocumentGenerationSession`으로 반복 수정("이 부분 더 수정해") 지원
|
|
26
|
+
|
|
27
|
+
### UI/UX — 디자인 통일
|
|
28
|
+
|
|
29
|
+
- Account/Chat/Graph/Admin 전체 페이지를 통일된 lavender purple 테마로 전환
|
|
30
|
+
- 다크 모드 base 스타일 완전 제거 (`.app-layout` Obsidian dark, account dark base 등)
|
|
31
|
+
- 초록 테마(`#22d3a0`) 60+ 인스턴스를 보라(`#6f42e8`) 계열로 교체
|
|
32
|
+
- 메시지 버블: 다크 green → 보라 gradient(user), 밝은 lavender glass(AI)
|
|
33
|
+
- 사이드바, 입력창, 버튼, 모달 오버레이 모두 라이트 lavender로 통일
|
|
34
|
+
- 카드/패널에 hover lift 효과, 커스텀 스크롤바, focus ring, selection 색상 추가
|
|
35
|
+
- tokens.css에 글로벌 polish (scrollbar, selection, focus-visible) 추가
|
|
36
|
+
|
|
37
|
+
### 테스트
|
|
38
|
+
|
|
39
|
+
- `test_document_generation.py` 33개 테스트 추가 (intent detection, session, extraction, hybrid retrieval, context builder, schema v2)
|
|
40
|
+
|
|
41
|
+
### Release
|
|
42
|
+
|
|
43
|
+
- 배포 버전을 `0.3.0`으로 상향
|
|
44
|
+
- 대상 채널: `npm` · `PyPI` · `VS Code Marketplace` · `Open VSX`
|
|
45
|
+
|
|
3
46
|
## [0.2.2] - 2026-05-26
|
|
4
47
|
|
|
5
48
|
### 모델 카탈로그
|
package/kg_schema.py
CHANGED
|
@@ -81,6 +81,7 @@ class NodeType(str, Enum):
|
|
|
81
81
|
CONVERSATION = "CONVERSATION" # 대화 세션 전체
|
|
82
82
|
MESSAGE = "MESSAGE" # 단일 발화
|
|
83
83
|
FILE = "FILE" # 업로드/연결된 파일
|
|
84
|
+
DOCUMENT = "DOCUMENT" # 생성/관리되는 문서 (보고서, 계획서 등)
|
|
84
85
|
CHUNK = "CHUNK" # 파일의 분할 청크
|
|
85
86
|
CODE_SYMBOL = "CODE_SYMBOL" # 함수·클래스·모듈
|
|
86
87
|
CONCEPT = "CONCEPT" # 추출된 개념 / 태그
|
|
@@ -110,6 +111,10 @@ class EdgeType(str, Enum):
|
|
|
110
111
|
TAGGED_AS = "TAGGED_AS" # ANY → CONCEPT
|
|
111
112
|
VERSION_OF = "VERSION_OF" # FILE → FILE (히스토리)
|
|
112
113
|
GRANTS_ACCESS = "GRANTS_ACCESS" # PERSON → RESOURCE
|
|
114
|
+
USED_IN = "USED_IN" # CONCEPT → DOCUMENT (문서에 활용됨)
|
|
115
|
+
INSPIRED_BY = "INSPIRED_BY" # DOCUMENT → DOCUMENT (영감/참조 관계)
|
|
116
|
+
CONTRADICTS = "CONTRADICTS" # DOCUMENT ↔ DOCUMENT (상충 관계)
|
|
117
|
+
EVOLVES_FROM = "EVOLVES_FROM" # DOCUMENT → DOCUMENT (발전/개정 관계)
|
|
113
118
|
|
|
114
119
|
@classmethod
|
|
115
120
|
def from_legacy(cls, label: str) -> "EdgeType":
|
|
@@ -140,6 +145,13 @@ _LEGACY_NODE_MAP: Dict[str, NodeType] = {
|
|
|
140
145
|
"mcp": NodeType.TOOL,
|
|
141
146
|
"project": NodeType.PROJECT,
|
|
142
147
|
"workspace": NodeType.PROJECT,
|
|
148
|
+
"document": NodeType.DOCUMENT,
|
|
149
|
+
"report": NodeType.DOCUMENT,
|
|
150
|
+
"plan": NodeType.DOCUMENT,
|
|
151
|
+
"proposal": NodeType.DOCUMENT,
|
|
152
|
+
"보고서": NodeType.DOCUMENT,
|
|
153
|
+
"계획서": NodeType.DOCUMENT,
|
|
154
|
+
"기획서": NodeType.DOCUMENT,
|
|
143
155
|
}
|
|
144
156
|
|
|
145
157
|
_LEGACY_EDGE_MAP: Dict[str, EdgeType] = {
|
|
@@ -171,18 +183,27 @@ _LEGACY_EDGE_MAP: Dict[str, EdgeType] = {
|
|
|
171
183
|
"tagged_as": EdgeType.TAGGED_AS,
|
|
172
184
|
"version_of": EdgeType.VERSION_OF,
|
|
173
185
|
"grants_access": EdgeType.GRANTS_ACCESS,
|
|
186
|
+
"used_in": EdgeType.USED_IN,
|
|
187
|
+
"inspired_by": EdgeType.INSPIRED_BY,
|
|
188
|
+
"contradicts": EdgeType.CONTRADICTS,
|
|
189
|
+
"evolves_from": EdgeType.EVOLVES_FROM,
|
|
190
|
+
"활용됨": EdgeType.USED_IN,
|
|
191
|
+
"영감받음": EdgeType.INSPIRED_BY,
|
|
192
|
+
"상충함": EdgeType.CONTRADICTS,
|
|
193
|
+
"발전함": EdgeType.EVOLVES_FROM,
|
|
174
194
|
}
|
|
175
195
|
|
|
176
196
|
# 노드 타입별로 허용되는 source / target 조합 (PPT 카탈로그 그대로)
|
|
177
197
|
# None == 모든 타입 허용
|
|
178
198
|
EDGE_ENDPOINT_RULES: Dict[EdgeType, Tuple[Optional[Sequence[NodeType]], Optional[Sequence[NodeType]]]] = {
|
|
179
|
-
EdgeType.CONTAINS: ((NodeType.FILE,
|
|
180
|
-
|
|
199
|
+
EdgeType.CONTAINS: ((NodeType.FILE, NodeType.DOCUMENT),
|
|
200
|
+
(NodeType.CHUNK,)),
|
|
201
|
+
EdgeType.MENTIONS: ((NodeType.MESSAGE, NodeType.FILE, NodeType.CHUNK, NodeType.DOCUMENT),
|
|
181
202
|
(NodeType.CONCEPT, NodeType.PERSON, NodeType.MODEL, NodeType.TOOL)),
|
|
182
203
|
EdgeType.REFERENCES: ((NodeType.FILE, NodeType.MESSAGE, NodeType.CHUNK),
|
|
183
204
|
(NodeType.FILE, NodeType.MESSAGE, NodeType.CHUNK)),
|
|
184
205
|
EdgeType.REPLIES_TO: ((NodeType.MESSAGE,), (NodeType.MESSAGE,)),
|
|
185
|
-
EdgeType.AUTHORED_BY: ((NodeType.FILE, NodeType.MESSAGE, NodeType.CONVERSATION),
|
|
206
|
+
EdgeType.AUTHORED_BY: ((NodeType.FILE, NodeType.MESSAGE, NodeType.CONVERSATION, NodeType.DOCUMENT),
|
|
186
207
|
(NodeType.PERSON,)),
|
|
187
208
|
EdgeType.USES: ((NodeType.PROJECT, NodeType.CONVERSATION),
|
|
188
209
|
(NodeType.TOOL, NodeType.MODEL)),
|
|
@@ -194,6 +215,14 @@ EDGE_ENDPOINT_RULES: Dict[EdgeType, Tuple[Optional[Sequence[NodeType]], Optional
|
|
|
194
215
|
EdgeType.VERSION_OF: ((NodeType.FILE,), (NodeType.FILE,)),
|
|
195
216
|
EdgeType.GRANTS_ACCESS: ((NodeType.PERSON,),
|
|
196
217
|
(NodeType.FILE, NodeType.CONVERSATION, NodeType.PROJECT)),
|
|
218
|
+
EdgeType.USED_IN: ((NodeType.CONCEPT,),
|
|
219
|
+
(NodeType.DOCUMENT, NodeType.FILE)),
|
|
220
|
+
EdgeType.INSPIRED_BY: ((NodeType.DOCUMENT, NodeType.FILE),
|
|
221
|
+
(NodeType.DOCUMENT, NodeType.FILE)),
|
|
222
|
+
EdgeType.CONTRADICTS: ((NodeType.DOCUMENT, NodeType.FILE),
|
|
223
|
+
(NodeType.DOCUMENT, NodeType.FILE)),
|
|
224
|
+
EdgeType.EVOLVES_FROM: ((NodeType.DOCUMENT, NodeType.FILE),
|
|
225
|
+
(NodeType.DOCUMENT, NodeType.FILE)),
|
|
197
226
|
}
|
|
198
227
|
|
|
199
228
|
|
|
@@ -262,6 +291,10 @@ class Node:
|
|
|
262
291
|
visibility: Visibility = Visibility.PRIVATE
|
|
263
292
|
created_at: str = field(default_factory=_now_iso)
|
|
264
293
|
updated_at: str = field(default_factory=_now_iso)
|
|
294
|
+
style: Optional[str] = None
|
|
295
|
+
tone: Optional[str] = None
|
|
296
|
+
importance_score: float = 0.0
|
|
297
|
+
last_used: Optional[str] = None
|
|
265
298
|
|
|
266
299
|
def validate(self) -> None:
|
|
267
300
|
if not isinstance(self.type, NodeType):
|
|
@@ -345,15 +378,19 @@ CREATE TABLE IF NOT EXISTS kg_meta (
|
|
|
345
378
|
);
|
|
346
379
|
|
|
347
380
|
CREATE TABLE IF NOT EXISTS nodes_v2 (
|
|
348
|
-
id
|
|
349
|
-
type
|
|
350
|
-
label
|
|
351
|
-
attrs
|
|
352
|
-
embedding
|
|
353
|
-
owner_id
|
|
354
|
-
visibility
|
|
355
|
-
created_at
|
|
356
|
-
updated_at
|
|
381
|
+
id TEXT PRIMARY KEY,
|
|
382
|
+
type TEXT NOT NULL,
|
|
383
|
+
label TEXT NOT NULL,
|
|
384
|
+
attrs TEXT NOT NULL DEFAULT '{}',
|
|
385
|
+
embedding BLOB,
|
|
386
|
+
owner_id TEXT,
|
|
387
|
+
visibility TEXT NOT NULL DEFAULT 'private',
|
|
388
|
+
created_at TEXT NOT NULL,
|
|
389
|
+
updated_at TEXT NOT NULL,
|
|
390
|
+
style TEXT,
|
|
391
|
+
tone TEXT,
|
|
392
|
+
importance_score REAL NOT NULL DEFAULT 0.0,
|
|
393
|
+
last_used TEXT
|
|
357
394
|
);
|
|
358
395
|
|
|
359
396
|
CREATE TABLE IF NOT EXISTS edges_v2 (
|
|
@@ -418,8 +455,9 @@ class KGStoreV2:
|
|
|
418
455
|
conn.execute(
|
|
419
456
|
"""
|
|
420
457
|
INSERT INTO nodes_v2(id, type, label, attrs, embedding,
|
|
421
|
-
owner_id, visibility, created_at, updated_at
|
|
422
|
-
|
|
458
|
+
owner_id, visibility, created_at, updated_at,
|
|
459
|
+
style, tone, importance_score, last_used)
|
|
460
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
423
461
|
ON CONFLICT(id) DO UPDATE SET
|
|
424
462
|
type=excluded.type,
|
|
425
463
|
label=excluded.label,
|
|
@@ -427,7 +465,11 @@ class KGStoreV2:
|
|
|
427
465
|
embedding=COALESCE(excluded.embedding, nodes_v2.embedding),
|
|
428
466
|
owner_id=excluded.owner_id,
|
|
429
467
|
visibility=excluded.visibility,
|
|
430
|
-
updated_at=excluded.updated_at
|
|
468
|
+
updated_at=excluded.updated_at,
|
|
469
|
+
style=COALESCE(excluded.style, nodes_v2.style),
|
|
470
|
+
tone=COALESCE(excluded.tone, nodes_v2.tone),
|
|
471
|
+
importance_score=MAX(excluded.importance_score, nodes_v2.importance_score),
|
|
472
|
+
last_used=COALESCE(excluded.last_used, nodes_v2.last_used)
|
|
431
473
|
""",
|
|
432
474
|
(
|
|
433
475
|
node.id, node.type.value, node.label,
|
|
@@ -435,6 +477,8 @@ class KGStoreV2:
|
|
|
435
477
|
encode_embedding(node.embedding),
|
|
436
478
|
node.owner_id, node.visibility.value,
|
|
437
479
|
node.created_at, node.updated_at,
|
|
480
|
+
node.style, node.tone,
|
|
481
|
+
float(node.importance_score), node.last_used,
|
|
438
482
|
),
|
|
439
483
|
)
|
|
440
484
|
return node.id
|
|
@@ -575,6 +619,7 @@ class KGStoreV2:
|
|
|
575
619
|
|
|
576
620
|
# ── Row → model helpers ────────────────────────────────────────────────────
|
|
577
621
|
def _row_to_node(row: sqlite3.Row) -> Node:
|
|
622
|
+
keys = row.keys() if hasattr(row, "keys") else []
|
|
578
623
|
return Node(
|
|
579
624
|
id=row["id"],
|
|
580
625
|
type=NodeType(row["type"]),
|
|
@@ -585,6 +630,10 @@ def _row_to_node(row: sqlite3.Row) -> Node:
|
|
|
585
630
|
visibility=Visibility(row["visibility"]),
|
|
586
631
|
created_at=row["created_at"],
|
|
587
632
|
updated_at=row["updated_at"],
|
|
633
|
+
style=row["style"] if "style" in keys else None,
|
|
634
|
+
tone=row["tone"] if "tone" in keys else None,
|
|
635
|
+
importance_score=float(row["importance_score"]) if "importance_score" in keys else 0.0,
|
|
636
|
+
last_used=row["last_used"] if "last_used" in keys else None,
|
|
588
637
|
)
|
|
589
638
|
|
|
590
639
|
|
package/knowledge_graph.py
CHANGED
|
@@ -6,6 +6,7 @@ portable database so it can later migrate to Neo4j/Postgres without changing
|
|
|
6
6
|
the ingestion contract.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
import asyncio
|
|
9
10
|
import hashlib
|
|
10
11
|
import json
|
|
11
12
|
import logging
|
|
@@ -26,6 +27,12 @@ try:
|
|
|
26
27
|
except Exception: # pragma: no cover - v2 schema is optional at import time
|
|
27
28
|
KGStoreV2 = None # type: ignore[assignment]
|
|
28
29
|
|
|
30
|
+
_llm_router_ref = None
|
|
31
|
+
|
|
32
|
+
def set_llm_router(router_instance):
    """Register the LLM router used by this module's LLM-based extraction.

    The instance is stored in the module-level ``_llm_router_ref``. Passing a
    falsy value (for example ``None``) leaves the module without a router, in
    which case the LLM extraction helpers return ``None`` and callers use
    their rule-based fallback.
    """
    global _llm_router_ref
    _llm_router_ref = router_instance
|
|
35
|
+
|
|
29
36
|
|
|
30
37
|
GRAPH_SCHEMA_VERSION = 1
|
|
31
38
|
|
|
@@ -365,6 +372,109 @@ def _chunks(text: str, size: int = 1200, overlap: int = 160) -> List[str]:
|
|
|
365
372
|
return chunks
|
|
366
373
|
|
|
367
374
|
|
|
375
|
+
_LLM_EXTRACT_CONCEPT_PROMPT = """Extract the key concepts from the following text.
|
|
376
|
+
Return ONLY a JSON array of objects, each with "concept" (string) and "importance" (float 0-1).
|
|
377
|
+
Extract up to {limit} concepts. Focus on named entities, technical terms, and domain-specific nouns.
|
|
378
|
+
Do NOT include common words, stop words, or generic terms.
|
|
379
|
+
|
|
380
|
+
Text:
|
|
381
|
+
{text}
|
|
382
|
+
|
|
383
|
+
JSON:"""
|
|
384
|
+
|
|
385
|
+
_LLM_EXTRACT_TRIPLE_PROMPT = """Extract relationship triples from the following text.
|
|
386
|
+
Return ONLY a JSON array of objects, each with:
|
|
387
|
+
- "subject": source concept (string)
|
|
388
|
+
- "relation": relationship verb (string, Korean or English)
|
|
389
|
+
- "object": target concept (string)
|
|
390
|
+
- "evidence": the sentence supporting this triple (string, max 240 chars)
|
|
391
|
+
- "confidence": how confident you are (float 0-1)
|
|
392
|
+
|
|
393
|
+
Extract up to {limit} triples. Focus on meaningful semantic relationships.
|
|
394
|
+
|
|
395
|
+
Text:
|
|
396
|
+
{text}
|
|
397
|
+
|
|
398
|
+
Concepts already identified: {concepts}
|
|
399
|
+
|
|
400
|
+
JSON:"""
|
|
401
|
+
|
|
402
|
+
# Feature flag for LLM-based concept/triple extraction. Read once, when this
# module is imported, from the LATTICEAI_LLM_EXTRACTION environment variable.
# Defaults to enabled; only "1", "true" or "yes" (case-insensitive) enable it,
# any other value disables it.
ENABLE_LLM_EXTRACTION = os.getenv("LATTICEAI_LLM_EXTRACTION", "true").lower() in ("1", "true", "yes")
|
|
403
|
+
|
|
404
|
+
|
|
405
|
+
def _run_llm_generate_sync(prompt: str, max_tokens: int, timeout: float = 30.0) -> str:
    """Drive the router's async ``generate`` coroutine from synchronous code.

    Args:
        prompt: Prompt text passed to ``_llm_router_ref.generate``.
        max_tokens: Forwarded to ``generate`` as ``max_tokens``.
        timeout: Seconds to wait for the result when the call has to be
            off-loaded to a helper thread (only used in that case).

    Returns:
        Whatever ``generate`` resolves to (expected to be the raw model text).

    Raises:
        concurrent.futures.TimeoutError: the helper thread did not finish
            within ``timeout`` seconds.
        Exception: anything raised by ``generate`` itself.
    """
    # Capture the router once so a concurrent set_llm_router() cannot swap it
    # out between the caller's check and the actual call.
    router = _llm_router_ref

    def _generate() -> str:
        # The coroutine is created in the thread that runs it, so nothing is
        # left un-awaited if the submission never gets to start.
        return asyncio.run(
            router.generate(prompt, max_tokens=max_tokens, temperature=0.1)
        )

    try:
        # get_running_loop() only reports a loop that is running in *this*
        # thread. Unlike get_event_loop() it never creates a loop, emits no
        # DeprecationWarning, and its RuntimeError is an expected signal here
        # rather than a failure (worker threads normally have no loop).
        asyncio.get_running_loop()
    except RuntimeError:
        return _generate()

    # A loop is already running in this thread, so asyncio.run() is not
    # allowed here; run the coroutine on a private loop in a helper thread.
    import concurrent.futures

    pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
    try:
        return pool.submit(_generate).result(timeout=timeout)
    finally:
        # Do not wait for the worker: a ``with`` block would join the thread on
        # exit and silently turn the timeout above into an unbounded wait.
        pool.shutdown(wait=False)


def _parse_llm_json_array(raw: str) -> Optional[list]:
    """Parse a model reply that is supposed to be a JSON array.

    A surrounding Markdown code fence is stripped first. If the reply still is
    not valid JSON, the text between the first ``[`` and the last ``]`` is
    tried, which tolerates a sentence of prose around the array.

    Returns:
        The parsed list, or ``None`` when the reply is valid JSON but not an
        array.

    Raises:
        json.JSONDecodeError: no JSON could be recovered from the reply.
    """
    payload = str(raw or "").strip()
    if payload.startswith("```"):
        payload = re.sub(r"^```(?:json)?\s*", "", payload)
        payload = re.sub(r"\s*```$", "", payload)
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        start, end = payload.find("["), payload.rfind("]")
        if start == -1 or end <= start:
            raise
        parsed = json.loads(payload[start:end + 1])
    return parsed if isinstance(parsed, list) else None


def _coerce_confidence(value: object, default: float = 0.8) -> float:
    """Convert a model-supplied confidence to a float within ``[0.0, 1.0]``.

    Unparseable values and NaN fall back to ``default`` so that one bad field
    does not invalidate the rest of the reply.
    """
    try:
        score = float(value)  # type: ignore[arg-type]
    except (TypeError, ValueError):
        return default
    if score != score:  # NaN is the only value not equal to itself
        return default
    return max(0.0, min(1.0, score))


def _llm_extract_concepts(text: str, limit: int = 12) -> Optional[List[str]]:
    """Ask the configured LLM for the key concepts in *text*.

    Args:
        text: Source text; only the first 3000 characters are sent.
        limit: Maximum number of concepts requested and returned.

    Returns:
        A list of distinct, non-empty concept strings in the order the model
        gave them, or ``None`` when LLM extraction is disabled, no router or
        model is configured, the call fails, or nothing usable came back.
        ``None`` tells the caller to use the rule-based extractor.
    """
    if not ENABLE_LLM_EXTRACTION or not _llm_router_ref:
        return None
    if not _llm_router_ref.current_model_id:
        return None
    prompt = _LLM_EXTRACT_CONCEPT_PROMPT.format(text=text[:3000], limit=limit)
    try:
        parsed = _parse_llm_json_array(_run_llm_generate_sync(prompt, max_tokens=1024))
        if parsed is None:
            return None
        concepts: List[str] = []
        seen = set()
        for item in parsed[:limit]:
            if isinstance(item, dict):
                value = item.get("concept")
            elif isinstance(item, str):
                value = item
            else:
                continue
            if value is None:
                continue
            # Coerce so the declared List[str] holds even if the model emits a
            # number, and drop blanks/duplicates that would add no information.
            name = str(value).strip()
            if name and name not in seen:
                seen.add(name)
                concepts.append(name)
        return concepts or None
    except Exception as e:
        # Best-effort by design: any failure means "use the rule-based path".
        logging.debug("LLM concept extraction failed (falling back to rules): %s", e)
        return None


def _llm_extract_triples(text: str, concepts: List[str], limit: int = 20) -> Optional[List[Dict[str, Any]]]:
    """Ask the configured LLM for relationship triples found in *text*.

    Args:
        text: Source text; only the first 3000 characters are sent.
        concepts: Concepts already identified; the first 15 are given to the
            model as a hint.
        limit: Maximum number of triples requested and returned.

    Returns:
        A list of dicts with the keys ``subject``, ``relation``, ``object``,
        ``context`` (the model's ``evidence``, cut to 240 characters) and
        ``confidence`` (float in ``[0, 1]``), or ``None`` when LLM extraction
        is disabled, no router or model is configured, the call fails, or no
        usable triple came back. ``None`` tells the caller to use the
        rule-based extractor.
    """
    if not ENABLE_LLM_EXTRACTION or not _llm_router_ref:
        return None
    if not _llm_router_ref.current_model_id:
        return None
    prompt = _LLM_EXTRACT_TRIPLE_PROMPT.format(
        text=text[:3000], limit=limit,
        concepts=", ".join(concepts[:15]),
    )
    try:
        parsed = _parse_llm_json_array(_run_llm_generate_sync(prompt, max_tokens=2048))
        if parsed is None:
            return None
        triples: List[Dict[str, Any]] = []
        for item in parsed[:limit]:
            if not isinstance(item, dict):
                continue
            subject = str(item.get("subject") or "").strip()
            target = str(item.get("object") or "").strip()
            # A triple without both endpoints cannot become an edge; skipping
            # it also avoids storing the literal string "None" as a concept.
            if not subject or not target:
                continue
            relation = str(item.get("relation") or "").strip() or "관련됨"
            triples.append({
                "subject": subject,
                "relation": relation,
                "object": target,
                "context": str(item.get("evidence") or "")[:240],
                # Tolerant conversion: one bad confidence value must not throw
                # away every other triple in the reply.
                "confidence": _coerce_confidence(item.get("confidence", 0.8)),
            })
        return triples or None
    except Exception as e:
        # Best-effort by design: any failure means "use the rule-based path".
        logging.debug("LLM triple extraction failed (falling back to rules): %s", e)
        return None
|
|
476
|
+
|
|
477
|
+
|
|
368
478
|
_CONCEPT_STOP: set = {
|
|
369
479
|
# English stop words
|
|
370
480
|
"the", "and", "for", "with", "this", "that", "from", "into", "which",
|
|
@@ -385,7 +495,15 @@ _CONCEPT_STOP: set = {
|
|
|
385
495
|
|
|
386
496
|
|
|
387
497
|
def _extract_concepts(text: str, limit: int = 12) -> List[str]:
    """Return concepts for *text*, preferring the LLM extractor.

    Delegates to ``_llm_extract_concepts`` first. When that returns nothing
    usable (``None`` or an empty list), the rule-based
    ``_extract_concepts_rules`` result is returned instead.
    """
    return _llm_extract_concepts(text, limit) or _extract_concepts_rules(text, limit)
|
|
503
|
+
|
|
504
|
+
|
|
505
|
+
def _extract_concepts_rules(text: str, limit: int = 12) -> List[str]:
|
|
506
|
+
"""Extract meaningful named concepts from text (rule-based).
|
|
389
507
|
|
|
390
508
|
Priority order:
|
|
391
509
|
1. Backtick / quoted terms (explicitly technical)
|
|
@@ -586,7 +704,19 @@ def _extract_triples(
|
|
|
586
704
|
concepts: List[str],
|
|
587
705
|
limit: int = 20,
|
|
588
706
|
) -> List[Dict[str, str]]:
|
|
589
|
-
"""
|
|
707
|
+
"""LLM-first triple extraction with rule-based fallback."""
|
|
708
|
+
llm_result = _llm_extract_triples(text, concepts, limit)
|
|
709
|
+
if llm_result:
|
|
710
|
+
return llm_result
|
|
711
|
+
return _extract_triples_rules(text, concepts, limit)
|
|
712
|
+
|
|
713
|
+
|
|
714
|
+
def _extract_triples_rules(
|
|
715
|
+
text: str,
|
|
716
|
+
concepts: List[str],
|
|
717
|
+
limit: int = 20,
|
|
718
|
+
) -> List[Dict[str, str]]:
|
|
719
|
+
"""Extract (subject, verb-edge, object, context) triples from text (rule-based).
|
|
590
720
|
|
|
591
721
|
For each sentence containing ≥2 concepts, infer the verb-form edge label
|
|
592
722
|
from surrounding context and create a directed triple.
|
|
@@ -2810,3 +2940,170 @@ class KnowledgeGraphStore:
|
|
|
2810
2940
|
"local_file_status": local_file_status,
|
|
2811
2941
|
"v2": v2,
|
|
2812
2942
|
}
|
|
2943
|
+
|
|
2944
|
+
def search_for_document_generation(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
    """Rank stored nodes as context material for document generation.

    Candidates are nodes whose title, summary or metadata JSON contains the
    whole query or one of the terms from ``_topic_candidates``. Each candidate
    is scored as ``0.5*text + 0.3*graph + 0.2*recency``, multiplied by 1.2 for
    ``Document``/``File``/``SlideDeck``/``Decision`` nodes, where:

    * text    - share of the query terms found in the node's text,
    * graph   - ``log1p`` of the node's edge count divided by 4, capped at 1,
    * recency - ``_recency_score`` of ``updated_at`` with a 14-day half-life.

    Args:
        query: Free-text query; blank or ``None`` yields an empty result.
        limit: Number of results wanted, clamped to the range 1-50.

    Returns:
        Up to ``limit`` dicts sorted by descending ``hybrid_score``; each one
        carries the node fields, the rounded component scores, and up to eight
        neighbouring Concept/Feature/Decision/Task nodes.
    """
    needle = str(query or "").strip()
    if not needle:
        return []
    limit = max(1, min(int(limit or 10), 50))
    terms = _topic_candidates(needle, limit=12)
    now = datetime.now()

    candidate_sql = """
        SELECT id, type, title, summary, metadata_json, updated_at
        FROM nodes
        WHERE (title LIKE ? OR summary LIKE ? OR metadata_json LIKE ?)
          AND type IN ('Document', 'File', 'CodeFile', 'SlideDeck',
                       'Spreadsheet', 'Image', 'ImageText', 'Chat',
                       'Decision', 'Task', 'Concept', 'Feature',
                       'Page', 'Slide')
        ORDER BY updated_at DESC
        LIMIT ?
    """
    boosted_types = ("Document", "File", "SlideDeck", "Decision")

    with self._connect() as conn:
        # Insertion-ordered id -> row map: the first hit for an id wins and
        # candidates keep the order in which the lookups produced them.
        candidates = {}

        def collect(fragment, cap):
            like = f"%{fragment}%"
            for hit in conn.execute(candidate_sql, (like, like, like, cap)).fetchall():
                candidates.setdefault(hit["id"], hit)

        # The whole query gets the larger candidate budget, then each term.
        collect(needle, limit * 5)
        for term in terms:
            collect(term, limit * 3)

        ranked = []
        for row in candidates.values():
            node_id = row["id"]
            haystack = f"{row['title']} {row['summary']} {row['metadata_json']}".lower()

            text_hits = sum(term.lower() in haystack for term in terms)
            text_score = min(1.0, text_hits / max(len(terms), 1))

            edge_count = conn.execute(
                "SELECT COUNT(*) AS c FROM edges WHERE from_node=? OR to_node=?",
                (node_id, node_id),
            ).fetchone()["c"]
            graph_score = min(1.0, math.log1p(edge_count) / 4.0)

            recency = _recency_score(row["updated_at"], now=now, half_life_days=14.0)

            doc_type_boost = 1.2 if row["type"] in boosted_types else 1.0
            hybrid_score = (
                0.5 * text_score
                + 0.3 * graph_score
                + 0.2 * recency
            ) * doc_type_boost

            meta = _safe_loads(row["metadata_json"])
            neighbor_rows = conn.execute(
                """
                SELECT n.title, n.type FROM edges e
                JOIN nodes n ON n.id = CASE WHEN e.from_node = ? THEN e.to_node ELSE e.from_node END
                WHERE (e.from_node = ? OR e.to_node = ?)
                  AND n.type IN ('Concept', 'Feature', 'Decision', 'Task')
                LIMIT 8
                """,
                (node_id, node_id, node_id),
            ).fetchall()
            related = [{"title": nb["title"], "type": nb["type"]} for nb in neighbor_rows]

            ranked.append({
                "id": node_id,
                "type": row["type"],
                "title": row["title"],
                "summary": row["summary"],
                "metadata": meta,
                "updated_at": row["updated_at"],
                "hybrid_score": round(hybrid_score, 4),
                "scores": {
                    "text": round(text_score, 4),
                    "graph": round(graph_score, 4),
                    "recency": round(recency, 4),
                },
                "related_concepts": related,
            })

    # Stable sort on the rounded score: ties keep candidate discovery order.
    ordered = sorted(ranked, key=lambda entry: entry["hybrid_score"], reverse=True)
    return ordered[:limit]
|
|
3061
|
+
|
|
3062
|
+
def multi_hop_context(self, node_ids: List[str], max_hops: int = 2) -> Dict[str, Any]:
    """Collect the graph neighbourhood reachable from the seed nodes.

    Expands level by level from ``node_ids`` for at most ``max_hops`` levels,
    following edges in both directions.

    Args:
        node_ids: Seed node ids (duplicates are ignored).
        max_hops: Number of expansion levels; ``0`` or less collects nothing.

    Returns:
        ``{"nodes": [...], "edges": [...]}``. Each node dict has ``id``,
        ``type``, ``title``, ``summary``, parsed ``metadata`` and the ``hop``
        level at which it was visited (seeds are hop 0). Each edge dict has
        ``from``, ``to``, ``type`` and ``weight`` and is listed once per edge
        id. An id with no row in ``nodes`` contributes no node entry, but its
        edges are still followed.
    """
    node_sql = "SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE id=?"
    edge_sql = """
        SELECT id, from_node, to_node, type, weight
        FROM edges WHERE from_node=? OR to_node=?
    """

    seen_nodes = set()
    seen_edges = set()
    collected_nodes = []
    collected_edges = []
    frontier = set(node_ids)

    with self._connect() as conn:
        for depth in range(max_hops):
            if not frontier:
                break
            upcoming = set()
            for node_id in frontier:
                if node_id in seen_nodes:
                    continue
                seen_nodes.add(node_id)

                node_row = conn.execute(node_sql, (node_id,)).fetchone()
                if node_row:
                    collected_nodes.append({
                        "id": node_row["id"],
                        "type": node_row["type"],
                        "title": node_row["title"],
                        "summary": node_row["summary"],
                        "metadata": _safe_loads(node_row["metadata_json"]),
                        "hop": depth,
                    })

                for edge in conn.execute(edge_sql, (node_id, node_id)).fetchall():
                    edge_id = edge["id"]
                    if edge_id not in seen_edges:
                        seen_edges.add(edge_id)
                        collected_edges.append({
                            "from": edge["from_node"],
                            "to": edge["to_node"],
                            "type": edge["type"],
                            "weight": edge["weight"],
                        })
                    # The endpoint that is not the node being expanded.
                    if edge["from_node"] == node_id:
                        neighbor = edge["to_node"]
                    else:
                        neighbor = edge["from_node"]
                    if neighbor not in seen_nodes:
                        upcoming.add(neighbor)
            frontier = upcoming

    return {"nodes": collected_nodes, "edges": collected_edges}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
"""Core utilities: security, sessions, audit."""
|
|
1
|
+
"""Core utilities: security, sessions, audit, context_builder, document_generator."""
|