ltcai 0.1.24 → 0.1.25
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/docs/CHANGELOG.md +37 -0
- package/knowledge_graph.py +442 -87
- package/ltcai_cli.py +19 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -294,7 +294,7 @@ Or: `./start_ai.sh` (auto-restart + caffeinate)
|
|
|
294
294
|
| VS Code Marketplace | [marketplace.visualstudio.com](https://marketplace.visualstudio.com/items?itemName=parktaesoo.ltcai) |
|
|
295
295
|
| Open VSX | [open-vsx.org](https://open-vsx.org/extension/parktaesoo/ltcai) |
|
|
296
296
|
|
|
297
|
-
Current version: **0.1.24** — [Changelog](docs/CHANGELOG.md)
|
|
297
|
+
Current version: **0.1.25** — [Changelog](docs/CHANGELOG.md)
|
|
298
298
|
|
|
299
299
|
---
|
|
300
300
|
|
package/docs/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,42 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.1.25] - 2026-05-24
|
|
4
|
+
|
|
5
|
+
### Knowledge Graph 전면 재설계 — 점=명사, 선=동사
|
|
6
|
+
|
|
7
|
+
**설계 원칙**
|
|
8
|
+
- **점(Node) = 명사** — 의미 있는 대상 (문서, 사람, 개념, 에러, 코드, 채팅 등)
|
|
9
|
+
- **선(Edge) = 동사** — 대상 간의 관계 (언급함, 포함함, 해결함, 의존함 등)
|
|
10
|
+
- 원본 데이터(PDF·PPT·채팅·코드 등)는 그대로 보관, AI가 핵심 개념을 추출해 점으로 만들고 관계를 선으로 연결
|
|
11
|
+
|
|
12
|
+
**노드 타입 (점 = 명사)**
|
|
13
|
+
- `Chat` — 대화 세션
|
|
14
|
+
- `Document` — 파일 (PDF·PPT·Word·Excel·이미지)
|
|
15
|
+
- `Concept` — 개념·아이디어·기술 용어
|
|
16
|
+
- `Person` — 사람 (사용자, 언급된 인물)
|
|
17
|
+
- `Error` — 오류·버그·예외
|
|
18
|
+
- `Code` — 코드·함수·클래스
|
|
19
|
+
- `Feature` — 소프트웨어 기능
|
|
20
|
+
- `Task` — 할 일·액션 아이템
|
|
21
|
+
- `Decision` — 결정 사항
|
|
22
|
+
|
|
23
|
+
**엣지 어휘 (선 = 동사형)**
|
|
24
|
+
`언급함` · `포함함` · `해결함` · `의존함` · `설명함` · `비교함` · `사용함` · `연결함` · `확장함` · `생성함` · `대체함` · `지원함` · `발생함` · `관련됨` · `작성함` · `업로드함`
|
|
25
|
+
|
|
26
|
+
**핵심 개선**
|
|
27
|
+
- `_extract_concepts()` — 고유명사·복합어·기술 용어 추출 (Lattice AI, Graph RAG, VS Code 등)
|
|
28
|
+
- `_classify_node_type()` — 개념별 노드 타입 자동 분류 (윈도우 컨텍스트 기반)
|
|
29
|
+
- `_infer_edge()` — 문장 내 동사·조사 패턴으로 엣지 레이블 자동 결정
|
|
30
|
+
- `_extract_triples()` — 문장 단위 개념 쌍 → (주어, 동사, 목적어) 트리플 추출
|
|
31
|
+
- `ingest_message()` 재설계 — 메시지 단위 → 대화 세션(Chat) 단위 노드
|
|
32
|
+
- `ingest_document()` 재설계 — Document 노드 + 동사형 엣지 (포함함, 업로드함)
|
|
33
|
+
- 중복 제거 — 하위 개념이 상위 복합어에 완전히 흡수될 때만 제거
|
|
34
|
+
- Message·AIResponse·Chunk 노드는 RAG 검색용으로만 저장, 그래프 비표시
|
|
35
|
+
|
|
36
|
+
### Release
|
|
37
|
+
- 배포 버전을 `0.1.25`로 상향
|
|
38
|
+
- 대상 채널: `npm` · `PyPI` · `VS Code Marketplace` · `Open VSX`
|
|
39
|
+
|
|
3
40
|
## [0.1.24] - 2026-05-24
|
|
4
41
|
|
|
5
42
|
### 안정화 및 UX 개선
|
package/knowledge_graph.py
CHANGED
|
@@ -94,31 +94,274 @@ def _chunks(text: str, size: int = 1200, overlap: int = 160) -> List[str]:
|
|
|
94
94
|
return chunks
|
|
95
95
|
|
|
96
96
|
|
|
97
|
-
|
|
97
|
+
_CONCEPT_STOP: set = {
|
|
98
|
+
# English stop words
|
|
99
|
+
"the", "and", "for", "with", "this", "that", "from", "into", "which",
|
|
100
|
+
"are", "was", "were", "has", "have", "had", "can", "will", "would",
|
|
101
|
+
"could", "should", "may", "might", "must", "shall", "being", "been",
|
|
102
|
+
"also", "just", "then", "than", "when", "where", "what", "how", "why",
|
|
103
|
+
"its", "their", "your", "our", "you", "they", "them", "these", "those",
|
|
104
|
+
"use", "used", "using", "based", "like", "such", "via", "per", "let",
|
|
105
|
+
"yes", "not", "but", "are", "all", "any", "out", "new", "get", "set",
|
|
106
|
+
# Korean stop words
|
|
107
|
+
"사용자", "내용", "파일", "채팅", "답변", "입니다", "그리고", "처럼",
|
|
108
|
+
"있어", "없어", "이야", "이다", "한다", "하다", "되다", "됩니다",
|
|
109
|
+
"경우", "방법", "부분", "상태", "정도", "결과", "이후", "이전",
|
|
110
|
+
"그것", "이것", "저것", "여기", "거기", "저기", "우리", "저희",
|
|
111
|
+
"기능", "서버", "모델", "설정", "설명", "버전", "지원", "사용", "실행",
|
|
112
|
+
"todo", "fixme", "note", "참고", "주의", "warning",
|
|
113
|
+
}
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _extract_concepts(text: str, limit: int = 12) -> List[str]:
|
|
117
|
+
"""Extract meaningful named concepts from text.
|
|
118
|
+
|
|
119
|
+
Priority order:
|
|
120
|
+
1. Backtick / quoted terms (explicitly technical)
|
|
121
|
+
2. Multi-word proper nouns (Lattice AI, GPT-4o, Claude Sonnet)
|
|
122
|
+
3. Single capitalized proper nouns not at sentence start (Claude, Python, FastAPI)
|
|
123
|
+
4. Korean compound technical terms (멀티모달, 에이전트, 그래프RAG)
|
|
124
|
+
5. Hyphenated / versioned identifiers (gpt-4o, mlx-lm, llama-3.3)
|
|
125
|
+
"""
|
|
98
126
|
text = str(text or "")
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
127
|
+
seen: dict = {} # concept_lower → original form
|
|
128
|
+
|
|
129
|
+
def _add(term: str) -> None:
|
|
130
|
+
key = term.strip().lower()
|
|
131
|
+
if (
|
|
132
|
+
key
|
|
133
|
+
and key not in _CONCEPT_STOP
|
|
134
|
+
and not key.isdigit()
|
|
135
|
+
and len(key) >= 2
|
|
136
|
+
):
|
|
137
|
+
seen.setdefault(key, term.strip())
|
|
138
|
+
|
|
139
|
+
# 1. Backtick-quoted code/term (highest confidence)
|
|
140
|
+
for m in re.findall(r'`([^`]{2,40})`', text):
|
|
141
|
+
if not re.search(r'[\(\)\[\]{}]', m): # skip code expressions
|
|
142
|
+
_add(m)
|
|
143
|
+
|
|
144
|
+
# 2. Double-quoted terms (single-quoted terms are not matched by this pattern)
|
|
145
|
+
for m in re.findall(r'"([^"]{2,40})"', text):
|
|
146
|
+
_add(m)
|
|
147
|
+
|
|
148
|
+
# 3. Multi-word English proper nouns (Title Case or ALL-CAPS first word, 2–4 words).
|
|
149
|
+
# Pattern A: Mixed-case first word — "Lattice AI", "Tool Use", "Graph RAG"
|
|
150
|
+
for m in re.findall(
|
|
151
|
+
r'([A-Z][a-z]{1,20}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20}|\d[\w.]{0,6})){1,3})',
|
|
152
|
+
text,
|
|
153
|
+
):
|
|
154
|
+
_add(m)
|
|
155
|
+
# Pattern B: ALL-CAPS first word — "VS Code", "MCP Server", "GPT-4o Mini"
|
|
156
|
+
for m in re.findall(
|
|
157
|
+
r'([A-Z]{2,6}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20})){1,2})',
|
|
158
|
+
text,
|
|
159
|
+
):
|
|
160
|
+
_add(m)
|
|
161
|
+
|
|
162
|
+
# 4. Single capitalized proper noun.
|
|
163
|
+
# Use ASCII-boundary lookaround instead of \b so Korean particles
|
|
164
|
+
# (와, 의, 는 …) after an English word don't block the match.
|
|
165
|
+
all_caps_words = re.findall(r'(?<![A-Za-z0-9])([A-Z][A-Za-z0-9]{2,24})(?![A-Za-z0-9])', text)
|
|
166
|
+
freq: Dict[str, int] = {}
|
|
167
|
+
for w in all_caps_words:
|
|
168
|
+
freq[w] = freq.get(w, 0) + 1
|
|
169
|
+
sentence_starts = set(re.findall(r'(?:^|(?<=[.!?])\s+)([A-Z][a-z]+)', text))
|
|
170
|
+
for m, cnt in freq.items():
|
|
171
|
+
if m.lower() in _CONCEPT_STOP:
|
|
172
|
+
continue
|
|
173
|
+
if cnt >= 2 or m not in sentence_starts:
|
|
174
|
+
_add(m)
|
|
175
|
+
|
|
176
|
+
# 5. Korean technical compound nouns: 2–12 Hangul chars followed by a technical suffix (AI, API, 모델, 서버, …)
|
|
177
|
+
for m in re.findall(r'[가-힣]{2,12}(?:AI|LLM|API|UI|RAG|bot|Bot|기능|모델|서버|에이전트|파이프라인|워크플로)', text):
|
|
178
|
+
_add(m)
|
|
179
|
+
# Korean standalone terms that appear directly before a particle (은/는/이/가/을/를/의/에서/으로/와/과)
|
|
180
|
+
for m in re.findall(r'([가-힣]{2,12})(?:은|는|이|가|을|를|의|에서|으로|와|과)', text):
|
|
181
|
+
if m.lower() not in _CONCEPT_STOP and len(m) >= 2:
|
|
182
|
+
# Only add if it's non-trivial (has 3+ chars or appears multiple times)
|
|
183
|
+
cnt = text.count(m)
|
|
184
|
+
if len(m) >= 3 or cnt >= 2:
|
|
185
|
+
_add(m)
|
|
186
|
+
|
|
187
|
+
# 6. Hyphenated / versioned identifiers (gpt-4o, llama-3.3, mlx-lm)
|
|
188
|
+
for m in re.findall(r'\b([a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z0-9.]+)+)\b', text):
|
|
189
|
+
if len(m) >= 4:
|
|
190
|
+
_add(m)
|
|
191
|
+
|
|
192
|
+
# De-duplicate: remove shorter if ALL its occurrences in the source text
|
|
193
|
+
# are followed immediately by the suffix that forms the longer concept.
|
|
194
|
+
# "Lattice" → dropped when every occurrence is "Lattice AI"
|
|
195
|
+
# "Claude" → kept because it appears as just "Claude" too.
|
|
196
|
+
values = list(seen.values())
|
|
197
|
+
values_lower = [v.lower() for v in values]
|
|
198
|
+
keep = set(range(len(values)))
|
|
199
|
+
for i, v in enumerate(values):
|
|
200
|
+
vl = v.lower()
|
|
201
|
+
for j, wl in enumerate(values_lower):
|
|
202
|
+
if i == j or j not in keep:
|
|
203
|
+
continue
|
|
204
|
+
# Check if vl is a word-prefix of wl
|
|
205
|
+
suffix = wl[len(vl):]
|
|
206
|
+
if not (wl.startswith(vl) and re.match(r'^[\s\-]', suffix)):
|
|
112
207
|
continue
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
208
|
+
# Count occurrences of v NOT followed by the suffix
|
|
209
|
+
suffix_stripped = suffix.lstrip(" -")
|
|
210
|
+
# Escape for regex
|
|
211
|
+
pattern_with_suffix = re.escape(v) + r'[\s\-]+' + re.escape(suffix_stripped)
|
|
212
|
+
pattern_alone = re.escape(v) + r'(?![\s\-]*' + re.escape(suffix_stripped) + r')'
|
|
213
|
+
alone_count = len(re.findall(pattern_alone, text, re.IGNORECASE))
|
|
214
|
+
if alone_count == 0:
|
|
215
|
+
# Shorter term never appears alone → safe to remove
|
|
216
|
+
keep.discard(i)
|
|
217
|
+
break
|
|
218
|
+
|
|
219
|
+
final = [values[i] for i in range(len(values)) if i in keep]
|
|
220
|
+
return final[:limit]
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
224
|
+
# Node type taxonomy (점 = 명사)
|
|
225
|
+
# ──────────────────────────────────────────────────────────────────────────────
|
|
226
|
+
# Chat — 대화 세션
|
|
227
|
+
# Document — 파일 (PDF·PPT·Word·Excel·이미지 등)
|
|
228
|
+
# Concept — 개념·아이디어·기술 용어
|
|
229
|
+
# Person — 사람 (사용자, 언급된 인물)
|
|
230
|
+
# Error — 오류·버그·예외
|
|
231
|
+
# Code — 코드 스니펫·함수·클래스
|
|
232
|
+
# Feature — 소프트웨어 기능
|
|
233
|
+
# Task — 할 일·액션 아이템
|
|
234
|
+
# Decision — 결정 사항
|
|
235
|
+
|
|
236
|
+
# Edge type vocabulary (선 = 동사 — 과거형 서술어)
|
|
237
|
+
EDGE_VERB = {
|
|
238
|
+
"언급함": r"언급|mention|refer|cited",
|
|
239
|
+
"포함함": r"포함|include|consist|구성|탑재|contains",
|
|
240
|
+
"해결함": r"해결|resolv|fix|수정|고쳤|closed",
|
|
241
|
+
"의존함": r"의존|depend|require|필요|based on",
|
|
242
|
+
"설명함": r"설명|explain|describe|정의|란|이란|means",
|
|
243
|
+
"비교함": r"비교|versus|vs\.?|차이|다르|compare",
|
|
244
|
+
"사용함": r"사용|use|활용|이용|apply",
|
|
245
|
+
"연결함": r"연결|connect|통합|integrate|연동|link",
|
|
246
|
+
"확장함": r"확장|extend|플러그인|plugin|addon",
|
|
247
|
+
"생성함": r"생성|만들|create|generate|build|produced",
|
|
248
|
+
"대체함": r"대체|replace|instead|alternative",
|
|
249
|
+
"지원함": r"지원|support|제공|provide|offer",
|
|
250
|
+
"발생함": r"발생|occur|throw|raise|triggered",
|
|
251
|
+
"관련됨": r"관련|related|associated|연관",
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def _infer_edge(sentence: str) -> str:
|
|
256
|
+
"""Return the best-matching verb-form edge label for a sentence."""
|
|
257
|
+
s = sentence.lower()
|
|
258
|
+
for label, pattern in EDGE_VERB.items():
|
|
259
|
+
if re.search(pattern, s):
|
|
260
|
+
return label
|
|
261
|
+
return "관련됨"
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
# Technical words that cannot be person names
|
|
265
|
+
_NOT_PERSON_WORDS: set = {
|
|
266
|
+
"use", "api", "rag", "sdk", "ide", "cli", "llm", "mcp", "ui", "ux",
|
|
267
|
+
"new", "old", "get", "set", "run", "add", "fix", "tool", "code",
|
|
268
|
+
"base", "core", "data", "file", "test", "type", "mode", "view",
|
|
269
|
+
}
|
|
270
|
+
|
|
271
|
+
|
|
272
|
+
def _classify_node_type(concept: str, text: str) -> str:
|
|
273
|
+
"""Classify a concept into the node taxonomy.
|
|
274
|
+
|
|
275
|
+
Term-level signals take priority; then a tight ±60-char window is used
|
|
276
|
+
so distant keywords don't cause mis-classification.
|
|
277
|
+
"""
|
|
278
|
+
term = concept.lower()
|
|
279
|
+
|
|
280
|
+
# ── Term-level signals (highest confidence) ───────────────────────────
|
|
281
|
+
if re.search(r'(?:error|exception|traceback|오류|에러|버그)$', term, re.I):
|
|
282
|
+
return "Error"
|
|
283
|
+
if re.search(r'error|exception|err\b', term, re.I) and len(concept) < 30:
|
|
284
|
+
return "Error"
|
|
285
|
+
if re.search(r'\(\)|\.py$|\.js$|\.ts$|\.go$|::\w', term):
|
|
286
|
+
return "Code"
|
|
287
|
+
|
|
288
|
+
# Person: "First Last" pattern, neither word is a known technical term
|
|
289
|
+
if re.match(r'^[A-Z][a-z]{1,15} [A-Z][a-z]{1,15}$', concept):
|
|
290
|
+
words = term.split()
|
|
291
|
+
if not any(w in _NOT_PERSON_WORDS for w in words):
|
|
292
|
+
return "Person"
|
|
293
|
+
|
|
294
|
+
# ── Windowed context (±60 chars) — NOT used for Error to avoid false positives
|
|
295
|
+
idx = text.lower().find(term)
|
|
296
|
+
if idx >= 0:
|
|
297
|
+
win = text[max(0, idx - 60): idx + len(concept) + 60].lower()
|
|
298
|
+
if re.search(r'def |class |function|함수|클래스|메서드|import', win):
|
|
299
|
+
return "Code"
|
|
300
|
+
# Feature: short concept (≤12 chars) within 8 characters of a 기능/feature keyword
|
|
301
|
+
if (
|
|
302
|
+
len(concept) <= 12
|
|
303
|
+
and re.search(
|
|
304
|
+
rf'{re.escape(term)}.{{0,8}}(?:기능|feature)|(?:기능|feature).{{0,8}}{re.escape(term)}',
|
|
305
|
+
win,
|
|
306
|
+
)
|
|
307
|
+
):
|
|
308
|
+
return "Feature"
|
|
309
|
+
|
|
310
|
+
return "Concept"
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def _extract_triples(
|
|
314
|
+
text: str,
|
|
315
|
+
concepts: List[str],
|
|
316
|
+
limit: int = 20,
|
|
317
|
+
) -> List[Dict[str, str]]:
|
|
318
|
+
"""Extract (subject, verb-edge, object, context) triples from text.
|
|
319
|
+
|
|
320
|
+
For each sentence containing ≥2 concepts, infer the verb-form edge label
|
|
321
|
+
from surrounding context and create a directed triple.
|
|
322
|
+
"""
|
|
323
|
+
if len(concepts) < 2:
|
|
324
|
+
return []
|
|
325
|
+
|
|
326
|
+
concept_lower = {c.lower(): c for c in concepts}
|
|
327
|
+
triples: List[Dict[str, str]] = []
|
|
328
|
+
seen_pairs: set = set()
|
|
329
|
+
|
|
330
|
+
# Split on sentence boundaries
|
|
331
|
+
sentences = re.split(r'(?<=[.!?\n])\s+|\n{2,}', text)
|
|
332
|
+
for sent in sentences:
|
|
333
|
+
sent = sent.strip()
|
|
334
|
+
if len(sent) < 8:
|
|
335
|
+
continue
|
|
336
|
+
sent_lower = sent.lower()
|
|
337
|
+
|
|
338
|
+
present = [concept_lower[k] for k in concept_lower if k in sent_lower]
|
|
339
|
+
if len(present) < 2:
|
|
340
|
+
continue
|
|
341
|
+
|
|
342
|
+
edge = _infer_edge(sent)
|
|
343
|
+
|
|
344
|
+
for i in range(len(present) - 1):
|
|
345
|
+
subj, obj = present[i], present[i + 1]
|
|
346
|
+
# Deduplicate by (subj, obj) regardless of direction for same edge
|
|
347
|
+
pair_key = tuple(sorted([subj.lower(), obj.lower()])) + (edge,)
|
|
348
|
+
if pair_key in seen_pairs:
|
|
349
|
+
continue
|
|
350
|
+
seen_pairs.add(pair_key)
|
|
351
|
+
triples.append({
|
|
352
|
+
"subject": subj,
|
|
353
|
+
"relation": edge, # verb form (동사)
|
|
354
|
+
"object": obj,
|
|
355
|
+
"context": sent[:240],
|
|
356
|
+
})
|
|
357
|
+
if len(triples) >= limit:
|
|
358
|
+
return triples
|
|
359
|
+
|
|
360
|
+
return triples
|
|
118
361
|
|
|
119
362
|
|
|
120
363
|
def _semantic_items(text: str) -> List[Dict[str, str]]:
|
|
121
|
-
"""
|
|
364
|
+
"""Extract explicit decision / task items from text."""
|
|
122
365
|
items: List[Dict[str, str]] = []
|
|
123
366
|
for raw_line in str(text or "").splitlines():
|
|
124
367
|
line = _clean_text(raw_line)
|
|
@@ -129,7 +372,7 @@ def _semantic_items(text: str) -> List[Dict[str, str]]:
|
|
|
129
372
|
items.append({"type": "Decision", "title": line[:120], "summary": line[:500]})
|
|
130
373
|
if re.search(r"(todo|해야|하자|진행|구현|수정|확인|next|task|\[ \])", lowered):
|
|
131
374
|
items.append({"type": "Task", "title": line[:120], "summary": line[:500]})
|
|
132
|
-
return items[:
|
|
375
|
+
return items[:8]
|
|
133
376
|
|
|
134
377
|
|
|
135
378
|
class KnowledgeGraphStore:
|
|
@@ -268,54 +511,108 @@ class KnowledgeGraphStore:
|
|
|
268
511
|
"user_nickname": user_nickname,
|
|
269
512
|
"chars": len(content),
|
|
270
513
|
}
|
|
514
|
+
concepts = _extract_concepts(content)
|
|
515
|
+
triples = _extract_triples(content, concepts)
|
|
516
|
+
semantic = _semantic_items(content)
|
|
517
|
+
|
|
271
518
|
with self._connect() as conn:
|
|
272
|
-
|
|
519
|
+
# ── 1. Chat node (점: 명사 — 대화 세션 단위) ─────────────────────
|
|
520
|
+
# One Chat node per conversation_id; title = first 80 chars of
|
|
521
|
+
# the message currently being ingested (passed to _upsert_node again on each call).
|
|
522
|
+
chat_title = _clean_text(content)[:80] or (conversation_id or "대화")
|
|
523
|
+
self._upsert_node(
|
|
524
|
+
conn, conv_id, "Chat",
|
|
525
|
+
chat_title,
|
|
526
|
+
summary=_clean_text(content)[:400],
|
|
527
|
+
metadata={"source": source, "conversation_id": conversation_id},
|
|
528
|
+
)
|
|
529
|
+
|
|
530
|
+
# ── 2. Person node (점: 명사 — 사람) ─────────────────────────────
|
|
531
|
+
person_id = None
|
|
532
|
+
if user_email or user_nickname:
|
|
533
|
+
person_key = user_email or user_nickname or "unknown"
|
|
534
|
+
person_id = f"person:{_slug(person_key)}"
|
|
535
|
+
self._upsert_node(
|
|
536
|
+
conn, person_id, "Person",
|
|
537
|
+
user_nickname or user_email or "Unknown",
|
|
538
|
+
metadata={"email": user_email, "nickname": user_nickname},
|
|
539
|
+
)
|
|
540
|
+
# 선: 동사 — Person이 Chat을 "작성함"
|
|
541
|
+
self._upsert_edge(conn, person_id, conv_id, "작성함",
|
|
542
|
+
weight=1.0, metadata={"role": role})
|
|
543
|
+
|
|
544
|
+
# ── 3. Raw message node (RAG 검색용, 그래프에서 숨김) ─────────────
|
|
273
545
|
self._upsert_node(
|
|
274
|
-
conn,
|
|
275
|
-
node_id,
|
|
276
|
-
node_type,
|
|
546
|
+
conn, node_id, node_type,
|
|
277
547
|
_clean_text(content)[:80] or role,
|
|
278
548
|
summary=_clean_text(content)[:500],
|
|
279
549
|
metadata=metadata,
|
|
280
550
|
raw=raw or metadata,
|
|
281
551
|
)
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
self._upsert_edge(conn, person_id, node_id, "authored", metadata={"role": role})
|
|
552
|
+
# 선: Chat이 메시지를 "포함함"
|
|
553
|
+
self._upsert_edge(conn, conv_id, node_id, "포함함",
|
|
554
|
+
weight=0.3, metadata={"role": role})
|
|
555
|
+
|
|
556
|
+
# ── 4. RAG chunks (검색용, 그래프에서 숨김) ──────────────────────
|
|
288
557
|
for index, chunk in enumerate(_chunks(content)):
|
|
289
558
|
chunk_id = f"chunk:{_sha256_text(f'{node_id}:{index}:{chunk}')[:24]}"
|
|
290
|
-
self._upsert_node(
|
|
559
|
+
self._upsert_node(
|
|
560
|
+
conn, chunk_id, "Chunk",
|
|
561
|
+
f"chunk {index + 1}",
|
|
562
|
+
summary=chunk[:500],
|
|
563
|
+
metadata={"index": index, "source_node": node_id},
|
|
564
|
+
)
|
|
291
565
|
conn.execute(
|
|
292
|
-
""
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
(chunk_id, node_id, chunk, _json({"index": index, "source_node": node_id}), _now()),
|
|
566
|
+
"INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
|
|
567
|
+
"VALUES (?, ?, ?, ?, ?)",
|
|
568
|
+
(chunk_id, node_id, chunk,
|
|
569
|
+
_json({"index": index, "source_node": node_id}), _now()),
|
|
297
570
|
)
|
|
298
|
-
self._upsert_edge(conn, node_id, chunk_id, "
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
571
|
+
self._upsert_edge(conn, node_id, chunk_id, "포함함")
|
|
572
|
+
|
|
573
|
+
# ── 5. Concept / Feature / Error / Code 노드 (점: 명사) ───────────
|
|
574
|
+
concept_ids: Dict[str, str] = {}
|
|
575
|
+
for concept in concepts:
|
|
576
|
+
node_t = _classify_node_type(concept, content)
|
|
577
|
+
cid = f"{node_t.lower()}:{_slug(concept)}"
|
|
578
|
+
concept_ids[concept.lower()] = cid
|
|
579
|
+
self._upsert_node(
|
|
580
|
+
conn, cid, node_t, concept,
|
|
581
|
+
metadata={"auto_extracted": True, "source": source},
|
|
582
|
+
)
|
|
583
|
+
# 선: Chat이 개념을 "언급함"
|
|
584
|
+
self._upsert_edge(conn, conv_id, cid, "언급함",
|
|
585
|
+
weight=0.7, metadata={"source": source})
|
|
586
|
+
|
|
587
|
+
# ── 6. Concept–Concept 엣지 (선: 동사형) ─────────────────────────
|
|
588
|
+
for triple in triples:
|
|
589
|
+
subj_id = concept_ids.get(triple["subject"].lower())
|
|
590
|
+
obj_id = concept_ids.get(triple["object"].lower())
|
|
591
|
+
if subj_id and obj_id and subj_id != obj_id:
|
|
592
|
+
self._upsert_edge(
|
|
593
|
+
conn, subj_id, obj_id,
|
|
594
|
+
triple["relation"], # 동사형 레이블
|
|
595
|
+
weight=1.0,
|
|
596
|
+
metadata={"context": triple.get("context", "")[:240]},
|
|
597
|
+
)
|
|
598
|
+
|
|
599
|
+
# ── 7. Task / Decision 노드 (점: 명사) ────────────────────────────
|
|
600
|
+
for item in semantic:
|
|
601
|
+
sem_type = item["type"]
|
|
602
|
+
sem_title = item["title"]
|
|
603
|
+
sem_id = f"{sem_type.lower()}:{_sha256_text(f'{conv_id}:{sem_type}:{sem_title}')[:24]}"
|
|
307
604
|
self._upsert_node(
|
|
308
|
-
conn,
|
|
309
|
-
semantic_id,
|
|
310
|
-
semantic_type,
|
|
311
|
-
semantic_title,
|
|
605
|
+
conn, sem_id, sem_type, sem_title,
|
|
312
606
|
summary=item["summary"],
|
|
313
607
|
metadata={"auto_extracted": True, "source_node": node_id},
|
|
314
608
|
raw=item,
|
|
315
609
|
)
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
610
|
+
# 선: Chat이 Task/Decision을 "생성함"
|
|
611
|
+
self._upsert_edge(conn, conv_id, sem_id, "생성함", weight=0.9)
|
|
612
|
+
# Task/Decision이 관련 개념을 "언급함"
|
|
613
|
+
for cid in list(concept_ids.values())[:3]:
|
|
614
|
+
self._upsert_edge(conn, sem_id, cid, "언급함", weight=0.6)
|
|
615
|
+
|
|
319
616
|
return {"node_id": node_id, "type": node_type}
|
|
320
617
|
|
|
321
618
|
def ingest_document(
|
|
@@ -353,43 +650,92 @@ class KnowledgeGraphStore:
|
|
|
353
650
|
"extracted": {k: v for k, v in (extracted or {}).items() if k != "content"},
|
|
354
651
|
"structure": doc_meta,
|
|
355
652
|
}
|
|
653
|
+
full_text = f"{filename}\n{text}"
|
|
654
|
+
concepts = _extract_concepts(full_text, limit=15)
|
|
655
|
+
triples = _extract_triples(full_text, concepts)
|
|
656
|
+
|
|
356
657
|
with self._connect() as conn:
|
|
357
|
-
|
|
658
|
+
# ── Document 노드 (점: 명사 — 파일) ────────────────────────────────
|
|
659
|
+
self._upsert_node(
|
|
660
|
+
conn, file_id, "Document", filename,
|
|
661
|
+
summary=(text or filename)[:500],
|
|
662
|
+
metadata=metadata, raw=metadata,
|
|
663
|
+
)
|
|
358
664
|
self._ingest_structure_nodes(conn, file_id, filename, doc_meta)
|
|
665
|
+
|
|
666
|
+
# ── Person 노드 + 동사형 엣지 ─────────────────────────────────────
|
|
359
667
|
if uploader:
|
|
360
668
|
person_id = f"person:{_slug(uploader)}"
|
|
361
|
-
self._upsert_node(
|
|
362
|
-
|
|
669
|
+
self._upsert_node(
|
|
670
|
+
conn, person_id, "Person", uploader,
|
|
671
|
+
metadata={"email": uploader},
|
|
672
|
+
)
|
|
673
|
+
# 선: 동사 — Person이 Document를 "업로드함"
|
|
674
|
+
self._upsert_edge(conn, person_id, file_id, "업로드함", weight=1.0)
|
|
675
|
+
|
|
676
|
+
# ── Chat 노드와 연결 ──────────────────────────────────────────────
|
|
363
677
|
if conversation_id:
|
|
364
678
|
conv_id = f"conversation:{_slug(conversation_id)}"
|
|
365
|
-
self._upsert_node(conn, conv_id, "
|
|
366
|
-
|
|
679
|
+
self._upsert_node(conn, conv_id, "Chat", conversation_id)
|
|
680
|
+
# 선: 동사 — Chat이 Document를 "언급함"
|
|
681
|
+
self._upsert_edge(conn, conv_id, file_id, "언급함", weight=0.8)
|
|
682
|
+
|
|
683
|
+
# ── RAG chunks (검색용, 그래프 비표시) ────────────────────────────
|
|
367
684
|
for index, chunk in enumerate(_chunks(text)):
|
|
368
685
|
chunk_id = f"chunk:{_sha256_text(f'{file_id}:{index}:{chunk}')[:24]}"
|
|
369
|
-
self._upsert_node(
|
|
686
|
+
self._upsert_node(
|
|
687
|
+
conn, chunk_id, "Chunk",
|
|
688
|
+
f"{filename} chunk {index + 1}",
|
|
689
|
+
summary=chunk[:500],
|
|
690
|
+
metadata={"index": index, "source_node": file_id},
|
|
691
|
+
)
|
|
370
692
|
conn.execute(
|
|
371
|
-
"INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at)
|
|
372
|
-
|
|
693
|
+
"INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
|
|
694
|
+
"VALUES (?, ?, ?, ?, ?)",
|
|
695
|
+
(chunk_id, file_id, chunk,
|
|
696
|
+
_json({"index": index, "source_node": file_id}), _now()),
|
|
373
697
|
)
|
|
374
|
-
self._upsert_edge(conn, file_id, chunk_id, "
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
698
|
+
self._upsert_edge(conn, file_id, chunk_id, "포함함")
|
|
699
|
+
|
|
700
|
+
# ── Concept / Feature / Error / Code 노드 + 동사형 엣지 ───────────
|
|
701
|
+
concept_ids: Dict[str, str] = {}
|
|
702
|
+
for concept in concepts:
|
|
703
|
+
node_t = _classify_node_type(concept, full_text)
|
|
704
|
+
cid = f"{node_t.lower()}:{_slug(concept)}"
|
|
705
|
+
concept_ids[concept.lower()] = cid
|
|
706
|
+
self._upsert_node(
|
|
707
|
+
conn, cid, node_t, concept,
|
|
708
|
+
metadata={"auto_extracted": True, "source_file": filename},
|
|
709
|
+
)
|
|
710
|
+
# 선: 동사 — Document가 Concept을 "포함함"
|
|
711
|
+
self._upsert_edge(conn, file_id, cid, "포함함", weight=0.8)
|
|
712
|
+
|
|
713
|
+
# ── Concept–Concept 엣지 (선: 동사형) ───────────────────────────
|
|
714
|
+
for triple in triples:
|
|
715
|
+
subj_id = concept_ids.get(triple["subject"].lower())
|
|
716
|
+
obj_id = concept_ids.get(triple["object"].lower())
|
|
717
|
+
if subj_id and obj_id and subj_id != obj_id:
|
|
718
|
+
self._upsert_edge(
|
|
719
|
+
conn, subj_id, obj_id,
|
|
720
|
+
triple["relation"],
|
|
721
|
+
weight=1.0,
|
|
722
|
+
metadata={"context": triple.get("context", "")[:240]},
|
|
723
|
+
)
|
|
724
|
+
|
|
725
|
+
# ── Task / Decision 노드 ──────────────────────────────────────────
|
|
379
726
|
for item in _semantic_items(text):
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
727
|
+
sem_type = item["type"]
|
|
728
|
+
sem_title = item["title"]
|
|
729
|
+
sem_id = f"{sem_type.lower()}:{_sha256_text(f'{file_id}:{sem_type}:{sem_title}')[:24]}"
|
|
383
730
|
self._upsert_node(
|
|
384
|
-
conn,
|
|
385
|
-
semantic_id,
|
|
386
|
-
semantic_type,
|
|
387
|
-
semantic_title,
|
|
731
|
+
conn, sem_id, sem_type, sem_title,
|
|
388
732
|
summary=item["summary"],
|
|
389
733
|
metadata={"auto_extracted": True, "source_node": file_id, "filename": filename},
|
|
390
734
|
raw=item,
|
|
391
735
|
)
|
|
392
|
-
|
|
736
|
+
# 선: Document가 Task/Decision을 "포함함"
|
|
737
|
+
self._upsert_edge(conn, file_id, sem_id, "포함함", weight=0.9)
|
|
738
|
+
|
|
393
739
|
return {"node_id": file_id, "sha256": digest, "metadata": metadata}
|
|
394
740
|
|
|
395
741
|
def ingest_event(
|
|
@@ -597,8 +943,23 @@ class KnowledgeGraphStore:
|
|
|
597
943
|
sheets.append({"title": ws.title, "max_row": ws.max_row, "max_column": ws.max_column})
|
|
598
944
|
return {"sheets": sheets}
|
|
599
945
|
|
|
946
|
+
# ── 그래프에 표시되는 노드 타입 (점 = 명사) ──────────────────────────────
|
|
947
|
+
# Message / AIResponse / Chunk 는 RAG 검색용으로만 저장, 그래프에서 숨김.
|
|
948
|
+
_GRAPH_VISIBLE_TYPES = (
|
|
949
|
+
"Chat", # 대화 세션
|
|
950
|
+
"Document", # 파일 (PDF·PPT·Word·Excel·이미지)
|
|
951
|
+
"Concept", # 개념 / 아이디어 / 기술 용어
|
|
952
|
+
"Person", # 사람
|
|
953
|
+
"Error", # 오류 / 버그
|
|
954
|
+
"Code", # 코드 / 함수
|
|
955
|
+
"Feature", # 소프트웨어 기능
|
|
956
|
+
"Task", # 할 일
|
|
957
|
+
"Decision", # 결정 사항
|
|
958
|
+
)
|
|
959
|
+
|
|
600
960
|
def graph(self, limit: int = 300) -> Dict[str, Any]:
|
|
601
961
|
limit = max(1, min(int(limit or 300), 2000))
|
|
962
|
+
visible = ",".join(f"'{t}'" for t in self._GRAPH_VISIBLE_TYPES)
|
|
602
963
|
with self._connect() as conn:
|
|
603
964
|
nodes = [
|
|
604
965
|
{
|
|
@@ -610,7 +971,7 @@ class KnowledgeGraphStore:
|
|
|
610
971
|
"updated_at": row["updated_at"],
|
|
611
972
|
}
|
|
612
973
|
for row in conn.execute(
|
|
613
|
-
"SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE type
|
|
974
|
+
f"SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE type IN ({visible}) ORDER BY updated_at DESC LIMIT ?",
|
|
614
975
|
(limit,),
|
|
615
976
|
)
|
|
616
977
|
]
|
|
@@ -618,24 +979,18 @@ class KnowledgeGraphStore:
|
|
|
618
979
|
edges: List[Dict[str, Any]] = []
|
|
619
980
|
if node_ids:
|
|
620
981
|
edge_rows = conn.execute(
|
|
621
|
-
"""
|
|
982
|
+
f"""
|
|
622
983
|
SELECT id, from_node, to_node, type, weight, metadata_json
|
|
623
984
|
FROM edges
|
|
624
985
|
WHERE from_node IN (
|
|
625
|
-
SELECT id
|
|
626
|
-
|
|
627
|
-
WHERE type != 'Chunk'
|
|
628
|
-
ORDER BY updated_at DESC
|
|
629
|
-
LIMIT ?
|
|
986
|
+
SELECT id FROM nodes WHERE type IN ({visible})
|
|
987
|
+
ORDER BY updated_at DESC LIMIT ?
|
|
630
988
|
)
|
|
631
989
|
AND to_node IN (
|
|
632
|
-
SELECT id
|
|
633
|
-
|
|
634
|
-
WHERE type != 'Chunk'
|
|
635
|
-
ORDER BY updated_at DESC
|
|
636
|
-
LIMIT ?
|
|
990
|
+
SELECT id FROM nodes WHERE type IN ({visible})
|
|
991
|
+
ORDER BY updated_at DESC LIMIT ?
|
|
637
992
|
)
|
|
638
|
-
ORDER BY created_at DESC
|
|
993
|
+
ORDER BY weight DESC, created_at DESC
|
|
639
994
|
""",
|
|
640
995
|
(limit, limit),
|
|
641
996
|
).fetchall()
|
package/ltcai_cli.py
CHANGED
|
@@ -248,6 +248,10 @@ def main() -> None:
|
|
|
248
248
|
|
|
249
249
|
os.chdir(app_dir)
|
|
250
250
|
|
|
251
|
+
# LATTICEAI_TUNNEL=true in .env acts like --tunnel flag
|
|
252
|
+
if not args.tunnel and os.getenv("LATTICEAI_TUNNEL", "").lower() in ("1", "true", "yes"):
|
|
253
|
+
args.tunnel = True
|
|
254
|
+
|
|
251
255
|
# --tunnel forces 0.0.0.0 so cloudflared can reach the server
|
|
252
256
|
if args.tunnel and args.host == "127.0.0.1":
|
|
253
257
|
args.host = "0.0.0.0"
|
|
@@ -265,6 +269,21 @@ def main() -> None:
|
|
|
265
269
|
|
|
266
270
|
_print_banner(args.host, args.port, tunnel_url)
|
|
267
271
|
|
|
272
|
+
# Telegram startup notification (local start, tunnel handled separately inside _start_tunnel)
|
|
273
|
+
if not args.tunnel:
|
|
274
|
+
_tg_token = os.getenv("LATTICEAI_TELEGRAM_BOT_TOKEN", "")
|
|
275
|
+
_tg_chat = os.getenv("LATTICEAI_TELEGRAM_CHAT_ID", "")
|
|
276
|
+
if _tg_token and _tg_chat:
|
|
277
|
+
_local_msg = (
|
|
278
|
+
f"✅ Lattice AI 시작됨\n\n"
|
|
279
|
+
f"🏠 로컬: http://localhost:{args.port}"
|
|
280
|
+
)
|
|
281
|
+
threading.Thread(
|
|
282
|
+
target=_send_telegram,
|
|
283
|
+
args=(_tg_token, _tg_chat, _local_msg),
|
|
284
|
+
daemon=True,
|
|
285
|
+
).start()
|
|
286
|
+
|
|
268
287
|
import uvicorn
|
|
269
288
|
|
|
270
289
|
uvicorn.run(
|