ltcai 0.1.24 → 0.1.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -294,7 +294,7 @@ Or: `./start_ai.sh` (auto-restart + caffeinate)
294
294
  | VS Code Marketplace | [marketplace.visualstudio.com](https://marketplace.visualstudio.com/items?itemName=parktaesoo.ltcai) |
295
295
  | Open VSX | [open-vsx.org](https://open-vsx.org/extension/parktaesoo/ltcai) |
296
296
 
297
- Current version: **0.1.24** — [Changelog](docs/CHANGELOG.md)
297
+ Current version: **0.1.25** — [Changelog](docs/CHANGELOG.md)
298
298
 
299
299
  ---
300
300
 
package/docs/CHANGELOG.md CHANGED
@@ -1,5 +1,42 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.1.25] - 2026-05-24
4
+
5
+ ### Knowledge Graph 전면 재설계 — 점=명사, 선=동사
6
+
7
+ **설계 원칙**
8
+ - **점(Node) = 명사** — 의미 있는 대상 (문서, 사람, 개념, 에러, 코드, 채팅 등)
9
+ - **선(Edge) = 동사** — 대상 간의 관계 (언급함, 포함함, 해결함, 의존함 등)
10
+ - 원본 데이터(PDF·PPT·채팅·코드 등)는 그대로 보관, AI가 핵심 개념을 추출해 점으로 만들고 관계를 선으로 연결
11
+
12
+ **노드 타입 (점 = 명사)**
13
+ - `Chat` — 대화 세션
14
+ - `Document` — 파일 (PDF·PPT·Word·Excel·이미지)
15
+ - `Concept` — 개념·아이디어·기술 용어
16
+ - `Person` — 사람 (사용자, 언급된 인물)
17
+ - `Error` — 오류·버그·예외
18
+ - `Code` — 코드·함수·클래스
19
+ - `Feature` — 소프트웨어 기능
20
+ - `Task` — 할 일·액션 아이템
21
+ - `Decision` — 결정 사항
22
+
23
+ **엣지 어휘 (선 = 동사형)**
24
+ `언급함` · `포함함` · `해결함` · `의존함` · `설명함` · `비교함` · `사용함` · `연결함` · `확장함` · `생성함` · `대체함` · `지원함` · `발생함` · `관련됨` · `작성함` · `업로드함`
25
+
26
+ **핵심 개선**
27
+ - `_extract_concepts()` — 고유명사·복합어·기술 용어 추출 (Lattice AI, Graph RAG, VS Code 등)
28
+ - `_classify_node_type()` — 개념별 노드 타입 자동 분류 (윈도우 컨텍스트 기반)
29
+ - `_infer_edge()` — 문장 내 동사·조사 패턴으로 엣지 레이블 자동 결정
30
+ - `_extract_triples()` — 문장 단위 개념 쌍 → (주어, 동사, 목적어) 트리플 추출
31
+ - `ingest_message()` 재설계 — 메시지 단위 → 대화 세션(Chat) 단위 노드
32
+ - `ingest_document()` 재설계 — Document 노드 + 동사형 엣지 (포함함, 업로드함)
33
+ - 중복 제거 — 하위 개념이 상위 복합어에 완전히 흡수될 때만 제거
34
+ - Message·AIResponse·Chunk 노드는 RAG 검색용으로만 저장, 그래프 비표시
35
+
36
+ ### Release
37
+ - 배포 버전을 `0.1.25`로 상향
38
+ - 대상 채널: `npm` · `PyPI` · `VS Code Marketplace` · `Open VSX`
39
+
3
40
  ## [0.1.24] - 2026-05-24
4
41
 
5
42
  ### 안정화 및 UX 개선
@@ -94,31 +94,274 @@ def _chunks(text: str, size: int = 1200, overlap: int = 160) -> List[str]:
94
94
  return chunks
95
95
 
96
96
 
97
- def _topic_candidates(text: str, limit: int = 8) -> List[str]:
97
+ _CONCEPT_STOP: set = {
98
+ # English stop words
99
+ "the", "and", "for", "with", "this", "that", "from", "into", "which",
100
+ "are", "was", "were", "has", "have", "had", "can", "will", "would",
101
+ "could", "should", "may", "might", "must", "shall", "being", "been",
102
+ "also", "just", "then", "than", "when", "where", "what", "how", "why",
103
+ "its", "their", "your", "our", "you", "they", "them", "these", "those",
104
+ "use", "used", "using", "based", "like", "such", "via", "per", "let",
105
+ "yes", "not", "but", "are", "all", "any", "out", "new", "get", "set",
106
+ # Korean stop words
107
+ "사용자", "내용", "파일", "채팅", "답변", "입니다", "그리고", "처럼",
108
+ "있어", "없어", "이야", "이다", "한다", "하다", "되다", "됩니다",
109
+ "경우", "방법", "부분", "상태", "정도", "결과", "이후", "이전",
110
+ "그것", "이것", "저것", "여기", "거기", "저기", "우리", "저희",
111
+ "기능", "서버", "모델", "설정", "설명", "버전", "지원", "사용", "실행",
112
+ "todo", "fixme", "note", "참고", "주의", "warning",
113
+ }
114
+
115
+
116
+ def _extract_concepts(text: str, limit: int = 12) -> List[str]:
117
+ """Extract meaningful named concepts from text.
118
+
119
+ Priority order:
120
+ 1. Backtick / quoted terms (explicitly technical)
121
+ 2. Multi-word proper nouns (Lattice AI, GPT-4o, Claude Sonnet)
122
+ 3. Single capitalized proper nouns not at sentence start (Claude, Python, FastAPI)
123
+ 4. Korean compound technical terms (멀티모달, 에이전트, 그래프RAG)
124
+ 5. Hyphenated / versioned identifiers (gpt-4o, mlx-lm, llama-3.3)
125
+ """
98
126
  text = str(text or "")
99
- candidates: Dict[str, int] = {}
100
- patterns = [
101
- r"[A-Za-z][A-Za-z0-9_\-./]{2,}",
102
- r"[가-힣][가-힣A-Za-z0-9_\-]{1,}",
103
- ]
104
- stop = {
105
- "the", "and", "for", "with", "this", "that", "from", "into",
106
- "사용자", "내용", "파일", "채팅", "답변", "입니다", "그리고", "처럼",
107
- }
108
- for pattern in patterns:
109
- for match in re.findall(pattern, text):
110
- key = match.strip("._-/").lower()
111
- if (len(key) < 3 and not re.search(r"[가-힣]", key)) or key in stop or key.isdigit():
127
+ seen: dict = {} # concept_lower → original form
128
+
129
+ def _add(term: str) -> None:
130
+ key = term.strip().lower()
131
+ if (
132
+ key
133
+ and key not in _CONCEPT_STOP
134
+ and not key.isdigit()
135
+ and len(key) >= 2
136
+ ):
137
+ seen.setdefault(key, term.strip())
138
+
139
+ # 1. Backtick-quoted code/term (highest confidence)
140
+ for m in re.findall(r'`([^`]{2,40})`', text):
141
+ if not re.search(r'[\(\)\[\]{}]', m): # skip code expressions
142
+ _add(m)
143
+
144
+ # 2. Double/single quoted terms
145
+ for m in re.findall(r'"([^"]{2,40})"', text):
146
+ _add(m)
147
+
148
+ # 3. Multi-word English proper nouns (Title Case or ALL-CAPS first word, 2–4 words).
149
+ # Pattern A: Mixed-case first word — "Lattice AI", "Tool Use", "Graph RAG"
150
+ for m in re.findall(
151
+ r'([A-Z][a-z]{1,20}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20}|\d[\w.]{0,6})){1,3})',
152
+ text,
153
+ ):
154
+ _add(m)
155
+ # Pattern B: ALL-CAPS first word — "VS Code", "MCP Server", "GPT-4o Mini"
156
+ for m in re.findall(
157
+ r'([A-Z]{2,6}(?:\s+(?:[A-Z]{2,10}|[A-Z][a-z0-9]{1,20})){1,2})',
158
+ text,
159
+ ):
160
+ _add(m)
161
+
162
+ # 4. Single capitalized proper noun.
163
+ # Use ASCII-boundary lookaround instead of \b so Korean particles
164
+ # (와, 의, 는 …) after an English word don't block the match.
165
+ all_caps_words = re.findall(r'(?<![A-Za-z0-9])([A-Z][A-Za-z0-9]{2,24})(?![A-Za-z0-9])', text)
166
+ freq: Dict[str, int] = {}
167
+ for w in all_caps_words:
168
+ freq[w] = freq.get(w, 0) + 1
169
+ sentence_starts = set(re.findall(r'(?:^|(?<=[.!?])\s+)([A-Z][a-z]+)', text))
170
+ for m, cnt in freq.items():
171
+ if m.lower() in _CONCEPT_STOP:
172
+ continue
173
+ if cnt >= 2 or m not in sentence_starts:
174
+ _add(m)
175
+
176
+ # 5. Korean technical compound nouns (3–12 chars, no common particles)
177
+ for m in re.findall(r'[가-힣]{2,12}(?:AI|LLM|API|UI|RAG|bot|Bot|기능|모델|서버|에이전트|파이프라인|워크플로)', text):
178
+ _add(m)
179
+ # Korean standalone terms that appear after topic markers (은/는/이/가 앞)
180
+ for m in re.findall(r'([가-힣]{2,12})(?:은|는|이|가|을|를|의|에서|으로|와|과)', text):
181
+ if m.lower() not in _CONCEPT_STOP and len(m) >= 2:
182
+ # Only add if it's non-trivial (has 3+ chars or appears multiple times)
183
+ cnt = text.count(m)
184
+ if len(m) >= 3 or cnt >= 2:
185
+ _add(m)
186
+
187
+ # 6. Hyphenated / versioned identifiers (gpt-4o, llama-3.3, mlx-lm)
188
+ for m in re.findall(r'\b([a-zA-Z][a-zA-Z0-9]*(?:-[a-zA-Z0-9.]+)+)\b', text):
189
+ if len(m) >= 4:
190
+ _add(m)
191
+
192
+ # De-duplicate: remove shorter if ALL its occurrences in the source text
193
+ # are followed immediately by the suffix that forms the longer concept.
194
+ # "Lattice" → dropped when every occurrence is "Lattice AI"
195
+ # "Claude" → kept because it appears as just "Claude" too.
196
+ values = list(seen.values())
197
+ values_lower = [v.lower() for v in values]
198
+ keep = set(range(len(values)))
199
+ for i, v in enumerate(values):
200
+ vl = v.lower()
201
+ for j, wl in enumerate(values_lower):
202
+ if i == j or j not in keep:
203
+ continue
204
+ # Check if vl is a word-prefix of wl
205
+ suffix = wl[len(vl):]
206
+ if not (wl.startswith(vl) and re.match(r'^[\s\-]', suffix)):
112
207
  continue
113
- candidates[key] = candidates.get(key, 0) + 1
114
- return [
115
- k for k, v in sorted(candidates.items(), key=lambda item: (-item[1], item[0]))
116
- if (re.search(r"[가-힣]", k) and len(k) >= 2) or (len(k) >= 4 and (v >= 2 or len(k) >= 6))
117
- ][:limit]
208
+ # Count occurrences of v NOT followed by the suffix
209
+ suffix_stripped = suffix.lstrip(" -")
210
+ # Escape for regex
211
+ pattern_with_suffix = re.escape(v) + r'[\s\-]+' + re.escape(suffix_stripped)
212
+ pattern_alone = re.escape(v) + r'(?![\s\-]*' + re.escape(suffix_stripped) + r')'
213
+ alone_count = len(re.findall(pattern_alone, text, re.IGNORECASE))
214
+ if alone_count == 0:
215
+ # Shorter term never appears alone → safe to remove
216
+ keep.discard(i)
217
+ break
218
+
219
+ final = [values[i] for i in range(len(values)) if i in keep]
220
+ return final[:limit]
221
+
222
+
223
+ # ──────────────────────────────────────────────────────────────────────────────
224
+ # Node type taxonomy (점 = 명사)
225
+ # ──────────────────────────────────────────────────────────────────────────────
226
+ # Chat — 대화 세션
227
+ # Document — 파일 (PDF·PPT·Word·Excel·이미지 등)
228
+ # Concept — 개념·아이디어·기술 용어
229
+ # Person — 사람 (사용자, 언급된 인물)
230
+ # Error — 오류·버그·예외
231
+ # Code — 코드 스니펫·함수·클래스
232
+ # Feature — 소프트웨어 기능
233
+ # Task — 할 일·액션 아이템
234
+ # Decision — 결정 사항
235
+
236
+ # Edge type vocabulary (선 = 동사 — 과거형 서술어)
237
+ EDGE_VERB = {
238
+ "언급함": r"언급|mention|refer|cited",
239
+ "포함함": r"포함|include|consist|구성|탑재|contains",
240
+ "해결함": r"해결|resolv|fix|수정|고쳤|closed",
241
+ "의존함": r"의존|depend|require|필요|based on",
242
+ "설명함": r"설명|explain|describe|정의|란|이란|means",
243
+ "비교함": r"비교|versus|vs\.?|차이|다르|compare",
244
+ "사용함": r"사용|use|활용|이용|apply",
245
+ "연결함": r"연결|connect|통합|integrate|연동|link",
246
+ "확장함": r"확장|extend|플러그인|plugin|addon",
247
+ "생성함": r"생성|만들|create|generate|build|produced",
248
+ "대체함": r"대체|replace|instead|alternative",
249
+ "지원함": r"지원|support|제공|provide|offer",
250
+ "발생함": r"발생|occur|throw|raise|triggered",
251
+ "관련됨": r"관련|related|associated|연관",
252
+ }
253
+
254
+
255
+ def _infer_edge(sentence: str) -> str:
256
+ """Return the best-matching verb-form edge label for a sentence."""
257
+ s = sentence.lower()
258
+ for label, pattern in EDGE_VERB.items():
259
+ if re.search(pattern, s):
260
+ return label
261
+ return "관련됨"
262
+
263
+
264
+ # Technical words that cannot be person names
265
+ _NOT_PERSON_WORDS: set = {
266
+ "use", "api", "rag", "sdk", "ide", "cli", "llm", "mcp", "ui", "ux",
267
+ "new", "old", "get", "set", "run", "add", "fix", "tool", "code",
268
+ "base", "core", "data", "file", "test", "type", "mode", "view",
269
+ }
270
+
271
+
272
+ def _classify_node_type(concept: str, text: str) -> str:
273
+ """Classify a concept into the node taxonomy.
274
+
275
+ Term-level signals take priority; then a tight ±60-char window is used
276
+ so distant keywords don't cause mis-classification.
277
+ """
278
+ term = concept.lower()
279
+
280
+ # ── Term-level signals (highest confidence) ───────────────────────────
281
+ if re.search(r'(?:error|exception|traceback|오류|에러|버그)$', term, re.I):
282
+ return "Error"
283
+ if re.search(r'error|exception|err\b', term, re.I) and len(concept) < 30:
284
+ return "Error"
285
+ if re.search(r'\(\)|\.py$|\.js$|\.ts$|\.go$|::\w', term):
286
+ return "Code"
287
+
288
+ # Person: "First Last" pattern, neither word is a known technical term
289
+ if re.match(r'^[A-Z][a-z]{1,15} [A-Z][a-z]{1,15}$', concept):
290
+ words = term.split()
291
+ if not any(w in _NOT_PERSON_WORDS for w in words):
292
+ return "Person"
293
+
294
+ # ── Windowed context (±60 chars) — NOT used for Error to avoid false positives
295
+ idx = text.lower().find(term)
296
+ if idx >= 0:
297
+ win = text[max(0, idx - 60): idx + len(concept) + 60].lower()
298
+ if re.search(r'def |class |function|함수|클래스|메서드|import', win):
299
+ return "Code"
300
+ # Feature: concept appears DIRECTLY adjacent to 기능/feature keyword
301
+ if (
302
+ len(concept) <= 12
303
+ and re.search(
304
+ rf'{re.escape(term)}.{{0,8}}(?:기능|feature)|(?:기능|feature).{{0,8}}{re.escape(term)}',
305
+ win,
306
+ )
307
+ ):
308
+ return "Feature"
309
+
310
+ return "Concept"
311
+
312
+
313
def _extract_triples(
    text: str,
    concepts: List[str],
    limit: int = 20,
) -> List[Dict[str, str]]:
    """Extract (subject, verb-edge, object, context) triples from text.

    The text is split into sentences.  For every sentence that mentions at
    least two of ``concepts``, one edge label is inferred for the whole
    sentence via ``_infer_edge`` and a triple is emitted for each pair of
    neighbouring concepts, ordered by where they first appear in the
    sentence (earlier concept = subject).

    Concepts are matched case-insensitively with ASCII-alphanumeric
    boundaries, so "AI" is not found inside "said" while Korean particles
    attached directly to a concept do not block the match.

    Args:
        text: Source text; ``None`` is accepted.
        concepts: Concept strings to look for.
        limit: Maximum number of triples; values <= 0 yield ``[]``.

    Returns:
        A list of dicts with keys ``subject``, ``relation``, ``object`` and
        ``context`` (the sentence, truncated to 240 characters).  A given
        unordered concept pair is emitted at most once per edge label.
    """
    text = str(text or "")
    if len(concepts) < 2 or limit <= 0:
        return []

    concept_lower = {c.lower(): c for c in concepts}
    # Compile each matcher once; they are reused for every sentence.
    matchers = [
        (original, re.compile(r'(?<![a-z0-9])' + re.escape(key) + r'(?![a-z0-9])'))
        for key, original in concept_lower.items()
        if key
    ]
    triples: List[Dict[str, str]] = []
    seen_pairs: set = set()

    # Split on sentence boundaries
    for raw_sentence in re.split(r'(?<=[.!?\n])\s+|\n{2,}', text):
        sent = raw_sentence.strip()
        if len(sent) < 8:
            continue
        sent_lower = sent.lower()

        located = []
        for original, matcher in matchers:
            hit = matcher.search(sent_lower)
            if hit:
                located.append((hit.start(), original))
        if len(located) < 2:
            continue
        # Order by position in the sentence so the triple direction follows
        # the text; the sort is stable for concepts at the same offset.
        located.sort(key=lambda item: item[0])
        present = [original for _, original in located]

        edge = _infer_edge(sent)

        for subj, obj in zip(present, present[1:]):
            # Deduplicate by unordered (subj, obj) pair for the same edge
            pair_key = tuple(sorted([subj.lower(), obj.lower()])) + (edge,)
            if pair_key in seen_pairs:
                continue
            seen_pairs.add(pair_key)
            triples.append({
                "subject": subj,
                "relation": edge,  # verb form
                "object": obj,
                "context": sent[:240],
            })
            if len(triples) >= limit:
                return triples

    return triples
118
361
 
119
362
 
120
363
  def _semantic_items(text: str) -> List[Dict[str, str]]:
121
- """Lightweight extraction for product MVP before model-based IE is wired in."""
364
+ """Extract explicit decision / task items from text."""
122
365
  items: List[Dict[str, str]] = []
123
366
  for raw_line in str(text or "").splitlines():
124
367
  line = _clean_text(raw_line)
@@ -129,7 +372,7 @@ def _semantic_items(text: str) -> List[Dict[str, str]]:
129
372
  items.append({"type": "Decision", "title": line[:120], "summary": line[:500]})
130
373
  if re.search(r"(todo|해야|하자|진행|구현|수정|확인|next|task|\[ \])", lowered):
131
374
  items.append({"type": "Task", "title": line[:120], "summary": line[:500]})
132
- return items[:12]
375
+ return items[:8]
133
376
 
134
377
 
135
378
  class KnowledgeGraphStore:
@@ -268,54 +511,108 @@ class KnowledgeGraphStore:
268
511
  "user_nickname": user_nickname,
269
512
  "chars": len(content),
270
513
  }
514
+ concepts = _extract_concepts(content)
515
+ triples = _extract_triples(content, concepts)
516
+ semantic = _semantic_items(content)
517
+
271
518
  with self._connect() as conn:
272
- self._upsert_node(conn, conv_id, "Conversation", conversation_id or "Default conversation", metadata={"source": source})
519
+ # ── 1. Chat node (점: 명사 대화 세션 단위) ─────────────────────
520
+ # One Chat node per conversation_id; title = first 80 chars of
521
+ # the first user message in this session (updated on each call).
522
+ chat_title = _clean_text(content)[:80] or (conversation_id or "대화")
523
+ self._upsert_node(
524
+ conn, conv_id, "Chat",
525
+ chat_title,
526
+ summary=_clean_text(content)[:400],
527
+ metadata={"source": source, "conversation_id": conversation_id},
528
+ )
529
+
530
+ # ── 2. Person node (점: 명사 — 사람) ─────────────────────────────
531
+ person_id = None
532
+ if user_email or user_nickname:
533
+ person_key = user_email or user_nickname or "unknown"
534
+ person_id = f"person:{_slug(person_key)}"
535
+ self._upsert_node(
536
+ conn, person_id, "Person",
537
+ user_nickname or user_email or "Unknown",
538
+ metadata={"email": user_email, "nickname": user_nickname},
539
+ )
540
+ # 선: 동사 — Person이 Chat을 "작성함"
541
+ self._upsert_edge(conn, person_id, conv_id, "작성함",
542
+ weight=1.0, metadata={"role": role})
543
+
544
+ # ── 3. Raw message node (RAG 검색용, 그래프에서 숨김) ─────────────
273
545
  self._upsert_node(
274
- conn,
275
- node_id,
276
- node_type,
546
+ conn, node_id, node_type,
277
547
  _clean_text(content)[:80] or role,
278
548
  summary=_clean_text(content)[:500],
279
549
  metadata=metadata,
280
550
  raw=raw or metadata,
281
551
  )
282
- self._upsert_edge(conn, conv_id, node_id, "contains", metadata={"source": source})
283
- if user_email or user_nickname:
284
- person_key = user_email or user_nickname or "unknown"
285
- person_id = f"person:{_slug(person_key)}"
286
- self._upsert_node(conn, person_id, "Person", user_nickname or user_email or "Unknown user", metadata={"email": user_email})
287
- self._upsert_edge(conn, person_id, node_id, "authored", metadata={"role": role})
552
+ # 선: Chat이 메시지를 "포함함"
553
+ self._upsert_edge(conn, conv_id, node_id, "포함함",
554
+ weight=0.3, metadata={"role": role})
555
+
556
+ # ── 4. RAG chunks (검색용, 그래프에서 숨김) ──────────────────────
288
557
  for index, chunk in enumerate(_chunks(content)):
289
558
  chunk_id = f"chunk:{_sha256_text(f'{node_id}:{index}:{chunk}')[:24]}"
290
- self._upsert_node(conn, chunk_id, "Chunk", f"{node_type} chunk {index + 1}", summary=chunk[:500], metadata={"index": index, "source_node": node_id})
559
+ self._upsert_node(
560
+ conn, chunk_id, "Chunk",
561
+ f"chunk {index + 1}",
562
+ summary=chunk[:500],
563
+ metadata={"index": index, "source_node": node_id},
564
+ )
291
565
  conn.execute(
292
- """
293
- INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at)
294
- VALUES (?, ?, ?, ?, ?)
295
- """,
296
- (chunk_id, node_id, chunk, _json({"index": index, "source_node": node_id}), _now()),
566
+ "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
567
+ "VALUES (?, ?, ?, ?, ?)",
568
+ (chunk_id, node_id, chunk,
569
+ _json({"index": index, "source_node": node_id}), _now()),
297
570
  )
298
- self._upsert_edge(conn, node_id, chunk_id, "has_chunk")
299
- for topic in _topic_candidates(content):
300
- topic_id = f"topic:{_slug(topic)}"
301
- self._upsert_node(conn, topic_id, "Topic", topic, metadata={"auto_extracted": True})
302
- self._upsert_edge(conn, node_id, topic_id, "mentions", weight=0.5)
303
- for item in _semantic_items(content):
304
- semantic_type = item["type"]
305
- semantic_title = item["title"]
306
- semantic_id = f"{semantic_type.lower()}:{_sha256_text(f'{node_id}:{semantic_type}:{semantic_title}')[:24]}"
571
+ self._upsert_edge(conn, node_id, chunk_id, "포함함")
572
+
573
+ # ── 5. Concept / Feature / Error / Code 노드 (점: 명사) ───────────
574
+ concept_ids: Dict[str, str] = {}
575
+ for concept in concepts:
576
+ node_t = _classify_node_type(concept, content)
577
+ cid = f"{node_t.lower()}:{_slug(concept)}"
578
+ concept_ids[concept.lower()] = cid
579
+ self._upsert_node(
580
+ conn, cid, node_t, concept,
581
+ metadata={"auto_extracted": True, "source": source},
582
+ )
583
+ # 선: Chat이 개념을 "언급함"
584
+ self._upsert_edge(conn, conv_id, cid, "언급함",
585
+ weight=0.7, metadata={"source": source})
586
+
587
+ # ── 6. Concept–Concept 엣지 (선: 동사형) ─────────────────────────
588
+ for triple in triples:
589
+ subj_id = concept_ids.get(triple["subject"].lower())
590
+ obj_id = concept_ids.get(triple["object"].lower())
591
+ if subj_id and obj_id and subj_id != obj_id:
592
+ self._upsert_edge(
593
+ conn, subj_id, obj_id,
594
+ triple["relation"], # 동사형 레이블
595
+ weight=1.0,
596
+ metadata={"context": triple.get("context", "")[:240]},
597
+ )
598
+
599
+ # ── 7. Task / Decision 노드 (점: 명사) ────────────────────────────
600
+ for item in semantic:
601
+ sem_type = item["type"]
602
+ sem_title = item["title"]
603
+ sem_id = f"{sem_type.lower()}:{_sha256_text(f'{conv_id}:{sem_type}:{sem_title}')[:24]}"
307
604
  self._upsert_node(
308
- conn,
309
- semantic_id,
310
- semantic_type,
311
- semantic_title,
605
+ conn, sem_id, sem_type, sem_title,
312
606
  summary=item["summary"],
313
607
  metadata={"auto_extracted": True, "source_node": node_id},
314
608
  raw=item,
315
609
  )
316
- self._upsert_edge(conn, node_id, semantic_id, "implies", weight=0.8)
317
- if node_type == "AIResponse":
318
- self._upsert_edge(conn, semantic_id, node_id, "based_on", weight=0.6)
610
+ # 선: Chat이 Task/Decision을 "생성함"
611
+ self._upsert_edge(conn, conv_id, sem_id, "생성함", weight=0.9)
612
+ # Task/Decision이 관련 개념을 "언급함"
613
+ for cid in list(concept_ids.values())[:3]:
614
+ self._upsert_edge(conn, sem_id, cid, "언급함", weight=0.6)
615
+
319
616
  return {"node_id": node_id, "type": node_type}
320
617
 
321
618
  def ingest_document(
@@ -353,43 +650,92 @@ class KnowledgeGraphStore:
353
650
  "extracted": {k: v for k, v in (extracted or {}).items() if k != "content"},
354
651
  "structure": doc_meta,
355
652
  }
653
+ full_text = f"{filename}\n{text}"
654
+ concepts = _extract_concepts(full_text, limit=15)
655
+ triples = _extract_triples(full_text, concepts)
656
+
356
657
  with self._connect() as conn:
357
- self._upsert_node(conn, file_id, "File", filename, summary=(text or filename)[:500], metadata=metadata, raw=metadata)
658
+ # ── Document 노드 (점: 명사 — 파일) ────────────────────────────────
659
+ self._upsert_node(
660
+ conn, file_id, "Document", filename,
661
+ summary=(text or filename)[:500],
662
+ metadata=metadata, raw=metadata,
663
+ )
358
664
  self._ingest_structure_nodes(conn, file_id, filename, doc_meta)
665
+
666
+ # ── Person 노드 + 동사형 엣지 ─────────────────────────────────────
359
667
  if uploader:
360
668
  person_id = f"person:{_slug(uploader)}"
361
- self._upsert_node(conn, person_id, "Person", uploader, metadata={"email": uploader})
362
- self._upsert_edge(conn, person_id, file_id, "uploaded")
669
+ self._upsert_node(
670
+ conn, person_id, "Person", uploader,
671
+ metadata={"email": uploader},
672
+ )
673
+ # 선: 동사 — Person이 Document를 "업로드함"
674
+ self._upsert_edge(conn, person_id, file_id, "업로드함", weight=1.0)
675
+
676
+ # ── Chat 노드와 연결 ──────────────────────────────────────────────
363
677
  if conversation_id:
364
678
  conv_id = f"conversation:{_slug(conversation_id)}"
365
- self._upsert_node(conn, conv_id, "Conversation", conversation_id)
366
- self._upsert_edge(conn, conv_id, file_id, "contains")
679
+ self._upsert_node(conn, conv_id, "Chat", conversation_id)
680
+ # 선: 동사 — Chat이 Document를 "언급함"
681
+ self._upsert_edge(conn, conv_id, file_id, "언급함", weight=0.8)
682
+
683
+ # ── RAG chunks (검색용, 그래프 비표시) ────────────────────────────
367
684
  for index, chunk in enumerate(_chunks(text)):
368
685
  chunk_id = f"chunk:{_sha256_text(f'{file_id}:{index}:{chunk}')[:24]}"
369
- self._upsert_node(conn, chunk_id, "Chunk", f"{filename} chunk {index + 1}", summary=chunk[:500], metadata={"index": index, "source_node": file_id})
686
+ self._upsert_node(
687
+ conn, chunk_id, "Chunk",
688
+ f"{filename} chunk {index + 1}",
689
+ summary=chunk[:500],
690
+ metadata={"index": index, "source_node": file_id},
691
+ )
370
692
  conn.execute(
371
- "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) VALUES (?, ?, ?, ?, ?)",
372
- (chunk_id, file_id, chunk, _json({"index": index, "source_node": file_id}), _now()),
693
+ "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
694
+ "VALUES (?, ?, ?, ?, ?)",
695
+ (chunk_id, file_id, chunk,
696
+ _json({"index": index, "source_node": file_id}), _now()),
373
697
  )
374
- self._upsert_edge(conn, file_id, chunk_id, "has_chunk")
375
- for topic in _topic_candidates(f"{filename}\n{text}"):
376
- topic_id = f"topic:{_slug(topic)}"
377
- self._upsert_node(conn, topic_id, "Topic", topic, metadata={"auto_extracted": True})
378
- self._upsert_edge(conn, file_id, topic_id, "discusses", weight=0.7)
698
+ self._upsert_edge(conn, file_id, chunk_id, "포함함")
699
+
700
+ # ── Concept / Feature / Error / Code 노드 + 동사형 엣지 ───────────
701
+ concept_ids: Dict[str, str] = {}
702
+ for concept in concepts:
703
+ node_t = _classify_node_type(concept, full_text)
704
+ cid = f"{node_t.lower()}:{_slug(concept)}"
705
+ concept_ids[concept.lower()] = cid
706
+ self._upsert_node(
707
+ conn, cid, node_t, concept,
708
+ metadata={"auto_extracted": True, "source_file": filename},
709
+ )
710
+ # 선: 동사 — Document가 Concept을 "포함함"
711
+ self._upsert_edge(conn, file_id, cid, "포함함", weight=0.8)
712
+
713
+ # ── Concept–Concept 엣지 (선: 동사형) ───────────────────────────
714
+ for triple in triples:
715
+ subj_id = concept_ids.get(triple["subject"].lower())
716
+ obj_id = concept_ids.get(triple["object"].lower())
717
+ if subj_id and obj_id and subj_id != obj_id:
718
+ self._upsert_edge(
719
+ conn, subj_id, obj_id,
720
+ triple["relation"],
721
+ weight=1.0,
722
+ metadata={"context": triple.get("context", "")[:240]},
723
+ )
724
+
725
+ # ── Task / Decision 노드 ──────────────────────────────────────────
379
726
  for item in _semantic_items(text):
380
- semantic_type = item["type"]
381
- semantic_title = item["title"]
382
- semantic_id = f"{semantic_type.lower()}:{_sha256_text(f'{file_id}:{semantic_type}:{semantic_title}')[:24]}"
727
+ sem_type = item["type"]
728
+ sem_title = item["title"]
729
+ sem_id = f"{sem_type.lower()}:{_sha256_text(f'{file_id}:{sem_type}:{sem_title}')[:24]}"
383
730
  self._upsert_node(
384
- conn,
385
- semantic_id,
386
- semantic_type,
387
- semantic_title,
731
+ conn, sem_id, sem_type, sem_title,
388
732
  summary=item["summary"],
389
733
  metadata={"auto_extracted": True, "source_node": file_id, "filename": filename},
390
734
  raw=item,
391
735
  )
392
- self._upsert_edge(conn, file_id, semantic_id, "contains_signal", weight=0.8)
736
+ # 선: Document가 Task/Decision을 "포함함"
737
+ self._upsert_edge(conn, file_id, sem_id, "포함함", weight=0.9)
738
+
393
739
  return {"node_id": file_id, "sha256": digest, "metadata": metadata}
394
740
 
395
741
  def ingest_event(
@@ -597,8 +943,23 @@ class KnowledgeGraphStore:
597
943
  sheets.append({"title": ws.title, "max_row": ws.max_row, "max_column": ws.max_column})
598
944
  return {"sheets": sheets}
599
945
 
946
+ # ── 그래프에 표시되는 노드 타입 (점 = 명사) ──────────────────────────────
947
+ # Message / AIResponse / Chunk 는 RAG 검색용으로만 저장, 그래프에서 숨김.
948
+ _GRAPH_VISIBLE_TYPES = (
949
+ "Chat", # 대화 세션
950
+ "Document", # 파일 (PDF·PPT·Word·Excel·이미지)
951
+ "Concept", # 개념 / 아이디어 / 기술 용어
952
+ "Person", # 사람
953
+ "Error", # 오류 / 버그
954
+ "Code", # 코드 / 함수
955
+ "Feature", # 소프트웨어 기능
956
+ "Task", # 할 일
957
+ "Decision", # 결정 사항
958
+ )
959
+
600
960
  def graph(self, limit: int = 300) -> Dict[str, Any]:
601
961
  limit = max(1, min(int(limit or 300), 2000))
962
+ visible = ",".join(f"'{t}'" for t in self._GRAPH_VISIBLE_TYPES)
602
963
  with self._connect() as conn:
603
964
  nodes = [
604
965
  {
@@ -610,7 +971,7 @@ class KnowledgeGraphStore:
610
971
  "updated_at": row["updated_at"],
611
972
  }
612
973
  for row in conn.execute(
613
- "SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE type != 'Chunk' ORDER BY updated_at DESC LIMIT ?",
974
+ f"SELECT id, type, title, summary, metadata_json, updated_at FROM nodes WHERE type IN ({visible}) ORDER BY updated_at DESC LIMIT ?",
614
975
  (limit,),
615
976
  )
616
977
  ]
@@ -618,24 +979,18 @@ class KnowledgeGraphStore:
618
979
  edges: List[Dict[str, Any]] = []
619
980
  if node_ids:
620
981
  edge_rows = conn.execute(
621
- """
982
+ f"""
622
983
  SELECT id, from_node, to_node, type, weight, metadata_json
623
984
  FROM edges
624
985
  WHERE from_node IN (
625
- SELECT id
626
- FROM nodes
627
- WHERE type != 'Chunk'
628
- ORDER BY updated_at DESC
629
- LIMIT ?
986
+ SELECT id FROM nodes WHERE type IN ({visible})
987
+ ORDER BY updated_at DESC LIMIT ?
630
988
  )
631
989
  AND to_node IN (
632
- SELECT id
633
- FROM nodes
634
- WHERE type != 'Chunk'
635
- ORDER BY updated_at DESC
636
- LIMIT ?
990
+ SELECT id FROM nodes WHERE type IN ({visible})
991
+ ORDER BY updated_at DESC LIMIT ?
637
992
  )
638
- ORDER BY created_at DESC
993
+ ORDER BY weight DESC, created_at DESC
639
994
  """,
640
995
  (limit, limit),
641
996
  ).fetchall()
package/ltcai_cli.py CHANGED
@@ -248,6 +248,10 @@ def main() -> None:
248
248
 
249
249
  os.chdir(app_dir)
250
250
 
251
+ # LATTICEAI_TUNNEL=true in .env acts like --tunnel flag
252
+ if not args.tunnel and os.getenv("LATTICEAI_TUNNEL", "").lower() in ("1", "true", "yes"):
253
+ args.tunnel = True
254
+
251
255
  # --tunnel forces 0.0.0.0 so cloudflared can reach the server
252
256
  if args.tunnel and args.host == "127.0.0.1":
253
257
  args.host = "0.0.0.0"
@@ -265,6 +269,21 @@ def main() -> None:
265
269
 
266
270
  _print_banner(args.host, args.port, tunnel_url)
267
271
 
272
+ # Telegram startup notification (local start, tunnel handled separately inside _start_tunnel)
273
+ if not args.tunnel:
274
+ _tg_token = os.getenv("LATTICEAI_TELEGRAM_BOT_TOKEN", "")
275
+ _tg_chat = os.getenv("LATTICEAI_TELEGRAM_CHAT_ID", "")
276
+ if _tg_token and _tg_chat:
277
+ _local_msg = (
278
+ f"✅ Lattice AI 시작됨\n\n"
279
+ f"🏠 로컬: http://localhost:{args.port}"
280
+ )
281
+ threading.Thread(
282
+ target=_send_telegram,
283
+ args=(_tg_token, _tg_chat, _local_msg),
284
+ daemon=True,
285
+ ).start()
286
+
268
287
  import uvicorn
269
288
 
270
289
  uvicorn.run(
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ltcai",
3
- "version": "0.1.24",
3
+ "version": "0.1.25",
4
4
  "description": "Lattice AI local MLX/cloud LLM workspace server",
5
5
  "homepage": "https://github.com/TaeSooPark-PTS/LatticeAI#readme",
6
6
  "repository": {