ltcai 4.3.1 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (131) hide show
  1. package/README.md +191 -278
  2. package/docs/CHANGELOG.md +128 -0
  3. package/docs/V4_3_2_DEADCODE_AUDIT_REPORT.md +174 -0
  4. package/docs/V4_3_2_DOCUMENTATION_CLEANUP_REPORT.md +81 -0
  5. package/docs/V4_3_2_GITHUB_VERCEL_CHECK_REPORT.md +75 -0
  6. package/docs/V4_3_2_GRAPH_UX_REPORT.md +48 -0
  7. package/docs/V4_3_2_INDEPENDENT_AUDIT_PACKAGE.md +209 -0
  8. package/docs/V4_3_2_PRODUCT_POLISH_REPORT.md +57 -0
  9. package/docs/V4_3_2_SELF_AUDIT_REPORT.md +63 -0
  10. package/docs/V4_3_2_VALIDATION_REPORT.md +97 -0
  11. package/docs/V4_3_3_VALIDATION_REPORT.md +46 -0
  12. package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
  13. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +18 -19
  14. package/frontend/openapi.json +1 -1
  15. package/frontend/src/components/primitives.tsx +92 -10
  16. package/frontend/src/pages/Act.tsx +11 -9
  17. package/frontend/src/pages/Ask.tsx +2 -2
  18. package/frontend/src/pages/Brain.tsx +607 -65
  19. package/frontend/src/pages/Capture.tsx +11 -7
  20. package/frontend/src/pages/Library.tsx +3 -3
  21. package/frontend/src/pages/System.tsx +186 -23
  22. package/lattice_brain/__init__.py +38 -23
  23. package/lattice_brain/_kg_common.py +11 -1
  24. package/lattice_brain/context.py +212 -2
  25. package/lattice_brain/conversations.py +234 -1
  26. package/lattice_brain/discovery.py +11 -1
  27. package/lattice_brain/documents.py +11 -1
  28. package/lattice_brain/graph/__init__.py +28 -0
  29. package/lattice_brain/graph/_kg_common.py +1123 -0
  30. package/lattice_brain/graph/curator.py +473 -0
  31. package/lattice_brain/graph/discovery.py +1455 -0
  32. package/lattice_brain/graph/documents.py +218 -0
  33. package/lattice_brain/graph/identity.py +175 -0
  34. package/lattice_brain/graph/ingest.py +644 -0
  35. package/lattice_brain/graph/network.py +205 -0
  36. package/lattice_brain/graph/projection.py +571 -0
  37. package/lattice_brain/graph/provenance.py +401 -0
  38. package/lattice_brain/graph/retrieval.py +1341 -0
  39. package/lattice_brain/graph/schema.py +640 -0
  40. package/lattice_brain/graph/store.py +237 -0
  41. package/lattice_brain/graph/write_master.py +225 -0
  42. package/lattice_brain/identity.py +11 -13
  43. package/lattice_brain/ingest.py +11 -1
  44. package/lattice_brain/ingestion.py +318 -0
  45. package/lattice_brain/memory.py +100 -1
  46. package/lattice_brain/network.py +11 -1
  47. package/lattice_brain/portability.py +431 -0
  48. package/lattice_brain/projection.py +11 -1
  49. package/lattice_brain/provenance.py +11 -1
  50. package/lattice_brain/retrieval.py +11 -1
  51. package/lattice_brain/runtime/__init__.py +32 -0
  52. package/lattice_brain/runtime/agent_runtime.py +569 -0
  53. package/lattice_brain/runtime/hooks.py +754 -0
  54. package/lattice_brain/runtime/multi_agent.py +795 -0
  55. package/lattice_brain/schema.py +11 -1
  56. package/lattice_brain/store.py +10 -2
  57. package/lattice_brain/workflow.py +461 -0
  58. package/lattice_brain/write_master.py +11 -1
  59. package/latticeai/__init__.py +1 -1
  60. package/latticeai/api/agents.py +2 -2
  61. package/latticeai/api/browser.py +1 -1
  62. package/latticeai/api/chat.py +1 -1
  63. package/latticeai/api/computer_use.py +1 -1
  64. package/latticeai/api/hooks.py +2 -2
  65. package/latticeai/api/mcp.py +1 -1
  66. package/latticeai/api/tools.py +1 -1
  67. package/latticeai/api/workflow_designer.py +2 -2
  68. package/latticeai/app_factory.py +4 -4
  69. package/latticeai/brain/__init__.py +24 -6
  70. package/latticeai/brain/_kg_common.py +11 -1117
  71. package/latticeai/brain/context.py +12 -208
  72. package/latticeai/brain/conversations.py +12 -231
  73. package/latticeai/brain/discovery.py +13 -1451
  74. package/latticeai/brain/documents.py +13 -214
  75. package/latticeai/brain/identity.py +11 -169
  76. package/latticeai/brain/ingest.py +13 -640
  77. package/latticeai/brain/memory.py +12 -97
  78. package/latticeai/brain/network.py +12 -200
  79. package/latticeai/brain/projection.py +13 -567
  80. package/latticeai/brain/provenance.py +13 -397
  81. package/latticeai/brain/retrieval.py +13 -1337
  82. package/latticeai/brain/schema.py +12 -635
  83. package/latticeai/brain/store.py +13 -233
  84. package/latticeai/brain/write_master.py +13 -221
  85. package/latticeai/core/agent.py +1 -1
  86. package/latticeai/core/agent_registry.py +2 -2
  87. package/latticeai/core/builtin_hooks.py +2 -2
  88. package/latticeai/core/graph_curator.py +6 -468
  89. package/latticeai/core/hooks.py +6 -749
  90. package/latticeai/core/marketplace.py +1 -1
  91. package/latticeai/core/multi_agent.py +6 -790
  92. package/latticeai/core/workflow_engine.py +6 -456
  93. package/latticeai/core/workspace_os.py +1 -1
  94. package/latticeai/services/agent_runtime.py +6 -564
  95. package/latticeai/services/ingestion.py +6 -313
  96. package/latticeai/services/kg_portability.py +6 -426
  97. package/latticeai/services/platform_runtime.py +3 -3
  98. package/latticeai/services/run_executor.py +1 -1
  99. package/latticeai/services/upload_service.py +1 -1
  100. package/p_reinforce.py +1 -1
  101. package/package.json +3 -6
  102. package/scripts/build_vercel_static.mjs +77 -0
  103. package/scripts/bump_version.py +1 -1
  104. package/scripts/check_markdown_links.mjs +75 -0
  105. package/scripts/wheel_smoke.py +7 -0
  106. package/src-tauri/Cargo.lock +1 -1
  107. package/src-tauri/Cargo.toml +1 -1
  108. package/src-tauri/src/main.rs +12 -2
  109. package/src-tauri/tauri.conf.json +1 -1
  110. package/static/app/asset-manifest.json +5 -5
  111. package/static/app/assets/index-CHHal8Zl.css +2 -0
  112. package/static/app/assets/index-pdzil9ac.js +333 -0
  113. package/static/app/assets/index-pdzil9ac.js.map +1 -0
  114. package/static/app/index.html +2 -2
  115. package/latticeai/api/deps.py +0 -15
  116. package/scripts/capture/README.md +0 -28
  117. package/scripts/capture/capture_enterprise.js +0 -8
  118. package/scripts/capture/capture_graph.js +0 -8
  119. package/scripts/capture/capture_onboarding.js +0 -8
  120. package/scripts/capture/capture_page.js +0 -43
  121. package/scripts/capture/capture_release_media.js +0 -125
  122. package/scripts/capture/capture_skills.js +0 -8
  123. package/scripts/capture/capture_v340.js +0 -88
  124. package/scripts/capture/capture_workspace.js +0 -8
  125. package/scripts/generate_diagrams.py +0 -512
  126. package/scripts/release-0.3.1.sh +0 -105
  127. package/scripts/take_screenshots.js +0 -69
  128. package/static/app/assets/index-BhPuj8rT.js +0 -333
  129. package/static/app/assets/index-BhPuj8rT.js.map +0 -1
  130. package/static/app/assets/index-yZswHE3d.css +0 -2
  131. package/static/css/tokens.3ba22e37.css +0 -260
@@ -0,0 +1,644 @@
1
+ from __future__ import annotations
2
+
3
+ # ruff: noqa: F403,F405
4
+
5
+ from ._kg_common import * # noqa: F403,F401
6
+
7
+
8
class KnowledgeGraphIngestMixin:
    """Ingestion entry points (messages, documents, events, text sources).

    This is a mixin: it relies on the host class to provide ``_connect``,
    ``_upsert_node``, ``_upsert_edge``, ``_upsert_chunk``, ``blob_dir``,
    ``_document_structure`` and ``_ingest_structure_nodes``.

    Edge labels and node types are runtime data and are kept verbatim.
    Rough English glosses of the Korean edge labels used below:
    "작성함" = authored, "포함함" = contains, "언급함" = mentions,
    "생성함" = created, "업로드함" = uploaded.
    """

    # ── Shared building blocks ──────────────────────────────────────────────

    def _ingest_rag_chunks(
        self,
        conn: sqlite3.Connection,
        owner_id: str,
        text: str,
        *,
        label_prefix: str = "",
    ) -> List[str]:
        """Store the retrieval chunks of ``text`` underneath ``owner_id``.

        For every piece yielded by ``_chunks(text)`` this writes a ``Chunk``
        node, a chunk row via ``_upsert_chunk`` and an
        ``owner_id -> chunk`` "포함함" edge.

        Args:
            conn: Open connection all writes go through.
            owner_id: Node id the chunks belong to; also seeds the chunk id.
            text: Text to split.
            label_prefix: Prepended verbatim to ``"chunk <n>"`` in the node
                label (callers pass e.g. ``"report.pdf "``).

        Returns:
            The chunk node ids, in chunk order.
        """
        chunk_ids: List[str] = []
        for index, chunk in enumerate(_chunks(text)):
            chunk_id = f"chunk:{_sha256_text(f'{owner_id}:{index}:{chunk}')[:24]}"
            chunk_ids.append(chunk_id)
            self._upsert_node(
                conn,
                chunk_id,
                "Chunk",
                f"{label_prefix}chunk {index + 1}",
                summary=chunk[:500],
                metadata={"index": index, "source_node": owner_id},
            )
            # A separate dict on purpose: the node and the chunk row must not
            # share one mutable metadata object.
            self._upsert_chunk(
                conn,
                chunk_id=chunk_id,
                source_node=owner_id,
                text=chunk,
                metadata={"index": index, "source_node": owner_id},
            )
            self._upsert_edge(conn, owner_id, chunk_id, "포함함")
        return chunk_ids

    def _ingest_concept_nodes(
        self,
        conn: sqlite3.Connection,
        owner_id: str,
        concepts: Any,
        context_text: str,
        *,
        relation: str,
        weight: float,
        node_metadata: Dict[str, Any],
        edge_metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, str]:
        """Upsert one node per extracted concept and link it from ``owner_id``.

        The node type of each concept is chosen by ``_classify_node_type``.

        Args:
            conn: Open connection all writes go through.
            owner_id: Source node of the ``owner_id -> concept`` edge.
            concepts: Iterable of concept strings (the result of
                ``_extract_concepts``).
            context_text: Text handed to ``_classify_node_type`` with each
                concept.
            relation: Label of the ``owner_id -> concept`` edge.
            weight: Weight of that edge.
            node_metadata: Extra keys stored on every concept node after
                ``"auto_extracted": True``.
            edge_metadata: Metadata for the edge. When ``None`` the
                ``metadata`` keyword is not passed to ``_upsert_edge`` at all.

        Returns:
            Mapping of lower-cased concept text to its node id.
        """
        concept_ids: Dict[str, str] = {}
        for concept in concepts:
            node_type = _classify_node_type(concept, context_text)
            concept_id = f"{node_type.lower()}:{_slug(concept)}"
            concept_ids[concept.lower()] = concept_id
            self._upsert_node(
                conn,
                concept_id,
                node_type,
                concept,
                metadata={"auto_extracted": True, **node_metadata},
            )
            edge_kwargs: Dict[str, Any] = {"weight": weight}
            if edge_metadata is not None:
                edge_kwargs["metadata"] = dict(edge_metadata)
            self._upsert_edge(conn, owner_id, concept_id, relation, **edge_kwargs)
        return concept_ids

    def _ingest_triple_edges(
        self,
        conn: sqlite3.Connection,
        concept_ids: Dict[str, str],
        triples: Any,
    ) -> None:
        """Create concept-to-concept edges from extracted triples.

        A triple is skipped unless both its subject and object resolve (by
        lower-cased text) to ids in ``concept_ids`` and those ids differ.

        Args:
            conn: Open connection all writes go through.
            concept_ids: Lower-cased concept text to node id.
            triples: Iterable of mappings with ``"subject"``, ``"object"``,
                ``"relation"`` and optionally ``"context"``.
        """
        for triple in triples:
            subject_id = concept_ids.get(triple["subject"].lower())
            object_id = concept_ids.get(triple["object"].lower())
            if not subject_id or not object_id or subject_id == object_id:
                continue
            # ``or ""`` also covers a context that is present but None.
            context = (triple.get("context") or "")[:240]
            self._upsert_edge(
                conn,
                subject_id,
                object_id,
                triple["relation"],  # verb-style edge label
                weight=1.0,
                metadata={"context": context},
            )

    def _ingest_semantic_nodes(
        self,
        conn: sqlite3.Connection,
        owner_id: str,
        items: Any,
        *,
        relation: str,
        node_metadata: Dict[str, Any],
        related_concept_ids: Optional[List[str]] = None,
    ) -> None:
        """Upsert the semantic items (e.g. Task / Decision) found in a text.

        Args:
            conn: Open connection all writes go through.
            owner_id: Node the items hang off; also seeds each item id.
            items: Iterable of mappings with ``"type"``, ``"title"`` and
                ``"summary"`` (the result of ``_semantic_items``).
            relation: Label of the ``owner_id -> item`` edge (weight 0.9).
            node_metadata: Extra keys stored on every item node after
                ``"auto_extracted": True``.
            related_concept_ids: Concept node ids every item additionally
                links to with an "언급함" edge (weight 0.6).
        """
        related = related_concept_ids or []
        for item in items:
            sem_type = item["type"]
            sem_title = item["title"]
            sem_id = (
                f"{sem_type.lower()}:"
                f"{_sha256_text(f'{owner_id}:{sem_type}:{sem_title}')[:24]}"
            )
            self._upsert_node(
                conn,
                sem_id,
                sem_type,
                sem_title,
                summary=item["summary"],
                metadata={"auto_extracted": True, **node_metadata},
                raw=item,
            )
            self._upsert_edge(conn, owner_id, sem_id, relation, weight=0.9)
            for concept_id in related:
                self._upsert_edge(conn, sem_id, concept_id, "언급함", weight=0.6)

    # ── Public ingestion API ────────────────────────────────────────────────

    def ingest_message(
        self,
        role: str,
        content: str,
        *,
        user_email: Optional[str] = None,
        user_nickname: Optional[str] = None,
        source: Optional[str] = None,
        conversation_id: Optional[str] = None,
        raw: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Ingest one chat message into the graph.

        Writes, in order: the ``Chat`` node of the conversation, an optional
        ``Person`` node, the message node (``AIResponse`` when ``role`` is
        ``"assistant"``, otherwise ``Message``), its retrieval chunks,
        extracted concept nodes, concept-to-concept edges and semantic
        (Task / Decision) nodes.

        Args:
            role: Message role.
            content: Message text; ``None`` is treated as ``""``.
            user_email: Author e-mail; preferred key for the Person node.
            user_nickname: Author nickname; preferred Person label.
            source: Free-form origin tag stored in metadata.
            conversation_id: Conversation key; ``"default"`` when missing.
            raw: Raw payload stored on the message node; the metadata dict is
                stored instead when this is empty.

        Returns:
            ``{"node_id": <message node id>, "type": <node type>}``.
        """
        content = str(content or "")
        digest = _sha256_text(
            "|".join([role or "", content, conversation_id or "", user_email or ""])
        )[:24]
        node_type = "AIResponse" if role == "assistant" else "Message"
        node_id = f"{node_type.lower()}:{digest}"
        conv_id = f"conversation:{_slug(conversation_id or 'default')}"
        metadata = {
            "role": role,
            "source": source,
            "conversation_id": conversation_id,
            "user_email": user_email,
            "user_nickname": user_nickname,
            "chars": len(content),
        }
        concepts = _extract_concepts(content)
        triples = _extract_triples(content, concepts)
        semantic = _semantic_items(content)
        # Cleaned once and sliced per use (assumes _clean_text is pure).
        cleaned = _clean_text(content)

        with self._connect() as conn:
            # 1. Chat node: one per conversation. Title and summary are taken
            #    from the message ingested on THIS call; whether an existing
            #    Chat node keeps its earlier title is up to _upsert_node.
            self._upsert_node(
                conn,
                conv_id,
                "Chat",
                cleaned[:80] or (conversation_id or "대화"),
                summary=cleaned[:400],
                metadata={"source": source, "conversation_id": conversation_id},
            )

            # 2. Person node and Person -> Chat "작성함" edge.
            if user_email or user_nickname:
                person_id = f"person:{_slug(user_email or user_nickname)}"
                self._upsert_node(
                    conn,
                    person_id,
                    "Person",
                    user_nickname or user_email,
                    metadata={"email": user_email, "nickname": user_nickname},
                )
                self._upsert_edge(
                    conn,
                    person_id,
                    conv_id,
                    "작성함",
                    weight=1.0,
                    metadata={"role": role},
                )

            # 3. Raw message node (kept for retrieval) and Chat -> message
            #    "포함함" edge.
            self._upsert_node(
                conn,
                node_id,
                node_type,
                cleaned[:80] or role,
                summary=cleaned[:500],
                metadata=metadata,
                raw=raw or metadata,
            )
            self._upsert_edge(
                conn, conv_id, node_id, "포함함", weight=0.3, metadata={"role": role}
            )

            # 4. Retrieval chunks of the message.
            self._ingest_rag_chunks(conn, node_id, content)

            # 5. Concept nodes, each linked from the Chat node ("언급함").
            concept_ids = self._ingest_concept_nodes(
                conn,
                conv_id,
                concepts,
                content,
                relation="언급함",
                weight=0.7,
                node_metadata={"source": source},
                edge_metadata={"source": source},
            )

            # 6. Concept-to-concept edges.
            self._ingest_triple_edges(conn, concept_ids, triples)

            # 7. Task / Decision nodes: Chat -> item "생성함"; every item also
            #    mentions the first three concepts of this message.
            self._ingest_semantic_nodes(
                conn,
                conv_id,
                semantic,
                relation="생성함",
                node_metadata={"source_node": node_id},
                related_concept_ids=list(concept_ids.values())[:3],
            )

        return {"node_id": node_id, "type": node_type}

    def ingest_document(
        self,
        path: Path,
        *,
        original_filename: Optional[str] = None,
        mime_type: Optional[str] = None,
        uploader: Optional[str] = None,
        conversation_id: Optional[str] = None,
        extracted: Optional[Dict[str, Any]] = None,
        source_type: Optional[str] = None,
        source_uri: Optional[str] = None,
        captured_at: Optional[str] = None,
        modified_at: Optional[str] = None,
        owner: Optional[str] = None,
        workspace_id: Optional[str] = None,
        permissions: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Ingest a file: store its bytes as a blob and index it in the graph.

        The file is stored under ``blob_dir/<digest[:2]>/<digest><ext>`` and
        the ``Document`` node id is derived from the same content digest, so
        re-ingesting identical bytes targets the same node (reported through
        ``duplicate``).

        Args:
            path: File to ingest.
            original_filename: Display name; defaults to ``path.name``.
            mime_type: Stored in metadata only.
            uploader: Creates a Person node with an "업로드함" edge.
            conversation_id: Links the document from that Chat node.
            extracted: Extraction result; ``"content"`` (or ``"preview"``) is
                the text that gets indexed. Everything except ``"content"``
                is copied into metadata.
            source_type: When given, a Source node is attached. The metadata
                field defaults to ``"file"`` either way.
            source_uri: Origin URI; defaults to ``str(path)``.
            captured_at: Capture timestamp; defaults to ``_now()``.
            modified_at: Stored in metadata only.
            owner: Stored in metadata; defaults to ``uploader``.
            workspace_id: Stored in metadata.
            permissions: Stored in metadata; defaults to ``{}``.

        Returns:
            Dict with ``node_id``, ``type``, ``sha256``, ``content_hash``,
            ``source_node_id``, ``chunk_ids``, ``chunk_count``, ``duplicate``,
            ``captured_at`` and ``metadata``.
        """
        path = Path(path)
        data = path.read_bytes()
        digest = _sha256_bytes(data)
        ext = path.suffix.lower()
        filename = original_filename or path.name
        captured_at = captured_at or _now()
        blob_path = self.blob_dir / digest[:2] / f"{digest}{ext}"
        blob_path.parent.mkdir(parents=True, exist_ok=True)
        if not blob_path.exists():
            # Write the bytes that were hashed instead of re-reading the file:
            # the blob can then never disagree with its digest-based name,
            # even if the source file changes in the meantime.
            blob_path.write_bytes(data)

        doc_meta = self._document_structure(path, ext)
        extracted_info = extracted or {}
        text = str(
            extracted_info.get("content") or extracted_info.get("preview") or ""
        )
        file_id = f"file:{digest[:24]}"
        metadata = {
            "filename": filename,
            "ext": ext,
            "mime_type": mime_type,
            "bytes": len(data),
            "sha256": digest,
            "content_hash": digest,
            "blob_path": str(blob_path),
            "uploader": uploader,
            "owner": owner or uploader,
            "workspace_id": workspace_id,
            "permissions": permissions or {},
            "source_type": source_type or "file",
            "source_uri": source_uri or str(path),
            "captured_at": captured_at,
            "modified_at": modified_at,
            "conversation_id": conversation_id,
            # The full text lives in the chunks, not in node metadata.
            "extracted": {k: v for k, v in extracted_info.items() if k != "content"},
            "structure": doc_meta,
        }
        full_text = f"{filename}\n{text}"
        concepts = _extract_concepts(full_text, limit=15)
        triples = _extract_triples(full_text, concepts)
        chunk_ids: List[str] = []
        source_node_id: Optional[str] = None

        with self._connect() as conn:
            # Must be read before the upsert below creates the node.
            duplicate = self._node_exists(conn, file_id)

            # Document node (the file itself) and its structure nodes.
            self._upsert_node(
                conn,
                file_id,
                "Document",
                filename,
                summary=(text or filename)[:500],
                metadata=metadata,
                raw=metadata,
            )
            self._ingest_structure_nodes(conn, file_id, filename, doc_meta)

            # Source node + indexed_from edge, only when a source_type was
            # explicitly supplied.
            if source_type:
                source_node_id = self._attach_source_node(
                    conn,
                    file_id,
                    source_type=source_type,
                    source_uri=source_uri or str(path),
                    title=filename,
                    content_hash=digest,
                    captured_at=captured_at,
                    extra={
                        "owner": owner or uploader,
                        "workspace_id": workspace_id,
                        "ext": ext,
                    },
                )

            # Person node and Person -> Document "업로드함" edge.
            if uploader:
                person_id = f"person:{_slug(uploader)}"
                self._upsert_node(
                    conn,
                    person_id,
                    "Person",
                    uploader,
                    metadata={"email": uploader},
                )
                self._upsert_edge(conn, person_id, file_id, "업로드함", weight=1.0)

            # Chat -> Document "언급함" edge.
            if conversation_id:
                conv_id = f"conversation:{_slug(conversation_id)}"
                self._upsert_node(conn, conv_id, "Chat", conversation_id)
                self._upsert_edge(conn, conv_id, file_id, "언급함", weight=0.8)

            # Retrieval chunks of the extracted text.
            chunk_ids = self._ingest_rag_chunks(
                conn, file_id, text, label_prefix=f"{filename} "
            )

            # Concept nodes, each linked from the Document ("포함함").
            concept_ids = self._ingest_concept_nodes(
                conn,
                file_id,
                concepts,
                full_text,
                relation="포함함",
                weight=0.8,
                node_metadata={"source_file": filename},
            )

            # Concept-to-concept edges.
            self._ingest_triple_edges(conn, concept_ids, triples)

            # Task / Decision nodes, linked from the Document ("포함함").
            self._ingest_semantic_nodes(
                conn,
                file_id,
                _semantic_items(text),
                relation="포함함",
                node_metadata={"source_node": file_id, "filename": filename},
            )

        return {
            "node_id": file_id,
            "type": "Document",
            "sha256": digest,
            "content_hash": digest,
            "source_node_id": source_node_id,
            "chunk_ids": chunk_ids,
            "chunk_count": len(chunk_ids),
            "duplicate": duplicate,
            "captured_at": captured_at,
            "metadata": metadata,
        }

    def ingest_event(
        self,
        event_type: str,
        title: str,
        *,
        user_email: Optional[str] = None,
        user_nickname: Optional[str] = None,
        source: Optional[str] = None,
        conversation_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Record an event node and link it to its conversation and user.

        The event id hashes the whole payload, including the ``_now()``
        timestamp taken on this call.

        Args:
            event_type: Node type of the event; ``"Event"`` when empty.
            title: Event label; falls back to ``event_type``.
            user_email: Triggering user's e-mail (preferred Person key).
            user_nickname: Triggering user's nickname (preferred label).
            source: Free-form origin tag.
            conversation_id: Conversation key; ``"default"`` when missing.
            metadata: Extra payload stored under ``"metadata"``.

        Returns:
            ``{"node_id": <event node id>, "type": <event type>}``.
        """
        event_type = str(event_type or "Event")
        title = str(title or event_type)
        payload = {
            "event_type": event_type,
            "title": title,
            "user_email": user_email,
            "user_nickname": user_nickname,
            "source": source,
            "conversation_id": conversation_id,
            "metadata": metadata or {},
            "timestamp": _now(),
        }
        event_id = f"event:{_sha256_text(_json(payload))[:24]}"
        conv_id = f"conversation:{_slug(conversation_id or 'default')}"
        with self._connect() as conn:
            self._upsert_node(
                conn,
                event_id,
                event_type,
                title,
                summary=title,
                metadata=payload,
                raw=payload,
            )
            # NOTE(review): this upserts the conversation node with type
            # "Conversation", while ingest_message / ingest_document /
            # ingest_source use "Chat" for the same ``conversation:<slug>``
            # id. Left as is; confirm which type is intended.
            self._upsert_node(
                conn,
                conv_id,
                "Conversation",
                conversation_id or "Default conversation",
                metadata={"source": source},
            )
            self._upsert_edge(
                conn, conv_id, event_id, "has_event", metadata={"source": source}
            )
            if user_email or user_nickname:
                person_id = f"person:{_slug(user_email or user_nickname)}"
                self._upsert_node(
                    conn,
                    person_id,
                    "Person",
                    user_nickname or user_email,
                    metadata={"email": user_email},
                )
                self._upsert_edge(
                    conn,
                    person_id,
                    event_id,
                    "triggered",
                    metadata={"event_type": event_type},
                )
        return {"node_id": event_id, "type": event_type}

    def _node_exists(self, conn: sqlite3.Connection, node_id: str) -> bool:
        """Return True when a row with ``id == node_id`` is in ``nodes``."""
        row = conn.execute("SELECT 1 FROM nodes WHERE id = ?", (node_id,)).fetchone()
        return row is not None

    def node_is_embedded(self, node_id: str) -> bool:
        """True when a vector embedding exists for ``node_id`` (RAG-ready)."""
        with self._connect() as conn:
            row = conn.execute(
                "SELECT 1 FROM vector_embeddings WHERE item_id = ? LIMIT 1",
                (node_id,),
            ).fetchone()
        return row is not None

    def _attach_source_node(
        self,
        conn: sqlite3.Connection,
        content_node_id: str,
        *,
        source_type: str,
        source_uri: Optional[str] = None,
        title: Optional[str] = None,
        content_hash: Optional[str] = None,
        captured_at: Optional[str] = None,
        extra: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Create the SOURCE node for an ingested item and link it via INDEXED_FROM.

        The source id is derived from ``source_type`` plus the first available
        of ``source_uri``, ``content_hash`` and ``content_node_id``, so
        re-ingesting the same origin maps to the same Source node id.

        Args:
            conn: Open connection all writes go through.
            content_node_id: Node that was ingested from this source.
            source_type: Kind of origin; part of the source id.
            source_uri: Origin URI; preferred key for the source id.
            title: Preferred label for the Source node.
            content_hash: Stored in metadata; fallback key for the source id.
            captured_at: Capture timestamp; defaults to ``_now()``.
            extra: Extra metadata; merged last, so its keys win on clashes.

        Returns:
            The Source node id.
        """
        key = source_uri or content_hash or content_node_id
        source_id = f"source:{_sha256_text(f'{source_type}|{key}')[:24]}"
        meta = {
            "source_type": source_type,
            "source_uri": source_uri,
            "content_hash": content_hash,
            "captured_at": captured_at or _now(),
            **(extra or {}),
        }
        label = title or source_uri or source_type
        self._upsert_node(
            conn,
            source_id,
            "Source",
            label,
            summary=str(source_uri or title or source_type)[:400],
            metadata=meta,
        )
        # Edge direction: content node -> Source ("indexed_from").
        self._upsert_edge(
            conn,
            content_node_id,
            source_id,
            "indexed_from",
            weight=1.0,
            metadata={"source_type": source_type},
        )
        return source_id

    def ingest_source(
        self,
        *,
        source_type: str,
        title: str,
        text: str,
        source_uri: Optional[str] = None,
        owner: Optional[str] = None,
        workspace_id: Optional[str] = None,
        permissions: Optional[Dict[str, Any]] = None,
        captured_at: Optional[str] = None,
        modified_at: Optional[str] = None,
        conversation_id: Optional[str] = None,
        metadata: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Unified text/web ingestion: one shape for URL, browser tab, note, text.

        Creates a content ``Document`` node whose id is derived from a hash of
        ``source_type``, ``source_uri`` and ``text``, a ``Source`` node linked
        via ``indexed_from``, retrieval chunks, and extracted concept and
        Task / Decision nodes, mirroring ``ingest_document`` for non-file
        sources.

        Args:
            source_type: Kind of origin; ``"text"`` when empty.
            title: Display title; falls back to ``source_uri`` and then
                ``source_type``. Cleaned and cut to 240 characters.
            text: Body text to index; ``None`` is treated as ``""``.
            source_uri: Origin URI, if any.
            owner: Creates a Person node with an "업로드함" edge.
            workspace_id: Stored in metadata.
            permissions: Stored in metadata; defaults to ``{}``.
            captured_at: Capture timestamp; defaults to ``_now()``.
            modified_at: Stored in metadata only.
            conversation_id: Links the document from that Chat node.
            metadata: Extra node metadata; merged last, so its keys win on
                clashes.

        Returns:
            Dict with ``node_id``, ``type``, ``source_node_id``,
            ``content_hash``, ``chunk_ids``, ``chunk_count``, ``duplicate``
            (the content node already existed) and ``captured_at``.
        """
        source_type = str(source_type or "text")
        text = str(text or "")
        title = (
            _clean_text(str(title or source_uri or source_type))[:240] or source_type
        )
        captured_at = captured_at or _now()
        content_hash = _sha256_text(f"{source_type}|{source_uri or ''}|{text}")
        content_id = f"webdoc:{content_hash[:24]}"
        full_text = f"{title}\n{text}"
        node_meta = {
            "source_type": source_type,
            "source_uri": source_uri,
            "content_hash": content_hash,
            "title": title,
            "captured_at": captured_at,
            "modified_at": modified_at,
            "owner": owner,
            "workspace_id": workspace_id,
            "permissions": permissions or {},
            "chars": len(text),
            **(metadata or {}),
        }
        concepts = _extract_concepts(full_text, limit=15)
        triples = _extract_triples(full_text, concepts)
        chunk_ids: List[str] = []

        with self._connect() as conn:
            # Must be read before the upsert below creates the node.
            duplicate = self._node_exists(conn, content_id)

            # Content node (the document).
            self._upsert_node(
                conn,
                content_id,
                "Document",
                title,
                summary=(text or title)[:500],
                metadata=node_meta,
                raw=node_meta,
            )

            # Source node + indexed_from edge (provenance).
            source_node_id = self._attach_source_node(
                conn,
                content_id,
                source_type=source_type,
                source_uri=source_uri,
                title=title,
                content_hash=content_hash,
                captured_at=captured_at,
                extra={"owner": owner, "workspace_id": workspace_id},
            )

            # Owner (Person) node and Person -> Document "업로드함" edge.
            if owner:
                person_id = f"person:{_slug(owner)}"
                self._upsert_node(
                    conn, person_id, "Person", owner, metadata={"email": owner}
                )
                self._upsert_edge(conn, person_id, content_id, "업로드함", weight=1.0)

            # Chat -> Document "언급함" edge.
            if conversation_id:
                conv_id = f"conversation:{_slug(conversation_id)}"
                self._upsert_node(conn, conv_id, "Chat", conversation_id)
                self._upsert_edge(conn, conv_id, content_id, "언급함", weight=0.8)

            # Retrieval chunks of the body text.
            chunk_ids = self._ingest_rag_chunks(
                conn, content_id, text, label_prefix=f"{title} "
            )

            # Concept nodes, each linked from the Document ("포함함").
            concept_ids = self._ingest_concept_nodes(
                conn,
                content_id,
                concepts,
                full_text,
                relation="포함함",
                weight=0.8,
                node_metadata={"source_type": source_type},
            )

            # Concept-to-concept edges.
            self._ingest_triple_edges(conn, concept_ids, triples)

            # Task / Decision nodes, linked from the Document ("포함함").
            self._ingest_semantic_nodes(
                conn,
                content_id,
                _semantic_items(text),
                relation="포함함",
                node_metadata={"source_node": content_id},
            )

        return {
            "node_id": content_id,
            "type": "Document",
            "source_node_id": source_node_id,
            "content_hash": content_hash,
            "chunk_ids": chunk_ids,
            "chunk_count": len(chunk_ids),
            "duplicate": duplicate,
            "captured_at": captured_at,
        }