ltcai 4.3.3 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. package/README.md +21 -16
  2. package/docs/CHANGELOG.md +37 -0
  3. package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
  4. package/lattice_brain/__init__.py +38 -23
  5. package/lattice_brain/_kg_common.py +11 -1
  6. package/lattice_brain/context.py +212 -2
  7. package/lattice_brain/conversations.py +234 -1
  8. package/lattice_brain/discovery.py +11 -1
  9. package/lattice_brain/documents.py +11 -1
  10. package/lattice_brain/graph/__init__.py +28 -0
  11. package/lattice_brain/graph/_kg_common.py +1123 -0
  12. package/lattice_brain/graph/curator.py +473 -0
  13. package/lattice_brain/graph/discovery.py +1455 -0
  14. package/lattice_brain/graph/documents.py +218 -0
  15. package/lattice_brain/graph/identity.py +175 -0
  16. package/lattice_brain/graph/ingest.py +644 -0
  17. package/lattice_brain/graph/network.py +205 -0
  18. package/lattice_brain/graph/projection.py +571 -0
  19. package/lattice_brain/graph/provenance.py +401 -0
  20. package/lattice_brain/graph/retrieval.py +1341 -0
  21. package/lattice_brain/graph/schema.py +640 -0
  22. package/lattice_brain/graph/store.py +237 -0
  23. package/lattice_brain/graph/write_master.py +225 -0
  24. package/lattice_brain/identity.py +11 -13
  25. package/lattice_brain/ingest.py +11 -1
  26. package/lattice_brain/ingestion.py +318 -0
  27. package/lattice_brain/memory.py +100 -1
  28. package/lattice_brain/network.py +11 -1
  29. package/lattice_brain/portability.py +431 -0
  30. package/lattice_brain/projection.py +11 -1
  31. package/lattice_brain/provenance.py +11 -1
  32. package/lattice_brain/retrieval.py +11 -1
  33. package/lattice_brain/runtime/__init__.py +32 -0
  34. package/lattice_brain/runtime/agent_runtime.py +569 -0
  35. package/lattice_brain/runtime/hooks.py +754 -0
  36. package/lattice_brain/runtime/multi_agent.py +795 -0
  37. package/lattice_brain/schema.py +11 -1
  38. package/lattice_brain/store.py +10 -2
  39. package/lattice_brain/workflow.py +461 -0
  40. package/lattice_brain/write_master.py +11 -1
  41. package/latticeai/__init__.py +1 -1
  42. package/latticeai/api/agents.py +2 -2
  43. package/latticeai/api/browser.py +1 -1
  44. package/latticeai/api/chat.py +1 -1
  45. package/latticeai/api/computer_use.py +1 -1
  46. package/latticeai/api/hooks.py +2 -2
  47. package/latticeai/api/mcp.py +1 -1
  48. package/latticeai/api/tools.py +1 -1
  49. package/latticeai/api/workflow_designer.py +2 -2
  50. package/latticeai/app_factory.py +4 -4
  51. package/latticeai/brain/__init__.py +24 -6
  52. package/latticeai/brain/_kg_common.py +11 -1117
  53. package/latticeai/brain/context.py +12 -208
  54. package/latticeai/brain/conversations.py +12 -231
  55. package/latticeai/brain/discovery.py +13 -1451
  56. package/latticeai/brain/documents.py +13 -214
  57. package/latticeai/brain/identity.py +11 -169
  58. package/latticeai/brain/ingest.py +13 -640
  59. package/latticeai/brain/memory.py +12 -97
  60. package/latticeai/brain/network.py +12 -200
  61. package/latticeai/brain/projection.py +13 -567
  62. package/latticeai/brain/provenance.py +13 -397
  63. package/latticeai/brain/retrieval.py +13 -1337
  64. package/latticeai/brain/schema.py +12 -635
  65. package/latticeai/brain/store.py +13 -233
  66. package/latticeai/brain/write_master.py +13 -221
  67. package/latticeai/core/agent.py +1 -1
  68. package/latticeai/core/agent_registry.py +2 -2
  69. package/latticeai/core/builtin_hooks.py +2 -2
  70. package/latticeai/core/graph_curator.py +6 -468
  71. package/latticeai/core/hooks.py +6 -749
  72. package/latticeai/core/marketplace.py +1 -1
  73. package/latticeai/core/multi_agent.py +6 -790
  74. package/latticeai/core/workflow_engine.py +6 -456
  75. package/latticeai/core/workspace_os.py +1 -1
  76. package/latticeai/services/agent_runtime.py +6 -564
  77. package/latticeai/services/ingestion.py +6 -313
  78. package/latticeai/services/kg_portability.py +6 -426
  79. package/latticeai/services/platform_runtime.py +3 -3
  80. package/latticeai/services/run_executor.py +1 -1
  81. package/latticeai/services/upload_service.py +1 -1
  82. package/p_reinforce.py +1 -1
  83. package/package.json +1 -1
  84. package/scripts/bump_version.py +1 -1
  85. package/scripts/wheel_smoke.py +7 -0
  86. package/src-tauri/Cargo.lock +1 -1
  87. package/src-tauri/Cargo.toml +1 -1
  88. package/src-tauri/tauri.conf.json +1 -1
  89. package/static/app/asset-manifest.json +1 -1
@@ -0,0 +1,640 @@
1
+ """
2
+ Lattice AI — Knowledge Graph v2 schema (PPT spec aligned)
3
+ =========================================================
4
+
5
+ 명세: ``lattice_ai_full_spec.pptx`` 슬라이드 20~22 (Node / Edge / Data Model)
6
+
7
+ 목적
8
+ ----
9
+ 기존 ``knowledge_graph.py`` 의 자유 문자열 노드/엣지 타입을 **명시 enum + SQLite v2
10
+ 스키마** 로 정식화한다. 이 모듈은 **스키마/초기화/프로젝션 지원** 역할만 담당한다:
11
+ ``NodeType``/``EdgeType`` taxonomy + legacy 정규화 매핑, ``nodes_v2``/``edges_v2``
12
+ DDL(``SCHEMA_SQL``), 그리고 ``KGStoreV2``(스키마 init·heal·stats).
13
+
14
+ 실제 데이터 read/write 는 ``knowledge_graph.py`` 의 ``KnowledgeGraphStore`` 가
15
+ legacy 테이블에 대한 dual-write 프로젝션(raw SQL) + ``kgv2_*`` 재구성 뷰로 수행한다.
16
+ (과거의 native ``Node``/``Edge`` 모델과 ``KGStoreV2.upsert_*``/``get_node``/
17
+ ``search_*`` API 는 production 에서 쓰이지 않아 제거되었다.)
18
+
19
+ 설계 원칙
20
+ ---------
21
+ 1. **기존 코드를 깨지 않는다**: 새 테이블 이름은 ``nodes_v2`` / ``edges_v2``
22
+ 로 분리. 기존 ``nodes`` / ``edges`` 와 공존한다. legacy → v2 reprojection 은
23
+ ``knowledge_graph.py`` 의 버전 게이트 백필 한 곳에서만 수행한다.
24
+ 2. **정규화 + 무손실**: legacy 자유 문자열 타입은 ``NodeType``/``EdgeType``
25
+ superset 으로 정규화해 ``type`` 칼럼에 저장하고, 원본 문자열은 ``legacy_type``
26
+ 칼럼에 그대로 보존한다. summary 와 metadata 는 ``attrs._kg`` 패스스루 blob 이
27
+ 아니라 전용 ``summary`` 칼럼 / ``attrs``·``metadata`` 칼럼에 1급으로 저장한다.
28
+ 3. **표준 라이브러리만 사용**: 외부 의존성 없이 ``sqlite3`` 만으로 동작한다.
29
+ 4. **정규화 매핑은 명시적**: 한글 동사/legacy 라벨 → 영문 enum 표가 코드 안에
30
+ 들어 있어서 어떤 옛 라벨이 어디로 매핑되는지 한눈에 보인다.
31
+
32
+ 사용 예
33
+ -------
34
+ ```python
35
+ from kg_schema import KGStoreV2
36
+
37
+ store = KGStoreV2("/Users/me/.ltcai/kg_v2.db")
38
+ store.init_schema() # nodes_v2 / edges_v2 생성 + 컬럼 drift self-heal
39
+ print(store.stats()) # {"nodes": ..., "by_node_type": {...}, ...}
40
+ ```
41
+ """
42
+
43
+ from __future__ import annotations
44
+
45
+ import json
46
+ import os
47
+ import logging
48
+ import sqlite3
49
+ from contextlib import contextmanager
50
+ from enum import Enum
51
+ from typing import Any, Dict, Optional
52
+
53
+
54
+ # ── Schema version ──────────────────────────────────────────────────────────
55
+ KG_SCHEMA_V2_VERSION = 2
56
+ EMBED_DIM = int(os.getenv("LATTICEAI_EMBED_DIM", "1024"))
57
+
58
+
59
+ # ── Node / Edge taxonomy (PPT 슬라이드 20·21) ──────────────────────────────
60
class NodeType(str, Enum):
    """Every "noun" that can appear in the workspace graph.

    The first group of members mirrors the catalog on slide 20 of the product
    spec. The second group promotes the free-form type strings that the legacy
    ``knowledge_graph.py`` actually emitted to first-class members, so
    ``from_legacy`` can normalize them without losing meaning (for example
    ``Computer`` -> ``COMPUTER``). Only unknown or dynamic (event) types fall
    back to ``CONCEPT``. The raw string is kept separately in
    ``nodes_v2.legacy_type``, so normalization never discards information.
    """

    # --- Canonical catalog (spec slide 20) ---
    CONVERSATION = "CONVERSATION"  # a whole conversation session
    MESSAGE = "MESSAGE"  # a single utterance
    FILE = "FILE"  # an uploaded / linked file
    DOCUMENT = "DOCUMENT"  # a generated / managed document (report, plan, ...)
    CHUNK = "CHUNK"  # a split chunk of a file
    CODE_SYMBOL = "CODE_SYMBOL"  # function / class / module
    CONCEPT = "CONCEPT"  # an extracted concept / tag
    PERSON = "PERSON"  # user / collaborator
    MODEL = "MODEL"  # local / remote LLM
    TOOL = "TOOL"  # MCP server / external tool
    PROJECT = "PROJECT"  # topic-scoped work space

    # --- Legacy superset: node types knowledge_graph.py really produced ---
    COMPUTER = "COMPUTER"  # my computer (local scan root)
    DRIVE = "DRIVE"  # drive / volume
    FOLDER = "FOLDER"  # folder
    CODE_FILE = "CODE_FILE"  # code file (.py/.ts, ...)
    SPREADSHEET = "SPREADSHEET"  # Excel / CSV
    SLIDE_DECK = "SLIDE_DECK"  # presentation
    IMAGE = "IMAGE"  # image file
    IMAGE_TEXT = "IMAGE_TEXT"  # OCR text
    SLIDE = "SLIDE"  # one slide of a deck
    PAGE = "PAGE"  # one page of a document
    SHEET = "SHEET"  # one tab of a spreadsheet
    SECTION = "SECTION"  # document section
    CHAT = "CHAT"  # conversation session (chat UI)
    AI_RESPONSE = "AI_RESPONSE"  # assistant utterance
    TOPIC = "TOPIC"  # subject / topic
    FEATURE = "FEATURE"  # software feature
    TASK = "TASK"  # to-do item
    DECISION = "DECISION"  # a decision
    ERROR = "ERROR"  # error / bug
    EVENT = "EVENT"  # analytics / system event (dynamic-type fallback)

    # --- v3.6.0 "Knowledge Graph First" ---
    # First-class entities so every data source converges on the graph.
    # Additive and extensible: a new domain entity only needs a member here
    # plus an alias entry in _LEGACY_NODE_MAP.
    SOURCE = "SOURCE"  # origin of ingested data (file / URL / browser tab / git ...)
    REPOSITORY = "REPOSITORY"  # git repository
    MEETING = "MEETING"  # meeting
    ORGANIZATION = "ORGANIZATION"  # organization / company / team
    WORKFLOW = "WORKFLOW"  # workflow definition / run
    AGENT = "AGENT"  # agent (role / executing actor)

    @classmethod
    def from_legacy(cls, label: str) -> "NodeType":
        """Normalize a free-form legacy type string to a canonical member.

        The label is stripped, then matched case-insensitively against the
        canonical values first, so native writes such as ``CODE_FILE`` or
        ``AI_RESPONSE`` round-trip exactly instead of degrading to
        ``CONCEPT``. Anything else is looked up (lower-cased) in
        ``_LEGACY_NODE_MAP``; unmapped labels, including an empty or ``None``
        label, yield ``CONCEPT``.
        """
        text = (label or "").strip()
        try:
            resolved = cls(text.upper())
        except ValueError:
            # Not a canonical value: consult the legacy alias table.
            resolved = _LEGACY_NODE_MAP.get(text.lower(), cls.CONCEPT)
        return resolved
128
+
129
+
130
class EdgeType(str, Enum):
    """Directed, explicitly typed relationships between nodes (spec slide 21)."""

    # --- Canonical catalog ---
    CONTAINS = "CONTAINS"  # FILE → CHUNK
    MENTIONS = "MENTIONS"  # MESSAGE → CONCEPT
    REFERENCES = "REFERENCES"  # FILE → FILE / URL
    REPLIES_TO = "REPLIES_TO"  # MESSAGE → MESSAGE
    AUTHORED_BY = "AUTHORED_BY"  # FILE → PERSON
    USES = "USES"  # PROJECT → TOOL / MODEL
    DERIVED_FROM = "DERIVED_FROM"  # CHUNK → CHUNK (summaries etc.)
    SIMILAR_TO = "SIMILAR_TO"  # ANY ↔ ANY (semantic similarity)
    DEPENDS_ON = "DEPENDS_ON"  # CODE_SYMBOL → CODE_SYMBOL
    TAGGED_AS = "TAGGED_AS"  # ANY → CONCEPT
    VERSION_OF = "VERSION_OF"  # FILE → FILE (history)
    GRANTS_ACCESS = "GRANTS_ACCESS"  # PERSON → RESOURCE
    USED_IN = "USED_IN"  # CONCEPT → DOCUMENT (used in a document)
    INSPIRED_BY = "INSPIRED_BY"  # DOCUMENT → DOCUMENT (inspiration / reference)
    CONTRADICTS = "CONTRADICTS"  # DOCUMENT ↔ DOCUMENT (conflict)
    EVOLVES_FROM = "EVOLVES_FROM"  # DOCUMENT → DOCUMENT (evolution / revision)

    # --- Legacy superset: edge types knowledge_graph.py really produced ---
    UPLOADED_BY = "UPLOADED_BY"  # PERSON → FILE (uploaded)
    WROTE = "WROTE"  # PERSON → CONVERSATION (wrote)
    HAS_EVENT = "HAS_EVENT"  # CONVERSATION → EVENT (has_event)
    TRIGGERED = "TRIGGERED"  # PERSON → EVENT (triggered)
    HAS_SLIDE = "HAS_SLIDE"  # SLIDE_DECK → SLIDE (has_slide)
    HAS_PAGE = "HAS_PAGE"  # DOCUMENT → PAGE (has_page)
    HAS_SHEET = "HAS_SHEET"  # SPREADSHEET → SHEET (has_sheet)
    HAS_CHUNK = "HAS_CHUNK"  # FILE → CHUNK (has_chunk)
    CONTAINS_IMAGE = "CONTAINS_IMAGE"  # FILE → IMAGE (contains_image)
    CONTAINS_SIGNAL = "CONTAINS_SIGNAL"  # NODE → CONCEPT (contains_signal)
    DISCUSSES = "DISCUSSES"  # SLIDE/PAGE → TOPIC (discusses)
    IMPLIES = "IMPLIES"  # NODE → NODE (implies)
    RELATED_TO = "RELATED_TO"  # ANY ↔ ANY (related_to)

    # --- v3.6.0 "Knowledge Graph First" ---
    # Provenance / ownership / composition / decision relations promoted to
    # first-class edges. Additive: a new relation only needs a member here
    # plus an alias entry in _LEGACY_EDGE_MAP.
    INDEXED_FROM = "INDEXED_FROM"  # NODE → SOURCE (which source it was indexed from)
    MODIFIED_BY = "MODIFIED_BY"  # NODE → PERSON (last modifier)
    BELONGS_TO_PROJECT = "BELONGS_TO_PROJECT"  # NODE → PROJECT
    PART_OF = "PART_OF"  # NODE → NODE (component relation)
    DISCUSSED_IN = "DISCUSSED_IN"  # CONCEPT/DECISION → MEETING/CHAT
    DECIDED_BY = "DECIDED_BY"  # DECISION → PERSON
    GENERATED_BY = "GENERATED_BY"  # NODE → AGENT/MODEL/WORKFLOW
    USED_BY_AGENT = "USED_BY_AGENT"  # NODE → AGENT (used by an agent)

    @classmethod
    def from_legacy(cls, label: str) -> "EdgeType":
        """Normalize a free-form legacy string / Korean verb to a member.

        The label is stripped, then matched case-insensitively against the
        canonical values first so native canonical writes round-trip exactly.
        Anything else is looked up (lower-cased) in ``_LEGACY_EDGE_MAP``;
        unmapped labels, including an empty or ``None`` label, yield
        ``MENTIONS``. The raw string is preserved separately in
        ``edges_v2.legacy_type``, so the fallback does not lose information.
        """
        text = (label or "").strip()
        try:
            resolved = cls(text.upper())
        except ValueError:
            # Not a canonical value: consult the legacy alias table.
            resolved = _LEGACY_EDGE_MAP.get(text.lower(), cls.MENTIONS)
        return resolved
188
+
189
+
190
+ # legacy(자유 문자열 / 한글 동사) → enum 매핑 표.
191
+ # superset 정규화: 알려진 legacy 타입은 1:1 의미 보존 매핑, 미지/동적 타입만 폴백.
192
+ _LEGACY_NODE_MAP: Dict[str, NodeType] = {
193
+ "conversation": NodeType.CONVERSATION,
194
+ "chat": NodeType.CHAT,
195
+ "message": NodeType.MESSAGE,
196
+ "airesponse": NodeType.AI_RESPONSE,
197
+ "file": NodeType.FILE,
198
+ "codefile": NodeType.CODE_FILE,
199
+ "spreadsheet": NodeType.SPREADSHEET,
200
+ "slidedeck": NodeType.SLIDE_DECK,
201
+ "image": NodeType.IMAGE,
202
+ "imagetext": NodeType.IMAGE_TEXT,
203
+ "computer": NodeType.COMPUTER,
204
+ "drive": NodeType.DRIVE,
205
+ "folder": NodeType.FOLDER,
206
+ "page": NodeType.PAGE,
207
+ "sheet": NodeType.SHEET,
208
+ "slide": NodeType.SLIDE,
209
+ "section": NodeType.SECTION,
210
+ "chunk": NodeType.CHUNK,
211
+ "code": NodeType.CODE_SYMBOL,
212
+ "concept": NodeType.CONCEPT,
213
+ "topic": NodeType.TOPIC,
214
+ "feature": NodeType.FEATURE,
215
+ "task": NodeType.TASK,
216
+ "decision": NodeType.DECISION,
217
+ "error": NodeType.ERROR,
218
+ "event": NodeType.EVENT,
219
+ "tag": NodeType.CONCEPT,
220
+ "person": NodeType.PERSON,
221
+ "user": NodeType.PERSON,
222
+ "model": NodeType.MODEL,
223
+ "tool": NodeType.TOOL,
224
+ "mcp": NodeType.TOOL,
225
+ "project": NodeType.PROJECT,
226
+ "workspace": NodeType.PROJECT,
227
+ "document": NodeType.DOCUMENT,
228
+ "report": NodeType.DOCUMENT,
229
+ "plan": NodeType.DOCUMENT,
230
+ "proposal": NodeType.DOCUMENT,
231
+ "보고서": NodeType.DOCUMENT,
232
+ "계획서": NodeType.DOCUMENT,
233
+ "기획서": NodeType.DOCUMENT,
234
+ # v3.6.0 Knowledge Graph First 엔티티
235
+ "source": NodeType.SOURCE,
236
+ "ingestionsource": NodeType.SOURCE,
237
+ "repository": NodeType.REPOSITORY,
238
+ "repo": NodeType.REPOSITORY,
239
+ "gitrepo": NodeType.REPOSITORY,
240
+ "meeting": NodeType.MEETING,
241
+ "organization": NodeType.ORGANIZATION,
242
+ "org": NodeType.ORGANIZATION,
243
+ "company": NodeType.ORGANIZATION,
244
+ "team": NodeType.ORGANIZATION,
245
+ "workflow": NodeType.WORKFLOW,
246
+ "agent": NodeType.AGENT,
247
+ }
248
+
249
+ _LEGACY_EDGE_MAP: Dict[str, EdgeType] = {
250
+ # 한글 동사 (knowledge_graph.py 의 EDGE_VERB)
251
+ "언급함": EdgeType.MENTIONS,
252
+ "포함함": EdgeType.CONTAINS,
253
+ "해결함": EdgeType.REFERENCES,
254
+ "의존함": EdgeType.DEPENDS_ON,
255
+ "설명함": EdgeType.MENTIONS,
256
+ "비교함": EdgeType.SIMILAR_TO,
257
+ "사용함": EdgeType.USES,
258
+ "연결함": EdgeType.REFERENCES,
259
+ "확장함": EdgeType.DERIVED_FROM,
260
+ "생성함": EdgeType.AUTHORED_BY,
261
+ "작성함": EdgeType.WROTE,
262
+ "업로드함": EdgeType.UPLOADED_BY,
263
+ "대체함": EdgeType.VERSION_OF,
264
+ "지원함": EdgeType.USES,
265
+ "발생함": EdgeType.REFERENCES,
266
+ "관련됨": EdgeType.MENTIONS,
267
+ # 영문 별칭
268
+ "mentions": EdgeType.MENTIONS,
269
+ "contains": EdgeType.CONTAINS,
270
+ "references": EdgeType.REFERENCES,
271
+ "replies_to": EdgeType.REPLIES_TO,
272
+ "authored_by": EdgeType.AUTHORED_BY,
273
+ "uses": EdgeType.USES,
274
+ "derived_from": EdgeType.DERIVED_FROM,
275
+ "similar_to": EdgeType.SIMILAR_TO,
276
+ "depends_on": EdgeType.DEPENDS_ON,
277
+ "tagged_as": EdgeType.TAGGED_AS,
278
+ "version_of": EdgeType.VERSION_OF,
279
+ "grants_access": EdgeType.GRANTS_ACCESS,
280
+ "used_in": EdgeType.USED_IN,
281
+ "inspired_by": EdgeType.INSPIRED_BY,
282
+ "contradicts": EdgeType.CONTRADICTS,
283
+ "evolves_from": EdgeType.EVOLVES_FROM,
284
+ # legacy superset 별칭 (knowledge_graph.py 가 실제로 쓰던 엣지 타입)
285
+ "uploaded_by": EdgeType.UPLOADED_BY,
286
+ "wrote": EdgeType.WROTE,
287
+ "has_event": EdgeType.HAS_EVENT,
288
+ "triggered": EdgeType.TRIGGERED,
289
+ "has_slide": EdgeType.HAS_SLIDE,
290
+ "has_page": EdgeType.HAS_PAGE,
291
+ "has_sheet": EdgeType.HAS_SHEET,
292
+ "has_chunk": EdgeType.HAS_CHUNK,
293
+ "contains_image": EdgeType.CONTAINS_IMAGE,
294
+ "contains_signal": EdgeType.CONTAINS_SIGNAL,
295
+ "discusses": EdgeType.DISCUSSES,
296
+ "implies": EdgeType.IMPLIES,
297
+ "related_to": EdgeType.RELATED_TO,
298
+ "활용됨": EdgeType.USED_IN,
299
+ "영감받음": EdgeType.INSPIRED_BY,
300
+ "상충함": EdgeType.CONTRADICTS,
301
+ "발전함": EdgeType.EVOLVES_FROM,
302
+ # v3.6.0 Knowledge Graph First 관계
303
+ "indexed_from": EdgeType.INDEXED_FROM,
304
+ "modified_by": EdgeType.MODIFIED_BY,
305
+ "belongs_to_project": EdgeType.BELONGS_TO_PROJECT,
306
+ "belongs_to": EdgeType.BELONGS_TO_PROJECT,
307
+ "part_of": EdgeType.PART_OF,
308
+ "discussed_in": EdgeType.DISCUSSED_IN,
309
+ "decided_by": EdgeType.DECIDED_BY,
310
+ "generated_by": EdgeType.GENERATED_BY,
311
+ "used_by_agent": EdgeType.USED_BY_AGENT,
312
+ "색인됨": EdgeType.INDEXED_FROM,
313
+ "수정함": EdgeType.MODIFIED_BY,
314
+ "결정함": EdgeType.DECIDED_BY,
315
+ "구성요소": EdgeType.PART_OF,
316
+ }
317
+
318
+ # ── SQLite v2 store ─────────────────────────────────────────────────────────
319
+ SCHEMA_SQL = """
320
+ CREATE TABLE IF NOT EXISTS kg_meta (
321
+ key TEXT PRIMARY KEY,
322
+ value TEXT NOT NULL
323
+ );
324
+
325
+ CREATE TABLE IF NOT EXISTS nodes_v2 (
326
+ id TEXT PRIMARY KEY,
327
+ type TEXT NOT NULL,
328
+ legacy_type TEXT,
329
+ label TEXT NOT NULL,
330
+ summary TEXT,
331
+ attrs TEXT NOT NULL DEFAULT '{}',
332
+ embedding BLOB,
333
+ owner_id TEXT,
334
+ -- NULL workspace_id = legacy-global (pre-scoping rows, readable machine-wide).
335
+ workspace_id TEXT,
336
+ -- 'legacy' marks rows that predate scoping — the 'private' default must not
337
+ -- silently privatize previously machine-shared data (design-review ruling).
338
+ visibility TEXT NOT NULL DEFAULT 'private',
339
+ -- Revision chain: a node replaced by a newer one points at its successor.
340
+ superseded_by TEXT,
341
+ created_at TEXT NOT NULL,
342
+ updated_at TEXT NOT NULL,
343
+ style TEXT,
344
+ tone TEXT,
345
+ importance_score REAL NOT NULL DEFAULT 0.0,
346
+ last_used TEXT
347
+ );
348
+
349
+ CREATE TABLE IF NOT EXISTS edges_v2 (
350
+ id TEXT PRIMARY KEY,
351
+ source TEXT NOT NULL,
352
+ target TEXT NOT NULL,
353
+ type TEXT NOT NULL,
354
+ legacy_type TEXT NOT NULL DEFAULT '',
355
+ weight REAL NOT NULL DEFAULT 1.0,
356
+ confidence REAL NOT NULL DEFAULT 1.0,
357
+ evidence TEXT NOT NULL DEFAULT '[]',
358
+ metadata TEXT NOT NULL DEFAULT '{}',
359
+ created_by TEXT NOT NULL DEFAULT 'user',
360
+ created_at TEXT NOT NULL,
361
+ -- Edge identity (v4): the normalized type AND the raw legacy type.
362
+ -- Migrated rows keep their legacy_type discriminator, so two distinct
363
+ -- legacy strings between one pair (e.g. "mentions" / "관련됨") stay
364
+ -- distinct even though both normalize to MENTIONS. Native canonical
365
+ -- writes carry legacy_type='' so their identity is effectively
366
+ -- (source, target, type) — two canonical types between the same pair
367
+ -- (e.g. MENTIONS + CONTAINS) never collide. The pre-v4
368
+ -- UNIQUE(source, target, legacy_type) would have silently merged them.
369
+ UNIQUE(source, target, type, legacy_type),
370
+ FOREIGN KEY(source) REFERENCES nodes_v2(id) ON DELETE CASCADE,
371
+ FOREIGN KEY(target) REFERENCES nodes_v2(id) ON DELETE CASCADE
372
+ );
373
+
374
+ -- Temporal dimension (v4): every repeated observation of a relationship is
375
+ -- recorded — edges_v2's UNIQUE identity + weight=max would otherwise erase
376
+ -- when something was learned, how often, and whether it still holds.
377
+ CREATE TABLE IF NOT EXISTS edge_occurrences (
378
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
379
+ edge_id TEXT NOT NULL,
380
+ observed_at TEXT NOT NULL,
381
+ weight REAL NOT NULL DEFAULT 1.0,
382
+ source TEXT,
383
+ FOREIGN KEY(edge_id) REFERENCES edges_v2(id) ON DELETE CASCADE
384
+ );
385
+ CREATE INDEX IF NOT EXISTS idx_edge_occurrences_edge ON edge_occurrences(edge_id);
386
+ CREATE INDEX IF NOT EXISTS idx_edge_occurrences_time ON edge_occurrences(observed_at);
387
+
388
+ CREATE INDEX IF NOT EXISTS idx_nodes_v2_type ON nodes_v2(type);
389
+ CREATE INDEX IF NOT EXISTS idx_nodes_v2_legacy ON nodes_v2(legacy_type);
390
+ CREATE INDEX IF NOT EXISTS idx_nodes_v2_owner ON nodes_v2(owner_id);
391
+ CREATE INDEX IF NOT EXISTS idx_edges_v2_source ON edges_v2(source);
392
+ CREATE INDEX IF NOT EXISTS idx_edges_v2_target ON edges_v2(target);
393
+ CREATE INDEX IF NOT EXISTS idx_edges_v2_type ON edges_v2(type);
394
+ CREATE INDEX IF NOT EXISTS idx_edges_v2_legacy ON edges_v2(legacy_type);
395
+ """
396
+
397
+
398
+ def _exec_script(conn: sqlite3.Connection, script: str) -> None:
399
+ """Run a multi-statement SQL script on ``conn`` statement-by-statement.
400
+
401
+ Unlike ``sqlite3.Connection.executescript``, this does NOT issue an implicit
402
+ COMMIT before running, so the statements join the caller's open transaction.
403
+ Safe for our schema/view DDL (no ``;`` inside string literals).
404
+ """
405
+ for stmt in script.split(";"):
406
+ s = stmt.strip()
407
+ if s:
408
+ conn.execute(s)
409
+
410
+
411
class KGStoreV2:
    """Lightweight SQLite-backed v2 store used only for schema setup.

    ``init_schema`` creates and heals ``nodes_v2`` / ``edges_v2`` and
    ``stats`` exposes row counts. Actual data reads and writes are done by
    the ``knowledge_graph.KnowledgeGraphStore`` projection, so this class
    deliberately has no native upsert / get / search API.
    """

    def __init__(self, db_path: str):
        """Remember the SQLite database path; no connection is opened here."""
        self.db_path = db_path

    @contextmanager
    def _conn(self):
        """Yield a private connection with Row access and foreign keys on.

        The connection is committed only when the ``with`` body finishes
        without raising; it is closed in every case.
        """
        conn = sqlite3.connect(self.db_path)
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys = ON")
        try:
            yield conn
            conn.commit()
        finally:
            conn.close()

    # Columns the current code writes; used to detect schema-evolution drift in
    # v2 tables that an older ``CREATE TABLE IF NOT EXISTS`` left behind.
    _V2_EXPECTED_COLUMNS = {
        "edges_v2": {
            "id",
            "source",
            "target",
            "type",
            "legacy_type",
            "weight",
            "confidence",
            "evidence",
            "metadata",
            "created_by",
            "created_at",
        },
        "nodes_v2": {
            "id",
            "type",
            "legacy_type",
            "label",
            "summary",
            "attrs",
            "embedding",
            "owner_id",
            "workspace_id",
            "visibility",
            "superseded_by",
            "created_at",
            "updated_at",
            "style",
            "tone",
            "importance_score",
            "last_used",
        },
    }

    # Columns added after a table's first release that can be healed in place
    # with ALTER TABLE ADD COLUMN (nullable / defaulted only).
    _V2_ADDABLE_COLUMNS = {
        "nodes_v2": {"workspace_id": "TEXT", "superseded_by": "TEXT"},
        "edges_v2": {},
    }

    def _drop_stale_empty_v2_tables(self, conn: sqlite3.Connection) -> None:
        """Heal or drop v2 tables whose columns predate the current schema.

        ``CREATE TABLE IF NOT EXISTS`` never upgrades an existing table, so a
        table created by an older version keeps its old columns and breaks
        inserts. For each existing v2 table that lacks expected columns:

        * columns listed in ``_V2_ADDABLE_COLUMNS`` are added in place with
          ``ALTER TABLE ... ADD COLUMN`` (data is untouched);
        * if other columns are still missing, the table is dropped only when
          it holds no rows, so the schema script can recreate it;
        * a non-empty table is left as is and a warning is logged.
        """
        # edges_v2 first (it has FKs into nodes_v2)
        for table in ("edges_v2", "nodes_v2"):
            exists = conn.execute(
                "SELECT 1 FROM sqlite_master WHERE type='table' AND name=?", (table,)
            ).fetchone()
            if not exists:
                continue
            # PRAGMA table_info yields (cid, name, type, ...); index 1 is the
            # column name. Positional access works with any row factory.
            cols = {
                r[1] for r in conn.execute(f"PRAGMA table_info({table})").fetchall()
            }
            missing = self._V2_EXPECTED_COLUMNS[table] - cols
            if not missing:
                continue
            # Additive columns heal in place without touching data.
            addable = self._V2_ADDABLE_COLUMNS.get(table, {})
            for col in sorted(missing & set(addable)):
                conn.execute(f"ALTER TABLE {table} ADD COLUMN {col} {addable[col]}")
            missing -= set(addable)
            if not missing:
                continue
            count = conn.execute(f"SELECT COUNT(*) FROM {table}").fetchone()[0]
            if count == 0:
                conn.execute(f"DROP TABLE {table}")
            else:
                logging.warning(
                    "kg_schema: %s is missing columns %s but holds %d rows — "
                    "leaving it untouched (manual migration required).",
                    table,
                    sorted(missing),
                    count,
                )

    def init_schema(self, conn: Optional[sqlite3.Connection] = None) -> None:
        """Create the v2 schema and record metadata.

        Pass ``conn`` to run inside the caller's open transaction (used by the
        atomic knowledge_graph migration); nothing is committed in that case.
        Otherwise a private connection is opened and committed. Uses
        ``_exec_script`` rather than ``executescript`` so it never
        force-commits the caller's transaction.
        """
        if conn is not None:
            self._init_schema_on(conn)
            return
        with self._conn() as own:
            self._init_schema_on(own)

    def _rebuild_edges_identity(self, conn: sqlite3.Connection) -> None:
        """Migrate edges_v2 from the pre-v4 UNIQUE(source, target, legacy_type)
        identity to UNIQUE(source, target, type, legacy_type).

        SQLite cannot alter constraints, so this is a create→copy→swap inside
        the caller's transaction. Data-preserving: every existing row keeps its
        legacy_type discriminator. Re-entrant: keyed on the actual constraint
        in sqlite_master, not a one-time stamp. Does nothing when edges_v2
        does not exist or already carries the new constraint.
        """
        row = conn.execute(
            "SELECT sql FROM sqlite_master WHERE type='table' AND name='edges_v2'"
        ).fetchone()
        # Read the single selected column by position: a caller-supplied
        # connection (see init_schema) may not use sqlite3.Row, and name-based
        # access on a plain tuple row raises TypeError.
        if not row or "UNIQUE(source, target, type, legacy_type)" in (row[0] or ""):
            return
        # NOTE(review): edge_occurrences declares a foreign key to edges_v2.
        # Confirm that this rename/drop swap cannot cascade-delete occurrence
        # rows on databases where that table already exists.
        conn.execute("ALTER TABLE edges_v2 RENAME TO edges_v2_old")
        # Recreate from the canonical DDL (edges_v2 portion of SCHEMA_SQL).
        start = SCHEMA_SQL.index("CREATE TABLE IF NOT EXISTS edges_v2")
        end = SCHEMA_SQL.index(");", start) + 2
        conn.execute(SCHEMA_SQL[start:end].rstrip(";"))
        conn.execute(
            """
            INSERT INTO edges_v2 (id, source, target, type, legacy_type, weight,
            confidence, evidence, metadata, created_by, created_at)
            SELECT id, source, target, type, legacy_type, weight,
            confidence, evidence, metadata, created_by, created_at
            FROM edges_v2_old
            """
        )
        conn.execute("DROP TABLE edges_v2_old")
        logging.info(
            "kg_schema: rebuilt edges_v2 with (source, target, type, legacy_type) identity"
        )

    def _init_schema_on(self, conn: sqlite3.Connection) -> None:
        """Run the full schema setup on ``conn`` without committing.

        Order matters: stale tables are healed/dropped first, then the edge
        identity is rebuilt if needed, then the idempotent DDL script runs,
        and finally the schema version and embedding dimension are recorded
        in ``kg_meta``.
        """
        self._drop_stale_empty_v2_tables(conn)
        self._rebuild_edges_identity(conn)
        _exec_script(conn, SCHEMA_SQL)
        conn.execute(
            "INSERT OR REPLACE INTO kg_meta(key, value) VALUES (?, ?)",
            ("schema_version", str(KG_SCHEMA_V2_VERSION)),
        )
        conn.execute(
            "INSERT OR REPLACE INTO kg_meta(key, value) VALUES (?, ?)",
            ("embed_dim", str(EMBED_DIM)),
        )

    # ── Maintenance ──────────────────────────────────────────
    def stats(self) -> Dict[str, Any]:
        """Return row counts for the v2 tables.

        The result holds the module's schema version and embedding dimension
        constants, the total node and edge counts, and per-``type`` counts
        for ``nodes_v2`` and ``edges_v2``. Uses a private connection, so the
        tables must already exist in ``db_path``.
        """
        with self._conn() as conn:
            n_nodes = conn.execute("SELECT COUNT(*) FROM nodes_v2").fetchone()[0]
            n_edges = conn.execute("SELECT COUNT(*) FROM edges_v2").fetchone()[0]
            per_type = {
                r["type"]: r["c"]
                for r in conn.execute(
                    "SELECT type, COUNT(*) AS c FROM nodes_v2 GROUP BY type"
                ).fetchall()
            }
            per_edge = {
                r["type"]: r["c"]
                for r in conn.execute(
                    "SELECT type, COUNT(*) AS c FROM edges_v2 GROUP BY type"
                ).fetchall()
            }
        return {
            "schema_version": KG_SCHEMA_V2_VERSION,
            "embed_dim": EMBED_DIM,
            "nodes": n_nodes,
            "edges": n_edges,
            "by_node_type": per_type,
            "by_edge_type": per_edge,
        }
603
+
604
+
605
+ # NOTE: legacy → v2 reprojection lives in ``knowledge_graph.py``
606
+ # (``KnowledgeGraphStore._backfill_v2_if_needed`` / ``_v2_project_node``/_edge),
607
+ # which is the single live, version-gated migration path. The old standalone
608
+ # ``migrate_legacy_to_v2()`` helper + CLI ``migrate`` subcommand were removed as
609
+ # dead code (no callers); the normalized projection now writes the first-class
610
+ # ``legacy_type``/``summary``/``metadata`` columns directly.
611
+
612
+
613
+ # ── CLI ────────────────────────────────────────────────────────────────────
614
def _cli() -> int:
    """Command-line entry point for the KG v2 schema utilities.

    Subcommands (each takes one positional ``db`` path):

    * ``init``  -- create / heal the v2 schema in the database.
    * ``stats`` -- print the store statistics as indented JSON.

    Returns the process exit status: 0 after a handled subcommand, 2 if the
    parsed command matches neither.
    """
    import argparse

    parser = argparse.ArgumentParser(
        prog="kg_schema", description="Lattice AI KG v2 utilities"
    )
    commands = parser.add_subparsers(dest="cmd", required=True)

    # Both subcommands share the same single positional argument.
    subcommand_help = (
        ("init", "initialize v2 schema in a DB"),
        ("stats", "print store statistics"),
    )
    for name, help_text in subcommand_help:
        commands.add_parser(name, help=help_text).add_argument(
            "db", help="path to sqlite db"
        )

    ns = parser.parse_args()
    chosen = ns.cmd
    if chosen == "init":
        KGStoreV2(ns.db).init_schema()
        print(f"initialized v2 schema in {ns.db}")
        return 0
    if chosen == "stats":
        report = KGStoreV2(ns.db).stats()
        print(json.dumps(report, indent=2, ensure_ascii=False))
        return 0
    return 2
637
+
638
+
639
+ if __name__ == "__main__":
640
+ raise SystemExit(_cli())