ltcai 3.6.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +39 -31
- package/docs/CHANGELOG.md +64 -0
- package/docs/REALTIME_COLLABORATION.md +3 -3
- package/docs/V3_FRONTEND.md +9 -8
- package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +552 -0
- package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
- package/docs/kg-schema.md +51 -53
- package/docs/spec-vs-impl.md +10 -10
- package/kg_schema.py +2 -520
- package/knowledge_graph.py +37 -4629
- package/knowledge_graph_api.py +11 -127
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +16 -17
- package/latticeai/api/agents.py +20 -7
- package/latticeai/api/auth.py +46 -15
- package/latticeai/api/chat.py +112 -76
- package/latticeai/api/health.py +1 -1
- package/latticeai/api/hooks.py +1 -1
- package/latticeai/api/invitations.py +100 -0
- package/latticeai/api/knowledge_graph.py +139 -0
- package/latticeai/api/local_files.py +1 -1
- package/latticeai/api/mcp.py +23 -11
- package/latticeai/api/memory.py +1 -1
- package/latticeai/api/models.py +1 -1
- package/latticeai/api/network.py +81 -0
- package/latticeai/api/plugins.py +3 -6
- package/latticeai/api/realtime.py +5 -8
- package/latticeai/api/search.py +26 -2
- package/latticeai/api/security_dashboard.py +2 -3
- package/latticeai/api/setup.py +2 -2
- package/latticeai/api/static_routes.py +11 -16
- package/latticeai/api/tools.py +3 -0
- package/latticeai/api/ui_redirects.py +26 -0
- package/latticeai/api/workflow_designer.py +85 -6
- package/latticeai/api/workspace.py +93 -57
- package/latticeai/app_factory.py +1781 -0
- package/latticeai/brain/__init__.py +18 -0
- package/latticeai/brain/_kg_common.py +1123 -0
- package/latticeai/brain/context.py +213 -0
- package/latticeai/brain/conversations.py +236 -0
- package/latticeai/brain/discovery.py +1455 -0
- package/latticeai/brain/documents.py +218 -0
- package/latticeai/brain/identity.py +175 -0
- package/latticeai/brain/ingest.py +644 -0
- package/latticeai/brain/memory.py +102 -0
- package/latticeai/brain/network.py +205 -0
- package/latticeai/brain/projection.py +561 -0
- package/latticeai/brain/provenance.py +401 -0
- package/latticeai/brain/retrieval.py +1316 -0
- package/latticeai/brain/schema.py +640 -0
- package/latticeai/brain/store.py +216 -0
- package/latticeai/brain/write_master.py +225 -0
- package/latticeai/core/agent.py +31 -7
- package/latticeai/core/audit.py +0 -7
- package/latticeai/core/config.py +1 -1
- package/latticeai/core/context_builder.py +1 -2
- package/latticeai/core/enterprise.py +1 -1
- package/latticeai/core/graph_curator.py +2 -2
- package/latticeai/core/invitations.py +131 -0
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/mcp_registry.py +791 -0
- package/latticeai/core/model_compat.py +1 -1
- package/latticeai/core/model_resolution.py +0 -1
- package/latticeai/core/multi_agent.py +238 -4
- package/latticeai/core/policy.py +54 -0
- package/latticeai/core/realtime.py +65 -44
- package/latticeai/core/security.py +1 -1
- package/latticeai/core/sessions.py +66 -10
- package/latticeai/core/users.py +147 -0
- package/latticeai/core/workflow_engine.py +114 -2
- package/latticeai/core/workspace_os.py +477 -29
- package/latticeai/models/__init__.py +7 -0
- package/latticeai/models/router.py +779 -0
- package/latticeai/server_app.py +29 -1536
- package/latticeai/services/agent_runtime.py +243 -4
- package/latticeai/services/app_context.py +75 -14
- package/latticeai/services/ingestion.py +47 -0
- package/latticeai/services/kg_portability.py +33 -3
- package/latticeai/services/memory_service.py +39 -11
- package/latticeai/services/model_runtime.py +2 -5
- package/latticeai/services/platform_runtime.py +100 -23
- package/latticeai/services/run_executor.py +328 -0
- package/latticeai/services/search_service.py +17 -8
- package/latticeai/services/tool_dispatch.py +12 -2
- package/latticeai/services/triggers.py +241 -0
- package/latticeai/services/upload_service.py +37 -12
- package/latticeai/services/workspace_service.py +55 -16
- package/llm_router.py +29 -772
- package/ltcai_cli.py +1 -2
- package/mcp_registry.py +25 -788
- package/p_reinforce.py +124 -14
- package/package.json +10 -20
- package/scripts/bump_version.py +99 -0
- package/scripts/generate_diagrams.py +0 -1
- package/scripts/lint_v3.mjs +105 -18
- package/scripts/validate_release_artifacts.py +0 -1
- package/scripts/wheel_smoke.py +142 -0
- package/server.py +11 -7
- package/setup_wizard.py +1142 -0
- package/static/sw.js +81 -52
- package/static/v3/asset-manifest.json +33 -25
- package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
- package/static/v3/css/lattice.base.css +1 -1
- package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
- package/static/v3/css/lattice.components.css +1 -1
- package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
- package/static/v3/css/lattice.shell.css +1 -1
- package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
- package/static/v3/css/lattice.tokens.css +3 -0
- package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
- package/static/v3/css/lattice.views.css +2 -2
- package/static/v3/index.html +3 -4
- package/static/v3/js/{app.c541f955.js → app.c5c80c46.js} +1 -1
- package/static/v3/js/core/{api.33d6320e.js → api.ba0fbf14.js} +58 -1
- package/static/v3/js/core/api.js +57 -0
- package/static/v3/js/core/i18n.880e1fec.js +575 -0
- package/static/v3/js/core/i18n.js +575 -0
- package/static/v3/js/core/routes.37522821.js +101 -0
- package/static/v3/js/core/routes.js +71 -63
- package/static/v3/js/core/{shell.8c163e0e.js → shell.e3f6bbfa.js} +68 -39
- package/static/v3/js/core/shell.js +66 -37
- package/static/v3/js/core/{store.34ebd5e6.js → store.7b2aa044.js} +11 -1
- package/static/v3/js/core/store.js +11 -1
- package/static/v3/js/views/account.eff40715.js +143 -0
- package/static/v3/js/views/account.js +143 -0
- package/static/v3/js/views/activity.0d271ef9.js +67 -0
- package/static/v3/js/views/activity.js +67 -0
- package/static/v3/js/views/{admin-users.03bac88c.js → admin-users.f7ac7b43.js} +4 -6
- package/static/v3/js/views/admin-users.js +4 -6
- package/static/v3/js/views/{agents.014d0b74.js → agents.17c5288d.js} +35 -12
- package/static/v3/js/views/agents.js +35 -12
- package/static/v3/js/views/{chat.e6dd7dd0.js → chat.e250e2cc.js} +23 -0
- package/static/v3/js/views/chat.js +23 -0
- package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
- package/static/v3/js/views/graph-canvas.js +509 -0
- package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
- package/static/v3/js/views/hybrid-search.js +1 -2
- package/static/v3/js/views/{knowledge-graph.a96040a5.js → knowledge-graph.4d09c537.js} +60 -44
- package/static/v3/js/views/knowledge-graph.js +60 -44
- package/static/v3/js/views/network.52a4f181.js +97 -0
- package/static/v3/js/views/network.js +97 -0
- package/static/v3/js/views/{planning.9ac3e313.js → planning.4876fd77.js} +26 -5
- package/static/v3/js/views/planning.js +26 -5
- package/static/v3/js/views/runs.b63b2afa.js +144 -0
- package/static/v3/js/views/runs.js +144 -0
- package/static/v3/js/views/{settings.8631fa5e.js → settings.b7140634.js} +7 -8
- package/static/v3/js/views/settings.js +7 -8
- package/static/v3/js/views/snapshots.6f5db095.js +135 -0
- package/static/v3/js/views/snapshots.js +135 -0
- package/static/v3/js/views/{workflows.26c57290.js → workflows.7752225a.js} +87 -2
- package/static/v3/js/views/workflows.js +87 -2
- package/static/v3/js/views/workspace-admin.c466029b.js +156 -0
- package/static/v3/js/views/workspace-admin.js +156 -0
- package/static/vendor/chart.umd.min.js +20 -0
- package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
- package/static/vendor/fonts/inter.css +44 -0
- package/static/vendor/icons/tabler-icons.min.css +4 -0
- package/static/vendor/icons/tabler-icons.woff2 +0 -0
- package/static/vendor/marked.min.js +69 -0
- package/telegram_bot.py +1 -2
- package/tools/commands.py +4 -2
- package/tools/computer.py +1 -1
- package/tools/documents.py +1 -3
- package/tools/filesystem.py +0 -4
- package/tools/knowledge.py +1 -3
- package/tools/network.py +1 -3
- package/codex_telegram_bot.py +0 -195
- package/docs/assets/v3.4.0/agent-run.png +0 -0
- package/docs/assets/v3.4.0/agents.png +0 -0
- package/docs/assets/v3.4.0/before/chat-before.png +0 -0
- package/docs/assets/v3.4.0/before/files-before.png +0 -0
- package/docs/assets/v3.4.0/chat.png +0 -0
- package/docs/assets/v3.4.0/connect-folder.png +0 -0
- package/docs/assets/v3.4.0/files.png +0 -0
- package/docs/assets/v3.4.0/home.png +0 -0
- package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
- package/docs/assets/v3.4.0/local-agent.png +0 -0
- package/docs/assets/v3.4.0/memory.png +0 -0
- package/docs/assets/v3.4.0/settings.png +0 -0
- package/docs/assets/v3.4.0/vision-input.png +0 -0
- package/docs/assets/v3.4.0/workflows.png +0 -0
- package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
- package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.1/local-agent.png +0 -0
- package/docs/images/admin-dashboard.png +0 -0
- package/docs/images/architecture.png +0 -0
- package/docs/images/enterprise.png +0 -0
- package/docs/images/graph.png +0 -0
- package/docs/images/hero.gif +0 -0
- package/docs/images/knowledge-graph.png +0 -0
- package/docs/images/lattice-ai-demo.gif +0 -0
- package/docs/images/lattice-ai-hero.png +0 -0
- package/docs/images/logo.svg +0 -33
- package/docs/images/mobile-responsive.png +0 -0
- package/docs/images/model-recommendation.png +0 -0
- package/docs/images/onboarding.png +0 -0
- package/docs/images/organization.png +0 -0
- package/docs/images/pipeline.png +0 -0
- package/docs/images/screenshot-admin.png +0 -0
- package/docs/images/screenshot-chat.png +0 -0
- package/docs/images/screenshot-graph.png +0 -0
- package/docs/images/skills.png +0 -0
- package/docs/images/workspace-dark.png +0 -0
- package/docs/images/workspace-light.png +0 -0
- package/docs/images/workspace.png +0 -0
- package/requirements.txt +0 -16
- package/static/account.html +0 -115
- package/static/activity.html +0 -73
- package/static/admin.html +0 -488
- package/static/agents.html +0 -139
- package/static/chat.html +0 -844
- package/static/css/reference/account.css +0 -439
- package/static/css/reference/admin.css +0 -610
- package/static/css/reference/base.css +0 -1661
- package/static/css/reference/chat.css +0 -4623
- package/static/css/reference/graph.css +0 -1016
- package/static/css/responsive.css +0 -861
- package/static/graph.html +0 -124
- package/static/platform.css +0 -104
- package/static/plugins.html +0 -136
- package/static/scripts/account.js +0 -238
- package/static/scripts/admin.js +0 -1614
- package/static/scripts/chat.js +0 -5081
- package/static/scripts/graph.js +0 -1804
- package/static/scripts/platform.js +0 -64
- package/static/scripts/ux.js +0 -167
- package/static/scripts/workspace.js +0 -948
- package/static/v3/js/core/routes.2ce3815a.js +0 -93
- package/static/workflows.html +0 -146
- package/static/workspace.css +0 -1121
- package/static/workspace.html +0 -357
|
@@ -0,0 +1,644 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# ruff: noqa: F403,F405
|
|
4
|
+
|
|
5
|
+
from ._kg_common import * # noqa: F403,F401
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class KnowledgeGraphIngestMixin:
|
|
9
|
+
def ingest_message(
    self,
    role: str,
    content: str,
    *,
    user_email: Optional[str] = None,
    user_nickname: Optional[str] = None,
    source: Optional[str] = None,
    conversation_id: Optional[str] = None,
    raw: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Ingest one chat message into the knowledge graph.

    Within a single ``self._connect()`` block this upserts:

    1. a ``Chat`` node for the conversation (id ``conversation:<slug>``,
       ``default`` when no ``conversation_id`` is given);
    2. a ``Person`` node plus a "작성함" edge to the chat, when an email or
       nickname is supplied;
    3. the message node itself (``AIResponse`` when ``role == "assistant"``,
       otherwise ``Message``) and a "포함함" edge from the chat;
    4. one ``Chunk`` node / chunk row per piece yielded by ``_chunks``;
    5. one node per extracted concept with an "언급함" edge from the chat;
    6. concept-to-concept edges for extracted triples whose subject and
       object both resolved to distinct concept nodes;
    7. one node per semantic item, linked from the chat ("생성함") and to the
       first three concept nodes ("언급함").

    The message node id is derived from a hash of role, content,
    conversation id and user email, so the same inputs map to the same id.

    Args:
        role: Speaker role; only ``"assistant"`` changes the node type.
        content: Message text; falsy values are treated as ``""``.
        user_email: Preferred key for the ``Person`` node.
        user_nickname: Display label, and fallback key, for the ``Person`` node.
        source: Free-form origin tag stored in node/edge metadata.
        conversation_id: Conversation key; ``None`` maps to ``default``.
        raw: Raw payload stored on the message node; defaults to the metadata.

    Returns:
        ``{"node_id": <message node id>, "type": <message node type>}``.
    """
    content = str(content or "")
    digest = _sha256_text(
        "|".join([role or "", content, conversation_id or "", user_email or ""])
    )[:24]
    node_type = "AIResponse" if role == "assistant" else "Message"
    node_id = f"{node_type.lower()}:{digest}"
    conv_id = f"conversation:{_slug(conversation_id or 'default')}"
    metadata = {
        "role": role,
        "source": source,
        "conversation_id": conversation_id,
        "user_email": user_email,
        "user_nickname": user_nickname,
        "chars": len(content),
    }
    concepts = _extract_concepts(content)
    triples = _extract_triples(content, concepts)
    semantic = _semantic_items(content)
    # Cleaned once and sliced per use (title / chat summary / message summary).
    cleaned = _clean_text(content)

    with self._connect() as conn:
        # -- 1. Chat node (node = noun: the conversation session) ------------
        # Upserted on every call, so title and summary are taken from the
        # message currently being ingested.
        chat_title = cleaned[:80] or (conversation_id or "대화")
        self._upsert_node(
            conn,
            conv_id,
            "Chat",
            chat_title,
            summary=cleaned[:400],
            metadata={"source": source, "conversation_id": conversation_id},
        )

        # -- 2. Person node (node = noun: a person) --------------------------
        if user_email or user_nickname:
            person_key = user_email or user_nickname or "unknown"
            person_id = f"person:{_slug(person_key)}"
            self._upsert_node(
                conn,
                person_id,
                "Person",
                user_nickname or user_email or "Unknown",
                metadata={"email": user_email, "nickname": user_nickname},
            )
            # Edge = verb: the Person "authored" ("작성함") the Chat.
            self._upsert_edge(
                conn,
                person_id,
                conv_id,
                "작성함",
                weight=1.0,
                metadata={"role": role},
            )

        # -- 3. Raw message node (for RAG retrieval; the original note says
        #       it is hidden in the graph view) ------------------------------
        self._upsert_node(
            conn,
            node_id,
            node_type,
            cleaned[:80] or role,
            summary=cleaned[:500],
            metadata=metadata,
            raw=raw or metadata,
        )
        # Edge: the Chat "contains" ("포함함") the message.
        self._upsert_edge(
            conn, conv_id, node_id, "포함함", weight=0.3, metadata={"role": role}
        )

        # -- 4. RAG chunks (for retrieval; hidden in the graph view per the
        #       original note) -----------------------------------------------
        for index, chunk in enumerate(_chunks(content)):
            chunk_id = f"chunk:{_sha256_text(f'{node_id}:{index}:{chunk}')[:24]}"
            self._upsert_node(
                conn,
                chunk_id,
                "Chunk",
                f"chunk {index + 1}",
                summary=chunk[:500],
                metadata={"index": index, "source_node": node_id},
            )
            self._upsert_chunk(
                conn,
                chunk_id=chunk_id,
                source_node=node_id,
                text=chunk,
                metadata={"index": index, "source_node": node_id},
            )
            self._upsert_edge(conn, node_id, chunk_id, "포함함")

        # -- 5. Concept / Feature / Error / Code nodes (node = noun) ---------
        concept_ids: Dict[str, str] = {}
        for concept in concepts:
            node_t = _classify_node_type(concept, content)
            cid = f"{node_t.lower()}:{_slug(concept)}"
            concept_ids[concept.lower()] = cid
            self._upsert_node(
                conn,
                cid,
                node_t,
                concept,
                metadata={"auto_extracted": True, "source": source},
            )
            # Edge: the Chat "mentions" ("언급함") the concept.
            self._upsert_edge(
                conn,
                conv_id,
                cid,
                "언급함",
                weight=0.7,
                metadata={"source": source},
            )

        # -- 6. Concept-to-concept edges (edge = verb) -----------------------
        for triple in triples:
            subj_id = concept_ids.get(triple["subject"].lower())
            obj_id = concept_ids.get(triple["object"].lower())
            # Skip triples whose ends did not resolve, and self-loops.
            if subj_id and obj_id and subj_id != obj_id:
                self._upsert_edge(
                    conn,
                    subj_id,
                    obj_id,
                    triple["relation"],  # verb-style label
                    weight=1.0,
                    metadata={"context": triple.get("context", "")[:240]},
                )

        # -- 7. Task / Decision nodes (node = noun) --------------------------
        # The first three concept ids do not depend on the semantic item, so
        # they are collected once instead of once per item.
        related_concepts = list(concept_ids.values())[:3]
        for item in semantic:
            sem_type = item["type"]
            sem_title = item["title"]
            sem_id = f"{sem_type.lower()}:{_sha256_text(f'{conv_id}:{sem_type}:{sem_title}')[:24]}"
            self._upsert_node(
                conn,
                sem_id,
                sem_type,
                sem_title,
                summary=item["summary"],
                metadata={"auto_extracted": True, "source_node": node_id},
                raw=item,
            )
            # Edge: the Chat "created" ("생성함") the Task/Decision.
            self._upsert_edge(conn, conv_id, sem_id, "생성함", weight=0.9)
            # Edge: the Task/Decision "mentions" ("언급함") related concepts.
            for cid in related_concepts:
                self._upsert_edge(conn, sem_id, cid, "언급함", weight=0.6)

    return {"node_id": node_id, "type": node_type}
|
|
168
|
+
|
|
169
|
+
def ingest_document(
    self,
    path: Path,
    *,
    original_filename: Optional[str] = None,
    mime_type: Optional[str] = None,
    uploader: Optional[str] = None,
    conversation_id: Optional[str] = None,
    extracted: Optional[Dict[str, Any]] = None,
    source_type: Optional[str] = None,
    source_uri: Optional[str] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    permissions: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Ingest a file into the knowledge graph and the content-addressed blob store.

    The file bytes are hashed; the blob is stored under
    ``self.blob_dir/<first two hex chars>/<digest><ext>`` if not already
    present, and a ``Document`` node with id ``file:<digest[:24]>`` is
    upserted. The same connection block also writes structure nodes, an
    optional ``Source`` node (only when ``source_type`` is given), an
    uploader ``Person`` node, a link from the conversation's ``Chat`` node,
    RAG chunks, extracted concepts with concept-to-concept edges, and
    Task/Decision-style semantic items.

    Args:
        path: File to ingest; read fully into memory.
        original_filename: Display name; defaults to ``path.name``.
        mime_type: Stored in metadata as given.
        uploader: Key and label for the uploader ``Person`` node.
        conversation_id: When set, the conversation's ``Chat`` node is linked
            to the document.
        extracted: Pre-extracted payload; ``content`` (else ``preview``) is
            the text that gets chunked and mined. ``content`` itself is not
            copied into the node metadata.
        source_type: When truthy, a ``Source`` node is attached.
        source_uri: Origin URI; defaults to ``str(path)``.
        captured_at: Capture timestamp; defaults to ``_now()``.
        modified_at: Stored in metadata as given.
        owner: Stored in metadata; defaults to ``uploader``.
        workspace_id: Stored in metadata as given.
        permissions: Stored in metadata; defaults to ``{}``.

    Returns:
        Dict with ``node_id``, ``type``, ``sha256``, ``content_hash``,
        ``source_node_id``, ``chunk_ids``, ``chunk_count``, ``duplicate``
        (whether the document node existed before this call),
        ``captured_at`` and ``metadata``.
    """
    path = Path(path)
    data = path.read_bytes()
    digest = _sha256_bytes(data)
    ext = path.suffix.lower()
    filename = original_filename or path.name
    captured_at = captured_at or _now()
    blob_path = self.blob_dir / digest[:2] / f"{digest}{ext}"
    blob_path.parent.mkdir(parents=True, exist_ok=True)
    if not blob_path.exists():
        # Write the bytes that were hashed rather than re-reading the file:
        # the stored blob then always matches its digest even if the source
        # file changes after it was read, and the file is read only once.
        blob_path.write_bytes(data)

    doc_meta = self._document_structure(path, ext)
    extracted_info = extracted or {}
    text = str(extracted_info.get("content") or extracted_info.get("preview") or "")
    file_id = f"file:{digest[:24]}"
    metadata = {
        "filename": filename,
        "ext": ext,
        "mime_type": mime_type,
        "bytes": len(data),
        "sha256": digest,
        "content_hash": digest,
        "blob_path": str(blob_path),
        "uploader": uploader,
        "owner": owner or uploader,
        "workspace_id": workspace_id,
        "permissions": permissions or {},
        "source_type": source_type or "file",
        "source_uri": source_uri or str(path),
        "captured_at": captured_at,
        "modified_at": modified_at,
        "conversation_id": conversation_id,
        # Full text is kept out of metadata; it is stored through chunks.
        "extracted": {k: v for k, v in extracted_info.items() if k != "content"},
        "structure": doc_meta,
    }
    full_text = f"{filename}\n{text}"
    concepts = _extract_concepts(full_text, limit=15)
    triples = _extract_triples(full_text, concepts)
    # Extracted before the connection is opened, as ingest_message does.
    semantic = _semantic_items(text)
    chunk_ids: List[str] = []
    source_node_id: Optional[str] = None

    with self._connect() as conn:
        # Checked before the upsert so it reflects the state prior to this call.
        duplicate = self._node_exists(conn, file_id)

        # -- Document node (node = noun: the file) ---------------------------
        self._upsert_node(
            conn,
            file_id,
            "Document",
            filename,
            summary=(text or filename)[:500],
            metadata=metadata,
            raw=metadata,
        )
        self._ingest_structure_nodes(conn, file_id, filename, doc_meta)

        # -- Source node + indexed_from (v3.6.0; only when source_type set) --
        if source_type:
            source_node_id = self._attach_source_node(
                conn,
                file_id,
                source_type=source_type,
                source_uri=source_uri or str(path),
                title=filename,
                content_hash=digest,
                captured_at=captured_at,
                extra={
                    "owner": owner or uploader,
                    "workspace_id": workspace_id,
                    "ext": ext,
                },
            )

        # -- Person node + verb edge -----------------------------------------
        if uploader:
            person_id = f"person:{_slug(uploader)}"
            self._upsert_node(
                conn,
                person_id,
                "Person",
                uploader,
                metadata={"email": uploader},
            )
            # Edge = verb: the Person "uploaded" ("업로드함") the Document.
            self._upsert_edge(conn, person_id, file_id, "업로드함", weight=1.0)

        # -- Link to the Chat node -------------------------------------------
        if conversation_id:
            conv_id = f"conversation:{_slug(conversation_id)}"
            self._upsert_node(conn, conv_id, "Chat", conversation_id)
            # Edge = verb: the Chat "mentions" ("언급함") the Document.
            self._upsert_edge(conn, conv_id, file_id, "언급함", weight=0.8)

        # -- RAG chunks (for retrieval; not shown in the graph per the
        #    original note) --------------------------------------------------
        for index, chunk in enumerate(_chunks(text)):
            chunk_id = f"chunk:{_sha256_text(f'{file_id}:{index}:{chunk}')[:24]}"
            chunk_ids.append(chunk_id)
            self._upsert_node(
                conn,
                chunk_id,
                "Chunk",
                f"{filename} chunk {index + 1}",
                summary=chunk[:500],
                metadata={"index": index, "source_node": file_id},
            )
            self._upsert_chunk(
                conn,
                chunk_id=chunk_id,
                source_node=file_id,
                text=chunk,
                metadata={"index": index, "source_node": file_id},
            )
            self._upsert_edge(conn, file_id, chunk_id, "포함함")

        # -- Concept / Feature / Error / Code nodes + verb edges -------------
        concept_ids: Dict[str, str] = {}
        for concept in concepts:
            node_t = _classify_node_type(concept, full_text)
            cid = f"{node_t.lower()}:{_slug(concept)}"
            concept_ids[concept.lower()] = cid
            self._upsert_node(
                conn,
                cid,
                node_t,
                concept,
                metadata={"auto_extracted": True, "source_file": filename},
            )
            # Edge = verb: the Document "contains" ("포함함") the concept.
            self._upsert_edge(conn, file_id, cid, "포함함", weight=0.8)

        # -- Concept-to-concept edges (edge = verb) --------------------------
        for triple in triples:
            subj_id = concept_ids.get(triple["subject"].lower())
            obj_id = concept_ids.get(triple["object"].lower())
            # Skip triples whose ends did not resolve, and self-loops.
            if subj_id and obj_id and subj_id != obj_id:
                self._upsert_edge(
                    conn,
                    subj_id,
                    obj_id,
                    triple["relation"],
                    weight=1.0,
                    metadata={"context": triple.get("context", "")[:240]},
                )

        # -- Task / Decision nodes -------------------------------------------
        for item in semantic:
            sem_type = item["type"]
            sem_title = item["title"]
            sem_id = f"{sem_type.lower()}:{_sha256_text(f'{file_id}:{sem_type}:{sem_title}')[:24]}"
            self._upsert_node(
                conn,
                sem_id,
                sem_type,
                sem_title,
                summary=item["summary"],
                metadata={
                    "auto_extracted": True,
                    "source_node": file_id,
                    "filename": filename,
                },
                raw=item,
            )
            # Edge: the Document "contains" ("포함함") the Task/Decision.
            self._upsert_edge(conn, file_id, sem_id, "포함함", weight=0.9)

    return {
        "node_id": file_id,
        "type": "Document",
        "sha256": digest,
        "content_hash": digest,
        "source_node_id": source_node_id,
        "chunk_ids": chunk_ids,
        "chunk_count": len(chunk_ids),
        "duplicate": duplicate,
        "captured_at": captured_at,
        "metadata": metadata,
    }
|
|
363
|
+
|
|
364
|
+
def ingest_event(
    self,
    event_type: str,
    title: str,
    *,
    user_email: Optional[str] = None,
    user_nickname: Optional[str] = None,
    source: Optional[str] = None,
    conversation_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Record an event as a graph node linked to its conversation and actor.

    The event node's type is ``event_type`` itself (``"Event"`` when falsy)
    and its id is a hash of the serialized payload. The payload includes a
    ``_now()`` timestamp, so the id depends on the time of the call.

    Args:
        event_type: Node type for the event; falsy values become ``"Event"``.
        title: Node label and summary; falsy values fall back to ``event_type``.
        user_email: Preferred key for the acting ``Person`` node.
        user_nickname: Display label, and fallback key, for the ``Person`` node.
        source: Free-form origin tag stored in node/edge metadata.
        conversation_id: Conversation key; ``None`` maps to ``default``.
        metadata: Extra payload stored under the ``metadata`` key.

    Returns:
        ``{"node_id": <event node id>, "type": <event_type>}``.
    """
    event_type = str(event_type or "Event")
    title = str(title or event_type)
    record = {
        "event_type": event_type,
        "title": title,
        "user_email": user_email,
        "user_nickname": user_nickname,
        "source": source,
        "conversation_id": conversation_id,
        "metadata": metadata or {},
        "timestamp": _now(),
    }
    record_hash = _sha256_text(_json(record))
    event_id = f"event:{record_hash[:24]}"
    conversation_slug = _slug(conversation_id or "default")
    conv_id = f"conversation:{conversation_slug}"
    has_actor = bool(user_email or user_nickname)

    with self._connect() as conn:
        # The event node carries the full record as both metadata and raw.
        self._upsert_node(
            conn,
            event_id,
            event_type,
            title,
            summary=title,
            metadata=record,
            raw=record,
        )
        # NOTE(review): this node is typed "Conversation", while
        # ingest_message / ingest_document upsert the same
        # "conversation:<slug>" id as "Chat" - confirm which type should win.
        conversation_label = conversation_id or "Default conversation"
        self._upsert_node(
            conn,
            conv_id,
            "Conversation",
            conversation_label,
            metadata={"source": source},
        )
        self._upsert_edge(
            conn, conv_id, event_id, "has_event", metadata={"source": source}
        )
        if has_actor:
            actor_key = user_email or user_nickname or "unknown"
            actor_id = f"person:{_slug(actor_key)}"
            actor_label = user_nickname or user_email or "Unknown user"
            self._upsert_node(
                conn,
                actor_id,
                "Person",
                actor_label,
                metadata={"email": user_email},
            )
            # The acting person "triggered" the event.
            self._upsert_edge(
                conn,
                actor_id,
                event_id,
                "triggered",
                metadata={"event_type": event_type},
            )

    return {"node_id": event_id, "type": event_type}
|
|
427
|
+
|
|
428
|
+
def _node_exists(self, conn: sqlite3.Connection, node_id: str) -> bool:
|
|
429
|
+
row = conn.execute("SELECT 1 FROM nodes WHERE id = ?", (node_id,)).fetchone()
|
|
430
|
+
return row is not None
|
|
431
|
+
|
|
432
|
+
def node_is_embedded(self, node_id: str) -> bool:
    """Tell whether ``vector_embeddings`` holds at least one row for ``node_id``.

    Opens its own connection through ``self._connect()`` and looks for a row
    whose ``item_id`` equals ``node_id``.
    """
    query = "SELECT 1 FROM vector_embeddings WHERE item_id = ? LIMIT 1"
    with self._connect() as conn:
        hit = conn.execute(query, (node_id,)).fetchone()
    return hit is not None
|
|
440
|
+
|
|
441
|
+
def _attach_source_node(
    self,
    conn: sqlite3.Connection,
    content_node_id: str,
    *,
    source_type: str,
    source_uri: Optional[str] = None,
    title: Optional[str] = None,
    content_hash: Optional[str] = None,
    captured_at: Optional[str] = None,
    extra: Optional[Dict[str, Any]] = None,
) -> str:
    """Upsert a ``Source`` node and link a content node to it via ``indexed_from``.

    The source id is a hash of ``source_type`` combined with the first truthy
    value among ``source_uri``, ``content_hash`` and ``content_node_id``, so
    calls with the same type and origin key produce the same source id.

    Args:
        conn: Open connection the upserts run on.
        content_node_id: Id of the node that was ingested from this source.
        source_type: Kind of origin; part of the id and of the metadata.
        source_uri: Origin URI; preferred id key and summary text.
        title: Preferred label for the source node.
        content_hash: Fallback id key when there is no URI.
        captured_at: Capture timestamp; defaults to ``_now()``.
        extra: Extra metadata; merged last, so its keys override the core ones.

    Returns:
        The id of the ``Source`` node.
    """
    origin_key = source_uri or content_hash or content_node_id
    source_hash = _sha256_text(f"{source_type}|{origin_key}")
    source_id = f"source:{source_hash[:24]}"

    source_meta = {
        "source_type": source_type,
        "source_uri": source_uri,
        "content_hash": content_hash,
        "captured_at": captured_at or _now(),
    }
    source_meta.update(extra or {})

    # Label prefers the title; the summary prefers the URI.
    display_label = title or source_uri or source_type
    summary_text = str(source_uri or title or source_type)[:400]
    self._upsert_node(
        conn,
        source_id,
        "Source",
        display_label,
        summary=summary_text,
        metadata=source_meta,
    )
    # Edge: the content node was "indexed from" this source.
    self._upsert_edge(
        conn,
        content_node_id,
        source_id,
        "indexed_from",
        weight=1.0,
        metadata={"source_type": source_type},
    )
    return source_id
|
|
488
|
+
|
|
489
|
+
def ingest_source(
    self,
    *,
    source_type: str,
    title: str,
    text: str,
    source_uri: Optional[str] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    permissions: Optional[Dict[str, Any]] = None,
    captured_at: Optional[str] = None,
    modified_at: Optional[str] = None,
    conversation_id: Optional[str] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Ingest a piece of non-file text (URL, browser tab, note, raw text).

    Within a single ``self._connect()`` context this upserts:

    * a ``Document`` content node whose id is ``webdoc:`` plus the first 24
      characters of the hash of ``source_type``, ``source_uri`` and ``text``;
    * a ``Source`` node linked through ``indexed_from``
      (via ``_attach_source_node``);
    * an optional ``Person`` node for ``owner`` and an optional ``Chat`` node
      for ``conversation_id``, each linked to the document;
    * one ``Chunk`` node plus chunk record per item yielded by ``_chunks``;
    * one node per extracted concept, edges for extracted triples whose
      subject and object both map to distinct concept nodes, and one node
      per item returned by ``_semantic_items``.

    Args:
        source_type: Kind of origin; falls back to ``"text"`` when falsy.
        title: Display title; falls back to ``source_uri`` then
            ``source_type``, is cleaned and truncated to 240 characters.
        text: Body text; falsy values are treated as an empty string.
        source_uri: Optional origin URI.
        owner: Optional owner identifier, also stored as the person's email.
        workspace_id: Optional workspace identifier stored in metadata.
        permissions: Optional permissions mapping stored in metadata.
        captured_at: Optional capture timestamp; ``_now()`` is used if falsy.
        modified_at: Optional modification timestamp stored in metadata.
        conversation_id: Optional conversation to link to the document.
        metadata: Optional extra metadata merged last, so its keys override
            the built-in ones on collision.

    Returns:
        A dict with ``node_id``, ``type``, ``source_node_id``,
        ``content_hash``, ``chunk_ids``, ``chunk_count``, ``duplicate`` (the
        result of ``_node_exists`` for the content id, checked before the
        upsert) and ``captured_at``.
    """
    # ── Normalise inputs ────────────────────────────────────────────────
    source_type = str(source_type or "text")
    text = str(text or "")
    raw_title = str(title or source_uri or source_type)
    title = _clean_text(raw_title)[:240] or source_type
    if not captured_at:
        captured_at = _now()

    # ── Derive identity and the text used for extraction ────────────────
    content_hash = _sha256_text(f"{source_type}|{source_uri or ''}|{text}")
    content_id = f"webdoc:{content_hash[:24]}"
    full_text = f"{title}\n{text}"

    node_meta: Dict[str, Any] = {
        "source_type": source_type,
        "source_uri": source_uri,
        "content_hash": content_hash,
        "title": title,
        "captured_at": captured_at,
        "modified_at": modified_at,
        "owner": owner,
        "workspace_id": workspace_id,
        "permissions": permissions or {},
        "chars": len(text),
    }
    if metadata:
        # Caller metadata is merged last and wins on key collisions.
        node_meta.update(metadata)

    concepts = _extract_concepts(full_text, limit=15)
    triples = _extract_triples(full_text, concepts)
    chunk_ids: List[str] = []

    with self._connect() as conn:
        # Must be checked before the upsert below creates the node.
        duplicate = self._node_exists(conn, content_id)

        # ── Content node (point: noun — the document) ───────────────────
        self._upsert_node(
            conn,
            content_id,
            "Document",
            title,
            summary=(text or title)[:500],
            metadata=node_meta,
            raw=node_meta,
        )

        # ── SOURCE node + indexed_from edge (provenance tracking) ───────
        source_node_id = self._attach_source_node(
            conn,
            content_id,
            source_type=source_type,
            source_uri=source_uri,
            title=title,
            content_hash=content_hash,
            captured_at=captured_at,
            extra={"owner": owner, "workspace_id": workspace_id},
        )

        # ── Owner (Person) + verb-style edge ────────────────────────────
        if owner:
            person_id = f"person:{_slug(owner)}"
            self._upsert_node(
                conn, person_id, "Person", owner, metadata={"email": owner}
            )
            # Edge label "업로드함" means "uploaded".
            self._upsert_edge(conn, person_id, content_id, "업로드함", weight=1.0)

        # ── Conversation link ───────────────────────────────────────────
        if conversation_id:
            conv_id = f"conversation:{_slug(conversation_id)}"
            self._upsert_node(conn, conv_id, "Chat", conversation_id)
            # Edge label "언급함" means "mentioned".
            self._upsert_edge(conn, conv_id, content_id, "언급함", weight=0.8)

        # ── RAG chunks ──────────────────────────────────────────────────
        for position, piece in enumerate(_chunks(text)):
            piece_digest = _sha256_text(f"{content_id}:{position}:{piece}")
            chunk_id = f"chunk:{piece_digest[:24]}"
            chunk_ids.append(chunk_id)
            chunk_meta = {"index": position, "source_node": content_id}
            self._upsert_node(
                conn,
                chunk_id,
                "Chunk",
                f"{title} chunk {position + 1}",
                summary=piece[:500],
                metadata=dict(chunk_meta),
            )
            self._upsert_chunk(
                conn,
                chunk_id=chunk_id,
                source_node=content_id,
                text=piece,
                metadata=dict(chunk_meta),
            )
            # Edge label "포함함" means "contains".
            self._upsert_edge(conn, content_id, chunk_id, "포함함")

        # ── Concept / Feature / Error / Code nodes + edges ──────────────
        concept_ids: Dict[str, str] = {}
        for concept in concepts:
            node_kind = _classify_node_type(concept, full_text)
            concept_node_id = f"{node_kind.lower()}:{_slug(concept)}"
            concept_ids[concept.lower()] = concept_node_id
            self._upsert_node(
                conn,
                concept_node_id,
                node_kind,
                concept,
                metadata={"auto_extracted": True, "source_type": source_type},
            )
            self._upsert_edge(
                conn, content_id, concept_node_id, "포함함", weight=0.8
            )

        for triple in triples:
            subj_id = concept_ids.get(triple["subject"].lower())
            obj_id = concept_ids.get(triple["object"].lower())
            # Only link triples whose two ends are distinct known concepts.
            if not subj_id or not obj_id or subj_id == obj_id:
                continue
            self._upsert_edge(
                conn,
                subj_id,
                obj_id,
                triple["relation"],
                weight=1.0,
                metadata={"context": triple.get("context", "")[:240]},
            )

        # ── Task / Decision nodes ───────────────────────────────────────
        for item in _semantic_items(text):
            sem_type = item["type"]
            sem_title = item["title"]
            sem_digest = _sha256_text(f"{content_id}:{sem_type}:{sem_title}")
            sem_id = f"{sem_type.lower()}:{sem_digest[:24]}"
            self._upsert_node(
                conn,
                sem_id,
                sem_type,
                sem_title,
                summary=item["summary"],
                metadata={"auto_extracted": True, "source_node": content_id},
                raw=item,
            )
            self._upsert_edge(conn, content_id, sem_id, "포함함", weight=0.9)

    result: Dict[str, Any] = {
        "node_id": content_id,
        "type": "Document",
        "source_node_id": source_node_id,
        "content_hash": content_hash,
        "chunk_ids": chunk_ids,
        "chunk_count": len(chunk_ids),
        "duplicate": duplicate,
        "captured_at": captured_at,
    }
    return result
|