ltcai 3.5.0 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +73 -35
- package/docs/CARRYOVER_AUDIT_v3.6.0.md +61 -0
- package/docs/CHANGELOG.md +32 -0
- package/docs/HANDOVER_v3.6.0.md +46 -0
- package/docs/RUNTIME_HOOK_COVERAGE_v3.6.0.md +49 -0
- package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +509 -0
- package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
- package/docs/architecture.md +13 -12
- package/docs/kg-schema.md +102 -53
- package/docs/privacy.md +18 -2
- package/docs/security-model.md +17 -0
- package/kg_schema.py +139 -10
- package/knowledge_graph.py +874 -26
- package/knowledge_graph_api.py +11 -127
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +1 -1
- package/latticeai/api/agents.py +7 -1
- package/latticeai/api/auth.py +27 -4
- package/latticeai/api/browser.py +217 -0
- package/latticeai/api/chat.py +112 -76
- package/latticeai/api/health.py +1 -1
- package/latticeai/api/hooks.py +1 -1
- package/latticeai/api/knowledge_graph.py +146 -0
- package/latticeai/api/local_files.py +1 -1
- package/latticeai/api/mcp.py +23 -11
- package/latticeai/api/memory.py +1 -1
- package/latticeai/api/models.py +1 -1
- package/latticeai/api/network.py +81 -0
- package/latticeai/api/portability.py +93 -0
- package/latticeai/api/realtime.py +1 -1
- package/latticeai/api/search.py +26 -2
- package/latticeai/api/security_dashboard.py +2 -3
- package/latticeai/api/setup.py +2 -2
- package/latticeai/api/static_routes.py +2 -4
- package/latticeai/api/tools.py +3 -0
- package/latticeai/api/workflow_designer.py +46 -0
- package/latticeai/api/workspace.py +71 -49
- package/latticeai/app_factory.py +1710 -0
- package/latticeai/brain/__init__.py +18 -0
- package/latticeai/brain/context.py +213 -0
- package/latticeai/brain/conversations.py +236 -0
- package/latticeai/brain/identity.py +175 -0
- package/latticeai/brain/memory.py +102 -0
- package/latticeai/brain/network.py +205 -0
- package/latticeai/core/agent.py +31 -7
- package/latticeai/core/audit.py +0 -7
- package/latticeai/core/config.py +1 -1
- package/latticeai/core/context_builder.py +1 -2
- package/latticeai/core/enterprise.py +1 -1
- package/latticeai/core/graph_curator.py +2 -2
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/mcp_registry.py +791 -0
- package/latticeai/core/model_compat.py +1 -1
- package/latticeai/core/model_resolution.py +0 -1
- package/latticeai/core/multi_agent.py +238 -4
- package/latticeai/core/security.py +1 -1
- package/latticeai/core/sessions.py +37 -7
- package/latticeai/core/workflow_engine.py +114 -2
- package/latticeai/core/workspace_os.py +58 -10
- package/latticeai/models/__init__.py +7 -0
- package/latticeai/models/router.py +779 -0
- package/latticeai/server_app.py +29 -1504
- package/latticeai/services/agent_runtime.py +1 -0
- package/latticeai/services/app_context.py +75 -14
- package/latticeai/services/ingestion.py +318 -0
- package/latticeai/services/kg_portability.py +207 -0
- package/latticeai/services/memory_service.py +39 -11
- package/latticeai/services/model_runtime.py +2 -5
- package/latticeai/services/platform_runtime.py +100 -23
- package/latticeai/services/search_service.py +17 -8
- package/latticeai/services/tool_dispatch.py +12 -2
- package/latticeai/services/triggers.py +241 -0
- package/latticeai/services/upload_service.py +37 -12
- package/latticeai/services/workspace_service.py +31 -0
- package/llm_router.py +29 -772
- package/ltcai_cli.py +1 -2
- package/mcp_registry.py +25 -788
- package/p_reinforce.py +124 -14
- package/package.json +11 -8
- package/scripts/build_vsix.mjs +72 -0
- package/scripts/bump_version.py +99 -0
- package/scripts/generate_diagrams.py +0 -1
- package/scripts/lint_v3.mjs +82 -18
- package/scripts/validate_release_artifacts.py +0 -1
- package/scripts/wheel_smoke.py +142 -0
- package/server.py +11 -7
- package/setup_wizard.py +1142 -0
- package/static/account.html +2 -4
- package/static/admin.html +3 -5
- package/static/chat.html +3 -6
- package/static/graph.html +2 -4
- package/static/sw.js +81 -52
- package/static/v3/asset-manifest.json +20 -19
- package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
- package/static/v3/css/lattice.base.css +1 -1
- package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
- package/static/v3/css/lattice.components.css +1 -1
- package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
- package/static/v3/css/lattice.shell.css +1 -1
- package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
- package/static/v3/css/lattice.tokens.css +3 -0
- package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
- package/static/v3/css/lattice.views.css +2 -2
- package/static/v3/index.html +3 -4
- package/static/v3/js/{app.d086489d.js → app.356e6452.js} +1 -1
- package/static/v3/js/core/{api.12b568ad.js → api.7a308b89.js} +39 -1
- package/static/v3/js/core/api.js +38 -0
- package/static/v3/js/core/{routes.d214b399.js → routes.7222343d.js} +22 -22
- package/static/v3/js/core/routes.js +22 -22
- package/static/v3/js/core/{shell.d05266f5.js → shell.a1657f20.js} +4 -4
- package/static/v3/js/core/shell.js +1 -1
- package/static/v3/js/core/{store.34ebd5e6.js → store.204a08b2.js} +1 -1
- package/static/v3/js/core/store.js +1 -1
- package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
- package/static/v3/js/views/graph-canvas.js +509 -0
- package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
- package/static/v3/js/views/hybrid-search.js +1 -2
- package/static/v3/js/views/knowledge-graph.5e40cbeb.js +509 -0
- package/static/v3/js/views/knowledge-graph.js +326 -54
- package/static/vendor/chart.umd.min.js +20 -0
- package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
- package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
- package/static/vendor/fonts/inter.css +44 -0
- package/static/vendor/icons/tabler-icons.min.css +4 -0
- package/static/vendor/icons/tabler-icons.woff2 +0 -0
- package/static/vendor/marked.min.js +69 -0
- package/static/workspace.html +2 -2
- package/telegram_bot.py +1 -2
- package/tools/commands.py +4 -2
- package/tools/computer.py +1 -1
- package/tools/documents.py +1 -3
- package/tools/filesystem.py +0 -4
- package/tools/knowledge.py +1 -3
- package/tools/network.py +1 -3
- package/codex_telegram_bot.py +0 -195
- package/docs/assets/v3.4.0/agent-run.png +0 -0
- package/docs/assets/v3.4.0/agents.png +0 -0
- package/docs/assets/v3.4.0/before/chat-before.png +0 -0
- package/docs/assets/v3.4.0/before/files-before.png +0 -0
- package/docs/assets/v3.4.0/chat.png +0 -0
- package/docs/assets/v3.4.0/connect-folder.png +0 -0
- package/docs/assets/v3.4.0/files.png +0 -0
- package/docs/assets/v3.4.0/home.png +0 -0
- package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
- package/docs/assets/v3.4.0/local-agent.png +0 -0
- package/docs/assets/v3.4.0/memory.png +0 -0
- package/docs/assets/v3.4.0/settings.png +0 -0
- package/docs/assets/v3.4.0/vision-input.png +0 -0
- package/docs/assets/v3.4.0/workflows.png +0 -0
- package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
- package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
- package/docs/assets/v3.4.1/local-agent.png +0 -0
- package/docs/images/admin-dashboard.png +0 -0
- package/docs/images/architecture.png +0 -0
- package/docs/images/enterprise.png +0 -0
- package/docs/images/graph.png +0 -0
- package/docs/images/hero.gif +0 -0
- package/docs/images/knowledge-graph.png +0 -0
- package/docs/images/lattice-ai-demo.gif +0 -0
- package/docs/images/lattice-ai-hero.png +0 -0
- package/docs/images/logo.svg +0 -33
- package/docs/images/mobile-responsive.png +0 -0
- package/docs/images/model-recommendation.png +0 -0
- package/docs/images/onboarding.png +0 -0
- package/docs/images/organization.png +0 -0
- package/docs/images/pipeline.png +0 -0
- package/docs/images/screenshot-admin.png +0 -0
- package/docs/images/screenshot-chat.png +0 -0
- package/docs/images/screenshot-graph.png +0 -0
- package/docs/images/skills.png +0 -0
- package/docs/images/workspace-dark.png +0 -0
- package/docs/images/workspace-light.png +0 -0
- package/docs/images/workspace.png +0 -0
- package/requirements.txt +0 -16
- package/static/v3/js/views/knowledge-graph.a14ea7e7.js +0 -237
|
@@ -1,27 +1,88 @@
|
|
|
1
1
|
"""Application dependency context for router assembly.
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
``latticeai.app_factory.create_app`` builds one ``AppContext`` per app and
|
|
4
|
+
hands it to router factories, replacing the historical 25-30-kwarg closure
|
|
5
|
+
wiring. Every field defaults to ``None``-ish so tests can construct a context
|
|
6
|
+
carrying only the dependencies a router actually touches.
|
|
7
|
+
|
|
8
|
+
Fields are grouped by the consumer that motivated them; routers must treat the
|
|
9
|
+
context as read-only.
|
|
6
10
|
"""
|
|
7
11
|
|
|
8
12
|
from __future__ import annotations
|
|
9
13
|
|
|
10
14
|
from dataclasses import dataclass
|
|
11
15
|
from pathlib import Path
|
|
12
|
-
from typing import Any, Callable
|
|
16
|
+
from typing import Any, Callable, Optional
|
|
13
17
|
|
|
14
18
|
|
|
15
19
|
@dataclass(frozen=True)
class AppContext:
    """Immutable bundle of application dependencies passed to router factories.

    Every field carries a default (``None``, ``False`` or ``""``), so a context
    can be constructed with only the dependencies a given router reads. The
    dataclass is frozen: assigning to a field after construction raises
    ``dataclasses.FrozenInstanceError``.
    """

    # ── core configuration / paths ────────────────────────────────────────
    config: Any = None
    data_dir: Optional[Path] = None
    static_dir: Optional[Path] = None
    base_dir: Optional[Path] = None
    skills_dir: Optional[Path] = None

    # ── singletons ────────────────────────────────────────────────────────
    model_router: Any = None
    workspace_store: Any = None
    workspace_service: Any = None
    knowledge_graph: Any = None
    local_kg_watcher: Any = None
    chat_service: Any = None
    context_assembler: Any = None
    brain_memory: Any = None
    gardener: Any = None
    hooks: Any = None
    realtime_bus: Any = None
    capability_registry: Any = None

    # ── auth / session callables ──────────────────────────────────────────
    require_user: Optional[Callable[..., str]] = None
    require_admin: Optional[Callable[..., tuple]] = None
    get_current_user: Optional[Callable[..., Optional[str]]] = None
    load_users: Optional[Callable[[], dict]] = None
    get_user_role: Optional[Callable[..., str]] = None
    enforce_rate_limit: Optional[Callable[..., None]] = None

    # ── audit / history callables ─────────────────────────────────────────
    append_audit_event: Optional[Callable[..., None]] = None
    get_audit_log: Optional[Callable[[], list]] = None
    get_history: Optional[Callable[[], list]] = None
    get_history_user: Optional[Callable[..., dict]] = None
    save_to_history: Optional[Callable[..., None]] = None
    clear_history: Optional[Callable[..., dict]] = None
    clear_conversation: Optional[Callable[..., dict]] = None
    group_history_conversations: Optional[Callable[..., list]] = None
    get_conversation_messages: Optional[Callable[..., list]] = None
    conversation_title: Optional[Callable[..., str]] = None

    # ── knowledge graph access ────────────────────────────────────────────
    enable_graph: bool = False
    require_graph: Optional[Callable[[], None]] = None
    workspace_graph: Optional[Callable[[], Any]] = None
    graph_stats: Optional[Callable[[], dict]] = None

    # ── workspace payload providers / skills ──────────────────────────────
    workspace_models: Optional[Callable[[], dict]] = None
    workspace_settings: Optional[Callable[[], dict]] = None
    scan_environment: Optional[Callable[[], Any]] = None
    local_sysinfo: Optional[Callable[..., Any]] = None
    get_recommendations: Optional[Callable[..., Any]] = None
    fetch_skills_marketplace: Optional[Callable[..., Any]] = None
    install_skill: Optional[Callable[..., Any]] = None
    remove_skill_directory: Optional[Callable[..., dict]] = None
    redact_secret_text: Optional[Callable[[str], str]] = None
    ui_file_response: Optional[Callable[..., Any]] = None

    # ── models ────────────────────────────────────────────────────────────
    public_model: str = ""
    local_model: str = ""

    # ── integrations ──────────────────────────────────────────────────────
    # Fired as on_chat_message(role, text, source) after a chat exchange is
    # persisted; ``None`` means no external chat mirror is registered. The
    # telegram bridge subscribes here only when ENABLE_TELEGRAM is truthy.
    on_chat_message: Optional[Callable[..., None]] = None
|
|
@@ -0,0 +1,318 @@
|
|
|
1
|
+
"""Unified ingestion pipeline — the single write-side seam into the Knowledge Graph.
|
|
2
|
+
|
|
3
|
+
v3.6.0 Knowledge Graph First principle: *no data source bypasses the Knowledge
|
|
4
|
+
Graph and no source creates an isolated silo*. Every source — local files,
|
|
5
|
+
connected folders, PDFs/Markdown/text/code, web URLs, browser tabs — is
|
|
6
|
+
normalized into one :class:`IngestionItem` and pushed through one
|
|
7
|
+
:meth:`IngestionPipeline.ingest` entrypoint:
|
|
8
|
+
|
|
9
|
+
Source → normalize → content hash → (file | text) ingest → provenance
|
|
10
|
+
|
|
11
|
+
The pipeline is deliberately thin. It owns normalization, idempotency reporting,
|
|
12
|
+
provenance capture, and — crucially — routing every ingest through the shared
|
|
13
|
+
``dispatch_tool`` lifecycle so ``pre_tool``/``post_tool`` hooks fire on data
|
|
14
|
+
ingestion exactly as they do on tool calls. The heavy graph construction lives in
|
|
15
|
+
:class:`knowledge_graph.KnowledgeGraphStore` (``ingest_document`` for files,
|
|
16
|
+
``ingest_source`` for text/web), which this module composes rather than
|
|
17
|
+
re-implements.
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import hashlib
|
|
23
|
+
from dataclasses import dataclass, field
|
|
24
|
+
from datetime import datetime, timezone
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Dict, List, Optional
|
|
27
|
+
|
|
28
|
+
from latticeai.core.hooks import dispatch_tool
|
|
29
|
+
|
|
30
|
+
# Source types that arrive as a file on disk (read via ingest_document).
FILE_SOURCE_TYPES = frozenset({"file", "local_file", "upload", "pdf"})
# Source types that arrive as extracted text (read via ingest_source).
TEXT_SOURCE_TYPES = frozenset(
    {"web_url", "browser_tab", "text", "markdown", "note", "code", "clipboard"}
)
# Conversational exchanges (read via ingest_message — role/content semantics,
# conversation chaining). v4: chat and MCP messages stop bypassing the
# pipeline, so they carry provenance and fire the hook lifecycle like every
# other source.
CHAT_SOURCE_TYPES = frozenset({"chat_message", "mcp_message"})
# Typed memory records (read via ingest_event → Decision/Experience/Event
# nodes). The Memory System writes through the same door as everything else.
MEMORY_SOURCE_TYPES = frozenset({"decision", "experience", "workspace_event"})
# Maps each memory source type to the node type handed to ingest_event; the
# keys must stay in sync with MEMORY_SOURCE_TYPES.
_MEMORY_NODE_TYPES = {"decision": "Decision", "experience": "Experience", "workspace_event": "Event"}

DEFAULT_MAX_TEXT_BYTES = 5 * 1024 * 1024  # 5 MB of extracted text per item
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class IngestionItem:
    """A single thing to ingest, normalized across every source type.

    Only ``source_type`` is required. ``text`` carries the payload for
    text-like sources and ``path`` the on-disk location for file sources; the
    remaining fields are optional descriptive data forwarded to the store.
    """

    source_type: str
    title: Optional[str] = None
    text: Optional[str] = None  # text/web sources
    path: Optional[str] = None  # file sources
    source_uri: Optional[str] = None
    mime_type: Optional[str] = None
    owner: Optional[str] = None
    workspace_id: Optional[str] = None
    permissions: Optional[Dict[str, Any]] = None
    captured_at: Optional[str] = None
    modified_at: Optional[str] = None
    conversation_id: Optional[str] = None
    agent_used: Optional[str] = None
    # default_factory gives every item its own dict instead of a shared one.
    metadata: Dict[str, Any] = field(default_factory=dict)
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
@dataclass
class IngestionResult:
    """The outcome of one ingestion, including provenance and idempotency."""

    status: str  # ok | unavailable | blocked | failed
    source_type: str
    node_id: Optional[str] = None
    source_node_id: Optional[str] = None
    content_hash: Optional[str] = None
    title: Optional[str] = None
    chunk_ids: List[str] = field(default_factory=list)
    chunk_count: int = 0
    duplicate: bool = False
    embedded: bool = False
    indexing_status: str = "pending"  # indexed | skipped | failed | pending
    provenance_id: Optional[str] = None
    detail: Optional[str] = None

    def as_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict keyed by field name.

        Values are not copied: ``chunk_ids`` in the returned dict is the same
        list object held by this instance.
        """
        return {
            "status": self.status,
            "source_type": self.source_type,
            "node_id": self.node_id,
            "source_node_id": self.source_node_id,
            "content_hash": self.content_hash,
            "title": self.title,
            "chunk_ids": self.chunk_ids,
            "chunk_count": self.chunk_count,
            "duplicate": self.duplicate,
            "embedded": self.embedded,
            "indexing_status": self.indexing_status,
            "provenance_id": self.provenance_id,
            "detail": self.detail,
        }
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class IngestionPipeline:
    """Single normalized entrypoint that feeds every source into the graph.

    The pipeline wraps a knowledge-graph store and calls these store methods:
    ``ingest_message``, ``ingest_event``, ``ingest_document``,
    ``ingest_source``, ``node_is_embedded`` and ``record_provenance``. Each
    ingest is executed through ``dispatch_tool`` under the tool name
    ``kg_ingest.<source_type>``.
    """

    def __init__(
        self,
        knowledge_graph: Any,
        *,
        hooks: Any = None,
        enable_graph: bool = True,
        audit: Optional[Any] = None,
        max_text_bytes: int = DEFAULT_MAX_TEXT_BYTES,
        pipeline_name: str = "unified-ingestion",
    ) -> None:
        """Store collaborators and limits.

        Args:
            knowledge_graph: Store object the pipeline writes into; ``None``
                makes the pipeline unavailable.
            hooks: Passed through to ``dispatch_tool`` as its first argument.
            enable_graph: When falsy, ``ingest`` returns an ``unavailable``
                result without touching the store.
            audit: Optional callable invoked as
                ``audit("kg_ingest", payload, user_email)`` after a
                successful ingest.
            max_text_bytes: Upper bound on the UTF-8 size of a text payload.
            pipeline_name: Recorded as ``pipeline`` in provenance entries.
        """
        self._kg = knowledge_graph
        self._hooks = hooks
        self._enable = bool(enable_graph)
        self._audit = audit
        self._max_text_bytes = int(max_text_bytes)
        self._pipeline_name = pipeline_name

    def available(self) -> bool:
        """Return True when the graph is enabled and a store is attached."""
        return self._enable and self._kg is not None

    # ── public API ───────────────────────────────────────────────────────────
    def ingest(self, item: IngestionItem, *, user_email: Optional[str] = None) -> IngestionResult:
        """Normalize, route through dispatch_tool, and record provenance.

        Args:
            item: The normalized thing to ingest.
            user_email: Acting user; used as the owner when ``item.owner`` is
                empty and forwarded to ``dispatch_tool`` and the audit hook.

        Returns:
            An ``IngestionResult`` whose ``status`` is ``ok``,
            ``unavailable`` (graph disabled), ``blocked`` (a
            ``PermissionError`` escaped ``dispatch_tool``) or ``failed`` (any
            other exception, or a result that is not a dict).
        """
        # A missing or whitespace-only source type falls back to "text" so the
        # tool name is never the bare "kg_ingest." prefix.
        source_type = str(item.source_type or "").strip() or "text"
        if not self.available():
            return IngestionResult(
                status="unavailable", source_type=source_type,
                indexing_status="skipped",
                detail="Knowledge Graph is disabled (LATTICEAI_ENABLE_GRAPH).",
            )

        captured_at = item.captured_at or _now_iso()
        owner = item.owner or user_email
        tool_name = f"kg_ingest.{source_type}"
        # Only the keys are read by the hook payload, so this dict is safe/cheap.
        args = {
            "source_type": source_type,
            "source_uri": item.source_uri,
            "owner": owner,
            "workspace_id": item.workspace_id,
        }

        def _run() -> Dict[str, Any]:
            # Routing order matters: chat and memory types win over the
            # file/text split, and a path without text is treated as a file.
            if source_type in CHAT_SOURCE_TYPES:
                return self._ingest_chat(item, source_type=source_type, owner=owner)
            if source_type in MEMORY_SOURCE_TYPES:
                return self._ingest_memory_record(item, source_type=source_type, owner=owner)
            if source_type in FILE_SOURCE_TYPES or (item.path and not item.text):
                return self._ingest_file(item, source_type=source_type, owner=owner, captured_at=captured_at)
            return self._ingest_text(item, source_type=source_type, owner=owner, captured_at=captured_at)

        try:
            raw = dispatch_tool(
                self._hooks, tool_name, args, _run,
                user_email=user_email, workspace_id=item.workspace_id, source="ingestion",
            )
        except PermissionError as exc:
            return IngestionResult(
                status="blocked", source_type=source_type,
                indexing_status="skipped", detail=str(exc),
            )
        except Exception as exc:  # noqa: BLE001 — surface as a failed result, never crash the caller
            # Covers FileNotFoundError from _ingest_file as well; it was
            # previously handled by an identical dedicated clause.
            return IngestionResult(
                status="failed", source_type=source_type,
                indexing_status="failed", detail=str(exc),
            )

        if not isinstance(raw, dict):
            # Everything below reads the result with .get(); a None or other
            # non-dict result would otherwise raise AttributeError here.
            return IngestionResult(
                status="failed", source_type=source_type,
                indexing_status="failed",
                detail=f"Ingestion returned no result mapping (got {type(raw).__name__}).",
            )

        node_id = raw.get("node_id")
        content_hash = raw.get("content_hash") or raw.get("sha256")
        chunk_ids = list(raw.get("chunk_ids") or [])
        embedded = bool(self._kg.node_is_embedded(node_id)) if node_id else False
        title = raw.get("title") or item.title
        duplicate = bool(raw.get("duplicate"))

        prov = self._kg.record_provenance(
            node_id=node_id,
            source_type=source_type,
            pipeline=self._pipeline_name,
            source_uri=item.source_uri,
            content_hash=content_hash,
            title=title,
            owner=owner,
            workspace_id=item.workspace_id,
            captured_at=captured_at,
            modified_at=item.modified_at,
            embedded=embedded,
            linked=bool(raw.get("source_node_id")),
            duplicate=duplicate,
            agent_used=item.agent_used,
            chunk_count=len(chunk_ids),
            permissions=item.permissions,
            metadata=item.metadata,
        )
        if self._audit is not None:
            try:
                self._audit(
                    "kg_ingest",
                    {
                        "source_type": source_type, "node_id": node_id,
                        "content_hash": content_hash, "duplicate": duplicate,
                    },
                    user_email,
                )
            except Exception:  # noqa: BLE001 — audit must never break ingestion
                pass

        return IngestionResult(
            status="ok",
            source_type=source_type,
            node_id=node_id,
            source_node_id=raw.get("source_node_id"),
            content_hash=content_hash,
            title=title,
            chunk_ids=chunk_ids,
            chunk_count=len(chunk_ids),
            duplicate=duplicate,
            embedded=embedded,
            indexing_status="indexed",
            # Tolerate a store that returns nothing from record_provenance.
            provenance_id=prov.get("id") if isinstance(prov, dict) else None,
        )

    # ── routing helpers ──────────────────────────────────────────────────────
    def _ingest_text(self, item, *, source_type, owner, captured_at) -> Dict[str, Any]:
        """Ingest a text payload via ``ingest_source``.

        Raises:
            ValueError: If the UTF-8 encoded text exceeds ``max_text_bytes``.
        """
        text = item.text or ""
        if len(text.encode("utf-8", "ignore")) > self._max_text_bytes:
            raise ValueError(
                f"Text payload exceeds the {self._max_text_bytes // (1024 * 1024)}MB ingestion limit."
            )
        title = item.title or item.source_uri or source_type
        return self._kg.ingest_source(
            source_type=source_type,
            title=title,
            text=text,
            source_uri=item.source_uri,
            owner=owner,
            workspace_id=item.workspace_id,
            permissions=item.permissions,
            captured_at=captured_at,
            modified_at=item.modified_at,
            conversation_id=item.conversation_id,
            # Item metadata is spread last, so it may override mime_type.
            metadata={"mime_type": item.mime_type, **(item.metadata or {})},
        )

    def _ingest_chat(self, item, *, source_type, owner) -> Dict[str, Any]:
        """Ingest a chat/MCP message via ``ingest_message``.

        The role, nickname, source and raw payload are read from
        ``item.metadata``; the role defaults to ``"user"``.
        """
        text = item.text or ""
        meta = item.metadata or {}
        role = str(meta.get("role") or "user")
        result = self._kg.ingest_message(
            role,
            text,
            user_email=owner,
            user_nickname=meta.get("user_nickname"),
            source=meta.get("source") or source_type,
            conversation_id=item.conversation_id,
            raw=meta.get("raw"),
        )
        # ingest_message reports message/response node ids; normalize the keys
        # the provenance step expects. A plain setdefault would leave a
        # present-but-empty "node_id" untouched, so test for falsiness instead.
        if not result.get("node_id"):
            result["node_id"] = result.get("message_node_id") or result.get("id")
        result.setdefault("title", item.title or text[:80])
        return result

    def _ingest_memory_record(self, item, *, source_type, owner) -> Dict[str, Any]:
        """Ingest a decision/experience/event record via ``ingest_event``.

        The label is the item title, or the first 120 characters of the text
        (or the node type) when no title is set. Up to 2000 characters of the
        text are stored under ``metadata["detail"]``.
        """
        node_type = _MEMORY_NODE_TYPES[source_type]
        meta = item.metadata or {}
        result = self._kg.ingest_event(
            node_type,
            item.title or (item.text or node_type)[:120],
            user_email=owner,
            source=meta.get("source") or source_type,
            conversation_id=item.conversation_id,
            metadata={**meta, "detail": (item.text or "")[:2000]},
        )
        # Same falsiness-based fallback as _ingest_chat.
        if not result.get("node_id"):
            result["node_id"] = result.get("id")
        result.setdefault("title", item.title)
        return result

    def _ingest_file(self, item, *, source_type, owner, captured_at) -> Dict[str, Any]:
        """Ingest an on-disk file via ``ingest_document``.

        Raises:
            ValueError: If the item has no path.
            FileNotFoundError: If the path does not exist.
        """
        if not item.path:
            raise ValueError("File ingestion requires a path.")
        path = Path(item.path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        return self._kg.ingest_document(
            path,
            original_filename=item.title or path.name,
            mime_type=item.mime_type,
            uploader=owner,
            conversation_id=item.conversation_id,
            extracted=(item.metadata or {}).get("extracted"),
            source_type=source_type,
            source_uri=item.source_uri or str(path),
            captured_at=captured_at,
            modified_at=item.modified_at,
            owner=owner,
            workspace_id=item.workspace_id,
            permissions=item.permissions,
        )
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def content_hash_text(text: Optional[str]) -> str:
    """Return the SHA-256 hex digest of *text* encoded as UTF-8.

    ``None`` and ``""`` hash identically (the digest of empty input).
    Characters that cannot be encoded as UTF-8 are dropped before hashing.
    NOTE(review): intended to match the store's hashing scheme — confirm
    against the store implementation.
    """
    return hashlib.sha256((text or "").encode("utf-8", "ignore")).hexdigest()
|
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""Knowledge Graph portability — local export / import / backup / restore.
|
|
2
|
+
|
|
3
|
+
The Knowledge Graph is the user's durable asset, so it must be portable without
|
|
4
|
+
any cloud service. Two complementary mechanisms, both fully local:
|
|
5
|
+
|
|
6
|
+
* **Logical export/import** (JSON): nodes/edges/chunks/sources/provenance with a
|
|
7
|
+
versioned header (schema + projection + embed-dim). Re-embeds on import, so it
|
|
8
|
+
is portable across machines.
|
|
9
|
+
* **Binary backup/restore** (ZIP): a faithful snapshot of the SQLite DB (incl.
|
|
10
|
+
vector embeddings) plus the blob directory, integrity-checked, for
|
|
11
|
+
same-machine recovery.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import hashlib
|
|
17
|
+
import json
|
|
18
|
+
import shutil
|
|
19
|
+
import tempfile
|
|
20
|
+
import zipfile
|
|
21
|
+
from datetime import datetime, timezone
|
|
22
|
+
from pathlib import Path
|
|
23
|
+
from typing import Any, Dict, Optional
|
|
24
|
+
|
|
25
|
+
# Identifier and version written into the header of logical (JSON) exports.
FORMAT = "latticeai.kg.export"
FORMAT_VERSION = 1
# Identifier for binary (ZIP) backups.
BACKUP_FORMAT = "latticeai.kg.backup"
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _now_iso() -> str:
    """Return the current UTC time as a timezone-aware ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _stamp() -> str:
    """Return a compact UTC timestamp ``YYYYMMDDTHHMMSS`` for file names.

    The value has one-second resolution, so two stamps taken within the same
    second are identical.
    """
    # Same 15-character value the ISO string yields once ":", "-" and "." are
    # stripped and the result is truncated, formatted directly instead.
    return datetime.now(timezone.utc).strftime("%Y%m%dT%H%M%S")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def _sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in 64 KiB blocks so large files are not loaded into
    memory at once.
    """
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: fh.read(65536), b""):
            digest.update(block)
    return digest.hexdigest()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class KGPortabilityService:
|
|
47
|
+
def __init__(self, *, knowledge_graph: Any, data_dir, enable_graph: bool = True, device_identity: Any = None) -> None:
    """Store collaborators and derive the export directory; performs no I/O.

    Args:
        knowledge_graph: Store object used for export/import; ``None`` makes
            the service unavailable.
        data_dir: Base data directory; exports default to its
            ``workspace_exports`` subdirectory.
        enable_graph: When falsy, operations guarded by ``_require`` raise.
        device_identity: Optional signer; when set, exports are signed via
            its ``sign_manifest`` method.
    """
    self._kg = knowledge_graph
    self._data_dir = Path(data_dir)
    self._enable = bool(enable_graph)
    self._exports_dir = self._data_dir / "workspace_exports"
    # v4 sovereignty: when a DeviceIdentity is wired, exports are signed
    # and imports record origin provenance. Pre-v4 unsigned bundles stay
    # importable locally (origin='unsigned-legacy') — signatures are
    # mandatory only on the Brain Network peer path.
    self._identity = device_identity
|
|
57
|
+
|
|
58
|
+
def available(self) -> bool:
    """Return True when the graph is enabled and a store is attached."""
    return self._enable and self._kg is not None
|
|
60
|
+
|
|
61
|
+
def _require(self) -> None:
    """Raise unless the service is available.

    Raises:
        RuntimeError: If ``available()`` returns a falsy value.
    """
    if not self.available():
        raise RuntimeError("Knowledge Graph is disabled (LATTICEAI_ENABLE_GRAPH).")
|
|
64
|
+
|
|
65
|
+
# ── logical export / import ──────────────────────────────────────────────
|
|
66
|
+
def export(self, *, workspace_id: Optional[str] = None) -> Dict[str, Any]:
    """Build a logical export artifact for the graph.

    Args:
        workspace_id: Forwarded to the store's ``export_graph_data`` and
            recorded in the header.

    Returns:
        A dict holding a ``header`` plus every key returned by
        ``export_graph_data``, and a ``signature`` when a device identity is
        configured.

    Raises:
        RuntimeError: If the Knowledge Graph is unavailable.
    """
    self._require()
    data = self._kg.export_graph_data(workspace_id=workspace_id)
    header = {
        "format": FORMAT,
        "format_version": FORMAT_VERSION,
        # Spread after the format keys: a same-named key returned by
        # schema_versions() would take precedence over them.
        **self._kg.schema_versions(),
        "exported_at": _now_iso(),
        "workspace_id": workspace_id,
        "counts": data.get("counts"),
    }
    # Data keys are spread last, so a "header" key in data would replace ours.
    artifact = {"header": header, **data}
    if self._identity is not None:
        # NOTE(review): only the header is passed to sign_manifest, so the
        # signature does not appear to cover the exported nodes/edges —
        # confirm this is intended.
        artifact["signature"] = self._identity.sign_manifest(header)
    return artifact
|
|
81
|
+
|
|
82
|
+
def export_to_file(self, path=None, *, workspace_id: Optional[str] = None) -> Dict[str, Any]:
    """Write a logical export to disk as indented UTF-8 JSON.

    Args:
        path: Destination file. When omitted, a timestamped
            ``kg-export-<stamp>.json`` is created in the exports directory.
        workspace_id: Forwarded to ``export``.

    Returns:
        A dict with the written ``path`` (as a string), the artifact
        ``header`` and the file size in ``bytes``.
    """
    artifact = self.export(workspace_id=workspace_id)
    self._exports_dir.mkdir(parents=True, exist_ok=True)
    target = Path(path) if path else self._exports_dir / f"kg-export-{_stamp()}.json"
    # A caller-supplied destination may live outside the exports directory;
    # make sure its parent exists so the write cannot fail on a missing folder.
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(json.dumps(artifact, ensure_ascii=False, indent=2), encoding="utf-8")
    return {"path": str(target), "header": artifact["header"], "bytes": target.stat().st_size}
|
|
88
|
+
|
|
89
|
+
def import_data(self, artifact: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False) -> Dict[str, Any]:
    """Import a logical export artifact into the knowledge graph.

    Args:
        artifact: Parsed export bundle; must be a dict containing ``"nodes"``.
        mode: ``"merge"`` or ``"replace"``; forwarded to the graph backend.
        dry_run: Forwarded to the backend. When true, no provenance record
            is written.

    Returns:
        The backend's import result, extended with ``"header"``,
        ``"origin"`` (``"unsigned-legacy"`` or ``"device:<fingerprint>"``)
        and ``"signed"``.

    Raises:
        RuntimeError: If the knowledge graph is not available.
        ValueError: If the artifact or mode is invalid, or a present
            signature fails verification.
    """
    self._require()
    if not isinstance(artifact, dict) or "nodes" not in artifact:
        raise ValueError("Invalid Knowledge Graph export artifact.")
    if mode not in ("merge", "replace"):
        raise ValueError("mode must be 'merge' or 'replace'.")

    header = artifact.get("header") or {}
    origin = "unsigned-legacy"
    signature = artifact.get("signature")
    if signature:
        from latticeai.brain.identity import verify_manifest

        # NOTE(review): only the header is passed to the verifier here, so
        # the check as written does not obviously cover nodes/edges — confirm.
        if not verify_manifest(header, signature):
            raise ValueError("Bundle signature verification failed — refusing to import.")
        origin = f"device:{signature.get('fingerprint') or 'unknown'}"

    result = self._kg.import_graph_data(artifact, mode=mode, dry_run=dry_run)
    result["header"] = artifact.get("header")
    result["origin"] = origin
    result["signed"] = bool(signature)

    if not dry_run:
        try:
            self._kg.record_provenance(
                node_id="import:" + str(header.get("exported_at") or _now_iso()),
                source_type="bundle_import",
                pipeline="kg-portability",
                owner=None,
                metadata={"origin": origin, "mode": mode, "counts": header.get("counts")},
            )
        except Exception:
            # Provenance is best-effort: the import itself already succeeded,
            # so do not fail the call — but leave a trace instead of silently
            # dropping the audit record.
            import logging

            logging.getLogger(__name__).warning(
                "Failed to record provenance for bundle import (origin=%s, mode=%s)",
                origin,
                mode,
                exc_info=True,
            )
    return result
|
|
120
|
+
|
|
121
|
+
def import_from_file(self, path, *, mode: str = "merge", dry_run: bool = False) -> Dict[str, Any]:
    """Load a JSON export bundle from ``path`` and import it.

    Args:
        path: Location of a UTF-8 encoded JSON export file.
        mode: Forwarded to ``import_data``.
        dry_run: Forwarded to ``import_data``.

    Returns:
        Whatever ``import_data`` returns for the parsed bundle.
    """
    raw_text = Path(path).read_text(encoding="utf-8")
    bundle = json.loads(raw_text)
    return self.import_data(bundle, mode=mode, dry_run=dry_run)
|
|
124
|
+
|
|
125
|
+
# ── binary backup / restore ──────────────────────────────────────────────
|
|
126
|
+
def backup(self, dest_path=None) -> Dict[str, Any]:
    """Create a binary backup archive (zip) of the graph database and blobs.

    The archive contains ``knowledge_graph.sqlite``, a ``manifest.json``
    (format, versions, creation time, SHA-256 of the database copy, and
    whether a blob directory existed) and every file under the blob
    directory, stored under ``blobs/``.

    Args:
        dest_path: Destination archive. When falsy, a timestamped
            ``kg-backup-<stamp>.zip`` inside the exports directory is used.

    Returns:
        A dict with the archive ``"path"``, its size in ``"bytes"`` and
        the ``"manifest"`` that was written.

    Raises:
        RuntimeError: If the knowledge graph is not available.
    """
    self._require()
    self._exports_dir.mkdir(parents=True, exist_ok=True)
    dest = Path(dest_path) if dest_path else self._exports_dir / f"kg-backup-{_stamp()}.zip"
    # A caller-supplied destination may sit outside the exports directory;
    # create its parent so opening the archive does not fail.
    dest.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory() as tmp_s:
        tmp = Path(tmp_s)
        db_copy = tmp / "knowledge_graph.sqlite"
        # The backend writes its own copy of the database; the checksum and
        # the archived file are both taken from that copy.
        self._kg.backup_database(db_copy)
        blob_dir = Path(self._kg.blob_dir)
        manifest = {
            "format": BACKUP_FORMAT,
            "format_version": FORMAT_VERSION,
            **self._kg.schema_versions(),
            "created_at": _now_iso(),
            "db_sha256": _sha256_file(db_copy),
            "has_blobs": blob_dir.exists(),
        }
        with zipfile.ZipFile(dest, "w", zipfile.ZIP_DEFLATED) as zf:
            zf.write(db_copy, "knowledge_graph.sqlite")
            zf.writestr("manifest.json", json.dumps(manifest, ensure_ascii=False, indent=2))
            if blob_dir.exists():
                for f in blob_dir.rglob("*"):
                    if f.is_file():
                        zf.write(f, f"blobs/{f.relative_to(blob_dir)}")
    return {"path": str(dest), "bytes": dest.stat().st_size, "manifest": manifest}
|
|
151
|
+
|
|
152
|
+
def restore(self, archive_path, *, verify: bool = True) -> Dict[str, Any]:
    """Restore the graph database (and blobs, if archived) from a backup zip.

    Args:
        archive_path: Path to an archive containing ``knowledge_graph.sqlite``
            and optionally ``manifest.json`` and a ``blobs/`` tree.
        verify: When true and the manifest carries ``db_sha256``, the
            extracted database is checksummed before anything is replaced.
            Archives without that manifest entry are restored unchecked.

    Returns:
        ``{"restored": True, "manifest": <manifest dict>, "nodes": <int>}``
        where ``nodes`` sums the values of ``stats()["nodes"]`` after restore.

    Raises:
        RuntimeError: If the knowledge graph is not available.
        FileNotFoundError: If the archive does not exist.
        ValueError: If the archive lacks the database or fails the
            integrity check.
    """
    self._require()
    archive = Path(archive_path)
    if not archive.exists():
        raise FileNotFoundError(f"Backup archive not found: {archive}")
    with zipfile.ZipFile(archive) as zf:
        names = zf.namelist()
        if "knowledge_graph.sqlite" not in names:
            raise ValueError("Archive is missing knowledge_graph.sqlite.")
        manifest = json.loads(zf.read("manifest.json")) if "manifest.json" in names else {}
        with tempfile.TemporaryDirectory() as tmp_s:
            tmp = Path(tmp_s)
            zf.extractall(tmp)
            db_src = tmp / "knowledge_graph.sqlite"
            if verify and manifest.get("db_sha256"):
                if _sha256_file(db_src) != manifest["db_sha256"]:
                    raise ValueError("Backup integrity check failed (db sha256 mismatch).")
            db_dest = Path(self._kg.db_path)
            blob_dest = Path(self._kg.blob_dir)
            db_dest.parent.mkdir(parents=True, exist_ok=True)
            # Stage the restored database next to its destination first, so a
            # failed copy (disk full, I/O error) leaves the live database
            # untouched instead of already deleted.
            staged = db_dest.with_name(db_dest.name + ".restoring")
            try:
                shutil.copyfile(db_src, staged)
                # Drop stale WAL/SHM siblings so the restored copy is
                # authoritative (no stale journal overlaying old pages).
                for sib in (Path(str(db_dest) + "-wal"), Path(str(db_dest) + "-shm")):
                    if sib.exists():
                        sib.unlink()
                # Same-directory rename: replaces the live file in one step.
                staged.replace(db_dest)
            finally:
                # Only still present if something above failed.
                if staged.exists():
                    staged.unlink()
            blob_src = tmp / "blobs"
            if blob_src.exists():
                if blob_dest.exists():
                    shutil.rmtree(blob_dest)
                shutil.copytree(blob_src, blob_dest)
            else:
                # Archive carried no blobs: any existing blob directory is
                # left as-is; just make sure the directory exists.
                blob_dest.mkdir(parents=True, exist_ok=True)
    stats = self._kg.stats()
    return {
        "restored": True,
        "manifest": manifest,
        "nodes": sum(stats.get("nodes", {}).values()),
    }
|
|
191
|
+
|
|
192
|
+
# ── status surface ───────────────────────────────────────────────────────
|
|
193
|
+
def snapshot_metadata(self) -> Dict[str, Any]:
    """Summarise the graph's status for display.

    Returns:
        ``{"available": False}`` when the graph cannot be used; otherwise a
        dict with ``"available": True``, the backend's schema versions, its
        ``"stats"`` and its ``"provenance"`` statistics.
    """
    if not self.available():
        return {"available": False}
    graph = self._kg
    summary: Dict[str, Any] = {"available": True}
    summary.update(graph.schema_versions())
    summary["stats"] = graph.stats()
    summary["provenance"] = graph.provenance_stats()
    return summary
|
|
202
|
+
|
|
203
|
+
def recent_ingestions(self, *, limit: int = 50, source_type: Optional[str] = None) -> Dict[str, Any]:
    """Recent provenance records (newest first) for the ingestion-sources UI.

    Args:
        limit: Forwarded to the backend's ``list_provenance``.
        source_type: Optional filter, forwarded to ``list_provenance``.

    Returns:
        The backend's listing, or an empty ``{"items": [], "count": 0}``
        payload when the graph is unavailable.
    """
    if self.available():
        return self._kg.list_provenance(limit=limit, source_type=source_type)
    return {"items": [], "count": 0}
|