ltcai 3.5.0 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. package/README.md +73 -35
  2. package/docs/CARRYOVER_AUDIT_v3.6.0.md +61 -0
  3. package/docs/CHANGELOG.md +32 -0
  4. package/docs/HANDOVER_v3.6.0.md +46 -0
  5. package/docs/RUNTIME_HOOK_COVERAGE_v3.6.0.md +49 -0
  6. package/docs/V4_BRAIN_ARCHITECTURE.md +322 -0
  7. package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +509 -0
  8. package/docs/V4_IMPLEMENTATION_PLAN.md +470 -0
  9. package/docs/architecture.md +13 -12
  10. package/docs/kg-schema.md +102 -53
  11. package/docs/privacy.md +18 -2
  12. package/docs/security-model.md +17 -0
  13. package/kg_schema.py +139 -10
  14. package/knowledge_graph.py +874 -26
  15. package/knowledge_graph_api.py +11 -127
  16. package/latticeai/__init__.py +1 -1
  17. package/latticeai/api/admin.py +1 -1
  18. package/latticeai/api/agents.py +7 -1
  19. package/latticeai/api/auth.py +27 -4
  20. package/latticeai/api/browser.py +217 -0
  21. package/latticeai/api/chat.py +112 -76
  22. package/latticeai/api/health.py +1 -1
  23. package/latticeai/api/hooks.py +1 -1
  24. package/latticeai/api/knowledge_graph.py +146 -0
  25. package/latticeai/api/local_files.py +1 -1
  26. package/latticeai/api/mcp.py +23 -11
  27. package/latticeai/api/memory.py +1 -1
  28. package/latticeai/api/models.py +1 -1
  29. package/latticeai/api/network.py +81 -0
  30. package/latticeai/api/portability.py +93 -0
  31. package/latticeai/api/realtime.py +1 -1
  32. package/latticeai/api/search.py +26 -2
  33. package/latticeai/api/security_dashboard.py +2 -3
  34. package/latticeai/api/setup.py +2 -2
  35. package/latticeai/api/static_routes.py +2 -4
  36. package/latticeai/api/tools.py +3 -0
  37. package/latticeai/api/workflow_designer.py +46 -0
  38. package/latticeai/api/workspace.py +71 -49
  39. package/latticeai/app_factory.py +1710 -0
  40. package/latticeai/brain/__init__.py +18 -0
  41. package/latticeai/brain/context.py +213 -0
  42. package/latticeai/brain/conversations.py +236 -0
  43. package/latticeai/brain/identity.py +175 -0
  44. package/latticeai/brain/memory.py +102 -0
  45. package/latticeai/brain/network.py +205 -0
  46. package/latticeai/core/agent.py +31 -7
  47. package/latticeai/core/audit.py +0 -7
  48. package/latticeai/core/config.py +1 -1
  49. package/latticeai/core/context_builder.py +1 -2
  50. package/latticeai/core/enterprise.py +1 -1
  51. package/latticeai/core/graph_curator.py +2 -2
  52. package/latticeai/core/marketplace.py +1 -1
  53. package/latticeai/core/mcp_registry.py +791 -0
  54. package/latticeai/core/model_compat.py +1 -1
  55. package/latticeai/core/model_resolution.py +0 -1
  56. package/latticeai/core/multi_agent.py +238 -4
  57. package/latticeai/core/security.py +1 -1
  58. package/latticeai/core/sessions.py +37 -7
  59. package/latticeai/core/workflow_engine.py +114 -2
  60. package/latticeai/core/workspace_os.py +58 -10
  61. package/latticeai/models/__init__.py +7 -0
  62. package/latticeai/models/router.py +779 -0
  63. package/latticeai/server_app.py +29 -1504
  64. package/latticeai/services/agent_runtime.py +1 -0
  65. package/latticeai/services/app_context.py +75 -14
  66. package/latticeai/services/ingestion.py +318 -0
  67. package/latticeai/services/kg_portability.py +207 -0
  68. package/latticeai/services/memory_service.py +39 -11
  69. package/latticeai/services/model_runtime.py +2 -5
  70. package/latticeai/services/platform_runtime.py +100 -23
  71. package/latticeai/services/search_service.py +17 -8
  72. package/latticeai/services/tool_dispatch.py +12 -2
  73. package/latticeai/services/triggers.py +241 -0
  74. package/latticeai/services/upload_service.py +37 -12
  75. package/latticeai/services/workspace_service.py +31 -0
  76. package/llm_router.py +29 -772
  77. package/ltcai_cli.py +1 -2
  78. package/mcp_registry.py +25 -788
  79. package/p_reinforce.py +124 -14
  80. package/package.json +11 -8
  81. package/scripts/build_vsix.mjs +72 -0
  82. package/scripts/bump_version.py +99 -0
  83. package/scripts/generate_diagrams.py +0 -1
  84. package/scripts/lint_v3.mjs +82 -18
  85. package/scripts/validate_release_artifacts.py +0 -1
  86. package/scripts/wheel_smoke.py +142 -0
  87. package/server.py +11 -7
  88. package/setup_wizard.py +1142 -0
  89. package/static/account.html +2 -4
  90. package/static/admin.html +3 -5
  91. package/static/chat.html +3 -6
  92. package/static/graph.html +2 -4
  93. package/static/sw.js +81 -52
  94. package/static/v3/asset-manifest.json +20 -19
  95. package/static/v3/css/{lattice.base.e4cdd05d.css → lattice.base.49deefb5.css} +1 -1
  96. package/static/v3/css/lattice.base.css +1 -1
  97. package/static/v3/css/{lattice.components.9b49d614.css → lattice.components.cde18231.css} +1 -1
  98. package/static/v3/css/lattice.components.css +1 -1
  99. package/static/v3/css/{lattice.shell.8fcc9d33.css → lattice.shell.29d36d85.css} +1 -1
  100. package/static/v3/css/lattice.shell.css +1 -1
  101. package/static/v3/css/{lattice.tokens.e7018963.css → lattice.tokens.304cbc40.css} +3 -0
  102. package/static/v3/css/lattice.tokens.css +3 -0
  103. package/static/v3/css/{lattice.views.22f69117.css → lattice.views.0a18b6c5.css} +2 -2
  104. package/static/v3/css/lattice.views.css +2 -2
  105. package/static/v3/index.html +3 -4
  106. package/static/v3/js/{app.d086489d.js → app.356e6452.js} +1 -1
  107. package/static/v3/js/core/{api.12b568ad.js → api.7a308b89.js} +39 -1
  108. package/static/v3/js/core/api.js +38 -0
  109. package/static/v3/js/core/{routes.d214b399.js → routes.7222343d.js} +22 -22
  110. package/static/v3/js/core/routes.js +22 -22
  111. package/static/v3/js/core/{shell.d05266f5.js → shell.a1657f20.js} +4 -4
  112. package/static/v3/js/core/shell.js +1 -1
  113. package/static/v3/js/core/{store.34ebd5e6.js → store.204a08b2.js} +1 -1
  114. package/static/v3/js/core/store.js +1 -1
  115. package/static/v3/js/views/graph-canvas.17c15d65.js +509 -0
  116. package/static/v3/js/views/graph-canvas.js +509 -0
  117. package/static/v3/js/views/{hybrid-search.b22b97e0.js → hybrid-search.2fb63ed9.js} +1 -2
  118. package/static/v3/js/views/hybrid-search.js +1 -2
  119. package/static/v3/js/views/knowledge-graph.5e40cbeb.js +509 -0
  120. package/static/v3/js/views/knowledge-graph.js +326 -54
  121. package/static/vendor/chart.umd.min.js +20 -0
  122. package/static/vendor/fonts/inter-latin-300-normal.woff2 +0 -0
  123. package/static/vendor/fonts/inter-latin-400-normal.woff2 +0 -0
  124. package/static/vendor/fonts/inter-latin-500-normal.woff2 +0 -0
  125. package/static/vendor/fonts/inter-latin-600-normal.woff2 +0 -0
  126. package/static/vendor/fonts/inter-latin-700-normal.woff2 +0 -0
  127. package/static/vendor/fonts/inter-latin-800-normal.woff2 +0 -0
  128. package/static/vendor/fonts/inter.css +44 -0
  129. package/static/vendor/icons/tabler-icons.min.css +4 -0
  130. package/static/vendor/icons/tabler-icons.woff2 +0 -0
  131. package/static/vendor/marked.min.js +69 -0
  132. package/static/workspace.html +2 -2
  133. package/telegram_bot.py +1 -2
  134. package/tools/commands.py +4 -2
  135. package/tools/computer.py +1 -1
  136. package/tools/documents.py +1 -3
  137. package/tools/filesystem.py +0 -4
  138. package/tools/knowledge.py +1 -3
  139. package/tools/network.py +1 -3
  140. package/codex_telegram_bot.py +0 -195
  141. package/docs/assets/v3.4.0/agent-run.png +0 -0
  142. package/docs/assets/v3.4.0/agents.png +0 -0
  143. package/docs/assets/v3.4.0/before/chat-before.png +0 -0
  144. package/docs/assets/v3.4.0/before/files-before.png +0 -0
  145. package/docs/assets/v3.4.0/chat.png +0 -0
  146. package/docs/assets/v3.4.0/connect-folder.png +0 -0
  147. package/docs/assets/v3.4.0/files.png +0 -0
  148. package/docs/assets/v3.4.0/home.png +0 -0
  149. package/docs/assets/v3.4.0/hooks-dispatch.png +0 -0
  150. package/docs/assets/v3.4.0/knowledge-graph.png +0 -0
  151. package/docs/assets/v3.4.0/local-agent.png +0 -0
  152. package/docs/assets/v3.4.0/memory.png +0 -0
  153. package/docs/assets/v3.4.0/settings.png +0 -0
  154. package/docs/assets/v3.4.0/vision-input.png +0 -0
  155. package/docs/assets/v3.4.0/workflows.png +0 -0
  156. package/docs/assets/v3.4.1/e2e_runtime_log.txt +0 -42
  157. package/docs/assets/v3.4.1/hooks-dispatch.png +0 -0
  158. package/docs/assets/v3.4.1/local-agent.png +0 -0
  159. package/docs/images/admin-dashboard.png +0 -0
  160. package/docs/images/architecture.png +0 -0
  161. package/docs/images/enterprise.png +0 -0
  162. package/docs/images/graph.png +0 -0
  163. package/docs/images/hero.gif +0 -0
  164. package/docs/images/knowledge-graph.png +0 -0
  165. package/docs/images/lattice-ai-demo.gif +0 -0
  166. package/docs/images/lattice-ai-hero.png +0 -0
  167. package/docs/images/logo.svg +0 -33
  168. package/docs/images/mobile-responsive.png +0 -0
  169. package/docs/images/model-recommendation.png +0 -0
  170. package/docs/images/onboarding.png +0 -0
  171. package/docs/images/organization.png +0 -0
  172. package/docs/images/pipeline.png +0 -0
  173. package/docs/images/screenshot-admin.png +0 -0
  174. package/docs/images/screenshot-chat.png +0 -0
  175. package/docs/images/screenshot-graph.png +0 -0
  176. package/docs/images/skills.png +0 -0
  177. package/docs/images/workspace-dark.png +0 -0
  178. package/docs/images/workspace-light.png +0 -0
  179. package/docs/images/workspace.png +0 -0
  180. package/requirements.txt +0 -16
  181. package/static/v3/js/views/knowledge-graph.a14ea7e7.js +0 -237
@@ -242,6 +242,7 @@ class AgentRuntime:
242
242
  user_email=user_email or None,
243
243
  graph=self._workspace_graph(),
244
244
  workspace_id=scope,
245
+ mode=getattr(result, "mode", "simulation"),
245
246
  )
246
247
  self._append_audit_event(
247
248
  "multi_agent_run",
@@ -1,27 +1,88 @@
1
1
  """Application dependency context for router assembly.
2
2
 
3
- The concrete FastAPI app is still assembled in ``server_app``. This dataclass
4
- documents the shared dependency boundary for routers and services so future
5
- extractions can receive a typed context instead of importing the app module.
3
+ ``latticeai.app_factory.create_app`` builds one ``AppContext`` per app and
4
+ hands it to router factories, replacing the historical 25-30-kwarg closure
5
+ wiring. Every field defaults to ``None``-ish so tests can construct a context
6
+ carrying only the dependencies a router actually touches.
7
+
8
+ Fields are grouped by the consumer that motivated them; routers must treat the
9
+ context as read-only.
6
10
  """
7
11
 
8
12
  from __future__ import annotations
9
13
 
10
14
  from dataclasses import dataclass
11
15
  from pathlib import Path
12
- from typing import Any, Callable
16
+ from typing import Any, Callable, Optional
13
17
 
14
18
 
15
19
@dataclass(frozen=True)
class AppContext:
    # Immutable bag of shared dependencies handed to router factories.
    # Every field carries a default, so a context can be built with only the
    # dependencies a given router touches. Instances are frozen: attribute
    # assignment after construction raises.

    # --- core configuration / paths ---------------------------------------
    config: Any = None
    data_dir: Optional[Path] = None
    static_dir: Optional[Path] = None
    base_dir: Optional[Path] = None
    skills_dir: Optional[Path] = None

    # --- singletons ---------------------------------------------------------
    model_router: Any = None
    workspace_store: Any = None
    workspace_service: Any = None
    knowledge_graph: Any = None
    local_kg_watcher: Any = None
    chat_service: Any = None
    context_assembler: Any = None
    brain_memory: Any = None
    gardener: Any = None
    hooks: Any = None
    realtime_bus: Any = None
    capability_registry: Any = None

    # --- auth / session callables -------------------------------------------
    require_user: Optional[Callable[..., str]] = None
    require_admin: Optional[Callable[..., tuple]] = None
    get_current_user: Optional[Callable[..., Optional[str]]] = None
    load_users: Optional[Callable[[], dict]] = None
    get_user_role: Optional[Callable[..., str]] = None
    enforce_rate_limit: Optional[Callable[..., None]] = None

    # --- audit / history callables ------------------------------------------
    append_audit_event: Optional[Callable[..., None]] = None
    get_audit_log: Optional[Callable[[], list]] = None
    get_history: Optional[Callable[[], list]] = None
    get_history_user: Optional[Callable[..., dict]] = None
    save_to_history: Optional[Callable[..., None]] = None
    clear_history: Optional[Callable[..., dict]] = None
    clear_conversation: Optional[Callable[..., dict]] = None
    group_history_conversations: Optional[Callable[..., list]] = None
    get_conversation_messages: Optional[Callable[..., list]] = None
    conversation_title: Optional[Callable[..., str]] = None

    # --- knowledge graph access ---------------------------------------------
    enable_graph: bool = False
    require_graph: Optional[Callable[[], None]] = None
    workspace_graph: Optional[Callable[[], Any]] = None
    graph_stats: Optional[Callable[[], dict]] = None

    # --- workspace payload providers / skills -------------------------------
    workspace_models: Optional[Callable[[], dict]] = None
    workspace_settings: Optional[Callable[[], dict]] = None
    scan_environment: Optional[Callable[[], Any]] = None
    local_sysinfo: Optional[Callable[..., Any]] = None
    get_recommendations: Optional[Callable[..., Any]] = None
    fetch_skills_marketplace: Optional[Callable[..., Any]] = None
    install_skill: Optional[Callable[..., Any]] = None
    remove_skill_directory: Optional[Callable[..., dict]] = None
    redact_secret_text: Optional[Callable[[str], str]] = None
    ui_file_response: Optional[Callable[..., Any]] = None

    # --- models -------------------------------------------------------------
    public_model: str = ""
    local_model: str = ""

    # --- integrations -------------------------------------------------------
    # Invoked as on_chat_message(role, text, source) once a chat exchange has
    # been persisted. ``None`` means no external chat mirror is registered;
    # the telegram bridge subscribes here only when ENABLE_TELEGRAM is truthy.
    on_chat_message: Optional[Callable[..., None]] = None
@@ -0,0 +1,318 @@
1
+ """Unified ingestion pipeline — the single write-side seam into the Knowledge Graph.
2
+
3
+ v3.6.0 Knowledge Graph First principle: *no data source bypasses the Knowledge
4
+ Graph and no source creates an isolated silo*. Every source — local files,
5
+ connected folders, PDFs/Markdown/text/code, web URLs, browser tabs — is
6
+ normalized into one :class:`IngestionItem` and pushed through one
7
+ :meth:`IngestionPipeline.ingest` entrypoint:
8
+
9
+ Source → normalize → content hash → (file | text) ingest → provenance
10
+
11
+ The pipeline is deliberately thin. It owns normalization, idempotency reporting,
12
+ provenance capture, and — crucially — routing every ingest through the shared
13
+ ``dispatch_tool`` lifecycle so ``pre_tool``/``post_tool`` hooks fire on data
14
+ ingestion exactly as they do on tool calls. The heavy graph construction lives in
15
+ :class:`knowledge_graph.KnowledgeGraphStore` (``ingest_document`` for files,
16
+ ``ingest_source`` for text/web), which this module composes rather than
17
+ re-implements.
18
+ """
19
+
20
+ from __future__ import annotations
21
+
22
+ import hashlib
23
+ from dataclasses import dataclass, field
24
+ from datetime import datetime, timezone
25
+ from pathlib import Path
26
+ from typing import Any, Dict, List, Optional
27
+
28
+ from latticeai.core.hooks import dispatch_tool
29
+
30
+ # Source types that arrive as a file on disk (read via ingest_document).
31
+ FILE_SOURCE_TYPES = frozenset({"file", "local_file", "upload", "pdf"})
32
+ # Source types that arrive as extracted text (read via ingest_source).
33
+ TEXT_SOURCE_TYPES = frozenset(
34
+ {"web_url", "browser_tab", "text", "markdown", "note", "code", "clipboard"}
35
+ )
36
+ # Conversational exchanges (read via ingest_message — role/content semantics,
37
+ # conversation chaining). v4: chat and MCP messages stop bypassing the
38
+ # pipeline, so they carry provenance and fire the hook lifecycle like every
39
+ # other source.
40
+ CHAT_SOURCE_TYPES = frozenset({"chat_message", "mcp_message"})
41
+ # Typed memory records (read via ingest_event → Decision/Experience/Event
42
+ # nodes). The Memory System writes through the same door as everything else.
43
+ MEMORY_SOURCE_TYPES = frozenset({"decision", "experience", "workspace_event"})
44
+ _MEMORY_NODE_TYPES = {"decision": "Decision", "experience": "Experience", "workspace_event": "Event"}
45
+
46
+ DEFAULT_MAX_TEXT_BYTES = 5 * 1024 * 1024 # 5 MB of extracted text per item
47
+
48
+
49
+ def _now_iso() -> str:
50
+ return datetime.now(timezone.utc).isoformat()
51
+
52
+
53
@dataclass
class IngestionItem:
    """One unit of content to ingest, in the shape shared by every source type.

    Only ``source_type`` is required. Text-like sources supply ``text``;
    file-like sources supply ``path``. Everything else is optional context
    that the pipeline forwards to the graph store.
    """

    source_type: str
    title: Optional[str] = None
    # Payload for text/web sources.
    text: Optional[str] = None
    # On-disk location for file sources.
    path: Optional[str] = None
    source_uri: Optional[str] = None
    mime_type: Optional[str] = None
    owner: Optional[str] = None
    workspace_id: Optional[str] = None
    permissions: Optional[Dict[str, Any]] = None
    captured_at: Optional[str] = None
    modified_at: Optional[str] = None
    conversation_id: Optional[str] = None
    agent_used: Optional[str] = None
    # A fresh dict per instance, so items never share metadata by accident.
    metadata: Dict[str, Any] = field(default_factory=dict)
71
+
72
+
73
@dataclass
class IngestionResult:
    """Outcome of a single ingestion, carrying provenance and idempotency info."""

    # One of: ok | unavailable | blocked | failed
    status: str
    source_type: str
    node_id: Optional[str] = None
    source_node_id: Optional[str] = None
    content_hash: Optional[str] = None
    title: Optional[str] = None
    chunk_ids: List[str] = field(default_factory=list)
    chunk_count: int = 0
    duplicate: bool = False
    embedded: bool = False
    # One of: indexed | skipped | failed | pending
    indexing_status: str = "pending"
    provenance_id: Optional[str] = None
    detail: Optional[str] = None

    def as_dict(self) -> Dict[str, Any]:
        """Return the result as a plain dict keyed by field name.

        Values are the attribute objects themselves (no copying), in field
        declaration order.
        """
        names = (
            "status",
            "source_type",
            "node_id",
            "source_node_id",
            "content_hash",
            "title",
            "chunk_ids",
            "chunk_count",
            "duplicate",
            "embedded",
            "indexing_status",
            "provenance_id",
            "detail",
        )
        return {name: getattr(self, name) for name in names}
107
+
108
+
109
class IngestionPipeline:
    """Single normalized entrypoint that feeds every source into the graph.

    The pipeline routes an :class:`IngestionItem` to the matching store method
    (``ingest_message``, ``ingest_event``, ``ingest_document`` or
    ``ingest_source``), wraps that call in ``dispatch_tool`` and then records a
    provenance row for the resulting node.
    """

    def __init__(
        self,
        knowledge_graph: Any,
        *,
        hooks: Any = None,
        enable_graph: bool = True,
        audit: Optional[Any] = None,
        max_text_bytes: int = DEFAULT_MAX_TEXT_BYTES,
        pipeline_name: str = "unified-ingestion",
    ) -> None:
        """Bind the pipeline to a graph store.

        Args:
            knowledge_graph: Store object exposing the ``ingest_*``,
                ``node_is_embedded`` and ``record_provenance`` methods used
                below. ``None`` makes the pipeline unavailable.
            hooks: Passed through to ``dispatch_tool`` unchanged.
            enable_graph: When falsy, every ``ingest`` call short-circuits
                with an ``unavailable`` result.
            audit: Optional callable invoked as
                ``audit("kg_ingest", payload, user_email)`` after a
                successful ingest.
            max_text_bytes: Upper bound on the UTF-8 size of a text payload.
            pipeline_name: Value stored in the provenance ``pipeline`` field.
        """
        self._kg = knowledge_graph
        self._hooks = hooks
        self._enable = bool(enable_graph)
        self._audit = audit
        self._max_text_bytes = int(max_text_bytes)
        self._pipeline_name = pipeline_name

    def available(self) -> bool:
        """Return True when the graph is enabled and a store is attached."""
        return self._enable and self._kg is not None

    # ── public API ───────────────────────────────────────────────────────────
    def ingest(self, item: IngestionItem, *, user_email: Optional[str] = None) -> IngestionResult:
        """Normalize, route through dispatch_tool, and record provenance.

        Args:
            item: The content to ingest.
            user_email: Acting user; also used as the owner when the item
                does not name one.

        Returns:
            An :class:`IngestionResult`. ``status`` is ``unavailable`` when the
            graph is off, ``blocked`` when dispatch raised ``PermissionError``,
            ``failed`` when dispatch raised anything else or produced no
            result mapping, and ``ok`` otherwise.
        """
        # A blank/whitespace source type falls back to "text" so the tool name
        # never ends in a bare dot.
        source_type = str(item.source_type or "text").strip() or "text"
        if not self.available():
            return IngestionResult(
                status="unavailable", source_type=source_type,
                indexing_status="skipped",
                detail="Knowledge Graph is disabled (LATTICEAI_ENABLE_GRAPH).",
            )

        captured_at = item.captured_at or _now_iso()
        owner = item.owner or user_email
        tool_name = f"kg_ingest.{source_type}"
        # Small argument summary for dispatch_tool; the payload itself
        # (text / path) is not included here.
        args = {
            "source_type": source_type,
            "source_uri": item.source_uri,
            "owner": owner,
            "workspace_id": item.workspace_id,
        }

        def _run() -> Dict[str, Any]:
            # Routing order matters: chat and memory types win over the
            # file/text fallbacks, and a path without text implies a file.
            if source_type in CHAT_SOURCE_TYPES:
                return self._ingest_chat(item, source_type=source_type, owner=owner)
            if source_type in MEMORY_SOURCE_TYPES:
                return self._ingest_memory_record(item, source_type=source_type, owner=owner)
            if source_type in FILE_SOURCE_TYPES or (item.path and not item.text):
                return self._ingest_file(item, source_type=source_type, owner=owner, captured_at=captured_at)
            return self._ingest_text(item, source_type=source_type, owner=owner, captured_at=captured_at)

        try:
            raw = dispatch_tool(
                self._hooks, tool_name, args, _run,
                user_email=user_email, workspace_id=item.workspace_id, source="ingestion",
            )
        except PermissionError as exc:
            return IngestionResult(
                status="blocked", source_type=source_type,
                indexing_status="skipped", detail=str(exc),
            )
        except Exception as exc:  # noqa: BLE001 — surface as a failed result, never crash the caller
            # Covers FileNotFoundError from _ingest_file as well as any other
            # store/hook error; all are reported identically.
            return IngestionResult(
                status="failed", source_type=source_type,
                indexing_status="failed", detail=str(exc),
            )

        # Everything below reads the result via .get(); a store method that
        # returned None (or another non-mapping) must not raise here.
        if not callable(getattr(raw, "get", None)):
            return IngestionResult(
                status="failed", source_type=source_type,
                indexing_status="failed",
                detail=f"Ingestion for '{source_type}' returned no result mapping.",
            )

        node_id = raw.get("node_id")
        content_hash = raw.get("content_hash") or raw.get("sha256")
        chunk_ids = list(raw.get("chunk_ids") or [])
        embedded = bool(self._kg.node_is_embedded(node_id)) if node_id else False
        title = raw.get("title") or item.title
        duplicate = bool(raw.get("duplicate"))

        prov = self._kg.record_provenance(
            node_id=node_id,
            source_type=source_type,
            pipeline=self._pipeline_name,
            source_uri=item.source_uri,
            content_hash=content_hash,
            title=title,
            owner=owner,
            workspace_id=item.workspace_id,
            captured_at=captured_at,
            modified_at=item.modified_at,
            embedded=embedded,
            linked=bool(raw.get("source_node_id")),
            duplicate=duplicate,
            agent_used=item.agent_used,
            chunk_count=len(chunk_ids),
            permissions=item.permissions,
            metadata=item.metadata,
        )
        if self._audit is not None:
            try:
                self._audit(
                    "kg_ingest",
                    {
                        "source_type": source_type, "node_id": node_id,
                        "content_hash": content_hash, "duplicate": duplicate,
                    },
                    user_email,
                )
            except Exception:  # noqa: BLE001 — audit must never break ingestion
                pass

        return IngestionResult(
            status="ok",
            source_type=source_type,
            node_id=node_id,
            source_node_id=raw.get("source_node_id"),
            content_hash=content_hash,
            title=title,
            chunk_ids=chunk_ids,
            chunk_count=len(chunk_ids),
            duplicate=duplicate,
            embedded=embedded,
            indexing_status="indexed",
            # Tolerate a store that returns None from record_provenance.
            provenance_id=(prov or {}).get("id"),
        )

    # ── routing helpers ──────────────────────────────────────────────────────
    def _ingest_text(self, item, *, source_type, owner, captured_at) -> Dict[str, Any]:
        """Ingest a text payload via ``ingest_source``.

        Raises:
            ValueError: If the UTF-8 encoded text exceeds ``max_text_bytes``.
        """
        text = item.text or ""
        if len(text.encode("utf-8", "ignore")) > self._max_text_bytes:
            raise ValueError(
                f"Text payload exceeds the {self._max_text_bytes // (1024 * 1024)}MB ingestion limit."
            )
        title = item.title or item.source_uri or source_type
        return self._kg.ingest_source(
            source_type=source_type,
            title=title,
            text=text,
            source_uri=item.source_uri,
            owner=owner,
            workspace_id=item.workspace_id,
            permissions=item.permissions,
            captured_at=captured_at,
            modified_at=item.modified_at,
            conversation_id=item.conversation_id,
            # Caller-supplied metadata wins over the derived mime_type key.
            metadata={"mime_type": item.mime_type, **(item.metadata or {})},
        )

    def _ingest_chat(self, item, *, source_type, owner) -> Dict[str, Any]:
        """Ingest a conversational message via ``ingest_message``.

        ``role``, ``user_nickname``, ``source`` and ``raw`` are read from
        ``item.metadata``; the role defaults to ``"user"``.
        """
        text = item.text or ""
        meta = item.metadata or {}
        role = str(meta.get("role") or "user")
        result = self._kg.ingest_message(
            role,
            text,
            user_email=owner,
            user_nickname=meta.get("user_nickname"),
            source=meta.get("source") or source_type,
            conversation_id=item.conversation_id,
            raw=meta.get("raw"),
        )
        # Normalize to the keys the provenance step reads. An explicit check is
        # needed because setdefault() leaves a key alone when it is present
        # with a None value, which would skip the fallback ids.
        if not result.get("node_id"):
            result["node_id"] = result.get("message_node_id") or result.get("id")
        if not result.get("title"):
            result["title"] = item.title or text[:80]
        return result

    def _ingest_memory_record(self, item, *, source_type, owner) -> Dict[str, Any]:
        """Ingest a typed memory record via ``ingest_event``.

        The node type comes from ``_MEMORY_NODE_TYPES``; the label is the
        item title, or the first 120 characters of the text (or the node type
        name when there is no text).
        """
        node_type = _MEMORY_NODE_TYPES[source_type]
        meta = item.metadata or {}
        result = self._kg.ingest_event(
            node_type,
            item.title or (item.text or node_type)[:120],
            user_email=owner,
            source=meta.get("source") or source_type,
            conversation_id=item.conversation_id,
            metadata={**meta, "detail": (item.text or "")[:2000]},
        )
        # Same None-safe normalization as _ingest_chat.
        if not result.get("node_id"):
            result["node_id"] = result.get("id")
        if not result.get("title"):
            result["title"] = item.title
        return result

    def _ingest_file(self, item, *, source_type, owner, captured_at) -> Dict[str, Any]:
        """Ingest an on-disk file via ``ingest_document``.

        Raises:
            ValueError: If the item has no path.
            FileNotFoundError: If the path does not exist.
        """
        if not item.path:
            raise ValueError("File ingestion requires a path.")
        path = Path(item.path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")
        return self._kg.ingest_document(
            path,
            original_filename=item.title or path.name,
            mime_type=item.mime_type,
            uploader=owner,
            conversation_id=item.conversation_id,
            extracted=item.metadata.get("extracted") if item.metadata else None,
            source_type=source_type,
            source_uri=item.source_uri or str(path),
            captured_at=captured_at,
            modified_at=item.modified_at,
            owner=owner,
            workspace_id=item.workspace_id,
            permissions=item.permissions,
        )
314
+
315
+
316
def content_hash_text(text: str) -> str:
    """Return the hex SHA-256 of *text* encoded as UTF-8.

    A falsy value hashes as the empty string, and characters that cannot be
    encoded are dropped rather than raising.
    """
    payload = (text or "").encode("utf-8", "ignore")
    digest = hashlib.sha256()
    digest.update(payload)
    return digest.hexdigest()
@@ -0,0 +1,207 @@
1
+ """Knowledge Graph portability — local export / import / backup / restore.
2
+
3
+ The Knowledge Graph is the user's durable asset, so it must be portable without
4
+ any cloud service. Two complementary mechanisms, both fully local:
5
+
6
+ * **Logical export/import** (JSON): nodes/edges/chunks/sources/provenance with a
7
+ versioned header (schema + projection + embed-dim). Re-embeds on import, so it
8
+ is portable across machines.
9
+ * **Binary backup/restore** (ZIP): a faithful snapshot of the SQLite DB (incl.
10
+ vector embeddings) plus the blob directory, integrity-checked, for
11
+ same-machine recovery.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import hashlib
17
+ import json
18
+ import shutil
19
+ import tempfile
20
+ import zipfile
21
+ from datetime import datetime, timezone
22
+ from pathlib import Path
23
+ from typing import Any, Dict, Optional
24
+
25
+ FORMAT = "latticeai.kg.export"
26
+ FORMAT_VERSION = 1
27
+ BACKUP_FORMAT = "latticeai.kg.backup"
28
+
29
+
30
+ def _now_iso() -> str:
31
+ return datetime.now(timezone.utc).isoformat()
32
+
33
+
34
def _stamp() -> str:
    """Return a compact filename stamp built from the current UTC time.

    Colons, dashes and dots are stripped from the ISO string and the first
    15 characters are kept.
    """
    compact = _now_iso()
    for separator in (":", "-", "."):
        compact = compact.replace(separator, "")
    return compact[:15]
36
+
37
+
38
+ def _sha256_file(path: Path) -> str:
39
+ h = hashlib.sha256()
40
+ with open(path, "rb") as fh:
41
+ for block in iter(lambda: fh.read(65536), b""):
42
+ h.update(block)
43
+ return h.hexdigest()
44
+
45
+
46
class KGPortabilityService:
    """Local export/import (JSON) and backup/restore (ZIP) for the graph store.

    All operations work against the store object passed in and the local
    filesystem only. Default artifacts are written under
    ``<data_dir>/workspace_exports``.
    """

    def __init__(self, *, knowledge_graph: Any, data_dir, enable_graph: bool = True, device_identity: Any = None) -> None:
        """Bind the service to a graph store and data directory.

        Args:
            knowledge_graph: Store exposing the export/import/backup methods
                used below; ``None`` makes the service unavailable.
            data_dir: Base directory; exports go to ``workspace_exports``
                beneath it.
            enable_graph: When falsy, every operation that needs the graph
                raises ``RuntimeError`` (or reports unavailable).
            device_identity: Optional object with ``sign_manifest(header)``.
                When set, exports carry a ``signature`` entry.
        """
        self._kg = knowledge_graph
        self._data_dir = Path(data_dir)
        self._enable = bool(enable_graph)
        self._exports_dir = self._data_dir / "workspace_exports"
        # v4 sovereignty: when a DeviceIdentity is wired, exports are signed
        # and imports record origin provenance. Pre-v4 unsigned bundles stay
        # importable locally (origin='unsigned-legacy') — signatures are
        # mandatory only on the Brain Network peer path.
        self._identity = device_identity

    def available(self) -> bool:
        """Return True when the graph is enabled and a store is attached."""
        return self._enable and self._kg is not None

    def _require(self) -> None:
        """Raise ``RuntimeError`` unless the service is available."""
        if not self.available():
            raise RuntimeError("Knowledge Graph is disabled (LATTICEAI_ENABLE_GRAPH).")

    # ── logical export / import ──────────────────────────────────────────────
    def export(self, *, workspace_id: Optional[str] = None) -> Dict[str, Any]:
        """Build an in-memory export artifact: a versioned header plus graph data.

        Returns:
            ``{"header": ..., **graph_data}`` with an added ``"signature"``
            key when a device identity is configured.
        """
        self._require()
        data = self._kg.export_graph_data(workspace_id=workspace_id)
        header = {
            "format": FORMAT,
            "format_version": FORMAT_VERSION,
            **self._kg.schema_versions(),
            "exported_at": _now_iso(),
            "workspace_id": workspace_id,
            "counts": data.get("counts"),
        }
        artifact = {"header": header, **data}
        if self._identity is not None:
            # NOTE(review): only the header is handed to sign_manifest, so the
            # node/edge payload is not visibly covered by the signature here —
            # confirm whether that is intended.
            artifact["signature"] = self._identity.sign_manifest(header)
        return artifact

    def export_to_file(self, path=None, *, workspace_id: Optional[str] = None) -> Dict[str, Any]:
        """Write an export artifact as indented UTF-8 JSON.

        Args:
            path: Destination file; defaults to a timestamped file in the
                exports directory.
            workspace_id: Forwarded to :meth:`export`.

        Returns:
            ``{"path", "header", "bytes"}`` describing the written file.
        """
        artifact = self.export(workspace_id=workspace_id)
        self._exports_dir.mkdir(parents=True, exist_ok=True)
        path = Path(path) if path else self._exports_dir / f"kg-export-{_stamp()}.json"
        # A caller-supplied destination may live in a directory that does not
        # exist yet.
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(json.dumps(artifact, ensure_ascii=False, indent=2), encoding="utf-8")
        return {"path": str(path), "header": artifact["header"], "bytes": path.stat().st_size}

    def import_data(self, artifact: Dict[str, Any], *, mode: str = "merge", dry_run: bool = False) -> Dict[str, Any]:
        """Import an export artifact into the graph.

        Args:
            artifact: Parsed export; must be a dict containing ``"nodes"``.
            mode: ``"merge"`` or ``"replace"``.
            dry_run: Forwarded to the store; when true no provenance row is
                recorded.

        Returns:
            The store's import result with ``header``, ``origin`` and
            ``signed`` added.

        Raises:
            RuntimeError: If the graph is disabled.
            ValueError: On a malformed artifact, an unknown mode, or a
                signature that fails verification.
        """
        self._require()
        if not isinstance(artifact, dict) or "nodes" not in artifact:
            raise ValueError("Invalid Knowledge Graph export artifact.")
        if mode not in ("merge", "replace"):
            raise ValueError("mode must be 'merge' or 'replace'.")
        header = artifact.get("header") or {}
        origin = "unsigned-legacy"
        signature = artifact.get("signature")
        if signature:
            from latticeai.brain.identity import verify_manifest

            if not verify_manifest(header, signature):
                raise ValueError("Bundle signature verification failed — refusing to import.")
            # Guard against a verified-but-non-dict signature object.
            fingerprint = signature.get("fingerprint") if isinstance(signature, dict) else None
            origin = f"device:{fingerprint or 'unknown'}"
        result = self._kg.import_graph_data(artifact, mode=mode, dry_run=dry_run)
        result["header"] = artifact.get("header")
        result["origin"] = origin
        result["signed"] = bool(signature)
        if not dry_run:
            try:
                self._kg.record_provenance(
                    node_id="import:" + str(header.get("exported_at") or _now_iso()),
                    source_type="bundle_import",
                    pipeline="kg-portability",
                    owner=None,
                    metadata={"origin": origin, "mode": mode,
                              "counts": header.get("counts")},
                )
            except Exception:  # noqa: BLE001 — provenance is best-effort here
                # The import itself already succeeded; log instead of failing
                # the call or hiding the problem entirely.
                import logging

                logging.getLogger(__name__).warning(
                    "Could not record provenance for bundle import", exc_info=True
                )
        return result

    def import_from_file(self, path, *, mode: str = "merge", dry_run: bool = False) -> Dict[str, Any]:
        """Load a JSON export from *path* and pass it to :meth:`import_data`."""
        artifact = json.loads(Path(path).read_text(encoding="utf-8"))
        return self.import_data(artifact, mode=mode, dry_run=dry_run)

    # ── binary backup / restore ──────────────────────────────────────────────
    def backup(self, dest_path=None) -> Dict[str, Any]:
        """Write a ZIP with a DB snapshot, a manifest, and the blob directory.

        Args:
            dest_path: Destination archive; defaults to a timestamped file in
                the exports directory.

        Returns:
            ``{"path", "bytes", "manifest"}`` for the written archive.
        """
        self._require()
        self._exports_dir.mkdir(parents=True, exist_ok=True)
        dest = Path(dest_path) if dest_path else self._exports_dir / f"kg-backup-{_stamp()}.zip"
        dest.parent.mkdir(parents=True, exist_ok=True)
        blob_dir = Path(self._kg.blob_dir)
        with tempfile.TemporaryDirectory() as tmp_s:
            db_copy = Path(tmp_s) / "knowledge_graph.sqlite"
            self._kg.backup_database(db_copy)
            manifest = {
                "format": BACKUP_FORMAT,
                "format_version": FORMAT_VERSION,
                **self._kg.schema_versions(),
                "created_at": _now_iso(),
                # Checked by restore() to detect a damaged archive.
                "db_sha256": _sha256_file(db_copy),
                "has_blobs": blob_dir.exists(),
            }
            with zipfile.ZipFile(dest, "w", zipfile.ZIP_DEFLATED) as zf:
                zf.write(db_copy, "knowledge_graph.sqlite")
                zf.writestr("manifest.json", json.dumps(manifest, ensure_ascii=False, indent=2))
                if blob_dir.exists():
                    for f in blob_dir.rglob("*"):
                        if f.is_file():
                            zf.write(f, f"blobs/{f.relative_to(blob_dir)}")
        return {"path": str(dest), "bytes": dest.stat().st_size, "manifest": manifest}

    def restore(self, archive_path, *, verify: bool = True) -> Dict[str, Any]:
        """Replace the live database and blob directory from a backup archive.

        Args:
            archive_path: Path to a ZIP produced by :meth:`backup`.
            verify: When true and the manifest carries ``db_sha256``, the
                extracted database must match it. Archives without that
                entry are restored unchecked.

        Returns:
            ``{"restored": True, "manifest": ..., "nodes": <total node count>}``.

        Raises:
            RuntimeError: If the graph is disabled.
            FileNotFoundError: If the archive does not exist.
            ValueError: If the archive lacks the database, has a malformed
                manifest, or fails the integrity check.
        """
        self._require()
        archive = Path(archive_path)
        if not archive.exists():
            raise FileNotFoundError(f"Backup archive not found: {archive}")
        with zipfile.ZipFile(archive) as zf:
            names = zf.namelist()
            if "knowledge_graph.sqlite" not in names:
                raise ValueError("Archive is missing knowledge_graph.sqlite.")
            manifest = json.loads(zf.read("manifest.json")) if "manifest.json" in names else {}
            if not isinstance(manifest, dict):
                raise ValueError("Archive manifest is not a JSON object.")
            with tempfile.TemporaryDirectory() as tmp_s:
                tmp = Path(tmp_s)
                zf.extractall(tmp)
                db_src = tmp / "knowledge_graph.sqlite"
                if verify and manifest.get("db_sha256"):
                    if _sha256_file(db_src) != manifest["db_sha256"]:
                        raise ValueError("Backup integrity check failed (db sha256 mismatch).")
                db_dest = Path(self._kg.db_path)
                blob_dest = Path(self._kg.blob_dir)
                db_dest.parent.mkdir(parents=True, exist_ok=True)
                # Stage the new database beside the live one and swap it in
                # with a rename, so a failed copy (disk full, permissions)
                # never leaves the store without a database file.
                staged = db_dest.with_name(db_dest.name + ".restore-tmp")
                try:
                    shutil.copyfile(db_src, staged)
                    # Drop stale WAL/SHM siblings first so no old journal can
                    # overlay the restored pages.
                    # NOTE(review): assumes no open connection holds the live
                    # DB during restore — confirm against the store.
                    for sib in (Path(str(db_dest) + "-wal"), Path(str(db_dest) + "-shm")):
                        if sib.exists():
                            sib.unlink()
                    staged.replace(db_dest)
                finally:
                    if staged.exists():
                        staged.unlink()
                blob_src = tmp / "blobs"
                if blob_src.exists():
                    if blob_dest.exists():
                        shutil.rmtree(blob_dest)
                    shutil.copytree(blob_src, blob_dest)
                else:
                    blob_dest.mkdir(parents=True, exist_ok=True)
        stats = self._kg.stats()
        return {
            "restored": True,
            "manifest": manifest,
            "nodes": sum(stats.get("nodes", {}).values()),
        }

    # ── status surface ───────────────────────────────────────────────────────
    def snapshot_metadata(self) -> Dict[str, Any]:
        """Return schema versions, store stats and provenance stats.

        Returns ``{"available": False}`` when the graph is off.
        """
        if not self.available():
            return {"available": False}
        return {
            "available": True,
            **self._kg.schema_versions(),
            "stats": self._kg.stats(),
            "provenance": self._kg.provenance_stats(),
        }

    def recent_ingestions(self, *, limit: int = 50, source_type: Optional[str] = None) -> Dict[str, Any]:
        """Recent provenance records (newest first) for the ingestion-sources UI.

        Returns an empty listing when the graph is off.
        """
        if not self.available():
            return {"items": [], "count": 0}
        return self._kg.list_provenance(limit=limit, source_type=source_type)