ltcai 4.1.0 → 4.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +33 -24
- package/docs/CHANGELOG.md +84 -0
- package/docs/V4_2_BRAIN_CORE_ARCHITECTURE.md +97 -0
- package/docs/V4_2_STORAGE_MIGRATION_REPORT.md +91 -0
- package/docs/V4_2_VALIDATION_REPORT.md +89 -0
- package/docs/V4_3_PORTABILITY_ARCHITECTURE.md +69 -0
- package/docs/V4_3_PRIVACY_AUDIT.md +60 -0
- package/docs/V4_3_PRODUCT_HARDENING_REPORT.md +53 -0
- package/docs/V4_3_VALIDATION_REPORT.md +58 -0
- package/docs/V4_DIGITAL_BRAIN_RECOVERY.md +31 -33
- package/frontend/openapi.json +449 -1
- package/frontend/src/api/client.ts +10 -0
- package/frontend/src/api/openapi.ts +542 -0
- package/frontend/src/pages/System.tsx +92 -0
- package/kg_schema.py +1 -1
- package/knowledge_graph.py +4 -4
- package/lattice_brain/__init__.py +70 -0
- package/lattice_brain/_kg_common.py +1 -0
- package/lattice_brain/archive.py +446 -0
- package/lattice_brain/context.py +3 -0
- package/lattice_brain/conversations.py +3 -0
- package/lattice_brain/core.py +82 -0
- package/lattice_brain/discovery.py +1 -0
- package/lattice_brain/documents.py +1 -0
- package/lattice_brain/embeddings.py +82 -0
- package/lattice_brain/identity.py +13 -0
- package/lattice_brain/ingest.py +1 -0
- package/lattice_brain/memory.py +3 -0
- package/lattice_brain/network.py +1 -0
- package/lattice_brain/projection.py +1 -0
- package/lattice_brain/provenance.py +1 -0
- package/lattice_brain/retrieval.py +1 -0
- package/lattice_brain/schema.py +1 -0
- package/lattice_brain/storage/__init__.py +22 -0
- package/lattice_brain/storage/base.py +72 -0
- package/lattice_brain/storage/docker.py +105 -0
- package/lattice_brain/storage/factory.py +31 -0
- package/lattice_brain/storage/migration.py +190 -0
- package/lattice_brain/storage/postgres.py +123 -0
- package/lattice_brain/storage/sqlite.py +128 -0
- package/lattice_brain/store.py +3 -0
- package/lattice_brain/write_master.py +1 -0
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/admin.py +11 -0
- package/latticeai/api/portability.py +127 -1
- package/latticeai/app_factory.py +26 -10
- package/latticeai/brain/__init__.py +6 -6
- package/latticeai/brain/_kg_common.py +1 -1
- package/latticeai/brain/network.py +1 -1
- package/latticeai/brain/retrieval.py +15 -0
- package/latticeai/brain/store.py +22 -6
- package/latticeai/core/config.py +9 -1
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/multi_agent.py +1 -1
- package/latticeai/core/product_hardening.py +217 -0
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/services/kg_portability.py +227 -3
- package/ltcai_cli.py +2 -1
- package/package.json +4 -3
- package/scripts/bump_version.py +3 -0
- package/scripts/clean_release_artifacts.mjs +27 -0
- package/scripts/lint_frontend.mjs +10 -0
- package/scripts/migrate_brain_storage.py +53 -0
- package/scripts/validate_release_artifacts.py +10 -0
- package/scripts/wheel_smoke.py +3 -0
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/src/main.rs +113 -13
- package/src-tauri/tauri.conf.json +5 -2
- package/static/app/asset-manifest.json +5 -5
- package/static/app/assets/{index-CJRAzNnf.js → index-RiJTJliG.js} +3 -3
- package/static/app/assets/index-RiJTJliG.js.map +1 -0
- package/static/app/assets/index-yZswHE3d.css +2 -0
- package/static/app/index.html +2 -2
- package/static/app/assets/index-CJRAzNnf.js.map +0 -1
- package/static/app/assets/index-CSwBBgf4.css +0 -2
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Independent Brain Core package facade."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Optional
|
|
8
|
+
|
|
9
|
+
from .archive import BrainArchivePaths, EncryptedBrainArchive
|
|
10
|
+
from .storage import SQLiteEngine, StorageEngine, StorageUnavailable
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True)
class BrainCoreConfig:
    """Immutable settings handed to ``BrainCore``.

    Only ``data_dir`` is required. The two optional fields are overrides;
    ``BrainCore`` picks its own defaults when they are left as ``None``.
    """

    # Directory that contains the knowledge-graph database file.
    data_dir: Path
    # Explicit blob directory; BrainCore falls back to a folder under data_dir.
    blob_dir: Optional[Path] = None
    # Explicit storage engine; BrainCore falls back to a SQLiteEngine.
    storage_engine: Optional[StorageEngine] = None
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class BrainCore:
    """Package-level facade over the local Digital Brain stores.

    FastAPI, the CLI, tests and future tools are expected to go through this
    object rather than wiring up the individual storage classes themselves.
    """

    def __init__(self, config: BrainCoreConfig, *, embedder: Any = None) -> None:
        """Resolve paths, validate the storage engine and build the stores.

        Raises:
            StorageUnavailable: if the engine reports itself unavailable, or
                if it is anything other than the SQLite engine.
        """
        self.config = config
        self.data_dir = Path(config.data_dir)
        self.db_path = self.data_dir / "knowledge_graph.sqlite"
        if config.blob_dir:
            self.blob_dir = Path(config.blob_dir)
        else:
            self.blob_dir = self.data_dir / "knowledge_graph_blobs"
        self.storage_engine = config.storage_engine or SQLiteEngine(self.db_path)

        report = self.storage_engine.capabilities()
        if not report.available:
            message = report.reason or f"{report.engine} storage is unavailable"
            raise StorageUnavailable(message)
        if report.engine != "sqlite":
            # Fail loudly instead of quietly swapping in SQLite.
            raise StorageUnavailable(
                "The active FastAPI Brain Core runtime currently requires SQLiteEngine. "
                "Use PostgresEngine through the explicit migration/scale tooling; no SQLite fallback was attempted."
            )

        # Deferred imports: these modules are only loaded once a core is
        # actually constructed (presumably to avoid an import cycle — confirm).
        from .conversations import ConversationStore
        from .store import KnowledgeGraphStore

        self.knowledge = KnowledgeGraphStore(
            self.db_path,
            self.blob_dir,
            embedder=embedder,
            storage_engine=self.storage_engine,
        )
        self.conversations = ConversationStore(self.db_path)
        archive_paths = BrainArchivePaths(db_path=self.db_path, blob_dir=self.blob_dir)
        self.archive = EncryptedBrainArchive(archive_paths)

    @classmethod
    def from_paths(
        cls,
        data_dir: Path,
        *,
        blob_dir: Optional[Path] = None,
        embedder: Any = None,
        storage_engine: Optional[StorageEngine] = None,
    ) -> "BrainCore":
        """Build a core from plain paths without constructing the config by hand."""
        settings = BrainCoreConfig(
            data_dir=Path(data_dir),
            blob_dir=blob_dir,
            storage_engine=storage_engine,
        )
        return cls(settings, embedder=embedder)

    def status(self) -> dict:
        """Return the engine capability report plus the resolved paths as strings."""
        capabilities = self.storage_engine.capabilities()
        return {
            "storage": capabilities.as_dict(),
            "db_path": str(self.db_path),
            "blob_dir": str(self.blob_dir),
        }
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
__all__ = ["BrainCore", "BrainCoreConfig"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.discovery import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.documents import * # noqa: F401,F403
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
"""Local deterministic embeddings used by the standalone Brain Core package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import hashlib
|
|
6
|
+
import math
|
|
7
|
+
import os
|
|
8
|
+
import re
|
|
9
|
+
import struct
|
|
10
|
+
from dataclasses import dataclass
|
|
11
|
+
from typing import Iterable, List
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
DEFAULT_EMBEDDING_DIM = int(os.getenv("LATTICEAI_VECTOR_DIM", "384"))
|
|
15
|
+
EMBEDDING_MODEL_ID = f"lattice-local-hash-v1:{DEFAULT_EMBEDDING_DIM}"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _tokenize(text: str) -> List[str]:
    """Turn ``text`` into the feature strings that get hashed into a vector.

    The input is lower-cased and scanned for ASCII word-like runs (two or
    more characters) and Hangul runs (two or more syllables). Each run yields
    a ``tok:`` feature. Runs of five or more characters containing a Latin
    letter also yield ``tri:`` character trigrams, and Hangul runs of three
    or more syllables also yield ``ko:`` bigrams. Falsy input gives ``[]``.
    """
    lowered = str(text or "").lower()
    features: List[str] = []
    for match in re.finditer(r"[a-z0-9][a-z0-9_.:/+-]{1,}|[가-힣]{2,}", lowered):
        word = match.group(0)
        size = len(word)
        features.append(f"tok:{word}")
        if size >= 5 and re.search(r"[a-z]", word):
            features.extend(f"tri:{word[start:start+3]}" for start in range(size - 2))
        if size >= 3 and re.search(r"[가-힣]", word):
            features.extend(f"ko:{word[start:start+2]}" for start in range(size - 1))
    return features
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _hash_to_index(feature: str, dim: int) -> tuple[int, float]:
    """Map a feature string to a ``(bucket, sign)`` pair.

    The feature is hashed with 8-byte BLAKE2b. The bucket is the hash modulo
    ``dim`` and the sign is ``+1.0`` for an even hash, ``-1.0`` for an odd one.

    NOTE(review): when ``dim`` is even the bucket has the same parity as the
    hash, so the sign is fully determined by the bucket. Changing that would
    change every vector built from this function.
    """
    hasher = hashlib.blake2b(digest_size=8)
    hasher.update(feature.encode("utf-8"))
    number = int.from_bytes(hasher.digest(), "big", signed=False)
    bucket = number % dim
    if number & 1:
        return bucket, -1.0
    return bucket, 1.0
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@dataclass(frozen=True)
class LocalEmbeddingModel:
    """Offline, deterministic feature-hashing embedder.

    It is deliberately not advertised as a production semantic model. It
    gives a real cosine signal for local-first operation and tests, and setup
    wizard provisioning can swap in a user-consented model or provider.
    """

    dim: int = DEFAULT_EMBEDDING_DIM
    model_id: str = EMBEDDING_MODEL_ID

    def embed(self, text: str) -> List[float]:
        """Return a unit-length vector for ``text``, or all zeros if nothing accumulates."""
        accumulator = [0.0] * self.dim
        for feature in _tokenize(text):
            slot, sign = _hash_to_index(feature, self.dim)
            accumulator[slot] += sign
        length = math.sqrt(sum(component * component for component in accumulator))
        if length <= 0:
            # No features, or every contribution cancelled out.
            return accumulator
        return [component / length for component in accumulator]

    def similarity(self, left: Iterable[float], right: Iterable[float]) -> float:
        """Return the dot product of the two vectors, pairing elements up to the shorter one."""
        total = 0
        for a, b in zip(left, right):
            total = total + a * b
        return float(total)

    def encode(self, vector: Iterable[float]) -> bytes:
        """Pack ``vector`` as little-endian 32-bit floats."""
        values = list(vector)
        layout = "<%df" % len(values)
        return struct.pack(layout, *values)

    def decode(self, payload: bytes, dim: int | None = None) -> List[float]:
        """Unpack little-endian 32-bit floats from ``payload``.

        An empty payload gives ``[]``. If the payload size does not match the
        requested dimension (``dim`` or ``self.dim``), as many whole floats as
        fit are decoded and any trailing bytes are ignored.
        """
        if not payload:
            return []
        requested = int(dim or self.dim)
        if len(payload) == requested * 4:
            count = requested
        else:
            count = len(payload) // 4
        return list(struct.unpack(f"<{count}f", payload[: count * 4]))
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
__all__ = ["DEFAULT_EMBEDDING_DIM", "EMBEDDING_MODEL_ID", "LocalEmbeddingModel"]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.ingest import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.network import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.projection import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.provenance import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.retrieval import * # noqa: F401,F403
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from latticeai.brain.schema import * # noqa: F401,F403
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Pluggable storage layer for lattice-brain."""
|
|
2
|
+
|
|
3
|
+
from .base import StorageCapabilities, StorageEngine, StorageUnavailable
|
|
4
|
+
from .docker import DockerPostgresPlan, DockerPostgresWizard
|
|
5
|
+
from .factory import storage_from_env
|
|
6
|
+
from .migration import SQLiteToPostgresMigrator, TablePlan
|
|
7
|
+
from .postgres import PostgresConfig, PostgresEngine
|
|
8
|
+
from .sqlite import SQLiteEngine
|
|
9
|
+
|
|
10
|
+
__all__ = [
|
|
11
|
+
"DockerPostgresPlan",
|
|
12
|
+
"DockerPostgresWizard",
|
|
13
|
+
"PostgresConfig",
|
|
14
|
+
"PostgresEngine",
|
|
15
|
+
"SQLiteEngine",
|
|
16
|
+
"SQLiteToPostgresMigrator",
|
|
17
|
+
"StorageCapabilities",
|
|
18
|
+
"StorageEngine",
|
|
19
|
+
"StorageUnavailable",
|
|
20
|
+
"TablePlan",
|
|
21
|
+
"storage_from_env",
|
|
22
|
+
]
|
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
"""StorageEngine contracts for the independent Brain Core package."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from dataclasses import dataclass, field
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class StorageUnavailable(RuntimeError):
    """Signals that a storage engine the caller explicitly asked for is unusable."""
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(frozen=True)
class StorageCapabilities:
    """Immutable capability report returned by a storage engine."""

    # Engine identifier, e.g. "sqlite".
    engine: str
    # Whether the engine can be used right now.
    available: bool
    # Human-readable explanation, typically set when the engine is unavailable.
    reason: Optional[str] = None
    # Name of the vector backend; "none" when there is no vector support.
    vector_backend: str = "none"
    vector_available: bool = False
    backup_restore: bool = False
    migrations: bool = False
    encrypted_archives: bool = False
    # Free-form, engine-specific details.
    metadata: Dict[str, Any] = field(default_factory=dict)

    def as_dict(self) -> Dict[str, Any]:
        """Return the report as a plain dictionary.

        ``metadata`` is copied (shallowly) so that editing the returned
        dictionary cannot alter this frozen instance.
        """
        return {
            "engine": self.engine,
            "available": self.available,
            "reason": self.reason,
            "vector_backend": self.vector_backend,
            "vector_available": self.vector_available,
            "backup_restore": self.backup_restore,
            "migrations": self.migrations,
            "encrypted_archives": self.encrypted_archives,
            "metadata": dict(self.metadata),
        }
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
class StorageEngine(ABC):
    """Abstract storage contract that Brain Core programs against.

    ``connect`` belongs to the contract because the knowledge graph currently
    issues SQL directly. An engine that cannot be used must fail loudly, and
    callers must never silently drop back to SQLite once Postgres has been
    explicitly selected.
    """

    # Short engine identifier, supplied by each concrete engine.
    name: str

    @abstractmethod
    def capabilities(self) -> StorageCapabilities:
        """Report honestly what this engine can and cannot do."""

    @abstractmethod
    def initialize(self) -> Dict[str, Any]:
        """Create the required storage structures, or raise ``StorageUnavailable``."""

    @abstractmethod
    def connect(self) -> Any:
        """Open and return a DB-API-like connection for this engine."""

    @abstractmethod
    def backup(self, destination: Path) -> Dict[str, Any]:
        """Write a faithful backup of the engine's data to ``destination``."""

    @abstractmethod
    def restore(self, source: Path) -> Dict[str, Any]:
        """Load a faithful engine backup back in from ``source``."""
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
__all__ = ["StorageCapabilities", "StorageEngine", "StorageUnavailable"]
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Explicit-consent Docker setup wizard for Postgres/pgvector."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import subprocess
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Dict, List
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass(frozen=True)
class DockerPostgresPlan:
    """Describes the ``docker compose`` invocation for a local Postgres service."""

    # Compose file passed to ``docker compose -f``.
    compose_path: Path
    # Compose project name passed to ``docker compose -p``.
    project_name: str
    # Service within the compose file that gets started.
    service_name: str = "postgres"
    # Port recorded with the plan; it is not part of the command line.
    port: int = 5432

    def command(self) -> List[str]:
        """Return the argv list that starts the service in detached mode."""
        base = ["docker", "compose", "-p", self.project_name, "-f", str(self.compose_path)]
        return base + ["up", "-d", self.service_name]
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class DockerPostgresWizard:
    """Writes a compose file for local Postgres and starts it only after consent.

    The compose file is written on every ``start`` call, including when
    consent is missing or ``dry_run`` is set, so the caller can show the user
    exactly what would run. Only the container start is gated on consent.
    """

    def __init__(self, data_dir: Path, *, port: int = 5432) -> None:
        """Remember where the compose file goes and which host port to publish."""
        self.data_dir = Path(data_dir)
        self.port = int(port)

    def write_compose(self) -> DockerPostgresPlan:
        """Write ``postgres.compose.yml`` into ``data_dir`` and return its plan.

        The service publishes Postgres on ``127.0.0.1`` only and keeps its
        data in ``./postgres-data`` next to the compose file.
        """
        self.data_dir.mkdir(parents=True, exist_ok=True)
        compose = self.data_dir / "postgres.compose.yml"
        compose.write_text(
            f"""services:
  postgres:
    image: pgvector/pgvector:pg16
    restart: unless-stopped
    environment:
      POSTGRES_DB: lattice_brain
      POSTGRES_USER: lattice
      POSTGRES_PASSWORD: lattice-local-only
    ports:
      - "127.0.0.1:{self.port}:5432"
    volumes:
      - ./postgres-data:/var/lib/postgresql/data
""",
            encoding="utf-8",
        )
        return DockerPostgresPlan(
            compose_path=compose,
            project_name="lattice-brain",
            port=self.port,
        )

    def start(
        self,
        *,
        consent: bool,
        dry_run: bool = False,
        runner=subprocess.run,
    ) -> Dict[str, object]:
        """Start the Postgres service if the user consented.

        Args:
            consent: Must be true before anything is executed.
            dry_run: With consent given, report the command without running it.
            runner: Callable with the ``subprocess.run`` signature, injectable
                for tests.

        Returns:
            A dict whose ``status`` is ``consent_required``, ``dry_run``,
            ``failed`` or ``started``. A runner that cannot launch at all
            (for example, docker is not installed) is reported as ``failed``
            with ``returncode`` set to ``None`` and the error text in
            ``stderr``, rather than raising.
        """
        plan = self.write_compose()
        compose_path = str(plan.compose_path)
        if not consent or dry_run:
            # Missing consent takes precedence over dry_run.
            return {
                "status": "dry_run" if consent else "consent_required",
                "started": False,
                "compose_path": compose_path,
                "command": plan.command(),
            }
        try:
            completed = runner(plan.command(), check=False, capture_output=True, text=True)
        except OSError as exc:
            # subprocess.run raises OSError (e.g. FileNotFoundError) when the
            # executable cannot be started; keep the structured result shape.
            return {
                "status": "failed",
                "started": False,
                "compose_path": compose_path,
                "returncode": None,
                "stdout": "",
                "stderr": str(exc),
            }
        if completed.returncode != 0:
            return {
                "status": "failed",
                "started": False,
                "compose_path": compose_path,
                "returncode": completed.returncode,
                "stdout": completed.stdout,
                "stderr": completed.stderr,
            }
        return {
            "status": "started",
            "started": True,
            "compose_path": compose_path,
            "stdout": completed.stdout,
            "stderr": completed.stderr,
        }
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
__all__ = ["DockerPostgresPlan", "DockerPostgresWizard"]
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
"""StorageEngine construction from environment/config values."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Mapping
|
|
7
|
+
|
|
8
|
+
from .base import StorageEngine, StorageUnavailable
|
|
9
|
+
from .postgres import PostgresEngine
|
|
10
|
+
from .sqlite import SQLiteEngine
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def storage_from_env(env: Mapping[str, str], *, data_dir: Path) -> StorageEngine:
    """Build the storage engine selected by ``LATTICEAI_STORAGE_ENGINE``.

    An unset or blank value, or ``sqlite``, gives a ``SQLiteEngine`` on
    ``knowledge_graph.sqlite`` under ``data_dir``. ``postgres``, ``pg`` and
    ``pgvector`` give a ``PostgresEngine`` built from
    ``LATTICEAI_POSTGRES_DSN`` and the optional ``LATTICEAI_POSTGRES_SCHEMA``
    (default ``lattice_brain``).

    Raises:
        StorageUnavailable: for an unknown engine name, or when Postgres is
            selected without a DSN. There is no fallback to SQLite.
    """
    requested = env.get("LATTICEAI_STORAGE_ENGINE") or "sqlite"
    engine = requested.strip().lower()
    if engine == "" or engine == "sqlite":
        return SQLiteEngine(Path(data_dir) / "knowledge_graph.sqlite")
    if engine not in ("postgres", "pg", "pgvector"):
        raise StorageUnavailable(f"Unknown Brain Core storage engine: {engine}")

    dsn = env.get("LATTICEAI_POSTGRES_DSN") or ""
    if not dsn:
        raise StorageUnavailable(
            "LATTICEAI_STORAGE_ENGINE=postgres requires LATTICEAI_POSTGRES_DSN; "
            "SQLite fallback is disabled for explicit Postgres selection."
        )
    schema = env.get("LATTICEAI_POSTGRES_SCHEMA") or "lattice_brain"
    return PostgresEngine(dsn, schema=schema)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
__all__ = ["storage_from_env"]
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
"""Safe SQLite to Postgres migration tooling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import sqlite3
|
|
6
|
+
from dataclasses import dataclass
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Any, Dict, List
|
|
9
|
+
|
|
10
|
+
from .postgres import PostgresEngine, _quote_ident
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _pg_type(sqlite_type: str) -> str:
    """Choose a Postgres column type for a declared SQLite column type.

    Matching is by case-insensitive substring, checked in this order:
    ``INT`` gives ``bigint``; ``REAL``, ``FLOA`` or ``DOUB`` give
    ``double precision``; ``BLOB`` gives ``bytea``. Everything else,
    including an empty or missing declaration, gives ``text``.
    """
    declared = str(sqlite_type or "").upper()
    if "INT" in declared:
        return "bigint"
    for marker in ("REAL", "FLOA", "DOUB"):
        if marker in declared:
            return "double precision"
    return "bytea" if "BLOB" in declared else "text"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _adapt_value(value: Any) -> Any:
    """Convert a ``memoryview`` to ``bytes``; return any other value unchanged."""
    return bytes(value) if isinstance(value, memoryview) else value
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
@dataclass(frozen=True)
class TablePlan:
    """Per-table summary of how a SQLite table will be copied to Postgres."""

    # Table name as listed in sqlite_master.
    name: str
    # One {"name": ..., "type": ...} mapping per source column.
    columns: List[Dict[str, str]]
    # Row count observed when the plan was built.
    rows: int
    # First entry of conflict_columns.
    conflict_key: str
    # Columns that form the Postgres primary key / upsert target.
    conflict_columns: List[str]
    # Whether the source table exposes a SQLite rowid.
    rowid_available: bool
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class SQLiteToPostgresMigrator:
    """Copies every user table from a Lattice SQLite brain into Postgres.

    Re-running is meant to be idempotent: rows are upserted on ``id`` when
    the table has such a column, otherwise on the declared primary key,
    otherwise on the SQLite rowid preserved in ``__source_rowid``. The SQLite
    database is only read, never written.

    NOTE(review): ``id`` takes precedence over a declared primary key, so a
    table whose ``id`` values are not unique would have rows collapsed by the
    upsert — confirm no such table exists in the brain schema.
    """

    def __init__(self, sqlite_path: Path, target: PostgresEngine) -> None:
        """Remember the source database path and the Postgres target engine."""
        self.sqlite_path = Path(sqlite_path)
        self.target = target

    def plan(self) -> Dict[str, Any]:
        """Inspect the SQLite database and describe what would be copied.

        Raises:
            FileNotFoundError: if the SQLite file does not exist.
            RuntimeError: if a table has no ``id``, no primary key and no rowid.
        """
        if not self.sqlite_path.exists():
            raise FileNotFoundError(f"SQLite brain database not found: {self.sqlite_path}")
        # A sqlite3 connection used as a context manager only scopes the
        # transaction; it never closes the handle, so close it explicitly.
        conn = sqlite3.connect(str(self.sqlite_path))
        try:
            conn.row_factory = sqlite3.Row
            table_names = [
                row["name"]
                for row in conn.execute(
                    """
                    SELECT name FROM sqlite_master
                    WHERE type='table' AND name NOT LIKE 'sqlite_%'
                    ORDER BY name
                    """
                )
            ]
            tables = [self._plan_table(conn, table) for table in table_names]
        finally:
            conn.close()
        return {
            "source": str(self.sqlite_path),
            "target_engine": self.target.name,
            "target_schema": self.target.config.schema,
            # Copies, so callers cannot edit the frozen TablePlan through the result.
            "tables": [dict(vars(table)) for table in tables],
            "total_rows": sum(table.rows for table in tables),
        }

    @staticmethod
    def _plan_table(conn: sqlite3.Connection, table: str) -> TablePlan:
        """Build the plan for one table: columns, row count and conflict key."""
        quoted = _quote_sqlite_ident(table)
        # Read the column metadata once and reuse it for names, types and PK order.
        info = conn.execute(f"PRAGMA table_info({quoted})").fetchall()
        cols = [{"name": row["name"], "type": row["type"] or "TEXT"} for row in info]
        row_count = conn.execute(f"SELECT COUNT(*) FROM {quoted}").fetchone()[0]
        names = {col["name"] for col in cols}
        rowid_available = _rowid_available(conn, table)
        # table_info's "pk" is the 1-based position in the primary key, 0 if not part of it.
        pk_columns = [
            row["name"]
            for row in sorted(info, key=lambda item: int(item["pk"] or 0))
            if int(row["pk"] or 0) > 0
        ]
        if "id" in names:
            conflict_columns = ["id"]
        elif pk_columns:
            conflict_columns = pk_columns
        elif rowid_available:
            conflict_columns = ["__source_rowid"]
        else:
            raise RuntimeError(
                f"Cannot safely migrate rowid-less SQLite table without a primary key: {table}"
            )
        return TablePlan(
            name=table,
            columns=cols,
            rows=int(row_count),
            conflict_key=conflict_columns[0],
            conflict_columns=conflict_columns,
            rowid_available=rowid_available,
        )

    def migrate(self, *, dry_run: bool = False) -> Dict[str, Any]:
        """Copy all planned tables into the target schema.

        With ``dry_run`` the plan is returned with status ``planned`` and the
        target is never touched. Otherwise the target is initialized, each
        table is created if missing and its rows are upserted; the result
        carries the plan plus per-table and total copied-row counts.
        """
        plan = self.plan()
        if dry_run:
            return {"status": "planned", **plan}
        schema = _quote_ident(self.target.config.schema)
        copied: Dict[str, int] = {}
        self.target.initialize()
        src = sqlite3.connect(str(self.sqlite_path))
        try:
            src.row_factory = sqlite3.Row
            with self.target.connect() as dst:
                with dst.cursor() as cur:
                    for table in plan["tables"]:
                        copied[str(table["name"])] = self._copy_table(src, cur, schema, table)
        finally:
            # Close the source handle on success and on failure alike.
            src.close()
        return {
            "status": "migrated",
            **plan,
            "copied_rows": copied,
            "total_copied_rows": sum(copied.values()),
        }

    @staticmethod
    def _copy_table(src: sqlite3.Connection, cur: Any, schema: str, table: Dict[str, Any]) -> int:
        """Create one target table if needed, upsert its rows, return the rows read."""
        name = str(table["name"])
        cols = list(table["columns"])
        conflict_columns = list(table.get("conflict_columns") or [table["conflict_key"]])
        rowid_available = bool(table.get("rowid_available", True))
        pg_table = f"{schema}.{_quote_ident(name)}"

        defs = ["__source_rowid bigint NOT NULL"] if rowid_available else []
        defs.extend(f"{_quote_ident(col['name'])} {_pg_type(col['type'])}" for col in cols)
        pk = ", ".join(_quote_ident(c) for c in conflict_columns)
        cur.execute(
            f"CREATE TABLE IF NOT EXISTS {pg_table} ({', '.join(defs)}, PRIMARY KEY ({pk}))"
        )

        if rowid_available:
            select_sql = (
                f"SELECT rowid AS __source_rowid, * FROM {_quote_sqlite_ident(name)} ORDER BY rowid"
            )
        else:
            order_by = ", ".join(_quote_sqlite_ident(c) for c in conflict_columns)
            select_sql = f"SELECT * FROM {_quote_sqlite_ident(name)} ORDER BY {order_by}"
        rows = src.execute(select_sql).fetchall()
        if not rows:
            return 0

        columns = (["__source_rowid"] if rowid_available else []) + [c["name"] for c in cols]
        placeholders = ", ".join(["%s"] * len(columns))
        quoted_columns = ", ".join(_quote_ident(c) for c in columns)
        updates = ", ".join(
            f"{_quote_ident(c)} = EXCLUDED.{_quote_ident(c)}"
            for c in columns
            if c not in conflict_columns
        )
        # A table made up solely of key columns has nothing to update.
        conflict_action = f"DO UPDATE SET {updates}" if updates else "DO NOTHING"
        sql = (
            f"INSERT INTO {pg_table} ({quoted_columns}) VALUES ({placeholders}) "
            f"ON CONFLICT ({pk}) {conflict_action}"
        )
        cur.executemany(
            sql,
            [tuple(_adapt_value(row[col]) for col in columns) for row in rows],
        )
        return len(rows)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
def _quote_sqlite_ident(value: str) -> str:
    """Wrap ``value`` in double quotes, doubling any embedded double quote."""
    escaped = str(value).replace('"', '""')
    return f'"{escaped}"'
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _rowid_available(conn: sqlite3.Connection, table: str) -> bool:
    """Report whether ``SELECT rowid`` works on ``table``.

    The probe query is run for real; an ``sqlite3.OperationalError`` from it
    is taken to mean the table has no rowid.
    """
    probe = f"SELECT rowid FROM {_quote_sqlite_ident(table)} LIMIT 1"
    try:
        conn.execute(probe).fetchall()
    except sqlite3.OperationalError:
        return False
    return True
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
__all__ = ["SQLiteToPostgresMigrator", "TablePlan"]
|