ltcai 4.3.3 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +21 -16
- package/docs/CHANGELOG.md +37 -0
- package/docs/V4_4_0_EXTRACTION_REPORT.md +239 -0
- package/lattice_brain/__init__.py +38 -23
- package/lattice_brain/_kg_common.py +11 -1
- package/lattice_brain/context.py +212 -2
- package/lattice_brain/conversations.py +234 -1
- package/lattice_brain/discovery.py +11 -1
- package/lattice_brain/documents.py +11 -1
- package/lattice_brain/graph/__init__.py +28 -0
- package/lattice_brain/graph/_kg_common.py +1123 -0
- package/lattice_brain/graph/curator.py +473 -0
- package/lattice_brain/graph/discovery.py +1455 -0
- package/lattice_brain/graph/documents.py +218 -0
- package/lattice_brain/graph/identity.py +175 -0
- package/lattice_brain/graph/ingest.py +644 -0
- package/lattice_brain/graph/network.py +205 -0
- package/lattice_brain/graph/projection.py +571 -0
- package/lattice_brain/graph/provenance.py +401 -0
- package/lattice_brain/graph/retrieval.py +1341 -0
- package/lattice_brain/graph/schema.py +640 -0
- package/lattice_brain/graph/store.py +237 -0
- package/lattice_brain/graph/write_master.py +225 -0
- package/lattice_brain/identity.py +11 -13
- package/lattice_brain/ingest.py +11 -1
- package/lattice_brain/ingestion.py +318 -0
- package/lattice_brain/memory.py +100 -1
- package/lattice_brain/network.py +11 -1
- package/lattice_brain/portability.py +431 -0
- package/lattice_brain/projection.py +11 -1
- package/lattice_brain/provenance.py +11 -1
- package/lattice_brain/retrieval.py +11 -1
- package/lattice_brain/runtime/__init__.py +32 -0
- package/lattice_brain/runtime/agent_runtime.py +569 -0
- package/lattice_brain/runtime/hooks.py +754 -0
- package/lattice_brain/runtime/multi_agent.py +795 -0
- package/lattice_brain/schema.py +11 -1
- package/lattice_brain/store.py +10 -2
- package/lattice_brain/workflow.py +461 -0
- package/lattice_brain/write_master.py +11 -1
- package/latticeai/__init__.py +1 -1
- package/latticeai/api/agents.py +2 -2
- package/latticeai/api/browser.py +1 -1
- package/latticeai/api/chat.py +1 -1
- package/latticeai/api/computer_use.py +1 -1
- package/latticeai/api/hooks.py +2 -2
- package/latticeai/api/mcp.py +1 -1
- package/latticeai/api/tools.py +1 -1
- package/latticeai/api/workflow_designer.py +2 -2
- package/latticeai/app_factory.py +4 -4
- package/latticeai/brain/__init__.py +24 -6
- package/latticeai/brain/_kg_common.py +11 -1117
- package/latticeai/brain/context.py +12 -208
- package/latticeai/brain/conversations.py +12 -231
- package/latticeai/brain/discovery.py +13 -1451
- package/latticeai/brain/documents.py +13 -214
- package/latticeai/brain/identity.py +11 -169
- package/latticeai/brain/ingest.py +13 -640
- package/latticeai/brain/memory.py +12 -97
- package/latticeai/brain/network.py +12 -200
- package/latticeai/brain/projection.py +13 -567
- package/latticeai/brain/provenance.py +13 -397
- package/latticeai/brain/retrieval.py +13 -1337
- package/latticeai/brain/schema.py +12 -635
- package/latticeai/brain/store.py +13 -233
- package/latticeai/brain/write_master.py +13 -221
- package/latticeai/core/agent.py +1 -1
- package/latticeai/core/agent_registry.py +2 -2
- package/latticeai/core/builtin_hooks.py +2 -2
- package/latticeai/core/graph_curator.py +6 -468
- package/latticeai/core/hooks.py +6 -749
- package/latticeai/core/marketplace.py +1 -1
- package/latticeai/core/multi_agent.py +6 -790
- package/latticeai/core/workflow_engine.py +6 -456
- package/latticeai/core/workspace_os.py +1 -1
- package/latticeai/services/agent_runtime.py +6 -564
- package/latticeai/services/ingestion.py +6 -313
- package/latticeai/services/kg_portability.py +6 -426
- package/latticeai/services/platform_runtime.py +3 -3
- package/latticeai/services/run_executor.py +1 -1
- package/latticeai/services/upload_service.py +1 -1
- package/p_reinforce.py +1 -1
- package/package.json +1 -1
- package/scripts/bump_version.py +1 -1
- package/scripts/wheel_smoke.py +7 -0
- package/src-tauri/Cargo.lock +1 -1
- package/src-tauri/Cargo.toml +1 -1
- package/src-tauri/tauri.conf.json +1 -1
- package/static/app/asset-manifest.json +1 -1
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# ruff: noqa: F403,F405
|
|
4
|
+
|
|
5
|
+
from ._kg_common import * # noqa: F403,F401
|
|
6
|
+
from .documents import KnowledgeGraphDocumentsMixin
|
|
7
|
+
from .discovery import KnowledgeGraphDiscoveryMixin
|
|
8
|
+
from .ingest import KnowledgeGraphIngestMixin
|
|
9
|
+
from .projection import KnowledgeGraphProjectionMixin
|
|
10
|
+
from .provenance import KnowledgeGraphProvenanceMixin
|
|
11
|
+
from .retrieval import KnowledgeGraphRetrievalMixin
|
|
12
|
+
from .write_master import KnowledgeGraphWriteMixin
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class KnowledgeGraphStore(
|
|
16
|
+
KnowledgeGraphProjectionMixin,
|
|
17
|
+
KnowledgeGraphWriteMixin,
|
|
18
|
+
KnowledgeGraphDiscoveryMixin,
|
|
19
|
+
KnowledgeGraphIngestMixin,
|
|
20
|
+
KnowledgeGraphProvenanceMixin,
|
|
21
|
+
KnowledgeGraphDocumentsMixin,
|
|
22
|
+
KnowledgeGraphRetrievalMixin,
|
|
23
|
+
):
|
|
24
|
+
def __init__(
    self,
    db_path: Path,
    blob_dir: Path,
    embedder: Any = None,
    storage_engine: Any = None,
):
    """Open (creating if needed) the knowledge-graph store.

    Args:
        db_path: Location of the graph database file; its parent directory
            is created when missing.
        blob_dir: Directory for blobs; created when missing.
        embedder: Optional embedding model. When ``None`` a
            ``LocalEmbeddingModel`` is instantiated.
        storage_engine: Optional storage engine. When ``None`` a
            ``SQLiteEngine`` bound to ``db_path`` is created.

    Raises:
        RuntimeError: If the engine reports itself unavailable, or reports
            an engine kind other than ``"sqlite"``.
    """
    self.db_path = Path(db_path)
    self.blob_dir = Path(blob_dir)
    for directory in (self.db_path.parent, self.blob_dir):
        directory.mkdir(parents=True, exist_ok=True)

    engine = storage_engine
    if engine is None:
        from ..storage import SQLiteEngine

        engine = SQLiteEngine(self.db_path)

    # Validate the engine (default or injected) before adopting it.
    caps = engine.capabilities()
    if not caps.available:
        raise RuntimeError(caps.reason or "Brain storage is unavailable.")
    if caps.engine != "sqlite":
        raise RuntimeError(
            "KnowledgeGraphStore currently requires SQLiteEngine. "
            "Explicit non-SQLite storage must use the migration/scale tooling; "
            "no SQLite fallback is attempted."
        )
    self.storage_engine = engine

    # The embedder is swappable behind a fixed interface
    # (model_id/dim/embed/encode/decode/similarity). With no embedder given,
    # the deterministic, offline hash model is used so the store works with
    # no config; server_app injects a provider-backed embedder from Config.
    self._embedding_model = LocalEmbeddingModel() if embedder is None else embedder

    # Must exist before _init_db() runs; schema setup may flip it.
    self._v2_projection_available = False
    self._init_db()

    # Serve graph reads from the v2 projection (kgv2_* views) when it is
    # available. Toggle off (e.g. in tests) to compare with the legacy tables.
    self._read_from_v2 = (
        KGStoreV2 is not None
        and _READ_FROM_V2_DEFAULT
        and self._v2_projection_available
    )
|
|
65
|
+
|
|
66
|
+
def _read_tables(self) -> tuple:
    """Return the ``(nodes_table, edges_table)`` pair used by read queries.

    The v2 reconstruction views are chosen when ``self._read_from_v2`` is
    truthy, the legacy tables otherwise. The same read code runs against
    either pair, so the two paths are equivalent by construction.
    """
    return ("kgv2_nodes", "kgv2_edges") if self._read_from_v2 else ("nodes", "edges")
|
|
75
|
+
|
|
76
|
+
def _connect(self) -> sqlite3.Connection:
    """Return a connection obtained from the configured storage engine."""
    engine = self.storage_engine
    return engine.connect()
|
|
78
|
+
|
|
79
|
+
def _init_db(self) -> None:
    """Create the legacy graph schema and record the schema version.

    Reads ``PRAGMA user_version`` first and refuses to continue when the
    database format is newer than ``_KG_DB_FORMAT_VERSION``. Then creates,
    if absent, the tables ``graph_meta``, ``nodes``, ``edges``, ``chunks``,
    ``knowledge_sources``, ``local_file_index``, ``vector_embeddings``,
    ``vector_index_operations`` and ``ingestion_provenance`` together with
    their indexes, stores ``GRAPH_SCHEMA_VERSION`` under the
    ``schema_version`` key of ``graph_meta``, and finally calls
    ``_init_v2_schema()`` and ``_init_fts()``.

    Raises:
        RuntimeError: If the on-disk format version exceeds
            ``_KG_DB_FORMAT_VERSION``.
    """
    with self._connect() as conn:
        # A missing/NULL user_version is treated as format 0.
        db_format = int(conn.execute("PRAGMA user_version").fetchone()[0] or 0)
        if db_format > _KG_DB_FORMAT_VERSION:
            raise RuntimeError(
                f"Knowledge Graph DB format {db_format} is newer than this build "
                f"({_KG_DB_FORMAT_VERSION}); restore a pre-upgrade backup or upgrade Lattice AI."
            )
        # Every statement is IF NOT EXISTS, so the script is safe to re-run
        # against an already-initialised database.
        conn.executescript(
            """
            CREATE TABLE IF NOT EXISTS graph_meta (
                key TEXT PRIMARY KEY,
                value TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS nodes (
                id TEXT PRIMARY KEY,
                type TEXT NOT NULL,
                title TEXT NOT NULL,
                summary TEXT,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
                raw_json TEXT NOT NULL CHECK (json_valid(raw_json)),
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS edges (
                id TEXT PRIMARY KEY,
                from_node TEXT NOT NULL,
                to_node TEXT NOT NULL,
                type TEXT NOT NULL,
                weight REAL NOT NULL DEFAULT 1.0,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
                created_at TEXT NOT NULL,
                UNIQUE(from_node, to_node, type),
                FOREIGN KEY(from_node) REFERENCES nodes(id) ON DELETE CASCADE,
                FOREIGN KEY(to_node) REFERENCES nodes(id) ON DELETE CASCADE
            );
            CREATE TABLE IF NOT EXISTS chunks (
                id TEXT PRIMARY KEY,
                source_node TEXT NOT NULL,
                text TEXT NOT NULL,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
                created_at TEXT NOT NULL,
                FOREIGN KEY(source_node) REFERENCES nodes(id) ON DELETE CASCADE
            );
            CREATE TABLE IF NOT EXISTS knowledge_sources (
                id TEXT PRIMARY KEY,
                root_path TEXT NOT NULL UNIQUE,
                os_type TEXT NOT NULL,
                drive_id TEXT,
                label TEXT,
                status TEXT NOT NULL,
                include_ocr INTEGER NOT NULL DEFAULT 0,
                watch_enabled INTEGER NOT NULL DEFAULT 0,
                consent_json TEXT NOT NULL CHECK (json_valid(consent_json)),
                created_at TEXT NOT NULL,
                updated_at TEXT NOT NULL,
                last_scanned_at TEXT
            );
            CREATE TABLE IF NOT EXISTS local_file_index (
                id TEXT PRIMARY KEY,
                source_id TEXT NOT NULL,
                os_type TEXT NOT NULL,
                drive_id TEXT,
                root_path TEXT NOT NULL,
                file_path TEXT NOT NULL,
                relative_path TEXT NOT NULL,
                file_name TEXT NOT NULL,
                extension TEXT NOT NULL,
                size_bytes INTEGER,
                modified_at TEXT,
                sha256 TEXT,
                last_scanned_at TEXT,
                last_indexed_at TEXT,
                parser_type TEXT,
                status TEXT NOT NULL,
                error_message TEXT,
                graph_node_id TEXT,
                deleted INTEGER NOT NULL DEFAULT 0,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
                UNIQUE(source_id, relative_path),
                FOREIGN KEY(source_id) REFERENCES knowledge_sources(id) ON DELETE CASCADE
            );
            CREATE TABLE IF NOT EXISTS vector_embeddings (
                item_id TEXT PRIMARY KEY,
                item_type TEXT NOT NULL,
                source_node TEXT NOT NULL,
                text_hash TEXT NOT NULL,
                embedding BLOB NOT NULL,
                embedding_dim INTEGER NOT NULL,
                embedding_model TEXT NOT NULL,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json)),
                indexed_at TEXT NOT NULL,
                FOREIGN KEY(source_node) REFERENCES nodes(id) ON DELETE CASCADE
            );
            CREATE TABLE IF NOT EXISTS vector_index_operations (
                id TEXT PRIMARY KEY,
                operation TEXT NOT NULL,
                status TEXT NOT NULL,
                requested_at TEXT NOT NULL,
                started_at TEXT,
                completed_at TEXT,
                items_total INTEGER NOT NULL DEFAULT 0,
                items_indexed INTEGER NOT NULL DEFAULT 0,
                items_skipped INTEGER NOT NULL DEFAULT 0,
                error_message TEXT,
                metadata_json TEXT NOT NULL CHECK (json_valid(metadata_json))
            );
            -- v3.6.0 Knowledge Graph First: per-ingestion provenance trail.
            -- Append-only audit of where every graph node came from, when it
            -- was captured, how it was processed, and whether it was embedded /
            -- linked / used by an agent. get_provenance() returns the latest row.
            CREATE TABLE IF NOT EXISTS ingestion_provenance (
                id TEXT PRIMARY KEY,
                node_id TEXT NOT NULL,
                source_type TEXT NOT NULL,
                source_uri TEXT,
                content_hash TEXT,
                title TEXT,
                pipeline TEXT NOT NULL,
                owner TEXT,
                workspace_id TEXT,
                captured_at TEXT,
                modified_at TEXT,
                embedded INTEGER NOT NULL DEFAULT 0,
                linked INTEGER NOT NULL DEFAULT 0,
                duplicate INTEGER NOT NULL DEFAULT 0,
                agent_used TEXT,
                chunk_count INTEGER NOT NULL DEFAULT 0,
                permissions_json TEXT NOT NULL DEFAULT '{}' CHECK (json_valid(permissions_json)),
                metadata_json TEXT NOT NULL DEFAULT '{}' CHECK (json_valid(metadata_json)),
                created_at TEXT NOT NULL
            );
            CREATE INDEX IF NOT EXISTS idx_nodes_type ON nodes(type);
            CREATE INDEX IF NOT EXISTS idx_edges_from ON edges(from_node);
            CREATE INDEX IF NOT EXISTS idx_edges_to ON edges(to_node);
            CREATE INDEX IF NOT EXISTS idx_chunks_source ON chunks(source_node);
            CREATE INDEX IF NOT EXISTS idx_knowledge_sources_root ON knowledge_sources(root_path);
            CREATE INDEX IF NOT EXISTS idx_local_file_index_source ON local_file_index(source_id);
            CREATE INDEX IF NOT EXISTS idx_local_file_index_status ON local_file_index(status);
            CREATE INDEX IF NOT EXISTS idx_local_file_index_graph_node ON local_file_index(graph_node_id);
            CREATE INDEX IF NOT EXISTS idx_vector_embeddings_type ON vector_embeddings(item_type);
            CREATE INDEX IF NOT EXISTS idx_vector_embeddings_source ON vector_embeddings(source_node);
            CREATE INDEX IF NOT EXISTS idx_vector_embeddings_model ON vector_embeddings(embedding_model);
            CREATE INDEX IF NOT EXISTS idx_vector_index_operations_requested ON vector_index_operations(requested_at);
            CREATE INDEX IF NOT EXISTS idx_provenance_node ON ingestion_provenance(node_id);
            CREATE INDEX IF NOT EXISTS idx_provenance_source_type ON ingestion_provenance(source_type);
            CREATE INDEX IF NOT EXISTS idx_provenance_hash ON ingestion_provenance(content_hash);
            CREATE INDEX IF NOT EXISTS idx_provenance_created ON ingestion_provenance(created_at);
            """
        )
        # Stamp the logical schema version on every start-up.
        conn.execute(
            "INSERT OR REPLACE INTO graph_meta(key, value) VALUES (?, ?)",
            ("schema_version", str(GRAPH_SCHEMA_VERSION)),
        )
    # NOTE(review): the diff rendering dropped indentation; these two calls
    # take no connection argument and are assumed to run after the `with`
    # block above has exited - confirm against the packaged file.
    self._init_v2_schema()
    self._init_fts()
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
__all__ = ["KnowledgeGraphStore"]
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
# ruff: noqa: F403,F405
|
|
4
|
+
|
|
5
|
+
from ._kg_common import * # noqa: F403,F401
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class KnowledgeGraphWriteMixin:
|
|
9
|
+
def _upsert_node(
    self,
    conn: sqlite3.Connection,
    node_id: str,
    node_type: str,
    title: str,
    summary: str = "",
    metadata: Optional[Dict[str, Any]] = None,
    raw: Optional[Dict[str, Any]] = None,
    owner: Optional[str] = None,
    workspace_id: Optional[str] = None,
    visibility: Optional[str] = None,
) -> str:
    """Insert or update a graph node and return its id.

    The node is first projected through ``_v2_project_node`` (strict mode),
    then written to the legacy ``nodes`` table, and finally - unless it is a
    ``"Chunk"`` node - indexed via ``_upsert_vector_item``.

    Args:
        conn: Open connection all writes are issued on.
        node_id: Primary key of the node.
        node_type: Node type; stored on insert only (not updated on conflict).
        title: Title, truncated to 240 characters.
        summary: Summary, truncated to 1000 characters.
        metadata: Optional metadata mapping, serialised with ``_json``.
        raw: Optional raw payload, serialised with ``_json``.
        owner: Forwarded to the v2 projection.
        workspace_id: Forwarded to the v2 projection.
        visibility: Forwarded to the v2 projection.

    Returns:
        ``node_id`` unchanged.
    """
    stamp = _now()
    # v4 write-mastering: nodes_v2 is authoritative; the legacy nodes table
    # is kept up to date as the compatibility projection.
    short_title = title[:240]
    short_summary = summary[:1000]
    encoded_meta = _json(metadata)

    self._v2_project_node(
        conn,
        node_id,
        node_type,
        short_title,
        short_summary,
        encoded_meta,
        created_at=stamp,
        updated_at=stamp,
        owner=owner,
        workspace_id=workspace_id,
        visibility=visibility,
        strict=True,
    )

    legacy_row = (
        node_id,
        node_type,
        short_title,
        short_summary,
        encoded_meta,
        _json(raw),
        stamp,
        stamp,
    )
    conn.execute(
        """
        INSERT INTO nodes(id, type, title, summary, metadata_json, raw_json, created_at, updated_at)
        VALUES (?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(id) DO UPDATE SET
            title=excluded.title,
            summary=excluded.summary,
            metadata_json=excluded.metadata_json,
            raw_json=excluded.raw_json,
            updated_at=excluded.updated_at
        """,
        legacy_row,
    )

    # Chunk nodes are not indexed here.
    if node_type == "Chunk":
        return node_id

    vector_text = self._vector_text_for_node(
        title=short_title, summary=short_summary, metadata=metadata
    )
    vector_meta = {"node_type": node_type, **(metadata or {})}
    self._upsert_vector_item(
        conn,
        item_id=node_id,
        item_type="node",
        source_node=node_id,
        text=vector_text,
        metadata=vector_meta,
    )
    return node_id
|
|
67
|
+
|
|
68
|
+
def _upsert_edge(
    self,
    conn: sqlite3.Connection,
    from_node: str,
    to_node: str,
    edge_type: str,
    weight: float = 1.0,
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """Insert or update an edge and return its deterministic id.

    When ``EdgeType`` is available, ``edge_type`` is normalised through
    ``EdgeType.from_legacy``; if that changes the value, the original label
    is kept under ``metadata["legacy_label"]`` (without overwriting an
    existing entry). The edge is projected through ``_v2_project_edge``
    (strict mode) and then written to the legacy ``edges`` table, where a
    conflict on ``(from_node, to_node, type)`` keeps the larger weight and
    replaces the metadata.

    Returns:
        The edge id: ``"edge:"`` followed by the first 24 characters of
        ``_sha256_text`` over ``"<from_node>|<edge_type>|<to_node>"``.
    """
    kind = edge_type
    extra = metadata
    # v4 write door: every new edge stores the canonical EdgeType value, so
    # free-string types (e.g. '포함함', '언급함') cannot mint new legacy
    # taxonomy. The original label survives in metadata.legacy_label.
    if EdgeType is not None:
        canonical = EdgeType.from_legacy(kind).value
        if canonical != kind:
            extra = dict(extra or {})
            extra.setdefault("legacy_label", kind)
            kind = canonical

    edge_id = "edge:" + _sha256_text(f"{from_node}|{kind}|{to_node}")[:24]
    stamp = _now()
    encoded_meta = _json(extra)  # canonical string shared with the projection
    edge_weight = float(weight)

    self._v2_project_edge(
        conn,
        from_node,
        to_node,
        kind,
        edge_weight,
        encoded_meta,
        edge_id=edge_id,
        created_at=stamp,
        strict=True,
    )

    legacy_row = (
        edge_id,
        from_node,
        to_node,
        kind,
        float(weight),
        encoded_meta,
        stamp,
    )
    conn.execute(
        """
        INSERT INTO edges(id, from_node, to_node, type, weight, metadata_json, created_at)
        VALUES (?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(from_node, to_node, type) DO UPDATE SET
            weight=max(edges.weight, excluded.weight),
            metadata_json=excluded.metadata_json
        """,
        legacy_row,
    )
    return edge_id
|
|
112
|
+
|
|
113
|
+
def _vector_text_for_node(
    self,
    *,
    title: str,
    summary: str = "",
    metadata: Optional[Dict[str, Any]] = None,
) -> str:
    """Build the text that is embedded for a node.

    The result is ``_clean_text`` applied to three newline-joined parts: the
    title, the summary, and a space-joined list of the truthy values found
    under a fixed set of metadata keys (in the order listed below).
    """
    indexed_keys = (
        "filename",
        "relative_path",
        "file_path",
        "conversation_id",
        "source",
        "category",
        "ext",
        "role",
    )
    fields = metadata or {}
    # Falsy values (missing, None, "", 0, ...) are left out.
    extras = [str(found) for found in map(fields.get, indexed_keys) if found]
    sections = [str(title or ""), str(summary or ""), " ".join(extras)]
    return _clean_text("\n".join(sections))
|
|
138
|
+
|
|
139
|
+
def _upsert_vector_item(
    self,
    conn: sqlite3.Connection,
    *,
    item_id: str,
    item_type: str,
    source_node: str,
    text: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> bool:
    """Create or refresh the stored embedding for one item.

    Returns:
        ``True`` when an embedding row was written. ``False`` when the
        cleaned text is shorter than two characters (any stored row for
        ``item_id`` is deleted), or when the stored row already matches the
        text hash and the current embedding model's ``dim`` and ``model_id``.
    """
    cleaned = _clean_text(text)
    if len(cleaned) < 2:
        # Nothing meaningful to embed: drop whatever was indexed before.
        conn.execute("DELETE FROM vector_embeddings WHERE item_id=?", (item_id,))
        return False

    digest = _sha256_text(cleaned)
    stored = conn.execute(
        """
        SELECT text_hash, embedding_dim, embedding_model
        FROM vector_embeddings
        WHERE item_id=?
        """,
        (item_id,),
    ).fetchone()
    if stored:
        up_to_date = (
            stored["text_hash"] == digest
            and stored["embedding_dim"] == self._embedding_model.dim
            and stored["embedding_model"] == self._embedding_model.model_id
        )
        if up_to_date:
            return False

    # Only the first 50,000 characters are embedded; the hash covers the
    # whole cleaned text.
    vector = self._embedding_model.embed(cleaned[:50_000])
    blob = self._embedding_model.encode(vector)
    row = (
        item_id,
        item_type,
        source_node,
        digest,
        blob,
        self._embedding_model.dim,
        self._embedding_model.model_id,
        _json(metadata),
        _now(),
    )
    conn.execute(
        """
        INSERT INTO vector_embeddings(
            item_id, item_type, source_node, text_hash, embedding,
            embedding_dim, embedding_model, metadata_json, indexed_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(item_id) DO UPDATE SET
            item_type=excluded.item_type,
            source_node=excluded.source_node,
            text_hash=excluded.text_hash,
            embedding=excluded.embedding,
            embedding_dim=excluded.embedding_dim,
            embedding_model=excluded.embedding_model,
            metadata_json=excluded.metadata_json,
            indexed_at=excluded.indexed_at
        """,
        row,
    )
    return True
|
|
202
|
+
|
|
203
|
+
def _upsert_chunk(
    self,
    conn: sqlite3.Connection,
    *,
    chunk_id: str,
    source_node: str,
    text: str,
    metadata: Optional[Dict[str, Any]] = None,
) -> None:
    """Store a chunk row and index its text.

    The chunk is written to ``chunks`` with ``INSERT OR REPLACE``. Its text
    is then passed to ``_upsert_vector_item`` as a ``"chunk"`` item whose
    ``source_node`` is the chunk's own id; the parent node id is carried in
    the vector metadata under ``"parent_source_node"``.
    """
    chunk_meta = metadata or {}
    chunk_row = (chunk_id, source_node, text, _json(chunk_meta), _now())
    conn.execute(
        "INSERT OR REPLACE INTO chunks(id, source_node, text, metadata_json, created_at) "
        "VALUES (?, ?, ?, ?, ?)",
        chunk_row,
    )
    vector_meta = {**chunk_meta, "parent_source_node": source_node}
    self._upsert_vector_item(
        conn,
        item_id=chunk_id,
        item_type="chunk",
        source_node=chunk_id,
        text=text,
        metadata=vector_meta,
    )
|
|
@@ -1,13 +1,11 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
"verify_signature",
|
|
13
|
-
]
|
|
1
|
+
"""Compatibility shim: implementation moved to lattice_brain.graph.identity.
|
|
2
|
+
|
|
3
|
+
This module aliases itself to the physical module so identity, singletons,
|
|
4
|
+
and monkeypatching behave as if the old flat path were the real module.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys

from .graph import identity as _impl

# Rebind this module's entry in sys.modules to the implementation module, so
# importing the old flat path and importing .graph.identity yield the very
# same module object (shared globals, singletons and monkeypatches).
sys.modules[__name__] = _impl
|
package/lattice_brain/ingest.py
CHANGED
|
@@ -1 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
"""Compatibility shim: implementation moved to lattice_brain.graph.ingest.
|
|
2
|
+
|
|
3
|
+
This module aliases itself to the physical module so identity, singletons,
|
|
4
|
+
and monkeypatching behave as if the old flat path were the real module.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import sys

from .graph import ingest as _impl

# Rebind this module's entry in sys.modules to the implementation module, so
# importing the old flat path and importing .graph.ingest yield the very
# same module object (shared globals, singletons and monkeypatches).
sys.modules[__name__] = _impl
|