@geravant/sinain 1.18.3 → 1.20.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.js +10 -0
- package/onboard.js +32 -6
- package/package.json +1 -1
- package/sinain-core/package-lock.json +439 -0
- package/sinain-core/package.json +2 -0
- package/sinain-core/src/index.ts +283 -0
- package/sinain-core/src/server.ts +1001 -4
- package/sinain-core/src/web-db/schema.ts +100 -0
- package/sinain-core/src/web-db/store.ts +279 -0
- package/sinain-memory/concept_export.py +310 -0
- package/sinain-memory/concept_import.py +254 -0
- package/sinain-memory/graph_query.py +455 -0
- package/sinain-memory/page_renderer.py +447 -0
- package/sinain-memory/retract.py +236 -0
|
@@ -0,0 +1,254 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Concept Import — replay a sinain-concept/v1 bundle into the local triplestore.
|
|
3
|
+
|
|
4
|
+
Designed for idempotency: re-importing the same bundle in `merge` mode is a
|
|
5
|
+
no-op (existing triples skip-as-duplicate). The receiver re-issues tx_ids
|
|
6
|
+
locally but preserves the source-tx grouping, so the digest atomicity that
|
|
7
|
+
knowledge_integrator.py relies on survives the round-trip.
|
|
8
|
+
|
|
9
|
+
Usage:
|
|
10
|
+
python3 concept_import.py --db <kg.db> --bundle <bundle.json> \\
|
|
11
|
+
[--web-db <web.db>] [--conflict skip|merge|overwrite]
|
|
12
|
+
"""
|
|
13
|
+
from __future__ import annotations
|
|
14
|
+
|
|
15
|
+
import argparse
|
|
16
|
+
import hashlib
|
|
17
|
+
import json
|
|
18
|
+
import sqlite3
|
|
19
|
+
import sys
|
|
20
|
+
import time
|
|
21
|
+
from pathlib import Path
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def verify_envelope(envelope: dict) -> tuple[bool, str]:
    """Validate a bundle's format marker, required keys and optional checksum.

    Args:
        envelope: The decoded bundle. Anything other than a dict (e.g. a JSON
            array or scalar at the top level) is rejected rather than raising.

    Returns:
        ``(True, "")`` when the envelope is acceptable, otherwise
        ``(False, <human-readable reason>)``.

    The checksum is only verified when the envelope carries a truthy
    ``checksum`` value; bundles without one are accepted as-is.
    """
    # Decoded JSON may legally be a list/str/number; report that as a
    # validation failure instead of crashing on ``.get``.
    if not isinstance(envelope, dict):
        return False, f"envelope must be a JSON object, got {type(envelope).__name__}"

    fmt = envelope.get("format")
    if fmt != "sinain-concept/v1":
        return False, f"unsupported format: {fmt!r} (need sinain-concept/v1)"
    if "root_entity" not in envelope or "entities" not in envelope:
        return False, "envelope missing root_entity or entities"

    expected = envelope.get("checksum")
    if expected:
        # The mismatch message slices ``expected``; a non-string checksum
        # (e.g. a number) can never match and would otherwise raise there.
        if not isinstance(expected, str):
            return False, f"checksum must be a string, got {type(expected).__name__}"
        # The digest covers only root_entity + entities, serialized compactly
        # with sorted keys so the byte form is deterministic.
        canonical = json.dumps(
            {"root_entity": envelope["root_entity"], "entities": envelope["entities"]},
            sort_keys=True, ensure_ascii=False, separators=(",", ":"),
        )
        actual = "sha256:" + hashlib.sha256(canonical.encode("utf-8")).hexdigest()
        if actual != expected:
            return False, f"checksum mismatch (expected {expected[:23]}..., got {actual[:23]}...)"
    return True, ""
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def bundle_sha(envelope: dict) -> str:
    """Return the hex SHA-256 of the entire envelope.

    The envelope is serialized with sorted keys (default separators,
    non-ASCII characters kept as-is) and hashed as UTF-8 bytes, so key order
    in the input dict does not affect the result. ``import_bundle`` stores
    this value in the ``bundle_sha256`` column of its audit row.
    """
    serialized = json.dumps(envelope, ensure_ascii=False, sort_keys=True)
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def import_bundle(db_path: str, envelope: dict, conflict: str = "merge",
                  web_db_path: str | None = None) -> dict:
    """Replay an envelope's triples into the local knowledge graph.

    Triples are grouped by their source ``tx_id`` and each group is written
    under one freshly started local transaction, so the source grouping is
    preserved even though transaction ids are re-issued locally.

    Args:
        db_path: Path handed to ``TripleStore``.
        envelope: Decoded bundle; expected to have passed ``verify_envelope``.
            It is not modified by this function.
        conflict: How to treat an imported *active* triple whose exact
            (entity, attribute, value) already exists as an active local row:

            - ``skip`` / ``merge``: the imported triple is dropped and counted
              as a duplicate (the two modes take the same branch here).
            - ``overwrite``: the local triple is retracted, then the imported
              one is asserted.

            Imported triples flagged as retracted bypass the conflict mode
            and are always inserted as retracted rows.
        web_db_path: Optional path to the web DB. When given, the bundle's
            ``rendered_page`` (if any) is written to ``page_cache`` and an
            audit row is added to ``concept_imports``. Both writes are
            best-effort: failures are reported on stderr and do not fail the
            import.

    Returns:
        A JSON-serializable summary with counters under ``stats``, the
        ``rendered_page_cached`` flag and a ``view_url`` for the root entity.

    Raises:
        KeyError: If a triple lacks ``attribute`` or ``value``. This is
            detected before the store is opened.
        Exception: Any error raised by the store while writing is propagated
            after the pending work is rolled back and the store is closed.
    """
    from triplestore import TripleStore

    # Group imported triples by their source tx so atomicity is preserved.
    # Done before opening the store so a malformed bundle touches nothing.
    # source_tx_id -> list of
    #   (entity_id, attribute, value, value_type, retracted, valid_to, created_at)
    by_source_tx: dict[int, list[tuple]] = {}
    for ent in envelope.get("entities", []):
        eid = ent.get("id")
        for t in ent.get("triples", []):
            stx = int(t.get("tx_id") or 0)
            by_source_tx.setdefault(stx, []).append((
                eid,
                t["attribute"],
                t["value"],
                t.get("value_type", "string"),
                int(t.get("retracted", 0)),
                t.get("valid_to"),
                t.get("created_at"),
            ))

    inserted = 0
    skipped_dup = 0
    overwritten = 0
    tx_mapping: dict[int, int] = {}  # source_tx -> new_tx

    store = TripleStore(db_path)
    try:
        for source_tx in sorted(by_source_tx):
            # Begin one local tx per source tx (preserves grouping).
            new_tx = store.begin_tx(
                source="concept-import",
                metadata={"source_tx": source_tx, "bundle_root": envelope.get("root_entity")},
            )
            tx_mapping[source_tx] = new_tx

            for (eid, attr, value, value_type, retracted, valid_to, created_at) in by_source_tx[source_tx]:
                if retracted:
                    # Retracted triples are part of the bundle's audit trail.
                    # A direct INSERT is used to keep the retracted flag
                    # (per the original author, assert_triple always writes
                    # retracted=0); valid_to is carried over unchanged.
                    # NOTE(review): these rows are inserted on every import
                    # with no duplicate check, so re-importing a bundle that
                    # contains retracted triples is not a no-op — confirm
                    # whether that is intended.
                    store._conn.execute(
                        """INSERT INTO triples
                           (tx_id, entity_id, attribute, value, value_type, retracted, retracted_tx, valid_to, created_at)
                           VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?)""",
                        (new_tx, eid, attr, value, value_type, new_tx, valid_to, created_at or _iso_now()),
                    )
                    # Entity type is the prefix before the first ':' of the id.
                    store._conn.execute(
                        "INSERT OR IGNORE INTO entity_types (entity_id, entity_type) VALUES (?, ?)",
                        (eid, eid.split(":", 1)[0] if ":" in eid else "unknown"),
                    )
                    inserted += 1
                    continue

                # Active triple: apply the conflict mode for (entity, attribute, value).
                existing = store._conn.execute(
                    """SELECT id FROM triples WHERE entity_id = ? AND attribute = ?
                       AND value = ? AND retracted = 0 LIMIT 1""",
                    (eid, attr, value),
                ).fetchone()

                if existing:
                    # Exact triple already present.
                    if conflict in ("skip", "merge"):
                        skipped_dup += 1
                        continue
                    if conflict == "overwrite":
                        # Retract the local row, then fall through to insert.
                        store.retract_triple(new_tx, eid, attr, value)
                        overwritten += 1

                store.assert_triple(new_tx, eid, attr, value, value_type=value_type)
                # Patch created_at on the row just written (highest id for
                # this tx/entity/attribute/value) to keep the source timeline.
                if created_at:
                    store._conn.execute(
                        """UPDATE triples SET created_at = ?
                           WHERE tx_id = ? AND entity_id = ? AND attribute = ? AND value = ?
                           AND id = (SELECT MAX(id) FROM triples WHERE tx_id = ? AND entity_id = ? AND attribute = ? AND value = ?)""",
                        (created_at, new_tx, eid, attr, value, new_tx, eid, attr, value),
                    )
                inserted += 1

            # One commit per source tx.
            store._conn.commit()
    except Exception:
        # Drop the half-written source tx instead of leaving it pending.
        # NOTE(review): assumes store._conn is a sqlite3 connection — confirm.
        store._conn.rollback()
        raise
    finally:
        store.close()

    # Page cache reuse (best-effort).
    page_cached = False
    rendered = envelope.get("rendered_page")
    if web_db_path and rendered:
        # The source tx_watermark means nothing locally; use the highest
        # local tx created for this bundle instead.
        local_watermark = max(tx_mapping.values()) if tx_mapping else 0
        try:
            # Work on a copy: writing the local watermark into the caller's
            # envelope would change what bundle_sha() hashes for the audit
            # row below, making the recorded sha differ between imports.
            page = dict(rendered)
            page["tx_watermark"] = local_watermark
            usage = page.get("rendered_with") or {}
            conn = sqlite3.connect(web_db_path)
            try:
                conn.execute(
                    """INSERT OR REPLACE INTO page_cache
                       (entity_id, tx_watermark, page_json, generated_at, tokens_in, tokens_out, cost_usd)
                       VALUES (?, ?, ?, ?, ?, ?, ?)""",
                    (
                        envelope["root_entity"],
                        local_watermark,
                        json.dumps(page, ensure_ascii=False),
                        int(time.time() * 1000),
                        usage.get("tokens_in"),
                        usage.get("tokens_out"),
                        usage.get("cost_usd"),
                    ),
                )
                conn.commit()
            finally:
                conn.close()
            page_cached = True
        except Exception as e:
            sys.stderr.write(f"page cache reuse failed: {e}\n")

    # Audit row in concept_imports (best-effort).
    if web_db_path:
        try:
            sha = bundle_sha(envelope)
            conn = sqlite3.connect(web_db_path)
            try:
                conn.execute(
                    """INSERT INTO concept_imports
                       (imported_at, root_entity, source_tool, source_version, envelope_format,
                        bundle_sha256, conflict_mode, triples_count, redactions_seen, notes)
                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        int(time.time() * 1000),
                        envelope["root_entity"],
                        (envelope.get("exporter") or {}).get("tool"),
                        (envelope.get("exporter") or {}).get("tool_version"),
                        envelope.get("format"),
                        sha,
                        conflict,
                        inserted,
                        json.dumps((envelope.get("redactions") or {}).get("applied", [])),
                        None,
                    ),
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            sys.stderr.write(f"concept_imports log failed: {e}\n")

    return {
        "ok": True,
        "imported": True,
        "root_entity": envelope.get("root_entity"),
        "stats": {
            "entities_seen": len(envelope.get("entities", [])),
            "triples_inserted": inserted,
            "triples_skipped_duplicate": skipped_dup,
            "triples_overwritten": overwritten,
            "tx_mapping_count": len(tx_mapping),
        },
        "rendered_page_cached": page_cached,
        "view_url": f"/knowledge/ui/entity/{envelope.get('root_entity', '').replace(':', '%3A')}",
    }
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
def _iso_now() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    utc_now = time.gmtime()
    stamp = time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)
    return stamp
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def main() -> None:
    """CLI entry point: load a bundle, validate it and import it.

    Prints exactly one JSON object on stdout. On success this is the summary
    returned by ``import_bundle``. When the bundle cannot be read, is not
    valid JSON, or fails ``verify_envelope``, it is
    ``{"ok": false, "error": ...}`` and the process exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Concept Import")
    parser.add_argument("--db", required=True)
    parser.add_argument("--bundle", required=True, help="Path to .sinain-concept.json (or - for stdin)")
    parser.add_argument("--web-db", default=None)
    parser.add_argument("--conflict", choices=["skip", "merge", "overwrite"], default="merge")
    args = parser.parse_args()

    # Load and validate the bundle before touching the filesystem, so a bad
    # bundle does not leave an empty knowledge DB behind.
    try:
        if args.bundle == "-":
            envelope = json.load(sys.stdin)
        else:
            envelope = json.loads(Path(args.bundle).read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        # Report it in the same JSON shape as a verification failure.
        print(json.dumps({"ok": False, "error": f"cannot read bundle: {e}"}))
        sys.exit(1)

    ok, err = verify_envelope(envelope)
    if not ok:
        print(json.dumps({"ok": False, "error": err}))
        sys.exit(1)

    if not Path(args.db).exists():
        # Auto-create empty knowledge DB on first import — receiver may have nothing yet.
        Path(args.db).parent.mkdir(parents=True, exist_ok=True)
        from triplestore import TripleStore
        TripleStore(args.db).close()

    result = import_bundle(args.db, envelope, conflict=args.conflict,
                           web_db_path=args.web_db)
    print(json.dumps(result, ensure_ascii=False))
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|