@geravant/sinain 1.19.0 → 1.22.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,310 @@
1
+ #!/usr/bin/env python3
2
+ """Concept Export — package an entity + its neighborhood as a portable bundle.
3
+
4
+ Produces a sinain-concept/v1 envelope that can be transferred to another
5
+ machine and re-imported with concept_import.py to reconstruct the same
6
+ entity page (including the LLM-rendered view, if bundled).
7
+
8
+ The reproducibility invariant we honor:
9
+ "On a new machine: import the bundle → open the same URL → see the same page."
10
+
11
+ For that to hold:
12
+ 1. Entity IDs are content-addressed slugs → stable across machines.
13
+ 2. Triples are exported verbatim (created_at, retracted) for round-trip.
14
+ 3. Optionally bundle the rendered_page JSON so the receiver gets a
15
+ cache hit on first view (deterministic visual identity).
16
+
17
+ We do NOT bundle embeddings — same model on both ends → same vectors,
18
+ so receiver recomputes for ~1.5KB/fact saved.
19
+
20
+ Usage:
21
+ python3 concept_export.py --db <kg.db> --root entity:foo \\
22
+ [--depth 1] [--include-retracted] [--include-page] [--web-db <web.db>] \\
23
+ [--redact private,creditcard,apikey,...]
24
+ """
25
+ from __future__ import annotations
26
+
27
+ import argparse
28
+ import hashlib
29
+ import json
30
+ import re
31
+ import sys
32
+ import time
33
+ from pathlib import Path
34
+
35
+ # ---------------------------------------------------------------------------
36
+ # Redaction (MIRROR OF sense_client/privacy.py — keep patterns in sync until
37
+ # we extract a shared sinain-memory/redaction.py module).
38
+ # ---------------------------------------------------------------------------
39
# Version tag of the rule set below; export_concept writes it into the
# envelope under redactions.rules_version.
REDACT_RULES_VERSION = "1.2"

# Each entry is (compiled regex, replacement marker, rule name). A rule is only
# run when its name is in the caller's enabled set, and rules run in list order,
# so a later rule sees text already rewritten by an earlier one.
_REDACT_PATTERNS: list[tuple[re.Pattern, str, str]] = [
    # (regex, replacement, rule-name)
    # Four groups of four digits, each optionally separated by whitespace or "-".
    (re.compile(r"\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b"), "[REDACTED:card]", "creditcard"),
    # "sk-"/"pk-" prefixes or an "api_key=" style assignment followed by 20+ token chars.
    (re.compile(r"\b(?:sk-|pk-|api[_-]?key[=:]\s*)[A-Za-z0-9_\-]{20,}\b"), "[REDACTED:apikey]", "apikey"),
    (re.compile(r"Bearer\s+[A-Za-z0-9_\-\.]{20,}"), "[REDACTED:bearer]", "bearer"),
    (re.compile(r"\b(?:AKIA|ASIA)[A-Z0-9]{16}\b"), "[REDACTED:awskey]", "awskey"),
    # Case-insensitive; the whole "password = value" assignment is replaced.
    (re.compile(r"(?:password|passwd|pwd)\s*[:=]\s*\S+", re.IGNORECASE), "[REDACTED:password]", "password"),
    (re.compile(r"\bghp_[A-Za-z0-9]{36}\b"), "[REDACTED:github_pat]", "github_pat"),
    (re.compile(r"\bghs_[A-Za-z0-9]{36}\b"), "[REDACTED:github_srv]", "github_srv"),
    (re.compile(r"\bxox[bpoa]-[0-9A-Za-z\-]+"), "[REDACTED:slack]", "slack"),
    (re.compile(r"\bya29\.[0-9A-Za-z\-_]+"), "[REDACTED:google_oauth]", "google_oauth"),
    # Three dot-separated base64url-looking segments, the first starting with "eyJ".
    (re.compile(r"\beyJ[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+\.[A-Za-z0-9\-_]+"), "[REDACTED:jwt]", "jwt"),
    # Case-insensitive "secret|token|key = value" with a value of 10+ token chars.
    (re.compile(r"(?:secret|token|key)\s*[:=]\s*[A-Za-z0-9_\-\.]{10,}", re.IGNORECASE), "[REDACTED:secret]", "secret"),
    (re.compile(r"\b\d{3}-\d{2}-\d{4}\b"), "[REDACTED:ssn]", "ssn"),
    # NOTE(review): this matches only the "-----BEGIN ... PRIVATE KEY-----" header
    # line; the key material after it is not covered by the pattern.
    (re.compile(r"-----BEGIN (?:RSA |EC |OPENSSH )?PRIVATE KEY-----"), "[REDACTED:privkey]", "privkey"),
]
# Non-greedy with DOTALL: each <private>...</private> span is matched separately,
# and a span may contain newlines.
_PRIVATE_TAG = re.compile(r"<private>.*?</private>", re.DOTALL)
58
+
59
+
60
def apply_redactions(text: str, enabled_rules: set[str]) -> tuple[str, list[str]]:
    """Scrub *text* with every enabled redaction rule.

    The ``<private>`` tag rule is tried first, followed by the entries of
    ``_REDACT_PATTERNS`` in list order. A rule is reported only when it
    actually changed the text.

    Returns:
        ``(redacted_text, applied)`` where *applied* lists the names of the
        rules that changed the text, in the order they ran.
    """
    fired: list[str] = []
    # Treat the <private> tag as just another rule at the head of the list.
    rule_chain = [(_PRIVATE_TAG, "[REDACTED:private]", "private"), *_REDACT_PATTERNS]
    for regex, marker, rule_name in rule_chain:
        if rule_name not in enabled_rules:
            continue
        scrubbed = regex.sub(marker, text)
        if scrubbed == text:
            continue
        fired.append(rule_name)
        text = scrubbed
    return text, fired
75
+
76
+
77
+ # ---------------------------------------------------------------------------
78
+ # Export
79
+ # ---------------------------------------------------------------------------
80
+
81
def collect_neighborhood(store, root_entity: str, depth: int,
                         include_retracted: bool) -> list[str]:
    """BFS from root, following both incoming and outgoing refs.

    Outgoing: triples WHERE entity_id = X AND value_type = 'ref' → recurse to value.
    Incoming: triples WHERE value = X AND value_type = 'ref'     → recurse to entity_id.

    Args:
        store: object exposing ``_conn``, a sqlite connection whose rows can be
            indexed by column name.
        root_entity: entity id the traversal starts from; always included.
        depth: maximum number of hops from the root; entities at this distance
            are included but not expanded further.
        include_retracted: when False, retracted triples are not followed.

    Returns:
        Deterministically-ordered (sorted) list of entity_ids reachable within
        *depth* hops.
    """
    from collections import deque

    # Constant SQL fragment (never user input), so interpolating it is safe.
    retracted_filter = "" if include_retracted else "AND retracted = 0"
    # (column that holds the neighbour id, query) — outgoing first, then incoming.
    neighbour_queries = (
        ("value",
         f"""SELECT DISTINCT value FROM triples
             WHERE entity_id = ? AND value_type = 'ref' {retracted_filter}"""),
        ("entity_id",
         f"""SELECT DISTINCT entity_id FROM triples
             WHERE value = ? AND value_type = 'ref' {retracted_filter}"""),
    )

    visited: set[str] = {root_entity}
    # deque gives O(1) pops from the left; list.pop(0) shifts the whole list.
    queue: deque[tuple[str, int]] = deque([(root_entity, 0)])

    while queue:
        eid, d = queue.popleft()
        if d >= depth:
            # At the depth limit: keep the entity, but do not expand it.
            continue
        for column, sql in neighbour_queries:
            for r in store._conn.execute(sql, (eid,)).fetchall():
                ref = r[column]
                if ref and ref not in visited:
                    visited.add(ref)
                    queue.append((ref, d + 1))

    return sorted(visited)  # deterministic ordering for stable checksum
124
+
125
+
126
def serialize_entity(store, entity_id: str, include_retracted: bool,
                     redact_rules: set[str]) -> tuple[dict, list[str], int]:
    """Load every triple stored for *entity_id* and scrub its string values.

    Only non-empty values whose value_type is "string" are passed through the
    redaction rules, and only when *redact_rules* is non-empty. Rows are read
    ordered by (tx_id, attribute, value) so the output is stable.

    Returns:
        ``({id, type, triples: [...]}, applied_rules, redacted_count)`` where
        *applied_rules* may contain repeats (one entry per rule per changed
        value) and *redacted_count* is the number of values that were changed.
        ``type`` is the part of the id before the first ":" or "unknown".
    """
    retracted_clause = "AND retracted = 0"
    if include_retracted:
        retracted_clause = ""
    cursor = store._conn.execute(
        f"""SELECT attribute, value, value_type, tx_id, created_at, retracted, valid_to
            FROM triples WHERE entity_id = ? {retracted_clause}
            ORDER BY tx_id, attribute, value""",
        (entity_id,),
    )

    serialized: list[dict] = []
    rules_hit: list[str] = []
    values_changed = 0
    for record in cursor.fetchall():
        payload = record["value"]
        needs_scrub = record["value_type"] == "string" and redact_rules and payload
        if needs_scrub:
            payload, fired = apply_redactions(payload, redact_rules)
            if fired:
                rules_hit += fired
                values_changed += 1
        serialized.append({
            "attribute": record["attribute"],
            "value": payload,
            "value_type": record["value_type"],
            "tx_id": record["tx_id"],
            "created_at": record["created_at"],
            "retracted": int(record["retracted"]),
            "valid_to": record["valid_to"],
        })

    head, colon, _ = entity_id.partition(":")
    kind = head if colon else "unknown"
    entity = {"id": entity_id, "type": kind, "triples": serialized}
    return entity, rules_hit, values_changed
164
+
165
+
166
def fetch_cached_page(web_db_path: str, root_entity: str,
                      redact_rules: set[str]) -> dict | None:
    """Pull the most recent cached rendered_page for root_entity, if any.

    Redactions are applied over the page summary and bullet text too — the LLM
    may have woven sensitive content into its synthesis.

    Args:
        web_db_path: path to the sqlite file holding the ``page_cache`` table.
        root_entity: entity id whose newest cache row (by generated_at) is read.
        redact_rules: rule names to apply; an empty set skips redaction.

    Returns:
        The decoded page dict with ``generated_at`` set from the cache row and
        any truthy tokens_in / tokens_out / cost_usd copied into
        ``rendered_with``; ``None`` when the file or row is missing or anything
        fails (the failure is written to stderr — this is best-effort).
    """
    import sqlite3
    if not Path(web_db_path).exists():
        return None
    try:
        conn = sqlite3.connect(web_db_path)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                """SELECT page_json, generated_at, tokens_in, tokens_out, cost_usd
                   FROM page_cache WHERE entity_id = ?
                   ORDER BY generated_at DESC LIMIT 1""",
                (root_entity,),
            ).fetchone()
        finally:
            # Close even when the query raises (e.g. the table is missing).
            conn.close()
        if not row:
            return None
        page = json.loads(row["page_json"])
        # Redact rendered_page content.
        if redact_rules:
            if page.get("summary"):
                new_summary, _ = apply_redactions(page["summary"], redact_rules)
                page["summary"] = new_summary
            for sec in page.get("sections", []) or []:
                for b in sec.get("bullets", []) or []:
                    if b.get("text"):
                        new_text, _ = apply_redactions(b["text"], redact_rules)
                        b["text"] = new_text
        page["generated_at"] = row["generated_at"]
        # A stored page may carry "rendered_with": null; setdefault() would keep
        # that None and the assignments below would fail, so replace it.
        rendered_with = page.get("rendered_with")
        if not isinstance(rendered_with, dict):
            rendered_with = {}
            page["rendered_with"] = rendered_with
        # cost_usd is carried along with the token counts because the importer
        # reads rendered_with.cost_usd when it repopulates page_cache.
        for column in ("tokens_in", "tokens_out", "cost_usd"):
            if row[column]:
                rendered_with[column] = row[column]
        return page
    except Exception as e:
        sys.stderr.write(f"fetch_cached_page failed: {e}\n")
        return None
206
+
207
+
208
def export_concept(db_path: str, root_entity: str, depth: int = 1,
                   include_retracted: bool = False,
                   include_page: bool = True,
                   web_db_path: str | None = None,
                   redact_rules: set[str] | None = None) -> dict:
    """Build a sinain-concept/v1 envelope for *root_entity* and its neighborhood.

    Args:
        db_path: path to the knowledge-graph database opened via TripleStore.
        root_entity: entity id the export is centred on.
        depth: number of ref hops to follow from the root.
        include_retracted: also follow and export retracted triples.
        include_page: bundle the cached rendered page (needs *web_db_path*).
        web_db_path: path to the web database holding the page cache.
        redact_rules: redaction rule names; ``None`` selects the default set
            (private, creditcard, apikey, bearer, awskey, password, secret).

    Returns:
        The envelope dict, including a ``checksum`` computed over the canonical
        JSON of ``root_entity`` + ``entities`` only (not the rendered page or
        metadata).
    """
    from triplestore import TripleStore

    if redact_rules is None:
        redact_rules = {"private", "creditcard", "apikey", "bearer", "awskey",
                        "password", "secret"}

    entities = []
    all_applied: list[str] = []
    total_redacted = 0
    triple_count = 0
    fact_count = 0

    store = TripleStore(db_path)
    try:
        entity_ids = collect_neighborhood(store, root_entity, depth, include_retracted)
        for eid in entity_ids:
            entity_obj, applied, n_redacted = serialize_entity(
                store, eid, include_retracted, redact_rules,
            )
            entities.append(entity_obj)
            all_applied.extend(applied)
            total_redacted += n_redacted
            triple_count += len(entity_obj["triples"])
            if eid.startswith("fact:"):
                fact_count += 1
    finally:
        # Release the store even when a query or redaction step raises.
        store.close()

    rendered_page = None
    if include_page and web_db_path:
        rendered_page = fetch_cached_page(web_db_path, root_entity, redact_rules)

    envelope = {
        "format": "sinain-concept/v1",
        "exported_at": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()),
        # NOTE(review): tool_version / schema_version / embedding_model are
        # hard-coded here — confirm they track the released package.
        "exporter": {
            "tool": "sinain-core",
            "tool_version": "1.14.0",
            "schema_version": "triplestore/v3",
            "embedding_model": "all-MiniLM-L6-v2",
        },
        "root_entity": root_entity,
        "depth": depth,
        "stats": {
            "entities": len(entity_ids),
            "facts": fact_count,
            "triples": triple_count,
        },
        "entities": entities,
        "rendered_page": rendered_page,
        "redactions": {
            "applied": sorted(set(all_applied)),
            "rules_version": REDACT_RULES_VERSION,
            "redacted_count": total_redacted,
        },
    }

    # Compute checksum over canonical JSON of (root_entity + entities) — this is
    # what the receiver should validate against on import.
    canonical = json.dumps(
        {"root_entity": root_entity, "entities": entities},
        sort_keys=True, ensure_ascii=False, separators=(",", ":"),
    )
    envelope["checksum"] = "sha256:" + hashlib.sha256(canonical.encode("utf-8")).hexdigest()
    return envelope
278
+
279
+
280
def main() -> None:
    """CLI entry point: export one concept bundle and print it as JSON.

    Prints ``{"error": ...}`` and exits with status 1 when the --db path does
    not exist; otherwise prints the envelope returned by export_concept.
    """
    cli = argparse.ArgumentParser(description="Concept Export")
    cli.add_argument("--db", required=True)
    cli.add_argument("--root", required=True, help="Root entity id (e.g. entity:citibank)")
    cli.add_argument("--depth", type=int, default=1)
    cli.add_argument("--include-retracted", action="store_true")
    cli.add_argument("--include-page", action="store_true",
                     help="Bundle the cached rendered_page if available")
    cli.add_argument("--web-db", default=None,
                     help="Path to web.db for page cache lookup")
    cli.add_argument("--redact", default="private,creditcard,apikey,bearer,awskey,password,secret",
                     help="Comma-separated redaction rule names")
    opts = cli.parse_args()

    db_file = Path(opts.db)
    if not db_file.exists():
        print(json.dumps({"error": f"db not found: {opts.db}"}))
        sys.exit(1)

    # Split the comma-separated list, dropping blanks and surrounding spaces.
    rule_names: set[str] = set()
    for chunk in opts.redact.split(","):
        cleaned = chunk.strip()
        if cleaned:
            rule_names.add(cleaned)

    bundle = export_concept(
        opts.db,
        opts.root,
        depth=opts.depth,
        include_retracted=opts.include_retracted,
        include_page=opts.include_page,
        web_db_path=opts.web_db,
        redact_rules=rule_names,
    )
    print(json.dumps(bundle, ensure_ascii=False))
307
+
308
+
309
# Run the export CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()
@@ -0,0 +1,254 @@
1
+ #!/usr/bin/env python3
2
+ """Concept Import — replay a sinain-concept/v1 bundle into the local triplestore.
3
+
4
+ Designed for idempotency: re-importing the same bundle in `merge` mode is a
5
+ no-op (existing triples skip-as-duplicate). The receiver re-issues tx_ids
6
+ locally but preserves the source-tx grouping, so the digest atomicity that
7
+ knowledge_integrator.py relies on survives the round-trip.
8
+
9
+ Usage:
10
+ python3 concept_import.py --db <kg.db> --bundle <bundle.json> \\
11
+ [--web-db <web.db>] [--conflict skip|merge|overwrite]
12
+ """
13
+ from __future__ import annotations
14
+
15
+ import argparse
16
+ import hashlib
17
+ import json
18
+ import sqlite3
19
+ import sys
20
+ import time
21
+ from pathlib import Path
22
+
23
+
24
def verify_envelope(envelope: dict) -> tuple[bool, str]:
    """Check the envelope's format tag, required keys and (if present) checksum.

    The checksum is recomputed over the canonical JSON of ``root_entity`` and
    ``entities``; an envelope without a checksum value is accepted as-is.

    Returns:
        ``(True, "")`` when the envelope is acceptable, otherwise
        ``(False, error_msg)``.
    """
    declared_format = envelope.get("format")
    if declared_format != "sinain-concept/v1":
        return False, f"unsupported format: {declared_format!r} (need sinain-concept/v1)"

    required_keys = ("root_entity", "entities")
    if not all(key in envelope for key in required_keys):
        return False, "envelope missing root_entity or entities"

    claimed = envelope.get("checksum")
    if not claimed:
        # No checksum supplied: nothing further to verify.
        return True, ""

    payload = {"root_entity": envelope["root_entity"], "entities": envelope["entities"]}
    canonical_bytes = json.dumps(
        payload, sort_keys=True, ensure_ascii=False, separators=(",", ":"),
    ).encode("utf-8")
    computed = "sha256:" + hashlib.sha256(canonical_bytes).hexdigest()
    if computed == claimed:
        return True, ""
    return False, f"checksum mismatch (expected {claimed[:23]}..., got {computed[:23]}...)"
41
+
42
+
43
def bundle_sha(envelope: dict) -> str:
    """Return the hex SHA-256 of the whole envelope, for idempotency tracking.

    The envelope is serialised with sorted keys and non-ASCII characters kept
    as-is, then hashed as UTF-8 (stored in web.db.concept_imports).
    """
    serialized = json.dumps(envelope, sort_keys=True, ensure_ascii=False)
    digest = hashlib.sha256()
    digest.update(serialized.encode("utf-8"))
    return digest.hexdigest()
47
+
48
+
49
def import_bundle(db_path: str, envelope: dict, conflict: str = "merge",
                  web_db_path: str | None = None) -> dict:
    """Replay envelope into the local knowledge graph.

    conflict modes (applied to active, i.e. non-retracted, imported triples):
      - skip:      an identical active (entity, attribute, value) triple already
                   present locally wins; the imported one is dropped.
      - merge:     exact duplicates are skipped the same way; triples not yet
                   present are inserted.
      - overwrite: an identical active local triple is retracted in the new
                   transaction, then the imported triple is asserted.

    Retracted triples from the bundle are always inserted with retracted=1,
    regardless of the conflict mode.

    Args:
        db_path: path to the local knowledge-graph database.
        envelope: decoded bundle; it is not modified by this function.
        conflict: one of "skip", "merge", "overwrite".
        web_db_path: optional web database; when given, the bundled rendered
            page (if any) is written to ``page_cache`` and an audit row is
            added to ``concept_imports``. Both writes are best-effort: failures
            are reported on stderr and do not abort the import.

    Returns:
        A summary dict with ``ok``, ``imported``, ``root_entity``, ``stats``,
        ``rendered_page_cached`` and ``view_url``.
    """
    from triplestore import TripleStore

    # Group imported triples by their source_tx so we preserve atomicity.
    # source_tx_id → list of (entity_id, attribute, value, value_type, retracted,
    #                         valid_to, original_created_at)
    by_source_tx: dict[int, list[tuple]] = {}
    for ent in envelope.get("entities", []):
        eid = ent.get("id")
        for t in ent.get("triples", []):
            stx = int(t.get("tx_id") or 0)
            by_source_tx.setdefault(stx, []).append((
                eid,
                t["attribute"],
                t["value"],
                t.get("value_type", "string"),
                int(t.get("retracted", 0)),
                t.get("valid_to"),
                t.get("created_at"),
            ))

    inserted = 0
    skipped_dup = 0
    overwritten = 0
    tx_mapping: dict[int, int] = {}  # source_tx → new_tx

    store = TripleStore(db_path)
    try:
        for source_tx in sorted(by_source_tx):
            triples = by_source_tx[source_tx]

            # Begin one local tx per source tx (preserves grouping)
            new_tx = store.begin_tx(
                source="concept-import",
                metadata={"source_tx": source_tx, "bundle_root": envelope.get("root_entity")},
            )
            tx_mapping[source_tx] = new_tx

            for (eid, attr, value, value_type, retracted, valid_to, created_at) in triples:
                # Imported retracted triples → preserve retracted state. (They're
                # part of the bundle's audit trail.)
                if retracted:
                    # Insert as retracted; valid_to stays as-is.
                    # We use a direct INSERT to preserve the retracted flag —
                    # assert_triple always sets retracted=0.
                    # NOTE(review): there is no duplicate check on this path, so
                    # re-importing a bundle inserts its retracted triples again.
                    store._conn.execute(
                        """INSERT INTO triples
                           (tx_id, entity_id, attribute, value, value_type, retracted, retracted_tx, valid_to, created_at)
                           VALUES (?, ?, ?, ?, ?, 1, ?, ?, ?)""",
                        (new_tx, eid, attr, value, value_type, new_tx, valid_to, created_at or _iso_now()),
                    )
                    store._conn.execute(
                        "INSERT OR IGNORE INTO entity_types (entity_id, entity_type) VALUES (?, ?)",
                        (eid, eid.split(":", 1)[0] if ":" in eid else "unknown"),
                    )
                    inserted += 1
                    continue

                # Active triple — apply conflict mode for (entity, attribute, value)
                existing = store._conn.execute(
                    """SELECT id FROM triples WHERE entity_id = ? AND attribute = ?
                       AND value = ? AND retracted = 0 LIMIT 1""",
                    (eid, attr, value),
                ).fetchone()

                if existing:
                    # Exact triple already present
                    if conflict == "skip" or conflict == "merge":
                        skipped_dup += 1
                        continue
                    if conflict == "overwrite":
                        # Retract conflicting then insert imported
                        store.retract_triple(new_tx, eid, attr, value)
                        overwritten += 1

                # Insert
                store.assert_triple(new_tx, eid, attr, value, value_type=value_type)
                # Patch created_at to preserve source timeline (assert_triple uses now()).
                if created_at:
                    store._conn.execute(
                        """UPDATE triples SET created_at = ?
                           WHERE tx_id = ? AND entity_id = ? AND attribute = ? AND value = ?
                             AND id = (SELECT MAX(id) FROM triples WHERE tx_id = ? AND entity_id = ? AND attribute = ? AND value = ?)""",
                        (created_at, new_tx, eid, attr, value, new_tx, eid, attr, value),
                    )
                inserted += 1

            # Commit once per replayed source transaction.
            store._conn.commit()
    finally:
        # Release the store even when a malformed triple or SQL error aborts the replay.
        store.close()

    # Page cache reuse
    page_cached = False
    if web_db_path and envelope.get("rendered_page"):
        # Map source tx_watermark to local — find max new_tx for facts in the bundle
        local_watermark = max(tx_mapping.values()) if tx_mapping else 0
        try:
            # Work on a copy: stamping tx_watermark onto the caller's envelope
            # would change the bundle sha recorded below on every import.
            page = dict(envelope["rendered_page"])
            page["tx_watermark"] = local_watermark
            conn = sqlite3.connect(web_db_path)
            try:
                conn.execute(
                    """INSERT OR REPLACE INTO page_cache
                       (entity_id, tx_watermark, page_json, generated_at, tokens_in, tokens_out, cost_usd)
                       VALUES (?, ?, ?, ?, ?, ?, ?)""",
                    (
                        envelope["root_entity"],
                        local_watermark,
                        json.dumps(page, ensure_ascii=False),
                        int(time.time() * 1000),
                        (page.get("rendered_with") or {}).get("tokens_in"),
                        (page.get("rendered_with") or {}).get("tokens_out"),
                        (page.get("rendered_with") or {}).get("cost_usd"),
                    ),
                )
                conn.commit()
            finally:
                conn.close()
            page_cached = True
        except Exception as e:
            sys.stderr.write(f"page cache reuse failed: {e}\n")

    # Audit row in concept_imports
    if web_db_path:
        try:
            # The envelope is untouched at this point, so the same bundle always
            # yields the same sha.
            sha = bundle_sha(envelope)
            conn = sqlite3.connect(web_db_path)
            try:
                conn.execute(
                    """INSERT INTO concept_imports
                       (imported_at, root_entity, source_tool, source_version, envelope_format,
                        bundle_sha256, conflict_mode, triples_count, redactions_seen, notes)
                       VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                    (
                        int(time.time() * 1000),
                        envelope["root_entity"],
                        (envelope.get("exporter") or {}).get("tool"),
                        (envelope.get("exporter") or {}).get("tool_version"),
                        envelope.get("format"),
                        sha,
                        conflict,
                        inserted,
                        json.dumps((envelope.get("redactions") or {}).get("applied", [])),
                        None,
                    ),
                )
                conn.commit()
            finally:
                conn.close()
        except Exception as e:
            sys.stderr.write(f"concept_imports log failed: {e}\n")

    return {
        "ok": True,
        "imported": True,
        "root_entity": envelope.get("root_entity"),
        "stats": {
            "entities_seen": len(envelope.get("entities", [])),
            "triples_inserted": inserted,
            "triples_skipped_duplicate": skipped_dup,
            "triples_overwritten": overwritten,
            "tx_mapping_count": len(tx_mapping),
        },
        "rendered_page_cached": page_cached,
        "view_url": f"/knowledge/ui/entity/{envelope.get('root_entity', '').replace(':', '%3A')}",
    }
218
+
219
+
220
+ def _iso_now() -> str:
221
+ return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())
222
+
223
+
224
def main() -> None:
    """CLI entry point: load a bundle, verify it and import it.

    Every failure that is detected here (unreadable or invalid JSON, a bundle
    that is not a JSON object, a failed envelope check) is reported as
    ``{"ok": false, "error": ...}`` on stdout with exit status 1. On success
    the summary returned by import_bundle is printed as JSON.
    """
    parser = argparse.ArgumentParser(description="Concept Import")
    parser.add_argument("--db", required=True)
    parser.add_argument("--bundle", required=True, help="Path to .sinain-concept.json (or - for stdin)")
    parser.add_argument("--web-db", default=None)
    parser.add_argument("--conflict", choices=["skip", "merge", "overwrite"], default="merge")
    args = parser.parse_args()

    if not Path(args.db).exists():
        # Auto-create empty knowledge DB on first import — receiver may have nothing yet.
        Path(args.db).parent.mkdir(parents=True, exist_ok=True)
        from triplestore import TripleStore
        TripleStore(args.db).close()

    try:
        if args.bundle == "-":
            envelope = json.load(sys.stdin)
        else:
            envelope = json.loads(Path(args.bundle).read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(json.dumps({"ok": False, "error": f"cannot read bundle: {exc}"}))
        sys.exit(1)

    if not isinstance(envelope, dict):
        # Valid JSON but not an object: the envelope checks need a mapping.
        print(json.dumps({"ok": False, "error": "bundle must be a JSON object"}))
        sys.exit(1)

    ok, err = verify_envelope(envelope)
    if not ok:
        print(json.dumps({"ok": False, "error": err}))
        sys.exit(1)

    result = import_bundle(args.db, envelope, conflict=args.conflict,
                           web_db_path=args.web_db)
    print(json.dumps(result, ensure_ascii=False))
251
+
252
+
253
# Run the import CLI only when this file is executed as a script.
if __name__ == "__main__":
    main()