@geravant/sinain 1.18.3 → 1.20.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,447 @@
1
+ #!/usr/bin/env python3
2
+ """Page Renderer — synthesize a Confluence-style page for an entity.
3
+
4
+ Given an entity_id, loads top-K facts from the triplestore, calls an LLM to
5
+ group them into themed sections with a summary, and emits validated JSON.
6
+
7
+ Output schema:
8
+ {
9
+ "entity": "entity:citibank",
10
+ "tx_watermark": 14823,
11
+ "fact_count": 247,
12
+ "facts_used": 247,
13
+ "summary": "Citibank is …",
14
+ "sections": [
15
+ {
16
+ "heading": "Key People",
17
+ "bullets": [
18
+ { "fact_id": "fact:citibank-cto-17yrs",
19
+ "text": "CTO has 17 yrs tenure",
20
+ "confidence": 0.92,
21
+ "domain": "people",
22
+ "first_seen": "2026-04-12" }
23
+ ]
24
+ }
25
+ ],
26
+ "stats": { "tokens_in": 18420, "tokens_out": 1380, "dropped_bullets": 0 }
27
+ }
28
+
29
+ Hallucinated fact_ids (in LLM output but not in input) are dropped with a
30
+ count in stats.dropped_bullets — same defensive pattern as
31
+ knowledge_integrator.py.
32
+
33
+ Usage:
34
+ python3 page_renderer.py --db memory/knowledge-graph.db \
35
+ --entity entity:citibank [--max-facts 1000]
36
+ """
37
+ from __future__ import annotations
38
+
39
+ import argparse
40
+ import json
41
+ import sys
42
+ from pathlib import Path
43
+
44
+ from common import LLMError, call_llm_with_fallback, extract_json
45
+
46
+
47
# System prompt sent with every multi-fact render (see render_page). It tells
# the model how to group facts into sections and fixes the JSON shape that
# render_page validates afterwards: a "summary" string plus "sections", each
# with a "heading", "bullets" ({fact_id, text, confidence}) and optional "notes".
SYSTEM_PROMPT = """\
You are organizing knowledge into a Confluence-style page about a specific entity.
INPUT: a list of facts each with a stable fact_id, value, confidence, domain, and first_seen.
OUTPUT: JSON matching the schema below.

RULES:
- Group facts into 2 to 8 themed sections.
- Section ordering preference: Overview → People → Projects/Work → Decisions → Open Questions → Recent Activity. Only include sections that fit the available facts.
- EVERY bullet MUST reference a real fact_id from the input list. Do not invent fact_ids.
- Each bullet's "text" is at most 140 characters, present-tense, plain English. Rewrite the fact for readability — do NOT just quote it verbatim if it can be tighter.
- The "summary" is 2 to 4 sentences synthesizing the entity at a glance. Cite no fact_ids in the summary.
- If facts contradict, prefer the higher-confidence fact and note the disagreement in the section's optional "notes" field.
- If the entity has very few facts (<5), produce one "Overview" section with all of them.

OUTPUT ONLY a JSON object, no other text:
{
"summary": "2-4 sentence summary",
"sections": [
{
"heading": "Section title",
"bullets": [
{ "fact_id": "fact:...", "text": "...", "confidence": 0.0 }
],
"notes": "optional: contradictions or caveats"
}
]
}
"""
75
+
76
+
77
def _like_literal(text: str) -> str:
    # Escape LIKE wildcards so a slug containing "_" or "%" matches literally.
    # Must be paired with ESCAPE '\' in the SQL statement.
    return text.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


def _resolve_fact_ids(conn, entity_id: str, max_facts: int) -> list[str]:
    """Return the ids whose attribute sets make up the page for *entity_id*.

    *conn* is the triplestore's SQLite connection; rows are read by column
    name, as the original code did.
    """
    if entity_id.startswith("fact:"):
        # The fact IS the page.
        return [entity_id]

    if entity_id.startswith("entity:"):
        # Find ANY incoming ref (any attribute). Filtering to
        # attribute='entity' was too narrow — real data references entities
        # via many attribute names (related_to, parent_org, employed_by, ...).
        rows = conn.execute(
            """SELECT DISTINCT entity_id FROM triples
               WHERE value = ? AND value_type = 'ref' AND retracted = 0
               LIMIT ?""",
            (entity_id, max_facts),
        ).fetchall()
        fact_ids = [r["entity_id"] for r in rows]
        seen = set(fact_ids)

        # Some installs store the entity pointer as value_type='string' (the
        # bare slug, not a typed ref) under the 'entity' attribute.
        slug_part = entity_id.split(":", 1)[1]
        legacy_rows = conn.execute(
            """SELECT DISTINCT entity_id FROM triples
               WHERE attribute = 'entity' AND value = ?
                 AND value_type = 'string' AND retracted = 0
               LIMIT ?""",
            (slug_part, max_facts),
        ).fetchall()
        for r in legacy_rows:
            fid = r["entity_id"]
            if fid not in seen:
                seen.add(fid)
                fact_ids.append(fid)

        # Always include the entity's own attributes as a leading "self" fact,
        # without duplicating it when the entity also references itself.
        self_attrs_count = conn.execute(
            "SELECT COUNT(*) AS n FROM triples WHERE entity_id = ? AND retracted = 0",
            (entity_id,),
        ).fetchone()["n"]
        if self_attrs_count > 0:
            fact_ids = [entity_id] + [fid for fid in fact_ids if fid != entity_id]
        return fact_ids[:max_facts]

    # Bare slug — try entity:<slug> first, reusing the same connection.
    eid = f"entity:{entity_id}"
    exists = conn.execute(
        "SELECT 1 FROM triples WHERE entity_id = ? AND retracted = 0 LIMIT 1",
        (eid,),
    ).fetchone()
    if exists:
        return _resolve_fact_ids(conn, eid, max_facts)

    # Then fact:<slug>* (prefix), then *<slug>* (substring) across both prefixes.
    needle = _like_literal(entity_id)
    rows = conn.execute(
        """SELECT DISTINCT entity_id FROM triples
           WHERE entity_id LIKE ? ESCAPE '\\' AND retracted = 0
           LIMIT ?""",
        (f"fact:{needle}%", max_facts),
    ).fetchall()
    fact_ids = [r["entity_id"] for r in rows]
    if not fact_ids:
        rows = conn.execute(
            """SELECT DISTINCT entity_id FROM triples
               WHERE (entity_id LIKE ? ESCAPE '\\' OR entity_id LIKE ? ESCAPE '\\')
                 AND retracted = 0
               LIMIT ?""",
            (f"fact:%{needle}%", f"entity:%{needle}%", max_facts),
        ).fetchall()
        fact_ids = [r["entity_id"] for r in rows]
    return fact_ids


def load_entity_facts(db_path: str, entity_id: str, max_facts: int) -> tuple[list[dict], int]:
    """Load facts for an entity. Returns (facts, tx_watermark).

    Resolution strategy:
      1. `fact:` id — the fact's own attributes form a single-fact page.
      2. `entity:` id — every entity with an active ref triple pointing at
         it (any attribute), plus legacy rows that store the bare slug as a
         string under the 'entity' attribute, plus the entity's own
         attributes (listed first) when it has any.
      3. Bare slug — `entity:<slug>` if it exists, else `fact:<slug>*`, else
         a substring match across both prefixes.

    Each fact is a dict with "fact_id", one key per attribute (a scalar for
    single-valued attributes, a list otherwise), "tags" (always the list of
    'tag' values) and "confidence" coerced to float (0.5 when missing or
    unparsable). Facts are ordered by confidence descending, then by
    first_seen ascending, and truncated to *max_facts*.

    tx_watermark is the highest tx_id among the active triples of the loaded
    facts (0 when there are none).
    """
    from triplestore import TripleStore

    store = TripleStore(db_path)
    try:
        conn = store._conn
        fact_ids = _resolve_fact_ids(conn, entity_id, max_facts)

        facts: list[dict] = []
        tx_watermark = 0
        for fid in fact_ids:
            attrs = store.entity(fid)
            if not attrs:
                continue
            # NOTE(review): only active (retracted = 0) triples count, so a
            # retraction can leave the watermark unchanged — confirm cached
            # pages keyed on it are invalidated elsewhere.
            max_tx_row = conn.execute(
                "SELECT MAX(tx_id) AS m FROM triples WHERE entity_id = ? AND retracted = 0",
                (fid,),
            ).fetchone()
            if max_tx_row and max_tx_row["m"] is not None:
                tx_watermark = max(tx_watermark, max_tx_row["m"])

            fact = {"fact_id": fid}
            for attr, values in attrs.items():
                if attr == "tag":
                    fact["tags"] = values
                else:
                    fact[attr] = values[0] if len(values) == 1 else values
            try:
                fact["confidence"] = float(fact.get("confidence", 0.5))
            except (ValueError, TypeError):
                fact["confidence"] = 0.5
            facts.append(fact)
    finally:
        # Close on every path, including errors and the bare-slug resolution.
        store.close()

    # Highest confidence first; ties broken by first_seen ascending. The key
    # is stringified so a multi-valued first_seen cannot raise on comparison.
    facts.sort(key=lambda f: (-f["confidence"], str(f.get("first_seen") or "")))
    return facts[:max_facts], tx_watermark
195
+
196
+
197
def _prompt_text(value: object) -> str:
    """Flatten a fact attribute (str, multi-valued list, other, or None) to text."""
    if not value:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "; ".join(str(item) for item in value)
    return str(value)


def build_user_prompt(entity_id: str, facts: list[dict]) -> str:
    """Build the compact fact listing sent to the LLM as the user message.

    Emits a header (entity id, fact count) followed by one line per fact:
    ``- [fact_id] (conf=..., domain=... | first_seen=... | tags=...): value``.
    Values are flattened to a single line and capped at 300 characters,
    first_seen is cut to its first 10 characters (YYYY-MM-DD) and at most
    five tags are listed. Metadata that is absent is simply omitted.

    Multi-valued or non-string attributes are flattened to text instead of
    raising, since the loader returns a list for any attribute with more
    than one value.
    """
    lines = [f"Entity: {entity_id}", f"Total facts: {len(facts)}", "", "Facts:"]
    for fact in facts:
        fid = fact.get("fact_id", "?")
        value = _prompt_text(fact.get("value")).strip().replace("\n", " ")[:300]
        conf = fact.get("confidence", "?")
        domain = fact.get("domain", "")
        first_seen = _prompt_text(fact.get("first_seen"))[:10]  # YYYY-MM-DD

        tags = fact.get("tags") or []
        if isinstance(tags, str):
            tags = [tags]
        tag_text = ",".join(str(tag) for tag in list(tags)[:5])

        meta_bits = []
        if domain:
            meta_bits.append(f"domain={domain}")
        if first_seen:
            meta_bits.append(f"first_seen={first_seen}")
        if tag_text:
            meta_bits.append(f"tags={tag_text}")
        meta = " | ".join(meta_bits)

        suffix = f", {meta}" if meta else ""
        lines.append(f"- [{fid}] (conf={conf}{suffix}): {value}")
    return "\n".join(lines)
214
+
215
+
216
def _page_text(value: object) -> str:
    """Coerce a fact value (str, multi-valued list, other, or None) to display text."""
    if not value:
        return ""
    if isinstance(value, str):
        return value
    if isinstance(value, (list, tuple)):
        return "; ".join(str(item) for item in value)
    return str(value)


def render_page(db_path: str, entity_id: str, max_facts: int = 1000) -> dict:
    """Render the page dict for *entity_id*.

    Loads up to *max_facts* facts, then:
      * no facts      -> an empty page with a placeholder summary;
      * exactly one   -> a single "Overview" section, no LLM call;
      * otherwise     -> asks the LLM to group the facts and validates the
                         reply; any failure falls back to _fallback_page.

    Validation never raises on malformed LLM output: bullets whose fact_id is
    not one of the loaded facts are dropped and counted in
    stats.dropped_bullets, sections without a heading or without surviving
    bullets are skipped, and non-string fields are ignored or coerced.
    Bullet confidence/domain/first_seen always come from the stored fact,
    not from the LLM.

    "facts_used" is the number of distinct facts cited by surviving bullets.
    """
    facts, tx_watermark = load_entity_facts(db_path, entity_id, max_facts)
    fact_count_total = len(facts)

    if not facts:
        return {
            "entity": entity_id,
            "tx_watermark": tx_watermark,
            "fact_count": 0,
            "facts_used": 0,
            "summary": "No knowledge captured for this entity yet.",
            "sections": [],
            "stats": {"tokens_in": 0, "tokens_out": 0, "dropped_bullets": 0,
                      "from_cache": False},
        }

    # Single-fact short-circuit (no LLM needed)
    if fact_count_total == 1:
        f = facts[0]
        only_value = _page_text(f.get("value"))
        return {
            "entity": entity_id,
            "tx_watermark": tx_watermark,
            "fact_count": 1,
            "facts_used": 1,
            "summary": only_value[:200],
            "sections": [{
                "heading": "Overview",
                "bullets": [{
                    "fact_id": f["fact_id"],
                    "text": only_value[:140],
                    "confidence": f.get("confidence", 0.5),
                    "domain": f.get("domain"),
                    "first_seen": f.get("first_seen"),
                }],
            }],
            "stats": {"tokens_in": 0, "tokens_out": 0, "dropped_bullets": 0,
                      "from_cache": False},
        }

    # Multi-fact: LLM rendering
    user_prompt = build_user_prompt(entity_id, facts)

    try:
        raw = call_llm_with_fallback(
            SYSTEM_PROMPT, user_prompt, script="page_renderer",
            json_mode=True, retries=1,
        )
        parsed = extract_json(raw)
    except Exception as e:
        # Deliberately broad — covers LLMError, ValueError, missing API key,
        # JSON parse errors, network errors, etc. The web UI always wants a
        # renderable response; degraded > broken.
        sys.stderr.write(f"page_renderer LLM failed: {e}\n")
        return _fallback_page(entity_id, facts, tx_watermark, error=str(e))

    if not isinstance(parsed, dict):
        return _fallback_page(entity_id, facts, tx_watermark, error="LLM did not return object")

    # Validate fact_ids — drop hallucinated ones
    fact_meta = {f["fact_id"]: f for f in facts}
    sections_in = parsed.get("sections")
    if not isinstance(sections_in, list):
        sections_in = []
    sections_out: list[dict] = []
    used_fids: set = set()
    dropped = 0

    for sec in sections_in:
        if not isinstance(sec, dict):
            continue
        heading = str(sec.get("heading") or "").strip()[:80]
        if not heading:
            continue
        bullets_in = sec.get("bullets")
        if not isinstance(bullets_in, list):
            continue
        bullets_out: list[dict] = []
        for b in bullets_in:
            if not isinstance(b, dict):
                continue
            fid = b.get("fact_id")
            # isinstance guard also keeps unhashable ids from raising.
            if not isinstance(fid, str) or fid not in fact_meta:
                dropped += 1
                continue
            meta = fact_meta[fid]
            llm_text = b.get("text")
            if isinstance(llm_text, str) and llm_text:
                text = llm_text
            else:
                text = _page_text(meta.get("value"))
            bullets_out.append({
                "fact_id": fid,
                "text": text[:200],
                "confidence": meta.get("confidence", 0.5),
                "domain": meta.get("domain"),
                "first_seen": meta.get("first_seen"),
            })
            used_fids.add(fid)
        if bullets_out:
            sec_out = {"heading": heading, "bullets": bullets_out}
            if sec.get("notes"):
                sec_out["notes"] = str(sec["notes"])[:300]
            sections_out.append(sec_out)

    summary_raw = parsed.get("summary")
    summary = summary_raw.strip()[:1000] if isinstance(summary_raw, str) else ""
    if not summary:
        summary = f"Knowledge about {entity_id}: {fact_count_total} facts."

    return {
        "entity": entity_id,
        "tx_watermark": tx_watermark,
        "fact_count": fact_count_total,
        # Distinct facts that actually made it onto the page. Subtracting
        # dropped bullets from the fact count mixed two unrelated quantities
        # and could go negative.
        "facts_used": len(used_fids),
        "summary": summary,
        "sections": sections_out,
        "stats": {
            "tokens_in": 0,  # not aggregated here
            "tokens_out": 0,
            "dropped_bullets": dropped,
            "from_cache": False,
        },
    }
325
+
326
+
327
def _fallback_page(entity_id: str, facts: list[dict], tx_watermark: int, error: str = "") -> dict:
    """Build an ungrouped page used when LLM rendering is unavailable.

    Every fact becomes one bullet (text capped at 140 characters) inside a
    single "All Facts" section. stats carries fallback=True and the first
    200 characters of *error*.
    """
    total = len(facts)

    all_bullets = []
    for fact in facts:
        all_bullets.append({
            "fact_id": fact["fact_id"],
            "text": (fact.get("value") or "")[:140],
            "confidence": fact.get("confidence", 0.5),
            "domain": fact.get("domain"),
            "first_seen": fact.get("first_seen"),
        })

    stats = {
        "tokens_in": 0,
        "tokens_out": 0,
        "dropped_bullets": 0,
        "from_cache": False,
        "fallback": True,
        "error": error[:200],
    }
    page = {
        "entity": entity_id,
        "tx_watermark": tx_watermark,
        "fact_count": total,
        "facts_used": total,
        "summary": f"Knowledge about {entity_id} ({total} facts). LLM rendering unavailable.",
        "sections": [{"heading": "All Facts", "bullets": all_bullets}],
        "stats": stats,
    }
    return page
346
+
347
+
348
def lookup_cache(web_db_path: str, entity_id: str, tx_watermark: int) -> dict | None:
    """Read the page_cache row matching (entity_id, tx_watermark).

    Returns the cached page with stats.from_cache set to True and
    "generated_at" copied from the row, or None when the database file is
    missing, no row matches, or the lookup fails (failures are reported on
    stderr, never raised).
    """
    import sqlite3
    if not Path(web_db_path).exists():
        return None
    try:
        conn = sqlite3.connect(web_db_path)
        try:
            conn.row_factory = sqlite3.Row
            row = conn.execute(
                "SELECT page_json, generated_at FROM page_cache WHERE entity_id = ? AND tx_watermark = ?",
                (entity_id, tx_watermark),
            ).fetchone()
        finally:
            # Close even when the query raises (e.g. page_cache missing).
            conn.close()
        if row is not None:
            page = json.loads(row["page_json"])
            page.setdefault("stats", {})["from_cache"] = True
            page["generated_at"] = row["generated_at"]
            return page
    except Exception as e:
        sys.stderr.write(f"page_renderer cache lookup failed: {e}\n")
    return None
369
+
370
+
371
def write_cache(web_db_path: str, entity_id: str, page: dict) -> None:
    """Persist a freshly rendered page to web.db's page_cache table.

    The row is keyed by (entity_id, page["tx_watermark"]) via INSERT OR
    REPLACE and stamped with the current time in epoch milliseconds. Token
    and cost columns are taken from page["stats"] when present. Best-effort:
    failures are reported on stderr and never raised.
    """
    import sqlite3
    import time
    try:
        conn = sqlite3.connect(web_db_path)
        try:
            stats = page.get("stats") or {}
            conn.execute(
                """INSERT OR REPLACE INTO page_cache
                   (entity_id, tx_watermark, page_json, generated_at, tokens_in, tokens_out, cost_usd)
                   VALUES (?, ?, ?, ?, ?, ?, ?)""",
                (
                    entity_id,
                    page.get("tx_watermark", 0),
                    json.dumps(page, ensure_ascii=False),
                    int(time.time() * 1000),
                    stats.get("tokens_in"),
                    stats.get("tokens_out"),
                    stats.get("cost_usd"),
                ),
            )
            conn.commit()
        finally:
            # Close even when the insert raises (e.g. page_cache missing).
            conn.close()
    except Exception as e:
        sys.stderr.write(f"page_renderer cache write failed: {e}\n")
396
+
397
+
398
def main() -> None:
    """CLI entry point: print the rendered page for --entity as JSON on stdout.

    Exits with status 1 (after printing a JSON error) when --db does not
    exist. With --web-db, a cached page matching the current tx_watermark is
    returned unless --refresh is given, and freshly rendered non-empty pages
    are written back to the cache.
    """
    cli = argparse.ArgumentParser(description="Page Renderer")
    cli.add_argument("--db", required=True, help="Path to knowledge-graph.db")
    cli.add_argument("--entity", required=True, help="Entity id (entity:* or fact:* or bare slug)")
    cli.add_argument("--max-facts", type=int, default=1000, help="Max facts to consider")
    cli.add_argument(
        "--web-db",
        default=None,
        help="Path to web.db for page cache (optional). If provided, hits and writes cache.",
    )
    cli.add_argument(
        "--refresh",
        action="store_true",
        help="Bypass cache and always re-render via LLM.",
    )
    opts = cli.parse_args()

    if not Path(opts.db).exists():
        print(json.dumps({"error": f"db not found: {opts.db}"}))
        sys.exit(1)

    # The watermark is the cache key, so it is needed before deciding whether
    # an LLM render is required at all.
    facts, tx_watermark = load_entity_facts(opts.db, opts.entity, opts.max_facts)

    # Cache hit fast-path
    if opts.web_db and not opts.refresh:
        hit = lookup_cache(opts.web_db, opts.entity, tx_watermark)
        if hit:
            print(json.dumps(hit, ensure_ascii=False))
            return

    if facts:
        # render_page loads the facts again internally; the duplicate load is
        # accepted to keep render_page self-contained.
        page = render_page(opts.db, opts.entity, max_facts=opts.max_facts)
    else:
        page = {
            "entity": opts.entity,
            "tx_watermark": tx_watermark,
            "fact_count": 0,
            "facts_used": 0,
            "summary": "No knowledge captured for this entity yet.",
            "sections": [],
            "stats": {"tokens_in": 0, "tokens_out": 0, "dropped_bullets": 0, "from_cache": False},
        }

    # Empty pages are never cached.
    if opts.web_db and page.get("fact_count", 0) > 0:
        write_cache(opts.web_db, opts.entity, page)

    print(json.dumps(page, ensure_ascii=False))


if __name__ == "__main__":
    main()
@@ -0,0 +1,236 @@
1
+ #!/usr/bin/env python3
2
+ """Fact retraction (soft-delete) for the web UI.
3
+
4
+ The triplestore already supports retraction via store.retract_triple() —
5
+ this script is the user-initiated equivalent of what knowledge_integrator
6
+ does automatically. The new ingredients are:
7
+
8
+ 1. Audit triples (retracted_reason, retracted_by) so the WHY survives.
9
+ 2. Pre-retraction snapshot saved to web.db.retraction_undo, single-use,
10
+ 10-minute TTL — gives the UI a real "undo" button.
11
+
12
+ Soft delete: rows stay; retracted=1 + retracted_tx + valid_to are set.
13
+ Bi-temporal queries (entity_as_of) still see the fact at past tx_ids.
14
+ Physical removal only happens via gc_retracted_triples (off by default).
15
+
16
+ Usage:
17
+ python3 retract.py --retract --db <db> --web-db <web.db> \
18
+ --fact-id fact:foo [--reason "..."] [--actor "..."]
19
+
20
+ python3 retract.py --restore --db <db> --web-db <web.db> \
21
+ --fact-id fact:foo --undo-token <token>
22
+ """
23
+ from __future__ import annotations
24
+
25
+ import argparse
26
+ import json
27
+ import secrets
28
+ import sqlite3
29
+ import sys
30
+ import time
31
+ from pathlib import Path
32
+
33
# Lifetime of an undo token, in milliseconds. retract_fact adds it to the
# current epoch-millisecond time to compute the snapshot's expires_at, which
# restore_fact compares against the current time before honouring a token.
UNDO_TTL_MS = 10 * 60 * 1000  # 10 minutes
34
+
35
+
36
def snapshot_triples(store, fact_id: str) -> list[dict]:
    """Return one dict per active (retracted = 0) triple of *fact_id*.

    Each dict holds attribute, value and value_type, plus the triple's tx_id
    and created_at under "original_tx_id" / "original_created_at". An
    unknown or fully retracted fact yields an empty list.
    """
    query = """SELECT attribute, value, value_type, tx_id, created_at
           FROM triples
           WHERE entity_id = ? AND retracted = 0"""
    captured: list[dict] = []
    for row in store._conn.execute(query, (fact_id,)).fetchall():
        captured.append({
            "attribute": row["attribute"],
            "value": row["value"],
            "value_type": row["value_type"],
            "original_tx_id": row["tx_id"],
            "original_created_at": row["created_at"],
        })
    return captured
55
+
56
+
57
def retract_fact(db_path: str, web_db_path: str, fact_id: str,
                 reason: str | None, actor: str | None,
                 source_entity: str | None = None) -> dict:
    """Retract all active triples of a fact and persist an undo snapshot.

    Steps:
      1. Snapshot the fact's active triples; if there are none, return
         ``{"ok": False, "error": ...}`` without opening a transaction.
      2. In one triplestore transaction, retract every snapshotted triple and
         assert audit triples (retracted_reason / retracted_by) for whichever
         of *reason* / *actor* is given.
      3. If web.db exists, store the snapshot under a fresh single-use token
         in retraction_undo and append a retraction_log row (which also
         records *source_entity*).

    Returns a dict with ok, fact_id, retracted, retracted_tx,
    triples_retracted, undo_token, expires_at and undo_available.
    undo_available is False when the snapshot could not be persisted (web.db
    missing or the insert failed), in which case undo_token cannot be
    redeemed; the failure itself is reported on stderr, not raised.
    """
    from triplestore import TripleStore

    store = TripleStore(db_path)
    try:
        snapshot = snapshot_triples(store, fact_id)
        if not snapshot:
            return {"ok": False, "error": "fact not found or already retracted",
                    "fact_id": fact_id}

        metadata = {"actor": actor, "reason": reason, "source": "web-ui"}
        tx_id = store.begin_tx(source="web-ui-retract",
                               metadata={k: v for k, v in metadata.items() if v})

        # Retract every active triple
        triples_retracted = 0
        for t in snapshot:
            triples_retracted += store.retract_triple(
                tx_id, fact_id, t["attribute"], t["value"],
            )

        # Audit triples — these are NEW assertions ABOUT the retraction event
        if reason:
            store.assert_triple(tx_id, fact_id, "retracted_reason", reason, "string")
        if actor:
            store.assert_triple(tx_id, fact_id, "retracted_by", actor, "string")
    finally:
        # Release the triplestore handle on every path, including errors.
        store.close()

    # Persist undo snapshot
    token = secrets.token_hex(16)
    now_ms = int(time.time() * 1000)
    expires_at = now_ms + UNDO_TTL_MS
    undo_available = False

    if Path(web_db_path).exists():
        try:
            conn = sqlite3.connect(web_db_path)
            try:
                conn.execute(
                    """INSERT INTO retraction_undo
                       (token, fact_id, snapshot_json, retracted_tx,
                        reason, actor, created_at, expires_at)
                       VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                    (token, fact_id, json.dumps(snapshot),
                     tx_id, reason, actor, now_ms, expires_at),
                )
                conn.execute(
                    """INSERT INTO retraction_log
                       (ts, fact_id, reason, actor, source_entity)
                       VALUES (?, ?, ?, ?, ?)""",
                    (now_ms, fact_id, reason, actor, source_entity),
                )
                conn.commit()
                undo_available = True
            finally:
                conn.close()
        except Exception as e:
            # Best-effort: the retraction itself already happened.
            sys.stderr.write(f"undo persist failed: {e}\n")

    return {
        "ok": True,
        "fact_id": fact_id,
        "retracted": True,
        "retracted_tx": tx_id,
        "triples_retracted": triples_retracted,
        "undo_token": token,
        "expires_at": expires_at,
        "undo_available": undo_available,
    }
125
+
126
+
127
def restore_fact(db_path: str, web_db_path: str, fact_id: str,
                 undo_token: str) -> dict:
    """Undo a retraction using the single-use token issued by retract_fact.

    The token must exist for *fact_id* in retraction_undo, be unconsumed and
    unexpired; otherwise ``{"ok": False, "error": ...}`` is returned and the
    triplestore is not touched. On success the triples closed by the original
    retraction tx are flipped back to active (keeping their original tx_id
    and created_at), the audit triples written at retraction time are
    retracted, the token is marked consumed and the latest open
    retraction_log row for the fact gets undone_at.

    Returns a dict with ok, fact_id, restored, restored_tx, triples_restored.
    """
    if not Path(web_db_path).exists():
        return {"ok": False, "error": "web.db not available"}

    conn = sqlite3.connect(web_db_path)
    try:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM retraction_undo WHERE token = ? AND fact_id = ?",
            (undo_token, fact_id),
        ).fetchone()

        if row is None:
            return {"ok": False, "error": "undo token not found"}
        if row["consumed_at"] is not None:
            return {"ok": False, "error": "undo token already consumed"}
        if row["expires_at"] < int(time.time() * 1000):
            return {"ok": False, "error": "undo token expired"}

        original_retracted_tx = row["retracted_tx"]

        # Imported only once the token is accepted, so rejected requests
        # never load or open the triplestore.
        from triplestore import TripleStore

        store = TripleStore(db_path)
        try:
            tx_id = store.begin_tx(source="web-ui-restore",
                                   metadata={"undo_token": undo_token,
                                             "reverses_tx": original_retracted_tx})

            # Un-retract: flip retracted=0 on triples that were closed by the
            # original retraction tx. Avoids creating duplicate triples — the
            # originals come back with their original tx_ids and created_at
            # intact, preserving bi-temporal history.
            cur = store._conn.execute(
                """UPDATE triples SET retracted = 0, retracted_tx = NULL, valid_to = NULL
                   WHERE entity_id = ? AND retracted_tx = ?""",
                (fact_id, original_retracted_tx),
            )
            triples_restored = cur.rowcount

            # Also retract the audit triples written during retraction so
            # they don't linger as active facts on the restored entity.
            store.retract_triple(tx_id, fact_id, "retracted_reason")
            store.retract_triple(tx_id, fact_id, "retracted_by")

            store._conn.commit()
        finally:
            store.close()

        # Mark consumed + log undo
        done_ms = int(time.time() * 1000)
        conn.execute(
            "UPDATE retraction_undo SET consumed_at = ? WHERE token = ?",
            (done_ms, undo_token),
        )
        conn.execute(
            """UPDATE retraction_log SET undone_at = ?
               WHERE rowid = (
                   SELECT rowid FROM retraction_log
                   WHERE fact_id = ? AND undone_at IS NULL
                   ORDER BY ts DESC LIMIT 1
               )""",
            (done_ms, fact_id),
        )
        conn.commit()
    finally:
        # Single exit point for the web.db handle, including error paths.
        conn.close()

    return {
        "ok": True,
        "fact_id": fact_id,
        "restored": True,
        "restored_tx": tx_id,
        "triples_restored": triples_restored,
    }
202
+
203
+
204
def main() -> None:
    """CLI entry point: retract or restore a fact and print the result as JSON.

    Exactly one of --retract / --restore is required. Exits with status 1
    (after printing a JSON error) when --db does not exist or when --restore
    is used without --undo-token.
    """
    cli = argparse.ArgumentParser(description="Fact retraction / restore")
    cli.add_argument("--db", required=True, help="Knowledge graph DB path")
    cli.add_argument("--web-db", required=True, help="Web metadata DB path")
    cli.add_argument("--fact-id", required=True, help="Fact entity id (e.g. fact:foo)")
    action = cli.add_mutually_exclusive_group(required=True)
    action.add_argument("--retract", action="store_true")
    action.add_argument("--restore", action="store_true")
    cli.add_argument("--reason", default=None)
    cli.add_argument("--actor", default=None)
    cli.add_argument(
        "--source-entity",
        default=None,
        help="Entity page user was on when retracting (telemetry)",
    )
    cli.add_argument("--undo-token", default=None)
    opts = cli.parse_args()

    if not Path(opts.db).exists():
        print(json.dumps({"ok": False, "error": f"db not found: {opts.db}"}))
        sys.exit(1)

    if opts.retract:
        result = retract_fact(
            opts.db, opts.web_db, opts.fact_id,
            opts.reason, opts.actor, opts.source_entity,
        )
    elif not opts.undo_token:
        # --restore without a token cannot be honoured.
        print(json.dumps({"ok": False, "error": "--undo-token required for --restore"}))
        sys.exit(1)
    else:
        result = restore_fact(opts.db, opts.web_db, opts.fact_id, opts.undo_token)

    print(json.dumps(result, ensure_ascii=False))


if __name__ == "__main__":
    main()