scriptoria 0.4.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. {scriptoria-0.4.0 → scriptoria-0.5.0}/PKG-INFO +1 -1
  2. {scriptoria-0.4.0 → scriptoria-0.5.0}/pyproject.toml +1 -1
  3. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/__init__.py +1 -1
  4. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/anchors.py +27 -0
  5. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/cli.py +43 -3
  6. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/facts.py +97 -7
  7. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/query.py +21 -3
  8. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_fact_cmd.py +116 -0
  9. scriptoria-0.5.0/tests/test_query.py +109 -0
  10. scriptoria-0.5.0/tests/test_span_cmd.py +86 -0
  11. {scriptoria-0.4.0 → scriptoria-0.5.0}/uv.lock +1 -1
  12. scriptoria-0.4.0/tests/test_query.py +0 -58
  13. {scriptoria-0.4.0 → scriptoria-0.5.0}/.gitignore +0 -0
  14. {scriptoria-0.4.0 → scriptoria-0.5.0}/README.md +0 -0
  15. {scriptoria-0.4.0 → scriptoria-0.5.0}/pyrightconfig.json +0 -0
  16. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/blocks.py +0 -0
  17. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/embeddings.py +0 -0
  18. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/errors.py +0 -0
  19. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/frontmatter.py +0 -0
  20. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/graph.py +0 -0
  21. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/hashing.py +0 -0
  22. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/ingest.py +0 -0
  23. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/lock.py +0 -0
  24. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/manifest.py +0 -0
  25. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/retrieval.py +0 -0
  26. {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/similar.py +0 -0
  27. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/conftest.py +0 -0
  28. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_anchor_cmd.py +0 -0
  29. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_anchors.py +0 -0
  30. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_blocks.py +0 -0
  31. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_embeddings.py +0 -0
  32. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_graph_status.py +0 -0
  33. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_hashing.py +0 -0
  34. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_index_cmd.py +0 -0
  35. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_ingest.py +0 -0
  36. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_json_shapes.py +0 -0
  37. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_lock.py +0 -0
  38. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_manifest.py +0 -0
  39. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_new_cmd.py +0 -0
  40. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_retrieval.py +0 -0
  41. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_similar_cmd.py +0 -0
  42. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_stamp.py +0 -0
  43. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_status_cmd.py +0 -0
  44. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_unlock_cmd.py +0 -0
  45. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_verify.py +0 -0
  46. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_version.py +0 -0
  47. {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_watch.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: scriptoria
3
- Version: 0.4.0
3
+ Version: 0.5.0
4
4
  Summary: Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base
5
5
  Project-URL: Homepage, https://github.com/coredipper/scriptorium
6
6
  Project-URL: Changelog, https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md
@@ -2,7 +2,7 @@
2
2
  # Distribution name on PyPI is `scriptoria` (scrip/scriptorium were taken); the
3
3
  # CLI command and the import package both remain `scrip`.
4
4
  name = "scriptoria"
5
- version = "0.4.0"
5
+ version = "0.5.0"
6
6
  description = "Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base"
7
7
  readme = "README.md"
8
8
  requires-python = ">=3.10"
@@ -13,7 +13,7 @@ from __future__ import annotations
13
13
 
14
14
  from pathlib import Path
15
15
 
16
- __version__ = "0.4.0"
16
+ __version__ = "0.5.0"
17
17
 
18
18
  # --- canonical vault layout ------------------------------------------------
19
19
  # ``root`` is the repo/instance root: the directory containing ``vault/``.
@@ -90,6 +90,33 @@ def resolve(source_text: str, anchor: str) -> str:
90
90
  return "OK" if matches == 1 else "AMBIGUOUS"
91
91
 
92
92
 
93
def span(source_text: str, anchor: str) -> tuple[str, str | None]:
    """Resolve ``anchor`` against ``source_text`` and return ``(status, cited_text)``.

    The source is normalized, then every window of the anchor's ``len`` is
    hashed with SHA-256 and compared with the anchor's ``qh`` digest.

    Returns:
        ``("BROKEN", None)`` when the length is non-positive, exceeds the
        normalized source, or no window matches; ``("OK", text)`` for exactly
        one match; ``("AMBIGUOUS", text)`` for several matches, where ``text``
        is the window whose start lies closest to ``loc * len(normalized)``.
    """
    parsed = parse_anchor(anchor)
    normalized = normalize(source_text)
    width = parsed["len"]
    digest = parsed["qh"]
    hint = parsed["loc"]
    total = len(normalized)

    if width <= 0 or width > total:
        return "BROKEN", None

    # Collect the start offset of every window whose hash equals the digest.
    starts = []
    for offset in range(total - width + 1):
        window = normalized[offset : offset + width]
        if hashlib.sha256(window.encode("utf-8")).hexdigest() == digest:
            starts.append(offset)

    if not starts:
        return "BROKEN", None

    if len(starts) > 1:
        # Several candidates: pick the one nearest the anchor's location hint.
        expected = hint * total
        best = min(starts, key=lambda s: abs(s - expected))
        return "AMBIGUOUS", normalized[best : best + width]

    only = starts[0]
    return "OK", normalized[only : only + width]
118
+
119
+
93
120
  # --------------------------------------------------------------------------- #
94
121
  # Vault-wide verification
95
122
  # --------------------------------------------------------------------------- #
@@ -352,6 +352,31 @@ def _parse_source_ids(raw: str) -> list[str]:
352
352
  return ids
353
353
 
354
354
 
355
def cmd_span(args: argparse.Namespace) -> int:
    """Resolve an anchor and print the text it cites (``scrip span``).

    The anchor comes either from ``--claim ID`` (looked up through
    ``facts.claim_source_anchor``) or from a positional
    ``raw/<slug>#<anchor>`` target. A target lacking the ``raw/`` prefix gets
    it added before the slug part is checked with ``_safe_slug``.

    Returns:
        ``0`` when the anchor resolves to ``OK``, ``1`` for any other status.

    Raises:
        errors.UsageError: if the positional target contains no ``#``.
    """
    from . import anchors

    root = resolve_root(args.root)
    # Compare against None rather than relying on truthiness: ``--claim ""``
    # satisfies the parser's mutually exclusive group while ``args.target``
    # stays None, so a truthiness test would fall through to the target branch
    # and crash with a TypeError on ``"#" not in None``.
    if args.claim is not None:
        from . import facts

        source_id, anchor = facts.claim_source_anchor(root, args.claim)
    else:
        # None-safe so a missing target is reported as a usage error too.
        target = args.target or ""
        if "#" not in target:
            raise errors.UsageError("target must be raw/<slug>#<anchor>")
        source_id, anchor = target.split("#", 1)
        if not source_id.startswith("raw/"):
            source_id = f"raw/{source_id}"
        _safe_slug(source_id[len("raw/") :], "source")

    text = anchors.source_text(root, source_id)
    status, cited = anchors.span(text, anchor)
    if args.json:
        _emit({"target": f"{source_id}#{anchor}", "status": status, "text": cited})
    else:
        print(f"[{status}] {source_id}")
        if cited is not None:
            print(cited)
    return 0 if status == "OK" else 1
378
+
379
+
355
380
  def cmd_similar(args: argparse.Namespace) -> int:
356
381
  from . import similar
357
382
 
@@ -388,7 +413,12 @@ def cmd_fact_add(args: argparse.Namespace) -> int:
388
413
  _emit(result)
389
414
  else:
390
415
  for r in result["appended"]:
391
- ident = r.get("claim_id") or r.get("entity_id") or f"{r['src']} -> {r['dst']}"
416
+ ident = (
417
+ r.get("claim_id")
418
+ or r.get("entity_id")
419
+ or r.get("reconciliation_id")
420
+ or f"{r.get('src')} -> {r.get('dst')}"
421
+ )
392
422
  print(f" appended {ident}")
393
423
  for s in result["skipped"]:
394
424
  print(f" = record {s['index']} skipped (duplicate)")
@@ -517,7 +547,7 @@ def build_parser() -> argparse.ArgumentParser:
517
547
  pq.add_argument(
518
548
  "name",
519
549
  nargs="?",
520
- choices=["claims", "entities", "edges", "contradictions"],
550
+ choices=["claims", "entities", "edges", "contradictions", "reconciliations"],
521
551
  help="a named query (omit when using --sql)",
522
552
  )
523
553
  pq.add_argument("--sql", help="raw DuckDB SQL (views: claims, entities, edges)")
@@ -585,6 +615,16 @@ def build_parser() -> argparse.ArgumentParser:
585
615
  pn.add_argument("--title", help="human title (default: the slug)")
586
616
  pn.set_defaults(func=cmd_new)
587
617
 
618
+ psp = sub.add_parser(
619
+ "span",
620
+ parents=[common],
621
+ help="resolve an anchor and print the cited text (read both sides of a contradiction)",
622
+ )
623
+ span_src = psp.add_mutually_exclusive_group(required=True)
624
+ span_src.add_argument("target", nargs="?", metavar="raw/<slug>#<anchor>", help="anchor target")
625
+ span_src.add_argument("--claim", metavar="ID", help="resolve this claim's anchor instead")
626
+ psp.set_defaults(func=cmd_span)
627
+
588
628
  psim = sub.add_parser(
589
629
  "similar",
590
630
  parents=[common],
@@ -629,7 +669,7 @@ def build_parser() -> argparse.ArgumentParser:
629
669
  )
630
670
  pfa.add_argument(
631
671
  "--table",
632
- choices=["claims", "entities", "edges"],
672
+ choices=["claims", "entities", "edges", "reconciliations"],
633
673
  default="claims",
634
674
  help="facts table to append to (default: claims)",
635
675
  )
@@ -36,10 +36,12 @@ _FILES = {
36
36
  "claims": "claims.ndjson",
37
37
  "entities": "entities.ndjson",
38
38
  "edges": "graph.ndjson",
39
+ "reconciliations": "reconciliations.ndjson",
39
40
  }
40
41
 
41
42
  # Fields scrip mints itself; proposing them is a schema error, not a finding.
42
43
  _SCRIP_OWNED = ("claim_id", "anchor", "extracted_at")
44
+ _RECON_OWNED = ("reconciliation_id", "at")
43
45
 
44
46
  _CLAIM_REQUIRED = ("quote", "source_id", "subject", "predicate", "object", "polarity", "confidence")
45
47
  _CLAIM_ALLOWED = frozenset((*_CLAIM_REQUIRED, "claim_text", "tags"))
@@ -47,12 +49,16 @@ _ENTITY_REQUIRED = ("entity_id", "name", "kind")
47
49
  _ENTITY_ALLOWED = frozenset((*_ENTITY_REQUIRED, "tags"))
48
50
  _EDGE_REQUIRED = ("src", "dst", "kind")
49
51
  _EDGE_ALLOWED = frozenset(_EDGE_REQUIRED)
52
+ _DECISIONS = ("supersede", "qualify", "keep-both")
53
+ _RECON_REQUIRED = ("decision", "claim_a", "claim_b")
54
+ _RECON_ALLOWED = frozenset((*_RECON_REQUIRED, "winner", "rationale"))
50
55
 
51
56
  # Same conservative shape ``cli._safe_slug`` enforces — no path separators,
52
57
  # '..', or leading dot — applied to source ids arriving as record *data*.
53
58
  _SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
54
59
 
55
60
  _CLAIM_ID_RE = re.compile(r"clm_(\d+)")
61
+ _RECON_ID_RE = re.compile(r"rec_(\d+)")
56
62
 
57
63
 
58
64
  def _now() -> str:
@@ -96,12 +102,18 @@ def _check_tags(rec: dict, index: int) -> None:
96
102
  raise DataError(f"record {index}: 'tags' must be a list of strings")
97
103
 
98
104
 
99
- def _check_shape(rec: dict, index: int, required: tuple[str, ...], allowed: frozenset[str]) -> None:
100
- owned = [k for k in _SCRIP_OWNED if k in rec]
101
- if owned:
105
+ def _check_shape(
106
+ rec: dict,
107
+ index: int,
108
+ required: tuple[str, ...],
109
+ allowed: frozenset[str],
110
+ owned: tuple[str, ...] = _SCRIP_OWNED,
111
+ ) -> None:
112
+ present = [k for k in owned if k in rec]
113
+ if present:
102
114
  raise DataError(
103
- f"record {index}: scrip mints {', '.join(owned)} itself — propose a "
104
- f"verbatim 'quote', not precomputed ids/anchors/timestamps"
115
+ f"record {index}: scrip mints {', '.join(present)} itself — do not "
116
+ f"propose precomputed ids/anchors/timestamps"
105
117
  )
106
118
  unknown = sorted(rec.keys() - allowed)
107
119
  if unknown:
@@ -136,10 +148,27 @@ def _validate(table: str, rec: dict, index: int) -> None:
136
148
  if not (eid.startswith("entity/") and _SLUG_RE.fullmatch(eid[len("entity/") :])):
137
149
  raise DataError(f"record {index}: entity_id must look like entity/<slug>")
138
150
  _check_tags(rec, index)
139
- else: # edges
151
+ elif table == "edges":
140
152
  _check_shape(rec, index, _EDGE_REQUIRED, _EDGE_ALLOWED)
141
153
  for key in _EDGE_REQUIRED:
142
154
  _check_str(rec, key, index)
155
+ else: # reconciliations
156
+ _check_shape(rec, index, _RECON_REQUIRED, _RECON_ALLOWED, owned=_RECON_OWNED)
157
+ for key in ("decision", "claim_a", "claim_b"):
158
+ _check_str(rec, key, index)
159
+ if rec["decision"] not in _DECISIONS:
160
+ raise DataError(f"record {index}: decision must be one of {', '.join(_DECISIONS)}")
161
+ if "rationale" in rec:
162
+ _check_str(rec, "rationale", index, allow_blank=True)
163
+ # winner is required for (and only for) supersede, and must be one of the pair
164
+ if rec["decision"] == "supersede":
165
+ winner = rec.get("winner")
166
+ if winner not in (rec["claim_a"], rec["claim_b"]):
167
+ raise DataError(
168
+ f"record {index}: supersede needs 'winner' = claim_a or claim_b"
169
+ )
170
+ elif "winner" in rec:
171
+ raise DataError(f"record {index}: 'winner' is only valid for decision 'supersede'")
143
172
 
144
173
 
145
174
  # --------------------------------------------------------------------------- #
@@ -218,6 +247,19 @@ def _read_table(path: Path) -> tuple[list[dict], str]:
218
247
  return records, text
219
248
 
220
249
 
250
def claim_source_anchor(root: Path, claim_id: str) -> tuple[str, str]:
    """Look up the ``(source_id, anchor)`` pair stored on a claim.

    Used by `scrip span --claim`. Scans ``claims.ndjson`` in the facts
    directory and uses the first record whose ``claim_id`` matches.

    Raises:
        DataError: if no record matches, or the matching record's
            ``source_id`` or ``anchor`` is not a string.
    """
    claims, _text = _read_table(facts_dir(root) / "claims.ndjson")
    match = next((c for c in claims if c.get("claim_id") == claim_id), None)
    if match is None:
        raise DataError(f"no such claim: {claim_id}")

    source_id = match.get("source_id")
    anchor = match.get("anchor")
    if isinstance(source_id, str) and isinstance(anchor, str):
        return source_id, anchor
    raise DataError(f"claim {claim_id} is missing source_id/anchor")
261
+
262
+
221
263
  def _claim_key(source_id: str, qh: str, rec: dict) -> tuple:
222
264
  return (
223
265
  source_id,
@@ -251,6 +293,17 @@ def _next_claim_id(existing: list[dict]) -> tuple[int, int]:
251
293
  return highest + 1, max(4, len(str(highest)))
252
294
 
253
295
 
296
def _next_recon_id(existing: list[dict]) -> tuple[int, int]:
    """Compute where the ``rec_NNNN`` id sequence resumes.

    Only records whose ``reconciliation_id`` fully matches ``_RECON_ID_RE``
    are counted; when none do, numbering starts at 1.

    Returns:
        ``(next_number, pad_width)``: one past the highest number seen, and a
        zero-pad width of at least 4 that grows with the digit count of that
        highest number.
    """
    highest = 0
    for rec in existing:
        found = _RECON_ID_RE.fullmatch(str(rec.get("reconciliation_id", "")))
        if found is not None:
            highest = max(highest, int(found.group(1)))
    return highest + 1, max(4, len(str(highest)))
305
+
306
+
254
307
  # --------------------------------------------------------------------------- #
255
308
  # facts/_meta.yaml: merge derived-from, never stamp
256
309
  # --------------------------------------------------------------------------- #
@@ -394,7 +447,7 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
394
447
  "detail": "an entity with this id already exists with different fields",
395
448
  }
396
449
  )
397
- else: # edges
450
+ elif table == "edges":
398
451
  seen_edges = {
399
452
  (rec.get("src"), rec.get("dst"), rec.get("kind")) for rec in existing
400
453
  }
@@ -405,6 +458,43 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
405
458
  continue
406
459
  seen_edges.add(key)
407
460
  appended.append({"src": rec["src"], "dst": rec["dst"], "kind": rec["kind"]})
461
+ else: # reconciliations
462
+ claim_ids = {c.get("claim_id") for c in _read_table(facts_dir(root) / "claims.ndjson")[0]}
463
+ for i, rec in enumerate(proposals):
464
+ refs = [rec["claim_a"], rec["claim_b"]]
465
+ if rec["decision"] == "supersede":
466
+ refs.append(rec["winner"])
467
+ missing = next((r for r in refs if r not in claim_ids), None)
468
+ if missing is not None:
469
+ failures.append({
470
+ "index": i, "status": "MISSING_CLAIM", "claim": missing,
471
+ "detail": f"{missing!r} is not a claim in claims.ndjson",
472
+ })
473
+ if failures:
474
+ return {"table": table, "appended": [], "skipped": [], "failures": failures}
475
+ seen_pairs = {frozenset((r.get("claim_a"), r.get("claim_b"))) for r in existing}
476
+ number, width = _next_recon_id(existing)
477
+ now = _now()
478
+ for i, rec in enumerate(proposals):
479
+ pair = frozenset((rec["claim_a"], rec["claim_b"]))
480
+ if pair in seen_pairs:
481
+ skipped.append({"index": i, "reason": "duplicate", "existing_id": None})
482
+ continue
483
+ seen_pairs.add(pair)
484
+ rid = f"rec_{number:0{width}d}"
485
+ number += 1
486
+ full = {
487
+ "reconciliation_id": rid,
488
+ "decision": rec["decision"],
489
+ "claim_a": rec["claim_a"],
490
+ "claim_b": rec["claim_b"],
491
+ }
492
+ if rec["decision"] == "supersede":
493
+ full["winner"] = rec["winner"]
494
+ if rec.get("rationale"):
495
+ full["rationale"] = rec["rationale"]
496
+ full["at"] = now
497
+ appended.append(full)
408
498
 
409
499
  if failures:
410
500
  return {"table": table, "appended": [], "skipped": skipped, "failures": failures}
@@ -22,15 +22,18 @@ _VIEWS = {
22
22
  "claims": "claims.ndjson",
23
23
  "entities": "entities.ndjson",
24
24
  "edges": "graph.ndjson",
25
+ "reconciliations": "reconciliations.ndjson",
25
26
  }
26
27
 
27
28
  _NAMED = {
28
29
  "claims": "SELECT * FROM claims",
29
30
  "entities": "SELECT * FROM entities",
30
31
  "edges": "SELECT * FROM edges",
32
+ "reconciliations": "SELECT * FROM reconciliations",
31
33
  # contradiction *candidates*: same subject+predicate, opposing polarity,
32
- # from different sources. Detection is deterministic; adjudication is the
33
- # agent's job.
34
+ # from different sources, AND not yet adjudicated (no reconciliation record
35
+ # for the pair, either order) — so RECONCILE makes the set converge.
36
+ # Detection is deterministic; adjudication is the agent's job.
34
37
  "contradictions": """
35
38
  SELECT a.claim_id AS claim_a, b.claim_id AS claim_b,
36
39
  a.subject, a.predicate,
@@ -41,10 +44,15 @@ _NAMED = {
41
44
  WHERE a.polarity = 'asserts'
42
45
  AND b.polarity = 'denies'
43
46
  AND a.source_id <> b.source_id
47
+ AND NOT EXISTS (
48
+ SELECT 1 FROM reconciliations r
49
+ WHERE (r.claim_a = a.claim_id AND r.claim_b = b.claim_id)
50
+ OR (r.claim_a = b.claim_id AND r.claim_b = a.claim_id)
51
+ )
44
52
  """,
45
53
  }
46
54
 
47
- _FILTERABLE = {"claims", "entities", "edges"}
55
+ _FILTERABLE = {"claims", "entities", "edges", "reconciliations"}
48
56
 
49
57
 
50
58
  def _connect(root: Path) -> duckdb.DuckDBPyConnection:
@@ -57,6 +65,16 @@ def _connect(root: Path) -> duckdb.DuckDBPyConnection:
57
65
  f"CREATE VIEW {view} AS "
58
66
  f"SELECT * FROM read_ndjson_auto('{p.as_posix()}')"
59
67
  )
68
+ elif view == "reconciliations":
69
+ # Always present (empty stub) so `contradictions` can anti-join it and
70
+ # raw SQL over its columns works even before any reconciliation exists.
71
+ con.execute(
72
+ "CREATE VIEW reconciliations AS SELECT "
73
+ "NULL::VARCHAR AS reconciliation_id, NULL::VARCHAR AS decision, "
74
+ "NULL::VARCHAR AS claim_a, NULL::VARCHAR AS claim_b, "
75
+ "NULL::VARCHAR AS winner, NULL::VARCHAR AS rationale, "
76
+ "NULL::VARCHAR AS at WHERE FALSE"
77
+ )
60
78
  return con
61
79
 
62
80
 
@@ -48,6 +48,122 @@ def _claims_lines(kb):
48
48
  return [json.loads(s) for s in p.read_text(encoding="utf-8").splitlines() if s.strip()]
49
49
 
50
50
 
51
def _recs_lines(kb):
    """Parse ``vault/facts/reconciliations.ndjson`` into a list of records.

    Returns an empty list when the file is absent; blank lines are skipped.
    """
    path = kb.root / "vault" / "facts" / "reconciliations.ndjson"
    if path.exists():
        raw = path.read_text(encoding="utf-8")
        return [json.loads(line) for line in raw.splitlines() if line.strip()]
    return []
56
+
57
+
58
def _two_claims(kb):
    """Seed one raw source plus an asserts/denies claim pair to reconcile."""
    kb.add_raw("s", SRC)
    # Both claims share subject and predicate and differ only in polarity.
    shared = {"subject": "chunking", "predicate": "discards"}
    kb.add_claim(
        "clm_0001",
        "s",
        "The quick brown fox jumps over the lazy dog.",
        **shared,
        polarity="asserts",
    )
    kb.add_claim(
        "clm_0002",
        "s",
        "Caching answers beats recomputing them.",
        **shared,
        polarity="denies",
    )
65
+
66
+
67
+ def _recon(decision, **kw):
68
+ rec = {"decision": decision, "claim_a": "clm_0001", "claim_b": "clm_0002"}
69
+ rec.update(kw)
70
+ return rec
71
+
72
+
73
+ # --------------------------------------------------------------------------- #
74
+ # Reconciliations table
75
+ # --------------------------------------------------------------------------- #
76
def test_fact_add_reconciliation_supersede(kb, capsys):
    """A supersede proposal is stored with a minted id, a timestamp, and its winner."""
    _two_claims(kb)
    proposal = _recon("supersede", winner="clm_0001", rationale="newer source wins")
    exit_code = _run_add(kb, _ndjson(proposal), "--table", "reconciliations", "--json")
    assert exit_code == 0

    stored = _recs_lines(kb)
    assert len(stored) == 1
    rec = stored[0]
    assert rec["reconciliation_id"] == "rec_0001"
    assert rec["decision"] == "supersede"
    assert rec["winner"] == "clm_0001"
    assert (rec["claim_a"], rec["claim_b"]) == ("clm_0001", "clm_0002")
    assert rec["rationale"] == "newer source wins"
    assert ISO_Z.fullmatch(rec["at"])

    # The JSON report on stdout names the same minted id.
    report = json.loads(capsys.readouterr().out)
    assert report["appended"][0]["reconciliation_id"] == "rec_0001"
92
+
93
+
94
def test_fact_add_reconciliation_qualify_and_keep_both(kb):
    """Qualify and keep-both records are appended in order and carry no winner."""
    _two_claims(kb)
    kb.add_claim("clm_0003", "s", "Caching answers beats recomputing them.", subject="x")

    # The second proposal uses a *different* pair so dedup doesn't skip it.
    proposals = [_recon("qualify"), _recon("keep-both", claim_b="clm_0003")]
    for proposal in proposals:
        assert _run_add(kb, _ndjson(proposal), "--table", "reconciliations") == 0

    stored = _recs_lines(kb)
    assert [(r["reconciliation_id"], r["decision"]) for r in stored] == [
        ("rec_0001", "qualify"),
        ("rec_0002", "keep-both"),
    ]
    assert not any("winner" in r for r in stored)  # winner only for supersede
105
+
106
+
107
def test_fact_add_reconciliation_supersede_requires_valid_winner(kb):
    """Supersede without a winner, or with one outside the pair, exits 3."""
    _two_claims(kb)
    invalid = (
        _recon("supersede"),  # no winner
        _recon("supersede", winner="clm_9999"),  # winner not in pair
    )
    for proposal in invalid:
        assert _run_add(kb, _ndjson(proposal), "--table", "reconciliations") == 3
112
+
113
+
114
def test_fact_add_reconciliation_winner_forbidden_unless_supersede(kb):
    """A 'winner' on a qualify decision exits 3."""
    _two_claims(kb)
    proposal = _recon("qualify", winner="clm_0001")
    exit_code = _run_add(kb, _ndjson(proposal), "--table", "reconciliations")
    assert exit_code == 3
118
+
119
+
120
def test_fact_add_reconciliation_bad_decision_is_data_error(kb):
    """An unrecognised decision value exits 3."""
    _two_claims(kb)
    proposal = _recon("ignore-it")
    exit_code = _run_add(kb, _ndjson(proposal), "--table", "reconciliations")
    assert exit_code == 3
123
+
124
+
125
def test_fact_add_reconciliation_rejects_minted_fields(kb):
    """Proposing ``reconciliation_id`` or ``at`` exits 3."""
    _two_claims(kb)
    minted_fields = (
        {"reconciliation_id": "rec_0001"},
        {"at": "2026-01-01T00:00:00Z"},
    )
    for extra in minted_fields:
        proposal = _recon("qualify", **extra)
        assert _run_add(kb, _ndjson(proposal), "--table", "reconciliations") == 3
131
+
132
+
133
def test_fact_add_reconciliation_missing_claim_fails(kb, capsys):
    """Referencing an unknown claim exits 1, writes nothing, and reports MISSING_CLAIM."""
    _two_claims(kb)
    proposal = _recon("qualify", claim_b="clm_9999")
    exit_code = _run_add(kb, _ndjson(proposal), "--table", "reconciliations", "--json")
    assert exit_code == 1
    assert _recs_lines(kb) == []

    failures = json.loads(capsys.readouterr().out)["failures"]
    assert len(failures) == 1
    assert failures[0]["status"] == "MISSING_CLAIM"
141
+
142
+
143
def test_fact_add_reconciliation_dedups_unordered_pair(kb, capsys):
    """Re-proposing an adjudicated pair in reversed order is skipped as a duplicate."""
    _two_claims(kb)
    assert _run_add(kb, _ndjson(_recon("qualify")), "--table", "reconciliations") == 0
    capsys.readouterr()  # discard the first run's output

    # Same pair, reversed order → already adjudicated → skipped, not re-appended.
    reversed_pair = {
        "decision": "supersede",
        "claim_a": "clm_0002",
        "claim_b": "clm_0001",
        "winner": "clm_0002",
    }
    exit_code = _run_add(kb, _ndjson(reversed_pair), "--table", "reconciliations", "--json")
    assert exit_code == 0
    assert len(_recs_lines(kb)) == 1

    report = json.loads(capsys.readouterr().out)
    assert report["skipped"][0]["reason"] == "duplicate"
156
+
157
+
158
def test_fact_add_reconciliation_id_sequencing(kb):
    """Ids minted by separate add runs continue the rec_NNNN sequence."""
    _two_claims(kb)
    kb.add_claim("clm_0003", "s", "The quick brown fox jumps over the lazy dog.", subject="x")

    proposals = (
        _recon("qualify"),
        _recon("keep-both", claim_a="clm_0001", claim_b="clm_0003"),
    )
    for proposal in proposals:
        assert _run_add(kb, _ndjson(proposal), "--table", "reconciliations") == 0

    minted = [r["reconciliation_id"] for r in _recs_lines(kb)]
    assert minted == ["rec_0001", "rec_0002"]
165
+
166
+
51
167
  # --------------------------------------------------------------------------- #
52
168
  # Happy path
53
169
  # --------------------------------------------------------------------------- #
@@ -0,0 +1,109 @@
1
+ import pytest
2
+ from scrip.errors import UsageError
3
+
4
+ from scrip import cli, query
5
+
6
+
7
def test_named_claims_query(kb):
    """The ``claims`` named query exposes a ``subject`` column and returns the seeded claim."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    kb.add_claim("clm_1", "a", "the sky is blue", subject="sky", tags=["color"])

    columns, records = query.run(kb.root, name="claims")
    assert "subject" in columns
    assert "clm_1" in [r["claim_id"] for r in records]
13
+
14
+
15
def test_raw_sql_aggregate(kb):
    """Raw SQL can aggregate over the ``claims`` view."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    for claim_id in ("clm_1", "clm_2"):
        kb.add_claim(claim_id, "a", "the sky is blue")

    _columns, records = query.run(kb.root, sql="SELECT count(*) AS n FROM claims")
    first = records[0]
    assert first["n"] == 2
21
+
22
+
23
def test_contradictions_detected(kb):
    """Opposing-polarity claims on one subject/predicate from two sources form one candidate."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
    common = {"subject": "sky", "predicate": "color"}
    kb.add_claim("clm_1", "a", "the sky is blue", **common, polarity="asserts")
    kb.add_claim("clm_2", "b", "the sky is not blue", **common, polarity="denies")

    _columns, found = query.run(kb.root, name="contradictions")
    assert len(found) == 1
    row = found[0]
    assert row["subject"] == "sky"
    assert {row["source_a"], row["source_b"]} == {"raw/a", "raw/b"}
38
+
39
+
40
def _contradiction_pair(kb):
    """Seed two raw sources and one asserts/denies claim pair across them."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
    topic = {"subject": "sky", "predicate": "color"}
    kb.add_claim("clm_1", "a", "the sky is blue", **topic, polarity="asserts")
    kb.add_claim("clm_2", "b", "the sky is not blue", **topic, polarity="denies")
45
+
46
+
47
def test_contradictions_excludes_reconciled_pairs(kb):
    """A pair with a reconciliation record (either order) leaves the contradictions set."""
    _contradiction_pair(kb)
    _columns, before = query.run(kb.root, name="contradictions")
    assert len(before) == 1

    # Record a reconciliation for that pair (reversed order, to test symmetry).
    recon_file = kb.root / "vault" / "facts" / "reconciliations.ndjson"
    record_line = (
        '{"reconciliation_id":"rec_0001","decision":"supersede","claim_a":"clm_2",'
        '"claim_b":"clm_1","winner":"clm_2","at":"2026-01-01T00:00:00Z"}\n'
    )
    recon_file.write_text(record_line, encoding="utf-8")

    _columns, after = query.run(kb.root, name="contradictions")
    assert after == []  # adjudicated → gone
57
+
58
+
59
def test_reconciliations_named_query(kb):
    """The ``reconciliations`` named query returns records from the ndjson file."""
    _contradiction_pair(kb)
    recon_file = kb.root / "vault" / "facts" / "reconciliations.ndjson"
    record_line = (
        '{"reconciliation_id":"rec_0001","decision":"keep-both","claim_a":"clm_1",'
        '"claim_b":"clm_2","at":"2026-01-01T00:00:00Z"}\n'
    )
    recon_file.write_text(record_line, encoding="utf-8")

    _columns, records = query.run(kb.root, name="reconciliations")
    first = records[0]
    assert first["reconciliation_id"] == "rec_0001"
    assert first["decision"] == "keep-both"
69
+
70
+
71
def test_contradictions_works_without_reconciliations_file(kb):
    """``contradictions`` still runs when no reconciliations file has been written."""
    # The reconciliations view is an empty stub when the file is absent.
    _contradiction_pair(kb)
    _columns, found = query.run(kb.root, name="contradictions")
    assert len(found) == 1
75
+
76
+
77
def test_query_reconciliations_cli_choice(kb):
    """``scrip query reconciliations`` is accepted by the CLI parser and exits 0."""
    # The named query must be a valid CLI `query` choice, not just in query.run.
    _contradiction_pair(kb)
    argv = ["query", "reconciliations", "--json", "--root", str(kb.root)]
    exit_code = cli.main(argv)
    assert exit_code == 0
81
+
82
+
83
def test_reconciliations_stub_exposes_full_schema(kb):
    """Raw SQL naming reconciliation columns works before the file exists."""
    _contradiction_pair(kb)
    statement = "SELECT decision, winner, reconciliation_id FROM reconciliations"
    columns, records = query.run(kb.root, sql=statement)
    assert records == []
    assert "decision" in columns
89
+
90
+
91
def test_where_and_limit(kb):
    """``where`` narrows a named query to the matching claim."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    for claim_id in ("clm_1", "clm_2"):
        kb.add_claim(claim_id, "a", "the sky is blue")

    _columns, records = query.run(
        kb.root, name="claims", where="claim_id = 'clm_1'", limit=10
    )
    assert len(records) == 1
    assert records[0]["claim_id"] == "clm_1"
98
+
99
+
100
def test_where_rejects_statement_chaining(kb):
    """A ``where`` clause that chains a second statement raises UsageError."""
    kb.add_raw("a", "# A\n\nThe sky is blue.\n")
    kb.add_claim("clm_1", "a", "the sky is blue")
    chained = "1=1; DROP TABLE claims"
    with pytest.raises(UsageError):
        query.run(kb.root, name="claims", where=chained)
105
+
106
+
107
def test_unknown_named_query_is_usage_error(kb):
    """An unrecognised query name raises UsageError."""
    unknown = "bogus"
    with pytest.raises(UsageError):
        query.run(kb.root, name=unknown)
@@ -0,0 +1,86 @@
1
+ """`scrip span` — resolve an anchor and print the cited text. Read-only; lets an
2
+ agent read both sides of a contradiction (RECONCILE) without re-implementing
3
+ anchor resolution."""
4
+
5
+ import json
6
+
7
+ import pytest
8
+
9
+ from scrip import anchors, cli
10
+
11
+ SRC = "# H\n\nThe quick brown fox jumps over the lazy dog.\n\nalpha beta. alpha beta.\n"
12
+
13
+
14
def test_span_prints_cited_text(kb, capsys):
    """``scrip span`` exits 0 and prints the cited text for a resolvable anchor."""
    quote = "The quick brown fox jumps over the lazy dog."
    kb.add_raw("s", SRC)
    anchor = anchors.make_anchor(SRC, quote)

    exit_code = cli.main(["span", f"raw/s#{anchor}", "--root", str(kb.root)])
    assert exit_code == 0
    # The cited span is shown (normalized: lowercased, whitespace-collapsed).
    printed = capsys.readouterr().out.lower()
    assert quote.lower() in printed
21
+
22
+
23
def test_span_json_shape(kb, capsys):
    """``--json`` output has exactly the keys target, status and text."""
    kb.add_raw("s", SRC)
    anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
    argv = ["span", f"raw/s#{anchor}", "--json", "--root", str(kb.root)]

    assert cli.main(argv) == 0
    payload = json.loads(capsys.readouterr().out)
    assert set(payload) == {"target", "status", "text"}
    assert payload["status"] == "OK"
    assert "quick brown fox" in payload["text"]
32
+
33
+
34
def test_span_by_claim_id(kb, capsys):
    """``--claim ID`` resolves the anchor stored on that claim."""
    kb.add_raw("s", SRC)
    kb.add_claim("clm_0001", "s", "The quick brown fox jumps over the lazy dog.")
    argv = ["span", "--claim", "clm_0001", "--json", "--root", str(kb.root)]

    assert cli.main(argv) == 0
    payload = json.loads(capsys.readouterr().out)
    assert payload["status"] == "OK"
    assert "quick brown fox" in payload["text"]
42
+
43
+
44
def test_span_ambiguous_exits_1(kb, capsys):
    """A quote occurring twice in the source yields AMBIGUOUS and exit code 1."""
    kb.add_raw("s", SRC)
    repeated = anchors.make_anchor(SRC, "alpha beta.")  # appears twice
    argv = ["span", f"raw/s#{repeated}", "--json", "--root", str(kb.root)]

    assert cli.main(argv) == 1
    payload = json.loads(capsys.readouterr().out)
    assert payload["status"] == "AMBIGUOUS"
50
+
51
+
52
def test_span_broken_exits_1(kb, capsys):
    """An anchor whose quote is absent from the source yields BROKEN and exit code 1."""
    kb.add_raw("s", SRC)
    # A well-formed anchor whose quote is absent from the source.
    other_doc = "a totally different document about cats"
    foreign = anchors.make_anchor(other_doc, "totally different")
    argv = ["span", f"raw/s#{foreign}", "--json", "--root", str(kb.root)]

    assert cli.main(argv) == 1
    payload = json.loads(capsys.readouterr().out)
    assert payload["status"] == "BROKEN"
59
+
60
+
61
def test_span_unsafe_source_is_usage_error(kb):
    """A target whose source part contains path traversal exits 2."""
    kb.add_raw("s", SRC)
    anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
    argv = ["span", f"../../etc/passwd#{anchor}", "--root", str(kb.root)]
    exit_code = cli.main(argv)
    assert exit_code == 2
65
+
66
+
67
def test_span_missing_source_is_data_error(kb):
    """A target naming a source that was never added exits 3."""
    kb.add_raw("s", SRC)
    anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
    argv = ["span", f"raw/absent#{anchor}", "--root", str(kb.root)]
    exit_code = cli.main(argv)
    assert exit_code == 3
71
+
72
+
73
def test_span_unknown_claim_is_data_error(kb):
    """``--claim`` with an id that is not in the claims table exits 3."""
    kb.add_raw("s", SRC)
    argv = ["span", "--claim", "clm_9999", "--root", str(kb.root)]
    exit_code = cli.main(argv)
    assert exit_code == 3
76
+
77
+
78
def test_span_requires_a_target(kb):
    """Calling ``span`` with neither a target nor ``--claim`` exits via SystemExit(2)."""
    argv = ["span", "--root", str(kb.root)]
    with pytest.raises(SystemExit) as excinfo:
        cli.main(argv)
    assert excinfo.value.code == 2
82
+
83
+
84
def test_span_target_without_anchor_is_usage_error(kb):
    """A positional target lacking '#<anchor>' exits 2."""
    kb.add_raw("s", SRC)
    argv = ["span", "raw/s", "--root", str(kb.root)]  # no '#<anchor>'
    exit_code = cli.main(argv)
    assert exit_code == 2
@@ -1141,7 +1141,7 @@ wheels = [
1141
1141
 
1142
1142
  [[package]]
1143
1143
  name = "scriptoria"
1144
- version = "0.4.0"
1144
+ version = "0.5.0"
1145
1145
  source = { editable = "." }
1146
1146
  dependencies = [
1147
1147
  { name = "duckdb" },
@@ -1,58 +0,0 @@
1
- import pytest
2
- from scrip.errors import UsageError
3
-
4
- from scrip import query
5
-
6
-
7
- def test_named_claims_query(kb):
8
- kb.add_raw("a", "# A\n\nThe sky is blue.\n")
9
- kb.add_claim("clm_1", "a", "the sky is blue", subject="sky", tags=["color"])
10
- cols, rows = query.run(kb.root, name="claims")
11
- assert "subject" in cols
12
- assert any(r["claim_id"] == "clm_1" for r in rows)
13
-
14
-
15
- def test_raw_sql_aggregate(kb):
16
- kb.add_raw("a", "# A\n\nThe sky is blue.\n")
17
- kb.add_claim("clm_1", "a", "the sky is blue")
18
- kb.add_claim("clm_2", "a", "the sky is blue")
19
- cols, rows = query.run(kb.root, sql="SELECT count(*) AS n FROM claims")
20
- assert rows[0]["n"] == 2
21
-
22
-
23
- def test_contradictions_detected(kb):
24
- kb.add_raw("a", "# A\n\nThe sky is blue.\n")
25
- kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
26
- kb.add_claim(
27
- "clm_1", "a", "the sky is blue", subject="sky", predicate="color",
28
- polarity="asserts",
29
- )
30
- kb.add_claim(
31
- "clm_2", "b", "the sky is not blue", subject="sky", predicate="color",
32
- polarity="denies",
33
- )
34
- cols, rows = query.run(kb.root, name="contradictions")
35
- assert len(rows) == 1
36
- assert rows[0]["subject"] == "sky"
37
- assert {rows[0]["source_a"], rows[0]["source_b"]} == {"raw/a", "raw/b"}
38
-
39
-
40
- def test_where_and_limit(kb):
41
- kb.add_raw("a", "# A\n\nThe sky is blue.\n")
42
- kb.add_claim("clm_1", "a", "the sky is blue")
43
- kb.add_claim("clm_2", "a", "the sky is blue")
44
- cols, rows = query.run(kb.root, name="claims", where="claim_id = 'clm_1'", limit=10)
45
- assert len(rows) == 1
46
- assert rows[0]["claim_id"] == "clm_1"
47
-
48
-
49
- def test_where_rejects_statement_chaining(kb):
50
- kb.add_raw("a", "# A\n\nThe sky is blue.\n")
51
- kb.add_claim("clm_1", "a", "the sky is blue")
52
- with pytest.raises(UsageError):
53
- query.run(kb.root, name="claims", where="1=1; DROP TABLE claims")
54
-
55
-
56
- def test_unknown_named_query_is_usage_error(kb):
57
- with pytest.raises(UsageError):
58
- query.run(kb.root, name="bogus")
File without changes
File without changes
File without changes
File without changes