scriptoria 0.4.0__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {scriptoria-0.4.0 → scriptoria-0.5.0}/PKG-INFO +1 -1
- {scriptoria-0.4.0 → scriptoria-0.5.0}/pyproject.toml +1 -1
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/__init__.py +1 -1
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/anchors.py +27 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/cli.py +43 -3
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/facts.py +97 -7
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/query.py +21 -3
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_fact_cmd.py +116 -0
- scriptoria-0.5.0/tests/test_query.py +109 -0
- scriptoria-0.5.0/tests/test_span_cmd.py +86 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/uv.lock +1 -1
- scriptoria-0.4.0/tests/test_query.py +0 -58
- {scriptoria-0.4.0 → scriptoria-0.5.0}/.gitignore +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/README.md +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/pyrightconfig.json +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/blocks.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/embeddings.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/errors.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/frontmatter.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/graph.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/hashing.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/ingest.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/lock.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/manifest.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/retrieval.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/src/scrip/similar.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/conftest.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_anchor_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_anchors.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_blocks.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_embeddings.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_graph_status.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_hashing.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_index_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_ingest.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_json_shapes.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_lock.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_manifest.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_new_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_retrieval.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_similar_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_stamp.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_status_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_unlock_cmd.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_verify.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_version.py +0 -0
- {scriptoria-0.4.0 → scriptoria-0.5.0}/tests/test_watch.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: scriptoria
|
|
3
|
-
Version: 0.4.0
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base
|
|
5
5
|
Project-URL: Homepage, https://github.com/coredipper/scriptorium
|
|
6
6
|
Project-URL: Changelog, https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
# Distribution name on PyPI is `scriptoria` (scrip/scriptorium were taken); the
|
|
3
3
|
# CLI command and the import package both remain `scrip`.
|
|
4
4
|
name = "scriptoria"
|
|
5
|
-
version = "0.4.0"
|
|
5
|
+
version = "0.5.0"
|
|
6
6
|
description = "Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base"
|
|
7
7
|
readme = "README.md"
|
|
8
8
|
requires-python = ">=3.10"
|
|
@@ -13,7 +13,7 @@ from __future__ import annotations
|
|
|
13
13
|
|
|
14
14
|
from pathlib import Path
|
|
15
15
|
|
|
16
|
-
__version__ = "0.4.0"
|
|
16
|
+
__version__ = "0.5.0"
|
|
17
17
|
|
|
18
18
|
# --- canonical vault layout ------------------------------------------------
|
|
19
19
|
# ``root`` is the repo/instance root: the directory containing ``vault/``.
|
|
@@ -90,6 +90,33 @@ def resolve(source_text: str, anchor: str) -> str:
|
|
|
90
90
|
return "OK" if matches == 1 else "AMBIGUOUS"
|
|
91
91
|
|
|
92
92
|
|
|
93
|
+
def span(source_text: str, anchor: str) -> tuple[str, str | None]:
|
|
94
|
+
"""Return ``(status, cited_text)`` for ``anchor`` in ``source_text``.
|
|
95
|
+
|
|
96
|
+
Same verdicts as :func:`resolve`, but also returns the matched span (the
|
|
97
|
+
normalized cited text) so a caller can *read* what an anchor cites. For
|
|
98
|
+
``AMBIGUOUS`` the window nearest the anchor's ``loc`` hint is returned; for
|
|
99
|
+
``BROKEN`` the text is ``None``.
|
|
100
|
+
"""
|
|
101
|
+
a = parse_anchor(anchor)
|
|
102
|
+
ns = normalize(source_text)
|
|
103
|
+
n, target, loc = a["len"], a["qh"], a["loc"]
|
|
104
|
+
length = len(ns)
|
|
105
|
+
if n <= 0 or n > length:
|
|
106
|
+
return "BROKEN", None
|
|
107
|
+
hits = [
|
|
108
|
+
start
|
|
109
|
+
for start in range(length - n + 1)
|
|
110
|
+
if hashlib.sha256(ns[start : start + n].encode("utf-8")).hexdigest() == target
|
|
111
|
+
]
|
|
112
|
+
if not hits:
|
|
113
|
+
return "BROKEN", None
|
|
114
|
+
if len(hits) == 1:
|
|
115
|
+
return "OK", ns[hits[0] : hits[0] + n]
|
|
116
|
+
nearest = min(hits, key=lambda s: abs(s - loc * length))
|
|
117
|
+
return "AMBIGUOUS", ns[nearest : nearest + n]
|
|
118
|
+
|
|
119
|
+
|
|
93
120
|
# --------------------------------------------------------------------------- #
|
|
94
121
|
# Vault-wide verification
|
|
95
122
|
# --------------------------------------------------------------------------- #
|
|
@@ -352,6 +352,31 @@ def _parse_source_ids(raw: str) -> list[str]:
|
|
|
352
352
|
return ids
|
|
353
353
|
|
|
354
354
|
|
|
355
|
+
def cmd_span(args: argparse.Namespace) -> int:
|
|
356
|
+
from . import anchors
|
|
357
|
+
|
|
358
|
+
root = resolve_root(args.root)
|
|
359
|
+
if args.claim:
|
|
360
|
+
from . import facts
|
|
361
|
+
|
|
362
|
+
source_id, anchor = facts.claim_source_anchor(root, args.claim)
|
|
363
|
+
else:
|
|
364
|
+
if "#" not in args.target:
|
|
365
|
+
raise errors.UsageError("target must be raw/<slug>#<anchor>")
|
|
366
|
+
source_id, anchor = args.target.split("#", 1)
|
|
367
|
+
source_id = source_id if source_id.startswith("raw/") else f"raw/{source_id}"
|
|
368
|
+
_safe_slug(source_id[len("raw/") :], "source")
|
|
369
|
+
text = anchors.source_text(root, source_id)
|
|
370
|
+
status, cited = anchors.span(text, anchor)
|
|
371
|
+
if args.json:
|
|
372
|
+
_emit({"target": f"{source_id}#{anchor}", "status": status, "text": cited})
|
|
373
|
+
else:
|
|
374
|
+
print(f"[{status}] {source_id}")
|
|
375
|
+
if cited is not None:
|
|
376
|
+
print(cited)
|
|
377
|
+
return 0 if status == "OK" else 1
|
|
378
|
+
|
|
379
|
+
|
|
355
380
|
def cmd_similar(args: argparse.Namespace) -> int:
|
|
356
381
|
from . import similar
|
|
357
382
|
|
|
@@ -388,7 +413,12 @@ def cmd_fact_add(args: argparse.Namespace) -> int:
|
|
|
388
413
|
_emit(result)
|
|
389
414
|
else:
|
|
390
415
|
for r in result["appended"]:
|
|
391
|
-
ident = r.get("claim_id") or r.get("entity_id") or f"{r.get('src')} -> {r.get('dst')}"
|
|
416
|
+
ident = (
|
|
417
|
+
r.get("claim_id")
|
|
418
|
+
or r.get("entity_id")
|
|
419
|
+
or r.get("reconciliation_id")
|
|
420
|
+
or f"{r.get('src')} -> {r.get('dst')}"
|
|
421
|
+
)
|
|
392
422
|
print(f" appended {ident}")
|
|
393
423
|
for s in result["skipped"]:
|
|
394
424
|
print(f" = record {s['index']} skipped (duplicate)")
|
|
@@ -517,7 +547,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
517
547
|
pq.add_argument(
|
|
518
548
|
"name",
|
|
519
549
|
nargs="?",
|
|
520
|
-
choices=["claims", "entities", "edges", "contradictions"],
|
|
550
|
+
choices=["claims", "entities", "edges", "contradictions", "reconciliations"],
|
|
521
551
|
help="a named query (omit when using --sql)",
|
|
522
552
|
)
|
|
523
553
|
pq.add_argument("--sql", help="raw DuckDB SQL (views: claims, entities, edges)")
|
|
@@ -585,6 +615,16 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
585
615
|
pn.add_argument("--title", help="human title (default: the slug)")
|
|
586
616
|
pn.set_defaults(func=cmd_new)
|
|
587
617
|
|
|
618
|
+
psp = sub.add_parser(
|
|
619
|
+
"span",
|
|
620
|
+
parents=[common],
|
|
621
|
+
help="resolve an anchor and print the cited text (read both sides of a contradiction)",
|
|
622
|
+
)
|
|
623
|
+
span_src = psp.add_mutually_exclusive_group(required=True)
|
|
624
|
+
span_src.add_argument("target", nargs="?", metavar="raw/<slug>#<anchor>", help="anchor target")
|
|
625
|
+
span_src.add_argument("--claim", metavar="ID", help="resolve this claim's anchor instead")
|
|
626
|
+
psp.set_defaults(func=cmd_span)
|
|
627
|
+
|
|
588
628
|
psim = sub.add_parser(
|
|
589
629
|
"similar",
|
|
590
630
|
parents=[common],
|
|
@@ -629,7 +669,7 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
629
669
|
)
|
|
630
670
|
pfa.add_argument(
|
|
631
671
|
"--table",
|
|
632
|
-
choices=["claims", "entities", "edges"],
|
|
672
|
+
choices=["claims", "entities", "edges", "reconciliations"],
|
|
633
673
|
default="claims",
|
|
634
674
|
help="facts table to append to (default: claims)",
|
|
635
675
|
)
|
|
@@ -36,10 +36,12 @@ _FILES = {
|
|
|
36
36
|
"claims": "claims.ndjson",
|
|
37
37
|
"entities": "entities.ndjson",
|
|
38
38
|
"edges": "graph.ndjson",
|
|
39
|
+
"reconciliations": "reconciliations.ndjson",
|
|
39
40
|
}
|
|
40
41
|
|
|
41
42
|
# Fields scrip mints itself; proposing them is a schema error, not a finding.
|
|
42
43
|
_SCRIP_OWNED = ("claim_id", "anchor", "extracted_at")
|
|
44
|
+
_RECON_OWNED = ("reconciliation_id", "at")
|
|
43
45
|
|
|
44
46
|
_CLAIM_REQUIRED = ("quote", "source_id", "subject", "predicate", "object", "polarity", "confidence")
|
|
45
47
|
_CLAIM_ALLOWED = frozenset((*_CLAIM_REQUIRED, "claim_text", "tags"))
|
|
@@ -47,12 +49,16 @@ _ENTITY_REQUIRED = ("entity_id", "name", "kind")
|
|
|
47
49
|
_ENTITY_ALLOWED = frozenset((*_ENTITY_REQUIRED, "tags"))
|
|
48
50
|
_EDGE_REQUIRED = ("src", "dst", "kind")
|
|
49
51
|
_EDGE_ALLOWED = frozenset(_EDGE_REQUIRED)
|
|
52
|
+
_DECISIONS = ("supersede", "qualify", "keep-both")
|
|
53
|
+
_RECON_REQUIRED = ("decision", "claim_a", "claim_b")
|
|
54
|
+
_RECON_ALLOWED = frozenset((*_RECON_REQUIRED, "winner", "rationale"))
|
|
50
55
|
|
|
51
56
|
# Same conservative shape ``cli._safe_slug`` enforces — no path separators,
|
|
52
57
|
# '..', or leading dot — applied to source ids arriving as record *data*.
|
|
53
58
|
_SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
|
|
54
59
|
|
|
55
60
|
_CLAIM_ID_RE = re.compile(r"clm_(\d+)")
|
|
61
|
+
_RECON_ID_RE = re.compile(r"rec_(\d+)")
|
|
56
62
|
|
|
57
63
|
|
|
58
64
|
def _now() -> str:
|
|
@@ -96,12 +102,18 @@ def _check_tags(rec: dict, index: int) -> None:
|
|
|
96
102
|
raise DataError(f"record {index}: 'tags' must be a list of strings")
|
|
97
103
|
|
|
98
104
|
|
|
99
|
-
def _check_shape(
|
|
100
|
-
|
|
101
|
-
|
|
105
|
+
def _check_shape(
|
|
106
|
+
rec: dict,
|
|
107
|
+
index: int,
|
|
108
|
+
required: tuple[str, ...],
|
|
109
|
+
allowed: frozenset[str],
|
|
110
|
+
owned: tuple[str, ...] = _SCRIP_OWNED,
|
|
111
|
+
) -> None:
|
|
112
|
+
present = [k for k in owned if k in rec]
|
|
113
|
+
if present:
|
|
102
114
|
raise DataError(
|
|
103
|
-
f"record {index}: scrip mints {', '.join(
|
|
104
|
-
f"
|
|
115
|
+
f"record {index}: scrip mints {', '.join(present)} itself — do not "
|
|
116
|
+
f"propose precomputed ids/anchors/timestamps"
|
|
105
117
|
)
|
|
106
118
|
unknown = sorted(rec.keys() - allowed)
|
|
107
119
|
if unknown:
|
|
@@ -136,10 +148,27 @@ def _validate(table: str, rec: dict, index: int) -> None:
|
|
|
136
148
|
if not (eid.startswith("entity/") and _SLUG_RE.fullmatch(eid[len("entity/") :])):
|
|
137
149
|
raise DataError(f"record {index}: entity_id must look like entity/<slug>")
|
|
138
150
|
_check_tags(rec, index)
|
|
139
|
-
|
|
151
|
+
elif table == "edges":
|
|
140
152
|
_check_shape(rec, index, _EDGE_REQUIRED, _EDGE_ALLOWED)
|
|
141
153
|
for key in _EDGE_REQUIRED:
|
|
142
154
|
_check_str(rec, key, index)
|
|
155
|
+
else: # reconciliations
|
|
156
|
+
_check_shape(rec, index, _RECON_REQUIRED, _RECON_ALLOWED, owned=_RECON_OWNED)
|
|
157
|
+
for key in ("decision", "claim_a", "claim_b"):
|
|
158
|
+
_check_str(rec, key, index)
|
|
159
|
+
if rec["decision"] not in _DECISIONS:
|
|
160
|
+
raise DataError(f"record {index}: decision must be one of {', '.join(_DECISIONS)}")
|
|
161
|
+
if "rationale" in rec:
|
|
162
|
+
_check_str(rec, "rationale", index, allow_blank=True)
|
|
163
|
+
# winner is required for (and only for) supersede, and must be one of the pair
|
|
164
|
+
if rec["decision"] == "supersede":
|
|
165
|
+
winner = rec.get("winner")
|
|
166
|
+
if winner not in (rec["claim_a"], rec["claim_b"]):
|
|
167
|
+
raise DataError(
|
|
168
|
+
f"record {index}: supersede needs 'winner' = claim_a or claim_b"
|
|
169
|
+
)
|
|
170
|
+
elif "winner" in rec:
|
|
171
|
+
raise DataError(f"record {index}: 'winner' is only valid for decision 'supersede'")
|
|
143
172
|
|
|
144
173
|
|
|
145
174
|
# --------------------------------------------------------------------------- #
|
|
@@ -218,6 +247,19 @@ def _read_table(path: Path) -> tuple[list[dict], str]:
|
|
|
218
247
|
return records, text
|
|
219
248
|
|
|
220
249
|
|
|
250
|
+
def claim_source_anchor(root: Path, claim_id: str) -> tuple[str, str]:
|
|
251
|
+
"""Return ``(source_id, anchor)`` for a claim, for `scrip span --claim`.
|
|
252
|
+
Raises :class:`DataError` if the claim is missing or lacks the fields."""
|
|
253
|
+
records, _ = _read_table(facts_dir(root) / "claims.ndjson")
|
|
254
|
+
for rec in records:
|
|
255
|
+
if rec.get("claim_id") == claim_id:
|
|
256
|
+
sid, anchor = rec.get("source_id"), rec.get("anchor")
|
|
257
|
+
if not isinstance(sid, str) or not isinstance(anchor, str):
|
|
258
|
+
raise DataError(f"claim {claim_id} is missing source_id/anchor")
|
|
259
|
+
return sid, anchor
|
|
260
|
+
raise DataError(f"no such claim: {claim_id}")
|
|
261
|
+
|
|
262
|
+
|
|
221
263
|
def _claim_key(source_id: str, qh: str, rec: dict) -> tuple:
|
|
222
264
|
return (
|
|
223
265
|
source_id,
|
|
@@ -251,6 +293,17 @@ def _next_claim_id(existing: list[dict]) -> tuple[int, int]:
|
|
|
251
293
|
return highest + 1, max(4, len(str(highest)))
|
|
252
294
|
|
|
253
295
|
|
|
296
|
+
def _next_recon_id(existing: list[dict]) -> tuple[int, int]:
|
|
297
|
+
"""Return ``(next_number, pad_width)`` continuing the ``rec_NNNN`` sequence."""
|
|
298
|
+
numbers = [
|
|
299
|
+
int(m.group(1))
|
|
300
|
+
for rec in existing
|
|
301
|
+
if (m := _RECON_ID_RE.fullmatch(str(rec.get("reconciliation_id", ""))))
|
|
302
|
+
]
|
|
303
|
+
highest = max(numbers, default=0)
|
|
304
|
+
return highest + 1, max(4, len(str(highest)))
|
|
305
|
+
|
|
306
|
+
|
|
254
307
|
# --------------------------------------------------------------------------- #
|
|
255
308
|
# facts/_meta.yaml: merge derived-from, never stamp
|
|
256
309
|
# --------------------------------------------------------------------------- #
|
|
@@ -394,7 +447,7 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
|
|
|
394
447
|
"detail": "an entity with this id already exists with different fields",
|
|
395
448
|
}
|
|
396
449
|
)
|
|
397
|
-
|
|
450
|
+
elif table == "edges":
|
|
398
451
|
seen_edges = {
|
|
399
452
|
(rec.get("src"), rec.get("dst"), rec.get("kind")) for rec in existing
|
|
400
453
|
}
|
|
@@ -405,6 +458,43 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
|
|
|
405
458
|
continue
|
|
406
459
|
seen_edges.add(key)
|
|
407
460
|
appended.append({"src": rec["src"], "dst": rec["dst"], "kind": rec["kind"]})
|
|
461
|
+
else: # reconciliations
|
|
462
|
+
claim_ids = {c.get("claim_id") for c in _read_table(facts_dir(root) / "claims.ndjson")[0]}
|
|
463
|
+
for i, rec in enumerate(proposals):
|
|
464
|
+
refs = [rec["claim_a"], rec["claim_b"]]
|
|
465
|
+
if rec["decision"] == "supersede":
|
|
466
|
+
refs.append(rec["winner"])
|
|
467
|
+
missing = next((r for r in refs if r not in claim_ids), None)
|
|
468
|
+
if missing is not None:
|
|
469
|
+
failures.append({
|
|
470
|
+
"index": i, "status": "MISSING_CLAIM", "claim": missing,
|
|
471
|
+
"detail": f"{missing!r} is not a claim in claims.ndjson",
|
|
472
|
+
})
|
|
473
|
+
if failures:
|
|
474
|
+
return {"table": table, "appended": [], "skipped": [], "failures": failures}
|
|
475
|
+
seen_pairs = {frozenset((r.get("claim_a"), r.get("claim_b"))) for r in existing}
|
|
476
|
+
number, width = _next_recon_id(existing)
|
|
477
|
+
now = _now()
|
|
478
|
+
for i, rec in enumerate(proposals):
|
|
479
|
+
pair = frozenset((rec["claim_a"], rec["claim_b"]))
|
|
480
|
+
if pair in seen_pairs:
|
|
481
|
+
skipped.append({"index": i, "reason": "duplicate", "existing_id": None})
|
|
482
|
+
continue
|
|
483
|
+
seen_pairs.add(pair)
|
|
484
|
+
rid = f"rec_{number:0{width}d}"
|
|
485
|
+
number += 1
|
|
486
|
+
full = {
|
|
487
|
+
"reconciliation_id": rid,
|
|
488
|
+
"decision": rec["decision"],
|
|
489
|
+
"claim_a": rec["claim_a"],
|
|
490
|
+
"claim_b": rec["claim_b"],
|
|
491
|
+
}
|
|
492
|
+
if rec["decision"] == "supersede":
|
|
493
|
+
full["winner"] = rec["winner"]
|
|
494
|
+
if rec.get("rationale"):
|
|
495
|
+
full["rationale"] = rec["rationale"]
|
|
496
|
+
full["at"] = now
|
|
497
|
+
appended.append(full)
|
|
408
498
|
|
|
409
499
|
if failures:
|
|
410
500
|
return {"table": table, "appended": [], "skipped": skipped, "failures": failures}
|
|
@@ -22,15 +22,18 @@ _VIEWS = {
|
|
|
22
22
|
"claims": "claims.ndjson",
|
|
23
23
|
"entities": "entities.ndjson",
|
|
24
24
|
"edges": "graph.ndjson",
|
|
25
|
+
"reconciliations": "reconciliations.ndjson",
|
|
25
26
|
}
|
|
26
27
|
|
|
27
28
|
_NAMED = {
|
|
28
29
|
"claims": "SELECT * FROM claims",
|
|
29
30
|
"entities": "SELECT * FROM entities",
|
|
30
31
|
"edges": "SELECT * FROM edges",
|
|
32
|
+
"reconciliations": "SELECT * FROM reconciliations",
|
|
31
33
|
# contradiction *candidates*: same subject+predicate, opposing polarity,
|
|
32
|
-
# from different sources
|
|
33
|
-
#
|
|
34
|
+
# from different sources, AND not yet adjudicated (no reconciliation record
|
|
35
|
+
# for the pair, either order) — so RECONCILE makes the set converge.
|
|
36
|
+
# Detection is deterministic; adjudication is the agent's job.
|
|
34
37
|
"contradictions": """
|
|
35
38
|
SELECT a.claim_id AS claim_a, b.claim_id AS claim_b,
|
|
36
39
|
a.subject, a.predicate,
|
|
@@ -41,10 +44,15 @@ _NAMED = {
|
|
|
41
44
|
WHERE a.polarity = 'asserts'
|
|
42
45
|
AND b.polarity = 'denies'
|
|
43
46
|
AND a.source_id <> b.source_id
|
|
47
|
+
AND NOT EXISTS (
|
|
48
|
+
SELECT 1 FROM reconciliations r
|
|
49
|
+
WHERE (r.claim_a = a.claim_id AND r.claim_b = b.claim_id)
|
|
50
|
+
OR (r.claim_a = b.claim_id AND r.claim_b = a.claim_id)
|
|
51
|
+
)
|
|
44
52
|
""",
|
|
45
53
|
}
|
|
46
54
|
|
|
47
|
-
_FILTERABLE = {"claims", "entities", "edges"}
|
|
55
|
+
_FILTERABLE = {"claims", "entities", "edges", "reconciliations"}
|
|
48
56
|
|
|
49
57
|
|
|
50
58
|
def _connect(root: Path) -> duckdb.DuckDBPyConnection:
|
|
@@ -57,6 +65,16 @@ def _connect(root: Path) -> duckdb.DuckDBPyConnection:
|
|
|
57
65
|
f"CREATE VIEW {view} AS "
|
|
58
66
|
f"SELECT * FROM read_ndjson_auto('{p.as_posix()}')"
|
|
59
67
|
)
|
|
68
|
+
elif view == "reconciliations":
|
|
69
|
+
# Always present (empty stub) so `contradictions` can anti-join it and
|
|
70
|
+
# raw SQL over its columns works even before any reconciliation exists.
|
|
71
|
+
con.execute(
|
|
72
|
+
"CREATE VIEW reconciliations AS SELECT "
|
|
73
|
+
"NULL::VARCHAR AS reconciliation_id, NULL::VARCHAR AS decision, "
|
|
74
|
+
"NULL::VARCHAR AS claim_a, NULL::VARCHAR AS claim_b, "
|
|
75
|
+
"NULL::VARCHAR AS winner, NULL::VARCHAR AS rationale, "
|
|
76
|
+
"NULL::VARCHAR AS at WHERE FALSE"
|
|
77
|
+
)
|
|
60
78
|
return con
|
|
61
79
|
|
|
62
80
|
|
|
@@ -48,6 +48,122 @@ def _claims_lines(kb):
|
|
|
48
48
|
return [json.loads(s) for s in p.read_text(encoding="utf-8").splitlines() if s.strip()]
|
|
49
49
|
|
|
50
50
|
|
|
51
|
+
def _recs_lines(kb):
|
|
52
|
+
p = kb.root / "vault" / "facts" / "reconciliations.ndjson"
|
|
53
|
+
if not p.exists():
|
|
54
|
+
return []
|
|
55
|
+
return [json.loads(s) for s in p.read_text(encoding="utf-8").splitlines() if s.strip()]
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _two_claims(kb):
|
|
59
|
+
"""Seed a contradiction pair to reconcile."""
|
|
60
|
+
kb.add_raw("s", SRC)
|
|
61
|
+
kb.add_claim("clm_0001", "s", "The quick brown fox jumps over the lazy dog.",
|
|
62
|
+
subject="chunking", predicate="discards", polarity="asserts")
|
|
63
|
+
kb.add_claim("clm_0002", "s", "Caching answers beats recomputing them.",
|
|
64
|
+
subject="chunking", predicate="discards", polarity="denies")
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def _recon(decision, **kw):
|
|
68
|
+
rec = {"decision": decision, "claim_a": "clm_0001", "claim_b": "clm_0002"}
|
|
69
|
+
rec.update(kw)
|
|
70
|
+
return rec
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# --------------------------------------------------------------------------- #
|
|
74
|
+
# Reconciliations table
|
|
75
|
+
# --------------------------------------------------------------------------- #
|
|
76
|
+
def test_fact_add_reconciliation_supersede(kb, capsys):
|
|
77
|
+
_two_claims(kb)
|
|
78
|
+
rc = _run_add(
|
|
79
|
+
kb,
|
|
80
|
+
_ndjson(_recon("supersede", winner="clm_0001", rationale="newer source wins")),
|
|
81
|
+
"--table", "reconciliations", "--json",
|
|
82
|
+
)
|
|
83
|
+
assert rc == 0
|
|
84
|
+
[rec] = _recs_lines(kb)
|
|
85
|
+
assert rec["reconciliation_id"] == "rec_0001"
|
|
86
|
+
assert rec["decision"] == "supersede"
|
|
87
|
+
assert rec["winner"] == "clm_0001"
|
|
88
|
+
assert rec["claim_a"] == "clm_0001" and rec["claim_b"] == "clm_0002"
|
|
89
|
+
assert rec["rationale"] == "newer source wins"
|
|
90
|
+
assert ISO_Z.fullmatch(rec["at"])
|
|
91
|
+
assert json.loads(capsys.readouterr().out)["appended"][0]["reconciliation_id"] == "rec_0001"
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def test_fact_add_reconciliation_qualify_and_keep_both(kb):
|
|
95
|
+
_two_claims(kb)
|
|
96
|
+
kb.add_claim("clm_0003", "s", "Caching answers beats recomputing them.", subject="x")
|
|
97
|
+
assert _run_add(kb, _ndjson(_recon("qualify")), "--table", "reconciliations") == 0
|
|
98
|
+
# a *different* pair so dedup doesn't skip it
|
|
99
|
+
assert _run_add(kb, _ndjson(_recon("keep-both", claim_b="clm_0003")),
|
|
100
|
+
"--table", "reconciliations") == 0
|
|
101
|
+
recs = _recs_lines(kb)
|
|
102
|
+
assert [r["decision"] for r in recs] == ["qualify", "keep-both"]
|
|
103
|
+
assert [r["reconciliation_id"] for r in recs] == ["rec_0001", "rec_0002"]
|
|
104
|
+
assert all("winner" not in r for r in recs) # winner only for supersede
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_fact_add_reconciliation_supersede_requires_valid_winner(kb):
|
|
108
|
+
_two_claims(kb)
|
|
109
|
+
assert _run_add(kb, _ndjson(_recon("supersede")), "--table", "reconciliations") == 3 # no winner
|
|
110
|
+
assert _run_add(kb, _ndjson(_recon("supersede", winner="clm_9999")),
|
|
111
|
+
"--table", "reconciliations") == 3 # winner not in pair
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def test_fact_add_reconciliation_winner_forbidden_unless_supersede(kb):
|
|
115
|
+
_two_claims(kb)
|
|
116
|
+
assert _run_add(kb, _ndjson(_recon("qualify", winner="clm_0001")),
|
|
117
|
+
"--table", "reconciliations") == 3
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_fact_add_reconciliation_bad_decision_is_data_error(kb):
|
|
121
|
+
_two_claims(kb)
|
|
122
|
+
assert _run_add(kb, _ndjson(_recon("ignore-it")), "--table", "reconciliations") == 3
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
def test_fact_add_reconciliation_rejects_minted_fields(kb):
|
|
126
|
+
_two_claims(kb)
|
|
127
|
+
assert _run_add(kb, _ndjson(_recon("qualify", reconciliation_id="rec_0001")),
|
|
128
|
+
"--table", "reconciliations") == 3
|
|
129
|
+
assert _run_add(kb, _ndjson(_recon("qualify", at="2026-01-01T00:00:00Z")),
|
|
130
|
+
"--table", "reconciliations") == 3
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def test_fact_add_reconciliation_missing_claim_fails(kb, capsys):
|
|
134
|
+
_two_claims(kb)
|
|
135
|
+
rc = _run_add(kb, _ndjson(_recon("qualify", claim_b="clm_9999")),
|
|
136
|
+
"--table", "reconciliations", "--json")
|
|
137
|
+
assert rc == 1
|
|
138
|
+
assert _recs_lines(kb) == []
|
|
139
|
+
[failure] = json.loads(capsys.readouterr().out)["failures"]
|
|
140
|
+
assert failure["status"] == "MISSING_CLAIM"
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def test_fact_add_reconciliation_dedups_unordered_pair(kb, capsys):
|
|
144
|
+
_two_claims(kb)
|
|
145
|
+
assert _run_add(kb, _ndjson(_recon("qualify")), "--table", "reconciliations") == 0
|
|
146
|
+
capsys.readouterr()
|
|
147
|
+
# same pair, reversed order → already adjudicated → skipped, not re-appended
|
|
148
|
+
rc = _run_add(
|
|
149
|
+
kb,
|
|
150
|
+
_ndjson({"decision": "supersede", "claim_a": "clm_0002", "claim_b": "clm_0001", "winner": "clm_0002"}),
|
|
151
|
+
"--table", "reconciliations", "--json",
|
|
152
|
+
)
|
|
153
|
+
assert rc == 0
|
|
154
|
+
assert len(_recs_lines(kb)) == 1
|
|
155
|
+
assert json.loads(capsys.readouterr().out)["skipped"][0]["reason"] == "duplicate"
|
|
156
|
+
|
|
157
|
+
|
|
158
|
+
def test_fact_add_reconciliation_id_sequencing(kb):
|
|
159
|
+
_two_claims(kb)
|
|
160
|
+
kb.add_claim("clm_0003", "s", "The quick brown fox jumps over the lazy dog.", subject="x")
|
|
161
|
+
assert _run_add(kb, _ndjson(_recon("qualify")), "--table", "reconciliations") == 0
|
|
162
|
+
assert _run_add(kb, _ndjson(_recon("keep-both", claim_a="clm_0001", claim_b="clm_0003")),
|
|
163
|
+
"--table", "reconciliations") == 0
|
|
164
|
+
assert [r["reconciliation_id"] for r in _recs_lines(kb)] == ["rec_0001", "rec_0002"]
|
|
165
|
+
|
|
166
|
+
|
|
51
167
|
# --------------------------------------------------------------------------- #
|
|
52
168
|
# Happy path
|
|
53
169
|
# --------------------------------------------------------------------------- #
|
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
from scrip.errors import UsageError
|
|
3
|
+
|
|
4
|
+
from scrip import cli, query
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_named_claims_query(kb):
|
|
8
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
9
|
+
kb.add_claim("clm_1", "a", "the sky is blue", subject="sky", tags=["color"])
|
|
10
|
+
cols, rows = query.run(kb.root, name="claims")
|
|
11
|
+
assert "subject" in cols
|
|
12
|
+
assert any(r["claim_id"] == "clm_1" for r in rows)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_raw_sql_aggregate(kb):
|
|
16
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
17
|
+
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
18
|
+
kb.add_claim("clm_2", "a", "the sky is blue")
|
|
19
|
+
cols, rows = query.run(kb.root, sql="SELECT count(*) AS n FROM claims")
|
|
20
|
+
assert rows[0]["n"] == 2
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_contradictions_detected(kb):
|
|
24
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
25
|
+
kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
|
|
26
|
+
kb.add_claim(
|
|
27
|
+
"clm_1", "a", "the sky is blue", subject="sky", predicate="color",
|
|
28
|
+
polarity="asserts",
|
|
29
|
+
)
|
|
30
|
+
kb.add_claim(
|
|
31
|
+
"clm_2", "b", "the sky is not blue", subject="sky", predicate="color",
|
|
32
|
+
polarity="denies",
|
|
33
|
+
)
|
|
34
|
+
cols, rows = query.run(kb.root, name="contradictions")
|
|
35
|
+
assert len(rows) == 1
|
|
36
|
+
assert rows[0]["subject"] == "sky"
|
|
37
|
+
assert {rows[0]["source_a"], rows[0]["source_b"]} == {"raw/a", "raw/b"}
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _contradiction_pair(kb):
|
|
41
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
42
|
+
kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
|
|
43
|
+
kb.add_claim("clm_1", "a", "the sky is blue", subject="sky", predicate="color", polarity="asserts")
|
|
44
|
+
kb.add_claim("clm_2", "b", "the sky is not blue", subject="sky", predicate="color", polarity="denies")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_contradictions_excludes_reconciled_pairs(kb):
|
|
48
|
+
_contradiction_pair(kb)
|
|
49
|
+
assert len(query.run(kb.root, name="contradictions")[1]) == 1
|
|
50
|
+
# record a reconciliation for that pair (reversed order, to test symmetry)
|
|
51
|
+
(kb.root / "vault" / "facts" / "reconciliations.ndjson").write_text(
|
|
52
|
+
'{"reconciliation_id":"rec_0001","decision":"supersede","claim_a":"clm_2",'
|
|
53
|
+
'"claim_b":"clm_1","winner":"clm_2","at":"2026-01-01T00:00:00Z"}\n',
|
|
54
|
+
encoding="utf-8",
|
|
55
|
+
)
|
|
56
|
+
assert query.run(kb.root, name="contradictions")[1] == [] # adjudicated → gone
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_reconciliations_named_query(kb):
|
|
60
|
+
_contradiction_pair(kb)
|
|
61
|
+
(kb.root / "vault" / "facts" / "reconciliations.ndjson").write_text(
|
|
62
|
+
'{"reconciliation_id":"rec_0001","decision":"keep-both","claim_a":"clm_1",'
|
|
63
|
+
'"claim_b":"clm_2","at":"2026-01-01T00:00:00Z"}\n',
|
|
64
|
+
encoding="utf-8",
|
|
65
|
+
)
|
|
66
|
+
cols, rows = query.run(kb.root, name="reconciliations")
|
|
67
|
+
assert rows[0]["reconciliation_id"] == "rec_0001"
|
|
68
|
+
assert rows[0]["decision"] == "keep-both"
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_contradictions_works_without_reconciliations_file(kb):
|
|
72
|
+
# the reconciliations view is an empty stub when the file is absent
|
|
73
|
+
_contradiction_pair(kb)
|
|
74
|
+
assert len(query.run(kb.root, name="contradictions")[1]) == 1
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def test_query_reconciliations_cli_choice(kb):
|
|
78
|
+
# the named query must be a valid CLI `query` choice, not just in query.run
|
|
79
|
+
_contradiction_pair(kb)
|
|
80
|
+
assert cli.main(["query", "reconciliations", "--json", "--root", str(kb.root)]) == 0
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_reconciliations_stub_exposes_full_schema(kb):
|
|
84
|
+
# before the file exists, raw SQL over the stub's columns must still work
|
|
85
|
+
_contradiction_pair(kb)
|
|
86
|
+
cols, rows = query.run(kb.root, sql="SELECT decision, winner, reconciliation_id FROM reconciliations")
|
|
87
|
+
assert rows == []
|
|
88
|
+
assert "decision" in cols
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_where_and_limit(kb):
|
|
92
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
93
|
+
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
94
|
+
kb.add_claim("clm_2", "a", "the sky is blue")
|
|
95
|
+
cols, rows = query.run(kb.root, name="claims", where="claim_id = 'clm_1'", limit=10)
|
|
96
|
+
assert len(rows) == 1
|
|
97
|
+
assert rows[0]["claim_id"] == "clm_1"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_where_rejects_statement_chaining(kb):
|
|
101
|
+
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
102
|
+
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
103
|
+
with pytest.raises(UsageError):
|
|
104
|
+
query.run(kb.root, name="claims", where="1=1; DROP TABLE claims")
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_unknown_named_query_is_usage_error(kb):
|
|
108
|
+
with pytest.raises(UsageError):
|
|
109
|
+
query.run(kb.root, name="bogus")
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""`scrip span` — resolve an anchor and print the cited text. Read-only; lets an
|
|
2
|
+
agent read both sides of a contradiction (RECONCILE) without re-implementing
|
|
3
|
+
anchor resolution."""
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
|
|
7
|
+
import pytest
|
|
8
|
+
|
|
9
|
+
from scrip import anchors, cli
|
|
10
|
+
|
|
11
|
+
SRC = "# H\n\nThe quick brown fox jumps over the lazy dog.\n\nalpha beta. alpha beta.\n"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_span_prints_cited_text(kb, capsys):
|
|
15
|
+
kb.add_raw("s", SRC)
|
|
16
|
+
anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
|
|
17
|
+
rc = cli.main(["span", f"raw/s#{anchor}", "--root", str(kb.root)])
|
|
18
|
+
assert rc == 0
|
|
19
|
+
# the cited span is shown (normalized: lowercased, whitespace-collapsed)
|
|
20
|
+
assert "the quick brown fox jumps over the lazy dog." in capsys.readouterr().out.lower()
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_span_json_shape(kb, capsys):
|
|
24
|
+
kb.add_raw("s", SRC)
|
|
25
|
+
anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
|
|
26
|
+
rc = cli.main(["span", f"raw/s#{anchor}", "--json", "--root", str(kb.root)])
|
|
27
|
+
assert rc == 0
|
|
28
|
+
data = json.loads(capsys.readouterr().out)
|
|
29
|
+
assert set(data) == {"target", "status", "text"}
|
|
30
|
+
assert data["status"] == "OK"
|
|
31
|
+
assert "quick brown fox" in data["text"]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def test_span_by_claim_id(kb, capsys):
|
|
35
|
+
kb.add_raw("s", SRC)
|
|
36
|
+
kb.add_claim("clm_0001", "s", "The quick brown fox jumps over the lazy dog.")
|
|
37
|
+
rc = cli.main(["span", "--claim", "clm_0001", "--json", "--root", str(kb.root)])
|
|
38
|
+
assert rc == 0
|
|
39
|
+
data = json.loads(capsys.readouterr().out)
|
|
40
|
+
assert data["status"] == "OK"
|
|
41
|
+
assert "quick brown fox" in data["text"]
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def test_span_ambiguous_exits_1(kb, capsys):
|
|
45
|
+
kb.add_raw("s", SRC)
|
|
46
|
+
anchor = anchors.make_anchor(SRC, "alpha beta.") # appears twice
|
|
47
|
+
rc = cli.main(["span", f"raw/s#{anchor}", "--json", "--root", str(kb.root)])
|
|
48
|
+
assert rc == 1
|
|
49
|
+
assert json.loads(capsys.readouterr().out)["status"] == "AMBIGUOUS"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def test_span_broken_exits_1(kb, capsys):
|
|
53
|
+
kb.add_raw("s", SRC)
|
|
54
|
+
# a well-formed anchor whose quote is absent from the source
|
|
55
|
+
anchor = anchors.make_anchor("a totally different document about cats", "totally different")
|
|
56
|
+
rc = cli.main(["span", f"raw/s#{anchor}", "--json", "--root", str(kb.root)])
|
|
57
|
+
assert rc == 1
|
|
58
|
+
assert json.loads(capsys.readouterr().out)["status"] == "BROKEN"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def test_span_unsafe_source_is_usage_error(kb):
|
|
62
|
+
kb.add_raw("s", SRC)
|
|
63
|
+
anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
|
|
64
|
+
assert cli.main(["span", f"../../etc/passwd#{anchor}", "--root", str(kb.root)]) == 2
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_span_missing_source_is_data_error(kb):
|
|
68
|
+
kb.add_raw("s", SRC)
|
|
69
|
+
anchor = anchors.make_anchor(SRC, "The quick brown fox jumps over the lazy dog.")
|
|
70
|
+
assert cli.main(["span", f"raw/absent#{anchor}", "--root", str(kb.root)]) == 3
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def test_span_unknown_claim_is_data_error(kb):
|
|
74
|
+
kb.add_raw("s", SRC)
|
|
75
|
+
assert cli.main(["span", "--claim", "clm_9999", "--root", str(kb.root)]) == 3
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def test_span_requires_a_target(kb):
|
|
79
|
+
with pytest.raises(SystemExit) as e:
|
|
80
|
+
cli.main(["span", "--root", str(kb.root)])
|
|
81
|
+
assert e.value.code == 2
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def test_span_target_without_anchor_is_usage_error(kb):
|
|
85
|
+
kb.add_raw("s", SRC)
|
|
86
|
+
assert cli.main(["span", "raw/s", "--root", str(kb.root)]) == 2 # no '#<anchor>'
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
import pytest
|
|
2
|
-
from scrip.errors import UsageError
|
|
3
|
-
|
|
4
|
-
from scrip import query
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
def test_named_claims_query(kb):
|
|
8
|
-
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
9
|
-
kb.add_claim("clm_1", "a", "the sky is blue", subject="sky", tags=["color"])
|
|
10
|
-
cols, rows = query.run(kb.root, name="claims")
|
|
11
|
-
assert "subject" in cols
|
|
12
|
-
assert any(r["claim_id"] == "clm_1" for r in rows)
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
def test_raw_sql_aggregate(kb):
|
|
16
|
-
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
17
|
-
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
18
|
-
kb.add_claim("clm_2", "a", "the sky is blue")
|
|
19
|
-
cols, rows = query.run(kb.root, sql="SELECT count(*) AS n FROM claims")
|
|
20
|
-
assert rows[0]["n"] == 2
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
def test_contradictions_detected(kb):
|
|
24
|
-
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
25
|
-
kb.add_raw("b", "# B\n\nThe sky is not blue.\n")
|
|
26
|
-
kb.add_claim(
|
|
27
|
-
"clm_1", "a", "the sky is blue", subject="sky", predicate="color",
|
|
28
|
-
polarity="asserts",
|
|
29
|
-
)
|
|
30
|
-
kb.add_claim(
|
|
31
|
-
"clm_2", "b", "the sky is not blue", subject="sky", predicate="color",
|
|
32
|
-
polarity="denies",
|
|
33
|
-
)
|
|
34
|
-
cols, rows = query.run(kb.root, name="contradictions")
|
|
35
|
-
assert len(rows) == 1
|
|
36
|
-
assert rows[0]["subject"] == "sky"
|
|
37
|
-
assert {rows[0]["source_a"], rows[0]["source_b"]} == {"raw/a", "raw/b"}
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
def test_where_and_limit(kb):
|
|
41
|
-
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
42
|
-
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
43
|
-
kb.add_claim("clm_2", "a", "the sky is blue")
|
|
44
|
-
cols, rows = query.run(kb.root, name="claims", where="claim_id = 'clm_1'", limit=10)
|
|
45
|
-
assert len(rows) == 1
|
|
46
|
-
assert rows[0]["claim_id"] == "clm_1"
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
def test_where_rejects_statement_chaining(kb):
|
|
50
|
-
kb.add_raw("a", "# A\n\nThe sky is blue.\n")
|
|
51
|
-
kb.add_claim("clm_1", "a", "the sky is blue")
|
|
52
|
-
with pytest.raises(UsageError):
|
|
53
|
-
query.run(kb.root, name="claims", where="1=1; DROP TABLE claims")
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def test_unknown_named_query_is_usage_error(kb):
|
|
57
|
-
with pytest.raises(UsageError):
|
|
58
|
-
query.run(kb.root, name="bogus")
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|