PyPI - scriptoria - Versions diffs - 0.3.0__tar.gz → 0.5.0__tar.gz - Mend

scriptoria 0.3.0__tar.gz → 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{scriptoria-0.3.0 → scriptoria-0.5.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: scriptoria
-Version: 0.3.0
+Version: 0.5.0
 Summary: Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base
 Project-URL: Homepage, https://github.com/coredipper/scriptorium
 Project-URL: Changelog, https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md

{scriptoria-0.3.0 → scriptoria-0.5.0}/pyproject.toml RENAMED Viewed

@@ -2,7 +2,7 @@
 # Distribution name on PyPI is `scriptoria` (scrip/scriptorium were taken); the
 # CLI command and the import package both remain `scrip`.
 name = "scriptoria"
-version = "0.3.0"
+version = "0.5.0"
 description = "Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base"
 readme = "README.md"
 requires-python = ">=3.10"

{scriptoria-0.3.0 → scriptoria-0.5.0}/src/scrip/__init__.py RENAMED Viewed

@@ -13,7 +13,7 @@ from __future__ import annotations
 from pathlib import Path
-__version__ = "0.3.0"
+__version__ = "0.5.0"
 # --- canonical vault layout ------------------------------------------------
 # ``root`` is the repo/instance root: the directory containing ``vault/``.

{scriptoria-0.3.0 → scriptoria-0.5.0}/src/scrip/anchors.py RENAMED Viewed

@@ -90,6 +90,33 @@ def resolve(source_text: str, anchor: str) -> str:
     return "OK" if matches == 1 else "AMBIGUOUS"
+def span(source_text: str, anchor: str) -> tuple[str, str | None]:
+    """Return ``(status, cited_text)`` for ``anchor`` in ``source_text``.
+    Same verdicts as :func:`resolve`, but also returns the matched span (the
+    normalized cited text) so a caller can *read* what an anchor cites. For
+    ``AMBIGUOUS`` the window nearest the anchor's ``loc`` hint is returned; for
+    ``BROKEN`` the text is ``None``.
+    """
+    a = parse_anchor(anchor)
+    ns = normalize(source_text)
+    n, target, loc = a["len"], a["qh"], a["loc"]
+    length = len(ns)
+    if n <= 0 or n > length:
+        return "BROKEN", None
+    hits = [
+        start
+        for start in range(length - n + 1)
+        if hashlib.sha256(ns[start : start + n].encode("utf-8")).hexdigest() == target
+    ]
+    if not hits:
+        return "BROKEN", None
+    if len(hits) == 1:
+        return "OK", ns[hits[0] : hits[0] + n]
+    nearest = min(hits, key=lambda s: abs(s - loc * length))
+    return "AMBIGUOUS", ns[nearest : nearest + n]
 # --------------------------------------------------------------------------- #
 # Vault-wide verification
 # --------------------------------------------------------------------------- #

{scriptoria-0.3.0 → scriptoria-0.5.0}/src/scrip/cli.py RENAMED Viewed

@@ -336,6 +336,67 @@ def cmd_new(args: argparse.Namespace) -> int:
     return 0
+def _parse_source_ids(raw: str) -> list[str]:
+    """Parse a comma-separated `--from` value into validated source ids, WITHOUT
+    requiring the sources to exist (unlike `cmd_new`): scoring a not-yet-ingested
+    proposed topic is legitimate. Keeps the traversal-safety check."""
+    ids: list[str] = []
+    for s in (part.strip() for part in raw.split(",")):
+        if not s:
+            continue
+        sid = s if s.startswith("raw/") else f"raw/{s}"
+        _safe_slug(sid.split("#", 1)[0][len("raw/") :], "source")
+        ids.append(sid)
+    if not ids:
+        raise errors.UsageError("--from requires at least one source id")
+    return ids
+def cmd_span(args: argparse.Namespace) -> int:
+    from . import anchors
+    root = resolve_root(args.root)
+    if args.claim:
+        from . import facts
+        source_id, anchor = facts.claim_source_anchor(root, args.claim)
+    else:
+        if "#" not in args.target:
+            raise errors.UsageError("target must be raw/<slug>#<anchor>")
+        source_id, anchor = args.target.split("#", 1)
+        source_id = source_id if source_id.startswith("raw/") else f"raw/{source_id}"
+        _safe_slug(source_id[len("raw/") :], "source")
+    text = anchors.source_text(root, source_id)
+    status, cited = anchors.span(text, anchor)
+    if args.json:
+        _emit({"target": f"{source_id}#{anchor}", "status": status, "text": cited})
+    else:
+        print(f"[{status}] {source_id}")
+        if cited is not None:
+            print(cited)
+    return 0 if status == "OK" else 1
+def cmd_similar(args: argparse.Namespace) -> int:
+    from . import similar
+    root = resolve_root(args.root)
+    sources = _parse_source_ids(args.sources)
+    result = similar.compute_similar(
+        root,
+        title=args.title,
+        sources=sources,
+        kind=args.kind,
+        exclude=set(args.exclude),
+        top=args.top,
+    )
+    if args.json:
+        _emit(result)
+    else:
+        similar.print_similar(result)
+    return 0
 def cmd_fact_add(args: argparse.Namespace) -> int:
     from . import facts
@@ -352,7 +413,12 @@ def cmd_fact_add(args: argparse.Namespace) -> int:
         _emit(result)
     else:
         for r in result["appended"]:
-            ident = r.get("claim_id") or r.get("entity_id") or f"{r['src']} -> {r['dst']}"
+            ident = (
+                r.get("claim_id")
+                or r.get("entity_id")
+                or r.get("reconciliation_id")
+                or f"{r.get('src')} -> {r.get('dst')}"
+            )
             print(f"  appended {ident}")
         for s in result["skipped"]:
             print(f"  = record {s['index']} skipped (duplicate)")
@@ -481,7 +547,7 @@ def build_parser() -> argparse.ArgumentParser:
     pq.add_argument(
         "name",
         nargs="?",
-        choices=["claims", "entities", "edges", "contradictions"],
+        choices=["claims", "entities", "edges", "contradictions", "reconciliations"],
         help="a named query (omit when using --sql)",
     )
     pq.add_argument("--sql", help="raw DuckDB SQL (views: claims, entities, edges)")
@@ -549,6 +615,47 @@ def build_parser() -> argparse.ArgumentParser:
     pn.add_argument("--title", help="human title (default: the slug)")
     pn.set_defaults(func=cmd_new)
+    psp = sub.add_parser(
+        "span",
+        parents=[common],
+        help="resolve an anchor and print the cited text (read both sides of a contradiction)",
+    )
+    span_src = psp.add_mutually_exclusive_group(required=True)
+    span_src.add_argument("target", nargs="?", metavar="raw/<slug>#<anchor>", help="anchor target")
+    span_src.add_argument("--claim", metavar="ID", help="resolve this claim's anchor instead")
+    psp.set_defaults(func=cmd_span)
+    psim = sub.add_parser(
+        "similar",
+        parents=[common],
+        help="score existing wiki pages by topic overlap with a proposed page (PROMOTE step 1)",
+    )
+    psim.add_argument(
+        "--title", required=True, help="proposed page title (tokenized for title overlap)"
+    )
+    psim.add_argument(
+        "--from",
+        dest="sources",
+        required=True,
+        metavar="raw/a,raw/b",
+        help="comma-separated source ids the proposed page would derive from",
+    )
+    psim.add_argument(
+        "--kind",
+        choices=["concept", "entity"],
+        default="concept",
+        help="score only candidates of this kind (default: concept)",
+    )
+    psim.add_argument(
+        "--exclude",
+        metavar="ID",
+        action="append",
+        default=[],
+        help="page id to skip (repeatable); use when re-scoring an existing page",
+    )
+    psim.add_argument("--top", type=int, metavar="N", help="limit to the N highest-scoring candidates")
+    psim.set_defaults(func=cmd_similar)
     pfact = sub.add_parser(
         "fact",
         help="validated writers for the facts/ layer (claims mint verified anchors)",
@@ -562,7 +669,7 @@ def build_parser() -> argparse.ArgumentParser:
     )
     pfa.add_argument(
         "--table",
-        choices=["claims", "entities", "edges"],
+        choices=["claims", "entities", "edges", "reconciliations"],
         default="claims",
         help="facts table to append to (default: claims)",
     )

{scriptoria-0.3.0 → scriptoria-0.5.0}/src/scrip/facts.py RENAMED Viewed

@@ -36,10 +36,12 @@ _FILES = {
     "claims": "claims.ndjson",
     "entities": "entities.ndjson",
     "edges": "graph.ndjson",
+    "reconciliations": "reconciliations.ndjson",
 }
 # Fields scrip mints itself; proposing them is a schema error, not a finding.
 _SCRIP_OWNED = ("claim_id", "anchor", "extracted_at")
+_RECON_OWNED = ("reconciliation_id", "at")
 _CLAIM_REQUIRED = ("quote", "source_id", "subject", "predicate", "object", "polarity", "confidence")
 _CLAIM_ALLOWED = frozenset((*_CLAIM_REQUIRED, "claim_text", "tags"))
@@ -47,12 +49,16 @@ _ENTITY_REQUIRED = ("entity_id", "name", "kind")
 _ENTITY_ALLOWED = frozenset((*_ENTITY_REQUIRED, "tags"))
 _EDGE_REQUIRED = ("src", "dst", "kind")
 _EDGE_ALLOWED = frozenset(_EDGE_REQUIRED)
+_DECISIONS = ("supersede", "qualify", "keep-both")
+_RECON_REQUIRED = ("decision", "claim_a", "claim_b")
+_RECON_ALLOWED = frozenset((*_RECON_REQUIRED, "winner", "rationale"))
 # Same conservative shape ``cli._safe_slug`` enforces — no path separators,
 # '..', or leading dot — applied to source ids arriving as record *data*.
 _SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")
 _CLAIM_ID_RE = re.compile(r"clm_(\d+)")
+_RECON_ID_RE = re.compile(r"rec_(\d+)")
 def _now() -> str:
@@ -96,12 +102,18 @@ def _check_tags(rec: dict, index: int) -> None:
         raise DataError(f"record {index}: 'tags' must be a list of strings")
-def _check_shape(rec: dict, index: int, required: tuple[str, ...], allowed: frozenset[str]) -> None:
-    owned = [k for k in _SCRIP_OWNED if k in rec]
-    if owned:
+def _check_shape(
+    rec: dict,
+    index: int,
+    required: tuple[str, ...],
+    allowed: frozenset[str],
+    owned: tuple[str, ...] = _SCRIP_OWNED,
+) -> None:
+    present = [k for k in owned if k in rec]
+    if present:
         raise DataError(
-            f"record {index}: scrip mints {', '.join(owned)} itself — propose a "
-            f"verbatim 'quote', not precomputed ids/anchors/timestamps"
+            f"record {index}: scrip mints {', '.join(present)} itself — do not "
+            f"propose precomputed ids/anchors/timestamps"
         )
     unknown = sorted(rec.keys() - allowed)
     if unknown:
@@ -136,10 +148,27 @@ def _validate(table: str, rec: dict, index: int) -> None:
         if not (eid.startswith("entity/") and _SLUG_RE.fullmatch(eid[len("entity/") :])):
             raise DataError(f"record {index}: entity_id must look like entity/<slug>")
         _check_tags(rec, index)
-    else:  # edges
+    elif table == "edges":
         _check_shape(rec, index, _EDGE_REQUIRED, _EDGE_ALLOWED)
         for key in _EDGE_REQUIRED:
             _check_str(rec, key, index)
+    else:  # reconciliations
+        _check_shape(rec, index, _RECON_REQUIRED, _RECON_ALLOWED, owned=_RECON_OWNED)
+        for key in ("decision", "claim_a", "claim_b"):
+            _check_str(rec, key, index)
+        if rec["decision"] not in _DECISIONS:
+            raise DataError(f"record {index}: decision must be one of {', '.join(_DECISIONS)}")
+        if "rationale" in rec:
+            _check_str(rec, "rationale", index, allow_blank=True)
+        # winner is required for (and only for) supersede, and must be one of the pair
+        if rec["decision"] == "supersede":
+            winner = rec.get("winner")
+            if winner not in (rec["claim_a"], rec["claim_b"]):
+                raise DataError(
+                    f"record {index}: supersede needs 'winner' = claim_a or claim_b"
+                )
+        elif "winner" in rec:
+            raise DataError(f"record {index}: 'winner' is only valid for decision 'supersede'")
 # --------------------------------------------------------------------------- #
@@ -218,6 +247,19 @@ def _read_table(path: Path) -> tuple[list[dict], str]:
     return records, text
+def claim_source_anchor(root: Path, claim_id: str) -> tuple[str, str]:
+    """Return ``(source_id, anchor)`` for a claim, for `scrip span --claim`.
+    Raises :class:`DataError` if the claim is missing or lacks the fields."""
+    records, _ = _read_table(facts_dir(root) / "claims.ndjson")
+    for rec in records:
+        if rec.get("claim_id") == claim_id:
+            sid, anchor = rec.get("source_id"), rec.get("anchor")
+            if not isinstance(sid, str) or not isinstance(anchor, str):
+                raise DataError(f"claim {claim_id} is missing source_id/anchor")
+            return sid, anchor
+    raise DataError(f"no such claim: {claim_id}")
 def _claim_key(source_id: str, qh: str, rec: dict) -> tuple:
     return (
         source_id,
@@ -251,6 +293,17 @@ def _next_claim_id(existing: list[dict]) -> tuple[int, int]:
     return highest + 1, max(4, len(str(highest)))
+def _next_recon_id(existing: list[dict]) -> tuple[int, int]:
+    """Return ``(next_number, pad_width)`` continuing the ``rec_NNNN`` sequence."""
+    numbers = [
+        int(m.group(1))
+        for rec in existing
+        if (m := _RECON_ID_RE.fullmatch(str(rec.get("reconciliation_id", ""))))
+    ]
+    highest = max(numbers, default=0)
+    return highest + 1, max(4, len(str(highest)))
 # --------------------------------------------------------------------------- #
 # facts/_meta.yaml: merge derived-from, never stamp
 # --------------------------------------------------------------------------- #
@@ -394,7 +447,7 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
                             "detail": "an entity with this id already exists with different fields",
                         }
                     )
-        else:  # edges
+        elif table == "edges":
             seen_edges = {
                 (rec.get("src"), rec.get("dst"), rec.get("kind")) for rec in existing
             }
@@ -405,6 +458,43 @@ def add(root: Path, table: str, proposals: list[dict]) -> dict:
                     continue
                 seen_edges.add(key)
                 appended.append({"src": rec["src"], "dst": rec["dst"], "kind": rec["kind"]})
+        else:  # reconciliations
+            claim_ids = {c.get("claim_id") for c in _read_table(facts_dir(root) / "claims.ndjson")[0]}
+            for i, rec in enumerate(proposals):
+                refs = [rec["claim_a"], rec["claim_b"]]
+                if rec["decision"] == "supersede":
+                    refs.append(rec["winner"])
+                missing = next((r for r in refs if r not in claim_ids), None)
+                if missing is not None:
+                    failures.append({
+                        "index": i, "status": "MISSING_CLAIM", "claim": missing,
+                        "detail": f"{missing!r} is not a claim in claims.ndjson",
+                    })
+            if failures:
+                return {"table": table, "appended": [], "skipped": [], "failures": failures}
+            seen_pairs = {frozenset((r.get("claim_a"), r.get("claim_b"))) for r in existing}
+            number, width = _next_recon_id(existing)
+            now = _now()
+            for i, rec in enumerate(proposals):
+                pair = frozenset((rec["claim_a"], rec["claim_b"]))
+                if pair in seen_pairs:
+                    skipped.append({"index": i, "reason": "duplicate", "existing_id": None})
+                    continue
+                seen_pairs.add(pair)
+                rid = f"rec_{number:0{width}d}"
+                number += 1
+                full = {
+                    "reconciliation_id": rid,
+                    "decision": rec["decision"],
+                    "claim_a": rec["claim_a"],
+                    "claim_b": rec["claim_b"],
+                }
+                if rec["decision"] == "supersede":
+                    full["winner"] = rec["winner"]
+                if rec.get("rationale"):
+                    full["rationale"] = rec["rationale"]
+                full["at"] = now
+                appended.append(full)
         if failures:
             return {"table": table, "appended": [], "skipped": skipped, "failures": failures}

{scriptoria-0.3.0 → scriptoria-0.5.0}/src/scrip/query.py RENAMED Viewed

@@ -22,15 +22,18 @@ _VIEWS = {
     "claims": "claims.ndjson",
     "entities": "entities.ndjson",
     "edges": "graph.ndjson",
+    "reconciliations": "reconciliations.ndjson",
 }
 _NAMED = {
     "claims": "SELECT * FROM claims",
     "entities": "SELECT * FROM entities",
     "edges": "SELECT * FROM edges",
+    "reconciliations": "SELECT * FROM reconciliations",
     # contradiction *candidates*: same subject+predicate, opposing polarity,
-    # from different sources. Detection is deterministic; adjudication is the
-    # agent's job.
+    # from different sources, AND not yet adjudicated (no reconciliation record
+    # for the pair, either order) — so RECONCILE makes the set converge.
+    # Detection is deterministic; adjudication is the agent's job.
     "contradictions": """
         SELECT a.claim_id AS claim_a, b.claim_id AS claim_b,
                a.subject, a.predicate,
@@ -41,10 +44,15 @@ _NAMED = {
         WHERE a.polarity = 'asserts'
           AND b.polarity = 'denies'
           AND a.source_id <> b.source_id
+          AND NOT EXISTS (
+            SELECT 1 FROM reconciliations r
+            WHERE (r.claim_a = a.claim_id AND r.claim_b = b.claim_id)
+               OR (r.claim_a = b.claim_id AND r.claim_b = a.claim_id)
+          )
     """,
 }
-_FILTERABLE = {"claims", "entities", "edges"}
+_FILTERABLE = {"claims", "entities", "edges", "reconciliations"}
 def _connect(root: Path) -> duckdb.DuckDBPyConnection:
@@ -57,6 +65,16 @@ def _connect(root: Path) -> duckdb.DuckDBPyConnection:
                 f"CREATE VIEW {view} AS "
                 f"SELECT * FROM read_ndjson_auto('{p.as_posix()}')"
             )
+        elif view == "reconciliations":
+            # Always present (empty stub) so `contradictions` can anti-join it and
+            # raw SQL over its columns works even before any reconciliation exists.
+            con.execute(
+                "CREATE VIEW reconciliations AS SELECT "
+                "NULL::VARCHAR AS reconciliation_id, NULL::VARCHAR AS decision, "
+                "NULL::VARCHAR AS claim_a, NULL::VARCHAR AS claim_b, "
+                "NULL::VARCHAR AS winner, NULL::VARCHAR AS rationale, "
+                "NULL::VARCHAR AS at WHERE FALSE"
+            )
     return con

scriptoria-0.5.0/src/scrip/similar.py ADDED Viewed

@@ -0,0 +1,168 @@
+"""Deterministic topic-overlap scoring for PROMOTE step 1 (`scrip similar`).
+Ranks existing wiki pages by how much a proposed topic overlaps each, from three
+file-derived signals:
+- **title** — Jaccard of normalized title tokens (the §6 normalization).
+- **sources** — Jaccard of `derived-from` source ids (block suffix stripped).
+- **tags** — Jaccard of tag sets. Pages carry no `tags` frontmatter (SPEC §4),
+  so a page's tags are *derived*: the union of `tags` over claims whose
+  `source_id` is one of the page's sources.
+`combined` is a weighted sum (sources dominates — shared sources is the strongest
+same-topic signal). This is **purely informational**: it reports scores and
+leaves the High/Middle/Low merge decision of AGENT.md PROMOTE to the caller,
+exactly as `query contradictions` leaves adjudication to the agent. No lock, no
+model, no DuckDB.
+"""
+from __future__ import annotations
+import json
+from collections.abc import Iterable, Mapping
+from pathlib import Path
+from . import facts_dir, frontmatter
+from .errors import DataError
+from .graph import scan_derived
+from .hashing import normalize
+DEFAULT_WEIGHTS = {"title": 0.25, "sources": 0.5, "tags": 0.25}
+def _tokens(title: str) -> set[str]:
+    return set(normalize(title).split())
+def _strip_block(dep: str) -> str:
+    """`raw/x#b3` -> `raw/x` (block-scoped deps share their whole source)."""
+    return dep.split("#", 1)[0]
+def _source_set(derived_from: Iterable[str]) -> set[str]:
+    return {_strip_block(d) for d in derived_from}
+def _jaccard(a: set[str], b: set[str]) -> float:
+    if not a and not b:
+        return 0.0
+    return len(a & b) / len(a | b)
+def _source_tags(root: Path) -> dict[str, set[str]]:
+    """Map each `source_id` to the union of `tags` over its claims. Built once
+    per run from facts/claims.ndjson (parsed directly — no DuckDB dependency)."""
+    out: dict[str, set[str]] = {}
+    p = facts_dir(root) / "claims.ndjson"
+    if not p.exists():
+        return out
+    for lineno, raw_line in enumerate(p.read_text(encoding="utf-8").splitlines(), start=1):
+        line = raw_line.strip()
+        if not line:
+            continue
+        try:
+            rec = json.loads(line)
+        except json.JSONDecodeError as e:
+            raise DataError(f"claims.ndjson:{lineno}: invalid JSON: {e}") from e
+        if not isinstance(rec, dict):
+            raise DataError(f"claims.ndjson:{lineno}: expected a JSON object")
+        sid = rec.get("source_id")
+        if not isinstance(sid, str):
+            raise DataError(f"claims.ndjson:{lineno}: 'source_id' must be a string")
+        tags = rec.get("tags")
+        if tags is None:
+            continue
+        if not isinstance(tags, list) or any(not isinstance(t, str) for t in tags):
+            raise DataError(f"claims.ndjson:{lineno}: 'tags' must be a list of strings")
+        out.setdefault(sid, set()).update(tags)
+    return out
+def _page_tags(sources: set[str], source_tags: Mapping[str, set[str]]) -> set[str]:
+    out: set[str] = set()
+    for s in sources:
+        out |= source_tags.get(s, set())
+    return out
+def compute_similar(
+    root: str | Path,
+    *,
+    title: str,
+    sources: Iterable[str],
+    kind: str = "concept",
+    exclude: Iterable[str] | None = None,
+    top: int | None = None,
+    weights: Mapping[str, float] | None = None,
+) -> dict:
+    """Score existing `kind` wiki pages against the proposed (title, sources).
+    Returns ``{proposed, weights, candidates}`` with candidates sorted by
+    ``combined`` desc then id asc, truncated to ``top``.
+    """
+    root = Path(root)
+    w = dict(weights or DEFAULT_WEIGHTS)
+    skip = set(exclude or ())
+    prop_sources = _source_set(sources)
+    prop_tokens = _tokens(title)
+    source_tags = _source_tags(root)
+    prop_tags = _page_tags(prop_sources, source_tags)
+    want_type = f"wiki.{kind}"
+    candidates: list[dict] = []
+    for cid, d in scan_derived(root).items():
+        if d.get("type") != want_type or cid in skip:
+            continue  # other-kind pages and the facts.set row are dropped here
+        c_sources = _source_set(d["derived_from"])
+        meta, _ = frontmatter.load(root / d["path"])
+        c_title = (meta.get("title") if meta else "") or ""
+        c_tags = _page_tags(c_sources, source_tags)
+        title_s = _jaccard(prop_tokens, _tokens(c_title))
+        sources_s = _jaccard(prop_sources, c_sources)
+        tags_s = _jaccard(prop_tags, c_tags)
+        combined = w["title"] * title_s + w["sources"] * sources_s + w["tags"] * tags_s
+        candidates.append(
+            {
+                "id": cid,
+                "title": c_title,
+                "path": d["path"],
+                "kind": kind,
+                "scores": {
+                    "title": round(title_s, 6),
+                    "sources": round(sources_s, 6),
+                    "tags": round(tags_s, 6),
+                    "combined": round(combined, 6),
+                },
+                "shared": {
+                    "sources": sorted(prop_sources & c_sources),
+                    "tags": sorted(prop_tags & c_tags),
+                },
+            }
+        )
+    candidates.sort(key=lambda c: (-c["scores"]["combined"], c["id"]))
+    if top is not None:
+        candidates = candidates[:top]
+    return {
+        "proposed": {"title": title, "derived_from": list(sources), "kind": kind},
+        "weights": w,
+        "candidates": candidates,
+    }
+def print_similar(result: dict) -> None:
+    p = result["proposed"]
+    print(f'proposed: "{p["title"]}"  ({p["kind"]}, from {len(p["derived_from"])} source(s))')
+    cands = result["candidates"]
+    if not cands:
+        print(f"no existing {p['kind']} pages to compare.")
+        return
+    for c in cands:
+        s = c["scores"]
+        print(f'  {s["combined"]:.3f}  {c["id"]}  "{c["title"]}"')
+        print(
+            f'         sources {s["sources"]:.2f}  tags {s["tags"]:.2f}  title {s["title"]:.2f}'
+            f'   shared sources: {len(c["shared"]["sources"])}, tags: {len(c["shared"]["tags"])}'
+        )
+    print(f"({len(cands)} candidate(s))")

{scriptoria-0.3.0 → scriptoria-0.5.0}/tests/conftest.py RENAMED Viewed

@@ -70,6 +70,8 @@ class KB:
         *,
         stamp: bool = True,
         body: str = "Body.\n",
+        title: str | None = None,
+        kind: str = "concept",
     ) -> str:
         deps = {
             sid: h
@@ -77,16 +79,18 @@ class KB:
             if (h := self._dep_hash(sid)) is not None
         }
         meta: dict = {
-            "id": f"concept/{slug}",
-            "type": "wiki.concept",
-            "title": slug,
+            "id": f"{kind}/{slug}",
+            "type": f"wiki.{kind}",
+            "title": title or slug,
             "derived-from": list(derived_from),
         }
         if stamp:
             meta["input-hash"] = hashing.input_hash(deps)
         meta["last-compiled"] = "2026-01-01T00:00:00Z"
         meta["confidence"] = 0.9
-        path = self.root / "vault" / "wiki" / "concepts" / f"{slug}.md"
+        subdir = "concepts" if kind == "concept" else "entities"
+        path = self.root / "vault" / "wiki" / subdir / f"{slug}.md"
+        path.parent.mkdir(parents=True, exist_ok=True)
         path.write_text(frontmatter.dump(meta, body), encoding="utf-8")
         return meta["id"]

scriptoria 0.3.0__tar.gz → 0.5.0__tar.gz

scriptoria 0.3.0__tar.gz → 0.5.0__tar.gz