PyPI - scriptoria - Versions diffs - 0.2.0__tar.gz → 0.4.0__tar.gz - Mend

scriptoria 0.2.0__tar.gz → 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{scriptoria-0.2.0 → scriptoria-0.4.0}/.gitignore RENAMED Viewed

@@ -21,8 +21,11 @@ build/
 # Advisory write lock: ephemeral runtime state, never committed (see SPEC §11)
 /.kb/lock
-# NOTE: .kb/manifest.json is intentionally NOT ignored — it is a committed,
-# regenerable cache (see SPEC §8). On a merge conflict, discard it and run
+# Manifest: a regenerable speed cache. SPEC §8 says it *may* be committed; we
+# choose not to — it stores (mtime, size) that are wrong on every fresh clone
+# anyway, and its hashes/timestamps churn diffs. Rebuild any time with
 # `scrip status --rebuild-manifest`.
+/.kb/manifest.json
 # roborev snapshots
 /.roborev/

{scriptoria-0.2.0 → scriptoria-0.4.0}/PKG-INFO RENAMED Viewed

@@ -1,8 +1,23 @@
 Metadata-Version: 2.4
 Name: scriptoria
-Version: 0.2.0
+Version: 0.4.0
 Summary: Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base
+Project-URL: Homepage, https://github.com/coredipper/scriptorium
+Project-URL: Changelog, https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md
+Project-URL: Issues, https://github.com/coredipper/scriptorium/issues
 License: MIT
+Keywords: agent,knowledge-base,markdown,provenance,staleness,wiki
+Classifier: Development Status :: 4 - Beta
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Topic :: Text Processing :: Markup :: Markdown
 Requires-Python: >=3.10
 Requires-Dist: duckdb>=1.0
 Requires-Dist: pyyaml>=6.0

{scriptoria-0.2.0 → scriptoria-0.4.0}/pyproject.toml RENAMED Viewed

@@ -2,16 +2,35 @@
 # Distribution name on PyPI is `scriptoria` (scrip/scriptorium were taken); the
 # CLI command and the import package both remain `scrip`.
 name = "scriptoria"
-version = "0.2.0"
+version = "0.4.0"
 description = "Deterministic scriptorium-keeper (the `scrip` CLI): staleness, provenance integrity, and fact queries for an agent-compiled knowledge base"
 readme = "README.md"
 requires-python = ">=3.10"
 license = { text = "MIT" }
+keywords = ["knowledge-base", "provenance", "staleness", "agent", "markdown", "wiki"]
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Environment :: Console",
+    "Intended Audience :: Developers",
+    "Operating System :: OS Independent",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.11",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
+    "Topic :: Text Processing :: Markup :: Markdown",
+]
 dependencies = [
     "duckdb>=1.0",
     "pyyaml>=6.0",
 ]
+[project.urls]
+Homepage = "https://github.com/coredipper/scriptorium"
+Changelog = "https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md"
+Issues = "https://github.com/coredipper/scriptorium/issues"
 [project.scripts]
 scrip = "scrip.cli:main"
@@ -26,7 +45,9 @@ embeddings = ["model2vec>=0.3", "numpy>=1.24"]
 ingest = ["trafilatura>=1.8", "pypdf>=4.0"]
 [dependency-groups]
-dev = ["pytest>=8"]
+# numpy is here so the embeddings index/search path is testable with a toy
+# encoder — the real backend (model2vec) stays an optional [embeddings] extra.
+dev = ["pytest>=8", "numpy>=1.24"]
 [build-system]
 requires = ["hatchling"]

{scriptoria-0.2.0 → scriptoria-0.4.0}/src/scrip/__init__.py RENAMED Viewed

@@ -13,7 +13,7 @@ from __future__ import annotations
 from pathlib import Path
-__version__ = "0.2.0"
+__version__ = "0.4.0"
 # --- canonical vault layout ------------------------------------------------
 # ``root`` is the repo/instance root: the directory containing ``vault/``.

{scriptoria-0.2.0 → scriptoria-0.4.0}/src/scrip/cli.py RENAMED Viewed

@@ -336,6 +336,76 @@ def cmd_new(args: argparse.Namespace) -> int:
     return 0
+def _parse_source_ids(raw: str) -> list[str]:
+    """Parse a comma-separated `--from` value into validated source ids, WITHOUT
+    requiring the sources to exist (unlike `cmd_new`): scoring a not-yet-ingested
+    proposed topic is legitimate. Keeps the traversal-safety check."""
+    ids: list[str] = []
+    for s in (part.strip() for part in raw.split(",")):
+        if not s:
+            continue
+        sid = s if s.startswith("raw/") else f"raw/{s}"
+        _safe_slug(sid.split("#", 1)[0][len("raw/") :], "source")
+        ids.append(sid)
+    if not ids:
+        raise errors.UsageError("--from requires at least one source id")
+    return ids
+def cmd_similar(args: argparse.Namespace) -> int:
+    from . import similar
+    root = resolve_root(args.root)
+    sources = _parse_source_ids(args.sources)
+    result = similar.compute_similar(
+        root,
+        title=args.title,
+        sources=sources,
+        kind=args.kind,
+        exclude=set(args.exclude),
+        top=args.top,
+    )
+    if args.json:
+        _emit(result)
+    else:
+        similar.print_similar(result)
+    return 0
+def cmd_fact_add(args: argparse.Namespace) -> int:
+    from . import facts
+    root = resolve_root(args.root)
+    if args.file:
+        try:
+            text = Path(args.file).read_text(encoding="utf-8")
+        except OSError as e:
+            raise errors.UsageError(f"cannot read --file: {e}") from e
+    else:
+        text = sys.stdin.read()
+    result = facts.add(root, args.table, facts.parse_ndjson(text))
+    if args.json:
+        _emit(result)
+    else:
+        for r in result["appended"]:
+            ident = r.get("claim_id") or r.get("entity_id") or f"{r['src']} -> {r['dst']}"
+            print(f"  appended {ident}")
+        for s in result["skipped"]:
+            print(f"  = record {s['index']} skipped (duplicate)")
+        for f in result["failures"]:
+            print(f"  ✗ record {f['index']}: {f['status']} — {f['detail']}")
+        if result["failures"]:
+            print(
+                f"nothing appended: {len(result['failures'])} record(s) failed "
+                f"(the batch is all-or-nothing)"
+            )
+        else:
+            print(f"{len(result['appended'])} record(s) appended to facts/")
+            if result["appended"]:
+                print("  next: `scrip stamp vault/facts/_meta.yaml`, then `scrip verify`")
+    return 1 if result["failures"] else 0
 def cmd_ingest(args: argparse.Namespace) -> int:
     from . import ingest, lock
@@ -515,6 +585,61 @@ def build_parser() -> argparse.ArgumentParser:
     pn.add_argument("--title", help="human title (default: the slug)")
     pn.set_defaults(func=cmd_new)
+    psim = sub.add_parser(
+        "similar",
+        parents=[common],
+        help="score existing wiki pages by topic overlap with a proposed page (PROMOTE step 1)",
+    )
+    psim.add_argument(
+        "--title", required=True, help="proposed page title (tokenized for title overlap)"
+    )
+    psim.add_argument(
+        "--from",
+        dest="sources",
+        required=True,
+        metavar="raw/a,raw/b",
+        help="comma-separated source ids the proposed page would derive from",
+    )
+    psim.add_argument(
+        "--kind",
+        choices=["concept", "entity"],
+        default="concept",
+        help="score only candidates of this kind (default: concept)",
+    )
+    psim.add_argument(
+        "--exclude",
+        metavar="ID",
+        action="append",
+        default=[],
+        help="page id to skip (repeatable); use when re-scoring an existing page",
+    )
+    psim.add_argument("--top", type=int, metavar="N", help="limit to the N highest-scoring candidates")
+    psim.set_defaults(func=cmd_similar)
+    pfact = sub.add_parser(
+        "fact",
+        help="validated writers for the facts/ layer (claims mint verified anchors)",
+    )
+    fact_sub = pfact.add_subparsers(dest="fact_command", required=True, metavar="<action>")
+    pfa = fact_sub.add_parser(
+        "add",
+        parents=[common],
+        help="validate proposed NDJSON records and append them all-or-nothing; "
+        "claims carry a verbatim `quote` and scrip mints the anchor/id/timestamp",
+    )
+    pfa.add_argument(
+        "--table",
+        choices=["claims", "entities", "edges"],
+        default="claims",
+        help="facts table to append to (default: claims)",
+    )
+    fact_in = pfa.add_mutually_exclusive_group(required=True)
+    fact_in.add_argument("--file", metavar="NDJSON", help="read proposed records from a file")
+    fact_in.add_argument(
+        "--stdin", action="store_true", help="read proposed records from stdin"
+    )
+    pfa.set_defaults(func=cmd_fact_add)
     pin = sub.add_parser(
         "ingest",
         parents=[common],

{scriptoria-0.2.0 → scriptoria-0.4.0}/src/scrip/embeddings.py RENAMED Viewed

@@ -47,7 +47,7 @@ def _get_model():
     os.environ.setdefault("HF_HUB_DISABLE_PROGRESS_BARS", "1")
     try:
-        from model2vec import StaticModel
+        from model2vec import StaticModel  # pyright: ignore[reportMissingImports]
     except Exception:
         return None
     try:
@@ -95,7 +95,7 @@ def build_index(root: Path) -> int:
     model = _get_model()
     if model is None:
         raise RuntimeError("no embeddings backend available")
-    import numpy as np
+    import numpy as np  # pyright: ignore[reportMissingImports]
     items = list(_iter_blocks(root))
     if items:
@@ -126,7 +126,7 @@ def vector_search(root: Path, query: str, k: int = 5):
     d = _embeddings_dir(root)
     if model is None or not (d / "vectors.npy").exists() or not (d / "meta.json").exists():
         return None
-    import numpy as np
+    import numpy as np  # pyright: ignore[reportMissingImports]
     meta = json.loads((d / "meta.json").read_text(encoding="utf-8"))
     items = meta["items"]

scriptoria 0.2.0__tar.gz → 0.4.0__tar.gz

scriptoria 0.2.0__tar.gz → 0.4.0__tar.gz