scrip-harness 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrip_harness-0.3.0/.gitignore +31 -0
- scrip_harness-0.3.0/PKG-INFO +102 -0
- scrip_harness-0.3.0/README.md +83 -0
- scrip_harness-0.3.0/pyproject.toml +48 -0
- scrip_harness-0.3.0/pyrightconfig.json +7 -0
- scrip_harness-0.3.0/src/scrip_harness/__init__.py +13 -0
- scrip_harness-0.3.0/src/scrip_harness/cli.py +92 -0
- scrip_harness-0.3.0/src/scrip_harness/compile.py +69 -0
- scrip_harness-0.3.0/src/scrip_harness/extract.py +97 -0
- scrip_harness-0.3.0/src/scrip_harness/model.py +78 -0
- scrip_harness-0.3.0/src/scrip_harness/runner.py +236 -0
- scrip_harness-0.3.0/tests/test_compile.py +44 -0
- scrip_harness-0.3.0/tests/test_extract.py +246 -0
- scrip_harness-0.3.0/tests/test_runner.py +161 -0
- scrip_harness-0.3.0/uv.lock +668 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# Python / uv
|
|
2
|
+
.venv/
|
|
3
|
+
**/.venv/
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.pyc
|
|
6
|
+
*.egg-info/
|
|
7
|
+
dist/
|
|
8
|
+
build/
|
|
9
|
+
|
|
10
|
+
# DuckDB scratch / temp writes
|
|
11
|
+
*.duckdb
|
|
12
|
+
*.duckdb.wal
|
|
13
|
+
*.tmp
|
|
14
|
+
|
|
15
|
+
# OS
|
|
16
|
+
.DS_Store
|
|
17
|
+
|
|
18
|
+
# Embeddings index: a large, regenerable binary cache (rebuild with `scrip index`)
|
|
19
|
+
/.kb/embeddings/
|
|
20
|
+
|
|
21
|
+
# Advisory write lock: ephemeral runtime state, never committed (see SPEC §11)
|
|
22
|
+
/.kb/lock
|
|
23
|
+
|
|
24
|
+
# Manifest: a regenerable speed cache. SPEC §8 says it *may* be committed; we
|
|
25
|
+
# choose not to — it stores (mtime, size) that are wrong on every fresh clone
|
|
26
|
+
# anyway, and its hashes/timestamps churn diffs. Rebuild any time with
|
|
27
|
+
# `scrip status --rebuild-manifest`.
|
|
28
|
+
/.kb/manifest.json
|
|
29
|
+
|
|
30
|
+
# roborev snapshots
|
|
31
|
+
/.roborev/
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: scrip-harness
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Runnable AGENT.md compile loop for scriptorium: drives `scrip` via subprocess + Claude. Not part of the deterministic scrip core.
|
|
5
|
+
Project-URL: Homepage, https://github.com/coredipper/scriptorium/tree/main/harness
|
|
6
|
+
Project-URL: Changelog, https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md
|
|
7
|
+
Project-URL: Issues, https://github.com/coredipper/scriptorium/issues
|
|
8
|
+
License: MIT
|
|
9
|
+
Classifier: Development Status :: 4 - Beta
|
|
10
|
+
Classifier: Environment :: Console
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Operating System :: OS Independent
|
|
13
|
+
Classifier: Programming Language :: Python :: 3
|
|
14
|
+
Requires-Python: >=3.10
|
|
15
|
+
Requires-Dist: anthropic>=0.40
|
|
16
|
+
Requires-Dist: pydantic>=2
|
|
17
|
+
Requires-Dist: scriptoria>=0.3
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# scrip-harness — the runnable compile loop
|
|
21
|
+
|
|
22
|
+
The deterministic `scrip` keeper does staleness, provenance, and queries. It never
|
|
23
|
+
calls a model. **scrip-harness** is the optional *judgment* layer that makes the
|
|
24
|
+
[AGENT.md](../AGENT.md) COMPILE step runnable: it asks Claude to synthesize a wiki
|
|
25
|
+
page from a source, then hands every verifiable step back to `scrip`.
|
|
26
|
+
|
|
27
|
+
The dependency points one way only: the harness depends on `scrip` (and the
|
|
28
|
+
Anthropic SDK); `scrip` depends on neither. Removing this directory leaves a fully
|
|
29
|
+
valid, fully deterministic vault and CLI behind.
|
|
30
|
+
|
|
31
|
+
## How a compile runs
|
|
32
|
+
|
|
33
|
+
`scrip-harness compile <slug>` (for `vault/raw/<slug>.md`):
|
|
34
|
+
|
|
35
|
+
1. **Draft** — Claude (`claude-opus-4-8`, adaptive thinking, structured output)
|
|
36
|
+
returns a `DraftPage`: a title, markdown prose with footnote markers
|
|
37
|
+
`[^a1], [^a2], …`, and one *verbatim quote* per marker.
|
|
38
|
+
2. **Mint** — each quote goes through `scrip anchor`, which **fails the compile**
|
|
39
|
+
if the quote isn't present in the source or isn't unique. A hallucinated or
|
|
40
|
+
paraphrased quote cannot get past this step.
|
|
41
|
+
3. **Scaffold + fill** — `scrip new` writes the frontmatter; the harness fills the
|
|
42
|
+
body with the prose + the minted footnote definitions.
|
|
43
|
+
4. **Stamp + verify** — `scrip stamp` records provenance hashes; `scrip verify`
|
|
44
|
+
proves every citation resolves. If verify fails, the compile errors out rather
|
|
45
|
+
than leaving a stamped-but-broken page.
|
|
46
|
+
|
|
47
|
+
So the model owns *what to say*; `scrip` owns *what is true on disk*.
|
|
48
|
+
|
|
49
|
+
## How an extract runs
|
|
50
|
+
|
|
51
|
+
`scrip-harness extract <slug>` (for `vault/raw/<slug>.md`):
|
|
52
|
+
|
|
53
|
+
1. **Draft** — Claude returns a `DraftExtraction`: structured claims, each with a
|
|
54
|
+
*verbatim quote*, a subject/predicate/object triple, and a polarity.
|
|
55
|
+
2. **Mint + append** — the claims go to `scrip fact add --stdin`, which verifies
|
|
56
|
+
every quote (minting anchors), assigns ids and timestamps, skips exact
|
|
57
|
+
duplicates, and appends **all-or-nothing** under the write lock.
|
|
58
|
+
3. **Retry** — if quotes come back BROKEN/AMBIGUOUS, the failures go back to
|
|
59
|
+
Claude for one replacement per failure (lengthened until unique, or an empty
|
|
60
|
+
quote to drop the claim); bounded retries, then the extract fails cleanly.
|
|
61
|
+
4. **Stamp + verify** — `scrip stamp vault/facts/_meta.yaml`, then `scrip verify`;
|
|
62
|
+
contradiction candidates from `scrip query contradictions` are surfaced for
|
|
63
|
+
the operator to RECONCILE per [AGENT.md](../AGENT.md).
|
|
64
|
+
|
|
65
|
+
## Install & run
|
|
66
|
+
|
|
67
|
+
Both packages are on PyPI. `scrip-harness` bundles `scriptoria` as a dependency
|
|
68
|
+
and drives it through its own interpreter, so it is self-sufficient — install
|
|
69
|
+
`scriptoria` as a tool too only if you want the `scrip` command on PATH for
|
|
70
|
+
direct use:
|
|
71
|
+
|
|
72
|
+
```sh
|
|
73
|
+
uv tool install scrip-harness # this package → `scrip-harness` (pulls scriptoria)
|
|
74
|
+
uv tool install 'scriptoria[ingest]' # optional: `scrip` on PATH + HTML/PDF ingest
|
|
75
|
+
export ANTHROPIC_API_KEY=... # the harness calls Claude; scrip never does
|
|
76
|
+
|
|
77
|
+
scrip-harness compile article # synthesize + verify a page from raw/article
|
|
78
|
+
scrip-harness extract article # pull claims into facts/
|
|
79
|
+
scrip ingest <url> --slug article # bring a source in (needs the install above)
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
(From a checkout, `uv tool install ./scrip` and `uv tool install ./harness`
|
|
83
|
+
install the local versions instead.)
|
|
84
|
+
|
|
85
|
+
## Develop / test
|
|
86
|
+
|
|
87
|
+
```sh
|
|
88
|
+
cd harness && uv run pytest # hermetic: the model is stubbed; scrip runs for real
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
The tests inject a stub draft function (no network, no API key) and drive the real
|
|
92
|
+
`scrip` subcommands over a temp vault, asserting the result is stamped and verified.
|
|
93
|
+
|
|
94
|
+
## Scope & limits (v1)
|
|
95
|
+
|
|
96
|
+
- Covers **COMPILE** (one source → one wiki page) and **EXTRACT** (one source →
|
|
97
|
+
claims in `facts/`, with the bounded quote-retry loop). Entities/edges go
|
|
98
|
+
through `scrip fact add --table entities|edges` by hand; PROMOTE (merge/dedup)
|
|
99
|
+
and RECONCILE (contradictions) are not yet automated here — drive them with
|
|
100
|
+
`scrip` directly per [AGENT.md](../AGENT.md).
|
|
101
|
+
- Single source per page/extract. Multi-source synthesis, and adopting the
|
|
102
|
+
quote-retry loop in COMPILE too, are future work.
|
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
# scrip-harness — the runnable compile loop
|
|
2
|
+
|
|
3
|
+
The deterministic `scrip` keeper does staleness, provenance, and queries. It never
|
|
4
|
+
calls a model. **scrip-harness** is the optional *judgment* layer that makes the
|
|
5
|
+
[AGENT.md](../AGENT.md) COMPILE step runnable: it asks Claude to synthesize a wiki
|
|
6
|
+
page from a source, then hands every verifiable step back to `scrip`.
|
|
7
|
+
|
|
8
|
+
The dependency points one way only: the harness depends on `scrip` (and the
|
|
9
|
+
Anthropic SDK); `scrip` depends on neither. Removing this directory leaves a fully
|
|
10
|
+
valid, fully deterministic vault and CLI behind.
|
|
11
|
+
|
|
12
|
+
## How a compile runs
|
|
13
|
+
|
|
14
|
+
`scrip-harness compile <slug>` (for `vault/raw/<slug>.md`):
|
|
15
|
+
|
|
16
|
+
1. **Draft** — Claude (`claude-opus-4-8`, adaptive thinking, structured output)
|
|
17
|
+
returns a `DraftPage`: a title, markdown prose with footnote markers
|
|
18
|
+
`[^a1], [^a2], …`, and one *verbatim quote* per marker.
|
|
19
|
+
2. **Mint** — each quote goes through `scrip anchor`, which **fails the compile**
|
|
20
|
+
if the quote isn't present in the source or isn't unique. A hallucinated or
|
|
21
|
+
paraphrased quote cannot get past this step.
|
|
22
|
+
3. **Scaffold + fill** — `scrip new` writes the frontmatter; the harness fills the
|
|
23
|
+
body with the prose + the minted footnote definitions.
|
|
24
|
+
4. **Stamp + verify** — `scrip stamp` records provenance hashes; `scrip verify`
|
|
25
|
+
proves every citation resolves. If verify fails, the compile errors out rather
|
|
26
|
+
than leaving a stamped-but-broken page.
|
|
27
|
+
|
|
28
|
+
So the model owns *what to say*; `scrip` owns *what is true on disk*.
|
|
29
|
+
|
|
30
|
+
## How an extract runs
|
|
31
|
+
|
|
32
|
+
`scrip-harness extract <slug>` (for `vault/raw/<slug>.md`):
|
|
33
|
+
|
|
34
|
+
1. **Draft** — Claude returns a `DraftExtraction`: structured claims, each with a
|
|
35
|
+
*verbatim quote*, a subject/predicate/object triple, and a polarity.
|
|
36
|
+
2. **Mint + append** — the claims go to `scrip fact add --stdin`, which verifies
|
|
37
|
+
every quote (minting anchors), assigns ids and timestamps, skips exact
|
|
38
|
+
duplicates, and appends **all-or-nothing** under the write lock.
|
|
39
|
+
3. **Retry** — if quotes come back BROKEN/AMBIGUOUS, the failures go back to
|
|
40
|
+
Claude for one replacement per failure (lengthened until unique, or an empty
|
|
41
|
+
quote to drop the claim); bounded retries, then the extract fails cleanly.
|
|
42
|
+
4. **Stamp + verify** — `scrip stamp vault/facts/_meta.yaml`, then `scrip verify`;
|
|
43
|
+
contradiction candidates from `scrip query contradictions` are surfaced for
|
|
44
|
+
the operator to RECONCILE per [AGENT.md](../AGENT.md).
|
|
45
|
+
|
|
46
|
+
## Install & run
|
|
47
|
+
|
|
48
|
+
Both packages are on PyPI. `scrip-harness` bundles `scriptoria` as a dependency
|
|
49
|
+
and drives it through its own interpreter, so it is self-sufficient — install
|
|
50
|
+
`scriptoria` as a tool too only if you want the `scrip` command on PATH for
|
|
51
|
+
direct use:
|
|
52
|
+
|
|
53
|
+
```sh
|
|
54
|
+
uv tool install scrip-harness # this package → `scrip-harness` (pulls scriptoria)
|
|
55
|
+
uv tool install 'scriptoria[ingest]' # optional: `scrip` on PATH + HTML/PDF ingest
|
|
56
|
+
export ANTHROPIC_API_KEY=... # the harness calls Claude; scrip never does
|
|
57
|
+
|
|
58
|
+
scrip-harness compile article # synthesize + verify a page from raw/article
|
|
59
|
+
scrip-harness extract article # pull claims into facts/
|
|
60
|
+
scrip ingest <url> --slug article # bring a source in (needs the install above)
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
(From a checkout, `uv tool install ./scrip` and `uv tool install ./harness`
|
|
64
|
+
install the local versions instead.)
|
|
65
|
+
|
|
66
|
+
## Develop / test
|
|
67
|
+
|
|
68
|
+
```sh
|
|
69
|
+
cd harness && uv run pytest # hermetic: the model is stubbed; scrip runs for real
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
The tests inject a stub draft function (no network, no API key) and drive the real
|
|
73
|
+
`scrip` subcommands over a temp vault, asserting the result is stamped and verified.
|
|
74
|
+
|
|
75
|
+
## Scope & limits (v1)
|
|
76
|
+
|
|
77
|
+
- Covers **COMPILE** (one source → one wiki page) and **EXTRACT** (one source →
|
|
78
|
+
claims in `facts/`, with the bounded quote-retry loop). Entities/edges go
|
|
79
|
+
through `scrip fact add --table entities|edges` by hand; PROMOTE (merge/dedup)
|
|
80
|
+
and RECONCILE (contradictions) are not yet automated here — drive them with
|
|
81
|
+
`scrip` directly per [AGENT.md](../AGENT.md).
|
|
82
|
+
- Single source per page/extract. Multi-source synthesis, and adopting the
|
|
83
|
+
quote-retry loop in COMPILE too, are future work.
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "scrip-harness"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Runnable AGENT.md compile loop for scriptorium: drives `scrip` via subprocess + Claude. Not part of the deterministic scrip core."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
requires-python = ">=3.10"
|
|
7
|
+
license = { text = "MIT" }
|
|
8
|
+
classifiers = [
|
|
9
|
+
"Development Status :: 4 - Beta",
|
|
10
|
+
"Environment :: Console",
|
|
11
|
+
"Intended Audience :: Developers",
|
|
12
|
+
"Operating System :: OS Independent",
|
|
13
|
+
"Programming Language :: Python :: 3",
|
|
14
|
+
]
|
|
15
|
+
dependencies = [
|
|
16
|
+
"anthropic>=0.40",
|
|
17
|
+
# Imported directly for the structured-output schemas (compile/extract) — don't
|
|
18
|
+
# rely on anthropic to pull it in transitively; declare what we import.
|
|
19
|
+
"pydantic>=2",
|
|
20
|
+
# Version floor (not the dev path source below) is what ships in the wheel's
|
|
21
|
+
# Requires-Dist, so an installed scrip-harness pulls scriptoria from PyPI.
|
|
22
|
+
"scriptoria>=0.3",
|
|
23
|
+
]
|
|
24
|
+
|
|
25
|
+
[project.urls]
|
|
26
|
+
Homepage = "https://github.com/coredipper/scriptorium/tree/main/harness"
|
|
27
|
+
Changelog = "https://github.com/coredipper/scriptorium/blob/main/CHANGELOG.md"
|
|
28
|
+
Issues = "https://github.com/coredipper/scriptorium/issues"
|
|
29
|
+
|
|
30
|
+
[project.scripts]
|
|
31
|
+
scrip-harness = "scrip_harness.cli:main"
|
|
32
|
+
|
|
33
|
+
[tool.uv.sources]
|
|
34
|
+
scriptoria = { path = "../scrip" }
|
|
35
|
+
|
|
36
|
+
[dependency-groups]
|
|
37
|
+
dev = ["pytest>=8"]
|
|
38
|
+
|
|
39
|
+
[build-system]
|
|
40
|
+
requires = ["hatchling"]
|
|
41
|
+
build-backend = "hatchling.build"
|
|
42
|
+
|
|
43
|
+
[tool.hatch.build.targets.wheel]
|
|
44
|
+
packages = ["src/scrip_harness"]
|
|
45
|
+
|
|
46
|
+
[tool.pytest.ini_options]
|
|
47
|
+
testpaths = ["tests"]
|
|
48
|
+
addopts = "-q"
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
"""scrip-harness — the runnable AGENT.md compile loop for scriptorium.
|
|
2
|
+
|
|
3
|
+
This is the **judgment** layer: it calls a model (Claude) to synthesize a wiki
|
|
4
|
+
page from a source, then delegates every *verifiable* step to the deterministic
|
|
5
|
+
``scrip`` CLI via subprocess — minting each citation with ``scrip anchor`` (which
|
|
6
|
+
rejects a quote that is not verbatim and unique) and recording provenance with
|
|
7
|
+
``scrip stamp``. So a hallucinated quote cannot survive into a stamped page.
|
|
8
|
+
|
|
9
|
+
``scrip`` never imports this package or any SDK; the dependency points the other
|
|
10
|
+
way. The harness is optional and lives outside the deterministic core.
|
|
11
|
+
"""
|
|
12
|
+
|
|
13
|
+
__version__ = "0.2.0"
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""``scrip-harness compile|extract <slug>`` — run the AGENT.md COMPILE or
|
|
2
|
+
EXTRACT step for one source.
|
|
3
|
+
|
|
4
|
+
This is the model-driven entry point. It resolves the scriptorium root, calls
|
|
5
|
+
Claude to draft the page or the claims, and hands every verifiable step to
|
|
6
|
+
``scrip``.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
from pathlib import Path
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def _resolve_root(root_arg: str | None) -> Path:
|
|
17
|
+
if root_arg:
|
|
18
|
+
return Path(root_arg).expanduser()
|
|
19
|
+
cur = Path.cwd().resolve()
|
|
20
|
+
for cand in (cur, *cur.parents):
|
|
21
|
+
if (cand / "vault").is_dir() and ((cand / "SPEC.md").exists() or (cand / ".kb").is_dir()):
|
|
22
|
+
return cand
|
|
23
|
+
raise SystemExit("scrip-harness: could not locate a scriptorium root; pass --root")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse the command line and run one ``compile`` or ``extract`` step.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read ``sys.argv``.

    Returns:
        ``0`` on success, ``1`` when the runner raises ``CompileError`` or
        ``ExtractError`` (the message is printed to stderr).
    """
    parser = argparse.ArgumentParser(
        prog="scrip-harness",
        description="Runnable scriptorium compile loop (drives scrip + Claude).",
    )
    commands = parser.add_subparsers(dest="command", required=True, metavar="<command>")

    def _add_shared_options(sp: argparse.ArgumentParser) -> None:
        # --root and --model are declared identically on both subcommands.
        sp.add_argument("--root")
        sp.add_argument("--model", help="Claude model id (default: claude-opus-4-8)")

    compile_cmd = commands.add_parser(
        "compile",
        help="synthesize wiki/<kind>s/<slug> from raw/<slug> via Claude, then stamp + verify",
    )
    compile_cmd.add_argument("slug")
    compile_cmd.add_argument("--kind", choices=["concept", "entity"], default="concept")
    _add_shared_options(compile_cmd)

    extract_cmd = commands.add_parser(
        "extract",
        help="extract claims from raw/<slug> into facts/ via Claude (anchors minted "
        "and verified by `scrip fact add`), then stamp + verify",
    )
    extract_cmd.add_argument("slug")
    _add_shared_options(extract_cmd)

    args = parser.parse_args(argv)

    # Imported only after the arguments have parsed, so usage errors and
    # --help exit before the model/runner modules are loaded.
    from . import model as model_mod
    from .runner import CompileError, ExtractError, compile_page, extract_facts

    root = _resolve_root(args.root)
    chosen_model = args.model if args.model else model_mod.DEFAULT_MODEL

    if args.command != "extract":
        # The subparser is required and only two commands exist, so this is
        # the "compile" branch.
        def draft_fn(text: str, *, source_id: str):
            return model_mod.draft_page(text, source_id=source_id, model=chosen_model)

        try:
            page = compile_page(root, args.slug, kind=args.kind, draft_fn=draft_fn)
        except CompileError as exc:
            print(f"scrip-harness: {exc}", file=sys.stderr)
            return 1
        print(f"compiled {page.relative_to(root)} (verified)")
        return 0

    def extract_draft_fn(text: str, *, source_id: str, failures=None):
        # Bind the chosen model; `failures` is forwarded unchanged.
        return model_mod.draft_extraction(
            text, source_id=source_id, model=chosen_model, failures=failures
        )

    try:
        result = extract_facts(root, args.slug, draft_fn=extract_draft_fn)
    except ExtractError as exc:
        print(f"scrip-harness: {exc}", file=sys.stderr)
        return 1

    appended = result["appended"]
    skipped = result["skipped"]
    duplicate_note = f", {len(skipped)} duplicate(s) skipped" if skipped else ""
    print(f"extracted {len(appended)} claim(s) from raw/{args.slug} (verified{duplicate_note})")

    contradictions = result["contradictions"]
    if contradictions:
        print(
            f" {len(contradictions)} contradiction candidate(s) — "
            "run `scrip query contradictions` and RECONCILE per AGENT.md"
        )
    return 0


if __name__ == "__main__":
    sys.exit(main())
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""Deterministic pieces of the compile loop: the structured draft schema, prompt
|
|
2
|
+
construction, and page-body assembly. No network, no scrip — unit-testable."""
|
|
3
|
+
|
|
4
|
+
from __future__ import annotations
|
|
5
|
+
|
|
6
|
+
import re
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel
|
|
9
|
+
|
|
10
|
+
# Match ANY footnote reference label, not just well-formed a-markers, so that a
|
|
11
|
+
# foreign ([^b1]) or malformed ([^a01]) reference is surfaced and rejected rather
|
|
12
|
+
# than silently ignored (which would leave an undefined footnote in the page).
|
|
13
|
+
_MARKER = re.compile(r"\[\^([^\]]+)\]")


def extract_markers(body: str) -> list[str]:
    """Return the distinct footnote reference labels found in ``body``.

    Labels come back verbatim (``[^a1]`` yields ``"a1"``) in the order they
    first appear. Nothing is validated here: any ``[^label]`` matches, whether
    or not it is a well-formed ``aN`` marker.
    """
    # findall() yields the single capture group for every match; a dict keeps
    # insertion order, so fromkeys() drops repeats while preserving the order
    # of first appearance.
    return list(dict.fromkeys(_MARKER.findall(body)))
|
|
26
|
+
|
|
27
|
+
# System prompt for the page-drafting call. It asks for prose carrying
# [^aN] footnote markers plus one verbatim quote per marker, and tells the
# model to leave the footnote definitions out of the body.
# This is runtime text sent to the model — edit the wording deliberately.
SYSTEM = (
    "You are the scribe for a scriptorium knowledge base. From the single source "
    "you are given, synthesize a concise, accurate concept page in markdown.\n"
    "Rules:\n"
    "- Write only what the source supports; do not add outside facts.\n"
    "- Mark each claim-bearing sentence with a footnote marker ([^a1], [^a2], …) "
    "in order of first appearance.\n"
    "- For every marker, return one claim whose `quote` is copied VERBATIM from the "
    "source (it is machine-verified against the source text; paraphrases are "
    "rejected). Quote enough words to be unique.\n"
    "- Keep the body free of the footnote *definitions* — only the markers. The "
    "definitions are generated from your quotes."
)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
# Structured-output record for one citation: the quote backing a single
# footnote marker, plus an optional free-text note (defaults to "").
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a class docstring into the generated JSON schema's description.
class DraftClaim(BaseModel):
    quote: str
    """Verbatim text copied from the source, supporting the matching marker."""
    note: str = ""
    """Optional human-readable note on what the claim asserts."""
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
# Structured-output record for a whole drafted page: a title, the markdown
# body carrying the footnote markers, and the claims that back those markers.
# The one-claim-per-marker ordering is a convention stated in the prompts; it
# is not enforced by this schema.
class DraftPage(BaseModel):
    title: str
    body: str
    """Markdown prose containing footnote markers [^a1], [^a2], … in order."""
    claims: list[DraftClaim]
    """One claim per marker, in the same order as the markers in `body`."""
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def build_user_prompt(source_text: str) -> str:
    """Build the user message for the page-drafting call.

    The fixed instructions come first, then a ``----- SOURCE -----`` divider,
    then ``source_text`` appended verbatim.
    """
    instructions = (
        "Synthesize a concept page from the source below. In the body, mark each "
        "claim-bearing sentence with a footnote marker [^a1], [^a2], … in order. "
        "Return one claim per marker (same order), each with a `quote` copied "
        "verbatim from the source."
    )
    divider = "\n\n----- SOURCE -----\n"
    return "".join((instructions, divider, source_text))
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def assemble_body(draft: DraftPage, footnotes: list[str]) -> str:
    """Join the drafted prose and the footnote definition lines into one body.

    Trailing whitespace is stripped from ``draft.body``; a blank line follows,
    then the ``footnotes`` one per line, then a final newline.
    """
    prose = draft.body.rstrip()
    definitions = "\n".join(footnotes)
    return f"{prose}\n\n{definitions}\n"
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
"""Deterministic pieces of the extract loop: the structured fact schema, prompt
|
|
2
|
+
construction, and the NDJSON serialization `scrip fact add` consumes. No
|
|
3
|
+
network, no scrip — unit-testable."""
|
|
4
|
+
|
|
5
|
+
from __future__ import annotations
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
from typing import Literal
|
|
9
|
+
|
|
10
|
+
from pydantic import BaseModel
|
|
11
|
+
|
|
12
|
+
# System prompt for the claim-extraction call. It describes each field of the
# structured record (verbatim quote, subject/predicate/object triple,
# polarity, optional claim_text, confidence in [0, 1]).
# This is runtime text sent to the model — edit the wording deliberately.
EXTRACT_SYSTEM = (
    "You are the scribe for a scriptorium knowledge base. From the single source "
    "you are given, extract atomic factual claims as structured records.\n"
    "Rules:\n"
    "- Extract only what the source supports; do not add outside facts.\n"
    "- Each claim's `quote` is copied VERBATIM from the source (it is "
    "machine-verified against the source text; paraphrases are rejected). Quote "
    "enough words to be unique within the source.\n"
    "- `subject`/`predicate`/`object` form a coarse triple used for grouping and "
    "contradiction detection: keep them short, lowercase noun/verb phrases, and "
    "reuse the same wording for the same idea across claims.\n"
    "- `polarity` is `asserts`, `denies`, or `qualifies` — what the source does "
    "to the triple, not your judgment of it.\n"
    "- `claim_text` is an optional one-sentence restatement; leave it empty to "
    "reuse the quote.\n"
    "- `confidence` in [0, 1] is your honest rating that the claim faithfully "
    "represents the source."
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
# Structured-output record for one extracted claim.
# Required: quote and the subject/predicate/object triple.
# Defaulted: polarity ("asserts"), confidence (0.8), claim_text (""), tags ([]).
# NOTE(review): the prompt asks for confidence in [0, 1], but this schema does
# not constrain the range — confirm whether the consumer validates it.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a class docstring into the generated JSON schema's description.
class DraftFact(BaseModel):
    quote: str
    """Verbatim text copied from the source; anchors are minted from this."""
    subject: str
    predicate: str
    object: str
    polarity: Literal["asserts", "denies", "qualifies"] = "asserts"
    confidence: float = 0.8
    claim_text: str = ""
    """Optional restatement; empty means the quote itself is the claim text."""
    tags: list[str] = []
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
# Top-level structured-output envelope for an extraction: the list of claims
# the model proposes for one source.
class DraftExtraction(BaseModel):
    claims: list[DraftFact]
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def build_extract_prompt(source_text: str) -> str:
    """Build the user message for the first extraction call.

    The fixed instructions come first, then a ``----- SOURCE -----`` divider,
    then ``source_text`` appended verbatim.
    """
    instructions = (
        "Extract the atomic factual claims from the source below as structured "
        "records. Each claim needs a verbatim `quote`, a subject/predicate/object "
        "triple, and a polarity."
    )
    divider = "\n\n----- SOURCE -----\n"
    return "".join((instructions, divider, source_text))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def build_retry_prompt(source_text: str, failures: list[dict]) -> str:
    """Build the user message asking for replacements for failed quotes.

    Each entry of ``failures`` becomes one bullet, in the order given, showing
    its ``status``, its JSON-encoded ``quote`` and its ``detail``. ``status``
    is required (a missing key raises ``KeyError``); ``quote`` and ``detail``
    default to the empty string. The prompt tells the model that an empty
    replacement quote drops the claim, and ends with the source text.
    """
    bullets = []
    for failure in failures:
        status = failure["status"]
        # JSON-encode the quote so embedded quotes/newlines stay unambiguous;
        # non-ASCII characters are kept as-is.
        quoted = json.dumps(failure.get("quote", ""), ensure_ascii=False)
        detail = failure.get("detail", "")
        bullets.append(f"- status {status}: {quoted} ({detail})")
    listing = "\n".join(bullets)

    explanation = (
        "Some quotes you proposed did not verify against the source: an AMBIGUOUS "
        "quote appears more than once (lengthen it until unique); a BROKEN quote "
        "is not present verbatim (re-copy it exactly).\n\n"
    )
    failed_section = f"Failed quotes, in order:\n{listing}\n\n"
    request = (
        "Return exactly one replacement claim per failed quote, in the same "
        "order, with the corrected verbatim `quote` and the claim's triple/"
        "polarity. If a claim cannot be supported by a verbatim quote, return it "
        "with an empty `quote` to drop it.\n\n----- SOURCE -----\n"
    )
    return explanation + failed_section + request + source_text
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def to_ndjson(facts: list[DraftFact], source_id: str) -> str:
    """Serialize ``facts`` as newline-delimited JSON, one record per line.

    Every record carries quote, subject, predicate, object, polarity,
    confidence and the shared ``source_id``. ``claim_text`` and ``tags`` are
    written only when non-empty. Each line, the last included, ends with a
    newline; an empty ``facts`` list yields the empty string. Non-ASCII
    characters are emitted unescaped.
    """

    def _record(fact: DraftFact) -> dict:
        record: dict = {
            "quote": fact.quote,
            "subject": fact.subject,
            "predicate": fact.predicate,
            "object": fact.object,
            "polarity": fact.polarity,
            "confidence": fact.confidence,
            "source_id": source_id,
        }
        # Optional fields are left out entirely when empty.
        optional = (("claim_text", fact.claim_text), ("tags", fact.tags))
        record.update({key: value for key, value in optional if value})
        return record

    return "".join(
        json.dumps(_record(fact), ensure_ascii=False) + "\n" for fact in facts
    )
|
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
"""The only LLM-touching module. ``scrip`` never imports this; the harness does.
|
|
2
|
+
|
|
3
|
+
Uses the Anthropic SDK's structured-output parse helper so the draft comes back
|
|
4
|
+
as a validated ``DraftPage`` (or ``DraftExtraction``) rather than free text to scrape.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .compile import SYSTEM, DraftPage, build_user_prompt
|
|
10
|
+
from .extract import (
|
|
11
|
+
EXTRACT_SYSTEM,
|
|
12
|
+
DraftExtraction,
|
|
13
|
+
build_extract_prompt,
|
|
14
|
+
build_retry_prompt,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
# Model id used by draft_page() / draft_extraction() when the caller does not
# pass `model`; the CLI also falls back to it when --model is omitted.
DEFAULT_MODEL = "claude-opus-4-8"
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def draft_page(
    source_text: str,
    *,
    source_id: str,
    model: str = DEFAULT_MODEL,
    client=None,
) -> DraftPage:
    """Ask Claude to synthesize a concept page from ``source_text``.

    Args:
        source_text: Full text of the source to synthesize from.
        source_id: Identifier of the source; used only in the error message.
        model: Claude model id to call.
        client: Optional pre-built client exposing ``messages.parse``. When
            omitted, an ``anthropic.Anthropic()`` client is created.

    Returns:
        The parsed :class:`DraftPage` from the response.

    Raises:
        RuntimeError: the response carried no parsed output.
    """
    if client is None:
        # Import the SDK only when we have to build the client ourselves, so
        # a caller that injects a client (e.g. a test stub) needs neither the
        # SDK importable nor an API key. `is None` also keeps an injected
        # client that happens to be falsy from being silently replaced.
        import anthropic

        client = anthropic.Anthropic()

    resp = client.messages.parse(
        model=model,
        max_tokens=16000,
        thinking={"type": "adaptive"},
        system=SYSTEM,
        messages=[{"role": "user", "content": build_user_prompt(source_text)}],
        output_format=DraftPage,
    )
    out = resp.parsed_output
    if out is None:
        raise RuntimeError(f"model returned no parseable draft for {source_id}")
    return out
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def draft_extraction(
    source_text: str,
    *,
    source_id: str,
    model: str = DEFAULT_MODEL,
    client=None,
    failures: list[dict] | None = None,
) -> DraftExtraction:
    """Ask Claude to extract structured claims from ``source_text``.

    Args:
        source_text: Full text of the source to extract from.
        source_id: Identifier of the source; used only in the error message.
        model: Claude model id to call.
        client: Optional pre-built client exposing ``messages.parse``. When
            omitted, an ``anthropic.Anthropic()`` client is created.
        failures: ``None`` for a first extraction. Otherwise the per-record
            findings for quotes that failed; the retry prompt is sent and
            asks for one replacement claim per failure, in order. An empty
            list still selects the retry prompt.

    Returns:
        The parsed :class:`DraftExtraction` from the response.

    Raises:
        RuntimeError: the response carried no parsed output.
    """
    if client is None:
        # Import the SDK only when we have to build the client ourselves, so
        # a caller that injects a client (e.g. a test stub) needs neither the
        # SDK importable nor an API key. `is None` also keeps an injected
        # client that happens to be falsy from being silently replaced.
        import anthropic

        client = anthropic.Anthropic()

    if failures is None:
        prompt = build_extract_prompt(source_text)
    else:
        prompt = build_retry_prompt(source_text, failures)

    resp = client.messages.parse(
        model=model,
        max_tokens=16000,
        thinking={"type": "adaptive"},
        system=EXTRACT_SYSTEM,
        messages=[{"role": "user", "content": prompt}],
        output_format=DraftExtraction,
    )
    out = resp.parsed_output
    if out is None:
        raise RuntimeError(f"model returned no parseable extraction for {source_id}")
    return out
|