feed-protocol 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feed/__init__.py +38 -0
- feed/authoring.py +157 -0
- feed/cli.py +182 -0
- feed/constants.py +86 -0
- feed/document.py +196 -0
- feed/parser.py +196 -0
- feed/render.py +209 -0
- feed/tagger.py +58 -0
- feed/validate.py +99 -0
- feed/verify.py +116 -0
- feed_protocol-0.2.0.dist-info/METADATA +200 -0
- feed_protocol-0.2.0.dist-info/RECORD +16 -0
- feed_protocol-0.2.0.dist-info/WHEEL +5 -0
- feed_protocol-0.2.0.dist-info/entry_points.txt +2 -0
- feed_protocol-0.2.0.dist-info/licenses/LICENSE +21 -0
- feed_protocol-0.2.0.dist-info/top_level.txt +1 -0
feed/parser.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""Parse a FEED markdown document back into a FeedDocument.
|
|
2
|
+
|
|
3
|
+
Robust to extra prose between blocks: we scan for FEED markers and ignore
|
|
4
|
+
everything else (including the visible notice blockquote and headings).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from .constants import (
|
|
10
|
+
CLOSE_MARKER_RE,
|
|
11
|
+
DEFAULT_GROUNDING,
|
|
12
|
+
OPEN_MARKER_RE,
|
|
13
|
+
parse_attrs,
|
|
14
|
+
)
|
|
15
|
+
from .document import FeedDocument
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def parse(text: str) -> FeedDocument:
    """Parse FEED markdown ``text`` into a ``FeedDocument``.

    The document header comes from the DOC marker attributes and the META
    block; when several of either are present the last one wins. Grounding
    is taken from the DOC attributes, then META, then ``DEFAULT_GROUNDING``.
    The title is taken from META, then the first ``# `` heading, then
    ``"Untitled"``. EVIDENCE and CLAIM blocks without an ``id`` attribute are
    skipped. Paragraphs under a ``## Findings`` heading are added as findings.
    """
    blocks = _scan_blocks(text)

    # Document-level header.
    doc_attrs: dict[str, str] = {}
    meta: dict[str, str] = {}
    for kind, attrs, body in blocks:
        if kind == "DOC":
            doc_attrs = attrs
        elif kind == "META":
            meta = _parse_kv(body)

    grounding = doc_attrs.get("grounding") or meta.get("grounding") or DEFAULT_GROUNDING
    title = meta.get("title") or _first_heading(text) or "Untitled"

    doc = FeedDocument(
        title=title,
        author=meta.get("author"),
        grounding=grounding,
        created=meta.get("created"),
        summary=meta.get("summary"),
        version=doc_attrs.get("version", "0.2"),
    )

    # Evidence must be added before claims that reference it, so do two passes.
    for kind, attrs, body in blocks:
        if kind != "EVIDENCE":
            continue
        eid = attrs.get("id")
        if not eid:
            continue
        # Parse the body once; ``note`` travels as its own keyword argument.
        fields = _parse_kv(body, drop_id_marker=eid)
        note = fields.pop("note", None)
        # ``type`` and ``confidence`` are passed explicitly from the marker
        # attributes below. A body line using either key would otherwise be
        # forwarded through ``**fields`` as a duplicate keyword and make the
        # call raise TypeError, so the marker attributes win.
        for reserved in ("type", "confidence"):
            fields.pop(reserved, None)
        doc.add_evidence(
            eid,
            type=attrs.get("type", "data"),
            confidence=attrs.get("confidence", "medium"),
            note=note,
            **fields,
        )

    for kind, attrs, body in blocks:
        if kind != "CLAIM":
            continue
        cid = attrs.get("id")
        if not cid:
            continue
        evidence = [
            ref.strip()
            for ref in attrs.get("evidence", "").split(",")
            if ref.strip()
        ]
        doc.add_claim(
            cid,
            text=_claim_text(body),
            evidence=evidence,
            # The marker attribute takes precedence over the visible
            # "- **Decision:**" line inside the block body.
            decision=attrs.get("decision") or _claim_decision(body),
        )

    # Findings: the markdown under a "## Findings" heading that is not inside a
    # FEED block. Captured separately so round-tripping preserves narrative.
    for para in _findings_paragraphs(text):
        doc.add_finding(para)

    return doc
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
# --- low-level scanning ---------------------------------------------------
|
|
85
|
+
def _scan_blocks(text: str) -> list[tuple[str, dict[str, str], str]]:
    """Return ``(kind, attrs_dict, body_text)`` for every FEED block, in order.

    DOC and END have no body (an empty string is recorded). Every other kind
    runs from the end of its open marker to its matching close marker.

    A block counts as terminated only when the next close marker has the same
    kind and appears before the next open marker. Otherwise the block is
    treated as unterminated: its body stops at the next open marker (or the
    end of the text) and scanning resumes right after its own open marker, so
    a missing close marker cannot swallow the blocks that follow.
    """
    results: list[tuple[str, dict[str, str], str]] = []
    pos = 0
    while True:
        m = OPEN_MARKER_RE.search(text, pos)
        if not m:
            break
        kind = m.group("kind")
        attrs = parse_attrs(m.group("attrs"))
        if kind in ("DOC", "END"):
            results.append((kind, attrs, ""))
            pos = m.end()
            continue

        close = CLOSE_MARKER_RE.search(text, m.end())
        following = OPEN_MARKER_RE.search(text, m.end())
        limit = following.start() if following else len(text)

        if close and close.group("kind") == kind and close.start() <= limit:
            results.append((kind, attrs, text[m.end() : close.start()]))
            pos = close.end()
        else:
            # Unterminated block — record what belongs to it and move on.
            results.append((kind, attrs, text[m.end() : limit]))
            pos = m.end()
    return results
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def _parse_kv(
    body: str, drop_keys: set[str] | None = None, drop_id_marker: str | None = None
) -> dict[str, str]:
    """Collect ``key: value`` lines from ``body`` into a dict.

    Blank lines, lines without a colon, and lines whose key is empty or
    contains a space (treated as prose) are ignored. Keys listed in
    ``drop_keys`` are omitted. When ``drop_id_marker`` is given, a line that
    is exactly ``**[<id>]**`` or ``[<id>]`` is ignored as well. A repeated
    key keeps its last value.
    """
    ignored = drop_keys or set()
    id_lines = (
        (f"**[{drop_id_marker}]**", f"[{drop_id_marker}]") if drop_id_marker else ()
    )

    pairs: dict[str, str] = {}
    for entry in (raw.strip() for raw in body.splitlines()):
        if not entry or entry in id_lines:
            continue
        name, colon, rest = entry.partition(":")
        if not colon:
            continue
        name = name.strip()
        # An empty key or one with a space is prose, not a key/value line.
        if name == "" or " " in name or name in ignored:
            continue
        pairs[name] = rest.strip()
    return pairs
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def _claim_text(body: str) -> str:
    """Return the claim sentence from a CLAIM block body.

    The sentence is the first non-blank line that is not a
    ``- **Decision:**`` line, with trailing ``[E001][E002]``-style citations
    removed. Returns an empty string when no such line exists.
    """
    stripped_lines = (raw.strip() for raw in body.splitlines())
    first = next(
        (ln for ln in stripped_lines if ln and not ln.startswith("- **Decision:**")),
        None,
    )
    if first is None:
        return ""
    # strip trailing [E001][E002] citations from the stored text
    return _strip_citations(first)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
def _strip_citations(line: str) -> str:
    """Remove a trailing run of ``[E<1-4 digits>]`` citations from ``line``.

    Only citations at the very end of the line (optionally surrounded by
    whitespace) are removed; the result is stripped of outer whitespace.
    """
    import re

    trailing_citations = re.compile(r"\s*(\[E\d{1,4}\])+\s*$")
    without = trailing_citations.sub("", line)
    return without.strip()
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _claim_decision(body: str) -> str | None:
    """Return the text following ``- **Decision:**`` in a CLAIM block body.

    The first line that starts with that prefix (after stripping) is used.
    Returns ``None`` when the body has no decision line.
    """
    prefix = "- **Decision:**"
    for entry in map(str.strip, body.splitlines()):
        if entry.startswith(prefix):
            # Everything after the prefix is the decision text.
            return entry[len(prefix):].strip()
    return None
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _first_heading(text: str) -> str | None:
    """Return the text of the first level-one (``# ``) heading in ``text``.

    Lines are stripped before matching, so indented headings count. Returns
    ``None`` when there is no such heading.
    """
    stripped_lines = (raw.strip() for raw in text.splitlines())
    headings = (ln[2:].strip() for ln in stripped_lines if ln.startswith("# "))
    return next(headings, None)
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _findings_paragraphs(text: str) -> list[str]:
    """Return the paragraphs under the first ``## Finding...`` heading.

    Collection stops at the next ``## `` heading. Inside the section, lines
    starting with ``<!--`` (FEED comment markers) or ``# `` are skipped, blank
    lines separate paragraphs, and the lines of one paragraph are joined with
    single spaces.
    """
    paragraphs: list[str] = []
    pending: list[str] = []
    inside = False

    def flush() -> None:
        # Close the paragraph being accumulated, if any.
        if pending:
            paragraphs.append(" ".join(pending).strip())
            pending.clear()

    for raw in text.splitlines():
        content = raw.strip()
        if content.startswith("## "):
            if inside:  # leaving the section
                break
            inside = content[3:].strip().lower().startswith("finding")
        elif not inside:
            continue
        elif content.startswith(("<!--", "# ")):
            continue
        elif content:
            pending.append(content)
        else:
            flush()

    flush()
    return [para for para in paragraphs if para]
|
feed/render.py
ADDED
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
"""Render a FeedDocument to Markdown (canonical) or styled HTML.
|
|
2
|
+
|
|
3
|
+
Document order encodes priority — tier 0 (claims/decisions) first, tier 1
|
|
4
|
+
(findings) next, tier 2 (full evidence) last — so a small-context model that
|
|
5
|
+
truncates the tail still keeps the core.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import html as _html
|
|
11
|
+
|
|
12
|
+
from .constants import ingestion_notice
|
|
13
|
+
from .document import Evidence, FeedDocument
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def to_markdown(doc: FeedDocument) -> str:
    """Render ``doc`` as FEED markdown.

    Output order: DOC marker, ingestion notice, title heading, META block,
    optional TL;DR, claims and decisions, findings, evidence, END marker.
    Sections with no content are omitted. Lines are joined with newlines and
    the result ends with a trailing newline.
    """
    md: list[str] = [
        f'<!-- FEED:DOC version="{doc.version}" grounding="{doc.grounding}" -->',
        "",
        ingestion_notice(doc.grounding),
        "",
        f"# {doc.title}",
        "",
    ]

    # --- META (machine header) ---
    md += ["<!-- FEED:META -->", f"title: {doc.title}"]
    if doc.author:
        md.append(f"author: {doc.author}")
    if doc.created:
        md.append(f"created: {doc.created}")
    md.append(f"grounding: {doc.grounding}")
    if doc.summary:
        md.append(f"summary: {doc.summary}")
    md += ["<!-- /FEED:META -->", ""]

    # --- TIER 0: claims & decisions, front-loaded ---
    if doc.summary:
        md += [f"**TL;DR.** {doc.summary}", ""]
    if doc.claims:
        md += ["## Claims & Decisions", ""]
        for claim in doc.claims:
            cited_ids = ",".join(claim.evidence)
            marker_attrs = f' id="{claim.id}"'
            if cited_ids:
                marker_attrs += f' evidence="{cited_ids}"'
            if claim.decision:
                marker_attrs += f' decision="{_attr(claim.decision)}"'
            md.append(f"<!-- FEED:CLAIM{marker_attrs} -->")

            sentence = claim.text.strip()
            if claim.evidence:
                # Visible citations follow the claim text on the same line.
                sentence += " " + "".join(f"[{ref}]" for ref in claim.evidence)
            md.append(sentence)
            if claim.decision:
                md.append(f"- **Decision:** {claim.decision}")
            md += ["<!-- /FEED:CLAIM -->", ""]

    # --- TIER 1: findings narrative ---
    if doc.findings:
        md += ["## Findings", ""]
        for para in doc.findings:
            md += [para.strip(), ""]

    # --- TIER 2: full evidence appendix ---
    if doc.evidence:
        md += ["## Evidence", ""]
        for item in doc.evidence:
            md += [_evidence_md(item), ""]

    md += ["<!-- FEED:END -->", ""]
    return "\n".join(md)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def _evidence_md(ev: Evidence) -> str:
    """Render one evidence record as a FEED:EVIDENCE markdown block.

    The block is the open marker (id, type, confidence), a bold ``[id]``
    line, one ``key: value`` line per field, an optional ``note:`` line, and
    the close marker, joined with newlines.
    """
    opener = (
        f'<!-- FEED:EVIDENCE id="{ev.id}" type="{ev.type}" confidence="{ev.confidence}" -->'
    )
    field_lines = [f"{key}: {val}" for key, val in ev.fields.items()]
    if ev.note:
        field_lines.append(f"note: {ev.note}")
    return "\n".join(
        [opener, f"**[{ev.id}]**", *field_lines, "<!-- /FEED:EVIDENCE -->"]
    )
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _attr(value: str) -> str:
    """Return ``value`` with every double quote replaced by a single quote.

    Used for text placed inside a double-quoted marker attribute.
    """
    return value.translate({ord('"'): "'"})
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# --- HTML ----------------------------------------------------------------
|
|
101
|
+
def _comment_safe(value: object) -> str:
    """Return ``value`` as text that cannot terminate an enclosing HTML comment."""
    # "-->" and "--!>" are the sequences that end an HTML comment; rewriting
    # their ">" leaves all other text untouched for machine readers.
    return str(value).replace("-->", "--&gt;").replace("--!>", "--!&gt;")


def to_html(doc: FeedDocument) -> str:
    """Styled, self-contained HTML. FEED markers live in HTML comments so they
    are invisible on screen but present in the raw source for any AI.

    Visible text is HTML-escaped. Values interpolated into HTML comments (the
    FEED markers and the META mirror) are passed through ``_comment_safe`` so
    that a title, author or id containing ``-->`` cannot close the comment
    and inject markup into the page.
    """
    e = _html.escape
    cs = _comment_safe
    parts: list[str] = []
    parts.append("<!doctype html>")
    parts.append('<html lang="en"><head><meta charset="utf-8">')
    parts.append(f"<title>{e(doc.title)}</title>")
    parts.append(
        f'<!-- FEED:DOC version="{cs(doc.version)}" grounding="{cs(doc.grounding)}" -->'
    )
    parts.append(_STYLE)
    parts.append("</head><body><main>")

    # Visible notice (rendered from the same markdown blockquote text).
    # The blockquote ">" prefixes are removed first, then the text is escaped
    # before inline markdown is applied, the same way findings are handled.
    notice = ingestion_notice(doc.grounding).replace("> ", "").replace(">", "")
    parts.append(f'<aside class="feed-notice">{_md_inline(e(notice))}</aside>')

    parts.append(f"<h1>{e(doc.title)}</h1>")
    meta_bits = []
    if doc.author:
        meta_bits.append(e(doc.author))
    if doc.created:
        meta_bits.append(e(doc.created))
    meta_bits.append(f"grounding: {e(doc.grounding)}")
    parts.append(f'<p class="feed-meta">{" · ".join(meta_bits)}</p>')
    # Machine META mirror, hidden from view but in the source.
    parts.append("<!-- FEED:META -->")
    parts.append(f"<!-- title: {cs(doc.title)} -->")
    if doc.author:
        parts.append(f"<!-- author: {cs(doc.author)} -->")
    parts.append(f"<!-- grounding: {cs(doc.grounding)} -->")
    parts.append("<!-- /FEED:META -->")

    if doc.summary:
        parts.append(f'<p class="feed-tldr"><strong>TL;DR.</strong> {e(doc.summary)}</p>')

    if doc.claims:
        parts.append("<h2>Claims & Decisions</h2>")
        for c in doc.claims:
            ev = ",".join(c.evidence)
            attrs = f' id="{cs(c.id)}"'
            if ev:
                attrs += f' evidence="{cs(ev)}"'
            parts.append(f"<!-- FEED:CLAIM{attrs} -->")
            cites = " ".join(f'<span class="cite">[{e(x)}]</span>' for x in c.evidence)
            parts.append(f'<div class="claim"><p>{e(c.text)} {cites}</p>')
            if c.decision:
                parts.append(f'<p class="decision"><strong>Decision:</strong> {e(c.decision)}</p>')
            parts.append("</div>")
            parts.append("<!-- /FEED:CLAIM -->")

    if doc.findings:
        parts.append("<h2>Findings</h2>")
        for para in doc.findings:
            parts.append(f"<p>{_md_inline(e(para))}</p>")

    if doc.evidence:
        parts.append("<h2>Evidence</h2>")
        for ev in doc.evidence:
            parts.append(
                f'<!-- FEED:EVIDENCE id="{cs(ev.id)}" type="{cs(ev.type)}" '
                f'confidence="{cs(ev.confidence)}" -->'
            )
            parts.append('<table class="evidence">')
            parts.append(
                f'<caption><span class="eid">[{e(ev.id)}]</span> '
                f'<span class="etype">{e(ev.type)} · {e(ev.confidence)} confidence</span></caption>'
            )
            for k, v in ev.fields.items():
                parts.append(f"<tr><th>{e(k)}</th><td>{e(v)}</td></tr>")
            if ev.note:
                parts.append(f'<tr><th>note</th><td>{e(ev.note)}</td></tr>')
            parts.append("</table>")
            parts.append("<!-- /FEED:EVIDENCE -->")

    parts.append("<!-- FEED:END -->")
    parts.append("</main></body></html>")
    return "\n".join(parts)
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def _md_inline(text: str) -> str:
    """Convert ``**bold**`` and `` `code` `` spans to HTML and newlines to ``<br>``.

    Deliberately tiny: just enough inline markdown for the notice text.
    """
    import re

    rules = (
        (r"\*\*([^*]+)\*\*", r"<strong>\1</strong>"),
        (r"`([^`]+)`", r"<code>\1</code>"),
    )
    for pattern, replacement in rules:
        text = re.sub(pattern, replacement, text)
    return "<br>".join(text.split("\n"))
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
_STYLE = """<style>
|
|
189
|
+
:root { --ink:#1c1b18; --muted:#6b6357; --line:#e4ddd0; --accent:#b1542f; --bg:#f7f4ee; }
|
|
190
|
+
body { background:var(--bg); color:var(--ink); font:16px/1.6 Georgia, 'Times New Roman', serif;
|
|
191
|
+
margin:0; padding:2.5rem 1rem; }
|
|
192
|
+
main { max-width:46rem; margin:0 auto; }
|
|
193
|
+
h1 { font-size:2rem; margin:.2em 0 0; }
|
|
194
|
+
h2 { font-size:1.25rem; margin:2rem 0 .6rem; border-bottom:1px solid var(--line); padding-bottom:.3rem; }
|
|
195
|
+
.feed-meta { color:var(--muted); font-style:italic; margin:.2rem 0 1.4rem; }
|
|
196
|
+
.feed-notice { background:#fff; border:1px solid var(--line); border-left:4px solid var(--accent);
|
|
197
|
+
padding:.9rem 1.1rem; font-size:.86rem; color:var(--muted); border-radius:4px; margin-bottom:1.6rem; }
|
|
198
|
+
.feed-tldr { background:#fff; border:1px solid var(--line); padding:.8rem 1rem; border-radius:4px; }
|
|
199
|
+
.claim { border-left:3px solid var(--accent); padding:.1rem 0 .1rem 1rem; margin:.8rem 0; }
|
|
200
|
+
.decision { color:var(--accent); margin:.2rem 0 0; }
|
|
201
|
+
.cite, .eid { font-family:ui-monospace, Menlo, monospace; font-size:.82em; color:var(--accent); }
|
|
202
|
+
table.evidence { width:100%; border-collapse:collapse; background:#fff; margin:.8rem 0;
|
|
203
|
+
border:1px solid var(--line); font-size:.92rem; }
|
|
204
|
+
table.evidence caption { text-align:left; padding:.5rem .7rem; background:#faf7f1; border-bottom:1px solid var(--line); }
|
|
205
|
+
.etype { color:var(--muted); font-style:italic; font-family:Georgia,serif; margin-left:.5rem; }
|
|
206
|
+
table.evidence th { text-align:left; width:34%; vertical-align:top; color:var(--muted); font-weight:normal;
|
|
207
|
+
padding:.35rem .7rem; border-top:1px solid var(--line); font-family:ui-monospace,Menlo,monospace; font-size:.85rem; }
|
|
208
|
+
table.evidence td { padding:.35rem .7rem; border-top:1px solid var(--line); }
|
|
209
|
+
</style>"""
|
feed/tagger.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Optional convenience: auto-tag a plain document into FEED by calling Claude.
|
|
2
|
+
|
|
3
|
+
THIS IS NOT THE PRIMARY PATH. FEED is AI-to-AI — normally the AI already in your
|
|
4
|
+
loop emits FEED natively using `feed.authoring.AUTHORING_PROMPT` +
|
|
5
|
+
`FEED_JSON_SCHEMA`, and you render it with `feed.authoring.build()` (no API key).
|
|
6
|
+
|
|
7
|
+
This module exists only for the case where you have a plain document and *no* AI
|
|
8
|
+
already in the loop, and want a one-shot CLI. It needs the `anthropic` package and
|
|
9
|
+
an ANTHROPIC_API_KEY (`pip install feed-protocol[tagger]`). Defaults to Opus 4.8.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
import json
|
|
15
|
+
|
|
16
|
+
from .authoring import AUTHORING_PROMPT, FEED_JSON_SCHEMA, build
|
|
17
|
+
from .document import FeedDocument
|
|
18
|
+
|
|
19
|
+
DEFAULT_MODEL = "claude-opus-4-8"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def auto_tag(
    text: str,
    title: str | None = None,
    author: str | None = None,
    grounding: str = "strict",
    created: str | None = None,
    model: str = DEFAULT_MODEL,
) -> FeedDocument:
    """Read a plain document and return a FeedDocument, using Claude to extract the
    structure.

    Note this reuses the exact same portable prompt and schema any other AI would
    use — the API call is just one way to drive it.

    Args:
        text: The plain document to convert.
        title: Optional title; also mentioned to the model in the instruction.
        author, grounding, created: Passed through to ``build``.
        model: Model identifier sent with the request.

    Raises:
        RuntimeError: If the ``anthropic`` package is not installed, or the
            response contains no text block to parse.
        ValueError: ``json.loads`` raises ``json.JSONDecodeError`` when the
            returned text is not valid JSON.

    Errors raised by the ``anthropic`` client itself (for example a missing
    API key) propagate unchanged.
    """
    try:
        import anthropic
    except ImportError as exc:  # pragma: no cover - environment dependent
        raise RuntimeError(
            "The auto-tagger needs the anthropic package. Install with: "
            "pip install feed-protocol[tagger]. (Or skip it: have the AI already "
            "in your pipeline emit FEED JSON using feed.authoring, then feed.build.)"
        ) from exc

    client = anthropic.Anthropic()
    instruction = "Convert the following document into FEED structure."
    if title:
        instruction += f' Use the title "{title}".'

    response = client.messages.create(
        model=model,
        max_tokens=16000,
        system=AUTHORING_PROMPT,
        output_config={"format": {"type": "json_schema", "schema": FEED_JSON_SCHEMA}},
        messages=[{"role": "user", "content": f"{instruction}\n\n---\n\n{text}"}],
    )
    # A default of None keeps a response without any text block from leaking
    # a bare StopIteration to the caller.
    raw = next((b.text for b in response.content if b.type == "text"), None)
    if raw is None:
        raise RuntimeError(
            "The model response contained no text block to parse as FEED JSON."
        )
    data = json.loads(raw)
    return build(data, title=title, author=author, grounding=grounding, created=created)
|
feed/validate.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
"""Structural validation for FEED documents.
|
|
2
|
+
|
|
3
|
+
Errors block compliance; warnings flag quality/robustness issues. The intent is
|
|
4
|
+
that a CI step can run `feed validate` and fail the build on errors.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
|
|
11
|
+
from .constants import (
|
|
12
|
+
CONFIDENCE_LEVELS,
|
|
13
|
+
EVIDENCE_ID_RE,
|
|
14
|
+
EVIDENCE_TYPES,
|
|
15
|
+
GROUNDING_MODES,
|
|
16
|
+
CLAIM_ID_RE,
|
|
17
|
+
)
|
|
18
|
+
from .document import FeedDocument
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class ValidationReport:
    """Outcome of validating a FEED document: error and warning messages."""

    # Messages that make the document non-compliant.
    errors: list[str] = field(default_factory=list)
    # Messages that flag quality or robustness issues.
    warnings: list[str] = field(default_factory=list)

    @property
    def ok(self) -> bool:
        """True when no errors were recorded (warnings do not count)."""
        return not self.errors

    def __str__(self) -> str:  # pragma: no cover - cosmetic
        """One line per error, then one per warning, or an OK line if none."""
        rendered = [f" ERROR {message}" for message in self.errors]
        rendered += [f" WARN {message}" for message in self.warnings]
        if rendered:
            return "\n".join(rendered)
        return " OK — valid FEED document"
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def validate(doc: FeedDocument) -> ValidationReport:
    """Check the structure of ``doc`` and return a ``ValidationReport``.

    Errors: unknown grounding mode, missing title, malformed or duplicate
    evidence/claim ids, evidence without fields, claims citing evidence that
    does not exist. Warnings: unusual evidence type or confidence, uncited
    claims under strict grounding, documents without evidence or without
    claims and findings, and evidence fields longer than 240 characters.
    """
    report = ValidationReport()
    fail = report.errors.append
    warn = report.warnings.append

    if doc.grounding not in GROUNDING_MODES:
        fail(f"grounding {doc.grounding!r} is not one of {GROUNDING_MODES}")

    if not (doc.title and doc.title.strip()):
        fail("document has no title")

    # Evidence checks.
    known_evidence: set[str] = set()
    for item in doc.evidence:
        if not EVIDENCE_ID_RE.match(item.id):
            fail(f"evidence id {item.id!r} is malformed (expected E001)")
        if item.id in known_evidence:
            fail(f"duplicate evidence id {item.id!r}")
        known_evidence.add(item.id)
        if not item.fields:
            fail(f"evidence {item.id!r} has no key/value fields")
        if item.type not in EVIDENCE_TYPES:
            warn(f"evidence {item.id!r} has unusual type {item.type!r}")
        if item.confidence not in CONFIDENCE_LEVELS:
            warn(f"evidence {item.id!r} has unusual confidence {item.confidence!r}")

    # Claim checks.
    known_claims: set[str] = set()
    strict = doc.grounding == "strict"
    for claim in doc.claims:
        if not CLAIM_ID_RE.match(claim.id):
            fail(f"claim id {claim.id!r} is malformed (expected C1)")
        if claim.id in known_claims:
            fail(f"duplicate claim id {claim.id!r}")
        known_claims.add(claim.id)
        for ref in claim.evidence:
            if ref not in known_evidence:
                fail(f"claim {claim.id!r} cites missing evidence {ref!r}")
        if strict and not claim.evidence:
            warn(f"claim {claim.id!r} has no evidence, but grounding is strict")

    # Whole-document sanity.
    if not doc.evidence:
        warn("document has no evidence blocks — nothing to ground answers in")
    if not doc.claims and not doc.findings:
        warn("document has no claims or findings — nothing for a reader to act on")

    # Cheap density check: flag evidence written as prose rather than key/value.
    # Only the first over-long field of each evidence block is reported.
    for item in doc.evidence:
        long_key = next((k for k, v in item.fields.items() if len(v) > 240), None)
        if long_key is not None:
            warn(
                f"evidence {item.id!r} field {long_key!r} is very long — "
                "prefer compact key/value facts over prose"
            )

    return report
|
feed/verify.py
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
"""Verify that an AI's answer is actually grounded in a FEED document.
|
|
2
|
+
|
|
3
|
+
This is FEED's defensible edge over plain "AI-friendly" formats: because every
|
|
4
|
+
evidence block has a stable plain-text ID and answers must cite by ID, you can
|
|
5
|
+
mechanically check an answer:
|
|
6
|
+
|
|
7
|
+
- every [E###] it cites exists in the document (no invented citations)
|
|
8
|
+
- in strict mode, the answer cites at least one block (it didn't free-wheel)
|
|
9
|
+
- optionally, cited figures actually appear in the answer (loose corroboration)
|
|
10
|
+
|
|
11
|
+
It is deliberately simple and dependency-free — a 20-line idea with teeth.
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
from dataclasses import dataclass, field
|
|
17
|
+
|
|
18
|
+
from .constants import CITATION_RE
|
|
19
|
+
from .document import FeedDocument
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class VerificationReport:
    """Result of checking an answer's citations against a FEED document."""

    cited: list[str] = field(default_factory=list)  # all IDs cited, in order
    valid: list[str] = field(default_factory=list)  # cited and exist
    invalid: list[str] = field(default_factory=list)  # cited but do not exist
    uncited_evidence: list[str] = field(default_factory=list)  # exist but never cited
    corroborated: list[str] = field(default_factory=list)  # cited & a value appears in answer
    grounding: str = "strict"
    passed: bool = False
    reasons: list[str] = field(default_factory=list)

    def __str__(self) -> str:  # pragma: no cover - cosmetic
        """Multi-line summary: status, citations, optional sections, reasons."""
        verdict = "PASS" if self.passed else "FAIL"
        out = [
            f" grounding: {self.grounding} result: {verdict}",
            f" cited: {', '.join(self.cited) or '(none)'}",
        ]
        # Sections that are printed only when they have entries.
        optional = (
            (" INVALID citations (no such evidence): ", self.invalid),
            (" corroborated (a value appears in the answer): ", self.corroborated),
            (" unused evidence: ", self.uncited_evidence),
        )
        out.extend(label + ", ".join(ids) for label, ids in optional if ids)
        out.extend(f" - {why}" for why in self.reasons)
        return "\n".join(out)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def verify(answer: str, doc: FeedDocument) -> VerificationReport:
    """Check the citations in ``answer`` against the evidence in ``doc``.

    The answer fails when it cites an id that is not in the document, or when
    grounding is ``"strict"`` and it contains no citations at all. A cited
    evidence block counts as corroborated when a significant token from one
    of its field values appears (case-insensitively) in the answer.
    """
    evidence_by_id = {ev.id: ev for ev in doc.evidence}

    # Every cited id, in order of appearance; a match may hold a comma list.
    cited = [
        part.strip()
        for match in CITATION_RE.finditer(answer)
        for part in match.group(1).split(",")
    ]

    # Preserve order, dedupe for set operations.
    unique = list(dict.fromkeys(cited))
    valid = [cid for cid in unique if cid in evidence_by_id]
    invalid = [cid for cid in unique if cid not in evidence_by_id]
    uncited = [eid for eid in evidence_by_id if eid not in unique]

    haystack = answer.lower()

    def echoed(eid: str) -> bool:
        # True when any field value of this evidence yields a token that
        # occurs in the answer.
        tokens = (_significant_token(v) for v in evidence_by_id[eid].fields.values())
        return any(tok and tok.lower() in haystack for tok in tokens)

    corroborated = [eid for eid in valid if echoed(eid)]

    # Decide pass/fail.
    reasons: list[str] = []
    passed = True
    if invalid:
        passed = False
        reasons.append(
            f"answer cites {len(invalid)} evidence ID(s) that do not exist in the document"
        )
    if not cited:
        if doc.grounding == "strict":
            passed = False
            reasons.append(
                "grounding is strict but the answer contains no [E###] citations"
            )
        if doc.grounding == "open":
            reasons.append("no citations found (allowed in open mode)")
    if passed and not reasons:
        reasons.append("all citations resolve to real evidence")

    return VerificationReport(
        cited=cited,
        valid=valid,
        invalid=invalid,
        uncited_evidence=uncited,
        corroborated=corroborated,
        grounding=doc.grounding,
        passed=passed,
        reasons=reasons,
    )
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def _significant_token(value: str) -> str | None:
    """Pull a distinctive token (e.g. a number) from an evidence value to look
    for in the answer. Returns None if nothing distinctive enough.

    The first number in ``value`` is used when it is at least two characters
    long. ``.`` and ``,`` are part of a number only between digits, so
    sentence punctuation after a figure ("1,200.") is not captured and a lone
    digit followed by punctuation ("5.") does not count as distinctive.
    Otherwise the longest word of four or more characters is returned (the
    earliest one on ties).
    """
    import re

    number = re.search(r"\d+(?:[.,]\d+)*", value)
    if number and len(number.group(0)) >= 2:
        return number.group(0)
    # else fall back to the longest word if it is reasonably specific
    words = re.findall(r"[A-Za-z0-9_\-]{4,}", value)
    return max(words, key=len) if words else None
|