feed-protocol 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- feed/__init__.py +38 -0
- feed/authoring.py +157 -0
- feed/cli.py +182 -0
- feed/constants.py +86 -0
- feed/document.py +196 -0
- feed/parser.py +196 -0
- feed/render.py +209 -0
- feed/tagger.py +58 -0
- feed/validate.py +99 -0
- feed/verify.py +116 -0
- feed_protocol-0.2.0.dist-info/METADATA +200 -0
- feed_protocol-0.2.0.dist-info/RECORD +16 -0
- feed_protocol-0.2.0.dist-info/WHEEL +5 -0
- feed_protocol-0.2.0.dist-info/entry_points.txt +2 -0
- feed_protocol-0.2.0.dist-info/licenses/LICENSE +21 -0
- feed_protocol-0.2.0.dist-info/top_level.txt +1 -0
feed/__init__.py
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
"""FEED — Format for Enforced Evidence-based Digestion.
|
|
2
|
+
|
|
3
|
+
A self-bootstrapping document protocol that makes downstream LLMs ground their
|
|
4
|
+
answers in cited evidence — and lets you mechanically verify they did.
|
|
5
|
+
|
|
6
|
+
from feed import FeedDocument
|
|
7
|
+
|
|
8
|
+
doc = FeedDocument("Q2 Pump Health Assessment", grounding="strict")
|
|
9
|
+
doc.add_evidence("E001", asset="XYZ-003", metric="vibration_rms",
|
|
10
|
+
value="12.4 mm/s", threshold="11.2 mm/s (ISO 10816-3 Zone C)",
|
|
11
|
+
confidence="high")
|
|
12
|
+
doc.add_claim("C1", "XYZ-003 needs intervention", evidence=["E001"],
|
|
13
|
+
decision="Approve bearing replacement work order")
|
|
14
|
+
print(doc.render("md"))
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .authoring import AUTHORING_PROMPT, FEED_JSON_SCHEMA, build
|
|
18
|
+
from .constants import GROUNDING_MODES, VERSION
|
|
19
|
+
from .document import Claim, Evidence, FeedDocument
|
|
20
|
+
from .validate import ValidationReport, validate
|
|
21
|
+
from .verify import VerificationReport, verify
|
|
22
|
+
|
|
23
|
+
__version__ = VERSION
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"FeedDocument",
|
|
27
|
+
"Evidence",
|
|
28
|
+
"Claim",
|
|
29
|
+
"build",
|
|
30
|
+
"AUTHORING_PROMPT",
|
|
31
|
+
"FEED_JSON_SCHEMA",
|
|
32
|
+
"validate",
|
|
33
|
+
"ValidationReport",
|
|
34
|
+
"verify",
|
|
35
|
+
"VerificationReport",
|
|
36
|
+
"GROUNDING_MODES",
|
|
37
|
+
"VERSION",
|
|
38
|
+
]
|
feed/authoring.py
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
"""The authoring side of FEED — self-bootstrapping, no API key required.
|
|
2
|
+
|
|
3
|
+
FEED is an AI-to-AI protocol. The AI that is *already in the loop* (the one that
|
|
4
|
+
wrote the report, or the user's assistant, or a pipeline step) is what produces
|
|
5
|
+
FEED — the library never needs its own LLM credentials.
|
|
6
|
+
|
|
7
|
+
That works because the authoring rules are portable, exactly like the ingestion
|
|
8
|
+
notice is portable on the reading side:
|
|
9
|
+
|
|
10
|
+
1. `AUTHORING_PROMPT` + `FEED_JSON_SCHEMA` — hand these to *any* AI and it emits
|
|
11
|
+
conformant FEED data. No FEED-specific tooling on the AI's side.
|
|
12
|
+
2. `build(data)` — a pure-Python, dependency-free renderer that turns that data
|
|
13
|
+
into a validated FEED document. No network, no key.
|
|
14
|
+
|
|
15
|
+
The optional `feed.tagger` module is a convenience wrapper that calls Claude for
|
|
16
|
+
people who don't already have an AI in the loop — it is not the primary path.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from .document import FeedDocument
|
|
22
|
+
|
|
23
|
+
# The instruction block to give any AI so it authors FEED natively.
|
|
24
|
+
AUTHORING_PROMPT = """\
|
|
25
|
+
You are producing a FEED document (Format for Enforced Evidence-based Digestion).
|
|
26
|
+
FEED separates a document so a downstream AI can answer questions grounded in cited
|
|
27
|
+
evidence. Return ONLY JSON matching the provided schema. Structure the content as:
|
|
28
|
+
|
|
29
|
+
- evidence: every concrete fact in the source, as an atomic key/value block. Never
|
|
30
|
+
prose. Each gets an id E001, E002, ... in document order. Normalise values: ISO
|
|
31
|
+
dates (YYYY-MM-DD), explicit units, consistent names. Include thresholds and
|
|
32
|
+
baselines as their own fields when present. `type` is one of data | quote | calc |
|
|
33
|
+
observation | reference | image; `confidence` is high | medium | low; `note` is an
|
|
34
|
+
optional one-line free-text aside ("" if none).
|
|
35
|
+
- claims: short narrative statements (ids C1, C2, ...), each grounded in one or more
|
|
36
|
+
evidence ids. If a claim implies an action, put it in `decision` ("" if none).
|
|
37
|
+
- findings: brief narrative paragraphs (1-3 sentences) that reference evidence inline
|
|
38
|
+
as [E001]. Say each fact once and reference it by id rather than repeating it.
|
|
39
|
+
- title and summary: the document title and a one-sentence bottom line.
|
|
40
|
+
|
|
41
|
+
Rules: extract every concrete fact as evidence; never invent facts; be dense (no
|
|
42
|
+
filler, no repetition); keep ids sequential and in document order.
|
|
43
|
+
"""
|
|
44
|
+
|
|
45
|
+
# JSON Schema the AI should emit. Compatible with Anthropic structured outputs
|
|
46
|
+
# (additionalProperties:false everywhere) but usable with any model — paste it
|
|
47
|
+
# alongside AUTHORING_PROMPT.
|
|
48
|
+
FEED_JSON_SCHEMA = {
|
|
49
|
+
"type": "object",
|
|
50
|
+
"properties": {
|
|
51
|
+
"title": {"type": "string"},
|
|
52
|
+
"summary": {"type": "string"},
|
|
53
|
+
"evidence": {
|
|
54
|
+
"type": "array",
|
|
55
|
+
"items": {
|
|
56
|
+
"type": "object",
|
|
57
|
+
"properties": {
|
|
58
|
+
"id": {"type": "string"},
|
|
59
|
+
"type": {
|
|
60
|
+
"type": "string",
|
|
61
|
+
"enum": ["data", "quote", "calc", "observation", "reference", "image"],
|
|
62
|
+
},
|
|
63
|
+
"confidence": {"type": "string", "enum": ["high", "medium", "low"]},
|
|
64
|
+
"fields": {
|
|
65
|
+
"type": "array",
|
|
66
|
+
"items": {
|
|
67
|
+
"type": "object",
|
|
68
|
+
"properties": {
|
|
69
|
+
"key": {"type": "string"},
|
|
70
|
+
"value": {"type": "string"},
|
|
71
|
+
},
|
|
72
|
+
"required": ["key", "value"],
|
|
73
|
+
"additionalProperties": False,
|
|
74
|
+
},
|
|
75
|
+
},
|
|
76
|
+
"note": {"type": "string"},
|
|
77
|
+
},
|
|
78
|
+
"required": ["id", "type", "confidence", "fields", "note"],
|
|
79
|
+
"additionalProperties": False,
|
|
80
|
+
},
|
|
81
|
+
},
|
|
82
|
+
"claims": {
|
|
83
|
+
"type": "array",
|
|
84
|
+
"items": {
|
|
85
|
+
"type": "object",
|
|
86
|
+
"properties": {
|
|
87
|
+
"id": {"type": "string"},
|
|
88
|
+
"text": {"type": "string"},
|
|
89
|
+
"evidence": {"type": "array", "items": {"type": "string"}},
|
|
90
|
+
"decision": {"type": "string"},
|
|
91
|
+
},
|
|
92
|
+
"required": ["id", "text", "evidence", "decision"],
|
|
93
|
+
"additionalProperties": False,
|
|
94
|
+
},
|
|
95
|
+
},
|
|
96
|
+
"findings": {"type": "array", "items": {"type": "string"}},
|
|
97
|
+
},
|
|
98
|
+
"required": ["title", "summary", "evidence", "claims", "findings"],
|
|
99
|
+
"additionalProperties": False,
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def build(
    data: dict,
    title: str | None = None,
    author: str | None = None,
    grounding: str = "strict",
    created: str | None = None,
) -> FeedDocument:
    """Render a FeedDocument from the structured data an AI produced.

    Pure Python, no LLM call.

    Args:
        data: Mapping shaped like ``FEED_JSON_SCHEMA`` (``title``, ``summary``,
            ``evidence``, ``claims``, ``findings``). Missing or ``null``
            collections are treated as empty.
        title: Overrides ``data["title"]``; "Untitled" is used if neither is set.
        author: Overrides ``data["author"]``; the data value is only a fallback.
        grounding: Author policy. Always taken from the caller, never from data.
        created: Overrides ``data["created"]``; the data value is only a fallback.

    Returns:
        The populated document. It is not validated here.

    Raises:
        KeyError: An evidence entry has no ``id``, or a claim has no ``id`` or
            ``text``.
        ValueError: Raised by the document model for a duplicate or malformed
            id, an unknown evidence type/confidence, or an unknown grounding.

    Resilient to imperfect AI output: claim references to non-existent evidence
    are dropped, evidence with no fields is skipped, entries that are not
    mappings are skipped, and blank or non-string findings are ignored, so a
    slightly-off model response still yields a document.
    """
    doc = FeedDocument(
        title=title or data.get("title") or "Untitled",
        author=author or data.get("author"),
        grounding=grounding,
        created=created or data.get("created"),
        summary=data.get("summary") or None,
    )

    # `or []` (rather than a .get default) also covers an explicit JSON null.
    for ev in data.get("evidence") or []:
        if not isinstance(ev, dict):
            continue
        fields = _fields(ev)
        if not fields:
            continue
        # add_evidence() takes the fact fields as **kwargs, so a field keyed
        # "id", "type", "confidence" or "note" would collide with its own
        # parameters and raise TypeError. Pass every field under a neutral
        # alias and restore the real keys on the returned Evidence; order and
        # value normalisation are untouched.
        aliases = {f"f{index}": key for index, key in enumerate(fields)}
        record = doc.add_evidence(
            ev["id"],
            type=ev.get("type", "data"),
            confidence=ev.get("confidence", "medium"),
            note=(ev.get("note") or None),
            **{alias: fields[key] for alias, key in aliases.items()},
        )
        record.fields = {aliases[alias]: value for alias, value in record.fields.items()}

    known_ids = {e.id for e in doc.evidence}
    for claim in data.get("claims") or []:
        if not isinstance(claim, dict):
            continue
        # Keep only citations that resolve; add_claim() rejects unknown ids.
        cited = [ref for ref in claim.get("evidence") or [] if ref in known_ids]
        doc.add_claim(
            claim["id"],
            text=claim["text"],
            evidence=cited,
            decision=(claim.get("decision") or None),
        )

    for finding in data.get("findings") or []:
        if isinstance(finding, str) and finding.strip():
            doc.add_finding(finding)
    return doc
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
def _fields(ev: dict) -> dict[str, str]:
|
|
152
|
+
"""Accept either the schema's [{key,value},...] form or a plain {key: value}
|
|
153
|
+
mapping, so a hand-authored or differently-shaped AI payload still works."""
|
|
154
|
+
raw = ev.get("fields", [])
|
|
155
|
+
if isinstance(raw, dict):
|
|
156
|
+
return {k: str(v) for k, v in raw.items() if k}
|
|
157
|
+
return {f["key"]: f["value"] for f in raw if isinstance(f, dict) and f.get("key")}
|
feed/cli.py
ADDED
|
@@ -0,0 +1,182 @@
|
|
|
1
|
+
"""`feed` command-line interface.
|
|
2
|
+
|
|
3
|
+
feed validate report.md
|
|
4
|
+
feed verify --doc report.md --answer answer.txt
|
|
5
|
+
feed render report.md --to html -o report.html
|
|
6
|
+
feed tag draft.md --title "Q2 Report" --grounding strict -o report.md
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import sys
|
|
13
|
+
|
|
14
|
+
from .document import FeedDocument
|
|
15
|
+
from .verify import verify
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _read(path: str) -> str:
|
|
19
|
+
if path == "-":
|
|
20
|
+
return sys.stdin.read()
|
|
21
|
+
with open(path, encoding="utf-8") as fh:
|
|
22
|
+
return fh.read()
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def _write(text: str, out: str | None) -> None:
|
|
26
|
+
if out and out != "-":
|
|
27
|
+
with open(out, "w", encoding="utf-8") as fh:
|
|
28
|
+
fh.write(text)
|
|
29
|
+
print(f"wrote {out}", file=sys.stderr)
|
|
30
|
+
else:
|
|
31
|
+
sys.stdout.write(text)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def cmd_validate(args) -> int:
    """Handle `feed validate`: parse the document, print a summary and the report.

    Returns 0 when the validation report is ok, 1 otherwise.
    """
    doc = FeedDocument.from_markdown(_read(args.file))
    report = doc.validate()
    headline = (
        f"FEED {doc.version} · grounding={doc.grounding} · "
        f"{len(doc.evidence)} evidence · {len(doc.claims)} claims"
    )
    print(headline)
    print(report)
    if report.ok:
        return 0
    return 1
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def cmd_verify(args) -> int:
    """Handle `feed verify`: check an answer text against a FEED document.

    Prints the verification report and returns 0 when it passed, 1 otherwise.
    """
    doc = FeedDocument.from_markdown(_read(args.doc))
    answer_text = _read(args.answer)
    outcome = verify(answer_text, doc)
    print(outcome)
    if outcome.passed:
        return 0
    return 1
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def cmd_render(args) -> int:
    """Handle `feed render`: re-render a FEED document in the requested format.

    Always returns 0; parse or render errors propagate as exceptions.
    """
    parsed = FeedDocument.from_markdown(_read(args.file))
    rendered = parsed.render(args.to)
    _write(rendered, args.output)
    return 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def cmd_prompt(args) -> int:
    """Emit the portable authoring kit so any AI can produce FEED — no key needed.

    With ``--schema-only`` just the JSON schema is printed; otherwise the
    authoring prompt and a separator line come first. Always returns 0.
    """
    import json as _json

    from .authoring import AUTHORING_PROMPT, FEED_JSON_SCHEMA

    schema_text = _json.dumps(FEED_JSON_SCHEMA, indent=2)
    if not args.schema_only:
        print(AUTHORING_PROMPT)
        print("\n--- JSON SCHEMA (the AI must emit JSON matching this) ---\n")
    print(schema_text)
    return 0
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def cmd_build(args) -> int:
    """Render FEED from the structured JSON an AI produced. Pure Python, no key.

    The rendered document is written even when validation fails, so the author
    can inspect it; the validation report then goes to stderr.

    Returns:
        0 if the built document validates, 1 if it was built but failed
        validation, 2 if the input could not be turned into a document at all
        (malformed JSON, a non-object top level, or data the builder rejects).
    """
    import json as _json

    from .authoring import build

    try:
        data = _json.loads(_read(args.file))
    except _json.JSONDecodeError as exc:
        # Report bad input as a CLI error rather than a traceback.
        print(f"{args.file}: not valid JSON: {exc}", file=sys.stderr)
        return 2
    if not isinstance(data, dict):
        # build() reads the payload with .get(), so it must be a JSON object.
        print(f"{args.file}: expected a JSON object at the top level", file=sys.stderr)
        return 2

    try:
        doc = build(
            data,
            title=args.title,
            author=args.author,
            grounding=args.grounding,
            created=args.created,
        )
    except (KeyError, ValueError) as exc:
        # KeyError: an entry lacks a required key; ValueError: the document
        # model rejected an id, type, confidence or duplicate.
        print(
            f"{args.file}: cannot build FEED document: {type(exc).__name__}: {exc}",
            file=sys.stderr,
        )
        return 2

    report = doc.validate()
    if not report.ok:
        print("built document failed validation:", file=sys.stderr)
        print(report, file=sys.stderr)
    _write(doc.render(args.to), args.output)
    return 0 if report.ok else 1
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
def cmd_tag(args) -> int:
    """Handle `feed tag`: auto-tag a plain document through the optional tagger.

    The tagged document is written even when it fails validation (the report is
    then printed to stderr). Returns 0 when it validates, 1 otherwise.
    """
    from .tagger import auto_tag

    source_text = _read(args.file)
    doc = auto_tag(
        source_text,
        title=args.title,
        author=args.author,
        grounding=args.grounding,
        created=args.created,
        model=args.model,
    )
    report = doc.validate()
    if not report.ok:
        for line in ("tagged document failed validation:", report):
            print(line, file=sys.stderr)
    rendered = doc.render(args.to)
    _write(rendered, args.output)
    return 1 if not report.ok else 0
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def build_parser() -> argparse.ArgumentParser:
    """Assemble the `feed` argument parser with one sub-command per action.

    Every sub-parser stores its handler under ``func`` so the caller can
    dispatch with ``args.func(args)``.
    """
    root = argparse.ArgumentParser(prog="feed", description="FEED protocol toolkit")
    root.add_argument("--version", action="version", version=f"feed {_version()}")
    commands = root.add_subparsers(dest="command", required=True)

    def add_policy_options(target) -> None:
        # Author-policy overrides shared by `build` and `tag`.
        for flag in ("--title", "--author", "--created"):
            target.add_argument(flag)
        target.add_argument(
            "--grounding", choices=["strict", "standard", "open"], default="strict"
        )

    def add_output_options(target, default_format: str) -> None:
        # Output format and destination shared by the rendering commands.
        target.add_argument("--to", choices=["md", "html"], default=default_format)
        target.add_argument("-o", "--output", help="output path (default: stdout)")

    validate_cmd = commands.add_parser(
        "validate", help="check a FEED document is well-formed"
    )
    validate_cmd.add_argument("file", help="FEED markdown file ('-' for stdin)")
    validate_cmd.set_defaults(func=cmd_validate)

    verify_cmd = commands.add_parser(
        "verify", help="check an AI answer is grounded in a FEED doc"
    )
    verify_cmd.add_argument("--doc", required=True, help="the FEED document")
    verify_cmd.add_argument(
        "--answer", required=True, help="the AI's answer text ('-' for stdin)"
    )
    verify_cmd.set_defaults(func=cmd_verify)

    render_cmd = commands.add_parser(
        "render", help="render a FEED document to md or html"
    )
    render_cmd.add_argument("file", help="FEED markdown file ('-' for stdin)")
    add_output_options(render_cmd, "html")
    render_cmd.set_defaults(func=cmd_render)

    prompt_cmd = commands.add_parser(
        "prompt",
        help="print the authoring prompt + JSON schema for any AI to emit FEED (no key)",
    )
    prompt_cmd.add_argument(
        "--schema-only", action="store_true", help="print just the JSON schema"
    )
    prompt_cmd.set_defaults(func=cmd_prompt)

    build_cmd = commands.add_parser(
        "build", help="render FEED from the JSON an AI produced (pure Python, no key)"
    )
    build_cmd.add_argument(
        "file", help="JSON file matching the FEED schema ('-' for stdin)"
    )
    add_policy_options(build_cmd)
    add_output_options(build_cmd, "md")
    build_cmd.set_defaults(func=cmd_build)

    tag_cmd = commands.add_parser(
        "tag",
        help="OPTIONAL convenience: auto-tag a plain doc via Claude (needs ANTHROPIC_API_KEY)",
    )
    tag_cmd.add_argument("file", help="plain markdown/text file ('-' for stdin)")
    add_policy_options(tag_cmd)
    tag_cmd.add_argument("--model", default="claude-opus-4-8")
    add_output_options(tag_cmd, "md")
    tag_cmd.set_defaults(func=cmd_tag)

    return root
|
|
167
|
+
|
|
168
|
+
|
|
169
|
+
def _version() -> str:
    """Return the package version, imported lazily when the parser is built."""
    from . import __version__ as package_version

    return package_version
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse *argv* and run the selected sub-command.

    *argv* defaults to ``None``, in which case argparse reads ``sys.argv``.
    Returns the handler's exit status.
    """
    namespace = build_parser().parse_args(argv)
    handler = namespace.func
    return handler(namespace)
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
if __name__ == "__main__": # pragma: no cover
|
|
182
|
+
raise SystemExit(main())
|
feed/constants.py
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""Shared constants, marker grammar, and the self-bootstrapping ingestion notice.
|
|
2
|
+
|
|
3
|
+
The whole FEED grammar lives in HTML comments so it is invisible in any
|
|
4
|
+
markdown/HTML renderer but readable by any LLM parsing the raw text:
|
|
5
|
+
|
|
6
|
+
<!-- FEED:DOC version="0.2" grounding="strict" --> (single-line)
|
|
7
|
+
<!-- FEED:META --> ... <!-- /FEED:META --> (block, key: value body)
|
|
8
|
+
<!-- FEED:CLAIM id="C1" evidence="E001,E002" --> ... <!-- /FEED:CLAIM -->
|
|
9
|
+
<!-- FEED:EVIDENCE id="E001" type="data" confidence="high" --> ... <!-- /FEED:EVIDENCE -->
|
|
10
|
+
|
|
11
|
+
The visible ingestion NOTICE is a markdown blockquote (not a comment) so it
|
|
12
|
+
survives copy-paste out of a rendered view, and teaches the rules inline.
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
from __future__ import annotations
|
|
16
|
+
|
|
17
|
+
import re
|
|
18
|
+
|
|
19
|
+
VERSION = "0.2"
|
|
20
|
+
|
|
21
|
+
# Grounding modes — the author's dial for how strict the reading AI must be.
|
|
22
|
+
GROUNDING_MODES = ("strict", "standard", "open")
|
|
23
|
+
DEFAULT_GROUNDING = "strict"
|
|
24
|
+
|
|
25
|
+
# Evidence/claim ID shapes. Stable plain-text IDs are what make answers verifiable.
# The end is anchored with `\Z`, not `$`: `$` also matches just before a trailing
# newline, which would let an id such as "E001\n" pass validation.
EVIDENCE_ID_RE = re.compile(r"^E\d{1,4}\Z")
CLAIM_ID_RE = re.compile(r"^C\d{1,4}\Z")
|
|
28
|
+
|
|
29
|
+
# Citation pattern used by the verifier: matches [E001] and [E001, E002] etc.
|
|
30
|
+
CITATION_RE = re.compile(r"\[(E\d{1,4}(?:\s*,\s*E\d{1,4})*)\]")
|
|
31
|
+
|
|
32
|
+
# --- Marker grammar -------------------------------------------------------
|
|
33
|
+
# Opening marker, e.g. <!-- FEED:EVIDENCE id="E001" type="data" -->
|
|
34
|
+
OPEN_MARKER_RE = re.compile(
|
|
35
|
+
r"<!--\s*FEED:(?P<kind>[A-Z]+)(?P<attrs>(?:\s+[a-z_]+=\"[^\"]*\")*)\s*-->"
|
|
36
|
+
)
|
|
37
|
+
# Closing marker, e.g. <!-- /FEED:EVIDENCE -->
|
|
38
|
+
CLOSE_MARKER_RE = re.compile(r"<!--\s*/FEED:(?P<kind>[A-Z]+)\s*-->")
|
|
39
|
+
# Attribute pairs inside an opening marker.
|
|
40
|
+
ATTR_RE = re.compile(r"([a-z_]+)=\"([^\"]*)\"")
|
|
41
|
+
|
|
42
|
+
EVIDENCE_TYPES = ("data", "quote", "calc", "observation", "reference", "image")
|
|
43
|
+
CONFIDENCE_LEVELS = ("high", "medium", "low")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def parse_attrs(attr_string: str) -> dict[str, str]:
    """Turn ` id="E001" type="data"` into {'id': 'E001', 'type': 'data'}.

    A falsy *attr_string* gives an empty dict; when a name repeats, the last
    occurrence wins.
    """
    pairs = ATTR_RE.findall(attr_string or "")
    return dict(pairs)
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
def ingestion_notice(grounding: str) -> str:
    """Build the visible, self-bootstrapping instruction block.

    This is the heart of FEED's portability: it teaches a never-seen-FEED-before
    model the rules in ~150 tokens, so the document works on any LLM today.

    An unrecognised *grounding* falls back to the default mode. The result is a
    markdown blockquote with no trailing newline.
    """
    rules = {
        "strict": (
            "Grounding mode is STRICT: if no evidence block supports a statement, "
            'reply "Not supported by this document." for that point. Do not infer or '
            "use outside knowledge."
        ),
        "standard": (
            "Grounding mode is STANDARD: cite evidence wherever it exists. Where you "
            "must reason beyond the evidence, label it explicitly as inference."
        ),
        "open": (
            "Grounding mode is OPEN: ground answers in the evidence where possible and "
            "cite it; you may also reason freely."
        ),
    }
    mode = grounding if grounding in GROUNDING_MODES else DEFAULT_GROUNDING
    # Anything that is neither strict nor standard gets the open wording.
    rule = rules.get(mode, rules["open"])
    parts = (
        f"> **AI INGESTION NOTICE — FEED v{VERSION} (Format for Enforced Evidence-based Digestion)**\n",
        ">\n",
        "> This document carries machine-structured evidence. When answering questions about it:\n",
        "> 1. Read top-to-bottom: the most important claims and decisions come first, full evidence last. ",
        "If your context is limited, the top of this document still contains the core.\n",
        "> 2. Ground factual statements in the evidence blocks below — each is tagged `[E###]`.\n",
        '> 3. Cite the evidence IDs you used, e.g. "bearing vibration is rising [E001]".\n',
        f"> 4. {rule}\n",
        "> 5. The evidence blocks are the source of truth; the narrative is a summary. ",
        "On any conflict, prefer the evidence values.\n",
        "> _For full grounding, upload or paste the raw source file rather than a rendered view._",
    )
    return "".join(parts)
|
feed/document.py
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
1
|
+
"""The FEED data model and document builder.
|
|
2
|
+
|
|
3
|
+
Three primitives, as in the spec:
|
|
4
|
+
- Header (FeedDocument metadata + the self-teaching notice)
|
|
5
|
+
- Evidence (atomic, ID'd, structured key/value facts — the source of truth)
|
|
6
|
+
- Claim (narrative statements that reference evidence by ID)
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from dataclasses import dataclass, field
|
|
12
|
+
|
|
13
|
+
from .constants import (
|
|
14
|
+
CONFIDENCE_LEVELS,
|
|
15
|
+
DEFAULT_GROUNDING,
|
|
16
|
+
EVIDENCE_ID_RE,
|
|
17
|
+
EVIDENCE_TYPES,
|
|
18
|
+
GROUNDING_MODES,
|
|
19
|
+
CLAIM_ID_RE,
|
|
20
|
+
VERSION,
|
|
21
|
+
)
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class Evidence:
    """One atomic fact held as key/value pairs rather than prose, so a reading
    AI can compare values across blocks without parsing natural language."""

    id: str
    fields: dict[str, str] = field(default_factory=dict)
    type: str = "data"
    confidence: str = "medium"
    note: str | None = None  # one optional short free-text line

    def __post_init__(self) -> None:
        """Reject a malformed id, an unknown type or an unknown confidence level."""
        if EVIDENCE_ID_RE.match(self.id) is None:
            raise ValueError(f"Evidence id must look like E001, got {self.id!r}")

        type_is_known = self.type in EVIDENCE_TYPES
        if not type_is_known:
            message = f"Evidence type must be one of {EVIDENCE_TYPES}, got {self.type!r}"
            raise ValueError(message)

        confidence_is_known = self.confidence in CONFIDENCE_LEVELS
        if not confidence_is_known:
            message = f"confidence must be one of {CONFIDENCE_LEVELS}, got {self.confidence!r}"
            raise ValueError(message)
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class Claim:
    """A narrative statement grounded in evidence ids, optionally carrying the
    decision it implies."""

    id: str
    text: str
    evidence: list[str] = field(default_factory=list)
    decision: str | None = None

    def __post_init__(self) -> None:
        """Reject an id that does not have the C<digits> shape."""
        id_is_valid = CLAIM_ID_RE.match(self.id) is not None
        if id_is_valid:
            return
        raise ValueError(f"Claim id must look like C1, got {self.id!r}")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class FeedDocument:
    """Build a FEED document programmatically, then render/validate it.

    The builder enforces the structure so a pipeline *cannot* emit a bloated or
    internally inconsistent document: evidence is key/value, claim references
    must point at evidence that exists, IDs must be unique and well-formed.
    """

    def __init__(
        self,
        title: str,
        author: str | None = None,
        grounding: str = DEFAULT_GROUNDING,
        created: str | None = None,
        summary: str | None = None,
        version: str = VERSION,
    ) -> None:
        """Create an empty document with the given header metadata.

        Raises:
            ValueError: *grounding* is not one of ``GROUNDING_MODES``.
        """
        if grounding not in GROUNDING_MODES:
            raise ValueError(f"grounding must be one of {GROUNDING_MODES}")
        self.title = title
        self.author = author
        self.grounding = grounding
        self.created = created
        self.summary = summary
        self.version = version
        self.claims: list[Claim] = []
        self.findings: list[str] = []  # tier-1 narrative paragraphs
        self.evidence: list[Evidence] = []
        # Id sets mirror the lists above for O(1) duplicate/reference checks.
        self._evidence_ids: set[str] = set()
        self._claim_ids: set[str] = set()

    # -- builders ----------------------------------------------------------
    def add_evidence(
        self,
        id: str,
        type: str = "data",
        confidence: str = "medium",
        note: str | None = None,
        **fields: object,
    ) -> Evidence:
        """Append an evidence block whose facts are given as keyword arguments.

        Field values are normalised to strings. Because fields arrive as
        keywords, a fact cannot be named ``id``, ``type``, ``confidence`` or
        ``note`` through this method: those names bind to the parameters.

        Returns:
            The stored ``Evidence`` instance.

        Raises:
            ValueError: the id is already used, no field was given, or the
                ``Evidence`` model rejects the id, type or confidence.
        """
        if id in self._evidence_ids:
            raise ValueError(f"Duplicate evidence id {id!r}")
        if not fields:
            raise ValueError(f"Evidence {id!r} needs at least one key/value field")
        ev = Evidence(
            id=id,
            type=type,
            confidence=confidence,
            note=note,
            fields={k: _normalise(v) for k, v in fields.items()},
        )
        self.evidence.append(ev)
        self._evidence_ids.add(id)
        return ev

    def add_claim(
        self,
        id: str,
        text: str,
        evidence: list[str] | None = None,
        decision: str | None = None,
    ) -> Claim:
        """Append a claim that cites already-added evidence by id.

        Returns:
            The stored ``Claim`` instance (it holds its own copy of *evidence*).

        Raises:
            ValueError: the id is already used, is malformed, or *evidence*
                names an id that has not been added to this document.
        """
        if id in self._claim_ids:
            raise ValueError(f"Duplicate claim id {id!r}")
        evidence = evidence or []
        missing = [e for e in evidence if e not in self._evidence_ids]
        if missing:
            raise ValueError(
                f"Claim {id!r} references evidence that does not exist: {missing}. "
                "Add the evidence block first."
            )
        claim = Claim(id=id, text=text, evidence=list(evidence), decision=decision)
        self.claims.append(claim)
        self._claim_ids.add(id)
        return claim

    def add_finding(self, text: str) -> None:
        """Add a tier-1 narrative paragraph. Reference evidence inline as [E001]."""
        self.findings.append(text.strip())

    # -- output ------------------------------------------------------------
    def to_markdown(self) -> str:
        """Render the document as markdown (renderer imported lazily)."""
        from .render import to_markdown

        return to_markdown(self)

    def to_html(self) -> str:
        """Render the document as HTML (renderer imported lazily)."""
        from .render import to_html

        return to_html(self)

    def render(self, fmt: str = "md") -> str:
        """Render as ``"md"``/``"markdown"`` or ``"html"``.

        Raises:
            ValueError: *fmt* is none of the accepted names.
        """
        if fmt in ("md", "markdown"):
            return self.to_markdown()
        if fmt == "html":
            return self.to_html()
        raise ValueError("fmt must be 'md' or 'html'")

    def write(self, path: str, fmt: str | None = None) -> None:
        """Render the document and write it to *path* as UTF-8.

        When *fmt* is omitted it is inferred from the extension: ``.html`` or
        ``.htm`` in any letter case selects HTML, anything else markdown.

        Raises:
            ValueError: *fmt* is given but not accepted by ``render``.
        """
        if fmt is None:
            # Compare case-insensitively so "REPORT.HTML" is not silently
            # written as markdown.
            fmt = "html" if path.lower().endswith((".html", ".htm")) else "md"
        # Render before opening: a bad fmt must not create or truncate the file.
        rendered = self.render(fmt)
        with open(path, "w", encoding="utf-8") as fh:
            fh.write(rendered)

    def validate(self):
        """Run the package validator on this document and return its report."""
        from .validate import validate

        return validate(self)

    @classmethod
    def from_markdown(cls, text: str) -> "FeedDocument":
        """Parse FEED markdown *text* into a document (parser imported lazily)."""
        from .parser import parse

        return parse(text)

    @classmethod
    def read(cls, path: str) -> "FeedDocument":
        """Read the UTF-8 file at *path* and parse it as FEED markdown."""
        with open(path, encoding="utf-8") as fh:
            return cls.from_markdown(fh.read())

    def __repr__(self) -> str:  # pragma: no cover - cosmetic
        return (
            f"FeedDocument(title={self.title!r}, grounding={self.grounding!r}, "
            f"claims={len(self.claims)}, evidence={len(self.evidence)})"
        )
|
|
187
|
+
|
|
188
|
+
|
|
189
|
+
def _normalise(value: object) -> str:
|
|
190
|
+
"""Render a field value as a compact, comparable string."""
|
|
191
|
+
if isinstance(value, bool):
|
|
192
|
+
return "true" if value else "false"
|
|
193
|
+
if isinstance(value, float):
|
|
194
|
+
# trim trailing zeros without losing precision people care about
|
|
195
|
+
return f"{value:g}"
|
|
196
|
+
return str(value).strip()
|