delimit-cli 4.5.0 → 4.5.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +108 -0
- package/README.md +2 -2
- package/adapters/cursor-rules.js +17 -4
- package/bin/delimit-cli.js +109 -24
- package/gateway/ai/content_engine.py +3 -4
- package/gateway/ai/inbox_classifier.py +215 -0
- package/gateway/ai/integrations/opensage_wrapper.py +4 -1
- package/gateway/ai/ledger_manager.py +218 -38
- package/gateway/ai/license.py +26 -0
- package/gateway/ai/notify.py +68 -3
- package/gateway/ai/reddit_proxy.py +93 -15
- package/gateway/ai/reddit_scanner.py +36 -18
- package/gateway/ai/server.py +128 -6
- package/gateway/ai/social_capability/__init__.py +6 -0
- package/gateway/ai/social_capability/capability_validator.py +273 -0
- package/gateway/ai/social_capability/current_capabilities.yaml +95 -0
- package/gateway/ai/social_queue.py +307 -0
- package/gateway/ai/supabase_sync.py +14 -2
- package/gateway/ai/swarm.py +29 -11
- package/gateway/ai/tui.py +6 -2
- package/gateway/ai/x_ranker.py +276 -0
- package/lib/attest-mcp.js +487 -0
- package/lib/attest-telemetry.js +48 -0
- package/lib/delimit-home.js +35 -0
- package/lib/delimit-template.js +14 -0
- package/lib/managed-section.js +92 -0
- package/lib/trust-page-engine.js +6 -2
- package/lib/wrap-engine.js +21 -4
- package/package.json +8 -2
- package/scripts/postinstall.js +89 -40
- package/gateway/ai/content_grounding/__init__.py +0 -98
- package/gateway/ai/content_grounding/build.py +0 -350
- package/gateway/ai/content_grounding/consume.py +0 -280
- package/gateway/ai/content_grounding/features.py +0 -218
- package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +0 -9
- package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +0 -17
- package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +0 -18
- package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +0 -23
- package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +0 -16
- package/gateway/ai/content_grounding/schemas/claim.schema.json +0 -40
- package/gateway/ai/content_grounding/schemas/event.schema.json +0 -23
- package/gateway/ai/content_grounding/schemas.py +0 -276
- package/gateway/ai/content_grounding/telemetry.py +0 -221
- package/gateway/ai/inbox_drafts/__init__.py +0 -61
- package/gateway/ai/inbox_drafts/registry.py +0 -412
- package/gateway/ai/inbox_drafts/schema.py +0 -374
- package/gateway/ai/inbox_executor.py +0 -565
|
@@ -1,350 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Ingestion + validation for the grounding layer (LED-1084 Week 1).
|
|
3
|
-
|
|
4
|
-
Reads three canonical sources:
|
|
5
|
-
- ~/.delimit/ledger/*.jsonl → decisions / incidents / outreach / releases
|
|
6
|
-
- ~/.delimit/attestations/*.json → HMAC-signed delimit wrap bundles
|
|
7
|
-
- `git log` on delimit-gateway → commit events
|
|
8
|
-
|
|
9
|
-
Produces a `GroundingIndex` snapshot that downstream generators consume.
|
|
10
|
-
|
|
11
|
-
Week 1 posture: ingestion + validation only. No generation, no publishing.
|
|
12
|
-
`_PUBLISH_DISABLED = True` in `__init__` enforces this at import time.
|
|
13
|
-
"""
|
|
14
|
-
from __future__ import annotations
|
|
15
|
-
|
|
16
|
-
import json
|
|
17
|
-
import logging
|
|
18
|
-
import os
|
|
19
|
-
import re
|
|
20
|
-
import subprocess
|
|
21
|
-
from dataclasses import asdict
|
|
22
|
-
from datetime import datetime, timezone, timedelta
|
|
23
|
-
from pathlib import Path
|
|
24
|
-
from typing import Any, Dict, List, Optional
|
|
25
|
-
|
|
26
|
-
from .schemas import (
|
|
27
|
-
Claim,
|
|
28
|
-
ClaimType,
|
|
29
|
-
EventType,
|
|
30
|
-
GroundedEvent,
|
|
31
|
-
GroundingIndex,
|
|
32
|
-
Visibility,
|
|
33
|
-
)
|
|
34
|
-
|
|
35
|
-
logger = logging.getLogger("delimit.ai.content_grounding")

# Filesystem locations used by the ingestion layer. Every one of them can be
# redirected through the named environment variable, which keeps tests off
# the real ~/.delimit tree.
_DELIMIT_HOME = Path.home() / ".delimit"


def _env_path(var_name: str, fallback: Path) -> Path:
    """Return the path stored in env var ``var_name``, or ``fallback`` if unset."""
    return Path(os.environ.get(var_name, str(fallback)))


LEDGER_DIR = _env_path("DELIMIT_LEDGER_DIR", _DELIMIT_HOME / "ledger")
ATTESTATIONS_DIR = _env_path("DELIMIT_ATTESTATIONS_DIR", _DELIMIT_HOME / "attestations")
GATEWAY_REPO = Path(os.environ.get("DELIMIT_GATEWAY_REPO", "/home/delimit/delimit-gateway"))
GROUNDING_OUT = _env_path("DELIMIT_GROUNDING_OUT", _DELIMIT_HOME / "content" / "grounding")
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
# ---------------------------------------------------------------------------
|
|
45
|
-
# Ledger ingestion
|
|
46
|
-
# ---------------------------------------------------------------------------
|
|
47
|
-
|
|
48
|
-
# Ledger item_type → grounded EventType. Items with types not in this map
|
|
49
|
-
# fall into DECISION as a safe default.
|
|
50
|
-
_LEDGER_TYPE_MAP: Dict[str, EventType] = {
    # Shipping activity.
    "release": EventType.RELEASE,
    "feature": EventType.FEATURE_SHIPPED,
    # Incidents and their fixes.
    "incident": EventType.INCIDENT,
    "fix": EventType.INCIDENT_RESOLVED,
    # Internal decisions.
    "audit": EventType.DECISION,
    "strategy": EventType.DECISION,
    # Outward-facing activity.
    "watch": EventType.OUTREACH_EVENT,
    "outreach": EventType.OUTREACH_EVENT,
}
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
def _ledger_item_to_event(item: Dict[str, Any]) -> Optional[GroundedEvent]:
    """Convert one parsed ledger record into a ``GroundedEvent``.

    Returns ``None`` when the record has no id, has no timestamp, or raises
    anything while being processed (the failure is logged at debug level).
    """
    try:
        ledger_id = item.get("id") or item.get("ledger_id") or ""
        if not ledger_id:
            return None
        headline = (item.get("title") or "").strip()
        stamp = item.get("created_at") or item.get("timestamp") or ""
        if not stamp:
            return None
        venture_name = (item.get("venture") or "delimit").lower()
        kind = (item.get("item_type") or item.get("type") or "decision").lower()

        # Ledger kinds missing from the map are treated as decisions.
        mapped_type = _LEDGER_TYPE_MAP.get(kind, EventType.DECISION)

        # The record's own LED id always counts as evidence; an http(s)
        # link is added on top when the record carries one.
        refs: List[str] = [f"LED-{ledger_id.replace('LED-', '')}"]
        url = item.get("link") or ""
        if url and url.startswith("http"):
            refs.append(f"url:{url}")

        # Only shipped features and resolved incidents yield a claim, and
        # its text is the title exactly as written in the ledger.
        title_claims: List[Claim] = []
        if headline and mapped_type in (EventType.FEATURE_SHIPPED, EventType.INCIDENT_RESOLVED):
            if mapped_type == EventType.FEATURE_SHIPPED:
                claim_kind = ClaimType.FEATURE
            else:
                claim_kind = ClaimType.INCIDENT
            title_claim = Claim(
                claim_id=f"CLM-{ledger_id}-title",
                type=claim_kind,
                text=headline,
                evidence_refs=list(refs),
                visibility=Visibility.INTERNAL,  # private until an author promotes it
            )
            title_claims.append(title_claim)

        raw_fields = {
            "ledger_id": ledger_id,
            "status": item.get("status"),
            "priority": item.get("priority"),
        }
        return GroundedEvent(
            event_id=f"evt-ledger-{ledger_id}",
            type=mapped_type,
            date=stamp,
            venture=venture_name,
            evidence_refs=list(refs),
            claims=title_claims,
            visibility=Visibility.INTERNAL,
            source=f"ledger:{kind}",
            raw=raw_fields,
        )
    except Exception as e:
        logger.debug("skipping malformed ledger item: %s", e)
        return None
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
def _ingest_ledger(since: Optional[datetime] = None) -> List[GroundedEvent]:
    """Read every ``*.jsonl`` file under ``LEDGER_DIR`` into grounded events.

    Blank lines, lines that are not valid JSON, and records rejected by
    ``_ledger_item_to_event`` are skipped. A file that fails while being
    read is logged as a warning and the scan moves on to the next file.

    Args:
        since: Optional cutoff. Events dated strictly before it are dropped,
            as are events whose date is not a parseable ISO-8601 string.
            A naive datetime on either side is assumed to be UTC so that
            mixed naive/aware timestamps compare instead of raising.

    Returns:
        The surviving events, in file order (files sorted by path).
    """
    events: List[GroundedEvent] = []
    if not LEDGER_DIR.is_dir():
        logger.warning("ledger dir not found: %s", LEDGER_DIR)
        return events

    cutoff = since
    if cutoff is not None and cutoff.tzinfo is None:
        cutoff = cutoff.replace(tzinfo=timezone.utc)

    for p in sorted(LEDGER_DIR.glob("*.jsonl")):
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            for line in p.read_text(encoding="utf-8", errors="replace").splitlines():
                line = line.strip()
                if not line:
                    continue
                try:
                    item = json.loads(line)
                except json.JSONDecodeError:
                    continue
                event = _ledger_item_to_event(item)
                if not event:
                    continue
                if cutoff is not None:
                    try:
                        evt_dt = datetime.fromisoformat(event.date.replace("Z", "+00:00"))
                    except (AttributeError, TypeError, ValueError):
                        # Non-string or unparseable date: the event cannot
                        # be placed inside the window, so leave it out.
                        continue
                    if evt_dt.tzinfo is None:
                        # Without this, comparing against the aware cutoff
                        # raises TypeError and the rest of the file is lost.
                        evt_dt = evt_dt.replace(tzinfo=timezone.utc)
                    if evt_dt < cutoff:
                        continue
                events.append(event)
        except Exception as e:
            # Best-effort: one bad file must not stop the remaining ones.
            logger.warning("failed to read %s: %s", p, e)
    return events
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
# ---------------------------------------------------------------------------
|
|
145
|
-
# Attestation ingestion
|
|
146
|
-
# ---------------------------------------------------------------------------
|
|
147
|
-
|
|
148
|
-
def _attestation_to_event(record: Dict[str, Any]) -> Optional[GroundedEvent]:
    """Convert one parsed attestation file into a ``GroundedEvent``.

    Args:
        record: Parsed JSON. The code reads ``id``, ``signature_alg`` and a
            nested ``bundle`` mapping (``started_at``/``completed_at``,
            ``kind``, ``before_head``, ``after_head``, ``wrapped_exit``).

    Returns:
        An ``ATTESTATION`` event, or ``None`` when the id does not start
        with ``att_``, no start/completion time is present, or processing
        raises (logged at debug level).
    """
    try:
        att_id = record.get("id") or ""
        if not att_id.startswith("att_"):
            return None
        bundle = record.get("bundle") or {}
        started = bundle.get("started_at") or bundle.get("completed_at") or ""
        if not started:
            return None
        kind = bundle.get("kind", "merge_attestation")

        # NOTE: an earlier revision built a display title from the governance
        # gates and the wrapped command but never used it. It was removed
        # because evaluating it could raise on odd-but-valid bundles (e.g.
        # ``wrapped_command: null``) and silently drop the attestation.

        # Evidence: the attestation itself plus the git heads it recorded,
        # abbreviated to 12 characters. Heads shorter than 7 are ignored.
        evidence: List[str] = [f"attest:{att_id}"]
        before = bundle.get("before_head")
        after = bundle.get("after_head")
        if before and len(before) >= 7:
            evidence.append(f"git:{before[:12]}")
        if after and after != before and len(after) >= 7:
            evidence.append(f"git:{after[:12]}")
        return GroundedEvent(
            event_id=f"evt-att-{att_id}",
            type=EventType.ATTESTATION,
            date=started,
            venture="delimit",  # attestations are all delimit-venture for now
            evidence_refs=evidence,
            claims=[],  # attestations don't produce direct claim text
            visibility=Visibility.INTERNAL,
            source="attestation",
            raw={
                "attestation_id": att_id,
                "kind": kind,
                "wrapped_exit": bundle.get("wrapped_exit"),
                "signature_alg": record.get("signature_alg"),
            },
        )
    except Exception as e:
        logger.debug("skipping malformed attestation: %s", e)
        return None
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
def _ingest_attestations(since: Optional[datetime] = None) -> List[GroundedEvent]:
    """Read every ``att_*.json`` file under ``ATTESTATIONS_DIR`` into events.

    Files that cannot be read or parsed, and records rejected by
    ``_attestation_to_event``, are skipped. A missing directory yields an
    empty list.

    Args:
        since: Optional cutoff. Events dated strictly before it are dropped,
            as are events whose date is not a parseable ISO-8601 string.
            A naive datetime on either side is assumed to be UTC so that
            mixed naive/aware timestamps compare instead of raising.

    Returns:
        The surviving events, ordered by file path.
    """
    events: List[GroundedEvent] = []
    if not ATTESTATIONS_DIR.is_dir():
        return events

    cutoff = since
    if cutoff is not None and cutoff.tzinfo is None:
        cutoff = cutoff.replace(tzinfo=timezone.utc)

    for p in sorted(ATTESTATIONS_DIR.glob("att_*.json")):
        try:
            record = json.loads(p.read_text(encoding="utf-8", errors="replace"))
        except Exception as e:
            # Best-effort: skip the file, but leave a trace of why.
            logger.debug("skipping unreadable attestation %s: %s", p, e)
            continue
        event = _attestation_to_event(record)
        if not event:
            continue
        if cutoff is not None:
            try:
                evt_dt = datetime.fromisoformat(event.date.replace("Z", "+00:00"))
            except (AttributeError, TypeError, ValueError):
                # Non-string or unparseable date: cannot be windowed.
                continue
            if evt_dt.tzinfo is None:
                # Without this, comparing against the aware cutoff raises
                # TypeError, which nothing here catches.
                evt_dt = evt_dt.replace(tzinfo=timezone.utc)
            if evt_dt < cutoff:
                continue
        events.append(event)
    return events
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
# ---------------------------------------------------------------------------
|
|
214
|
-
# Git log ingestion
|
|
215
|
-
# ---------------------------------------------------------------------------
|
|
216
|
-
|
|
217
|
-
_RELEASE_TAG_RE = re.compile(r"^v\d+\.\d+\.\d+$")
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
def _ingest_git_commits(since: Optional[datetime] = None, limit: int = 200) -> List[GroundedEvent]:
    """Turn recent ``git log`` entries of ``GATEWAY_REPO`` into COMMIT events.

    Args:
        since: Optional cutoff handed to ``git log --since``. The full
            timestamp (to the second, with UTC offset when the datetime is
            aware) is passed, so the window does not depend on how git
            fills in a missing time of day.
        limit: Maximum number of commits requested (``--max-count``).

    Returns:
        One event per commit in the order git prints them. The list is
        empty when the repo has no ``.git`` directory, git exits non-zero,
        or running git raises (logged as a warning).
    """
    events: List[GroundedEvent] = []
    if not (GATEWAY_REPO / ".git").is_dir():
        return events

    after_arg: List[str] = []
    if since is not None:
        # "%z" is empty for naive datetimes; strip the dangling space.
        stamp = since.strftime("%Y-%m-%d %H:%M:%S %z").strip()
        after_arg = [f"--since={stamp}"]

    try:
        result = subprocess.run(
            [
                "git", "-C", str(GATEWAY_REPO),
                "log", f"--max-count={limit}",
                # NUL-separated fields: full sha, author date (strict ISO), subject.
                "--pretty=format:%H%x00%aI%x00%s",
                *after_arg,
            ],
            capture_output=True, text=True, timeout=30,
        )
        if result.returncode != 0:
            logger.warning("git log failed: %s", result.stderr[:200])
            return events
        # Split on "\n" only: str.splitlines() also breaks on separators
        # such as form feed or U+2028 that could sit inside a subject.
        for line in result.stdout.split("\n"):
            parts = line.split("\x00")
            if len(parts) != 3:
                continue
            sha, iso_date, subject = parts
            events.append(GroundedEvent(
                event_id=f"evt-git-{sha[:12]}",
                type=EventType.COMMIT,
                date=iso_date,
                venture="delimit",
                evidence_refs=[f"git:{sha[:12]}"],
                claims=[],  # commit subject is NOT a claim — subjects paraphrase
                visibility=Visibility.INTERNAL,
                source="git-log",
                raw={"subject": subject[:200], "sha": sha},
            ))
    except Exception as e:
        # Covers a missing git binary and the 30 s timeout; best-effort.
        logger.warning("git log exception: %s", e)
    return events
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
# ---------------------------------------------------------------------------
|
|
263
|
-
# Public API
|
|
264
|
-
# ---------------------------------------------------------------------------
|
|
265
|
-
|
|
266
|
-
def build_grounding_index(
    venture: str = "delimit",
    days: int = 30,
    whitelist: Optional[frozenset] = None,
) -> GroundingIndex:
    """Build a fresh grounding index over the last ``days`` days.

    Ingests ledger items, attestations and git commits, keeps the events
    whose ``venture`` equals the requested one, and orders them newest
    first. Validation runs in best-effort mode: problems are logged and
    never block construction. Use ``validate_claims`` for a strict pass.

    Args:
        venture: Venture name used to filter events.
        days: Size of the look-back window, counted back from now (UTC).
        whitelist: Passed through to ``GroundingIndex.validate``.

    Returns:
        The populated ``GroundingIndex``.
    """
    since = datetime.now(timezone.utc) - timedelta(days=days)
    events: List[GroundedEvent] = []
    events.extend(_ingest_ledger(since=since))
    events.extend(_ingest_attestations(since=since))
    events.extend(_ingest_git_commits(since=since))

    # Filter to the requested venture. Attestations + git commits are
    # `delimit`-venture by construction; ledger items carry their own.
    events = [e for e in events if e.venture == venture]

    def _chronological_key(event: GroundedEvent) -> datetime:
        """Parse the event date into an aware datetime for ordering.

        The sources disagree on UTC offset ("Z", "+00:00", git's local
        offset), so comparing the raw ISO strings is not chronological.
        Naive dates are assumed UTC; unparseable ones sort as oldest.
        """
        try:
            parsed = datetime.fromisoformat(event.date.replace("Z", "+00:00"))
        except (AttributeError, TypeError, ValueError):
            return datetime.min.replace(tzinfo=timezone.utc)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    index = GroundingIndex(
        venture=venture,
        built_at=datetime.now(timezone.utc).isoformat(),
        events=sorted(events, key=_chronological_key, reverse=True),
    )
    # Validation is best-effort at build time — errors get logged but
    # do not block index construction.
    errs = index.validate(whitelist=whitelist)
    if errs:
        logger.info(
            "build_grounding_index: %d validation warnings (first 5): %s",
            len(errs), errs[:5],
        )
    return index
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
def load_grounded_events(
    venture: str = "delimit",
    days: int = 30,
    visibility: Optional[Visibility] = None,
    event_type: Optional[EventType] = None,
    whitelist: Optional[frozenset] = None,
) -> List[GroundedEvent]:
    """Return the events of a freshly built index, optionally narrowed.

    This is the entry point meant for generators. ``visibility`` and
    ``event_type`` each restrict the result when given; with neither, the
    index's own event list is returned unchanged.
    """
    index = build_grounding_index(venture=venture, days=days, whitelist=whitelist)
    if visibility is None and event_type is None:
        return index.events
    return [
        ev
        for ev in index.events
        if (visibility is None or ev.visibility == visibility)
        and (event_type is None or ev.type == event_type)
    ]
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
def validate_claims(
    claims: List[Claim],
    whitelist: Optional[frozenset] = None,
) -> List[Dict[str, Any]]:
    """Validate each claim strictly and report the outcome per claim.

    Each entry of the result is ``{"claim_id", "errors", "valid"}``, where
    ``valid`` is true exactly when ``errors`` is empty. This is the gate in
    front of generator output (A9): callers MUST fail-closed whenever any
    entry carries errors.
    """
    allowed = whitelist if whitelist else frozenset()

    def _report(claim: Claim) -> Dict[str, Any]:
        problems = claim.validate(whitelist=allowed)
        return {"claim_id": claim.claim_id, "errors": problems, "valid": not problems}

    return [_report(claim) for claim in claims]
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
def persist_grounding_index(index: GroundingIndex, out_dir: Path = GROUNDING_OUT) -> Path:
    """Write the index to ``out_dir`` and return the path of the events file.

    Two files are produced, both named after the index's venture:
    ``events-<venture>.jsonl`` with one JSON object per event, and
    ``meta-<venture>.json`` with the venture, build time, event count and
    canon version. ``out_dir`` is created when it does not exist.
    """
    out_dir.mkdir(parents=True, exist_ok=True)

    events_path = out_dir / f"events-{index.venture}.jsonl"
    with events_path.open("w") as sink:
        sink.writelines(json.dumps(ev.to_dict()) + "\n" for ev in index.events)

    meta_path = out_dir / f"meta-{index.venture}.json"
    summary = {
        "venture": index.venture,
        "built_at": index.built_at,
        "event_count": len(index.events),
        "canon_version": index.canon_version,
    }
    meta_path.write_text(json.dumps(summary, indent=2))
    return events_path
|
|
@@ -1,280 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Week 2 consumer API for the grounding layer (LED-1084).
|
|
3
|
-
|
|
4
|
-
Exposes the gate generators (social drafter, blog pipeline, storyline) must
|
|
5
|
-
call before emitting text. Amendments A6+A9+A10 from the adversarial rebuttal
|
|
6
|
-
(/home/delimit/delimit-private/strategy/CONTENT_GROUNDING_REBUTTAL_2026_04.md):
|
|
7
|
-
|
|
8
|
-
A6. Hard bans on unresolved claim categories (comparative, adoption,
|
|
9
|
-
customer, roadmap) apply here at the gate — not just at schema time.
|
|
10
|
-
A9. Deterministic extraction gate: extract candidate claims from output →
|
|
11
|
-
classify → map to allowed_claim_ids OR approved inference rule →
|
|
12
|
-
reject on any unmatched/uncertain claim.
|
|
13
|
-
A10. One-strike kill-switch semantics: callers that detect a slippage
|
|
14
|
-
MUST revert ALL generators to manual-only mode.
|
|
15
|
-
|
|
16
|
-
Week 2 scope:
|
|
17
|
-
- fetch_grounding_bundle(venture, days) → GroundingBundle for a window
|
|
18
|
-
- build_allowed_claim_set(bundle) → frozenset of safe-to-use texts
|
|
19
|
-
- load_feature_whitelist() → shipped-feature list
|
|
20
|
-
- unreleased_feature_detector(text) → True if text claims a feature
|
|
21
|
-
that is NOT in the whitelist
|
|
22
|
-
- score_draft_grounding(text, bundle) → 0.0-1.0 grounding score
|
|
23
|
-
(simple coverage heuristic for
|
|
24
|
-
v1; Week 3 upgrades to
|
|
25
|
-
classifier-based)
|
|
26
|
-
|
|
27
|
-
Not in scope (Week 3+):
|
|
28
|
-
- Paraphrase classifier (we reject non-verbatim for now)
|
|
29
|
-
- Implication detector (hard-ban suffices for now)
|
|
30
|
-
- Comparative claim classifier (ban + whitelist-only for now)
|
|
31
|
-
"""
|
|
32
|
-
from __future__ import annotations
|
|
33
|
-
|
|
34
|
-
import json
|
|
35
|
-
import logging
|
|
36
|
-
import os
|
|
37
|
-
import re
|
|
38
|
-
from dataclasses import dataclass, field
|
|
39
|
-
from pathlib import Path
|
|
40
|
-
from typing import Any, Dict, List, Optional
|
|
41
|
-
|
|
42
|
-
from .schemas import Claim, ClaimType, GroundedEvent, Visibility
|
|
43
|
-
from .build import build_grounding_index
|
|
44
|
-
|
|
45
|
-
logger = logging.getLogger("delimit.ai.content_grounding.consume")
|
|
46
|
-
|
|
47
|
-
# Location of the shipped-feature whitelist; DELIMIT_GROUNDING_FEATURES
# overrides the default under ~/.delimit.
_DEFAULT_FEATURES_FILE = Path.home() / ".delimit" / "content" / "grounding" / "features.json"
FEATURES_FILE = Path(os.environ.get("DELIMIT_GROUNDING_FEATURES", str(_DEFAULT_FEATURES_FILE)))
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# ---------------------------------------------------------------------------
|
|
54
|
-
# GroundingBundle — what generators receive
|
|
55
|
-
# ---------------------------------------------------------------------------
|
|
56
|
-
|
|
57
|
-
@dataclass
class GroundingBundle:
    """Snapshot of grounded material handed to a generator for one window.

    A generator MUST NOT emit a claim unless its text appears verbatim in
    ``allowed_claim_texts``, and MUST NOT mention a feature that is absent
    from ``features``.
    """
    # Venture the snapshot was built for.
    venture: str
    # Build timestamp, copied from the index the bundle was built from.
    built_at: str
    # Size of the look-back window, in days.
    window_days: int
    events: List[GroundedEvent] = field(default_factory=list)
    allowed_claim_texts: frozenset = field(default_factory=frozenset)
    features: frozenset = field(default_factory=frozenset)

    def public_events(self) -> List[GroundedEvent]:
        """Return the events whose visibility is ``Visibility.PUBLIC``."""
        visible: List[GroundedEvent] = []
        for event in self.events:
            if event.visibility == Visibility.PUBLIC:
                visible.append(event)
        return visible
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
# ---------------------------------------------------------------------------
|
|
76
|
-
# Bundle construction
|
|
77
|
-
# ---------------------------------------------------------------------------
|
|
78
|
-
|
|
79
|
-
def load_feature_whitelist() -> frozenset:
    """Load the shipped-feature whitelist from ``FEATURES_FILE``.

    The file may hold either a JSON list or an object with a ``features``
    list. Entries are converted to ``str`` and stripped; falsy and
    whitespace-only entries are dropped, so a file of blanks cannot pass
    for a loaded whitelist.

    Returns:
        The feature names, or an empty frozenset when the file is missing,
        unreadable, not valid JSON, or not shaped as described. Generators
        MUST fail-closed on an empty whitelist — i.e. refuse to name any
        feature — rather than fall through to prompt-level trust.
    """
    if not FEATURES_FILE.exists():
        logger.info(
            "feature whitelist not found at %s — generators will fail-closed on feature claims",
            FEATURES_FILE,
        )
        return frozenset()
    try:
        data = json.loads(FEATURES_FILE.read_text(encoding="utf-8"))
    except Exception as e:
        # Best-effort: an unreadable whitelist behaves like a missing one.
        logger.warning("feature whitelist load failed: %s", e)
        return frozenset()

    feats = data.get("features") if isinstance(data, dict) else data
    if not isinstance(feats, list):
        logger.warning(
            "feature whitelist at %s does not contain a list; treating it as empty",
            FEATURES_FILE,
        )
        return frozenset()

    stripped = (str(f).strip() for f in feats if f)
    # Filter again after stripping: "  " is truthy before, empty after.
    return frozenset(name for name in stripped if name)
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
def build_allowed_claim_set(events: List[GroundedEvent]) -> frozenset:
    """Collect the claim texts a generator is allowed to quote verbatim.

    A claim contributes its stripped text when it is PUBLIC, its
    ``validate()`` reports no errors, and the stripped text is non-empty.
    The result is a frozenset, so membership checks are O(1).
    """
    allowed = set()
    for event in events:
        for claim in event.claims:
            if not (claim.visibility == Visibility.PUBLIC):
                continue
            if claim.validate():
                # Any validation error disqualifies the claim.
                continue
            wording = claim.text.strip()
            if wording:
                allowed.add(wording)
    return frozenset(allowed)
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
def fetch_grounding_bundle(
    venture: str = "delimit",
    days: int = 7,
    include_internal: bool = False,
) -> GroundingBundle:
    """Build the ``GroundingBundle`` generators start from.

    Defaults to a 7-day window with public events only, which is tighter
    than the 30-day grounding index so that generators see recent material
    rather than the whole history. Pass ``include_internal=True`` to keep
    every event of the index.
    """
    index = build_grounding_index(venture=venture, days=days)
    if include_internal:
        selected = index.events
    else:
        selected = index.public_events()

    event_list = list(selected)
    claim_texts = build_allowed_claim_set(selected)
    shipped = load_feature_whitelist()
    return GroundingBundle(
        venture=venture,
        built_at=index.built_at,
        window_days=days,
        events=event_list,
        allowed_claim_texts=claim_texts,
        features=shipped,
    )
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
# ---------------------------------------------------------------------------
|
|
139
|
-
# Gate functions (generators call these before emitting)
|
|
140
|
-
# ---------------------------------------------------------------------------
|
|
141
|
-
|
|
142
|
-
# Very common English verbs that often anchor feature claims in social copy.
|
|
143
|
-
# Used as a lightweight trigger for the unreleased-feature scan. Not a full
|
|
144
|
-
# NLP pipeline — just enough to flag "delimit does X" / "the CLI does Y"
|
|
145
|
-
# constructions for review.
|
|
146
|
-
_FEATURE_CLAIM_TRIGGERS = [
    r"\bdelimit\s+\w+s\b",  # "delimit detects", "delimit signs"
    r"\b(?:our|the)\s+(?:cli|action|mcp|server)\s+\w+s\b",
    r"\b(?:we|delimit)\s+(?:built|ship|shipped|support|supports|have|has)\b",
    r"\b(?:new|latest)\s+feature\b",
    r"\b(?:supports|offers|provides)\s+[a-z]",
]

# All groups above are non-capturing, so findall() yields whole matches.
_TRIGGER_RE = re.compile("|".join(_FEATURE_CLAIM_TRIGGERS), re.IGNORECASE)


def unreleased_feature_detector(
    text: str,
    features: Optional[frozenset] = None,
) -> Dict[str, Any]:
    """Scan generated text for feature claims NOT in the shipped whitelist.

    Args:
        text: Draft text to scan. ``None`` or empty is treated as "".
        features: Whitelist of shipped feature names. When ``None`` it is
            loaded with ``load_feature_whitelist()``. Matching against it
            is case-insensitive.

    Returns:
        A dict with:
          - status: "clean" | "flagged"
          - reason: present only when flagged
          - triggers: up to 5 regex matches of claim-like language
          - unknown_features_mentioned: up to 10 hyphenated tokens longer
            than 8 characters that share a sentence with a trigger and are
            not in the whitelist

    Generators MUST fail-closed on status == "flagged". With an empty
    whitelist any trigger flags the text, because the generator then has
    no basis to claim any feature.
    """
    feats = features if features is not None else load_feature_whitelist()
    body = text or ""
    triggers = _TRIGGER_RE.findall(body)

    if not triggers:
        return {
            "status": "clean",
            "triggers": [],
            "unknown_features_mentioned": [],
        }

    if not feats:
        return {
            "status": "flagged",
            "reason": "feature-claim triggers present but feature whitelist is empty",
            "triggers": triggers[:5],
            "unknown_features_mentioned": [],
        }

    # Tokens are taken from the lowercased sentence, so the whitelist has to
    # be lowercased too; otherwise a mixed-case entry could never match.
    known = {str(f).lower() for f in feats}

    # Low recall on purpose: only hyphenated tokens longer than 8 characters
    # in a sentence that also contains a trigger are considered.
    unknown_specifics: List[str] = []
    already_flagged = set()
    for sentence in re.split(r"[.!?]\s+", body):
        if not _TRIGGER_RE.search(sentence):
            continue
        for word in re.findall(r"\b([a-z][a-z0-9-]{3,})\b", sentence.lower()):
            if word in known or word in already_flagged:
                continue
            if len(word) > 8 and "-" in word:
                # Report each word once per text, in order of appearance.
                already_flagged.add(word)
                unknown_specifics.append(word)

    if unknown_specifics:
        return {
            "status": "flagged",
            "reason": f"{len(unknown_specifics)} unknown-feature-looking tokens near claim triggers",
            "triggers": triggers[:5],
            "unknown_features_mentioned": unknown_specifics[:10],
        }

    return {
        "status": "clean",
        "triggers": triggers[:5],
        "unknown_features_mentioned": [],
    }
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
def score_draft_grounding(
    text: str,
    bundle: GroundingBundle,
    threshold: float = 0.85,
) -> Dict[str, Any]:
    """Score how much of a draft is covered by the allowed-claim set.

    v1 algorithm (simple): the text is split into sentences after ``.``,
    ``!`` or ``?``. A sentence counts as grounded when it contains no
    feature-claim trigger (safe conversational filler), or when it contains
    at least one allowed claim text verbatim. A sentence with a trigger and
    no verbatim match is UNGROUNDED. The score is grounded / total.

    Args:
        text: Draft to score. Empty or whitespace-only text scores 1.0.
        bundle: Only ``bundle.allowed_claim_texts`` is read. Empty strings
            in it are ignored, since they would match every sentence.
        threshold: Minimum score for ``passed`` to be true.

    Returns:
        {
          "score": 0.0-1.0 (rounded to 3 decimals),
          "threshold": threshold,
          "passed": bool,
          "sentence_count": int,
          "grounded_count": int,
          "ungrounded_sentences": List[str] (first 3, each cut to 120 chars)
        }
    """
    sentences = [s.strip() for s in re.split(r"(?<=[.!?])\s+", text or "") if s.strip()]
    if not sentences:
        # Nothing to check. Same keys as the normal result, so callers can
        # read "grounded_count" unconditionally.
        return {
            "score": 1.0,
            "threshold": threshold,
            "passed": True,
            "sentence_count": 0,
            "grounded_count": 0,
            "ungrounded_sentences": [],
        }

    # Drop empty strings: "" is a substring of everything and would make
    # every claim-like sentence look grounded.
    allowed = [claim_text for claim_text in bundle.allowed_claim_texts if claim_text]

    ungrounded: List[str] = []
    grounded_count = 0
    for sent in sentences:
        if not _TRIGGER_RE.search(sent):
            # No specific feature claim being made → safe.
            grounded_count += 1
        elif any(claim_text in sent for claim_text in allowed):
            # Claim-like sentence backed by a verbatim allowed claim.
            grounded_count += 1
        else:
            ungrounded.append(sent[:120])

    score = grounded_count / len(sentences)
    return {
        "score": round(score, 3),
        "threshold": threshold,
        # Compared before rounding, so rounding cannot lift a near miss.
        "passed": score >= threshold,
        "sentence_count": len(sentences),
        "grounded_count": grounded_count,
        "ungrounded_sentences": ungrounded[:3],
    }
|