delimit-cli 4.3.3 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +47 -0
- package/README.md +25 -18
- package/adapters/codex-security.js +64 -0
- package/adapters/codex-skill.js +78 -0
- package/adapters/cursor-rules.js +73 -0
- package/bin/delimit-cli.js +4 -4
- package/bin/delimit-setup.js +23 -0
- package/gateway/ai/backends/governance_bridge.py +168 -2
- package/gateway/ai/backends/tools_design.py +563 -83
- package/gateway/ai/backends/tools_infra.py +11 -4
- package/gateway/ai/backends/tools_real.py +3 -1
- package/gateway/ai/content_grounding/__init__.py +98 -0
- package/gateway/ai/content_grounding/build.py +350 -0
- package/gateway/ai/content_grounding/consume.py +280 -0
- package/gateway/ai/content_grounding/features.py +218 -0
- package/gateway/ai/content_grounding/fixtures/fail/01_missing_evidence.json +9 -0
- package/gateway/ai/content_grounding/fixtures/fail/02_unknown_evidence_prefix.json +9 -0
- package/gateway/ai/content_grounding/fixtures/fail/03_banned_comparative.json +17 -0
- package/gateway/ai/content_grounding/fixtures/fail/04_banned_adoption.json +17 -0
- package/gateway/ai/content_grounding/fixtures/fail/05_aggregate_no_numeric.json +17 -0
- package/gateway/ai/content_grounding/fixtures/fail/06_unversioned_inference_rule.json +18 -0
- package/gateway/ai/content_grounding/fixtures/pass/01_feature_shipped.json +18 -0
- package/gateway/ai/content_grounding/fixtures/pass/02_aggregate_claim.json +23 -0
- package/gateway/ai/content_grounding/fixtures/pass/03_attestation.json +16 -0
- package/gateway/ai/content_grounding/schemas/claim.schema.json +40 -0
- package/gateway/ai/content_grounding/schemas/event.schema.json +23 -0
- package/gateway/ai/content_grounding/schemas.py +276 -0
- package/gateway/ai/content_grounding/telemetry.py +221 -0
- package/gateway/ai/governance.py +89 -0
- package/gateway/ai/hot_reload.py +148 -7
- package/gateway/ai/ledger_manager.py +9 -2
- package/gateway/ai/license_core.py +3 -1
- package/gateway/ai/mcp_bridge.py +1 -1
- package/gateway/ai/reddit_proxy.py +8 -6
- package/gateway/ai/server.py +27 -0
- package/gateway/ai/supabase_sync.py +47 -7
- package/gateway/ai/swarm.py +1 -1
- package/gateway/ai/workers/executor.py +1 -1
- package/gateway/core/zero_spec/express_extractor.py +1 -1
- package/lib/agent.js +3 -3
- package/lib/cross-model-hooks.js +1 -1
- package/lib/delimit-template.js +5 -0
- package/lib/wrap-engine.js +19 -1
- package/package.json +1 -1
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://delimit.ai/schemas/grounding/claim.schema.json",
|
|
4
|
+
"title": "Claim",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"required": ["claim_id", "type", "text", "evidence_refs", "visibility"],
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"properties": {
|
|
9
|
+
"claim_id": { "type": "string", "pattern": "^CLM-.+" },
|
|
10
|
+
"type": { "enum": ["feature", "capability", "incident", "comparative", "adoption", "customer", "aggregate", "roadmap", "process"] },
|
|
11
|
+
"text": { "type": "string", "minLength": 1 },
|
|
12
|
+
"evidence_refs": {
|
|
13
|
+
"type": "array",
|
|
14
|
+
"minItems": 1,
|
|
15
|
+
"items": { "type": "string", "pattern": "^(LED-|sha256:|git:|attest:|url:).+" }
|
|
16
|
+
},
|
|
17
|
+
"visibility": { "enum": ["public", "internal", "private"] },
|
|
18
|
+
"inference_rule": { "type": ["string", "null"] },
|
|
19
|
+
"inference_rule_version": { "type": ["string", "null"] },
|
|
20
|
+
"numeric_evidence": {
|
|
21
|
+
"type": ["object", "null"],
|
|
22
|
+
"properties": {
|
|
23
|
+
"value": { "type": ["number", "string"] },
|
|
24
|
+
"unit": { "type": ["string", "null"] },
|
|
25
|
+
"commit_sha": { "type": "string" }
|
|
26
|
+
},
|
|
27
|
+
"required": ["value", "commit_sha"]
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
"allOf": [
|
|
31
|
+
{
|
|
32
|
+
"if": { "properties": { "type": { "const": "aggregate" } } },
|
|
33
|
+
"then": { "required": ["numeric_evidence"] }
|
|
34
|
+
},
|
|
35
|
+
{
|
|
36
|
+
"if": { "required": ["inference_rule"], "properties": { "inference_rule": { "type": "string" } } },
|
|
37
|
+
"then": { "required": ["inference_rule_version"] }
|
|
38
|
+
}
|
|
39
|
+
]
|
|
40
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
3
|
+
"$id": "https://delimit.ai/schemas/grounding/event.schema.json",
|
|
4
|
+
"title": "GroundedEvent",
|
|
5
|
+
"type": "object",
|
|
6
|
+
"required": ["event_id", "type", "date", "venture", "evidence_refs", "visibility"],
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"properties": {
|
|
9
|
+
"event_id": { "type": "string", "pattern": "^evt-[a-z]+-.+" },
|
|
10
|
+
"type": { "enum": ["feature_shipped", "release", "incident", "incident_resolved", "decision", "outreach_event", "attestation", "commit"] },
|
|
11
|
+
"date": { "type": "string", "format": "date-time" },
|
|
12
|
+
"venture": { "enum": ["delimit", "domainvested", "wirereport", "livetube"] },
|
|
13
|
+
"evidence_refs": {
|
|
14
|
+
"type": "array",
|
|
15
|
+
"minItems": 1,
|
|
16
|
+
"items": { "type": "string", "pattern": "^(LED-|sha256:|git:|attest:|url:).+" }
|
|
17
|
+
},
|
|
18
|
+
"claims": { "type": "array", "items": { "$ref": "claim.schema.json" } },
|
|
19
|
+
"visibility": { "enum": ["public", "internal", "private"] },
|
|
20
|
+
"source": { "type": ["string", "null"] },
|
|
21
|
+
"raw": { "type": ["object", "null"] }
|
|
22
|
+
}
|
|
23
|
+
}
|
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Typed schemas for the content grounding layer (LED-1084 Week 1, A5 + A6).
|
|
3
|
+
|
|
4
|
+
Design constraints (from 2026-04-24 adversarial rebuttal):
|
|
5
|
+
|
|
6
|
+
- Atomic typed claims (not prose arrays). Each claim names its type,
|
|
7
|
+
carries evidence_refs, has a visibility flag, and optionally names a
|
|
8
|
+
versioned inference rule.
|
|
9
|
+
- Hard bans through Week 2: comparative, adoption, customer, and
|
|
10
|
+
roadmap claim types fail-closed unless the exact text is whitelisted.
|
|
11
|
+
Aggregate claims require a structured numeric_evidence field linked
|
|
12
|
+
to a commit SHA.
|
|
13
|
+
- Evidence refs are strings in a documented namespace:
|
|
14
|
+
LED-<id> → ledger item
|
|
15
|
+
sha256:<hex> → content hash
|
|
16
|
+
git:<sha> → commit SHA on delimit-gateway
|
|
17
|
+
attest:<id> → attestation bundle ID (att_…)
|
|
18
|
+
url:<https…> → public URL (for outreach events)
|
|
19
|
+
Unknown prefixes fail-closed.
|
|
20
|
+
|
|
21
|
+
Back-compat: consumers should import from `ai.content_grounding`
|
|
22
|
+
(re-export) rather than this internal module directly.
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
from dataclasses import dataclass, field, asdict
|
|
27
|
+
from datetime import datetime
|
|
28
|
+
from enum import Enum
|
|
29
|
+
from typing import Any, Dict, List, Optional, Union
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class ClaimType(str, Enum):
    """Closed set of atomic claim categories.

    Introducing a new category requires a schema and validator update.
    Each member carries an example phrasing in the comment above it.
    """

    # "Delimit wrap emits a signed attestation"
    FEATURE = "feature"
    # "The scanner detects 27 breaking-change types"
    CAPABILITY = "capability"
    # "LED-1076 caused 4 false positives"
    INCIDENT = "incident"
    # "Faster than openapi-diff" — BANNED unless whitelisted
    COMPARATIVE = "comparative"
    # "Used by Harbor" — BANNED unless whitelisted
    ADOPTION = "adoption"
    # "X paid Y for Z" — BANNED unless whitelisted
    CUSTOMER = "customer"
    # "134 tests passing" — requires numeric_evidence
    AGGREGATE = "aggregate"
    # "We're building X next" — hard-banned (member of HARD_BANNED_CLAIM_TYPES)
    ROADMAP = "roadmap"
    # "Advisory by default, flip to enforcing"
    PROCESS = "process"


# Claim types that fail closed during Week 1/2 hardening unless the exact
# `text` value appears in the canonical phrase whitelist (A6).
HARD_BANNED_CLAIM_TYPES: frozenset = frozenset(
    (
        ClaimType.COMPARATIVE,
        ClaimType.ADOPTION,
        ClaimType.CUSTOMER,
        ClaimType.ROADMAP,
    )
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
class Visibility(str, Enum):
    """Visibility levels, treated as an allowlist rather than a denylist
    (per consensus privacy model)."""

    # safe to surface on delimit.ai / social / storyline
    PUBLIC = "public"
    # founder + ops only
    INTERNAL = "internal"
    # customer data, never leaves local ledger
    PRIVATE = "private"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class EventType(str, Enum):
    """Normalized categories for events ingested from ledger,
    attestations, and git."""

    # Shipping and release milestones
    FEATURE_SHIPPED = "feature_shipped"
    RELEASE = "release"

    # Incident lifecycle
    INCIDENT = "incident"
    INCIDENT_RESOLVED = "incident_resolved"

    # Remaining record kinds
    DECISION = "decision"
    OUTREACH_EVENT = "outreach_event"
    ATTESTATION = "attestation"
    COMMIT = "commit"
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
# ---------------------------------------------------------------------------
|
|
74
|
+
# EvidenceRef
|
|
75
|
+
# ---------------------------------------------------------------------------
|
|
76
|
+
|
|
77
|
+
# Recognized evidence namespaces. Any other prefix is rejected (fail-closed).
_VALID_EVIDENCE_PREFIXES: tuple = ("LED-", "sha256:", "git:", "attest:", "url:")


@dataclass
class EvidenceRef:
    """A single evidence pointer in a fixed, documented namespace.

    A valid reference is a string made of one of the recognized prefixes
    followed by a non-empty identifier, matching the JSON schema pattern
    ``^(LED-|sha256:|git:|attest:|url:).+``.
    """

    ref: str

    def validate(self) -> List[str]:
        """Return a list of validation errors; an empty list means valid."""
        errs: List[str] = []
        if not self.ref:
            errs.append("evidence_ref: empty string")
            return errs
        if not isinstance(self.ref, str):
            # Refs loaded from JSON may be numbers or objects; report the
            # problem instead of raising AttributeError on startswith().
            errs.append(
                f"evidence_ref: expected string, got {type(self.ref).__name__}"
            )
            return errs

        matched = next(
            (p for p in _VALID_EVIDENCE_PREFIXES if self.ref.startswith(p)), None
        )
        if matched is None:
            errs.append(
                f"evidence_ref: unknown prefix '{self.ref}'. "
                f"Must start with one of: {_VALID_EVIDENCE_PREFIXES}"
            )
        elif len(self.ref) == len(matched):
            # A bare prefix points at nothing; the schema pattern requires
            # at least one character after the namespace.
            errs.append(
                f"evidence_ref: missing identifier after prefix '{matched}'"
            )
        return errs
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
# ---------------------------------------------------------------------------
|
|
99
|
+
# Claim
|
|
100
|
+
# ---------------------------------------------------------------------------
|
|
101
|
+
|
|
102
|
+
@dataclass
class Claim:
    """A typed atomic assertion with evidence.

    Per A5, every claim is an object. Generators may only mention facts
    whose corresponding Claim has:
      - type in an allowed set for the target surface
      - non-empty evidence_refs
      - visibility compatible with the surface (public for outreach)
      - optional inference_rule (versioned) if derived rather than direct

    `text` is the exact phrasing allowed. Paraphrases are NOT permitted
    during Week 1/2 — caller must either reuse `text` verbatim or fail.
    """

    claim_id: str
    type: ClaimType
    text: str
    evidence_refs: List[str] = field(default_factory=list)
    visibility: Visibility = Visibility.INTERNAL
    inference_rule: Optional[str] = None
    inference_rule_version: Optional[str] = None
    numeric_evidence: Optional[Dict[str, Any]] = None  # for AGGREGATE claims

    def validate(self, whitelist: Optional[frozenset] = None) -> List[str]:
        """Return list of validation errors. Empty = valid.

        Args:
            whitelist: exact `text` values permitted for hard-banned claim
                types. ``None`` is treated as an empty whitelist.
        """
        errs: List[str] = []
        if not self.claim_id or not self.claim_id.startswith("CLM-"):
            errs.append(f"claim_id must start with 'CLM-', got '{self.claim_id}'")
        if not self.text or not self.text.strip():
            errs.append("text: empty")
        if not self.evidence_refs:
            errs.append("evidence_refs: at least one required")
        else:
            for ref in self.evidence_refs:
                errs.extend(EvidenceRef(ref).validate())

        # A6: hard-banned claim types require whitelist match on exact text
        if self.type in HARD_BANNED_CLAIM_TYPES:
            allowed = whitelist or frozenset()
            if self.text not in allowed:
                # Guard the slice: a missing text was already reported above
                # and must not turn into a TypeError here.
                preview = (self.text or "")[:80]
                errs.append(
                    f"claim_type '{self.type.value}' is HARD-BANNED in "
                    f"Week 1/2 unless exact text is whitelisted. "
                    f"text='{preview}...' not in whitelist (size={len(allowed)})"
                )

        # AGGREGATE claims need structured numeric evidence
        if self.type == ClaimType.AGGREGATE:
            errs.extend(self._numeric_evidence_errors())

        # Inference rules must be versioned
        if self.inference_rule and not self.inference_rule_version:
            errs.append(
                "inference_rule requires inference_rule_version "
                "(deterministic, versioned, testable)"
            )

        return errs

    def _numeric_evidence_errors(self) -> List[str]:
        """Check `numeric_evidence` against the shape the JSON schema requires.

        The schema requires `value` (number or string) and `commit_sha`
        (string); `unit` is optional. Key presence alone is not enough,
        because a null or blank `commit_sha` links the number to nothing.
        """
        evidence = self.numeric_evidence
        if not evidence:
            return [
                "aggregate claim requires numeric_evidence dict with "
                "{value, unit, commit_sha} at minimum"
            ]
        if not isinstance(evidence, dict):
            return [
                f"numeric_evidence must be an object, got {type(evidence).__name__}"
            ]

        errs: List[str] = []
        for k in ("value", "commit_sha"):
            if k not in evidence:
                errs.append(f"numeric_evidence missing required key: '{k}'")

        if "value" in evidence:
            value = evidence["value"]
            # bool is an int subclass in Python but is not a JSON number.
            if isinstance(value, bool) or not isinstance(value, (int, float, str)):
                errs.append("numeric_evidence.value must be a number or string")
        if "commit_sha" in evidence:
            sha = evidence["commit_sha"]
            if not isinstance(sha, str) or not sha.strip():
                errs.append("numeric_evidence.commit_sha must be a non-empty string")
        return errs

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form with enum fields replaced by their values."""
        d = asdict(self)
        d["type"] = self.type.value
        d["visibility"] = self.visibility.value
        return d
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# ---------------------------------------------------------------------------
|
|
177
|
+
# GroundedEvent
|
|
178
|
+
# ---------------------------------------------------------------------------
|
|
179
|
+
|
|
180
|
+
@dataclass
class GroundedEvent:
    """One normalized event in the grounding layer.

    Produced by ingesting ledger + attestations + git log. An event
    carries its own evidence chain and a list of Claims generators
    may reuse verbatim.
    """

    event_id: str
    type: EventType
    date: str  # ISO-8601
    venture: str  # delimit / domainvested / wirereport / livetube
    evidence_refs: List[str] = field(default_factory=list)
    claims: List[Claim] = field(default_factory=list)
    visibility: Visibility = Visibility.INTERNAL
    source: Optional[str] = None  # where the event was ingested from
    raw: Optional[Dict[str, Any]] = None  # original record for audit

    def validate(self, whitelist: Optional[frozenset] = None) -> List[str]:
        """Return a list of validation errors; an empty list means valid.

        Checks the event's own fields, every evidence ref, and every
        attached claim. Claim errors are prefixed with the claim id.

        Args:
            whitelist: forwarded to each claim's ``validate``.
        """
        errs: List[str] = []
        if not self.event_id:
            errs.append("event_id: empty")
        if not self.date:
            errs.append("date: empty")
        else:
            try:
                # fromisoformat() on older Pythons rejects a trailing "Z".
                datetime.fromisoformat(self.date.replace("Z", "+00:00"))
            except ValueError:
                errs.append(f"date: not ISO-8601: {self.date}")
        if not self.venture:
            errs.append("venture: empty")
        # elif: an empty venture is reported once, not also as "unknown ''".
        elif self.venture not in {"delimit", "domainvested", "wirereport", "livetube"}:
            errs.append(f"venture: unknown '{self.venture}'")
        if not self.evidence_refs:
            errs.append("evidence_refs: at least one required")
        else:
            for ref in self.evidence_refs:
                errs.extend(EvidenceRef(ref).validate())
        for claim in self.claims:
            claim_errs = claim.validate(whitelist=whitelist)
            errs.extend(f"claim {claim.claim_id}: {e}" for e in claim_errs)
        return errs

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict form with enums as values and claims as dicts."""
        return {
            "event_id": self.event_id,
            "type": self.type.value,
            "date": self.date,
            "venture": self.venture,
            "evidence_refs": list(self.evidence_refs),
            "claims": [c.to_dict() for c in self.claims],
            "visibility": self.visibility.value,
            "source": self.source,
            "raw": self.raw,
        }
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
# ---------------------------------------------------------------------------
|
|
238
|
+
# GroundingIndex
|
|
239
|
+
# ---------------------------------------------------------------------------
|
|
240
|
+
|
|
241
|
+
@dataclass
class GroundingIndex:
    """Point-in-time snapshot of every grounded event for one venture.

    Produced by `build_grounding_index`. Generators read a filtered view
    of the index (e.g. `events where visibility='public' and date within
    last 7 days`).
    """

    venture: str
    built_at: str  # ISO-8601
    events: List[GroundedEvent] = field(default_factory=list)
    feature_whitelist: List[str] = field(default_factory=list)
    canon_version: Optional[str] = None

    def validate(self, whitelist: Optional[frozenset] = None) -> List[str]:
        """Collect validation errors for the index and all of its events.

        Event-level errors are prefixed with ``event <event_id>: ``. An
        empty list means the index is valid.
        """
        problems: List[str] = []

        if not self.venture:
            problems.append("venture: empty")

        if self.built_at:
            normalized = self.built_at.replace("Z", "+00:00")
            try:
                datetime.fromisoformat(normalized)
            except ValueError:
                problems.append(f"built_at: not ISO-8601: {self.built_at}")
        else:
            problems.append("built_at: empty")

        for evt in self.events:
            for message in evt.validate(whitelist=whitelist):
                problems.append(f"event {evt.event_id}: {message}")

        return problems

    def public_events(self) -> List[GroundedEvent]:
        """Return only the events whose visibility is PUBLIC (allowlist)."""
        return list(
            filter(lambda evt: evt.visibility == Visibility.PUBLIC, self.events)
        )

    def events_by_type(self, event_type: EventType) -> List[GroundedEvent]:
        """Return the events whose `type` equals `event_type`."""
        return list(filter(lambda evt: evt.type == event_type, self.events))
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Gate telemetry viewer for LED-1084 week 2 → week 3 transition.
|
|
3
|
+
|
|
4
|
+
Reads `[social-grounding-gate]` P3 ledger entries and summarizes:
|
|
5
|
+
- score distribution (min, median, mean, p95, max)
|
|
6
|
+
- flagged-vs-clean ratio per model
|
|
7
|
+
- feature-detector status counts (clean / flagged-no-whitelist /
|
|
8
|
+
flagged-unknown-specifics / error)
|
|
9
|
+
- recent failing examples for qualitative review
|
|
10
|
+
|
|
11
|
+
Purpose: the Week 3 decision to flip `enforce_grounding=True` must be
|
|
12
|
+
data-driven. This tool reads the ledger the gate writes and produces a
|
|
13
|
+
snapshot the founder can review to decide the threshold + timing.
|
|
14
|
+
|
|
15
|
+
Usage:
|
|
16
|
+
python -m ai.content_grounding.telemetry summary
|
|
17
|
+
python -m ai.content_grounding.telemetry summary --days 14
|
|
18
|
+
python -m ai.content_grounding.telemetry samples --n 5
|
|
19
|
+
|
|
20
|
+
JSON output for piping:
|
|
21
|
+
python -m ai.content_grounding.telemetry summary --json
|
|
22
|
+
"""
|
|
23
|
+
from __future__ import annotations
|
|
24
|
+
|
|
25
|
+
import argparse
|
|
26
|
+
import json
|
|
27
|
+
import logging
|
|
28
|
+
import os
|
|
29
|
+
import re
|
|
30
|
+
import statistics
|
|
31
|
+
import sys
|
|
32
|
+
from datetime import datetime, timezone, timedelta
|
|
33
|
+
from pathlib import Path
|
|
34
|
+
from typing import Any, Dict, List, Optional
|
|
35
|
+
|
|
36
|
+
logger = logging.getLogger("delimit.ai.content_grounding.telemetry")
|
|
37
|
+
|
|
38
|
+
LEDGER_DIR = Path(os.environ.get("DELIMIT_LEDGER_DIR", str(Path.home() / ".delimit" / "ledger")))
|
|
39
|
+
|
|
40
|
+
# Parse the structured fields the gate bakes into its description text.
|
|
41
|
+
_SCORE_RE = re.compile(r"^Score:\s*([0-9.]+)", re.MULTILINE)
|
|
42
|
+
_THRESH_RE = re.compile(r"threshold:\s*([0-9.]+)", re.IGNORECASE)
|
|
43
|
+
_FEATURE_RE = re.compile(r"^Feature status:\s*(\w+)", re.MULTILINE)
|
|
44
|
+
_MODEL_RE = re.compile(r"^Model:\s*(\S+)", re.MULTILINE)
|
|
45
|
+
_PLATFORM_RE = re.compile(r"^Platform:\s*(\w+)", re.MULTILINE)
|
|
46
|
+
_VENTURE_RE = re.compile(r"^Venture:\s*(\w+)", re.MULTILINE)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def _iter_gate_entries(since: Optional[datetime] = None) -> List[Dict[str, Any]]:
    """Read every [social-grounding-gate] ledger entry from LEDGER_DIR.

    Scans each ``*.jsonl`` file in sorted filename order and keeps the
    JSON objects whose ``title`` contains ``[social-grounding-gate]``.

    Args:
        since: when given, entries whose ``created_at`` / ``timestamp``
            parses as ISO-8601 and is earlier than `since` are dropped.
            Entries with a missing or unparseable timestamp are kept.

    Returns:
        Matching entries in file order, then line order. Returns an empty
        list when LEDGER_DIR does not exist.
    """
    entries: List[Dict[str, Any]] = []
    if not LEDGER_DIR.is_dir():
        return entries

    cutoff = since
    if cutoff is not None and cutoff.tzinfo is None:
        # Compare everything as aware datetimes; a naive cutoff is read as UTC.
        cutoff = cutoff.replace(tzinfo=timezone.utc)

    for p in sorted(LEDGER_DIR.glob("*.jsonl")):
        try:
            text = p.read_text(encoding="utf-8", errors="replace")
        except OSError as e:
            logger.debug("skipping %s: %s", p, e)
            continue

        for line in text.splitlines():
            line = line.strip()
            if not line:
                continue
            try:
                item = json.loads(line)
            except json.JSONDecodeError:
                continue
            # One malformed record must not discard the rest of the file.
            if not isinstance(item, dict):
                continue
            title = item.get("title") or ""
            if not isinstance(title, str) or "[social-grounding-gate]" not in title:
                continue

            ts = item.get("created_at") or item.get("timestamp") or ""
            if cutoff is not None and isinstance(ts, str) and ts:
                try:
                    dt = datetime.fromisoformat(ts.replace("Z", "+00:00"))
                except ValueError:
                    dt = None  # unparseable timestamp: keep the entry
                if dt is not None:
                    if dt.tzinfo is None:
                        # Naive timestamps are read as UTC so the comparison
                        # with the aware cutoff cannot raise TypeError.
                        dt = dt.replace(tzinfo=timezone.utc)
                    if dt < cutoff:
                        continue
            entries.append(item)
    return entries
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def _parse_entry(item: Dict[str, Any]) -> Dict[str, Any]:
    """Extract the structured fields the gate logged in the description.

    Args:
        item: one ledger entry; its ``description`` text is searched with
            the module-level field regexes.

    Returns:
        A dict with keys ``id``, ``timestamp``, ``score``, ``threshold``,
        ``feat_status``, ``model``, ``platform`` and ``venture``. A field
        that is absent, or a score/threshold that is not a valid float,
        is reported as ``None``.
    """
    desc = item.get("description") or ""
    if not isinstance(desc, str):
        desc = str(desc)

    def _capture(pattern: Any) -> Optional[str]:
        """Return the first capture group of `pattern` in the description."""
        m = pattern.search(desc)
        return m.group(1) if m else None

    def _as_float(raw: Optional[str]) -> Optional[float]:
        """Convert a captured number, tolerating matches such as '...'."""
        if raw is None:
            return None
        try:
            return float(raw)
        except ValueError:
            # The regex character class also matches "1.2.3" or a lone ".".
            return None

    return {
        "id": item.get("id", ""),
        "timestamp": item.get("created_at") or item.get("timestamp", ""),
        "score": _as_float(_capture(_SCORE_RE)),
        "threshold": _as_float(_capture(_THRESH_RE)),
        "feat_status": _capture(_FEATURE_RE),
        "model": _capture(_MODEL_RE),
        "platform": _capture(_PLATFORM_RE),
        "venture": _capture(_VENTURE_RE),
    }
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def summarize(days: int = 14) -> Dict[str, Any]:
    """Aggregate stats for the last `days` of gate entries.

    Args:
        days: size of the look-back window, counted back from now (UTC).

    Returns:
        A JSON-serializable dict with the window, entry count, inferred
        threshold, pass/fail counts and rate, overall score stats, and
        breakdowns by model, feature status and platform. Entries with no
        parseable score are counted in ``total_entries`` and the
        status/platform breakdowns but excluded from all score statistics.
    """
    since = datetime.now(timezone.utc) - timedelta(days=days)
    raw = _iter_gate_entries(since=since)
    parsed = [_parse_entry(e) for e in raw]

    # Score stats (ignore entries with no score)
    scores = [p["score"] for p in parsed if p["score"] is not None]

    by_model: Dict[str, List[float]] = {}
    by_feat: Dict[str, int] = {}
    by_platform: Dict[str, int] = {}
    for p in parsed:
        # Register the model even when unscored, so it still appears in the
        # output, but never invent a 0.0 score that would skew its stats.
        model_scores = by_model.setdefault(p.get("model") or "unknown", [])
        if p["score"] is not None:
            model_scores.append(p["score"])
        f = p.get("feat_status") or "unknown"
        by_feat[f] = by_feat.get(f, 0) + 1
        plat = p.get("platform") or "unknown"
        by_platform[plat] = by_platform.get(plat, 0) + 1

    def _stats(vals: List[float]) -> Dict[str, Any]:
        """Return n/min/median/mean/p95/max for `vals`, or {"n": 0} if empty."""
        if not vals:
            return {"n": 0}
        ordered = sorted(vals)
        n = len(ordered)
        # Nearest-rank percentile: rank = ceil(0.95 * n), in integer math so
        # float rounding cannot shift the index.
        p95_idx = max(0, (95 * n + 99) // 100 - 1)
        return {
            "n": n,
            "min": round(ordered[0], 3),
            "median": round(statistics.median(ordered), 3),
            "mean": round(statistics.mean(ordered), 3),
            "p95": round(ordered[p95_idx], 3),
            "max": round(ordered[-1], 3),
        }

    # Threshold inferred from first entry that has one (default 0.85).
    threshold = next((p["threshold"] for p in parsed if p["threshold"] is not None), 0.85)
    pass_count = sum(1 for s in scores if s >= threshold)
    fail_count = len(scores) - pass_count

    return {
        "window_days": days,
        "since": since.isoformat(),
        "total_entries": len(parsed),
        "threshold_inferred": threshold,
        "would_pass_count": pass_count,
        "would_fail_count": fail_count,
        # max(1, ...) keeps the rate at 0.0 instead of dividing by zero.
        "pass_rate": round(pass_count / max(1, len(scores)), 3),
        "score_stats": _stats(scores),
        "by_model": {m: _stats(v) for m, v in by_model.items()},
        "by_feat_status": by_feat,
        "by_platform": by_platform,
    }
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def recent_samples(n: int = 5, days: int = 14) -> List[Dict[str, Any]]:
    """Return up to the last `n` gate entries in parsed form.

    Args:
        n: maximum number of entries to return; zero or negative yields
            an empty list.
        days: size of the look-back window, counted back from now (UTC).

    Returns:
        Parsed entries taken from the end of the list produced by
        `_iter_gate_entries`.
    """
    if n <= 0:
        # raw[-0:] is the whole list and raw[-(-k):] slices from the front,
        # so non-positive counts need an explicit guard.
        return []
    since = datetime.now(timezone.utc) - timedelta(days=days)
    raw = _iter_gate_entries(since=since)
    # NOTE(review): this treats the end of the list as "most recent", which
    # assumes ledger files and lines are in chronological order — confirm.
    return [_parse_entry(e) for e in raw[-n:]]
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def _print_summary_text(s: Dict[str, Any]) -> None:
    """Print a human-readable rendering of a `summarize` result to stdout.

    Sections for score stats, feature status, platform and model are only
    printed when the corresponding data is present and non-empty.
    """

    def _emit_counts(heading: str, counts: Dict[str, int], width: int) -> None:
        """Print a heading, then one padded `label count` row per entry,
        highest count first."""
        print(heading)
        ranked = sorted(counts.items(), key=lambda pair: -pair[1])
        for label, count in ranked:
            print(f" {label:{width}} {count}")

    print(f"Gate telemetry — last {s['window_days']} day(s)")
    print(f" total entries: {s['total_entries']}")
    print(f" threshold: {s['threshold_inferred']}")
    print(f" pass rate: {s['pass_rate']:.1%} ({s['would_pass_count']} pass / {s['would_fail_count']} fail)")

    overall = s.get("score_stats") or {}
    if overall.get("n"):
        print(f" score stats: n={overall['n']} min={overall['min']} median={overall['median']} mean={overall['mean']} p95={overall['p95']} max={overall['max']}")

    feat_counts = s.get("by_feat_status")
    if feat_counts:
        _emit_counts(" feat_status:", feat_counts, 12)

    platform_counts = s.get("by_platform")
    if platform_counts:
        _emit_counts(" by platform:", platform_counts, 10)

    per_model = s.get("by_model")
    if per_model:
        print(" by model:")
        for name, model_stats in per_model.items():
            if not model_stats.get("n"):
                continue  # models without scored entries are not listed
            print(f" {name:16} n={model_stats['n']} median={model_stats['median']} mean={model_stats['mean']}")
|
|
183
|
+
|
|
184
|
+
|
|
185
|
+
def _main() -> int:
    """Command-line entry point for the telemetry viewer.

    Parses ``summary`` / ``samples`` sub-commands from the process
    arguments, prints the result as pretty text or JSON, and returns the
    process exit code (0 on success, 2 for an unrecognized command).
    """
    cli = argparse.ArgumentParser(prog="ai.content_grounding.telemetry")
    commands = cli.add_subparsers(dest="cmd", required=True)

    summary_cmd = commands.add_parser("summary", help="aggregate gate telemetry stats")
    summary_cmd.add_argument("--days", type=int, default=14)
    summary_cmd.add_argument("--json", action="store_true", help="emit JSON instead of pretty text")

    samples_cmd = commands.add_parser("samples", help="recent gate entries, parsed")
    samples_cmd.add_argument("--days", type=int, default=14)
    samples_cmd.add_argument("--n", type=int, default=5)
    samples_cmd.add_argument("--json", action="store_true")

    opts = cli.parse_args()

    if opts.cmd not in ("summary", "samples"):
        return 2

    if opts.cmd == "summary":
        report = summarize(days=opts.days)
        if not opts.json:
            _print_summary_text(report)
        else:
            print(json.dumps(report, indent=2))
        return 0

    # Remaining command: samples
    rows = recent_samples(n=opts.n, days=opts.days)
    if opts.json:
        print(json.dumps(rows, indent=2))
        return 0
    for row in rows:
        print(f"- {row['timestamp']} model={row['model']} score={row['score']} feat={row['feat_status']}")
    return 0
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
if __name__ == "__main__":
|
|
221
|
+
sys.exit(_main())
|
package/gateway/ai/governance.py
CHANGED
|
@@ -35,6 +35,95 @@ def _is_test_mode() -> bool:
|
|
|
35
35
|
logger = logging.getLogger("delimit.governance")
|
|
36
36
|
|
|
37
37
|
|
|
38
|
+
# ── STR-183 V2-hardening B-PREREQ-4: non-delegable operation registry ─
# Per /root/CLAUDE.md "Non-Delegable Decisions" and the 2026-04-07 ruleset-bypass postmortem,
# these operation classes can never be auto-approved by a generic gate (e.g. "all_gates_passed").
# Each invocation requires fresh, named-human attestation at gate-entry time.
# This constant is the code-level encoding of the constitutional boundary.
# Do not extend this set without an explicit founder-attested deliberation.
NON_DELEGABLE_OPERATION_CLASSES = frozenset({
    "ruleset_disable",              # disabling branch protection / repository rulesets
    "force_push_shared",            # force-push to main, release branches, or floating tags (v1, latest)
    "account_switch",               # switching gh / git author identity mid-flow
    "cross_account_ops",            # operating on one org from another org's identity
    "constitutional_rewrite",       # edits to founder doctrine canon outside managed sections
    "authority_class_expansion",    # adding a new class of tool / agent / gate
    "irreversible_capital_commit",  # capital commitments above non-delegable threshold
    "venture_kill",                 # shutting down a Jamsons venture
    "permission_escalation",        # granting elevated access (sudo, admin, write-as-other)
    "public_truth_claim",           # public statement / marketing assertion outrunning evidence
})


def is_non_delegable(operation_class: str) -> bool:
    """Return True iff the operation class is in the non-delegable registry.

    Per the 2026-04-07 postmortem and the V2 pressure-test (STR-183, unanimous round 3),
    non-delegable operations cannot pass through any "all_gates_passed" mechanism.
    They require per-invocation founder attestation, checked live at gate entry.
    """
    return operation_class in NON_DELEGABLE_OPERATION_CLASSES


def require_founder_attestation(operation_class: str, attestation: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """Fail-closed gate for non-delegable operations.

    Returns a verdict dict. The caller must refuse to proceed unless verdict["allowed"] is True.

    A valid attestation must include, each with a non-empty value:
      - "founder_id": the named human performing the attestation
      - "scope": the exact operation being attested (must match operation_class)
      - "timestamp": ISO-8601 UTC
      - "evidence_ref": pointer to the evidence (ledger ID, postmortem path, or signed message)

    Pre-approval of a parent plan does NOT extend to non-delegable escalations
    (2026-04-07 postmortem rule). Each invocation needs its own attestation.

    Args:
        operation_class: the class of operation about to be performed.
        attestation: the founder attestation for this invocation, if any.

    Returns:
        A dict with "allowed", "operation_class" and "non_delegable". Denied
        verdicts add "reason"; allowed non-delegable verdicts add
        "attestation". Malformed attestations are denied, never raised on.
    """
    if not is_non_delegable(operation_class):
        return {"allowed": True, "operation_class": operation_class, "non_delegable": False}

    def _deny(reason: str) -> Dict[str, Any]:
        """Build the denied verdict for this operation with the given reason."""
        return {
            "allowed": False,
            "operation_class": operation_class,
            "non_delegable": True,
            "reason": reason,
        }

    if not attestation:
        return _deny(
            f"{operation_class} is non-delegable (STR-183 / 2026-04-07 postmortem). "
            "Pre-approval of a parent plan does not extend to this operation. "
            "Per-invocation founder attestation is required."
        )

    if not isinstance(attestation, dict):
        # A truthy non-dict (e.g. the string "approved") must be refused,
        # not allowed to crash the gate with AttributeError.
        return _deny(f"Attestation must be a dict, got {type(attestation).__name__}")

    required = {"founder_id", "scope", "timestamp", "evidence_ref"}
    missing = required - set(attestation.keys())
    if missing:
        return _deny(f"Attestation missing required fields: {sorted(missing)}")

    # Key presence is not attestation: a blank founder_id or evidence_ref
    # names nobody and points at nothing, so it fails closed as well.
    blank = sorted(
        k for k in required
        if attestation[k] is None
        or (isinstance(attestation[k], str) and not attestation[k].strip())
    )
    if blank:
        return _deny(f"Attestation has empty required fields: {blank}")

    # NOTE(review): "timestamp" is neither parsed nor checked for freshness
    # here; no maximum age is defined in this module — confirm the policy.

    if attestation["scope"] != operation_class:
        return _deny(
            f"Attestation scope mismatch: attested for '{attestation['scope']}' "
            f"but invocation is for '{operation_class}'. The scope of approval is "
            "the scope stated, not beyond (CLAUDE.md escalation rule)."
        )

    return {
        "allowed": True,
        "operation_class": operation_class,
        "non_delegable": True,
        "attestation": attestation,
    }
|
|
125
|
+
|
|
126
|
+
|
|
38
127
|
# ── LED-263: Beta CTA for conversion ────────────────────────────────
|
|
39
128
|
# Tools that should show a beta signup prompt on successful results.
|
|
40
129
|
_BETA_CTA_TOOLS = frozenset({"lint", "scan", "activate", "diff", "quickstart"})
|