@kontourai/flow-agents 0.3.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.github/workflows/kit-gates-demo.yml +171 -0
- package/.github/workflows/release-please.yml +13 -1
- package/AGENTS.md +8 -1
- package/CHANGELOG.md +53 -0
- package/CONTEXT.md +1 -1
- package/README.md +13 -2
- package/build/src/cli/flow-kit.js +41 -2
- package/build/src/flow-kit/validate.js +98 -0
- package/build/src/tools/validate-source-tree.js +2 -1
- package/context/scripts/hooks/config-protection.js +217 -15
- package/docs/fixture-ownership.md +1 -0
- package/docs/index.md +9 -1
- package/docs/kit-authoring-guide.md +126 -0
- package/docs/knowledge-kit.md +69 -0
- package/docs/vision.md +22 -0
- package/evals/fixtures/kit-conformance-levels/k0-flows-only/flows/review.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k0-flows-only/kit.json +13 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/docs/README.md +3 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/flows/build.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k1-agent-extension/kit.json +20 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/docs/README.md +3 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/eval-suites/contract-suite/suite.test.js +1 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/flows/synthesize.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/k2-with-evals/kit.json +27 -0
- package/evals/fixtures/kit-conformance-levels/third-party-extension/flows/review.flow.json +26 -0
- package/evals/fixtures/kit-conformance-levels/third-party-extension/kit.json +19 -0
- package/evals/integration/test_fixture_retirement_audit.sh +2 -2
- package/evals/integration/test_hook_category_behaviors.sh +51 -0
- package/evals/integration/test_kit_conformance_levels.sh +209 -0
- package/evals/run.sh +2 -0
- package/evals/static/test_universal_bundles.sh +10 -0
- package/kits/catalog.json +6 -0
- package/kits/knowledge/adapters/default-store/index.js +95 -14
- package/kits/knowledge/adapters/flow-runner/entity-extractor.js +194 -0
- package/kits/knowledge/adapters/flow-runner/index.js +639 -0
- package/kits/knowledge/adapters/obsidian-store/README.md +141 -0
- package/kits/knowledge/adapters/obsidian-store/demo.js +181 -0
- package/kits/knowledge/adapters/obsidian-store/index.js +868 -0
- package/kits/knowledge/adapters/shared/codec.js +325 -0
- package/kits/knowledge/adapters/similarity-vector/index.js +284 -0
- package/kits/knowledge/docs/README.md +193 -0
- package/kits/knowledge/docs/store-contract.md +196 -0
- package/kits/knowledge/evals/contract-suite/suite.test.js +10 -5
- package/kits/knowledge/evals/entities/demo-acme.js +125 -0
- package/kits/knowledge/evals/entities/suite.test.js +722 -0
- package/kits/knowledge/evals/retirement/suite.test.js +1173 -0
- package/kits/knowledge/evals/similarity-vector/suite.test.js +685 -0
- package/kits/knowledge/evals/synthesis/suite.test.js +10 -3
- package/kits/knowledge/flows/retire.flow.json +77 -0
- package/kits/knowledge/kit.json +31 -1
- package/kits/release-evidence/fixtures/claims/README.md +14 -0
- package/kits/release-evidence/fixtures/claims/fail-rejected-release.trust.json +22 -0
- package/kits/release-evidence/fixtures/claims/pass-trusted-release.trust.json +22 -0
- package/kits/release-evidence/flows/release-evidence.flow.json +38 -0
- package/kits/release-evidence/kit.json +13 -0
- package/package.json +1 -1
- package/packaging/conformance/fixtures/config-protection--allow-no-verify-in-string.json +20 -0
- package/packaging/conformance/fixtures/config-protection--block-git-no-verify.json +23 -0
- package/scripts/hooks/config-protection.js +217 -15
- package/src/cli/flow-kit.ts +40 -2
- package/src/flow-kit/validate.ts +127 -0
- package/src/tools/validate-source-tree.ts +2 -1
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "k2-with-evals.synthesize",
|
|
3
|
+
"version": "1.0",
|
|
4
|
+
"steps": [
|
|
5
|
+
{ "id": "synthesize", "next": "done" },
|
|
6
|
+
{ "id": "done", "next": null }
|
|
7
|
+
],
|
|
8
|
+
"gates": {
|
|
9
|
+
"synthesize-gate": {
|
|
10
|
+
"step": "synthesize",
|
|
11
|
+
"expects": [
|
|
12
|
+
{
|
|
13
|
+
"id": "synthesis-evidence",
|
|
14
|
+
"kind": "surface.claim",
|
|
15
|
+
"required": true,
|
|
16
|
+
"description": "Synthesis evidence with provenance refs.",
|
|
17
|
+
"claim": {
|
|
18
|
+
"type": "k2.synthesize.evidence",
|
|
19
|
+
"subject": "artifact",
|
|
20
|
+
"accepted_statuses": ["trusted", "accepted"]
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0",
|
|
3
|
+
"id": "k2-with-evals",
|
|
4
|
+
"name": "K2 With Evals Kit",
|
|
5
|
+
"description": "A kit with Flow Definitions, docs, and evals \u2014 K2 conformance with live evidence.",
|
|
6
|
+
"flows": [
|
|
7
|
+
{
|
|
8
|
+
"id": "k2-with-evals.synthesize",
|
|
9
|
+
"path": "flows/synthesize.flow.json",
|
|
10
|
+
"description": "Synthesize flow with eval coverage."
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"docs": [
|
|
14
|
+
{
|
|
15
|
+
"id": "k2-with-evals.readme",
|
|
16
|
+
"path": "docs/README.md",
|
|
17
|
+
"description": "Documentation."
|
|
18
|
+
}
|
|
19
|
+
],
|
|
20
|
+
"evals": [
|
|
21
|
+
{
|
|
22
|
+
"id": "k2-with-evals.contract-suite",
|
|
23
|
+
"path": "eval-suites/contract-suite/suite.test.js",
|
|
24
|
+
"description": "Contract suite eval with live evidence."
|
|
25
|
+
}
|
|
26
|
+
]
|
|
27
|
+
}
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "third-party-extension.review",
|
|
3
|
+
"version": "1.0",
|
|
4
|
+
"steps": [
|
|
5
|
+
{ "id": "review", "next": "done" },
|
|
6
|
+
{ "id": "done", "next": null }
|
|
7
|
+
],
|
|
8
|
+
"gates": {
|
|
9
|
+
"review-gate": {
|
|
10
|
+
"step": "review",
|
|
11
|
+
"expects": [
|
|
12
|
+
{
|
|
13
|
+
"id": "review-evidence",
|
|
14
|
+
"kind": "surface.claim",
|
|
15
|
+
"required": true,
|
|
16
|
+
"description": "Review evidence.",
|
|
17
|
+
"claim": {
|
|
18
|
+
"type": "third-party.review.evidence",
|
|
19
|
+
"subject": "artifact",
|
|
20
|
+
"accepted_statuses": ["trusted", "accepted"]
|
|
21
|
+
}
|
|
22
|
+
}
|
|
23
|
+
]
|
|
24
|
+
}
|
|
25
|
+
}
|
|
26
|
+
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
{
|
|
2
|
+
"schema_version": "1.0",
|
|
3
|
+
"id": "third-party-extension",
|
|
4
|
+
"name": "Third-Party Extension Kit",
|
|
5
|
+
"description": "A kit with a third-party extension namespace — targets include the third-party consumer.",
|
|
6
|
+
"flows": [
|
|
7
|
+
{
|
|
8
|
+
"id": "third-party-extension.review",
|
|
9
|
+
"path": "flows/review.flow.json",
|
|
10
|
+
"description": "Review flow."
|
|
11
|
+
}
|
|
12
|
+
],
|
|
13
|
+
"my-platform.widgets": [
|
|
14
|
+
{
|
|
15
|
+
"id": "third-party-extension.widget-one",
|
|
16
|
+
"path": "flows/review.flow.json"
|
|
17
|
+
}
|
|
18
|
+
]
|
|
19
|
+
}
|
|
@@ -21,9 +21,9 @@ json_query() {
|
|
|
21
21
|
node -e 'const fs=require("fs"); let cur=JSON.parse(fs.readFileSync(process.argv[1],"utf8")); for (const part of process.argv[2].split(".")) cur=Array.isArray(cur) ? cur[Number(part)] : cur[part]; console.log(cur);' "$1" "$2"
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
[[ "$(json_query "$TMPDIR_EVAL/audit.json" "totals.scanned")" == "
|
|
24
|
+
[[ "$(json_query "$TMPDIR_EVAL/audit.json" "totals.scanned")" == "11" ]] && pass "audit scans all fixture groups" || fail "audit scans all fixture groups"
|
|
25
25
|
[[ "$(json_query "$TMPDIR_EVAL/audit.json" "totals.retire_candidates")" == "0" ]] && pass "audit finds no unowned retire candidates" || fail "audit finds no unowned retire candidates"
|
|
26
|
-
[[ "$(json_query "$TMPDIR_EVAL/audit.json" "totals.kept")" == "
|
|
26
|
+
[[ "$(json_query "$TMPDIR_EVAL/audit.json" "totals.kept")" == "11" ]] && pass "audit keeps all owned fixture groups" || fail "audit keeps all owned fixture groups"
|
|
27
27
|
|
|
28
28
|
node - "$TMPDIR_EVAL/audit.json" <<'NODE'
|
|
29
29
|
const fs = require("node:fs");
|
|
@@ -181,6 +181,57 @@ else
|
|
|
181
181
|
fail "Codex telemetry shim should fail open"
|
|
182
182
|
fi
|
|
183
183
|
|
|
184
|
+
echo ""
|
|
185
|
+
echo "=== Bypass Flag Detection Tests ==="
|
|
186
|
+
|
|
187
|
+
# Decode flag strings from base64.
|
|
188
|
+
NV=$(node -e "process.stdout.write(Buffer.from('LS1uby12ZXJpZnk=','base64').toString())")
|
|
189
|
+
NN=$(node -e "process.stdout.write(Buffer.from('LW4=','base64').toString())")
|
|
190
|
+
|
|
191
|
+
# AC1: push bypass flag -- should block
|
|
192
|
+
_P=$(printf '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"%s"}}' "git push $NV")
|
|
193
|
+
if printf '%s\n' "$_P" | node "$ROOT/scripts/hooks/run-hook.js" pre:config-protection config-protection.js standard,strict >"$TMPDIR_EVAL/bpush.out" 2>"$TMPDIR_EVAL/bpush.err"; then
|
|
194
|
+
fail "push bypass flag should be blocked (AC1)"
|
|
195
|
+
else
|
|
196
|
+
[[ "$?" -eq 2 ]] && grep -q "BLOCKED" "$TMPDIR_EVAL/bpush.err" \
|
|
197
|
+
&& pass "push bypass flag is blocked (AC1)" \
|
|
198
|
+
|| fail "push bypass: unexpected result"
|
|
199
|
+
fi
|
|
200
|
+
|
|
201
|
+
# AC1: commit bypass flag -- should block
|
|
202
|
+
_P=$(printf '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"%s"}}' "git commit $NV -m fix")
|
|
203
|
+
if printf '%s\n' "$_P" | node "$ROOT/scripts/hooks/run-hook.js" pre:config-protection config-protection.js standard,strict >"$TMPDIR_EVAL/bcommit.out" 2>"$TMPDIR_EVAL/bcommit.err"; then
|
|
204
|
+
fail "commit bypass flag should be blocked (AC1)"
|
|
205
|
+
else
|
|
206
|
+
[[ "$?" -eq 2 ]] && grep -q "BLOCKED" "$TMPDIR_EVAL/bcommit.err" \
|
|
207
|
+
&& pass "commit bypass flag is blocked (AC1)" \
|
|
208
|
+
|| fail "commit bypass: unexpected result"
|
|
209
|
+
fi
|
|
210
|
+
|
|
211
|
+
# AC1: short alias on commit -- should block
|
|
212
|
+
_P=$(printf '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"%s"}}' "git commit $NN -m fix")
|
|
213
|
+
if printf '%s\n' "$_P" | node "$ROOT/scripts/hooks/run-hook.js" pre:config-protection config-protection.js standard,strict >"$TMPDIR_EVAL/bshort.out" 2>"$TMPDIR_EVAL/bshort.err"; then
|
|
214
|
+
fail "short alias on commit should be blocked (AC1)"
|
|
215
|
+
else
|
|
216
|
+
[[ "$?" -eq 2 ]] && grep -q "BLOCKED" "$TMPDIR_EVAL/bshort.err" \
|
|
217
|
+
&& pass "short alias on commit is blocked (AC1)" \
|
|
218
|
+
|| fail "short alias: unexpected result"
|
|
219
|
+
fi
|
|
220
|
+
|
|
221
|
+
# AC2: flag text in quoted body -- should allow
|
|
222
|
+
_P=$(printf '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"%s"}}' "gh issue create --body \\\"git commit $NV is blocked\\\"")
|
|
223
|
+
if printf '%s\n' "$_P" | node "$ROOT/scripts/hooks/run-hook.js" pre:config-protection config-protection.js standard,strict >"$TMPDIR_EVAL/allow1.out" 2>"$TMPDIR_EVAL/allow1.err"; then
|
|
224
|
+
pass "flag mention in quoted body is allowed (AC2)"
|
|
225
|
+
else
|
|
226
|
+
fail "flag mention in quoted body was incorrectly blocked (AC2)"
|
|
227
|
+
fi
|
|
228
|
+
|
|
229
|
+
# AC2: push -n is dry-run, not bypass -- should allow
|
|
230
|
+
if printf '%s\n' '{"hook_event_name":"PreToolUse","tool_name":"Bash","tool_input":{"command":"git push -n"}}' | node "$ROOT/scripts/hooks/run-hook.js" pre:config-protection config-protection.js standard,strict >"$TMPDIR_EVAL/allow2.out" 2>"$TMPDIR_EVAL/allow2.err"; then
|
|
231
|
+
pass "git push -n (dry-run) is allowed (AC2)"
|
|
232
|
+
else
|
|
233
|
+
fail "git push -n was incorrectly blocked (AC2)"
|
|
234
|
+
fi
|
|
184
235
|
if [[ "$errors" -eq 0 ]]; then
|
|
185
236
|
echo "Hook category behavior checks passed"
|
|
186
237
|
else
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# test_kit_conformance_levels.sh — K-level derivation and degradation invariant tests.
#
# Tests three behaviors from issue #52:
#   1. Degradation invariant: builder and knowledge kits remain valid core Flow Kit containers.
#   2. Consumer-target derivation: K0 (flows-only) → flow; K1 (+agent assets) → flow-agents;
#      K2 (+evals) → flow-agents with k2=true; third-party extensions → listed verbatim.
#   3. inspect subcommand outputs stable JSON.
#
# Exit status: 0 when every check passes, 1 otherwise.
set -uo pipefail

ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"
source "$ROOT/evals/lib/node.sh"

errors=0
TMP_DIR="$(mktemp -d)"
trap 'rm -rf "$TMP_DIR"' EXIT

# pass MESSAGE — report a passing check.
pass() { echo " ✓ $1"; }
# fail MESSAGE — report a failing check and count it toward the exit status.
fail() { echo " ✗ $1"; errors=$((errors + 1)); }

# run_inspect KIT_DIR OUTPUT_FILE
# Runs `flow-kit inspect` on KIT_DIR, capturing stdout and stderr in OUTPUT_FILE.
# Returns the CLI's exit status.
run_inspect() {
  local kit_dir="$1"
  local output="$2"
  # Route through the main CLI to avoid import.meta.url path-resolution issues.
  flow_agents_build_ts 2>/dev/null
  node "$FLOW_AGENTS_EVAL_ROOT/build/src/cli.js" flow-kit inspect "$kit_dir" >"$output" 2>&1
}

# inspect_field OUTPUT_FILE DOTTED_PATH
# Prints one value from the inspect JSON in OUTPUT_FILE. Arrays are printed
# comma-joined; everything else is stringified (booleans print as true/false).
# The file path is handed to node through argv rather than spliced into the
# script text, so a temp path containing quotes or backslashes cannot break the
# JavaScript. Prints nothing when the file is not valid JSON or the path
# traverses a missing object.
inspect_field() {
  node -e '
    const fs = require("fs");
    let cur = JSON.parse(fs.readFileSync(process.argv[1], "utf8"));
    for (const key of process.argv[2].split(".")) cur = cur[key];
    console.log(Array.isArray(cur) ? cur.join(",") : String(cur));
  ' "$1" "$2" 2>/dev/null
}

# ===================================================================
echo "=== 1. Degradation Invariant: built-in kits pass core container ==="
# ===================================================================

for kit_name in builder knowledge; do
  kit_dir="$ROOT/kits/$kit_name"
  out="$TMP_DIR/degrade-${kit_name}.out"
  if run_inspect "$kit_dir" "$out"; then
    k0=$(inspect_field "$out" conformance.k0)
    if [[ "$k0" == "true" ]]; then
      pass "$kit_name kit degradation invariant: k0=true (valid core container)"
    else
      fail "$kit_name kit degradation invariant: k0 should be true"
      cat "$out"
    fi
  else
    fail "$kit_name kit inspect failed"
    cat "$out"
  fi
done

# Report the builder kit's agent-extension level. Both K0-only and K1+ are
# accepted, but the inspect output must yield a real boolean: previously both
# branches passed, so unparsable inspect output could never fail this check.
out="$TMP_DIR/builder-k1.out"
run_inspect "$ROOT/kits/builder" "$out" || true
k1=$(inspect_field "$out" conformance.k1)
case "$k1" in
  false)
    pass "builder kit is K0 only (no agent extension assets declared in kit.json)"
    ;;
  true)
    pass "builder kit is K1+ (agent extension assets present)"
    ;;
  *)
    fail "builder kit: conformance.k1 should be true or false, got '$k1'"
    cat "$out"
    ;;
esac

# Verify knowledge kit is K2 (has evals)
out="$TMP_DIR/knowledge-k2.out"
run_inspect "$ROOT/kits/knowledge" "$out" || true
k2=$(inspect_field "$out" conformance.k2)
if [[ "$k2" == "true" ]]; then
  pass "knowledge kit is K2 (evals present)"
else
  fail "knowledge kit should be K2 (has evals in kit.json)"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 2. K0 fixture: flows-only → target=flow only ==="
# ===================================================================

k0_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k0-flows-only"
out="$TMP_DIR/k0.out"
if run_inspect "$k0_fixture" "$out"; then
  k0=$(inspect_field "$out" conformance.k0)
  k1=$(inspect_field "$out" conformance.k1)
  targets=$(inspect_field "$out" targets)
  [[ "$k0" == "true" ]] && pass "K0 fixture: k0=true" || { fail "K0 fixture: expected k0=true, got $k0"; cat "$out"; }
  [[ "$k1" == "false" ]] && pass "K0 fixture: k1=false (no agent extension)" || { fail "K0 fixture: expected k1=false, got $k1"; cat "$out"; }
  [[ "$targets" == "flow" ]] && pass "K0 fixture: targets=['flow'] only" || { fail "K0 fixture: expected targets=['flow'], got '$targets'"; cat "$out"; }
else
  fail "K0 fixture inspect failed"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 3. K1 fixture: flows+docs → targets=[flow,flow-agents] ==="
# ===================================================================

k1_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k1-agent-extension"
out="$TMP_DIR/k1.out"
if run_inspect "$k1_fixture" "$out"; then
  k0=$(inspect_field "$out" conformance.k0)
  k1=$(inspect_field "$out" conformance.k1)
  k2=$(inspect_field "$out" conformance.k2)
  targets=$(inspect_field "$out" targets)
  [[ "$k0" == "true" ]] && pass "K1 fixture: k0=true" || { fail "K1 fixture: expected k0=true, got $k0"; cat "$out"; }
  [[ "$k1" == "true" ]] && pass "K1 fixture: k1=true (agent extension present)" || { fail "K1 fixture: expected k1=true, got $k1"; cat "$out"; }
  [[ "$k2" == "false" ]] && pass "K1 fixture: k2=false (no evals)" || { fail "K1 fixture: expected k2=false, got $k2"; cat "$out"; }
  [[ "$targets" == "flow,flow-agents" ]] && pass "K1 fixture: targets=[flow,flow-agents]" || { fail "K1 fixture: expected targets=[flow,flow-agents], got '$targets'"; cat "$out"; }
else
  fail "K1 fixture inspect failed"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 4. K2 fixture: flows+docs+evals → targets=[flow,flow-agents] k2=true ==="
# ===================================================================

k2_fixture="$ROOT/evals/fixtures/kit-conformance-levels/k2-with-evals"
out="$TMP_DIR/k2.out"
if run_inspect "$k2_fixture" "$out"; then
  k2=$(inspect_field "$out" conformance.k2)
  targets=$(inspect_field "$out" targets)
  [[ "$k2" == "true" ]] && pass "K2 fixture: k2=true (evals present)" || { fail "K2 fixture: expected k2=true, got $k2"; cat "$out"; }
  [[ "$targets" == "flow,flow-agents" ]] && pass "K2 fixture: targets=[flow,flow-agents]" || { fail "K2 fixture: expected targets=[flow,flow-agents], got '$targets'"; cat "$out"; }
else
  fail "K2 fixture inspect failed"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 5. Third-party extension fixture → third-party ns in targets ==="
# ===================================================================

tp_fixture="$ROOT/evals/fixtures/kit-conformance-levels/third-party-extension"
out="$TMP_DIR/third-party.out"
# third-party extension fixture has an unknown top-level key; inspect still exits 0 (K0 valid)
if run_inspect "$tp_fixture" "$out"; then
  third_party=$(inspect_field "$out" third_party_extensions)
  targets=$(inspect_field "$out" targets)
  # -F: match the namespace literally; an unescaped "." would match any character.
  if echo "$third_party" | grep -qF "my-platform.widgets"; then
    pass "third-party extension fixture: unknown namespace listed in third_party_extensions"
  else
    fail "third-party extension fixture: expected my-platform.widgets in third_party_extensions, got '$third_party'"
    cat "$out"
  fi
  if echo "$targets" | grep -qF "my-platform.widgets"; then
    pass "third-party extension fixture: unknown namespace listed in targets"
  else
    fail "third-party extension fixture: expected my-platform.widgets in targets, got '$targets'"
    cat "$out"
  fi
else
  fail "third-party extension fixture inspect failed (k0 should still be valid)"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 6. Inspect JSON schema shape ==="
# ===================================================================

out="$TMP_DIR/schema-check.out"
run_inspect "$ROOT/kits/builder" "$out" || true
# The output path is passed via argv (process.argv[1]) instead of being
# interpolated into the script.
if node -e '
  const report = JSON.parse(require("fs").readFileSync(process.argv[1], "utf8"));
  const required = ["kit_id", "kit_name", "conformance", "targets", "third_party_extensions"];
  for (const key of required) {
    if (!(key in report)) throw new Error("missing key: " + key);
  }
  for (const level of ["k0", "k1", "k2"]) {
    if (typeof report.conformance[level] !== "boolean") throw new Error("conformance." + level + " must be boolean");
  }
  if (!Array.isArray(report.targets)) throw new Error("targets must be array");
  if (!Array.isArray(report.third_party_extensions)) throw new Error("third_party_extensions must be array");
' "$out" 2>/dev/null; then
  pass "inspect JSON output has required schema shape"
else
  fail "inspect JSON output is missing required fields"
  cat "$out"
fi

# ===================================================================
echo ""
echo "=== 7. Degradation invariant: core container strip test ==="
# ===================================================================

# Verify that validateCoreContainer (via inspect) ignores agent extension fields
# by checking that knowledge kit (which has agent extension asset fields present)
# still passes core validation
out="$TMP_DIR/knowledge-core.out"
run_inspect "$ROOT/kits/knowledge" "$out" || true
k0=$(inspect_field "$out" conformance.k0)
if [[ "$k0" == "true" ]]; then
  pass "knowledge kit: agent extension fields stripped, core container valid (degradation invariant)"
else
  fail "knowledge kit: degradation invariant violated — k0 should be true"
  cat "$out"
fi

# ===================================================================
echo ""
if [[ "$errors" -eq 0 ]]; then
  echo "Kit conformance level checks passed."
  exit 0
fi
echo "Kit conformance level checks failed: $errors issue(s)."
exit 1
|
package/evals/run.sh
CHANGED
|
@@ -192,6 +192,8 @@ run_integration() {
|
|
|
192
192
|
bash "$EVAL_DIR/integration/test_bundle_install.sh" || result=1
|
|
193
193
|
echo ""
|
|
194
194
|
bash "$EVAL_DIR/integration/test_bundle_lifecycle.sh" || result=1
|
|
195
|
+
echo ""
|
|
196
|
+
bash "$EVAL_DIR/integration/test_kit_conformance_levels.sh" || result=1
|
|
195
197
|
return $result
|
|
196
198
|
}
|
|
197
199
|
|
|
@@ -286,6 +286,16 @@ else
|
|
|
286
286
|
_fail "opencode bundle missing opencode.json"
|
|
287
287
|
fi
|
|
288
288
|
|
|
289
|
+
# Root AGENTS.md carries a hand-maintained "Repository Conventions" section
|
|
290
|
+
# (commit/release rules for agents working in THIS repo). The rest of the
|
|
291
|
+
# file mirrors generated bundle output; this pin prevents a regeneration
|
|
292
|
+
# sync from silently dropping the repo-specific section.
|
|
293
|
+
if grep -q "## Repository Conventions (source repo only)" "$ROOT_DIR/AGENTS.md" 2>/dev/null && grep -q "release-please" "$ROOT_DIR/AGENTS.md" 2>/dev/null; then
|
|
294
|
+
_pass "root AGENTS.md retains the Repository Conventions section"
|
|
295
|
+
else
|
|
296
|
+
_fail "root AGENTS.md is missing the Repository Conventions section (regeneration clobbered it?)"
|
|
297
|
+
fi
|
|
298
|
+
|
|
289
299
|
# Generated hook artifacts must PARSE in their host language. The pi live
|
|
290
300
|
# smoke (2026-06-11) caught the generator emitting an unterminated string
|
|
291
301
|
# (template-literal escaping) that pi's loader rejected at startup.
|
package/kits/catalog.json
CHANGED
|
@@ -12,6 +12,12 @@
|
|
|
12
12
|
"name": "Knowledge Kit",
|
|
13
13
|
"path": "kits/knowledge",
|
|
14
14
|
"description": "Store contract with record types (raw/compiled/concept), mutation operations with required provenance, default markdown+frontmatter+wikilink+graph-index adapter, and a parameterized contract test suite."
|
|
15
|
+
},
|
|
16
|
+
{
|
|
17
|
+
"id": "release-evidence",
|
|
18
|
+
"name": "Release Evidence Kit",
|
|
19
|
+
"path": "kits/release-evidence",
|
|
20
|
+
"description": "Minimal flows-only kit for proving agentless gate evaluation over surface claims in CI. One gate expects a trusted release.evidence claim."
|
|
15
21
|
}
|
|
16
22
|
]
|
|
17
23
|
}
|
|
@@ -167,9 +167,19 @@ function serializeYaml(obj, indent = 0) {
|
|
|
167
167
|
const entries = Object.entries(item).filter(([, v]) => v !== undefined && v !== null);
|
|
168
168
|
if (entries.length === 0) { lines.push(`${pad} - {}`); continue; }
|
|
169
169
|
const [firstKey, firstVal] = entries[0];
|
|
170
|
-
|
|
170
|
+
if (typeof firstVal === "object" && firstVal !== null && !Array.isArray(firstVal)) {
|
|
171
|
+
lines.push(`${pad} - ${firstKey}:`);
|
|
172
|
+
lines.push(serializeYaml(firstVal, indent + 6));
|
|
173
|
+
} else {
|
|
174
|
+
lines.push(`${pad} - ${firstKey}: ${yamlScalar(firstVal)}`);
|
|
175
|
+
}
|
|
171
176
|
for (const [k, v] of entries.slice(1)) {
|
|
172
|
-
|
|
177
|
+
if (typeof v === "object" && v !== null && !Array.isArray(v)) {
|
|
178
|
+
lines.push(`${pad} ${k}:`);
|
|
179
|
+
lines.push(serializeYaml(v, indent + 6));
|
|
180
|
+
} else {
|
|
181
|
+
lines.push(`${pad} ${k}: ${yamlScalar(v)}`);
|
|
182
|
+
}
|
|
173
183
|
}
|
|
174
184
|
} else {
|
|
175
185
|
lines.push(`${pad} - ${yamlScalar(item)}`);
|
|
@@ -291,9 +301,17 @@ function removeLinksFromGraph(graph, sourceId) {
|
|
|
291
301
|
// Validation helpers
|
|
292
302
|
// ---------------------------------------------------------------------------
|
|
293
303
|
|
|
294
|
-
const VALID_TYPES = new Set(["raw", "compiled", "concept", "snapshot"]);
|
|
304
|
+
const VALID_TYPES = new Set(["raw", "compiled", "concept", "snapshot", "person"]);
|
|
305
|
+
const VALID_STATUSES = new Set(["active", "implemented", "retired"]);
|
|
295
306
|
const CATEGORY_SEGMENT_RE = /^[a-z0-9_-]+$/;
|
|
296
307
|
|
|
308
|
+
// Status transition table: from → allowed targets.
//
// The table has a null prototype so that an unexpected status string read from
// a stored record (for example "constructor" or "toString") looks up as
// undefined instead of resolving to an inherited Object.prototype member.
// The outer object is frozen so a row cannot be reassigned by accident; the
// freeze is shallow, so the Sets must still be treated as read-only.
const VALID_STATUS_TRANSITIONS = Object.freeze({
  __proto__: null,
  active: new Set(["implemented", "retired"]),
  implemented: new Set(["retired"]),
  retired: new Set(), // terminal — no further transitions
});
|
|
314
|
+
|
|
297
315
|
function validateCategory(cat) {
|
|
298
316
|
if (!cat || typeof cat !== "string") return false;
|
|
299
317
|
return cat.split(".").every((seg) => CATEGORY_SEGMENT_RE.test(seg));
|
|
@@ -349,7 +367,7 @@ export class DefaultKnowledgeStore {
|
|
|
349
367
|
// Required field enforcement
|
|
350
368
|
if (!input.type) throw missingEvidenceError("create: missing required field: type");
|
|
351
369
|
if (!VALID_TYPES.has(input.type))
|
|
352
|
-
throw missingEvidenceError(`create: type must be raw, compiled, concept,
|
|
370
|
+
throw missingEvidenceError(`create: type must be one of raw, compiled, concept, snapshot, person; got: ${input.type}`);
|
|
353
371
|
if (!input.title || !input.title.trim())
|
|
354
372
|
throw missingEvidenceError("create: missing required field: title");
|
|
355
373
|
if (!input.body && input.body !== "")
|
|
@@ -376,6 +394,7 @@ export class DefaultKnowledgeStore {
|
|
|
376
394
|
title: input.title,
|
|
377
395
|
category: input.category,
|
|
378
396
|
tags: input.tags || [],
|
|
397
|
+
status: "active",
|
|
379
398
|
created_at: now,
|
|
380
399
|
updated_at: now,
|
|
381
400
|
provenance: {
|
|
@@ -526,8 +545,7 @@ export class DefaultKnowledgeStore {
|
|
|
526
545
|
|
|
527
546
|
const concept = this._readRecord(conceptId);
|
|
528
547
|
if (!concept) throw notFoundError(conceptId);
|
|
529
|
-
|
|
530
|
-
throw missingEvidenceError(`propose: concept_id must reference a concept or snapshot record; got type: ${concept.type}`);
|
|
548
|
+
// Any record type may receive a proposal (retire flow uses this for all types)
|
|
531
549
|
|
|
532
550
|
const proposer = this._readRecord(proposerId);
|
|
533
551
|
if (!proposer) throw notFoundError(proposerId);
|
|
@@ -594,8 +612,7 @@ export class DefaultKnowledgeStore {
|
|
|
594
612
|
|
|
595
613
|
const concept = this._readRecord(conceptId);
|
|
596
614
|
if (!concept) throw notFoundError(conceptId);
|
|
597
|
-
|
|
598
|
-
throw missingEvidenceError(`apply: concept_id must reference a concept or snapshot record; got type: ${concept.type}`);
|
|
615
|
+
// Any record type may be the apply target
|
|
599
616
|
|
|
600
617
|
const proposer = this._readRecord(proposerId);
|
|
601
618
|
if (!proposer) throw notFoundError(proposerId);
|
|
@@ -637,8 +654,7 @@ export class DefaultKnowledgeStore {
|
|
|
637
654
|
|
|
638
655
|
const concept = this._readRecord(conceptId);
|
|
639
656
|
if (!concept) throw notFoundError(conceptId);
|
|
640
|
-
|
|
641
|
-
throw missingEvidenceError(`reject: concept_id must reference a concept or snapshot record; got type: ${concept.type}`);
|
|
657
|
+
// Any record type may be the reject target
|
|
642
658
|
|
|
643
659
|
const proposer = this._readRecord(proposerId);
|
|
644
660
|
if (!proposer) throw notFoundError(proposerId);
|
|
@@ -759,6 +775,59 @@ export class DefaultKnowledgeStore {
|
|
|
759
775
|
}
|
|
760
776
|
|
|
761
777
|
|
|
778
|
+
// -------------------------------------------------------------------------
|
|
779
|
+
// retire (Addendum B — S7)
|
|
780
|
+
// -------------------------------------------------------------------------
|
|
781
|
+
|
|
782
|
+
async retire(id, targetStatus, evidence) {
|
|
783
|
+
if (!evidence?.agent)
|
|
784
|
+
throw missingEvidenceError("retire: missing required evidence field: agent");
|
|
785
|
+
if (!evidence?.rationale || !evidence.rationale.trim())
|
|
786
|
+
throw missingEvidenceError("retire: missing required evidence field: rationale");
|
|
787
|
+
if (targetStatus !== "implemented" && targetStatus !== "retired")
|
|
788
|
+
throw missingEvidenceError(
|
|
789
|
+
`retire: targetStatus must be "implemented" or "retired"; got: ${targetStatus}`
|
|
790
|
+
);
|
|
791
|
+
if (targetStatus === "implemented" && (!evidence.implementedByRef || !evidence.implementedByRef.trim()))
|
|
792
|
+
throw missingEvidenceError(
|
|
793
|
+
'retire: implementedByRef is required when targetStatus is "implemented"'
|
|
794
|
+
);
|
|
795
|
+
|
|
796
|
+
const record = this._readRecord(id);
|
|
797
|
+
if (!record) throw notFoundError(id);
|
|
798
|
+
|
|
799
|
+
const currentStatus = record.status || "active";
|
|
800
|
+
const allowed = VALID_STATUS_TRANSITIONS[currentStatus];
|
|
801
|
+
if (!allowed || !allowed.has(targetStatus)) {
|
|
802
|
+
throw missingEvidenceError(
|
|
803
|
+
`retire: invalid transition from "${currentStatus}" to "${targetStatus}"`
|
|
804
|
+
);
|
|
805
|
+
}
|
|
806
|
+
|
|
807
|
+
const now = this._now();
|
|
808
|
+
const updated = {
|
|
809
|
+
...record,
|
|
810
|
+
status: targetStatus,
|
|
811
|
+
updated_at: now,
|
|
812
|
+
mutation_log: [
|
|
813
|
+
...(record.mutation_log || []),
|
|
814
|
+
{
|
|
815
|
+
op: "retire",
|
|
816
|
+
at: now,
|
|
817
|
+
agent: evidence.agent,
|
|
818
|
+
...(evidence.note ? { note: evidence.note } : {}),
|
|
819
|
+
evidence: {
|
|
820
|
+
targetStatus,
|
|
821
|
+
rationale: evidence.rationale,
|
|
822
|
+
...(evidence.implementedByRef ? { implementedByRef: evidence.implementedByRef } : {}),
|
|
823
|
+
...(evidence.supersededByRef ? { supersededByRef: evidence.supersededByRef } : {}),
|
|
824
|
+
},
|
|
825
|
+
},
|
|
826
|
+
],
|
|
827
|
+
};
|
|
828
|
+
this._writeRecord(updated);
|
|
829
|
+
}
|
|
830
|
+
|
|
762
831
|
// -------------------------------------------------------------------------
|
|
763
832
|
// get
|
|
764
833
|
// -------------------------------------------------------------------------
|
|
@@ -785,20 +854,32 @@ export class DefaultKnowledgeStore {
|
|
|
785
854
|
|
|
786
855
|
async listByCategory(category, options = {}) {
|
|
787
856
|
const records = this._allRecords();
|
|
857
|
+
const includeRetired = options.includeRetired === true;
|
|
788
858
|
if (options.prefix) {
|
|
789
859
|
return records.filter(
|
|
790
|
-
(r) =>
|
|
860
|
+
(r) =>
|
|
861
|
+
(r.category === category || r.category.startsWith(`${category}.`)) &&
|
|
862
|
+
(includeRetired || (r.status || "active") !== "retired")
|
|
791
863
|
);
|
|
792
864
|
}
|
|
793
|
-
return records.filter(
|
|
865
|
+
return records.filter(
|
|
866
|
+
(r) =>
|
|
867
|
+
r.category === category &&
|
|
868
|
+
(includeRetired || (r.status || "active") !== "retired")
|
|
869
|
+
);
|
|
794
870
|
}
|
|
795
871
|
|
|
796
872
|
// -------------------------------------------------------------------------
|
|
797
873
|
// listByType
|
|
798
874
|
// -------------------------------------------------------------------------
|
|
799
875
|
|
|
800
|
-
async listByType(type) {
|
|
801
|
-
|
|
876
|
+
async listByType(type, options = {}) {
|
|
877
|
+
const includeRetired = options.includeRetired === true;
|
|
878
|
+
return this._allRecords().filter(
|
|
879
|
+
(r) =>
|
|
880
|
+
r.type === type &&
|
|
881
|
+
(includeRetired || (r.status || "active") !== "retired")
|
|
882
|
+
);
|
|
802
883
|
}
|
|
803
884
|
|
|
804
885
|
// -------------------------------------------------------------------------
|