@heytherevibin/skillforge 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/CONTRIBUTING.md +30 -19
- package/README.md +248 -198
- package/RELEASING.md +19 -7
- package/SECURITY.md +61 -13
- package/STRATEGY.md +40 -14
- package/bin/cli.js +112 -5
- package/ci/bundle-gate.json +4 -0
- package/lib/host-setup.js +312 -0
- package/lib/templates/claude-code-skillforge-global.md +19 -0
- package/lib/templates/cursor-skillforge-global.md +16 -0
- package/package.json +3 -2
- package/python/app/eval_cli.py +133 -0
- package/python/app/feedback_meta.py +96 -0
- package/python/app/health_cli.py +160 -0
- package/python/app/main.py +502 -26
- package/python/app/materialize.py +72 -4
- package/python/app/mcp_contract.py +13 -1
- package/python/app/mcp_server.py +344 -25
- package/python/app/route_cli.py +32 -13
- package/python/app/route_eval_harness.py +98 -0
- package/python/app/route_policies.py +243 -0
- package/python/app/route_quality.py +99 -0
- package/python/app/routing_signals.py +155 -0
- package/python/app/weights_cli.py +152 -0
- package/python/fixtures/route_eval/smoke.json +18 -0
- package/python/requirements.txt +1 -0
- package/python/tests/test_feedback_weights.py +77 -0
- package/python/tests/test_materialize.py +51 -0
- package/python/tests/test_mcp_contract.py +117 -0
- package/python/tests/test_route_eval_harness.py +45 -0
- package/python/tests/test_route_policies.py +115 -0
- package/python/tests/test_route_quality.py +120 -0
- package/python/tests/test_routing_overlay.py +55 -0
- package/python/tests/test_routing_signals.py +112 -0
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
"""Export / import per-user skill_weights rows (JSON snapshot)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import argparse
|
|
5
|
+
import json
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
from app.db_paths import resolve_orchestrator_db
|
|
11
|
+
from app.main import init_db
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def _parse_args(argv: list[str] | None) -> argparse.Namespace:
|
|
15
|
+
p = argparse.ArgumentParser(description="Export or import learned skill_weights (uses, thumbs, routing bias).")
|
|
16
|
+
sub = p.add_subparsers(dest="cmd", required=True)
|
|
17
|
+
|
|
18
|
+
ex = sub.add_parser("export", help="Dump skill_weights rows to JSON (stdout unless -o).")
|
|
19
|
+
ex.add_argument("-o", "--output", type=Path, default=None, help="Output file (default: stdout).")
|
|
20
|
+
ex.add_argument("--user-id", default="", help="Logical user id (default '' = global row set).")
|
|
21
|
+
ex.add_argument(
|
|
22
|
+
"--project-root",
|
|
23
|
+
default="",
|
|
24
|
+
help="Resolve DB from <root>/.skillforge/orchestrator.db (else env / global).",
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
im = sub.add_parser("import", help="Load JSON snapshot into skill_weights.")
|
|
28
|
+
im.add_argument("file", type=Path, help="JSON file from skillforge weights export.")
|
|
29
|
+
im.add_argument(
|
|
30
|
+
"--user-id",
|
|
31
|
+
default=None,
|
|
32
|
+
help="Override user_id for all imported rows (default: use file's user_id).",
|
|
33
|
+
)
|
|
34
|
+
im.add_argument(
|
|
35
|
+
"--project-root",
|
|
36
|
+
default="",
|
|
37
|
+
help="Target DB path (same as export).",
|
|
38
|
+
)
|
|
39
|
+
im.add_argument(
|
|
40
|
+
"--replace-user",
|
|
41
|
+
action="store_true",
|
|
42
|
+
help="Delete existing rows for the target user_id before import.",
|
|
43
|
+
)
|
|
44
|
+
return p.parse_args(argv)
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def export_weights(con, user_id: str) -> dict:
    """Serialize every skill_weights row for *user_id* as a JSON-ready snapshot.

    Args:
        con: Open SQLite connection containing a ``skill_weights`` table.
        user_id: Logical user whose rows are exported ('' = global set).

    Returns:
        Dict with ``version``, ``user_id``, ``exported_at`` and ``rows``.
    """
    cursor = con.execute(
        """
        SELECT skill_name, weight, uses, referenced, thumbs_up, thumbs_down, disabled, updated_at
        FROM skill_weights WHERE user_id = ? ORDER BY skill_name
        """,
        (user_id,),
    )
    # Coerce SQLite values to plain JSON types; updated_at may be NULL.
    rows = [
        {
            "skill_name": name,
            "weight": float(weight),
            "uses": int(uses),
            "referenced": int(referenced),
            "thumbs_up": int(up),
            "thumbs_down": int(down),
            "disabled": int(disabled),
            "updated_at": float(updated) if updated is not None else None,
        }
        for name, weight, uses, referenced, up, down, disabled, updated in cursor.fetchall()
    ]
    return {"version": 1, "user_id": user_id, "exported_at": time.time(), "rows": rows}
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def import_weights(con, data: dict, *, user_id_override: str | None, replace_user: bool) -> int:
|
|
71
|
+
if not isinstance(data, dict):
|
|
72
|
+
raise ValueError("root must be object")
|
|
73
|
+
rows = data.get("rows")
|
|
74
|
+
if not isinstance(rows, list):
|
|
75
|
+
raise ValueError("rows must be array")
|
|
76
|
+
uid = user_id_override if user_id_override is not None else str(data.get("user_id") or "")
|
|
77
|
+
if replace_user:
|
|
78
|
+
con.execute("DELETE FROM skill_weights WHERE user_id = ?", (uid,))
|
|
79
|
+
n = 0
|
|
80
|
+
now = time.time()
|
|
81
|
+
for raw in rows:
|
|
82
|
+
if not isinstance(raw, dict):
|
|
83
|
+
continue
|
|
84
|
+
name = raw.get("skill_name")
|
|
85
|
+
if not name or not isinstance(name, str):
|
|
86
|
+
continue
|
|
87
|
+
con.execute(
|
|
88
|
+
"""
|
|
89
|
+
INSERT INTO skill_weights
|
|
90
|
+
(user_id, skill_name, weight, uses, referenced, thumbs_up, thumbs_down, disabled, updated_at)
|
|
91
|
+
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
92
|
+
ON CONFLICT(user_id, skill_name) DO UPDATE SET
|
|
93
|
+
weight = excluded.weight,
|
|
94
|
+
uses = excluded.uses,
|
|
95
|
+
referenced = excluded.referenced,
|
|
96
|
+
thumbs_up = excluded.thumbs_up,
|
|
97
|
+
thumbs_down = excluded.thumbs_down,
|
|
98
|
+
disabled = excluded.disabled,
|
|
99
|
+
updated_at = excluded.updated_at
|
|
100
|
+
""",
|
|
101
|
+
(
|
|
102
|
+
uid,
|
|
103
|
+
name,
|
|
104
|
+
float(raw.get("weight", 0.0)),
|
|
105
|
+
int(raw.get("uses", 0)),
|
|
106
|
+
int(raw.get("referenced", 0)),
|
|
107
|
+
int(raw.get("thumbs_up", 0)),
|
|
108
|
+
int(raw.get("thumbs_down", 0)),
|
|
109
|
+
int(raw.get("disabled", 0)),
|
|
110
|
+
float(raw.get("updated_at") or now),
|
|
111
|
+
),
|
|
112
|
+
)
|
|
113
|
+
n += 1
|
|
114
|
+
con.commit()
|
|
115
|
+
return n
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def main(argv: list[str] | None = None) -> None:
    """CLI entry point: run export or import against the resolved orchestrator DB.

    Always raises SystemExit (0 on success, 2 when the import file is missing);
    the connection is closed in all cases.
    """
    args = _parse_args(argv)
    project_root = (getattr(args, "project_root", "") or "").strip() or None
    db_path = resolve_orchestrator_db(project_root)
    con = init_db(db_path)
    try:
        if args.cmd == "export":
            snapshot = export_weights(con, args.user_id)
            rendered = json.dumps(snapshot, indent=2)
            if args.output:
                args.output.write_text(rendered + "\n", encoding="utf-8")
                # Status goes to stderr so stdout stays machine-readable.
                print(f"Wrote {len(snapshot['rows'])} rows → {args.output}", file=sys.stderr)
            else:
                print(rendered)
            raise SystemExit(0)
        if args.cmd == "import":
            src = args.file.expanduser().resolve()
            if not src.is_file():
                print(f"skillforge weights import: not found {src}", file=sys.stderr)
                raise SystemExit(2)
            payload = json.loads(src.read_text(encoding="utf-8"))
            imported = import_weights(
                con,
                payload,
                user_id_override=args.user_id,
                replace_user=bool(args.replace_user),
            )
            print(f"Imported {imported} row(s) into {db_path}", file=sys.stderr)
            raise SystemExit(0)
    finally:
        con.close()
|
|
149
|
+
|
|
150
|
+
|
|
151
|
+
# Allow direct execution (`python -m app.weights_cli ...`).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": 1,
|
|
3
|
+
"defaults": {
|
|
4
|
+
"candidate_window": 22
|
|
5
|
+
},
|
|
6
|
+
"cases": [
|
|
7
|
+
{
|
|
8
|
+
"id": "python-testing",
|
|
9
|
+
"prompt": "pytest fixtures caplog and monkeypatch for an API integration test",
|
|
10
|
+
"expect_in_candidates": ["python-testing"]
|
|
11
|
+
},
|
|
12
|
+
{
|
|
13
|
+
"id": "docker-patterns",
|
|
14
|
+
"prompt": "docker compose healthcheck restart policy and rollout",
|
|
15
|
+
"expect_in_candidates": ["docker-patterns"]
|
|
16
|
+
}
|
|
17
|
+
]
|
|
18
|
+
}
|
package/python/requirements.txt
CHANGED
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
"""Tests for feedback_effect snapshot and weights export/import."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from app.feedback_meta import build_feedback_effect, get_skill_weight_detail
|
|
5
|
+
from app.main import init_db, update_skill_stat
|
|
6
|
+
from app.weights_cli import export_weights, import_weights
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_get_skill_weight_detail_missing(tmp_path) -> None:
    """A skill with no skill_weights row yields None."""
    connection = init_db(tmp_path / "a.db")
    assert get_skill_weight_detail(connection, "nope", "") is None
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def test_build_feedback_effect_after_use(tmp_path) -> None:
    """Recorded uses/thumbs appear in the feedback_effect snapshot."""
    connection = init_db(tmp_path / "b.db")
    for stat in ("uses", "thumbs_up"):
        update_skill_stat(connection, "alpha", stat, 1, user_id="")
    effect = build_feedback_effect(connection, ["alpha", "beta"], user_id="")
    assert effect["schema"] == "feedback_effect/1"
    assert len(effect["picked"]) == 2
    by_skill = {entry["skill"]: entry for entry in effect["picked"]}
    assert by_skill["alpha"]["has_db_row"] is True
    assert by_skill["alpha"]["uses"] >= 1
    # "beta" was never recorded, so it has no backing row.
    assert by_skill["beta"]["has_db_row"] is False
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_weights_export_import_roundtrip(tmp_path) -> None:
    """Exporting from one DB and importing into another preserves the stats."""
    source_db = tmp_path / "w1.db"
    target_db = tmp_path / "w2.db"

    # Seed the source DB with some learned stats.
    connection = init_db(source_db)
    update_skill_stat(connection, "x-skill", "uses", 2, user_id="u1")
    update_skill_stat(connection, "x-skill", "referenced", 1, user_id="u1")
    connection.close()

    # Snapshot the source.
    connection = init_db(source_db)
    snapshot = export_weights(connection, "u1")
    connection.close()
    assert any(row["skill_name"] == "x-skill" for row in snapshot["rows"])

    # Import into a fresh DB.
    target = init_db(target_db)
    imported = import_weights(target, snapshot, user_id_override=None, replace_user=False)
    target.close()
    assert imported >= 1

    # The row survived the roundtrip with its counters intact.
    target = init_db(target_db)
    row = target.execute(
        "SELECT uses, referenced FROM skill_weights WHERE user_id = ? AND skill_name = ?",
        ("u1", "x-skill"),
    ).fetchone()
    target.close()
    assert row is not None
    assert int(row[0]) == 2
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_weights_import_replace_user(tmp_path) -> None:
    """replace_user=True wipes the user's existing rows before importing."""
    db = tmp_path / "w3.db"
    connection = init_db(db)
    for skill in ("a", "b"):
        update_skill_stat(connection, skill, "uses", 1, user_id="")
    connection.close()

    snapshot = {
        "version": 1,
        "user_id": "",
        "rows": [
            {
                "skill_name": "only",
                "weight": 0.0,
                "uses": 1,
                "referenced": 0,
                "thumbs_up": 0,
                "thumbs_down": 0,
                "disabled": 0,
            }
        ],
    }
    connection = init_db(db)
    import_weights(connection, snapshot, user_id_override="", replace_user=True)
    connection.close()

    # Only the imported row should remain for the global ('' ) user.
    connection = init_db(db)
    cursor = connection.execute(
        "SELECT skill_name FROM skill_weights WHERE user_id = '' ORDER BY skill_name"
    )
    remaining = [record[0] for record in cursor.fetchall()]
    connection.close()
    assert remaining == ["only"]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Tests for project bootstrap file writes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from app.materialize import materialize_project_files
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def test_materialize_writes_cursor_command(tmp_path: Path) -> None:
    """Bootstrap writes both the Cursor and Claude command files."""
    project = tmp_path / "proj"
    project.mkdir()
    result = materialize_project_files(
        str(project),
        ["alpha"],
        {"alpha": "desc"},
        merge=True,
    )
    written = {Path(entry).as_posix() for entry in result["written"]}
    assert ".cursor/commands/skillforge.md" in written
    assert ".claude/commands/skillforge.md" in written
    claude_cmd = project / ".claude" / "commands" / "skillforge.md"
    assert claude_cmd.is_file()
    claude_text = claude_cmd.read_text(encoding="utf-8")
    # Command file must mention the MCP tool and the materialized skill.
    assert "route_skills" in claude_text
    assert "alpha" in claude_text
    cursor_cmd = project / ".cursor" / "commands" / "skillforge.md"
    assert "alpha" in cursor_cmd.read_text(encoding="utf-8")
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_materialize_merge_false_skips_existing_command(tmp_path: Path) -> None:
    """merge=False never clobbers files the user already has."""
    project = tmp_path / "proj"
    project.mkdir()
    preexisting = {
        project / ".cursor" / "commands" / "skillforge.md": "keep-me",
        project / ".cursor" / "rules" / "skillforge.mdc": "keep-rule",
        project / ".claude" / "commands" / "skillforge.md": "keep-cc",
    }
    for path, text in preexisting.items():
        path.parent.mkdir(parents=True, exist_ok=True)
        path.write_text(text, encoding="utf-8")
    materialize_project_files(
        str(project),
        ["b"],
        {},
        merge=False,
    )
    # Every pre-existing file must be byte-identical after the run.
    for path, text in preexisting.items():
        assert path.read_text(encoding="utf-8") == text
|
|
@@ -135,3 +135,120 @@ def test_build_route_skills_meta_error_field() -> None:
|
|
|
135
135
|
)
|
|
136
136
|
assert meta["error"] == "empty_prompt"
|
|
137
137
|
assert meta["sources"] == []
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_build_route_skills_meta_includes_route_quality() -> None:
    """A dict route_quality from the router is passed through into meta."""
    quality = {"schema": "route_quality/1", "picked_count": 1}
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "route_quality": quality,
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert meta.get("route_quality") == quality
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def test_build_route_skills_meta_ignores_non_dict_route_quality() -> None:
    """A malformed (non-dict) route_quality is dropped from meta."""
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "route_quality": "not-a-dict",
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert "route_quality" not in meta
|
|
177
|
+
|
|
178
|
+
|
|
179
|
+
def test_build_route_skills_meta_includes_feedback_effect() -> None:
    """A dict feedback_effect from the router is passed through into meta."""
    effect = {"schema": "feedback_effect/1", "picked": []}
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "feedback_effect": effect,
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert meta.get("feedback_effect") == effect
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def test_build_route_skills_meta_ignores_non_dict_feedback_effect() -> None:
    """A malformed (list) feedback_effect is dropped from meta."""
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "feedback_effect": [1, 2],
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert "feedback_effect" not in meta
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_build_route_skills_meta_includes_routing_overlay() -> None:
    """A dict routing_overlay from the router is passed through into meta."""
    overlay = {"schema": "routing_overlay/1", "exclude_skills": ["x"]}
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "routing_overlay": overlay,
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert meta.get("routing_overlay") == overlay
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def test_build_route_skills_meta_ignores_non_dict_routing_overlay() -> None:
    """A malformed (string) routing_overlay is dropped from meta."""
    router_result = {
        "candidates": [],
        "session_id": "s",
        "rerouted": False,
        "change": 0.0,
        "route_ms": 1.0,
        "routing_overlay": "bad",
    }
    meta = build_route_skills_meta(
        result=router_result,
        picked_names=[],
        user_id="u",
        db_path="db.sqlite",
        skills_map={},
        response_text="x",
    )
    assert "routing_overlay" not in meta
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Unit tests for route eval fixture matcher (no embedding load)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
|
|
6
|
+
from app.route_eval_harness import evaluate_case_result, load_eval_fixture
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _cands(names: list[str]) -> list:
|
|
10
|
+
return [(SimpleNamespace(name=n), 0.9) for n in names]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_evaluate_case_expect_in_candidates() -> None:
    """No errors when the expected skill appears in the candidate window."""
    outcome = {"candidates": _cands(["a", "b", "python-testing"]), "picked_names": ["a"]}
    case = {"id": "t", "prompt": "x", "expect_in_candidates": ["python-testing"]}
    assert evaluate_case_result(outcome, case, defaults={"candidate_window": 10}) == []
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def test_evaluate_case_missing_candidate() -> None:
    """An absent expected candidate is reported by name in the errors."""
    outcome = {"candidates": _cands(["x", "y"]), "picked_names": ["x"]}
    case = {"id": "t", "prompt": "x", "expect_in_candidates": ["python-testing"]}
    errors = evaluate_case_result(outcome, case, defaults={"candidate_window": 5})
    assert errors
    assert "python-testing" in errors[0]
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_evaluate_picked_any() -> None:
    """expect_picked_any passes when any listed skill was actually picked."""
    outcome = {"candidates": _cands(["a", "b"]), "picked_names": ["b"]}
    assert evaluate_case_result(outcome, {"id": "t", "expect_picked_any": ["b"]}) == []
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_host_shortlist_fails() -> None:
    """Host-shortlist results cannot be evaluated and surface an error."""
    outcome = {"host_pick_shortlist": True, "candidates": [], "picked_names": []}
    errors = evaluate_case_result(outcome, {"id": "h"}, defaults={})
    assert any("host shortlist" in message for message in errors)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_load_fixture(tmp_path) -> None:
    """A minimal fixture file parses into its cases list."""
    fixture = tmp_path / "f.json"
    fixture.write_text(
        '{"version":1,"cases":[{"prompt":"hi","expect_in_candidates":["z"]}]}',
        encoding="utf-8",
    )
    loaded = load_eval_fixture(fixture)
    assert len(loaded["cases"]) == 1
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
"""Tests for route policy loading and merge."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import pytest
|
|
5
|
+
|
|
6
|
+
from app.main import Skill, init_db
|
|
7
|
+
from app.route_policies import load_route_policies_config, merge_policy_includes
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@pytest.fixture
def skill_alpha() -> Skill:
    """Minimal bundled Skill instance shared by the policy-merge tests."""
    return Skill(
        name="alpha-skill",
        title="Alpha",
        description="test",
        body="body",
        source="bundled",
    )
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_merge_adds_on_regex_match(tmp_path, skill_alpha, monkeypatch) -> None:
    """A matching if_text_matches rule appends its include after the picks."""
    # Ensure no ambient policy env vars leak into the test.
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES", raising=False)
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES_FILE", raising=False)
    connection = init_db(tmp_path / "x.db")
    policies = {"rules": [{"if_text_matches": r"(?i)oauth", "include": ["alpha-skill"]}]}
    merged, audit = merge_policy_includes(
        "Fix OAuth callback",
        ["other-skill"],
        policies,
        {skill_alpha.name: skill_alpha},
        connection,
        "",
        max_active=7,
    )
    # Original pick order is preserved; the policy include is appended.
    assert merged[0] == "other-skill"
    assert "alpha-skill" in merged
    assert any(entry.get("effect") == "added" for entry in audit)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_merge_unknown_skill_audited(tmp_path, skill_alpha, monkeypatch) -> None:
    """Includes naming an unknown skill are audited, not merged."""
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES", raising=False)
    connection = init_db(tmp_path / "y.db")
    policies = {"rules": [{"if_text_matches": "auth", "include": ["missing"]}]}
    merged, audit = merge_policy_includes(
        "auth bug",
        [],
        policies,
        {skill_alpha.name: skill_alpha},
        connection,
        "",
        max_active=7,
    )
    assert merged == []
    assert any(entry.get("effect") == "unknown_skill" for entry in audit)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_merge_respects_max_active(tmp_path, skill_alpha, monkeypatch) -> None:
    """Policy includes never push the active set past max_active."""
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES", raising=False)
    connection = init_db(tmp_path / "z.db")
    policies = {"rules": [{"if_text_matches": "x", "include": ["alpha-skill"]}]}
    already_picked = list("abcdefg")  # exactly at the max_active cap
    merged, audit = merge_policy_includes(
        "x",
        already_picked,
        policies,
        {skill_alpha.name: skill_alpha},
        connection,
        "",
        max_active=7,
    )
    assert len(merged) == 7
    assert "alpha-skill" not in merged
    assert any(entry.get("effect") == "skipped_max_active" for entry in audit)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def test_load_from_project_file(tmp_path, monkeypatch) -> None:
    """Policies are discovered at <root>/skillforge-policies.json."""
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES", raising=False)
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES_FILE", raising=False)
    policy_file = tmp_path / "skillforge-policies.json"
    policy_file.write_text(
        '{"rules": [{"if_text_matches": "hi", "include": ["z"]}]}',
        encoding="utf-8",
    )
    config = load_route_policies_config(str(tmp_path))
    assert len(config.get("rules") or []) == 1
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def test_load_inline_env_json(monkeypatch) -> None:
    """Inline JSON in SKILLFORGE_ROUTE_POLICIES is honored with no project root."""
    monkeypatch.setenv(
        "SKILLFORGE_ROUTE_POLICIES",
        '{"rules": [{"if_text_matches": "a", "include": ["b"]}]}',
    )
    config = load_route_policies_config(None)
    assert config["rules"][0]["include"] == ["b"]
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_invalid_regex_recorded(tmp_path, skill_alpha, monkeypatch) -> None:
    """A rule with a broken regex is audited as invalid_regex, not raised."""
    monkeypatch.delenv("SKILLFORGE_ROUTE_POLICIES", raising=False)
    connection = init_db(tmp_path / "r.db")
    policies = {"rules": [{"if_text_matches": "(bad[regex", "include": ["alpha-skill"]}]}
    _merged, audit = merge_policy_includes(
        "x",
        [],
        policies,
        {skill_alpha.name: skill_alpha},
        connection,
        "",
        max_active=7,
    )
    assert any(entry.get("effect") == "invalid_regex" for entry in audit)
|