@heytherevibin/skillforge 0.7.0 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +29 -0
- package/CONTRIBUTING.md +30 -19
- package/README.md +248 -198
- package/RELEASING.md +19 -7
- package/SECURITY.md +61 -13
- package/STRATEGY.md +40 -14
- package/bin/cli.js +112 -5
- package/ci/bundle-gate.json +4 -0
- package/lib/host-setup.js +312 -0
- package/lib/templates/claude-code-skillforge-global.md +19 -0
- package/lib/templates/cursor-skillforge-global.md +16 -0
- package/package.json +3 -2
- package/python/app/eval_cli.py +133 -0
- package/python/app/feedback_meta.py +96 -0
- package/python/app/health_cli.py +160 -0
- package/python/app/main.py +502 -26
- package/python/app/materialize.py +72 -4
- package/python/app/mcp_contract.py +13 -1
- package/python/app/mcp_server.py +344 -25
- package/python/app/route_cli.py +32 -13
- package/python/app/route_eval_harness.py +98 -0
- package/python/app/route_policies.py +243 -0
- package/python/app/route_quality.py +99 -0
- package/python/app/routing_signals.py +155 -0
- package/python/app/weights_cli.py +152 -0
- package/python/fixtures/route_eval/smoke.json +18 -0
- package/python/requirements.txt +1 -0
- package/python/tests/test_feedback_weights.py +77 -0
- package/python/tests/test_materialize.py +51 -0
- package/python/tests/test_mcp_contract.py +117 -0
- package/python/tests/test_route_eval_harness.py +45 -0
- package/python/tests/test_route_policies.py +115 -0
- package/python/tests/test_route_quality.py +120 -0
- package/python/tests/test_routing_overlay.py +55 -0
- package/python/tests/test_routing_signals.py +112 -0
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""Tests for route_quality telemetry (robust coercion, hybrid diagnostics)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from app.route_quality import (
|
|
5
|
+
build_route_quality,
|
|
6
|
+
coerce_route_float,
|
|
7
|
+
policy_includes_added_count,
|
|
8
|
+
top1_cosine_vs_routing_agreement,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def test_coerce_route_float() -> None:
    """Check ``coerce_route_float`` on numeric text, junk values and an explicit default."""
    # Numeric text is converted to the matching float.
    parsed = coerce_route_float("0.5")
    assert parsed == 0.5
    # With no default passed, None, unparseable text, NaN and infinity all give 0.0.
    for junk in (None, "nope", float("nan"), float("inf")):
        assert coerce_route_float(junk) == 0.0
    # A usable value is returned unchanged even when a default is supplied.
    assert coerce_route_float(1.0, default=-1.0) == 1.0
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def test_policy_includes_added_count() -> None:
    """Check the ``"added"`` tally for a missing audit and for a mixed audit list."""
    # A missing audit counts as zero.
    assert policy_includes_added_count(None) == 0
    # Only the single {"effect": "added"} entry is expected to be counted.
    mixed_audit = [{"effect": "added"}, {"effect": "skip"}, "bad", {}]
    added_total = policy_includes_added_count(mixed_audit)
    assert added_total == 1
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_top1_agreement() -> None:
    """Check ``top1_cosine_vs_routing_agreement`` on empty, scoreless, split and agreeing rows."""
    # An empty list and a row carrying no score fields both give None.
    for scoreless in ([], [{"name": "a"}]):
        assert top1_cosine_vs_routing_agreement(scoreless) is None

    # The highest cosine and the highest routing score sit on different rows.
    split_rows = [
        {"name": "high_route", "cosine_similarity": 0.1, "routing_score": 1.0},
        {"name": "high_cos", "cosine_similarity": 0.9, "routing_score": 0.2},
    ]
    split_verdict = top1_cosine_vs_routing_agreement(split_rows)
    assert split_verdict is False

    # One row leads on both metrics.
    aligned_rows = [
        {"name": "winner", "cosine_similarity": 0.9, "routing_score": 1.0},
        {"name": "lose", "cosine_similarity": 0.1, "routing_score": 0.2},
    ]
    aligned_verdict = top1_cosine_vs_routing_agreement(aligned_rows)
    assert aligned_verdict is True
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_build_route_quality_empty_facets() -> None:
    """Check the report built from no facets, a NaN ``change`` and a non-numeric rule count."""
    call_args = {
        "facet_list": [],
        "router_mode": "auto",
        "router_hybrid": "off",
        "picked_names": [],
        "rerouted": False,
        "change": float("nan"),
        "policy_rules_loaded": "bogus",
        "policy_audit": None,
        "host_picked": False,
        "pick_path": "embedding_top",
    }
    report = build_route_quality(**call_args)

    # Empty shortlist: size zero and no top cosine value.
    shortlist = report["shortlist"]
    assert shortlist["size"] == 0
    assert shortlist["top_cosine_similarity"] is None
    # The NaN change and the "bogus" rule count are both reported as zero.
    assert report["session"]["change_jaccard"] == 0.0
    assert report["policy"]["rules_loaded"] == 0
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_build_route_quality_malformed_metrics() -> None:
    """Check the report when the first facet carries unusable metric values."""
    # Row "a" has a non-numeric cosine and a NaN routing score; row "b" is well-formed.
    broken_row = {"name": "a", "cosine_similarity": "bad", "routing_score": float("nan")}
    clean_row = {"name": "b", "cosine_similarity": 0.5, "routing_score": 0.2}
    report = build_route_quality(
        facet_list=[broken_row, clean_row],
        router_mode="host",
        router_hybrid="weighted",
        picked_names=["a"],
        rerouted=True,
        change=0.25,
        policy_rules_loaded=-3,
        policy_audit=[{"effect": "added"}, {"effect": "added"}],
        host_picked=False,
        pick_path="haiku_pick",
    )

    shortlist = report["shortlist"]
    # The unusable metrics of the first row are reported as 0.0.
    assert shortlist["top_cosine_similarity"] == 0.0
    assert shortlist["top_routing_score"] == 0.0
    assert shortlist["second_cosine_similarity"] == 0.5
    # Margin is top minus second: 0.0 - 0.5.
    assert shortlist["cosine_margin"] == round(-0.5, 6)
    assert shortlist["top1_dense_and_fused_agree"] is False

    policy = report["policy"]
    # The negative rule count is reported as zero; both "added" audit rows are counted.
    assert policy["rules_loaded"] == 0
    assert policy["includes_added"] == 2
    assert report["picked_count"] == 1
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def test_build_route_quality_hybrid_off_skips_agree() -> None:
    """Check that the top-1 agreement flag is None when ``router_hybrid`` is ``"off"``."""
    # Facets carry cosine values only; no routing scores are supplied.
    cosine_only = [
        {"name": "x", "cosine_similarity": 0.1},
        {"name": "y", "cosine_similarity": 0.9},
    ]
    call_args = dict(
        facet_list=cosine_only,
        router_mode="auto",
        router_hybrid="off",
        picked_names=[],
        rerouted=False,
        change=0.0,
        policy_rules_loaded=0,
        policy_audit=[],
        host_picked=False,
        pick_path="embedding_top",
    )
    report = build_route_quality(**call_args)
    agreement = report["shortlist"]["top1_dense_and_fused_agree"]
    assert agreement is None
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
def test_build_route_quality_rules_loaded_ok() -> None:
    """Check that a valid positive rule count is passed through to the report."""
    expected_rules = 12
    report = build_route_quality(
        facet_list=[],
        router_mode="auto",
        router_hybrid="off",
        picked_names=[],
        rerouted=False,
        change=0.0,
        policy_rules_loaded=expected_rules,
        policy_audit=[],
        host_picked=False,
        pick_path="host_shortlist",
    )
    policy_section = report["policy"]
    assert policy_section["rules_loaded"] == expected_rules
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
"""Project routing overlay: notes, excludes, boosts."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from app.route_policies import (
|
|
5
|
+
build_routing_overlay_payload,
|
|
6
|
+
merge_project_notes_into_route_query,
|
|
7
|
+
parse_routing_overlay,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def test_merge_notes_requires_project_root() -> None:
    """Check the query is returned unchanged when the project root is None or empty."""
    for missing_root in (None, ""):
        merged = merge_project_notes_into_route_query("hello", "note text", missing_root)
        assert merged == "hello"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def test_merge_notes_prepends_when_project_set() -> None:
    """Check the notes header leads the merged query when a project root is given."""
    merged = merge_project_notes_into_route_query(
        "task",
        "We use Django 5.",
        "/repo",
        max_chars=100,
    )
    # The header comes first; the note text and the original query are both present.
    assert merged.startswith("Project routing notes:\n")
    for fragment in ("Django", "task"):
        assert fragment in merged
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_parse_routing_overlay_boost_clamp() -> None:
    """Check that boosts of 9.0 and -9.0 come back as 2.0 and -2.0."""
    overlay = {"routing_boosts": {"a": 9.0, "b": -9.0}}
    known_skills = {"a": 1, "b": 1}
    excluded, boost_map, note_text = parse_routing_overlay(overlay, by_name=known_skills)

    # The overlay has no excludes and no notes, so both come back empty.
    assert not excluded
    assert boost_map["a"] == 2.0
    assert boost_map["b"] == -2.0
    assert note_text == ""
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_parse_exclude_unknown_with_audit() -> None:
    """Check that an unknown excluded skill is dropped and recorded in the audit list."""
    audit_log = []
    overlay = {"exclude_skills": ["ghost"]}
    parsed = parse_routing_overlay(overlay, by_name={"real": 1}, audit_out=audit_log)
    excluded = parsed[0]

    # "ghost" is not a known skill name, so it must not be in the exclude set.
    assert "ghost" not in excluded
    # The audit list passed in receives at least one "unknown_skill" entry.
    effects = [entry.get("effect") for entry in audit_log]
    assert "unknown_skill" in effects
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def test_build_payload_none_when_empty() -> None:
    """Check that no payload is built when every overlay input is empty."""
    payload = build_routing_overlay_payload(
        project_root="",
        exclude_skills=frozenset(),
        routing_boosts={},
        project_notes_applied=False,
        project_notes_len=0,
        audit=[],
    )
    assert payload is None
|
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
"""Tests for conversation-aware route text, skill cards, and hybrid helpers."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from app.main import Skill, parse_skill_md
|
|
8
|
+
from app.routing_signals import (
|
|
9
|
+
build_route_query_text,
|
|
10
|
+
host_pick_shortlist_lines,
|
|
11
|
+
keyword_overlap_scores,
|
|
12
|
+
normalize_minmax,
|
|
13
|
+
skill_routing_card,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def test_build_route_query_legacy(monkeypatch) -> None:
    """Check that with max turns set to "0" the prompt is returned as-is."""
    monkeypatch.setenv("SKILLFORGE_ROUTER_CONV_MAX_TURNS", "0")
    history = [{"role": "user", "content": "prev"}]
    query_text = build_route_query_text("hello", history)
    assert query_text == "hello"
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def test_build_route_query_merges_turns(monkeypatch) -> None:
    """Check that prior turns are merged ahead of the current message."""
    env_overrides = {
        "SKILLFORGE_ROUTER_CONV_MAX_TURNS": "2",
        "SKILLFORGE_ROUTER_CONV_MSG_CHARS": "80",
    }
    for env_name, env_value in env_overrides.items():
        monkeypatch.setenv(env_name, env_value)

    history = [
        {"role": "user", "content": "first msg"},
        {"role": "assistant", "content": "reply"},
    ]
    query_text = build_route_query_text("current ask", history)

    # Each earlier turn shows up as "<role>: <content>", followed by the marker line.
    for expected in ("user: first msg", "assistant: reply", "Current user message:"):
        assert expected in query_text
    # The current prompt is the tail of the merged text.
    assert query_text.endswith("current ask")
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_skill_routing_card_includes_triggers() -> None:
    """Check that the routing card shows the title plus trigger and anti-trigger lines."""
    skill_fields = {
        "name": "x",
        "title": "X Skill",
        "description": "does things",
        "body": "",
        "source": "bundled",
        "triggers": "when foo",
        "anti_triggers": "not bar",
    }
    rendered = skill_routing_card(Skill(**skill_fields))
    for expected in ("X Skill", "Triggers: when foo", "Anti-triggers: not bar"):
        assert expected in rendered
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def test_normalize_minmax() -> None:
    """Check min-max scaling on a spread array and on a constant array."""
    # Evenly spaced values are scaled to 0, 0.5 and 1.
    spread = np.array([1.0, 3.0, 5.0])
    scaled = normalize_minmax(spread)
    assert np.allclose(scaled, [0.0, 0.5, 1.0])
    # A constant array has no range; the expected result is all zeros.
    constant = np.array([2.0, 2.0, 2.0])
    flattened = normalize_minmax(constant)
    assert np.allclose(flattened, [0.0, 0.0, 0.0])
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_keyword_overlap_scores() -> None:
    """Check that a card sharing a query word scores above one sharing none."""
    query = "beta search"
    matching_card = "alpha beta gamma"
    unrelated_card = "foo bar"
    scores = keyword_overlap_scores(query, [matching_card, unrelated_card])
    # Scores are indexed in card order: the "beta" card must beat the other.
    assert scores[0] > scores[1]
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def test_host_pick_shortlist_lines_basic() -> None:
    """Check the markdown and the row data produced for a single-candidate shortlist."""
    candidate = {
        "name": "alpha-skill",
        "title": "Alpha",
        "cosine_similarity": 0.42,
        "description_preview": "Does alpha testing patterns for flaky CI.",
    }
    user_text = "fix flaky tests"
    markdown, entries = host_pick_shortlist_lines(
        prompt=user_text,
        route_query=user_text,
        facet_rows=[candidate],
        max_candidates=5,
        line_chars=90,
    )

    # The markdown mentions both the candidate and the user's request.
    assert "alpha-skill" in markdown
    assert "fix flaky" in markdown

    # Exactly one row comes back; its name and id are the skill name and its rank is 1.
    assert len(entries) == 1
    first_entry = entries[0]
    assert first_entry["name"] == "alpha-skill"
    assert first_entry["id"] == "alpha-skill"
    assert first_entry["rank"] == 1
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def test_normalize_host_picked_main() -> None:
    """Check ``normalize_host_picked_names`` on a noisy pick list and on an empty one."""
    from app.main import Skill, normalize_host_picked_names

    # Two known skills, "a" and "b", titled "A" and "B".
    catalog = {
        key: Skill(name=key, title=key.upper(), description="", body="", source="bundled")
        for key in ("a", "b")
    }
    # Duplicates and an unknown name with third argument 1 (presumably a cap on
    # the number of picks; confirm against app.main) leave only the first pick.
    noisy_picks = ["b", "a", "b", "unknown"]
    assert normalize_host_picked_names(noisy_picks, catalog, 1) == ["b"]
    # No picks in, no picks out.
    assert normalize_host_picked_names([], catalog, 7) == []
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def test_parse_skill_triggers(tmp_path) -> None:
    """Check that ``triggers`` and ``anti_triggers`` front matter reach the parsed skill."""
    skill_dir = tmp_path / "my-skill"
    skill_dir.mkdir(parents=True, exist_ok=True)
    skill_file = skill_dir / "SKILL.md"
    front_matter = (
        "---\nname: Nice\ndescription: Desc\ntriggers: when testing\n"
        "anti_triggers: never for prod\n---\n\n# Body\n"
    )
    skill_file.write_text(front_matter, encoding="utf-8")

    parsed = parse_skill_md(skill_file, "bundled")
    assert parsed is not None
    assert parsed.triggers == "when testing"
    assert parsed.anti_triggers == "never for prod"
|