@heytherevibin/skillforge 0.2.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -0
- package/README.md +44 -53
- package/RELEASING.md +1 -1
- package/SECURITY.md +2 -2
- package/STRATEGY.md +1 -3
- package/bin/cli.js +32 -138
- package/package.json +2 -2
- package/python/app/chunking.py +116 -0
- package/python/app/context_fusion.py +77 -0
- package/python/app/events_cli.py +1 -1
- package/python/app/index_cli.py +89 -0
- package/python/app/main.py +380 -214
- package/python/app/mcp_contract.py +121 -0
- package/python/app/mcp_server.py +80 -28
- package/python/app/project_index.py +600 -0
- package/python/app/redaction.py +128 -0
- package/python/app/route_cli.py +42 -19
- package/python/requirements.txt +0 -4
- package/python/tests/test_chunking.py +34 -0
- package/python/tests/test_context_fusion.py +45 -0
- package/python/tests/test_mcp_contract.py +137 -0
- package/python/tests/test_project_index.py +76 -0
- package/python/tests/test_redaction.py +51 -0
- package/python/app/auth.py +0 -63
- package/python/app/cli.py +0 -78
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
"""Best-effort redaction of secrets and user home paths in exported context (defense in depth)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
_HOME_RESOLVED: str | None = None
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def redaction_enabled() -> bool:
    """Whether exported-context redaction is active.

    Controlled by ``SKILLFORGE_REDACT_CONTEXT``; enabled by default, and any
    of ``0`` / ``false`` / ``no`` / empty (case-insensitive) turns it off.
    """
    raw = os.getenv("SKILLFORGE_REDACT_CONTEXT", "1")
    flag = raw.strip().lower()
    return flag not in ("0", "false", "no", "")
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def redact_home_in_paths_enabled() -> bool:
    """Whether home-directory prefixes should be masked in exported paths.

    Controlled by ``SKILLFORGE_REDACT_HOME_IN_PATHS``; enabled by default, and
    any of ``0`` / ``false`` / ``no`` / empty (case-insensitive) turns it off.
    """
    raw = os.getenv("SKILLFORGE_REDACT_HOME_IN_PATHS", "1")
    flag = raw.strip().lower()
    return flag not in ("0", "false", "no", "")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def _home_prefix() -> str | None:
    """Return the resolved home directory, or ``None`` when it cannot be found.

    The lookup is cached in the module-level ``_HOME_RESOLVED``; an empty
    string is stored on failure so the (possibly slow / raising) resolution
    is attempted at most once per process.
    """
    global _HOME_RESOLVED
    if _HOME_RESOLVED is None:
        try:
            _HOME_RESOLVED = str(Path.home().resolve())
        except Exception:
            # Cache the failure — best-effort redaction must never raise.
            _HOME_RESOLVED = ""
    return _HOME_RESOLVED if _HOME_RESOLVED else None
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# Ordered (compiled pattern, replacement) pairs applied to exported context.
# Each replacement keeps a hint of what was removed so output stays debuggable.
COMPILED: list[tuple[re.Pattern[str], str]] = [
    (re.compile(r"sk-ant-api\d\d-[A-Za-z0-9_\-]{20,}"), "[REDACTED_ANTHROPIC_KEY]"),
    (re.compile(r"\bAIza[0-9A-Za-z\-_]{35}\b"), "[REDACTED_GOOGLE_API_KEY]"),
    (re.compile(r"xox[baprs]-[0-9A-Za-z\-]{10,}"), "[REDACTED_SLACK_TOKEN]"),
    # GitHub issues several token prefixes: ghp_ (classic PAT), gho_ (OAuth),
    # ghu_ (user-to-server), ghs_ (server-to-server), ghr_ (refresh).
    # The previous gh[pP]_ only caught ghp_ (ghP_ is not a real prefix).
    (re.compile(r"gh[pousr]_[0-9A-Za-z]{36,}"), "[REDACTED_GITHUB_TOKEN]"),
    (re.compile(r"github_pat_[0-9A-Za-z_]{20,}"), "[REDACTED_GITHUB_PAT]"),
    (re.compile(
        r"-----BEGIN [A-Z ]*PRIVATE KEY-----[\s\S]*?-----END [A-Z ]*PRIVATE KEY-----",
        re.MULTILINE,
    ), "[REDACTED_PRIVATE_KEY]"),
    (re.compile(r"\bAKIA[0-9A-Z]{16}\b"), "[REDACTED_AWS_ACCESS_KEY_ID]"),
    (re.compile(r"\bASIA[0-9A-Z]{16}\b"), "[REDACTED_AWS_TEMP_KEY_ID]"),
    # OAuth / Bearer-style (avoid eating normal words — require length).
    # No trailing \b: a word boundary can never follow "=" padding, so the
    # old pattern left base64 "=="/"=" tails behind after redaction.
    (re.compile(r"\bBearer\s+[A-Za-z0-9\-._~+/]{16,}={0,2}", re.IGNORECASE), "Bearer [REDACTED]"),
    (re.compile(r"\bBasic\s+[A-Za-z0-9+/]{16,}={0,2}", re.IGNORECASE), "Basic [REDACTED]"),
    # Env assignment leaks in pasted logs
    (re.compile(
        r"\b(ANTHROPIC_API_KEY|OPENAI_API_KEY|"
        r"AWS_SECRET_ACCESS_KEY|AWS_SESSION_TOKEN|GITHUB_TOKEN|"
        r"HF_TOKEN|HUGGINGFACE_TOKEN|SLACK_BOT_TOKEN|DATABASE_URL|"
        r"SUPABASE_SERVICE_ROLE_KEY|SUPABASE_JWT_SECRET)\s*=\s*(\S+)",
        re.IGNORECASE,
    ), r"\1=[REDACTED]"),
]
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def redact_secret_patterns(text: str) -> tuple[str, int]:
    """Replace known secret shapes; returns ``(new_text, number_of_pattern_matches)``.

    Empty / falsy input is returned unchanged with a zero count. Each pattern
    in ``COMPILED`` is applied in order, so earlier replacements can shadow
    later, more general ones.
    """
    if not text:
        return text, 0
    hits = 0
    out = text
    for pat, repl in COMPILED:
        # subn() substitutes and counts in one pass; the previous
        # findall() + sub() scanned the text twice per pattern.
        out, n = pat.subn(repl, out)
        hits += n
    return out, hits
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def redact_home_path_prefix(path: str) -> tuple[str, int]:
    """If ``path`` starts with the resolved home directory, replace that prefix with ``[HOME]``.

    Returns ``(possibly-rewritten path, 1 if a replacement happened else 0)``.
    Respects the ``SKILLFORGE_REDACT_HOME_IN_PATHS`` toggle and never raises.
    """
    if not path or not redact_home_in_paths_enabled():
        return path, 0
    home = _home_prefix()
    if not home:
        return path, 0
    # Normalize slashes for comparison only; suffixes are sliced from the
    # original `path` (same length, since replace() is 1:1).
    norm = path.replace("\\", "/")
    home_n = home.replace("\\", "/")
    if norm == home_n or norm.rstrip("/") == home_n.rstrip("/"):
        return "[HOME]", 1
    # `norm` contains no backslashes after normalization, so only the "/"
    # separator needs checking (the original also tested home_n + "\\",
    # which could never match and has been removed).
    if norm.startswith(home_n + "/"):
        rest = path[len(home) :].lstrip("/\\")
        return "[HOME]/" + rest.replace("\\", "/"), 1
    # Windows-style profile (best effort when HOME is /Users/x but path is C:\Users\x)
    if len(path) > 3 and path[1] == ":":
        try:
            from os.path import expanduser

            eu = expanduser("~")
            if eu and path.lower().startswith(eu.lower().replace("/", "\\")):
                return "[HOME]/" + path[len(eu) :].lstrip("\\/").replace("\\", "/"), 1
        except Exception:
            pass
    return path, 0
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def redact_context_path_field(path: str | None) -> tuple[str | None, int]:
    """Redact the home-directory prefix in an optional context ``path`` field.

    ``None`` and empty strings pass through untouched with a zero hit count.
    """
    if path:
        return redact_home_path_prefix(path)
    return path, 0
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def sanitize_context_items(items: list[dict]) -> tuple[int, int]:
    """Mutate each item's ``text`` / ``path`` in place. Returns ``(secret_hits, path_hits)``."""
    secret_hits = 0
    path_hits = 0
    for item in items:
        # Scrub secret-shaped tokens out of the chunk text.
        cleaned_text, n_text = redact_secret_patterns(item.get("text") or "")
        if n_text:
            secret_hits += n_text
            item["text"] = cleaned_text
        # Mask the home-directory prefix in the source path, when present.
        raw_path = item.get("path")
        if raw_path is not None:
            cleaned_path, n_path = redact_context_path_field(str(raw_path))
            if n_path:
                path_hits += n_path
                item["path"] = cleaned_path
    return secret_hits, path_hits
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
def redact_display_path(p: str | Path) -> str:
    """Single path string safe for logs / ``_meta`` (home prefix only + pattern redaction)."""
    text = str(p)
    # Home masking first, then secret-shape scrubbing; hit counts are not needed here.
    for scrub in (redact_home_path_prefix, redact_secret_patterns):
        text, _ = scrub(text)
    return text
|
package/python/app/route_cli.py
CHANGED
|
@@ -9,7 +9,14 @@ import time
|
|
|
9
9
|
from pathlib import Path
|
|
10
10
|
|
|
11
11
|
from app.db_paths import resolve_orchestrator_db
|
|
12
|
-
from app.main import
|
|
12
|
+
from app.main import (
|
|
13
|
+
build_router_and_skills,
|
|
14
|
+
format_context_items_markdown,
|
|
15
|
+
init_db,
|
|
16
|
+
run_route_turn,
|
|
17
|
+
)
|
|
18
|
+
from app.mcp_contract import MCP_RESPONSE_SCHEMA_VERSION, build_route_skills_meta
|
|
19
|
+
from app.redaction import redaction_enabled, redact_display_path
|
|
13
20
|
|
|
14
21
|
|
|
15
22
|
def _parse_args(argv: list[str] | None) -> argparse.Namespace:
|
|
@@ -28,6 +35,11 @@ def _parse_args(argv: list[str] | None) -> argparse.Namespace:
|
|
|
28
35
|
p.add_argument("--session-id", default="", help="Stable session id (reuse across turns for reroute stats).")
|
|
29
36
|
p.add_argument("--user-id", default="", help="Logical user id for weights/sessions/events.")
|
|
30
37
|
p.add_argument("--json-meta", action="store_true", help="Print routing metadata as JSON on stderr after output.")
|
|
38
|
+
p.add_argument(
|
|
39
|
+
"--include-project-rag",
|
|
40
|
+
action="store_true",
|
|
41
|
+
help="Append chunks from `skillforge index` (same DB as --project-root). Requires --project-root.",
|
|
42
|
+
)
|
|
31
43
|
return p.parse_args(argv)
|
|
32
44
|
|
|
33
45
|
|
|
@@ -38,6 +50,9 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
38
50
|
return 2
|
|
39
51
|
|
|
40
52
|
pr = (args.project_root or "").strip() or None
|
|
53
|
+
if args.include_project_rag and not pr:
|
|
54
|
+
print("skillforge route: --include-project-rag requires --project-root.", file=sys.stderr)
|
|
55
|
+
return 2
|
|
41
56
|
db_path = resolve_orchestrator_db(pr)
|
|
42
57
|
con = init_db(db_path)
|
|
43
58
|
|
|
@@ -53,6 +68,8 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
53
68
|
conversation=[],
|
|
54
69
|
user_id=user_id,
|
|
55
70
|
session_id=session_id,
|
|
71
|
+
project_root=pr,
|
|
72
|
+
include_project_rag=bool(args.include_project_rag),
|
|
56
73
|
)
|
|
57
74
|
finally:
|
|
58
75
|
con.close()
|
|
@@ -60,6 +77,7 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
60
77
|
picked_names = result["picked_names"]
|
|
61
78
|
reasoning = result["reasoning"]
|
|
62
79
|
sid = result["session_id"]
|
|
80
|
+
context_items = result.get("context_items") or []
|
|
63
81
|
|
|
64
82
|
if pr:
|
|
65
83
|
try:
|
|
@@ -73,36 +91,41 @@ async def _run(args: argparse.Namespace) -> int:
|
|
|
73
91
|
"route_ms": round(result["route_ms"], 1),
|
|
74
92
|
"user_id": user_id,
|
|
75
93
|
"source": "cli_route",
|
|
94
|
+
"schema_version": MCP_RESPONSE_SCHEMA_VERSION,
|
|
95
|
+
"context_mode": router.context_mode,
|
|
96
|
+
"context_items_count": len(context_items),
|
|
97
|
+
"project_rag_items_count": (result.get("event") or {}).get("project_rag_items_count", 0),
|
|
76
98
|
}
|
|
77
99
|
(d / "last_route.json").write_text(json.dumps(snap, indent=2), encoding="utf-8")
|
|
78
100
|
except OSError:
|
|
79
101
|
pass
|
|
80
102
|
|
|
103
|
+
db_disp = redact_display_path(db_path) if redaction_enabled() else str(db_path)
|
|
81
104
|
blocks = [
|
|
82
|
-
f"# Skillforge — routed {len(picked_names)} skill(s)",
|
|
83
|
-
f"_DB:_ `{
|
|
105
|
+
f"# Skillforge — routed {len(picked_names)} skill(s); context=`{router.context_mode}`",
|
|
106
|
+
f"_DB:_ `{db_disp}`",
|
|
84
107
|
f"_Reasoning: {reasoning}_" if reasoning else "",
|
|
85
108
|
"",
|
|
86
109
|
]
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
blocks.append(f"---\n## Skill: {s.name}\n\n{s.body}\n")
|
|
91
|
-
if not picked_names:
|
|
110
|
+
if context_items:
|
|
111
|
+
blocks.append(format_context_items_markdown(context_items))
|
|
112
|
+
elif not picked_names:
|
|
92
113
|
blocks.append("_No skills matched this prompt closely enough to load._")
|
|
93
|
-
|
|
114
|
+
response_text = "\n".join(b for b in blocks if b is not None)
|
|
115
|
+
print(response_text)
|
|
94
116
|
|
|
95
117
|
if args.json_meta:
|
|
96
|
-
meta =
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
"
|
|
105
|
-
|
|
118
|
+
meta = build_route_skills_meta(
|
|
119
|
+
result=result,
|
|
120
|
+
picked_names=picked_names,
|
|
121
|
+
user_id=user_id,
|
|
122
|
+
db_path=db_path,
|
|
123
|
+
skills_map=skills,
|
|
124
|
+
response_text=response_text,
|
|
125
|
+
context_items=context_items,
|
|
126
|
+
fusion=(result.get("event") or {}).get("context_fusion"),
|
|
127
|
+
context_redaction=(result.get("event") or {}).get("context_redaction"),
|
|
128
|
+
)
|
|
106
129
|
print(json.dumps(meta, indent=2), file=sys.stderr)
|
|
107
130
|
|
|
108
131
|
return 0
|
package/python/requirements.txt
CHANGED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
"""Unit tests for skill body chunking (no ML)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from app.chunking import chunk_raw_document, chunk_skill_body
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def test_chunk_respects_headings() -> None:
|
|
8
|
+
body = "# Title\n\nintro\n\n## A\n\none\n\n## B\n\ntwo three"
|
|
9
|
+
chunks = chunk_skill_body(body, max_chars=500, overlap=50)
|
|
10
|
+
assert len(chunks) >= 2
|
|
11
|
+
names = [c.text for c in chunks]
|
|
12
|
+
assert any("one" in t for t in names)
|
|
13
|
+
assert any("two three" in t for t in names)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def test_chunk_line_numbers_monotonic() -> None:
|
|
17
|
+
body = "a\nb\nc\nd"
|
|
18
|
+
chunks = chunk_skill_body(body, max_chars=5, overlap=0)
|
|
19
|
+
assert chunks
|
|
20
|
+
for c in chunks:
|
|
21
|
+
assert c.line_start <= c.line_end
|
|
22
|
+
assert c.line_start >= 1
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def test_empty_body() -> None:
|
|
26
|
+
assert chunk_skill_body("", max_chars=100, overlap=0) == []
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def test_chunk_raw_document_small_file() -> None:
|
|
30
|
+
body = "line1\nline2\nline3"
|
|
31
|
+
chunks = chunk_raw_document(body, max_chars=100, overlap=0)
|
|
32
|
+
assert len(chunks) == 1
|
|
33
|
+
assert chunks[0].line_start == 1
|
|
34
|
+
assert "line1" in chunks[0].text
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
"""Tests for MMR context fusion (numpy only)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from app.context_fusion import mmr_select
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_mmr_prefers_diverse_second_item() -> None:
|
|
10
|
+
"""Two near-duplicate high-rel docs: second pick should favor the orthogonal one when lambda < 1."""
|
|
11
|
+
# query-aligned
|
|
12
|
+
e0 = np.array([1.0, 0.0, 0.0], dtype=np.float32)
|
|
13
|
+
e1 = np.array([0.99, 0.14, 0.0], dtype=np.float32) # almost same as e0
|
|
14
|
+
e2 = np.array([0.0, 1.0, 0.0], dtype=np.float32) # different direction
|
|
15
|
+
emb = np.stack([e0, e1, e2], axis=0)
|
|
16
|
+
rel = np.array([1.0, 0.98, 0.5], dtype=np.float64)
|
|
17
|
+
lens = np.array([10, 10, 10], dtype=np.int64)
|
|
18
|
+
ovh = np.full(3, 8, dtype=np.int64)
|
|
19
|
+
order, trace = mmr_select(
|
|
20
|
+
emb,
|
|
21
|
+
rel,
|
|
22
|
+
lens,
|
|
23
|
+
char_budget=500,
|
|
24
|
+
overhead_per_chunk=ovh,
|
|
25
|
+
lambda_mult=0.5,
|
|
26
|
+
)
|
|
27
|
+
assert order[0] == 0
|
|
28
|
+
assert order[1] == 2
|
|
29
|
+
assert len(trace) == len(order)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def test_mmr_respects_char_budget() -> None:
|
|
33
|
+
emb = np.eye(3, dtype=np.float32)
|
|
34
|
+
rel = np.array([1.0, 0.9, 0.8])
|
|
35
|
+
lens = np.array([100, 100, 100], dtype=np.int64)
|
|
36
|
+
ovh = np.array([10, 10, 10], dtype=np.int64)
|
|
37
|
+
order, _ = mmr_select(
|
|
38
|
+
emb,
|
|
39
|
+
rel,
|
|
40
|
+
lens,
|
|
41
|
+
char_budget=150,
|
|
42
|
+
overhead_per_chunk=ovh,
|
|
43
|
+
lambda_mult=1.0,
|
|
44
|
+
)
|
|
45
|
+
assert len(order) == 1
|
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
"""Tests for MCP Phase 0 response contract (no heavy deps)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from types import SimpleNamespace
|
|
5
|
+
|
|
6
|
+
from app.mcp_contract import MCP_RESPONSE_SCHEMA_VERSION, build_route_skills_meta
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def test_build_route_skills_meta_basic() -> None:
|
|
10
|
+
sk_a = SimpleNamespace(name="skill-a", body="alpha" * 100)
|
|
11
|
+
sk_b = SimpleNamespace(name="skill-b", body="beta")
|
|
12
|
+
skills_map = {"skill-a": sk_a, "skill-b": sk_b}
|
|
13
|
+
cand_skill = SimpleNamespace(name="skill-a")
|
|
14
|
+
result = {
|
|
15
|
+
"candidates": [(cand_skill, 0.91), (sk_b, 0.5)],
|
|
16
|
+
"reasoning": "test",
|
|
17
|
+
"session_id": "sid-1",
|
|
18
|
+
"rerouted": False,
|
|
19
|
+
"change": 0.2,
|
|
20
|
+
"route_ms": 12.3,
|
|
21
|
+
}
|
|
22
|
+
text = "# header\n\nbody"
|
|
23
|
+
meta = build_route_skills_meta(
|
|
24
|
+
result=result,
|
|
25
|
+
picked_names=["skill-a"],
|
|
26
|
+
user_id="u1",
|
|
27
|
+
db_path="/tmp/db.sqlite",
|
|
28
|
+
skills_map=skills_map,
|
|
29
|
+
response_text=text,
|
|
30
|
+
)
|
|
31
|
+
assert meta["schema_version"] == MCP_RESPONSE_SCHEMA_VERSION
|
|
32
|
+
assert "fusion" not in meta
|
|
33
|
+
assert meta["context_items_count"] == 0
|
|
34
|
+
assert meta["budget"]["chars_project_chunks"] == 0
|
|
35
|
+
assert meta["budget"]["chars_context_items_total"] == meta["budget"]["chars_skill_bodies"]
|
|
36
|
+
assert meta["sources"] == [
|
|
37
|
+
{"kind": "skill", "ref": "skill-a", "line_start": None, "line_end": None, "score": None},
|
|
38
|
+
]
|
|
39
|
+
assert meta["budget"]["chars_skill_bodies"] == 100 * len("alpha")
|
|
40
|
+
assert meta["budget"]["chars_response_total"] == len(text)
|
|
41
|
+
assert meta["picked"] == ["skill-a"]
|
|
42
|
+
assert len(meta["candidates_preview"]) >= 1
|
|
43
|
+
assert meta["candidates_preview"][0]["name"] == "skill-a"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def test_build_route_skills_meta_with_context_items() -> None:
    """Meta reflects context items: count, per-source line spans, and char budgets."""
    # Use the module-level SimpleNamespace import directly; the previous
    # __import__("types").SimpleNamespace indirection was an unnecessary
    # dynamic import of an already-imported name.
    sk = SimpleNamespace(name="skill-a", body="full")
    skills_map = {"skill-a": sk}
    result = {"candidates": [], "reasoning": "r", "session_id": "s", "rerouted": False, "change": 0.0, "route_ms": 1.0}
    items = [
        {"skill": "skill-a", "path": None, "line_start": 1, "line_end": 5, "text": "chunktext", "score": 0.88},
    ]
    meta = build_route_skills_meta(
        result=result,
        picked_names=["skill-a"],
        user_id="u",
        db_path="db.sqlite",
        skills_map=skills_map,
        response_text="out",
        context_items=items,
    )
    assert meta["schema_version"] == MCP_RESPONSE_SCHEMA_VERSION
    assert meta["context_items_count"] == 1
    assert meta["sources"][0]["line_start"] == 1
    assert meta["sources"][0]["line_end"] == 5
    assert meta["budget"]["chars_skill_bodies"] == len("chunktext")
    assert meta["budget"]["chars_project_chunks"] == 0
    assert meta["budget"]["chars_context_items_total"] == len("chunktext")
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def test_build_route_skills_meta_mixed_skill_and_file() -> None:
|
|
72
|
+
skills_map = {}
|
|
73
|
+
result = {"candidates": [], "session_id": "s", "rerouted": False, "change": 0.0, "route_ms": 1.0}
|
|
74
|
+
items = [
|
|
75
|
+
{"skill": "sk", "path": None, "line_start": 1, "line_end": 2, "text": "AA", "score": 0.9},
|
|
76
|
+
{"skill": None, "path": "lib/x.py", "line_start": 10, "line_end": 12, "text": "BB", "score": 0.8},
|
|
77
|
+
]
|
|
78
|
+
meta = build_route_skills_meta(
|
|
79
|
+
result=result,
|
|
80
|
+
picked_names=["sk"],
|
|
81
|
+
user_id="u",
|
|
82
|
+
db_path="db.sqlite",
|
|
83
|
+
skills_map=skills_map,
|
|
84
|
+
response_text="o",
|
|
85
|
+
context_items=items,
|
|
86
|
+
)
|
|
87
|
+
assert meta["sources"][0]["kind"] == "skill"
|
|
88
|
+
assert meta["sources"][1]["kind"] == "file"
|
|
89
|
+
assert meta["sources"][1]["ref"] == "lib/x.py"
|
|
90
|
+
assert meta["budget"]["chars_skill_bodies"] == 2
|
|
91
|
+
assert meta["budget"]["chars_project_chunks"] == 2
|
|
92
|
+
assert meta["budget"]["chars_context_items_total"] == 4
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def test_build_route_skills_meta_includes_fusion() -> None:
|
|
96
|
+
items = [
|
|
97
|
+
{"skill": "sk", "path": None, "line_start": 1, "line_end": 2, "text": "a", "score": 0.9, "mmr_rank": 1},
|
|
98
|
+
]
|
|
99
|
+
meta = build_route_skills_meta(
|
|
100
|
+
result={"candidates": [], "session_id": "s", "rerouted": False, "change": 0.0, "route_ms": 1.0},
|
|
101
|
+
picked_names=["sk"],
|
|
102
|
+
user_id="u",
|
|
103
|
+
db_path="db.sqlite",
|
|
104
|
+
skills_map={},
|
|
105
|
+
response_text="o",
|
|
106
|
+
context_items=items,
|
|
107
|
+
fusion={"enabled": True, "lambda": 0.7, "selected_count": 1},
|
|
108
|
+
)
|
|
109
|
+
assert meta["fusion"]["enabled"] is True
|
|
110
|
+
assert meta["sources"][0].get("mmr_rank") == 1
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def test_build_route_skills_meta_includes_context_redaction() -> None:
|
|
114
|
+
meta = build_route_skills_meta(
|
|
115
|
+
result={"candidates": [], "session_id": "s", "rerouted": False, "change": 0.0, "route_ms": 1.0},
|
|
116
|
+
picked_names=[],
|
|
117
|
+
user_id="u",
|
|
118
|
+
db_path="db.sqlite",
|
|
119
|
+
skills_map={},
|
|
120
|
+
response_text="x",
|
|
121
|
+
context_redaction={"enabled": True, "secret_hits": 2, "path_hits": 1},
|
|
122
|
+
)
|
|
123
|
+
assert meta["context_redaction"]["secret_hits"] == 2
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def test_build_route_skills_meta_error_field() -> None:
|
|
127
|
+
meta = build_route_skills_meta(
|
|
128
|
+
result={"candidates": []},
|
|
129
|
+
picked_names=[],
|
|
130
|
+
user_id="",
|
|
131
|
+
db_path="x.db",
|
|
132
|
+
skills_map={},
|
|
133
|
+
response_text="err",
|
|
134
|
+
error="empty_prompt",
|
|
135
|
+
)
|
|
136
|
+
assert meta["error"] == "empty_prompt"
|
|
137
|
+
assert meta["sources"] == []
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
"""Project index: DB + retrieval (lightweight fake embedder)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import sqlite3
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
import numpy as np
|
|
8
|
+
|
|
9
|
+
from app.project_index import (
|
|
10
|
+
ensure_project_index_schema,
|
|
11
|
+
index_project,
|
|
12
|
+
is_indexable_file,
|
|
13
|
+
retrieve_project_context_items,
|
|
14
|
+
should_skip_dir,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class _FakeEmbed:
|
|
19
|
+
dim = 16
|
|
20
|
+
|
|
21
|
+
def encode(self, texts, **kwargs):
|
|
22
|
+
if isinstance(texts, str):
|
|
23
|
+
texts = [texts]
|
|
24
|
+
out = []
|
|
25
|
+
for t in texts:
|
|
26
|
+
seed = sum(ord(c) for c in t[:120]) % (2**31)
|
|
27
|
+
rng = np.random.RandomState(seed)
|
|
28
|
+
v = rng.randn(self.dim).astype(np.float32)
|
|
29
|
+
nrm = float(np.linalg.norm(v)) or 1.0
|
|
30
|
+
v /= nrm
|
|
31
|
+
out.append(v)
|
|
32
|
+
return np.stack(out, axis=0)
|
|
33
|
+
|
|
34
|
+
def get_sentence_embedding_dimension(self):
|
|
35
|
+
return self.dim
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def test_should_skip_dir() -> None:
|
|
39
|
+
assert should_skip_dir("node_modules")
|
|
40
|
+
assert not should_skip_dir("src")
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_is_indexable_file() -> None:
|
|
44
|
+
assert is_indexable_file(Path("foo.py"))
|
|
45
|
+
assert not is_indexable_file(Path("image.png"))
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def test_index_and_retrieve_roundtrip(tmp_path, monkeypatch) -> None:
|
|
49
|
+
monkeypatch.setenv("SKILLFORGE_EMBED_MODEL", "fake-for-test")
|
|
50
|
+
monkeypatch.setenv("SKILLFORGE_PROJECT_RAG_MAX_CHARS", "8000")
|
|
51
|
+
|
|
52
|
+
root = tmp_path / "proj"
|
|
53
|
+
root.mkdir()
|
|
54
|
+
(root / "src").mkdir()
|
|
55
|
+
(root / "src" / "hello.py").write_text(
|
|
56
|
+
"def hello():\n return 42\n\n# explanation\n",
|
|
57
|
+
encoding="utf-8",
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
db_path = tmp_path / "orchestrator.db"
|
|
61
|
+
con = sqlite3.connect(str(db_path))
|
|
62
|
+
ensure_project_index_schema(con)
|
|
63
|
+
try:
|
|
64
|
+
fake = _FakeEmbed()
|
|
65
|
+
stats = index_project(con, root, fake, reset=True)
|
|
66
|
+
assert stats["chunks_written"] >= 1
|
|
67
|
+
cur = con.execute("SELECT COUNT(*) FROM project_chunks")
|
|
68
|
+
assert int(cur.fetchone()[0]) >= 1
|
|
69
|
+
|
|
70
|
+
items = retrieve_project_context_items(con, fake, "hello function return", max_total_chars=5000)
|
|
71
|
+
assert items
|
|
72
|
+
assert items[0]["path"] == "src/hello.py"
|
|
73
|
+
assert items[0]["line_start"] >= 1
|
|
74
|
+
assert "42" in items[0]["text"] or "hello" in items[0]["text"]
|
|
75
|
+
finally:
|
|
76
|
+
con.close()
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Tests for context redaction (stdlib)."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
import app.redaction as R
|
|
5
|
+
from app.redaction import (
|
|
6
|
+
redact_context_path_field,
|
|
7
|
+
redact_home_path_prefix,
|
|
8
|
+
redact_secret_patterns,
|
|
9
|
+
sanitize_context_items,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_redact_anthropic_key_shape() -> None:
|
|
14
|
+
t, n = redact_secret_patterns(
|
|
15
|
+
"token sk-ant-api03-ABCDEFGHIJKLMNOPQRSTUVWXYZ123456"
|
|
16
|
+
)
|
|
17
|
+
assert n >= 1
|
|
18
|
+
assert "sk-ant-api" not in t
|
|
19
|
+
assert "[REDACTED_ANTHROPIC_KEY]" in t
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def test_redact_assignment_line() -> None:
|
|
23
|
+
t, n = redact_secret_patterns("export ANTHROPIC_API_KEY=sk-not-real-value-here")
|
|
24
|
+
assert n >= 1
|
|
25
|
+
assert "sk-not-real" not in t
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def test_redact_home_path_prefix() -> None:
|
|
29
|
+
R._HOME_RESOLVED = "/Users/testuser"
|
|
30
|
+
s, n = redact_home_path_prefix("/Users/testuser/repos/app/main.py")
|
|
31
|
+
assert n == 1
|
|
32
|
+
assert s == "[HOME]/repos/app/main.py"
|
|
33
|
+
R._HOME_RESOLVED = None # reset for other tests
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_sanitize_context_mutates_text_and_path() -> None:
|
|
37
|
+
R._HOME_RESOLVED = "/Users/x"
|
|
38
|
+
items = [
|
|
39
|
+
{"skill": "s", "path": "/Users/x/p/a.py", "text": "k sk-ant-api03-AAAAAAAAAAAAAAAAAAAAAAAAAAAA", "line_start": 1, "line_end": 2},
|
|
40
|
+
]
|
|
41
|
+
sh, ph = sanitize_context_items(items)
|
|
42
|
+
assert sh >= 1
|
|
43
|
+
assert ph >= 1
|
|
44
|
+
assert "sk-ant" not in items[0]["text"]
|
|
45
|
+
assert str(items[0]["path"]).startswith("[HOME]/")
|
|
46
|
+
R._HOME_RESOLVED = None
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
def test_redact_context_path_none() -> None:
|
|
50
|
+
s, n = redact_context_path_field(None)
|
|
51
|
+
assert s is None and n == 0
|
package/python/app/auth.py
DELETED
|
@@ -1,63 +0,0 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Bearer-token auth and per-user namespacing.
|
|
3
|
-
|
|
4
|
-
Single-user mode (default): no token required, all state goes to user_id=''.
|
|
5
|
-
Multi-user mode: set SKILLFORGE_AUTH_TOKENS env var to a JSON map of
|
|
6
|
-
{"token-value": "user-id"}. Requests must send Authorization: Bearer <token>.
|
|
7
|
-
The resolved user_id is then used to scope sessions, weights, and events.
|
|
8
|
-
|
|
9
|
-
This keeps the architecture single-process (one SQLite, one router instance)
|
|
10
|
-
while letting each user have isolated learning state.
|
|
11
|
-
"""
|
|
12
|
-
from __future__ import annotations
|
|
13
|
-
|
|
14
|
-
import json
|
|
15
|
-
import os
|
|
16
|
-
from typing import Optional
|
|
17
|
-
|
|
18
|
-
from fastapi import HTTPException, Request
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
# ---- Token registry ----
|
|
22
|
-
|
|
23
|
-
def _load_tokens() -> dict[str, str]:
|
|
24
|
-
"""Read SKILLFORGE_AUTH_TOKENS env (JSON: {token: user_id})."""
|
|
25
|
-
raw = os.getenv("SKILLFORGE_AUTH_TOKENS", "").strip()
|
|
26
|
-
if not raw:
|
|
27
|
-
return {}
|
|
28
|
-
try:
|
|
29
|
-
m = json.loads(raw)
|
|
30
|
-
if not isinstance(m, dict):
|
|
31
|
-
print("[skillforge] SKILLFORGE_AUTH_TOKENS must be a JSON object")
|
|
32
|
-
return {}
|
|
33
|
-
return {str(k): str(v) for k, v in m.items()}
|
|
34
|
-
except json.JSONDecodeError:
|
|
35
|
-
print("[skillforge] SKILLFORGE_AUTH_TOKENS is not valid JSON, ignoring")
|
|
36
|
-
return {}
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
_TOKENS = _load_tokens()
|
|
40
|
-
_AUTH_REQUIRED = bool(_TOKENS)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
def auth_enabled() -> bool:
|
|
44
|
-
return _AUTH_REQUIRED
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
def resolve_user(request: Request) -> str:
|
|
48
|
-
"""Get user_id from a request.
|
|
49
|
-
|
|
50
|
-
- If auth is not configured (single-user mode): returns ''.
|
|
51
|
-
- If auth is configured: extracts bearer token, returns mapped user_id,
|
|
52
|
-
or raises 401.
|
|
53
|
-
"""
|
|
54
|
-
if not _AUTH_REQUIRED:
|
|
55
|
-
return ""
|
|
56
|
-
header = request.headers.get("authorization", "")
|
|
57
|
-
if not header.lower().startswith("bearer "):
|
|
58
|
-
raise HTTPException(status_code=401, detail="Missing bearer token")
|
|
59
|
-
token = header[7:].strip()
|
|
60
|
-
user_id = _TOKENS.get(token)
|
|
61
|
-
if not user_id:
|
|
62
|
-
raise HTTPException(status_code=401, detail="Invalid token")
|
|
63
|
-
return user_id
|