agent-skilltree 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
skilltree/registry.py ADDED
@@ -0,0 +1,120 @@
1
+ """P5.2 — the marketplace as a git repo: registry.json + a contribution gate.
2
+
3
+ The registry is DATA (pointers), never code we run. A contribution is a PR that
4
+ ADDS entries — and the gate enforces the locked policy:
5
+
6
+ - new entries must land `unverified` (promotion is maintainer-only)
7
+ - contributions may not change another entry's trust, remove others' entries,
8
+ or rename/re-parent the registry
9
+ - every entry carries provenance
10
+
11
+ `promote()` is the separate, maintainer-only step that flips trust upward. This
12
+ is "validate → queue → gated promote" — no auto-merge of agent-loadable content.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ from dataclasses import dataclass, field
17
+ import datetime as _dt
18
+ import json
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
+ KINDS = ("skill", "tree", "exchange", "mcp", "registry") # `registry` = a federated child marketplace
23
+ TRUST = ("unverified", "verified", "featured")
24
+ _REQUIRED = ("name", "kind", "repo")
25
+
26
+
27
@dataclass
class Entry:
    """One registry pointer: what it is, where it lives, and how far it is trusted."""

    # Identity and location — the three keys the schema check requires.
    name: str
    kind: str
    repo: str
    # Optional pointer details.
    manifest: str = ""
    version: str = "0.1.0"
    # Trust level; defaults to the lowest one.
    trust: str = "unverified"
    # Provenance record; every instance gets its own fresh dict.
    provenance: dict = field(default_factory=dict)
36
+
37
+
38
def load_registry(path: str | Path) -> dict:
    """Read the UTF-8 JSON registry document at `path` and return the parsed object."""
    raw = Path(path).read_text(encoding="utf-8")
    return json.loads(raw)
40
+
41
+
42
+ def _by_name(data: dict) -> dict[str, dict]:
43
+ return {e["name"]: e for e in data.get("entries", [])}
44
+
45
+
46
def validate_registry(data: dict) -> list[str]:
    """Schema check on a registry document.

    Never raises on malformed input: a non-object document, non-object
    entries, and non-string or unhashable field values are all reported as
    errors.

    Args:
        data: the parsed registry document.

    Returns:
        Human-readable error strings; an empty list means the document is
        well-formed.
    """
    if not isinstance(data, dict):
        return ["registry: document must be a JSON object"]
    errs: list[str] = []
    if not isinstance(data.get("name"), str):
        errs.append("registry: missing string `name`")
    if "parent" not in data:
        errs.append("registry: missing `parent` (use null at the root)")
    entries = data.get("entries")
    if not isinstance(entries, list):
        errs.append("registry: `entries` must be a list")
        return errs  # nothing to iterate
    seen: set[str] = set()
    for i, e in enumerate(entries):
        if not isinstance(e, dict):
            errs.append(f"entry #{i}: must be an object")
            continue
        name = e.get("name")
        # Label errors by name when there is a usable one, else by position.
        tag = name if isinstance(name, str) and name else f"#{i}"
        for f in _REQUIRED:
            value = e.get(f)
            if not value:
                errs.append(f"entry {tag}: missing `{f}`")
            elif not isinstance(value, str):
                errs.append(f"entry {tag}: `{f}` must be a string")
        if e.get("kind") not in KINDS:
            errs.append(f"entry {tag}: kind must be one of {KINDS}")
        if e.get("trust", "unverified") not in TRUST:
            errs.append(f"entry {tag}: trust must be one of {TRUST}")
        # Only string names take part in duplicate detection, so an
        # unhashable value can no longer raise here.
        if isinstance(name, str):
            if name in seen:
                errs.append(f"entry {tag}: duplicate name")
            seen.add(name)
    return errs
70
+
71
+
72
def validate_contribution(base: dict, head: dict, *, actor: str | None = None) -> list[str]:
    """Gate a PR's registry change. Empty list = a valid (queued, unverified) contribution.

    Args:
        base: the registry document before the PR.
        head: the registry document as proposed by the PR.
        actor: accepted for interface compatibility; this function does not
            consult it.

    Returns:
        Error strings. Schema errors on `head` short-circuit the policy
        checks. Policy errors are emitted in sorted-name order, so the output
        is stable between runs.
    """
    errs = validate_registry(head)
    if errs:
        return errs
    if head.get("name") != base.get("name"):
        errs.append("contribution may not rename the registry")
    if head.get("parent") != base.get("parent"):
        errs.append("contribution may not change the registry parent")
    b, h = _by_name(base), _by_name(head)
    for name in sorted(h.keys() - b.keys()):  # ADDED
        e = h[name]
        if e.get("trust", "unverified") != "unverified":
            errs.append(f"new entry {name!r} must be `unverified` (promotion is maintainer-only)")
        prov = e.get("provenance")
        # A non-object provenance cannot carry `by`; treat it as absent.
        if not (isinstance(prov, dict) and prov.get("by")):
            errs.append(f"new entry {name!r} must carry provenance.by")
    for name in sorted(b.keys() - h.keys()):  # REMOVED
        errs.append(f"contribution removes existing entry {name!r} (maintainer-only)")
    for name in sorted(b.keys() & h.keys()):  # MODIFIED
        # An absent `trust` means `unverified` everywhere else in this module,
        # so spelling the default out (or dropping it) is not a trust change.
        if h[name].get("trust", "unverified") != b[name].get("trust", "unverified"):
            errs.append(f"contribution changes trust of {name!r} (maintainer-only — use promote)")
        # NOTE(review): other fields of an existing entry (e.g. `repo`) may
        # still be edited without tripping the gate — confirm whether
        # re-pointing an already-promoted entry should be maintainer-only too.
    return errs
94
+
95
+
96
def promote(path: str | Path, name: str, *, to: str = "verified", by: str | None = None,
            now: str | None = None) -> Entry:
    """Maintainer-only: flip an entry's trust. Direct (not via PR).

    Rewrites the registry file in place, stamping `promoted_by` and
    `promoted_at` into the entry's provenance.

    Args:
        path: registry JSON file to update.
        name: name of the entry to change.
        to: target trust level; must be one of `TRUST`.
        by: who promoted; recorded as "maintainer" when omitted.
        now: timestamp to record; defaults to the current local time.

    Returns:
        The updated entry. Optional fields absent from the file take the
        `Entry` defaults instead of `None`.

    Raises:
        ValueError: `to` is not a known trust level.
        KeyError: no entry is called `name`.
    """
    if to not in TRUST:
        raise ValueError(f"trust must be one of {TRUST}")
    p = Path(path)
    data = load_registry(p)
    for e in data.get("entries", []):
        if not isinstance(e, dict) or e.get("name") != name:
            continue
        e["trust"] = to
        prov = e.get("provenance")
        if prov is None:  # key absent, or an explicit JSON null
            prov = e["provenance"] = {}
        prov["promoted_by"] = by or "maintainer"
        prov["promoted_at"] = now or _dt.datetime.now().isoformat(timespec="seconds")
        p.write_text(json.dumps(data, indent=2) + "\n", encoding="utf-8")
        kwargs: dict[str, Any] = {k: e.get(k) for k in ("name", "kind", "repo")}
        for k in ("manifest", "version", "trust", "provenance"):
            if e.get(k) is not None:  # otherwise let the dataclass default apply
                kwargs[k] = e[k]
        return Entry(**kwargs)
    raise KeyError(f"no entry named {name!r}")
111
+
112
+
113
def search(path: str | Path, query: str | None = None, *, min_trust: str = "unverified") -> list[dict]:
    """Consumer view with a trust floor.

    Args:
        path: registry JSON file to read.
        query: optional case-insensitive substring to look for in entry names.
        min_trust: lowest trust level to include; must be one of `TRUST`.

    Returns:
        The matching entry dicts, in registry order.

    Raises:
        ValueError: `min_trust` is not a known trust level.
    """
    if min_trust not in TRUST:
        raise ValueError(f"min_trust must be one of {TRUST}")
    floor = TRUST.index(min_trust)

    def _rank(entry: dict) -> int:
        trust = entry.get("trust", "unverified")
        # An unrecognised trust value ranks as the lowest level, so one bad
        # entry cannot crash the whole search or pass a higher floor.
        return TRUST.index(trust) if trust in TRUST else 0

    rows = [e for e in load_registry(path).get("entries", [])
            if isinstance(e, dict) and _rank(e) >= floor]
    if query:
        q = query.lower()
        rows = [e for e in rows if q in str(e.get("name", "")).lower()]
    return rows
skilltree/reports.py ADDED
@@ -0,0 +1,95 @@
1
+ """The feedback store — the observe→improve half of the Skill OS.
2
+
3
+ Reports accumulate the gaps and problems found *in use*, so an improver can act
4
+ on them later. Two report kinds to start:
5
+
6
+ - `missed_skill` — a needed skill didn't exist (filed by the agent, or by
7
+ the user telling the agent "you missed X").
8
+ - `expected_not_used` — a skill SHOULD have fired for a task but didn't
9
+ ("expected xyz but the skill wasn't used").
10
+
11
+ `report-missed-skill` (the shipped skill) calls `report_missed` via the CLI. An
12
+ improver agent reads the open reports, then creates/improves skills and calls
13
+ `resolve` to close them.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ import datetime as _dt
18
+ import json
19
+ from pathlib import Path
20
+ from typing import Any
21
+
22
# Default location of the report store, under the current user's home directory.
DEFAULT_REPORTS = Path.home().joinpath(".claude", "skill-reports.json")
23
+
24
+
25
+ def _now() -> str:
26
+ return _dt.datetime.now().isoformat(timespec="seconds")
27
+
28
+
29
+ def _load(path: Path) -> list[dict]:
30
+ return json.loads(path.read_text(encoding="utf-8")) if path.is_file() else []
31
+
32
+
33
+ def _save(path: Path, rows: list[dict]) -> None:
34
+ path.parent.mkdir(parents=True, exist_ok=True)
35
+ path.write_text(json.dumps(rows, indent=2) + "\n", encoding="utf-8")
36
+
37
+
38
def file_report(reports_path: str | Path, *, kind: str, by: str, now: str | None = None,
                **fields: Any) -> dict:
    """Append a new open report to the store and return it.

    Args:
        reports_path: JSON file holding the list of reports; created if missing.
        kind: report kind, stored under "kind".
        by: who filed it, stored under "by".
        now: timestamp to record; defaults to the current time.
        **fields: extra kind-specific keys merged into the stored row.

    Returns:
        The row that was stored (id, kind, by, at, status, plus `fields`).
    """
    path = Path(reports_path)
    rows = _load(path)
    # Number the report one past the highest existing `r<N>` id, never below
    # the row count. `len(rows) + 1` alone reuses an id once the store has
    # gaps (for instance after a row was removed by hand).
    highest = len(rows)
    for row in rows:
        rid = str(row.get("id", "")) if isinstance(row, dict) else ""
        if rid.startswith("r") and rid[1:].isdecimal():
            highest = max(highest, int(rid[1:]))
    entry = {"id": f"r{highest + 1}", "kind": kind, "by": by, "at": now or _now(),
             "status": "open", **fields}
    rows.append(entry)
    _save(path, rows)
    return entry
47
+
48
+
49
def report_missed(reports_path: str | Path = DEFAULT_REPORTS, *, needed: str, happened: str,
                  suggests: str | None = None, by: str = "agent", now: str | None = None) -> dict:
    """File a `missed_skill` report: a capability was needed, but no skill provided it."""
    details = {"needed": needed, "happened": happened, "suggests": suggests}
    return file_report(reports_path, kind="missed_skill", by=by, now=now, **details)
54
+
55
+
56
def mark_problem(reports_path: str | Path = DEFAULT_REPORTS, *, skill: str, expected: str,
                 happened: str, by: str = "user", now: str | None = None) -> dict:
    """File an `expected_not_used` report: a skill should have been used but was not."""
    details = {"skill": skill, "expected": expected, "happened": happened}
    return file_report(reports_path, kind="expected_not_used", by=by, now=now, **details)
61
+
62
+
63
def list_reports(reports_path: str | Path = DEFAULT_REPORTS, *, kind: str | None = None,
                 status: str | None = "open") -> list[dict]:
    """Return stored reports, filtered by `kind` and `status`; a falsy filter matches everything."""

    def _keep(row: dict) -> bool:
        if kind and row.get("kind") != kind:
            return False
        if status and row.get("status") != status:
            return False
        return True

    return [row for row in _load(Path(reports_path)) if _keep(row)]
71
+
72
+
73
def summary(reports_path: str | Path = DEFAULT_REPORTS) -> dict:
    """Count the stored reports: total, how many are open, and the open ones per kind."""
    everything = _load(Path(reports_path))
    still_open = [row for row in everything if row.get("status") == "open"]
    per_kind: dict[str, int] = {}
    for row in still_open:
        label = row["kind"]
        per_kind[label] = per_kind.get(label, 0) + 1
    return {"total": len(everything), "open": len(still_open), "open_by_kind": per_kind}
80
+
81
+
82
def resolve(reports_path: str | Path, report_id: str, *, resolution: str,
            by: str = "improver", now: str | None = None) -> dict:
    """Close the report `report_id`, record how it was resolved, and return the updated row.

    Raises KeyError when the store holds no report with that id.
    """
    path = Path(reports_path)
    rows = _load(path)
    target = next((row for row in rows if row["id"] == report_id), None)
    if target is None:
        raise KeyError(f"no report {report_id!r}")
    target["status"] = "resolved"
    target["resolution"] = resolution
    target["resolved_by"] = by
    target["resolved_at"] = now or _now()
    _save(path, rows)
    return target
skilltree/search.py ADDED
@@ -0,0 +1,148 @@
1
+ """The `search` arm — coordinate-scoped lexical search over the skill corpus.
2
+
3
+ v1 (evidence-driven, see .claude/rules + the research): SQLite **FTS5 / BM25** over
4
+ the skill files, with **coordinate-scoped subtree filtering** — rank within any
5
+ coord-rooted region of the tree (`scope_coord="0.1"` → only `0.1` and its
6
+ descendants). For a small corpus of short, keyword-dense skill docs, BM25 is
7
+ enough; the differentiated capability is the subtree scoping (the coordinate is
8
+ the address AND the search scope).
9
+
10
+ A dense/vector layer fused via RRF is a LATER, evidence-driven upgrade — add it
11
+ only when logged BM25 misses are semantic (synonym/paraphrase), not lexical.
12
+ MCTS-style tree search is for skill *composition* (SCCC), not lookup.
13
+ """
14
+ from __future__ import annotations
15
+
16
+ import re
17
+ import sqlite3
18
+ from pathlib import Path
19
+
20
+ _COORD_NAME = re.compile(r"^([0-9][0-9.]*)-(.+)$")
21
+ _FTS_TERMS = re.compile(r"\w+")
22
+ DEFAULT_EXTS = (".md", ".txt", ".mdx", ".rst") # folder-mode (foldersearch) corpus
23
+
24
+
25
+ def _title(text: str, fallback: str) -> str:
26
+ """First real heading/line of a doc — the title for a non-SKILL.md file (folder mode)."""
27
+ for line in text.splitlines():
28
+ s = line.strip()
29
+ if s and not s.startswith("---"):
30
+ return s.lstrip("# ").strip()[:120]
31
+ return fallback
32
+
33
+
34
+ def _corpus(root: Path, exts: "tuple[str, ...] | None"):
35
+ """The files to index. `exts=None` → SKILL.md only (tree mode, the default). `exts` given →
36
+ every text file with a matching extension (folder mode), skipping dotfiles."""
37
+ if exts is None:
38
+ yield from root.rglob("SKILL.md")
39
+ else:
40
+ for p in root.rglob("*"):
41
+ if (p.is_file() and p.suffix.lower() in exts
42
+ and not any(part.startswith(".") for part in p.parts)):
43
+ yield p
44
+
45
+
46
def _read_skill(md: Path) -> tuple[str, str, str, str, str, int]:
    """Parse one corpus file into `(coord, name, description, body, glyphs, version)`.

    `name`, `description`, `glyphs` and `version` are read from `key: value`
    lines. When the file opens with a complete `---` … `---` frontmatter
    block, only that block is searched, so a body that merely mentions e.g.
    `glyphs:` cannot leak into the metadata. Without a complete block, the
    whole text is searched. Matching quotes around a value are dropped so
    that `name: "0.1-foo"` still yields coord `0.1`.

    Fallbacks: name → the document title (else the file stem); description
    and glyphs → ""; version → 1; coord → "" when the name has no
    `<coord>-` prefix.
    """
    txt = md.read_text(encoding="utf-8", errors="replace")
    fenced = txt.lstrip().startswith("---")
    pieces = txt.split("---", 2) if fenced else []
    # With both fences present the split is [leading whitespace, frontmatter, body].
    meta = pieces[1] if len(pieces) == 3 else txt

    def _value(key: str) -> str:
        found = re.search(rf"^\s*{key}:\s*(.+?)\s*$", meta, re.M)
        if not found:
            return ""
        val = found.group(1).strip()
        if len(val) >= 2 and val[0] == val[-1] and val[0] in "\"'":
            val = val[1:-1].strip()  # drop the surrounding YAML quotes
        return val

    vs = re.search(r"^\s*version:\s*[\"']?(\d+)", meta, re.M)
    name = _value("name") or _title(txt, md.stem)
    desc = _value("description")
    glyphs = _value("glyphs")
    version = int(vs.group(1)) if vs else 1
    body = pieces[-1].strip() if fenced else txt.strip()
    m = _COORD_NAME.match(name)
    coord = m.group(1) if m else ""
    return coord, name, desc, body, glyphs, version
60
+
61
+
62
def _logical(name: str) -> str:
    """Stable logical identity of a skill: the name without its coord prefix or a trailing `-v<N>`."""
    prefixed = _COORD_NAME.match(name)
    stem = name if prefixed is None else prefixed.group(2)
    return re.sub(r"-v\d+$", "", stem)
67
+
68
+
69
def build_index(root_dir: str | Path, db_path: str = ":memory:",
                vocab=None, *, exts: "tuple[str, ...] | None" = None) -> sqlite3.Connection:
    """Index a corpus under `root_dir` into ONE FTS5/BM25 table. Returns the connection.

    Two corpus modes, ONE engine:
      - `exts=None` (default) → **tree mode**: index every `SKILL.md`
        (coordinates come from each skill's `<coord>-<name>` name).
      - `exts=(...)` → **folder mode**: index every file with a matching
        extension; a file with no `<coord>-<name>` name has coord "".

    GlyphSteer (optional): if `vocab` is given, each skill's `glyphs:` code is
    passed through `vocab.code_tags` and the result is indexed in the `tags`
    column; the raw code is kept in `glyphs` for display.

    The `skills` table is dropped and rebuilt on every call, so pointing
    `db_path` at an existing index file refreshes it instead of failing on
    CREATE. If indexing fails, the connection is closed before the error
    propagates.

    Args:
        root_dir: corpus root directory.
        db_path: SQLite database path; in-memory by default.
        vocab: optional GlyphSteer vocabulary exposing `code_tags(code)`.
        exts: folder-mode extensions, or None for tree mode.
    """
    con = sqlite3.connect(db_path)
    try:
        con.execute("DROP TABLE IF EXISTS skills")
        con.execute("CREATE VIRTUAL TABLE skills USING fts5(name, description, body, tags, "
                    "coord UNINDEXED, path UNINDEXED, glyphs UNINDEXED, version UNINDEXED)")
        rows = []
        for md in _corpus(Path(root_dir), exts):
            coord, name, desc, body, glyphs, version = _read_skill(md)
            tags = " ".join(vocab.code_tags(glyphs)) if (vocab and glyphs) else ""
            rows.append((name, desc, body, tags, coord, str(md), glyphs, version))
        con.executemany("INSERT INTO skills(name, description, body, tags, coord, path, glyphs, version) "
                        "VALUES (?,?,?,?,?,?,?,?)", rows)
        con.commit()
    except Exception:
        con.close()  # don't leak the handle when indexing fails
        raise
    return con
97
+
98
+
99
def _fts_query(q: str) -> str:
    """Turn free text into an FTS5 MATCH expression: the query's word terms OR-ed together.

    Each term is emitted as a double-quoted FTS5 string, so a user word that
    happens to be an FTS5 operator (`AND`, `OR`, `NOT`, `NEAR`) is searched
    for as text rather than parsed as syntax. When the query holds no word
    characters, the raw text is returned as one quoted string (embedded
    quotes doubled), which keeps the expression well-formed.
    """
    terms = _FTS_TERMS.findall(q)
    if not terms:
        return '"' + q.replace('"', '""') + '"'
    return " OR ".join(f'"{t}"' for t in terms)
102
+
103
+
104
def search(con: sqlite3.Connection, query: str, *, scope_coord: str | None = None,
           facet: str | None = None, vocab=None, limit: int = 10,
           newest_only: bool = False) -> list[dict]:
    """BM25-ranked search, optionally scoped to a coordinate subtree and/or faceted by a glyph.

    Args:
        con: connection returned by `build_index`.
        query: free text; its word terms are OR-ed together.
        scope_coord: keep only rows whose coord equals this value or starts
            with it followed by a dot (the subtree).
        facet: glyph to filter on; resolved via `vocab.tag_for` when a vocab
            is given, otherwise used as-is.
        vocab: optional GlyphSteer vocabulary.
        limit: maximum number of hits; zero or less returns nothing.
        newest_only: keep only the highest `version` per logical skill name.

    Returns:
        Hit dicts (name, coord, description, path, glyphs, version, score),
        best first. A query with no word characters returns `[]` rather than
        raising an FTS5 syntax error.
    """
    if not _FTS_TERMS.search(query or ""):
        return []  # nothing searchable (empty or punctuation-only)
    sql = ("SELECT name, coord, description, path, glyphs, version, bm25(skills) AS score "
           "FROM skills WHERE skills MATCH ?")
    params: list = [_fts_query(query)]
    if scope_coord:
        sql += " AND (coord = ? OR coord LIKE ?)"
        params += [scope_coord, f"{scope_coord}.%"]
    if facet:
        tag = (vocab.tag_for(facet) if vocab else None) or facet
        sql += " AND tags MATCH ?"
        params.append(tag)
    sql += " ORDER BY score"  # bm25(): lower = better
    hits = [{"name": r[0], "coord": r[1], "description": r[2], "path": r[3], "glyphs": r[4],
             "version": int(r[5]) if str(r[5]).isdigit() else 1, "score": r[6]}
            for r in con.execute(sql, params)]
    if newest_only:
        best: dict[str, dict] = {}
        for h in hits:  # keep the highest version per logical name
            k = _logical(h["name"])
            if k not in best or h["version"] > best[k]["version"]:
                best[k] = h
        hits = sorted(best.values(), key=lambda h: h["score"])
    return hits[:max(limit, 0)]
133
+
134
+
135
def search_tree(root_dir: str | Path, query: str, *, scope_coord: str | None = None,
                facet: str | None = None, vocab=None, limit: int = 10,
                newest_only: bool = False) -> list[dict]:
    """Convenience: index a SKILL.md tree and search it in one call.

    The index lives only for this call: the connection is closed before
    returning, including when the search raises.
    """
    con = build_index(root_dir, vocab=vocab)
    try:
        return search(con, query, scope_coord=scope_coord, facet=facet, vocab=vocab,
                      limit=limit, newest_only=newest_only)
    finally:
        con.close()
141
+
142
+
143
def search_folder(folder: str | Path, query: str, *, scope_coord: str | None = None,
                  exts: "tuple[str, ...]" = DEFAULT_EXTS, limit: int = 10) -> list[dict]:
    """Convenience: index ANY folder's text files and BM25-search them in one call.

    `scope_coord` restricts to a coordinate subtree when the folder carries
    skilltree coordinates; on a plain folder, leave it unset.

    The index lives only for this call: the connection is closed before
    returning, including when the search raises.
    """
    con = build_index(folder, exts=exts)
    try:
        return search(con, query, scope_coord=scope_coord, limit=limit)
    finally:
        con.close()
skilltree/validate.py ADDED
@@ -0,0 +1,89 @@
1
+ """Validate a materialized SkillTree (the `cat`-breadcrumb tree).
2
+
3
+ The auto-loader only loads the root and never follows the breadcrumbs, so nothing
4
+ guarantees the tree is traversable — this validator is that guarantee. It checks
5
+ the things the filesystem won't:
6
+
7
+ - every node has a loadable SKILL.md (name + description frontmatter)
8
+ - every non-leaf node's body has a Read breadcrumb for EACH of its children
9
+ - every breadcrumb path actually resolves to a file (no dead ends)
10
+ - sibling names are unique (their dirs collide otherwise)
11
+
12
+ A non-empty error list is a kill-criterion: someone walking the tree hits a dead
13
+ end the platform would never warn about.
14
+ """
15
+ from __future__ import annotations
16
+
17
+ from dataclasses import dataclass
18
+ from pathlib import Path
19
+ import re
20
+
21
+ from .model import KINDS, SkillTree, TreeNode, skill_name
22
+
23
+ # a breadcrumb is `- <name> (<kind>): Read `<abspath>`` — match the backticked SKILL.md path
24
+ # (verb-agnostic, so it survives wording tweaks; the verb is the Read tool, never `cat`)
25
+ _CRUMB_RE = re.compile(r"`([^`]+/SKILL\.md)`")
26
+
27
+
28
@dataclass
class Violation:
    """One finding from the tree validator."""

    # "error" | "warning"
    severity: str
    # label of the place the finding was raised for
    where: str
    # human-readable description of the problem
    message: str
33
+
34
+
35
+ def _has_frontmatter(skill_md: Path) -> bool:
36
+ if not skill_md.is_file():
37
+ return False
38
+ txt = skill_md.read_text(encoding="utf-8", errors="replace")
39
+ return bool(re.search(r"^\s*name:\s*\S", txt, re.M) and re.search(r"^\s*description:\s*\S", txt, re.M))
40
+
41
+
42
+ def _skill_md(node_dir: Path, name: str) -> Path:
43
+ return node_dir / ".claude" / "skills" / name / "SKILL.md"
44
+
45
+
46
def _walk(node: TreeNode, node_dir: Path, out: list[Violation]) -> None:
    """Check `node` and, recursively, its children; findings are appended to `out`.

    Per node: the kind is known (warning otherwise), SKILL.md exists and has
    name/description frontmatter, sibling names are unique, every child has a
    breadcrumb in this node's SKILL.md, and every breadcrumb resolves to a
    file. A node whose SKILL.md is missing is reported and its subtree is not
    descended into. Findings within a node are emitted in sorted order so
    repeated runs give identical output.

    Args:
        node: the tree node to check.
        node_dir: directory that holds this node.
        out: list the violations are appended to (mutated in place).
    """
    skill_md = _skill_md(node_dir, skill_name(node))
    if node.kind not in KINDS:
        out.append(Violation("warning", node.name, f"unknown kind {node.kind!r}"))
    if not skill_md.is_file():
        out.append(Violation("error", node.name, f"missing SKILL.md at {skill_md}"))
        return
    if not _has_frontmatter(skill_md):
        out.append(Violation("error", node.name, "SKILL.md lacks name/description frontmatter (won't auto-load)"))

    # One pass to find repeated sibling names (no per-name list.count scan).
    seen: set = set()
    dupes: set = set()
    for c in node.children:
        (dupes if c.name in seen else seen).add(c.name)
    for d in sorted(dupes):
        out.append(Violation("error", node.name, f"duplicate child name {d!r} (sibling dirs collide)"))

    if node.children:
        # Decode leniently, like `_has_frontmatter`, so a stray byte yields
        # findings instead of a UnicodeDecodeError.
        text = skill_md.read_text(encoding="utf-8", errors="replace")
        found = {Path(m).resolve() for m in _CRUMB_RE.findall(text)}
        expected = {_skill_md(node_dir / c.name, skill_name(c)).resolve() for c in node.children}
        for missing in sorted(expected - found):
            out.append(Violation("error", node.name, f"no Read breadcrumb for child → {missing}"))
        for p in sorted(found):
            if not Path(p).is_file():
                out.append(Violation("error", node.name, f"dead breadcrumb: `{p}` does not resolve"))
            elif p not in expected:
                out.append(Violation("warning", node.name, f"breadcrumb `{p}` is not a declared child"))

    for child in node.children:
        _walk(child, node_dir / child.name, out)
74
+
75
+
76
def validate(root: str | Path, tree: SkillTree | None = None) -> list[Violation]:
    """Validate the materialized tree at `root` and return every violation found.

    When `tree` is not supplied it is loaded from `<root>/skilltree.json`; a
    missing manifest is reported as a single error.
    """
    base = Path(root)
    if tree is None:
        manifest = base / "skilltree.json"
        if manifest.is_file():
            tree = SkillTree.load(manifest)
        else:
            return [Violation("error", str(base), "no skilltree.json manifest found")]
    violations: list[Violation] = []
    # The root node's directory IS the tree root.
    _walk(tree.root, base, violations)
    return violations
86
+
87
+
88
def is_valid(root: str | Path, tree: SkillTree | None = None) -> bool:
    """True when validation reports no violation of severity "error" (warnings are tolerated)."""
    return all(found.severity != "error" for found in validate(root, tree))