skill-router 0.0.1-snapshot

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +48 -0
  2. package/package.json +17 -0
  3. package/router.py +333 -0
package/README.md ADDED
@@ -0,0 +1,48 @@
1
+ # skill-router
2
+
3
+ Local semantic skill router for `gemini-cli-pro`.
4
+
5
+ ## Install
6
+
7
+ ```bash
8
+ npm install -g .
9
+ ```
10
+
11
+ The package runs this postinstall hook:
12
+
13
+ ```bash
14
+ python3 -m pip install fastembed numpy --user
15
+ ```
16
+
17
+ ## Usage
18
+
19
+ ```bash
20
+ skill-router --prompt "rename API layer" --skills-dir /path/to/.skills
21
+ ```
22
+
23
+ Output is always JSON:
24
+
25
+ ```json
26
+ {"skill_id":"node-backend","score":0.8732}
27
+ ```
28
+
29
+ If score is below threshold:
30
+
31
+ ```json
32
+ {"skill_id":null,"score":0.0}
33
+ ```
34
+
35
+ If the provided `--skills-dir` does not exist or has no `.md` files, it returns:
36
+
37
+ ```json
38
+ {"skill_id":null,"score":0.0}
39
+ ```
40
+
41
+ ## Cache
42
+
43
+ Cache is stored in:
44
+
45
+ - `~/.cache/skill-router/data.npy`
46
+ - `~/.cache/skill-router/.skills_cache.npy`
47
+
48
+ Works on Ubuntu/Linux and macOS.
package/package.json ADDED
@@ -0,0 +1,17 @@
1
+ {
2
+ "name": "skill-router",
3
+ "version": "0.0.1-snapshot",
4
+ "description": "Local semantic skill router for gemini-cli-pro",
5
+ "license": "MIT",
6
+ "type": "commonjs",
7
+ "bin": {
8
+ "skill-router": "./router.py"
9
+ },
10
+ "scripts": {
11
+ "postinstall": "python3 -m pip install fastembed numpy --user"
12
+ },
13
+ "files": [
14
+ "router.py",
15
+ "README.md"
16
+ ]
17
+ }
package/router.py ADDED
@@ -0,0 +1,333 @@
1
+ #!/usr/bin/env python3
2
+ """skill-router: local semantic router for Gemini CLI skills."""
3
+
4
+ from __future__ import annotations
5
+
6
+ import argparse
7
+ import hashlib
8
+ import json
9
+ import os
10
+ import sys
11
+ from pathlib import Path
12
+ from typing import Iterable
13
+
14
+ import numpy as np
15
+
16
+ try:
17
+ from fastembed import TextEmbedding
18
+ except Exception: # pragma: no cover - runtime dependency may be unavailable
19
+ TextEmbedding = None
20
+
21
+
22
# Embedding model used for both skill indexing and prompt queries.
MODEL_NAME = "BAAI/bge-small-en-v1.5"
# Cache layout under the user's home directory (see README "Cache" section).
CACHE_ROOT = Path.home() / ".cache" / "skill-router"
DATA_FILE = CACHE_ROOT / "data.npy"  # pickled dict: ids + embedding matrix
SKILLS_CACHE_FILE = CACHE_ROOT / ".skills_cache.npy"  # hash marker for invalidation

# Process-wide lazily initialised embedding model (populated by _get_embedder).
_EMBEDDER = None
28
+
29
+
30
class JsonArgumentParser(argparse.ArgumentParser):
    """ArgumentParser variant whose error output is machine-readable JSON."""

    def error(self, message: str) -> None:
        """Print a JSON error payload to stdout and exit with status 2."""
        report = json.dumps(
            {
                "skill_id": None,
                "score": 0.0,
                "error": f"invalid_arguments: {message}",
            },
            ensure_ascii=False,
        )
        print(report)
        raise SystemExit(2)
41
+
42
+
43
+ def _ok(skill_id: str | None, score: float) -> dict:
44
+ return {
45
+ "skill_id": skill_id,
46
+ "score": round(float(score), 4),
47
+ }
48
+
49
+
50
+ def _err(message: str) -> dict:
51
+ return {
52
+ "skill_id": None,
53
+ "score": 0.0,
54
+ "error": message,
55
+ }
56
+
57
+
58
def _ensure_cache_dir() -> None:
    """Create the cache root if needed, mapping OS failures to RuntimeError codes."""
    try:
        CACHE_ROOT.mkdir(parents=True, exist_ok=True)
        return
    except PermissionError as exc:
        # Distinct code so callers can report permission problems precisely.
        reason, cause = "cache_write_permission_denied", exc
    except OSError as exc:
        reason, cause = f"cache_init_failed: {exc}", exc
    raise RuntimeError(reason) from cause
65
+
66
+
67
+ def _iter_skill_files(skills_dir: Path) -> Iterable[Path]:
68
+ for path in sorted(skills_dir.rglob("*.md")):
69
+ if path.is_file():
70
+ yield path
71
+
72
+
73
def _has_skill_markdown(skills_dir: Path) -> bool:
    """Return True when *skills_dir* is an existing directory holding >= 1 .md file."""
    if skills_dir.is_dir():
        for _ in _iter_skill_files(skills_dir):
            return True
    return False
77
+
78
+
79
def _resolve_available_skills_dir(preferred_skills_dir: Path) -> Path | None:
    """Resolve the preferred directory; return it only if it contains skill markdown."""
    resolved = preferred_skills_dir.resolve()
    return resolved if _has_skill_markdown(resolved) else None
84
+
85
+
86
+ def _normalize_heading(text: str) -> str:
87
+ return (
88
+ text.strip()
89
+ .lower()
90
+ .replace("#", "")
91
+ .replace(":", "")
92
+ .replace("\u00a0", " ")
93
+ .strip()
94
+ )
95
+
96
+
97
def _extract_section(content: str, headings: set[str]) -> str:
    """Return the body of the first level-3 section whose heading matches *headings*.

    Capture begins after a "###" heading that normalizes into *headings* and
    ends at the next "###" heading; returns "" when nothing matches.
    """
    collected: list[str] = []
    inside = False

    for raw in content.splitlines():
        stripped = raw.strip()
        if stripped.startswith("###"):
            if _normalize_heading(stripped) in headings:
                inside = True
                continue
            if inside:
                # Reached the following section: stop collecting.
                break
        if inside:
            collected.append(raw)

    return "\n".join(collected).strip()
115
+
116
+
117
+ def _skill_id_for_path(path: Path, skills_dir: Path) -> str:
118
+ if path.stem.lower() == "skill" and path.parent != skills_dir:
119
+ return path.parent.name
120
+ return path.stem
121
+
122
+
123
def _extract_index_text(content: str) -> str:
    """Pick the text worth embedding: triggers + description sections, else a prefix."""
    parts = [
        _extract_section(content, {"gatilhos", "triggers"}),
        _extract_section(content, {"descricao", "descrição", "description"}),
    ]
    combined = "\n\n".join(part for part in parts if part).strip()
    if combined:
        return combined
    # Fallback: no recognised sections — index the first 2000 characters instead.
    return content.strip()[:2000]
133
+
134
+
135
def _compute_skills_hash(skills_dir: Path) -> str:
    """Fingerprint the skills tree: relative paths, mtimes, sizes, and contents."""
    hasher = hashlib.sha256()

    for md_path in _iter_skill_files(skills_dir):
        meta = md_path.stat()
        hasher.update(md_path.relative_to(skills_dir).as_posix().encode("utf-8"))
        # mtime + size catch metadata-only changes; contents catch edits.
        hasher.update(str(meta.st_mtime_ns).encode("utf-8"))
        hasher.update(str(meta.st_size).encode("utf-8"))
        with md_path.open("rb") as handle:
            for chunk in iter(lambda: handle.read(8192), b""):
                hasher.update(chunk)

    return hasher.hexdigest()
152
+
153
+
154
def _get_embedder() -> TextEmbedding:
    """Return the lazily constructed, process-wide embedding model."""
    global _EMBEDDER
    if _EMBEDDER is not None:
        return _EMBEDDER
    if TextEmbedding is None:
        # fastembed failed to import at module load time.
        raise RuntimeError("missing_dependency: fastembed")
    _EMBEDDER = TextEmbedding(model_name=MODEL_NAME)
    return _EMBEDDER
161
+
162
+
163
def index_skills(skills_dir: Path) -> dict:
    """Embed every skill markdown file under *skills_dir* and persist the index.

    Writes two cache files: DATA_FILE (ids + embedding matrix + metadata) and
    SKILLS_CACHE_FILE (hash/dir marker used by _needs_reindex for invalidation).

    Returns:
        dict: {"indexed": <count>, "skills_hash": <sha256 hex>}.

    Raises:
        RuntimeError: directory missing, no .md files, no indexable content,
            cache directory not writable, or fastembed unavailable.
    """
    if not skills_dir.exists() or not skills_dir.is_dir():
        raise RuntimeError(f"skills_dir_not_found: {skills_dir}")

    skill_files = list(_iter_skill_files(skills_dir))
    if not skill_files:
        raise RuntimeError("skills_dir_empty")

    _ensure_cache_dir()
    # NOTE(review): hash is computed before files are read below — a file
    # changing in between would go unnoticed until the next invocation.
    skills_hash = _compute_skills_hash(skills_dir)

    ids: list[str] = []
    texts: list[str] = []
    seen: dict[str, int] = {}

    for path in skill_files:
        # errors="ignore" keeps indexing alive on badly encoded files.
        content = path.read_text(encoding="utf-8", errors="ignore")
        index_text = _extract_index_text(content)
        if not index_text:
            continue

        skill_id = _skill_id_for_path(path, skills_dir)
        # Disambiguate duplicate ids with a numeric suffix ("name-2", "name-3", ...).
        if skill_id in seen:
            seen[skill_id] += 1
            skill_id = f"{skill_id}-{seen[skill_id]}"
        else:
            seen[skill_id] = 1

        ids.append(skill_id)
        texts.append(index_text)

    if not ids:
        raise RuntimeError("no_valid_skill_content")

    embedder = _get_embedder()
    vectors = list(embedder.embed(texts))
    # One row per skill, float32 to keep the cache small.
    matrix = np.vstack(vectors).astype(np.float32)

    data_payload = {
        "skills_hash": skills_hash,
        "skills_dir": str(skills_dir.resolve()),
        "ids": ids,
        "matrix": matrix,
    }

    # Main cache requested by spec.
    np.save(DATA_FILE, data_payload, allow_pickle=True)

    # Marker cache requested by spec for hash invalidation checks.
    marker_payload = {
        "skills_hash": skills_hash,
        "skills_dir": str(skills_dir.resolve()),
    }
    np.save(SKILLS_CACHE_FILE, marker_payload, allow_pickle=True)

    return {
        "indexed": len(ids),
        "skills_hash": skills_hash,
    }
222
+
223
+
224
def _load_cache_payload() -> dict:
    """Load and validate the pickled index payload stored in DATA_FILE."""
    if not DATA_FILE.exists():
        raise RuntimeError("cache_data_missing")
    # np.save stored a dict, so np.load yields a 0-d object array; unwrap it.
    raw = np.load(DATA_FILE, allow_pickle=True)
    unwrapped = raw.item() if hasattr(raw, "item") else raw
    if isinstance(unwrapped, dict):
        return unwrapped
    raise RuntimeError("cache_data_invalid")
232
+
233
+
234
def _needs_reindex(skills_dir: Path) -> bool:
    """Decide whether the on-disk index is stale for *skills_dir*."""
    if not (DATA_FILE.exists() and SKILLS_CACHE_FILE.exists()):
        return True

    try:
        marker = np.load(SKILLS_CACHE_FILE, allow_pickle=True)
        meta = marker.item() if hasattr(marker, "item") else marker
        if not isinstance(meta, dict):
            return True
        cached_hash = meta.get("skills_hash")
        cached_dir = meta.get("skills_dir")
    except Exception:
        # An unreadable marker simply forces a rebuild.
        return True

    current_hash = _compute_skills_hash(skills_dir)
    stale_hash = cached_hash != current_hash
    stale_dir = cached_dir != str(skills_dir.resolve())
    return stale_hash or stale_dir
254
+
255
+
256
def get_best_skill(prompt: str, skills_dir: Path, threshold: float = 0.7) -> dict:
    """Route *prompt* to the most similar cached skill via cosine similarity.

    Re-indexes automatically when the cache is stale, embeds the prompt, and
    compares it against every cached skill embedding.

    Returns:
        dict: {"skill_id", "score"} on success — skill_id is None when the
        prompt is blank, no skills directory/files exist, or the best score
        falls below *threshold*; failures add an "error" key instead.
    """
    if not prompt or not prompt.strip():
        return _ok(None, 0.0)

    active_skills_dir = _resolve_available_skills_dir(skills_dir)
    if active_skills_dir is None:
        # Missing directory / no markdown files: spec says return a null match.
        return _ok(None, 0.0)

    try:
        if _needs_reindex(active_skills_dir):
            index_skills(active_skills_dir)

        payload = _load_cache_payload()
        ids = payload.get("ids", [])
        matrix = payload.get("matrix", None)

        if not ids or matrix is None:
            return _err("cache_data_empty")

        matrix = np.asarray(matrix, dtype=np.float32)
        if matrix.ndim != 2 or matrix.shape[0] == 0:
            return _err("cache_matrix_invalid")

        embedder = _get_embedder()
        prompt_vec = np.asarray(list(embedder.embed([prompt]))[0], dtype=np.float32)

        # Cosine similarity; the epsilon guards against zero-norm vectors.
        matrix_norm = matrix / (np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-12)
        prompt_norm = prompt_vec / (np.linalg.norm(prompt_vec) + 1e-12)
        scores = matrix_norm @ prompt_norm

        if scores.size == 0:
            return _ok(None, 0.0)

        best_idx = int(np.argmax(scores))
        best_score = float(scores[best_idx])

        # Non-finite scores and sub-threshold matches are treated as "no skill".
        if not np.isfinite(best_score) or best_score < float(threshold):
            return _ok(None, 0.0)

        return _ok(ids[best_idx], best_score)
    except PermissionError:
        return _err("cache_write_permission_denied")
    except RuntimeError as exc:
        # Helper functions raise RuntimeError with pre-formatted error codes.
        return _err(str(exc))
    except Exception as exc:  # pragma: no cover - defensive fallback
        return _err(f"router_failure: {exc}")
302
+
303
+
304
def main() -> int:
    """CLI entry point: parse flags, run routing, print a single JSON line.

    Returns 0 on success, 1 when the result carries an "error" key.
    """
    parser = JsonArgumentParser(add_help=True)
    parser.add_argument("--prompt", default="", help="User text prompt")
    parser.add_argument(
        "--skills-dir",
        default=".skills",
        help="Path to skills directory",
    )
    parser.add_argument(
        "--threshold",
        type=float,
        default=0.7,
        help="Cosine similarity threshold",
    )
    args = parser.parse_args()

    resolved_dir = Path(os.path.expanduser(args.skills_dir)).resolve()
    result = get_best_skill(
        prompt=args.prompt,
        skills_dir=resolved_dir,
        threshold=args.threshold,
    )
    print(json.dumps(result, ensure_ascii=False))

    # Non-zero exit signals a routing failure to the calling process.
    return 1 if "error" in result else 0
330
+
331
+
332
# Script entry: propagate main()'s exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())