skillshelf 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ // Crawl skill roots into Skill[] applying the crawl rules (see docs/ARCHITECTURE.md §6).
2
+ //
3
+ // Rules:
4
+ // - Dedupe by realpath (aliased mounts like cloud-sync mirror locations).
5
+ // - Treat `.agents/skills` as bridge mirrors of `.claude/skills`: set mirrorOf,
6
+ // do not double-count as an independent skill.
7
+ // - Skip `_retired/`: tag `retired: true`, do not activate.
8
+ // - Ignore any path containing `node_modules`.
9
+ // - Support both `name/SKILL.md` and `skills/name/SKILL.md` layouts.
10
+
11
+ import { createHash } from "node:crypto";
12
+ import { join, basename, dirname, sep } from "node:path";
13
+ import { readdir } from "node:fs/promises";
14
+ import { existsSync } from "node:fs";
15
+ import type { Skill } from "../types.ts";
16
+ import { parseFrontmatter } from "../lib/frontmatter.ts";
17
+ import { realpathOrSelf, isDirectory } from "../lib/fs.ts";
18
+
19
/** File name whose presence inside a directory marks that directory as a skill. */
const SKILL_FILE = "SKILL.md";
/** Directory name that flags everything discovered beneath it as retired. */
const RETIRED_DIR = "_retired";
21
+
22
/**
 * Compute the SHA-256 digest of a string.
 *
 * @param content text to hash; encoded as UTF-8 before hashing
 * @returns the digest as a lowercase hex string
 */
export function hashContent(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content, "utf8");
  return hasher.digest("hex");
}
25
+
26
/**
 * Report whether `seg` is one of the segments of `p`, where segments are
 * obtained by splitting on the platform path separator. Only whole segments
 * match; a segment that merely contains `seg` does not.
 */
function pathHasSegment(p: string, seg: string): boolean {
  const segments = p.split(sep);
  return segments.indexOf(seg) !== -1;
}
29
+
30
/**
 * Collect the effective domain list from parsed frontmatter.
 *
 * The primary domain comes first (`primaryDomain`, or `primary_domain` when
 * the former is null/undefined), followed by the entries of `domains`, which
 * may be an array or a single string. Every value is trimmed, blank values are
 * dropped, and duplicates are removed keeping the first occurrence.
 *
 * Array entries that are not scalar (null, undefined, objects, nested arrays,
 * functions, symbols) are skipped instead of being stringified, so malformed
 * frontmatter cannot yield domains such as "null" or "[object Object]".
 *
 * @param data parsed frontmatter key/value map
 * @returns de-duplicated domain names in discovery order
 */
function readDomains(data: Record<string, unknown>): string[] {
  const out: string[] = [];

  const primary = data.primaryDomain ?? data.primary_domain;
  if (typeof primary === "string" && primary.trim() !== "") out.push(primary.trim());

  const d = data.domains;
  if (Array.isArray(d)) {
    for (const x of d as unknown[]) {
      // Only scalars have a meaningful string form; anything else is noise.
      const kind = typeof x;
      if (kind !== "string" && kind !== "number" && kind !== "boolean" && kind !== "bigint") {
        continue;
      }
      const s = String(x).trim();
      if (s !== "") out.push(s);
    }
  } else if (typeof d === "string" && d.trim() !== "") {
    out.push(d.trim());
  }

  // A Set preserves insertion order, so the primary domain stays first.
  return [...new Set(out)];
}
46
+
47
/** Report whether `dir` contains an entry named SKILL.md, which marks it as a skill dir. */
function isSkillDir(dir: string): boolean {
  const marker = join(dir, SKILL_FILE);
  return existsSync(marker);
}
51
+
52
/**
 * List the entries sitting directly inside a skill directory, excluding the
 * skill body and bookkeeping files.
 *
 * Skipped names: SKILL.md, `<skillName>.shelf.json`, `shelf.lock.json` and
 * `.DS_Store`. Entries are not filtered by type, so sub-directories are
 * included alongside files.
 *
 * @param skillDir  directory to list
 * @param skillName name used to recognise the skill's `.shelf.json` file
 * @returns sorted paths (each joined onto `skillDir`); empty when the
 *          directory cannot be read
 */
async function listRefFiles(skillDir: string, skillName: string): Promise<string[]> {
  let listing: Awaited<ReturnType<typeof readdir>>;
  try {
    listing = await readdir(skillDir, { withFileTypes: true });
  } catch {
    return [];
  }

  const ignored = new Set<string>([
    SKILL_FILE,
    `${skillName}.shelf.json`,
    "shelf.lock.json",
    ".DS_Store",
  ]);

  return listing
    .filter((entry) => !ignored.has(entry.name))
    .map((entry) => join(skillDir, entry.name))
    .sort();
}
69
+
70
/**
 * Read a skill directory's SKILL.md and turn it into a Skill record.
 *
 * @param skillDir directory expected to contain SKILL.md
 * @param opts     flags decided by the crawler: whether the skill is retired,
 *                 which dir it mirrors (if any), and an optional primary-domain
 *                 override that takes precedence over the frontmatter
 * @returns the Skill, or null when SKILL.md is missing or cannot be read
 */
async function buildSkill(
  skillDir: string,
  opts: { retired: boolean; mirrorOf: string | null; primaryDomain: string | null },
): Promise<Skill | null> {
  const bodyPath = join(skillDir, SKILL_FILE);
  if (!existsSync(bodyPath)) return null;

  let text: string;
  try {
    text = await Bun.file(bodyPath).text();
  } catch {
    return null;
  }

  const { data, body } = parseFrontmatter(text);

  // Frontmatter `name` wins when it is a non-blank string; otherwise use the dir name.
  const declaredName = typeof data.name === "string" ? data.name.trim() : "";
  const name = declaredName !== "" ? declaredName : basename(skillDir);

  const description =
    typeof data.description === "string" ? data.description.trim() : "";

  const declaredDomains = readDomains(data);
  const hasDomains = declaredDomains.length > 0;
  const primaryDomain =
    opts.primaryDomain ?? (hasDomains ? declaredDomains[0]! : null);

  // With no declared domains, fall back to the primary domain alone (if any).
  let domains: string[];
  if (hasDomains) {
    domains = declaredDomains;
  } else if (primaryDomain) {
    domains = [primaryDomain];
  } else {
    domains = [];
  }

  const refFiles = await listRefFiles(skillDir, name);

  return {
    name,
    description,
    primaryDomain,
    domains,
    path: skillDir,
    bodyPath,
    refFiles,
    source: null,
    retired: opts.retired,
    mirrorOf: opts.mirrorOf,
    // The hash covers the body only, i.e. the text after the frontmatter.
    contentHash: hashContent(body),
  };
}
109
+
110
/**
 * Locate every skill directory beneath `root`. Layouts handled include:
 *   - <root>/<name>/SKILL.md
 *   - <root>/skills/<name>/SKILL.md
 *   - <root>/_retired/<name>/SKILL.md (reported as retired)
 *
 * The walk is a depth-limited recursive descent. A directory that contains
 * SKILL.md is a leaf and is never descended into; any other directory is
 * treated as a grouping folder and searched. Passing through a `_retired`
 * directory marks everything below it as retired.
 *
 * @param root directory to start from
 * @returns skill dirs (joined onto `root`) with their retired flag; empty
 *          when `root` does not exist
 */
async function discoverSkillDirs(
  root: string,
): Promise<Array<{ dir: string; retired: boolean }>> {
  const hits: Array<{ dir: string; retired: boolean }> = [];
  if (!existsSync(root)) return hits;

  const ignoredNames = new Set(["node_modules", ".git"]);
  const depthLimit = 8;

  const walk = async (
    current: string,
    level: number,
    underRetired: boolean,
  ): Promise<void> => {
    if (level > depthLimit) return;

    if (isSkillDir(current)) {
      hits.push({ dir: current, retired: underRetired });
      return; // a skill's own subtree (reference/ etc.) is not searched
    }

    let children: Awaited<ReturnType<typeof readdir>>;
    try {
      children = await readdir(current, { withFileTypes: true });
    } catch {
      return;
    }

    for (const child of children) {
      if (ignoredNames.has(child.name)) continue;

      const childPath = join(current, child.name);
      if (pathHasSegment(childPath, "node_modules")) continue;

      // Follow symlinks only when they resolve to a directory.
      let descend = child.isDirectory();
      if (!descend && child.isSymbolicLink()) {
        descend = await isDirectory(childPath);
      }
      if (!descend) continue;

      await walk(childPath, level + 1, underRetired || child.name === RETIRED_DIR);
    }
  };

  await walk(root, 0, false);
  return hits;
}
156
+
157
/** Optional hooks that adjust how `crawl` builds skills. */
export interface CrawlOptions {
  /**
   * Primary-domain hint (library mode). Invoked with each discovered skill
   * dir; a non-null result is used as that skill's primary domain in
   * preference to the domains read from its frontmatter.
   */
  primaryDomainOf?: (skillDir: string) => string | null;
}
161
+
162
/** Outcome of a `crawl` run. */
export interface CrawlResult {
  /** One entry per distinct skill directory (distinct by realpath). */
  skills: Skill[];
  /** Roots that were skipped because their realpath matched an earlier root. */
  dedupedRoots: string[];
}
167
+
168
/**
 * Crawl a set of roots into Skill[].
 *
 * Steps:
 *   1. Drop roots whose realpath matches an earlier root; they are reported
 *      in `dedupedRoots`.
 *   2. Discover skill dirs under each remaining root (node_modules is skipped
 *      and `_retired` subtrees are tagged by the discovery step). A dir whose
 *      path has a `.agents` segment is treated as a bridge mirror.
 *   3. Build one Skill per distinct realpath. A mirror gets `mirrorOf` set to
 *      the first non-`.agents` skill dir with the same directory name, if any.
 *
 * When a mirror and a canonical dir resolve to the same realpath (for example
 * a symlinked bridge), the canonical dir is the one kept regardless of the
 * order the roots were given in. Among dirs of the same kind the first one
 * encountered wins.
 *
 * @param roots directories to crawl
 * @param opts  optional hooks; see CrawlOptions
 * @returns the skills found, plus the roots skipped as realpath duplicates
 */
export async function crawl(
  roots: string[],
  opts: CrawlOptions = {},
): Promise<CrawlResult> {
  // Collapse roots that are aliases of one another.
  const dedupedRoots: string[] = [];
  const seenRootReal = new Set<string>();
  const effectiveRoots: string[] = [];
  for (const r of roots) {
    const rp = realpathOrSelf(r);
    if (seenRootReal.has(rp)) {
      dedupedRoots.push(r);
      continue;
    }
    seenRootReal.add(rp);
    effectiveRoots.push(r);
  }

  // First pass: collect all skill dirs together with their mirror flag.
  interface Found {
    dir: string;
    retired: boolean;
    isAgents: boolean;
  }
  const found: Found[] = [];
  for (const root of effectiveRoots) {
    const dirs = await discoverSkillDirs(root);
    for (const d of dirs) {
      // A skill dir is a bridge mirror if it lives under a `.agents` path.
      const isAgents = pathHasSegment(d.dir, ".agents");
      found.push({ dir: d.dir, retired: d.retired, isAgents });
    }
  }

  // Directory name -> first canonical (non-.agents) skill dir carrying that
  // name, recorded up front so mirrors can point `mirrorOf` at it.
  const claudeByName = new Map<string, string>();
  for (const f of found) {
    if (f.isAgents) continue;
    const name = basename(f.dir);
    if (!claudeByName.has(name)) claudeByName.set(name, f.dir);
  }

  // realpath(skillDir) -> Skill, so aliased copies collapse into one entry.
  const byReal = new Map<string, Skill>();
  // Realpaths whose recorded entry was built from a `.agents` mirror.
  const mirrorReals = new Set<string>();

  for (const f of found) {
    const real = realpathOrSelf(f.dir);
    if (byReal.has(real)) {
      // Aliased duplicate: keep the recorded entry unless it is a mirror and
      // this dir is canonical, in which case the canonical dir replaces it.
      if (f.isAgents || !mirrorReals.has(real)) continue;
    }

    let mirrorOf: string | null = null;
    if (f.isAgents) {
      mirrorOf = claudeByName.get(basename(f.dir)) ?? null;
    }
    const primaryDomain = opts.primaryDomainOf?.(f.dir) ?? null;
    const skill = await buildSkill(f.dir, {
      retired: f.retired,
      mirrorOf,
      primaryDomain,
    });
    if (!skill) continue;

    // Setting an existing key keeps its original position in the Map, so a
    // replacement does not reorder the result.
    byReal.set(real, skill);
    if (f.isAgents) mirrorReals.add(real);
    else mirrorReals.delete(real);
  }

  return { skills: [...byReal.values()], dedupedRoots };
}
240
+
241
/**
 * Expand a parent dir (like ~/Documents/GitHub) into candidate skill roots.
 *
 * Each immediate sub-directory of `parent`, other than `node_modules`, is
 * treated as a project. For every project the paths `.claude/skills`,
 * `.agents/skills`, `skills` and `skill` are probed in that order and the
 * ones that exist are returned.
 *
 * @param parent directory whose children are projects
 * @returns existing candidate roots; empty when `parent` is missing or
 *          cannot be read
 */
export async function expandProjectRoots(parent: string): Promise<string[]> {
  if (!existsSync(parent)) return [];

  let children: Awaited<ReturnType<typeof readdir>>;
  try {
    children = await readdir(parent, { withFileTypes: true });
  } catch {
    return [];
  }

  // Probe order matters: it is the order candidates appear in the result.
  const layouts: string[][] = [
    [".claude", "skills"],
    [".agents", "skills"],
    ["skills"],
    ["skill"],
  ];

  const roots: string[] = [];
  for (const child of children) {
    if (!child.isDirectory() || child.name === "node_modules") continue;
    const project = join(parent, child.name);
    for (const parts of layouts) {
      const candidate = join(project, ...parts);
      if (existsSync(candidate)) roots.push(candidate);
    }
  }
  return roots;
}
266
+
267
+ export { dirname as _dirname };
@@ -0,0 +1,67 @@
1
+ // Group duplicate / drifted skills by name + content hash.
2
+ // Classify a canonical copy vs divergent (drifted) copies vs exact duplicates.
3
+
4
+ import type { DuplicateGroup, Skill } from "../types.ts";
5
+
6
/**
 * Score how strongly a skill should be treated as the canonical copy of its
 * name; a larger score is more canonical.
 *
 * Weights, in order of dominance:
 *   - +1000 when the skill is not a bridge mirror (`mirrorOf` is unset)
 *   - +500  when the skill is not retired
 *   - +100  when the skill has at least one domain
 *   - up to +50 for a short path: 50 minus a quarter of the path length,
 *     floored at 0 (so paths of 200+ characters earn nothing)
 *
 * Provenance is not part of the score.
 */
function canonicalScore(s: Skill): number {
  const realFilePoints = s.mirrorOf ? 0 : 1000;
  const activePoints = s.retired ? 0 : 500;
  const taggedPoints = s.domains.length > 0 ? 100 : 0;
  const brevityPoints = Math.max(0, 50 - Math.min(50, s.path.length / 4));
  return realFilePoints + activePoints + taggedPoints + brevityPoints;
}
18
+
19
/**
 * Group skills that share a `name` and classify the copies in each group.
 *
 * For every name with two or more copies:
 *   - `canonical`  = the copy with the highest canonicalScore (on a tie, the
 *                    one that appears first in `skills`)
 *   - `duplicates` = remaining copies whose contentHash equals canonical's
 *   - `divergent`  = remaining copies whose contentHash differs (drift)
 *   - `identical`  = true when every copy shares one hash
 *
 * Names with a single copy produce no group.
 *
 * @param skills skills to examine
 * @returns groups ordered by name (plain code-unit comparison)
 */
export function findDuplicates(skills: Skill[]): DuplicateGroup[] {
  const copiesByName = new Map<string, Skill[]>();
  for (const skill of skills) {
    const bucket = copiesByName.get(skill.name);
    if (bucket === undefined) copiesByName.set(skill.name, [skill]);
    else bucket.push(skill);
  }

  const groups: DuplicateGroup[] = [];
  copiesByName.forEach((copies, name) => {
    if (copies.length < 2) return;

    // Sort a copy, highest score first; the sort is stable so ties keep input order.
    const ranked = copies.slice().sort((x, y) => canonicalScore(y) - canonicalScore(x));
    const canonical = ranked[0]!;

    const duplicates: Skill[] = [];
    const divergent: Skill[] = [];
    for (const other of ranked.slice(1)) {
      if (other.contentHash === canonical.contentHash) duplicates.push(other);
      else divergent.push(other);
    }

    groups.push({
      name,
      canonical,
      duplicates,
      divergent,
      // All hashes are equal exactly when nothing differs from canonical.
      identical: divergent.length === 0,
    });
  });

  groups.sort((g1, g2) => {
    if (g1.name < g2.name) return -1;
    if (g1.name > g2.name) return 1;
    return 0;
  });
  return groups;
}
55
+
56
/**
 * Select the groups that contain at least one divergent (drifted) copy, i.e.
 * the ones needing human review during migration.
 *
 * @param groups groups to filter; the input array is not modified
 * @returns a new array with the matching groups in their original order
 */
export function driftedGroups(groups: DuplicateGroup[]): DuplicateGroup[] {
  const drifted: DuplicateGroup[] = [];
  for (const group of groups) {
    if (group.divergent.length > 0) drifted.push(group);
  }
  return drifted;
}
63
+
64
/**
 * Select the exact-duplicate groups: those flagged `identical` that also list
 * at least one duplicate copy. Mirror copies are not filtered out here.
 *
 * @param groups groups to filter; the input array is not modified
 * @returns a new array with the matching groups in their original order
 */
export function exactDuplicateGroups(groups: DuplicateGroup[]): DuplicateGroup[] {
  const exact: DuplicateGroup[] = [];
  for (const group of groups) {
    if (!group.identical) continue;
    if (group.duplicates.length > 0) exact.push(group);
  }
  return exact;
}