skilld 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +171 -0
- package/dist/agents.d.mts +56 -0
- package/dist/agents.d.mts.map +1 -0
- package/dist/agents.mjs +148 -0
- package/dist/agents.mjs.map +1 -0
- package/dist/cli.d.mts +1 -0
- package/dist/cli.mjs +503 -0
- package/dist/cli.mjs.map +1 -0
- package/dist/index.d.mts +14 -0
- package/dist/index.d.mts.map +1 -0
- package/dist/index.mjs +181 -0
- package/dist/index.mjs.map +1 -0
- package/dist/npm.d.mts +48 -0
- package/dist/npm.d.mts.map +1 -0
- package/dist/npm.mjs +90 -0
- package/dist/npm.mjs.map +1 -0
- package/dist/split-text.d.mts +24 -0
- package/dist/split-text.d.mts.map +1 -0
- package/dist/split-text.mjs +87 -0
- package/dist/split-text.mjs.map +1 -0
- package/dist/types.d.mts +37 -0
- package/dist/types.d.mts.map +1 -0
- package/dist/types.mjs +1 -0
- package/package.json +52 -0
package/dist/index.mjs
ADDED
@@ -0,0 +1,181 @@
import { mkdirSync, writeFileSync } from "node:fs";
import { join as join$1 } from "node:path";
async function generateSkill(config, onProgress) {
	const { url, outputDir = ".skilld", chunkSize = 1e3, chunkOverlap = 200, maxPages = 100, skipLlmsTxt = false, model = "Xenova/bge-small-en-v1.5" } = config;
	const siteName = getSiteName(url);
	const skillDir = join$1(outputDir, siteName);
	const referencesDir = join$1(skillDir, "references");
	const dbPath = join$1(skillDir, "search.db");
	mkdirSync(referencesDir, { recursive: true });
	let docs;
	let skillContent;
	if (!skipLlmsTxt) {
		const llmsResult = await fetchFromLlmsTxt(url, maxPages, onProgress);
		if (llmsResult) {
			docs = llmsResult.docs;
			skillContent = llmsResult.llmsContent;
		} else docs = await crawlSite(url, maxPages, onProgress);
	} else docs = await crawlSite(url, maxPages, onProgress);
	if (docs.length === 0) throw new Error("No documents found to index");
	const skillPath = join$1(skillDir, "SKILL.md");
	if (skillContent) writeFileSync(skillPath, skillContent);
	const { splitText } = await import("./split-text.mjs");
	const { sqliteVec } = await import("retriv/db/sqlite-vec");
	const { transformers } = await import("retriv/embeddings/transformers");
	const documents = [];
	for (const doc of docs) {
		const chunks = splitText(doc.content, {
			chunkSize,
			chunkOverlap
		});
		for (const chunk of chunks) {
			const section = extractSection(chunk.text);
			const docId = chunks.length > 1 ? `${doc.url}#chunk-${chunk.index}` : doc.url;
			const prefix = [doc.title, section].filter(Boolean).join(" > ");
			const content = prefix ? `${prefix}\n\n${chunk.text}` : chunk.text;
			documents.push({
				id: docId,
				content,
				metadata: {
					source: doc.url,
					title: doc.title,
					...section && { section },
					...chunks.length > 1 && {
						chunkIndex: chunk.index,
						chunkTotal: chunks.length
					}
				}
			});
			writeFileSync(join$1(referencesDir, sanitizeFilename(docId) + ".md"), formatReferenceFile(docId, doc, section, chunk, chunks.length));
		}
	}
	onProgress?.({
		url: "embedding",
		count: documents.length,
		phase: "index"
	});
	const db = await sqliteVec({
		path: dbPath,
		embeddings: transformers({ model })
	});
	await db.index(documents);
	await db.close?.();
	return {
		siteName,
		skillPath,
		referencesDir,
		dbPath,
		chunkCount: documents.length
	};
}
function getSiteName(url) {
	return new URL(url).hostname.replace(/^www\./, "");
}
function sanitizeFilename(id) {
	return id.replace(/^https?:\/\//, "").replace(/[#?]/g, "-").replace(/[^a-z0-9.-]/gi, "-").replace(/-+/g, "-").replace(/^-|-$/g, "").slice(0, 100);
}
function extractSection(text) {
	const headings = [];
	for (const line of text.split("\n")) {
		const match = line.match(/^(#{1,6}) ([^\n]+)$/);
		if (match) {
			const level = match[1].length;
			const heading = match[2].trim();
			headings.length = level - 1;
			headings[level - 1] = heading;
		}
	}
	return headings.filter(Boolean).join(" > ") || void 0;
}
function formatReferenceFile(docId, doc, section, chunk, totalChunks) {
	const frontmatter = [
		"---",
		`id: "${docId}"`,
		`source: "${doc.url}"`,
		`title: "${doc.title}"`
	];
	if (section) frontmatter.push(`section: "${section}"`);
	if (totalChunks > 1) frontmatter.push(`chunk: ${chunk.index + 1}/${totalChunks}`);
	frontmatter.push("---", "");
	const prefix = [doc.title, section].filter(Boolean).join(" > ");
	return frontmatter.join("\n") + (prefix ? `${prefix}\n\n` : "") + chunk.text;
}
async function fetchFromLlmsTxt(baseUrl, maxPages, onProgress) {
	const llmsUrl = `${new URL(baseUrl).origin}/llms.txt`;
	const res = await fetch(llmsUrl, { headers: { "User-Agent": "skilld/1.0" } }).catch(() => null);
	if (!res?.ok) return null;
	const llmsContent = await res.text();
	if (llmsContent.length < 50) return null;
	const links = parseLinks(llmsContent);
	const docs = [];
	let count = 0;
	for (const { title, url } of links.slice(0, maxPages)) {
		count++;
		onProgress?.({
			url,
			count,
			phase: "fetch"
		});
		const content = await fetchMarkdown(url);
		if (content && content.length >= 50) docs.push({
			url,
			title,
			content
		});
	}
	return {
		docs,
		llmsContent
	};
}
function parseLinks(content) {
	const links = [];
	const linkRegex = /\[([^\]]+)\]\(([^)]+)\)/g;
	let match;
	while ((match = linkRegex.exec(content)) !== null) {
		const [, title, url] = match;
		if (url.includes("/raw/") || url.endsWith(".md")) links.push({
			title,
			url
		});
	}
	return links;
}
async function fetchMarkdown(url) {
	const res = await fetch(url, { headers: { "User-Agent": "skilld/1.0" } }).catch(() => null);
	if (!res?.ok) return null;
	return res.text();
}
async function crawlSite(url, maxPages, onProgress) {
	const { htmlToMarkdown } = await import("mdream");
	const { crawlAndGenerate } = await import("@mdream/crawl");
	const { tmpdir } = await import("node:os");
	const { join } = await import("node:path");
	const docs = [];
	let count = 0;
	const outputDir = join(tmpdir(), `skilld-crawl-${Date.now()}`);
	await crawlAndGenerate({
		urls: [url],
		outputDir,
		maxRequestsPerCrawl: maxPages,
		followLinks: true,
		onPage: async ({ url: pageUrl, html, title }) => {
			count++;
			onProgress?.({
				url: pageUrl,
				count,
				phase: "fetch"
			});
			const markdown = htmlToMarkdown(html, { origin: new URL(pageUrl).origin });
			if (markdown && markdown.length >= 50) docs.push({
				url: pageUrl,
				title: title || pageUrl,
				content: markdown
			});
		}
	});
	return docs;
}
export { generateSkill };

//# sourceMappingURL=index.mjs.map
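
A minimal consumer sketch of the generateSkill entry point above, assuming the package is installed and run on Node 18+ (global fetch available); the URL and the logging in the progress callback are illustrative only, not part of the package.

import { generateSkill } from "skilld";

// Build a skill for a (hypothetical) docs site, logging fetch/index progress.
const result = await generateSkill(
	{ url: "https://example-docs.dev", maxPages: 50 },
	({ url, count, phase }) => console.log(`[${phase}] ${count}: ${url}`),
);
// result.skillPath  -> .skilld/example-docs.dev/SKILL.md (only written when llms.txt was found)
// result.dbPath     -> .skilld/example-docs.dev/search.db
// result.chunkCount -> number of chunks indexed into the sqlite-vec database
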
package/dist/index.mjs.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"index.mjs","names":["join"],"sources":["../src/index.ts"],"sourcesContent":["import type { FetchedDoc, SkillConfig, SkillResult } from './types'\nimport { existsSync, mkdirSync, writeFileSync } from 'node:fs'\nimport { join } from 'node:path'\n\nexport type { FetchedDoc, SkillConfig, SkillResult }\n\n/**\n * Generate a skill from a documentation site\n */\nexport async function generateSkill(\n config: SkillConfig,\n onProgress?: (info: { url: string, count: number, phase: 'fetch' | 'index' }) => void,\n): Promise<SkillResult> {\n const {\n url,\n outputDir = '.skilld',\n chunkSize = 1000,\n chunkOverlap = 200,\n maxPages = 100,\n skipLlmsTxt = false,\n model = 'Xenova/bge-small-en-v1.5',\n } = config\n\n const siteName = getSiteName(url)\n const skillDir = join(outputDir, siteName)\n const referencesDir = join(skillDir, 'references')\n const dbPath = join(skillDir, 'search.db')\n\n mkdirSync(referencesDir, { recursive: true })\n\n // Fetch docs\n let docs: FetchedDoc[]\n let skillContent: string | undefined\n\n if (!skipLlmsTxt) {\n const llmsResult = await fetchFromLlmsTxt(url, maxPages, onProgress)\n if (llmsResult) {\n docs = llmsResult.docs\n skillContent = llmsResult.llmsContent\n }\n else {\n docs = await crawlSite(url, maxPages, onProgress)\n }\n }\n else {\n docs = await crawlSite(url, maxPages, onProgress)\n }\n\n if (docs.length === 0) {\n throw new Error('No documents found to index')\n }\n\n // Write SKILL.md\n const skillPath = join(skillDir, 'SKILL.md')\n if (skillContent) {\n writeFileSync(skillPath, skillContent)\n }\n\n // Chunk and index docs\n const { splitText } = await import('./split-text')\n const { sqliteVec } = await import('retriv/db/sqlite-vec')\n const { transformers } = await import('retriv/embeddings/transformers')\n\n const documents: Array<{ id: string, content: string, metadata: Record<string, any> }> = []\n\n for (const doc of docs) {\n const chunks = splitText(doc.content, { chunkSize, chunkOverlap })\n\n for (const chunk of chunks) {\n const section = extractSection(chunk.text)\n const docId = chunks.length > 1\n ? `${doc.url}#chunk-${chunk.index}`\n : doc.url\n\n // Prepend title/section for better semantic matching\n const prefix = [doc.title, section].filter(Boolean).join(' > ')\n const content = prefix ? 
`${prefix}\\n\\n${chunk.text}` : chunk.text\n\n documents.push({\n id: docId,\n content,\n metadata: {\n source: doc.url,\n title: doc.title,\n ...(section && { section }),\n ...(chunks.length > 1 && {\n chunkIndex: chunk.index,\n chunkTotal: chunks.length,\n }),\n },\n })\n\n // Write reference file\n const filename = sanitizeFilename(docId) + '.md'\n const refPath = join(referencesDir, filename)\n writeFileSync(refPath, formatReferenceFile(docId, doc, section, chunk, chunks.length))\n }\n }\n\n // Index with retriv\n onProgress?.({ url: 'embedding', count: documents.length, phase: 'index' })\n\n const db = await sqliteVec({\n path: dbPath,\n embeddings: transformers({ model }),\n })\n\n await db.index(documents)\n await db.close?.()\n\n return {\n siteName,\n skillPath,\n referencesDir,\n dbPath,\n chunkCount: documents.length,\n }\n}\n\nfunction getSiteName(url: string): string {\n const urlObj = new URL(url)\n return urlObj.hostname.replace(/^www\\./, '')\n}\n\nfunction sanitizeFilename(id: string): string {\n return id\n .replace(/^https?:\\/\\//, '')\n .replace(/[#?]/g, '-')\n .replace(/[^a-z0-9.-]/gi, '-')\n .replace(/-+/g, '-')\n .replace(/^-|-$/g, '')\n .slice(0, 100)\n}\n\nfunction extractSection(text: string): string | undefined {\n const headings: string[] = []\n for (const line of text.split('\\n')) {\n const match = line.match(/^(#{1,6}) ([^\\n]+)$/)\n if (match) {\n const level = match[1]!.length\n const heading = match[2]!.trim()\n headings.length = level - 1\n headings[level - 1] = heading\n }\n }\n const section = headings.filter(Boolean).join(' > ')\n return section || undefined\n}\n\nfunction formatReferenceFile(\n docId: string,\n doc: FetchedDoc,\n section: string | undefined,\n chunk: { text: string, index: number },\n totalChunks: number,\n): string {\n const frontmatter = [\n '---',\n `id: \"${docId}\"`,\n `source: \"${doc.url}\"`,\n `title: \"${doc.title}\"`,\n ]\n if (section)\n frontmatter.push(`section: \"${section}\"`)\n if (totalChunks > 1)\n frontmatter.push(`chunk: ${chunk.index + 1}/${totalChunks}`)\n frontmatter.push('---', '')\n\n const prefix = [doc.title, section].filter(Boolean).join(' > ')\n return frontmatter.join('\\n') + (prefix ? 
`${prefix}\\n\\n` : '') + chunk.text\n}\n\nasync function fetchFromLlmsTxt(\n baseUrl: string,\n maxPages: number,\n onProgress?: (info: { url: string, count: number, phase: 'fetch' | 'index' }) => void,\n): Promise<{ docs: FetchedDoc[], llmsContent: string } | null> {\n const urlObj = new URL(baseUrl)\n const llmsUrl = `${urlObj.origin}/llms.txt`\n\n const res = await fetch(llmsUrl, {\n headers: { 'User-Agent': 'skilld/1.0' },\n }).catch(() => null)\n\n if (!res?.ok)\n return null\n\n const llmsContent = await res.text()\n if (llmsContent.length < 50)\n return null\n\n // Parse markdown links\n const links = parseLinks(llmsContent)\n const docs: FetchedDoc[] = []\n\n let count = 0\n for (const { title, url } of links.slice(0, maxPages)) {\n count++\n onProgress?.({ url, count, phase: 'fetch' })\n\n const content = await fetchMarkdown(url)\n if (content && content.length >= 50) {\n docs.push({ url, title, content })\n }\n }\n\n return { docs, llmsContent }\n}\n\nfunction parseLinks(content: string): Array<{ title: string, url: string }> {\n const links: Array<{ title: string, url: string }> = []\n const linkRegex = /\\[([^\\]]+)\\]\\(([^)]+)\\)/g\n let match\n\n while ((match = linkRegex.exec(content)) !== null) {\n const [, title, url] = match\n if (url.includes('/raw/') || url.endsWith('.md')) {\n links.push({ title, url })\n }\n }\n\n return links\n}\n\nasync function fetchMarkdown(url: string): Promise<string | null> {\n const res = await fetch(url, {\n headers: { 'User-Agent': 'skilld/1.0' },\n }).catch(() => null)\n\n if (!res?.ok)\n return null\n\n return res.text()\n}\n\nasync function crawlSite(\n url: string,\n maxPages: number,\n onProgress?: (info: { url: string, count: number, phase: 'fetch' | 'index' }) => void,\n): Promise<FetchedDoc[]> {\n const { htmlToMarkdown } = await import('mdream')\n const { crawlAndGenerate } = await import('@mdream/crawl')\n const { tmpdir } = await import('node:os')\n const { join } = await import('node:path')\n\n const docs: FetchedDoc[] = []\n let count = 0\n const outputDir = join(tmpdir(), `skilld-crawl-${Date.now()}`)\n\n await crawlAndGenerate({\n urls: [url],\n outputDir,\n maxRequestsPerCrawl: maxPages,\n followLinks: true,\n onPage: async ({ url: pageUrl, html, title }) => {\n count++\n onProgress?.({ url: pageUrl, count, phase: 'fetch' })\n\n const urlObj = new URL(pageUrl)\n const markdown = htmlToMarkdown(html, { origin: urlObj.origin })\n\n if (markdown && markdown.length >= 50) {\n docs.push({ url: pageUrl, title: title || pageUrl, content: markdown })\n }\n },\n })\n\n return 
docs\n}\n"],"mappings":";;AASA,eAAsB,cACpB,QACA,YACsB;CACtB,MAAM,EACJ,KACA,YAAY,WACZ,YAAY,KACZ,eAAe,KACf,WAAW,KACX,cAAc,OACd,QAAQ,+BACN;CAEJ,MAAM,WAAW,YAAY,IAAI;CACjC,MAAM,WAAWA,OAAK,WAAW,SAAS;CAC1C,MAAM,gBAAgBA,OAAK,UAAU,aAAa;CAClD,MAAM,SAASA,OAAK,UAAU,YAAY;AAE1C,WAAU,eAAe,EAAE,WAAW,MAAM,CAAC;CAG7C,IAAI;CACJ,IAAI;AAEJ,KAAI,CAAC,aAAa;EAChB,MAAM,aAAa,MAAM,iBAAiB,KAAK,UAAU,WAAW;AACpE,MAAI,YAAY;AACd,UAAO,WAAW;AAClB,kBAAe,WAAW;QAG1B,QAAO,MAAM,UAAU,KAAK,UAAU,WAAW;OAInD,QAAO,MAAM,UAAU,KAAK,UAAU,WAAW;AAGnD,KAAI,KAAK,WAAW,EAClB,OAAM,IAAI,MAAM,8BAA8B;CAIhD,MAAM,YAAYA,OAAK,UAAU,WAAW;AAC5C,KAAI,aACF,eAAc,WAAW,aAAa;CAIxC,MAAM,EAAE,cAAc,MAAM,OAAO;CACnC,MAAM,EAAE,cAAc,MAAM,OAAO;CACnC,MAAM,EAAE,iBAAiB,MAAM,OAAO;CAEtC,MAAM,YAAmF,EAAE;AAE3F,MAAK,MAAM,OAAO,MAAM;EACtB,MAAM,SAAS,UAAU,IAAI,SAAS;GAAE;GAAW;GAAc,CAAC;AAElE,OAAK,MAAM,SAAS,QAAQ;GAC1B,MAAM,UAAU,eAAe,MAAM,KAAK;GAC1C,MAAM,QAAQ,OAAO,SAAS,IAC1B,GAAG,IAAI,IAAI,SAAS,MAAM,UAC1B,IAAI;GAGR,MAAM,SAAS,CAAC,IAAI,OAAO,QAAQ,CAAC,OAAO,QAAQ,CAAC,KAAK,MAAM;GAC/D,MAAM,UAAU,SAAS,GAAG,OAAO,MAAM,MAAM,SAAS,MAAM;AAE9D,aAAU,KAAK;IACb,IAAI;IACJ;IACA,UAAU;KACR,QAAQ,IAAI;KACZ,OAAO,IAAI;KACX,GAAI,WAAW,EAAE,SAAS;KAC1B,GAAI,OAAO,SAAS,KAAK;MACvB,YAAY,MAAM;MAClB,YAAY,OAAO;MACpB;KACF;IACF,CAAC;AAKF,iBADgBA,OAAK,eADJ,iBAAiB,MAAM,GAAG,MACE,EACtB,oBAAoB,OAAO,KAAK,SAAS,OAAO,OAAO,OAAO,CAAC;;;AAK1F,cAAa;EAAE,KAAK;EAAa,OAAO,UAAU;EAAQ,OAAO;EAAS,CAAC;CAE3E,MAAM,KAAK,MAAM,UAAU;EACzB,MAAM;EACN,YAAY,aAAa,EAAE,OAAO,CAAC;EACpC,CAAC;AAEF,OAAM,GAAG,MAAM,UAAU;AACzB,OAAM,GAAG,SAAS;AAElB,QAAO;EACL;EACA;EACA;EACA;EACA,YAAY,UAAU;EACvB;;AAGH,SAAS,YAAY,KAAqB;AAExC,QADe,IAAI,IAAI,IAAI,CACb,SAAS,QAAQ,UAAU,GAAG;;AAG9C,SAAS,iBAAiB,IAAoB;AAC5C,QAAO,GACJ,QAAQ,gBAAgB,GAAG,CAC3B,QAAQ,SAAS,IAAI,CACrB,QAAQ,iBAAiB,IAAI,CAC7B,QAAQ,OAAO,IAAI,CACnB,QAAQ,UAAU,GAAG,CACrB,MAAM,GAAG,IAAI;;AAGlB,SAAS,eAAe,MAAkC;CACxD,MAAM,WAAqB,EAAE;AAC7B,MAAK,MAAM,QAAQ,KAAK,MAAM,KAAK,EAAE;EACnC,MAAM,QAAQ,KAAK,MAAM,sBAAsB;AAC/C,MAAI,OAAO;GACT,MAAM,QAAQ,MAAM,GAAI;GACxB,MAAM,UAAU,MAAM,GAAI,MAAM;AAChC,YAAS,SAAS,QAAQ;AAC1B,YAAS,QAAQ,KAAK;;;AAI1B,QADgB,SAAS,OAAO,QAAQ,CAAC,KAAK,MAAM,IAClC,KAAA;;AAGpB,SAAS,oBACP,OACA,KACA,SACA,OACA,aACQ;CACR,MAAM,cAAc;EAClB;EACA,QAAQ,MAAM;EACd,YAAY,IAAI,IAAI;EACpB,WAAW,IAAI,MAAM;EACtB;AACD,KAAI,QACF,aAAY,KAAK,aAAa,QAAQ,GAAG;AAC3C,KAAI,cAAc,EAChB,aAAY,KAAK,UAAU,MAAM,QAAQ,EAAE,GAAG,cAAc;AAC9D,aAAY,KAAK,OAAO,GAAG;CAE3B,MAAM,SAAS,CAAC,IAAI,OAAO,QAAQ,CAAC,OAAO,QAAQ,CAAC,KAAK,MAAM;AAC/D,QAAO,YAAY,KAAK,KAAK,IAAI,SAAS,GAAG,OAAO,QAAQ,MAAM,MAAM;;AAG1E,eAAe,iBACb,SACA,UACA,YAC6D;CAE7D,MAAM,UAAU,GADD,IAAI,IAAI,QAAQ,CACL,OAAO;CAEjC,MAAM,MAAM,MAAM,MAAM,SAAS,EAC/B,SAAS,EAAE,cAAc,cAAc,EACxC,CAAC,CAAC,YAAY,KAAK;AAEpB,KAAI,CAAC,KAAK,GACR,QAAO;CAET,MAAM,cAAc,MAAM,IAAI,MAAM;AACpC,KAAI,YAAY,SAAS,GACvB,QAAO;CAGT,MAAM,QAAQ,WAAW,YAAY;CACrC,MAAM,OAAqB,EAAE;CAE7B,IAAI,QAAQ;AACZ,MAAK,MAAM,EAAE,OAAO,SAAS,MAAM,MAAM,GAAG,SAAS,EAAE;AACrD;AACA,eAAa;GAAE;GAAK;GAAO,OAAO;GAAS,CAAC;EAE5C,MAAM,UAAU,MAAM,cAAc,IAAI;AACxC,MAAI,WAAW,QAAQ,UAAU,GAC/B,MAAK,KAAK;GAAE;GAAK;GAAO;GAAS,CAAC;;AAItC,QAAO;EAAE;EAAM;EAAa;;AAG9B,SAAS,WAAW,SAAwD;CAC1E,MAAM,QAA+C,EAAE;CACvD,MAAM,YAAY;CAClB,IAAI;AAEJ,SAAQ,QAAQ,UAAU,KAAK,QAAQ,MAAM,MAAM;EACjD,MAAM,GAAG,OAAO,OAAO;AACvB,MAAI,IAAI,SAAS,QAAQ,IAAI,IAAI,SAAS,MAAM,CAC9C,OAAM,KAAK;GAAE;GAAO;GAAK,CAAC;;AAI9B,QAAO;;AAGT,eAAe,cAAc,KAAqC;CAChE,MAAM,MAAM,MAAM,MAAM,KAAK,EAC3B,SAAS,EAAE,cAAc,cAAc,EACxC,CAAC,CAAC,YAAY,KAAK;AAEpB,KAAI,CAAC,KAAK,GACR,QAAO;AAET,QAAO,IAAI,MAAM;;AAGnB,eAAe,UACb,KACA,UACA,YACuB;CACvB,MAAM,EAAE,mBAAmB,MAAM,OAAO;CACxC,MAAM,EAAE,qBAAqB,MAAM,OAAO;CAC1C,MAAM,EAAE,WAAW,MAAM,OAAO;CAChC,MAAM,EAAE,SAAS,MAAM,OAAO;CAE9B,
MAAM,OAAqB,EAAE;CAC7B,IAAI,QAAQ;CACZ,MAAM,YAAY,KAAK,QAAQ,EAAE,gBAAgB,KAAK,KAAK,GAAG;AAE9D,OAAM,iBAAiB;EACrB,MAAM,CAAC,IAAI;EACX;EACA,qBAAqB;EACrB,aAAa;EACb,QAAQ,OAAO,EAAE,KAAK,SAAS,MAAM,YAAY;AAC/C;AACA,gBAAa;IAAE,KAAK;IAAS;IAAO,OAAO;IAAS,CAAC;GAGrD,MAAM,WAAW,eAAe,MAAM,EAAE,QADzB,IAAI,IAAI,QAAQ,CACwB,QAAQ,CAAC;AAEhE,OAAI,YAAY,SAAS,UAAU,GACjC,MAAK,KAAK;IAAE,KAAK;IAAS,OAAO,SAAS;IAAS,SAAS;IAAU,CAAC;;EAG5E,CAAC;AAEF,QAAO"}
package/dist/npm.d.mts
ADDED
@@ -0,0 +1,48 @@
//#region src/npm.d.ts
/**
 * NPM package discovery and documentation resolution
 */
interface NpmPackageInfo {
	name: string;
	version?: string;
	description?: string;
	homepage?: string;
	repository?: {
		type: string;
		url: string;
		directory?: string;
	};
	readme?: string;
}
interface ResolvedPackage {
	name: string;
	version?: string;
	description?: string;
	docsUrl?: string;
	llmsUrl?: string;
	readmeUrl?: string;
	repoUrl?: string;
}
/**
 * Fetch package info from npm registry
 */
declare function fetchNpmPackage(packageName: string): Promise<NpmPackageInfo | null>;
/**
 * Resolve documentation URL for a package
 */
declare function resolvePackageDocs(packageName: string): Promise<ResolvedPackage | null>;
interface LocalDependency {
	name: string;
	version: string;
}
/**
 * Read package.json dependencies with versions
 */
declare function readLocalDependencies(cwd: string): Promise<LocalDependency[]>;
/**
 * Get installed skill version from SKILL.md
 */
declare function getInstalledSkillVersion(skillDir: string): Promise<string | null>;
//#endregion
export { LocalDependency, NpmPackageInfo, ResolvedPackage, fetchNpmPackage, getInstalledSkillVersion, readLocalDependencies, resolvePackageDocs };
//# sourceMappingURL=npm.d.mts.map
package/dist/npm.d.mts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"npm.d.mts","names":[],"sources":["../src/npm.ts"],"mappings":";;AAIA;;UAAiB,cAAA;EACf,IAAA;EACA,OAAA;EACA,WAAA;EACA,QAAA;EACA,UAAA;IACE,IAAA;IACA,GAAA;IACA,SAAA;EAAA;EAEF,MAAA;AAAA;AAAA,UAGe,eAAA;EACf,IAAA;EACA,OAAA;EACA,WAAA;EACA,OAAA;EACA,OAAA;EACA,SAAA;EACA,OAAA;AAAA;;;;iBAMoB,eAAA,CAAgB,WAAA,WAAsB,OAAA,CAAQ,cAAA;;AAApE;;iBAYsB,kBAAA,CAAmB,WAAA,WAAsB,OAAA,CAAQ,eAAA;AAAA,UA0EtD,eAAA;EACf,IAAA;EACA,OAAA;AAAA;;;AA5EF;iBAkFsB,qBAAA,CAAsB,GAAA,WAAc,OAAA,CAAQ,eAAA;;;;iBA+B5C,wBAAA,CACpB,QAAA,WACC,OAAA"}
package/dist/npm.mjs
ADDED
@@ -0,0 +1,90 @@
async function fetchNpmPackage(packageName) {
	const res = await fetch(`https://registry.npmjs.org/${packageName}/latest`, { headers: { "User-Agent": "skilld/1.0" } }).catch(() => null);
	if (!res?.ok) return null;
	return res.json();
}
async function resolvePackageDocs(packageName) {
	const pkg = await fetchNpmPackage(packageName);
	if (!pkg) return null;
	const result = {
		name: pkg.name,
		version: pkg.version,
		description: pkg.description
	};
	if (pkg.repository?.url) result.repoUrl = pkg.repository.url.replace(/^git\+/, "").replace(/\.git$/, "").replace(/^git:\/\//, "https://").replace(/^ssh:\/\/git@github\.com/, "https://github.com");
	if (pkg.homepage && !isGitHubRepoUrl(pkg.homepage)) {
		result.docsUrl = pkg.homepage;
		const llmsUrl = `${pkg.homepage.replace(/\/$/, "")}/llms.txt`;
		if (await verifyUrl(llmsUrl)) result.llmsUrl = llmsUrl;
	}
	if (result.repoUrl?.includes("github.com")) {
		const match = result.repoUrl.match(/github\.com\/([^/]+)\/([^/]+)/);
		if (match) {
			const owner = match[1];
			const repo = match[2];
			const subdir = pkg.repository?.directory;
			const unghUrl = subdir ? `https://ungh.cc/repos/${owner}/${repo}/files/main/${subdir}/README.md` : `https://ungh.cc/repos/${owner}/${repo}/readme`;
			if ((await fetch(unghUrl, { headers: { "User-Agent": "skilld/1.0" } }).catch(() => null))?.ok) result.readmeUrl = `ungh://${owner}/${repo}${subdir ? `/${subdir}` : ""}`;
			else {
				const basePath = subdir ? `${subdir}/` : "";
				for (const branch of ["main", "master"]) {
					const readmeUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${basePath}README.md`;
					if (await verifyUrl(readmeUrl)) {
						result.readmeUrl = readmeUrl;
						break;
					}
				}
			}
		}
	}
	if (!result.docsUrl && !result.llmsUrl && !result.readmeUrl) return null;
	return result;
}
async function readLocalDependencies(cwd) {
	const { readFileSync, existsSync } = await import("node:fs");
	const { join } = await import("node:path");
	const pkgPath = join(cwd, "package.json");
	if (!existsSync(pkgPath)) throw new Error("No package.json found in current directory");
	const pkg = JSON.parse(readFileSync(pkgPath, "utf-8"));
	const deps = {
		...pkg.dependencies,
		...pkg.devDependencies
	};
	return Object.entries(deps).filter(([name]) => !name.startsWith("@types/") && ![
		"typescript",
		"eslint",
		"prettier",
		"vitest",
		"jest"
	].includes(name)).map(([name, version]) => ({
		name,
		version: version.replace(/^[\^~>=<]/, "")
	}));
}
async function getInstalledSkillVersion(skillDir) {
	const { readFileSync, existsSync } = await import("node:fs");
	const { join } = await import("node:path");
	const skillPath = join(skillDir, "SKILL.md");
	if (!existsSync(skillPath)) return null;
	return readFileSync(skillPath, "utf-8").match(/^version:\s*"?([^"\n]+)"?/m)?.[1] || null;
}
function isGitHubRepoUrl(url) {
	try {
		const parsed = new URL(url);
		if (parsed.hostname === "github.com" || parsed.hostname === "www.github.com") return parsed.pathname.split("/").filter(Boolean).length <= 2;
		return false;
	} catch {
		return false;
	}
}
async function verifyUrl(url) {
	const res = await fetch(url, {
		method: "HEAD",
		headers: { "User-Agent": "skilld/1.0" }
	}).catch(() => null);
	if (!res?.ok) return false;
	return !(res.headers.get("content-type") || "").includes("text/html");
}
export { fetchNpmPackage, getInstalledSkillVersion, readLocalDependencies, resolvePackageDocs };

//# sourceMappingURL=npm.mjs.map
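
A short sketch of how the npm helpers above compose: list local dependencies, then resolve a docs source for each. The package's exports map only exposes the index entry, so the import path below points straight at the dist file; that path and the use of process.cwd() are assumptions for illustration, and the code needs network access to registry.npmjs.org.

import { readLocalDependencies, resolvePackageDocs } from "./dist/npm.mjs";

// For each dependency in package.json, print the first documentation source found.
const deps = await readLocalDependencies(process.cwd());
for (const dep of deps) {
	const resolved = await resolvePackageDocs(dep.name);
	if (resolved) console.log(dep.name, resolved.llmsUrl ?? resolved.docsUrl ?? resolved.readmeUrl);
}
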
package/dist/npm.mjs.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"npm.mjs","names":[],"sources":["../src/npm.ts"],"sourcesContent":["/**\n * NPM package discovery and documentation resolution\n */\n\nexport interface NpmPackageInfo {\n name: string\n version?: string\n description?: string\n homepage?: string\n repository?: {\n type: string\n url: string\n directory?: string\n }\n readme?: string\n}\n\nexport interface ResolvedPackage {\n name: string\n version?: string\n description?: string\n docsUrl?: string\n llmsUrl?: string\n readmeUrl?: string\n repoUrl?: string\n}\n\n/**\n * Fetch package info from npm registry\n */\nexport async function fetchNpmPackage(packageName: string): Promise<NpmPackageInfo | null> {\n const res = await fetch(`https://registry.npmjs.org/${packageName}/latest`, {\n headers: { 'User-Agent': 'skilld/1.0' },\n }).catch(() => null)\n\n if (!res?.ok) return null\n return res.json()\n}\n\n/**\n * Resolve documentation URL for a package\n */\nexport async function resolvePackageDocs(packageName: string): Promise<ResolvedPackage | null> {\n const pkg = await fetchNpmPackage(packageName)\n if (!pkg) return null\n\n const result: ResolvedPackage = {\n name: pkg.name,\n version: pkg.version,\n description: pkg.description,\n }\n\n // Extract repo URL\n if (pkg.repository?.url) {\n const repoUrl = pkg.repository.url\n .replace(/^git\\+/, '')\n .replace(/\\.git$/, '')\n .replace(/^git:\\/\\//, 'https://')\n .replace(/^ssh:\\/\\/git@github\\.com/, 'https://github.com')\n result.repoUrl = repoUrl\n }\n\n // Try homepage for docs (skip if it's just a GitHub repo URL)\n if (pkg.homepage && !isGitHubRepoUrl(pkg.homepage)) {\n result.docsUrl = pkg.homepage\n\n // Check for llms.txt\n const llmsUrl = `${pkg.homepage.replace(/\\/$/, '')}/llms.txt`\n if (await verifyUrl(llmsUrl)) {\n result.llmsUrl = llmsUrl\n }\n }\n\n // GitHub README fallback via ungh (unjs GitHub API proxy)\n if (result.repoUrl?.includes('github.com')) {\n const match = result.repoUrl.match(/github\\.com\\/([^/]+)\\/([^/]+)/)\n if (match) {\n const owner = match[1]\n const repo = match[2]\n const subdir = pkg.repository?.directory\n\n // Use ungh for README (auto-detects branch, returns JSON with markdown field)\n const unghUrl = subdir\n ? `https://ungh.cc/repos/${owner}/${repo}/files/main/${subdir}/README.md`\n : `https://ungh.cc/repos/${owner}/${repo}/readme`\n\n // Store as ungh:// URL for special handling in CLI\n const unghRes = await fetch(unghUrl, {\n headers: { 'User-Agent': 'skilld/1.0' },\n }).catch(() => null)\n\n if (unghRes?.ok) {\n result.readmeUrl = `ungh://${owner}/${repo}${subdir ? `/${subdir}` : ''}`\n }\n else {\n // Fallback to raw.githubusercontent.com\n const basePath = subdir ? 
`${subdir}/` : ''\n for (const branch of ['main', 'master']) {\n const readmeUrl = `https://raw.githubusercontent.com/${owner}/${repo}/${branch}/${basePath}README.md`\n if (await verifyUrl(readmeUrl)) {\n result.readmeUrl = readmeUrl\n break\n }\n }\n }\n }\n }\n\n // Must have at least one source\n if (!result.docsUrl && !result.llmsUrl && !result.readmeUrl) {\n return null\n }\n\n return result\n}\n\nexport interface LocalDependency {\n name: string\n version: string\n}\n\n/**\n * Read package.json dependencies with versions\n */\nexport async function readLocalDependencies(cwd: string): Promise<LocalDependency[]> {\n const { readFileSync, existsSync } = await import('node:fs')\n const { join } = await import('node:path')\n\n const pkgPath = join(cwd, 'package.json')\n if (!existsSync(pkgPath)) {\n throw new Error('No package.json found in current directory')\n }\n\n const pkg = JSON.parse(readFileSync(pkgPath, 'utf-8'))\n const deps: Record<string, string> = {\n ...pkg.dependencies,\n ...pkg.devDependencies,\n }\n\n return Object.entries(deps)\n .filter(([name]) =>\n // Skip common non-doc packages\n !name.startsWith('@types/')\n && !['typescript', 'eslint', 'prettier', 'vitest', 'jest'].includes(name),\n )\n .map(([name, version]) => ({\n name,\n // Clean version string (remove ^, ~, etc.)\n version: version.replace(/^[\\^~>=<]/, ''),\n }))\n}\n\n/**\n * Get installed skill version from SKILL.md\n */\nexport async function getInstalledSkillVersion(\n skillDir: string,\n): Promise<string | null> {\n const { readFileSync, existsSync } = await import('node:fs')\n const { join } = await import('node:path')\n\n const skillPath = join(skillDir, 'SKILL.md')\n if (!existsSync(skillPath)) return null\n\n const content = readFileSync(skillPath, 'utf-8')\n const match = content.match(/^version:\\s*\"?([^\"\\n]+)\"?/m)\n return match?.[1] || null\n}\n\n/**\n * Check if URL is a GitHub repository page (not a docs site)\n */\nfunction isGitHubRepoUrl(url: string): boolean {\n try {\n const parsed = new URL(url)\n // GitHub repo URLs: github.com/owner/repo or github.com/owner/repo#readme\n if (parsed.hostname === 'github.com' || parsed.hostname === 'www.github.com') {\n const parts = parsed.pathname.split('/').filter(Boolean)\n // owner/repo pattern without additional paths like /docs\n return parts.length <= 2\n }\n return false\n }\n catch {\n return false\n }\n}\n\nasync function verifyUrl(url: string): Promise<boolean> {\n const res = await fetch(url, {\n method: 'HEAD',\n headers: { 'User-Agent': 'skilld/1.0' },\n }).catch(() => null)\n\n if (!res?.ok) return false\n\n const contentType = res.headers.get('content-type') || ''\n // Reject HTML (likely 404 page)\n return 
!contentType.includes('text/html')\n}\n"],"mappings":"AA8BA,eAAsB,gBAAgB,aAAqD;CACzF,MAAM,MAAM,MAAM,MAAM,8BAA8B,YAAY,UAAU,EAC1E,SAAS,EAAE,cAAc,cAAc,EACxC,CAAC,CAAC,YAAY,KAAK;AAEpB,KAAI,CAAC,KAAK,GAAI,QAAO;AACrB,QAAO,IAAI,MAAM;;AAMnB,eAAsB,mBAAmB,aAAsD;CAC7F,MAAM,MAAM,MAAM,gBAAgB,YAAY;AAC9C,KAAI,CAAC,IAAK,QAAO;CAEjB,MAAM,SAA0B;EAC9B,MAAM,IAAI;EACV,SAAS,IAAI;EACb,aAAa,IAAI;EAClB;AAGD,KAAI,IAAI,YAAY,IAMlB,QAAO,UALS,IAAI,WAAW,IAC5B,QAAQ,UAAU,GAAG,CACrB,QAAQ,UAAU,GAAG,CACrB,QAAQ,aAAa,WAAW,CAChC,QAAQ,4BAA4B,qBAAqB;AAK9D,KAAI,IAAI,YAAY,CAAC,gBAAgB,IAAI,SAAS,EAAE;AAClD,SAAO,UAAU,IAAI;EAGrB,MAAM,UAAU,GAAG,IAAI,SAAS,QAAQ,OAAO,GAAG,CAAC;AACnD,MAAI,MAAM,UAAU,QAAQ,CAC1B,QAAO,UAAU;;AAKrB,KAAI,OAAO,SAAS,SAAS,aAAa,EAAE;EAC1C,MAAM,QAAQ,OAAO,QAAQ,MAAM,gCAAgC;AACnE,MAAI,OAAO;GACT,MAAM,QAAQ,MAAM;GACpB,MAAM,OAAO,MAAM;GACnB,MAAM,SAAS,IAAI,YAAY;GAG/B,MAAM,UAAU,SACZ,yBAAyB,MAAM,GAAG,KAAK,cAAc,OAAO,cAC5D,yBAAyB,MAAM,GAAG,KAAK;AAO3C,QAJgB,MAAM,MAAM,SAAS,EACnC,SAAS,EAAE,cAAc,cAAc,EACxC,CAAC,CAAC,YAAY,KAAK,GAEP,GACX,QAAO,YAAY,UAAU,MAAM,GAAG,OAAO,SAAS,IAAI,WAAW;QAElE;IAEH,MAAM,WAAW,SAAS,GAAG,OAAO,KAAK;AACzC,SAAK,MAAM,UAAU,CAAC,QAAQ,SAAS,EAAE;KACvC,MAAM,YAAY,qCAAqC,MAAM,GAAG,KAAK,GAAG,OAAO,GAAG,SAAS;AAC3F,SAAI,MAAM,UAAU,UAAU,EAAE;AAC9B,aAAO,YAAY;AACnB;;;;;;AAQV,KAAI,CAAC,OAAO,WAAW,CAAC,OAAO,WAAW,CAAC,OAAO,UAChD,QAAO;AAGT,QAAO;;AAWT,eAAsB,sBAAsB,KAAyC;CACnF,MAAM,EAAE,cAAc,eAAe,MAAM,OAAO;CAClD,MAAM,EAAE,SAAS,MAAM,OAAO;CAE9B,MAAM,UAAU,KAAK,KAAK,eAAe;AACzC,KAAI,CAAC,WAAW,QAAQ,CACtB,OAAM,IAAI,MAAM,6CAA6C;CAG/D,MAAM,MAAM,KAAK,MAAM,aAAa,SAAS,QAAQ,CAAC;CACtD,MAAM,OAA+B;EACnC,GAAG,IAAI;EACP,GAAG,IAAI;EACR;AAED,QAAO,OAAO,QAAQ,KAAK,CACxB,QAAQ,CAAC,UAER,CAAC,KAAK,WAAW,UAAU,IACxB,CAAC;EAAC;EAAc;EAAU;EAAY;EAAU;EAAO,CAAC,SAAS,KAAK,CAC1E,CACA,KAAK,CAAC,MAAM,cAAc;EACzB;EAEA,SAAS,QAAQ,QAAQ,aAAa,GAAG;EAC1C,EAAE;;AAMP,eAAsB,yBACpB,UACwB;CACxB,MAAM,EAAE,cAAc,eAAe,MAAM,OAAO;CAClD,MAAM,EAAE,SAAS,MAAM,OAAO;CAE9B,MAAM,YAAY,KAAK,UAAU,WAAW;AAC5C,KAAI,CAAC,WAAW,UAAU,CAAE,QAAO;AAInC,QAFgB,aAAa,WAAW,QAAQ,CAC1B,MAAM,6BAA6B,GAC1C,MAAM;;AAMvB,SAAS,gBAAgB,KAAsB;AAC7C,KAAI;EACF,MAAM,SAAS,IAAI,IAAI,IAAI;AAE3B,MAAI,OAAO,aAAa,gBAAgB,OAAO,aAAa,iBAG1D,QAFc,OAAO,SAAS,MAAM,IAAI,CAAC,OAAO,QAAQ,CAE3C,UAAU;AAEzB,SAAO;SAEH;AACJ,SAAO;;;AAIX,eAAe,UAAU,KAA+B;CACtD,MAAM,MAAM,MAAM,MAAM,KAAK;EAC3B,QAAQ;EACR,SAAS,EAAE,cAAc,cAAc;EACxC,CAAC,CAAC,YAAY,KAAK;AAEpB,KAAI,CAAC,KAAK,GAAI,QAAO;AAIrB,QAAO,EAFa,IAAI,QAAQ,IAAI,eAAe,IAAI,IAEnC,SAAS,YAAY"}

package/dist/split-text.d.mts
ADDED
@@ -0,0 +1,24 @@
//#region src/split-text.d.ts
/**
 * Recursive markdown text splitter (LangChain-style)
 */
interface SplitTextOptions {
	chunkSize?: number;
	chunkOverlap?: number;
	separators?: string[];
}
interface TextChunk {
	text: string;
	index: number;
	/** Character range [start, end] in original text */
	range: [number, number];
	/** Line range [startLine, endLine] (1-indexed) */
	lines: [number, number];
}
/**
 * Split text recursively using markdown-aware separators
 */
declare function splitText(text: string, options?: SplitTextOptions): TextChunk[];
//#endregion
export { SplitTextOptions, TextChunk, splitText };
//# sourceMappingURL=split-text.d.mts.map
package/dist/split-text.d.mts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"split-text.d.mts","names":[],"sources":["../src/split-text.ts"],"mappings":";;AAoBA;;UAAiB,gBAAA;EACf,SAAA;EACA,YAAA;EACA,UAAA;AAAA;AAAA,UAGe,SAAA;EACf,IAAA;EACA,KAAA;EAFwB;EAIxB,KAAA;EAJwB;EAMxB,KAAA;AAAA;;;;iBAkBc,SAAA,CACd,IAAA,UACA,OAAA,GAAS,gBAAA,GACR,SAAA"}

package/dist/split-text.mjs
ADDED
@@ -0,0 +1,87 @@
const MARKDOWN_SEPARATORS = [
	"\n## ",
	"\n### ",
	"\n#### ",
	"\n##### ",
	"\n###### ",
	"```\n\n",
	"\n\n***\n\n",
	"\n\n---\n\n",
	"\n\n___\n\n",
	"\n\n",
	"\n",
	" ",
	""
];
function offsetToLine(text, offset) {
	let line = 1;
	for (let i = 0; i < offset && i < text.length; i++) if (text[i] === "\n") line++;
	return line;
}
function splitText(text, options = {}) {
	const { chunkSize = 1e3, chunkOverlap = 200, separators = MARKDOWN_SEPARATORS } = options;
	if (text.length <= chunkSize) {
		const endLine = offsetToLine(text, text.length);
		return [{
			text,
			index: 0,
			range: [0, text.length],
			lines: [1, endLine]
		}];
	}
	return mergeChunks(splitRecursive(text, chunkSize, separators), chunkSize, chunkOverlap, text);
}
function splitRecursive(text, chunkSize, separators) {
	if (text.length <= chunkSize || separators.length === 0) return [text];
	const separator = separators.find((sep) => sep === "" || text.includes(sep));
	if (!separator && separator !== "") return [text];
	const parts = separator === "" ? [...text] : text.split(separator);
	const results = [];
	for (let i = 0; i < parts.length; i++) {
		const part = parts[i];
		const withSep = i < parts.length - 1 && separator !== "" ? part + separator : part;
		if (withSep.length <= chunkSize) results.push(withSep);
		else {
			const subParts = splitRecursive(withSep, chunkSize, separators.slice(1));
			results.push(...subParts);
		}
	}
	return results;
}
function mergeChunks(parts, chunkSize, chunkOverlap, originalText) {
	const chunks = [];
	let current = "";
	let currentStart = 0;
	for (const part of parts) if (current.length + part.length <= chunkSize) current += part;
	else {
		if (current) {
			const start = originalText.indexOf(current, currentStart);
			const actualStart = start >= 0 ? start : currentStart;
			const actualEnd = actualStart + current.length;
			chunks.push({
				text: current,
				index: chunks.length,
				range: [actualStart, actualEnd],
				lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)]
			});
			currentStart = Math.max(0, actualStart + current.length - chunkOverlap);
		}
		if (chunkOverlap > 0 && current.length > chunkOverlap) current = current.slice(-chunkOverlap) + part;
		else current = part;
	}
	if (current) {
		const start = originalText.indexOf(current, currentStart);
		const actualStart = start >= 0 ? start : currentStart;
		const actualEnd = start >= 0 ? start + current.length : originalText.length;
		chunks.push({
			text: current,
			index: chunks.length,
			range: [actualStart, actualEnd],
			lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)]
		});
	}
	return chunks;
}
export { splitText };

//# sourceMappingURL=split-text.mjs.map
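
A small usage sketch of splitText above, again importing straight from the dist file (an assumption, since only the index entry is exported); the sample markdown and the deliberately tiny chunk sizes are made up to force a split and show the overlap behaviour.

import { splitText } from "./dist/split-text.mjs";

// Chunk a short markdown document; chunkSize is small so it splits on "## " headings.
const markdown = "# Guide\n\n## Install\n\nRun the installer.\n\n## Usage\n\nCall the API.";
const chunks = splitText(markdown, { chunkSize: 40, chunkOverlap: 10 });
for (const chunk of chunks) {
	// Each chunk carries its character range and 1-indexed line range in the source text.
	console.log(chunk.index, chunk.range, chunk.lines, JSON.stringify(chunk.text));
}
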
package/dist/split-text.mjs.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"split-text.mjs","names":[],"sources":["../src/split-text.ts"],"sourcesContent":["/**\n * Recursive markdown text splitter (LangChain-style)\n */\n\nconst MARKDOWN_SEPARATORS = [\n '\\n## ',\n '\\n### ',\n '\\n#### ',\n '\\n##### ',\n '\\n###### ',\n '```\\n\\n',\n '\\n\\n***\\n\\n',\n '\\n\\n---\\n\\n',\n '\\n\\n___\\n\\n',\n '\\n\\n',\n '\\n',\n ' ',\n '',\n]\n\nexport interface SplitTextOptions {\n chunkSize?: number\n chunkOverlap?: number\n separators?: string[]\n}\n\nexport interface TextChunk {\n text: string\n index: number\n /** Character range [start, end] in original text */\n range: [number, number]\n /** Line range [startLine, endLine] (1-indexed) */\n lines: [number, number]\n}\n\n/**\n * Convert character offset to line number (1-indexed)\n */\nfunction offsetToLine(text: string, offset: number): number {\n let line = 1\n for (let i = 0; i < offset && i < text.length; i++) {\n if (text[i] === '\\n')\n line++\n }\n return line\n}\n\n/**\n * Split text recursively using markdown-aware separators\n */\nexport function splitText(\n text: string,\n options: SplitTextOptions = {},\n): TextChunk[] {\n const {\n chunkSize = 1000,\n chunkOverlap = 200,\n separators = MARKDOWN_SEPARATORS,\n } = options\n\n if (text.length <= chunkSize) {\n const endLine = offsetToLine(text, text.length)\n return [{ text, index: 0, range: [0, text.length], lines: [1, endLine] }]\n }\n\n const chunks = splitRecursive(text, chunkSize, separators)\n return mergeChunks(chunks, chunkSize, chunkOverlap, text)\n}\n\nfunction splitRecursive(\n text: string,\n chunkSize: number,\n separators: string[],\n): string[] {\n if (text.length <= chunkSize || separators.length === 0) {\n return [text]\n }\n\n const separator = separators.find(sep => sep === '' || text.includes(sep))\n if (!separator && separator !== '') {\n return [text]\n }\n\n const parts = separator === '' ? [...text] : text.split(separator)\n const results: string[] = []\n\n for (let i = 0; i < parts.length; i++) {\n const part = parts[i]!\n const withSep = i < parts.length - 1 && separator !== ''\n ? part + separator\n : part\n\n if (withSep.length <= chunkSize) {\n results.push(withSep)\n }\n else {\n // Recurse with remaining separators\n const subParts = splitRecursive(withSep, chunkSize, separators.slice(1))\n results.push(...subParts)\n }\n }\n\n return results\n}\n\nfunction mergeChunks(\n parts: string[],\n chunkSize: number,\n chunkOverlap: number,\n originalText: string,\n): TextChunk[] {\n const chunks: TextChunk[] = []\n let current = ''\n let currentStart = 0\n\n for (const part of parts) {\n if (current.length + part.length <= chunkSize) {\n current += part\n }\n else {\n if (current) {\n const start = originalText.indexOf(current, currentStart)\n const actualStart = start >= 0 ? start : currentStart\n const actualEnd = actualStart + current.length\n chunks.push({\n text: current,\n index: chunks.length,\n range: [actualStart, actualEnd],\n lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)],\n })\n currentStart = Math.max(0, actualStart + current.length - chunkOverlap)\n }\n\n // Start new chunk, possibly with overlap from previous\n if (chunkOverlap > 0 && current.length > chunkOverlap) {\n const overlap = current.slice(-chunkOverlap)\n current = overlap + part\n }\n else {\n current = part\n }\n }\n }\n\n // Don't forget the last chunk\n if (current) {\n const start = originalText.indexOf(current, currentStart)\n const actualStart = start >= 0 ? 
start : currentStart\n const actualEnd = start >= 0 ? start + current.length : originalText.length\n chunks.push({\n text: current,\n index: chunks.length,\n range: [actualStart, actualEnd],\n lines: [offsetToLine(originalText, actualStart), offsetToLine(originalText, actualEnd)],\n })\n }\n\n return chunks\n}\n"],"mappings":"AAIA,MAAM,sBAAsB;CAC1B;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACD;AAoBD,SAAS,aAAa,MAAc,QAAwB;CAC1D,IAAI,OAAO;AACX,MAAK,IAAI,IAAI,GAAG,IAAI,UAAU,IAAI,KAAK,QAAQ,IAC7C,KAAI,KAAK,OAAO,KACd;AAEJ,QAAO;;AAMT,SAAgB,UACd,MACA,UAA4B,EAAE,EACjB;CACb,MAAM,EACJ,YAAY,KACZ,eAAe,KACf,aAAa,wBACX;AAEJ,KAAI,KAAK,UAAU,WAAW;EAC5B,MAAM,UAAU,aAAa,MAAM,KAAK,OAAO;AAC/C,SAAO,CAAC;GAAE;GAAM,OAAO;GAAG,OAAO,CAAC,GAAG,KAAK,OAAO;GAAE,OAAO,CAAC,GAAG,QAAQ;GAAE,CAAC;;AAI3E,QAAO,YADQ,eAAe,MAAM,WAAW,WAAW,EAC/B,WAAW,cAAc,KAAK;;AAG3D,SAAS,eACP,MACA,WACA,YACU;AACV,KAAI,KAAK,UAAU,aAAa,WAAW,WAAW,EACpD,QAAO,CAAC,KAAK;CAGf,MAAM,YAAY,WAAW,MAAK,QAAO,QAAQ,MAAM,KAAK,SAAS,IAAI,CAAC;AAC1E,KAAI,CAAC,aAAa,cAAc,GAC9B,QAAO,CAAC,KAAK;CAGf,MAAM,QAAQ,cAAc,KAAK,CAAC,GAAG,KAAK,GAAG,KAAK,MAAM,UAAU;CAClE,MAAM,UAAoB,EAAE;AAE5B,MAAK,IAAI,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;EACrC,MAAM,OAAO,MAAM;EACnB,MAAM,UAAU,IAAI,MAAM,SAAS,KAAK,cAAc,KAClD,OAAO,YACP;AAEJ,MAAI,QAAQ,UAAU,UACpB,SAAQ,KAAK,QAAQ;OAElB;GAEH,MAAM,WAAW,eAAe,SAAS,WAAW,WAAW,MAAM,EAAE,CAAC;AACxE,WAAQ,KAAK,GAAG,SAAS;;;AAI7B,QAAO;;AAGT,SAAS,YACP,OACA,WACA,cACA,cACa;CACb,MAAM,SAAsB,EAAE;CAC9B,IAAI,UAAU;CACd,IAAI,eAAe;AAEnB,MAAK,MAAM,QAAQ,MACjB,KAAI,QAAQ,SAAS,KAAK,UAAU,UAClC,YAAW;MAER;AACH,MAAI,SAAS;GACX,MAAM,QAAQ,aAAa,QAAQ,SAAS,aAAa;GACzD,MAAM,cAAc,SAAS,IAAI,QAAQ;GACzC,MAAM,YAAY,cAAc,QAAQ;AACxC,UAAO,KAAK;IACV,MAAM;IACN,OAAO,OAAO;IACd,OAAO,CAAC,aAAa,UAAU;IAC/B,OAAO,CAAC,aAAa,cAAc,YAAY,EAAE,aAAa,cAAc,UAAU,CAAC;IACxF,CAAC;AACF,kBAAe,KAAK,IAAI,GAAG,cAAc,QAAQ,SAAS,aAAa;;AAIzE,MAAI,eAAe,KAAK,QAAQ,SAAS,aAEvC,WADgB,QAAQ,MAAM,CAAC,aAAa,GACxB;MAGpB,WAAU;;AAMhB,KAAI,SAAS;EACX,MAAM,QAAQ,aAAa,QAAQ,SAAS,aAAa;EACzD,MAAM,cAAc,SAAS,IAAI,QAAQ;EACzC,MAAM,YAAY,SAAS,IAAI,QAAQ,QAAQ,SAAS,aAAa;AACrE,SAAO,KAAK;GACV,MAAM;GACN,OAAO,OAAO;GACd,OAAO,CAAC,aAAa,UAAU;GAC/B,OAAO,CAAC,aAAa,cAAc,YAAY,EAAE,aAAa,cAAc,UAAU,CAAC;GACxF,CAAC;;AAGJ,QAAO"}
package/dist/types.d.mts
ADDED
@@ -0,0 +1,37 @@
//#region src/types.d.ts
interface SkillConfig {
	/** Base URL or llms.txt URL */
	url: string;
	/** Output directory for skill files */
	outputDir?: string;
	/** Chunk size in characters */
	chunkSize?: number;
	/** Chunk overlap in characters */
	chunkOverlap?: number;
	/** Max pages to fetch */
	maxPages?: number;
	/** Skip llms.txt check and always crawl */
	skipLlmsTxt?: boolean;
	/** Embedding model */
	model?: string;
}
interface SkillResult {
	/** Site name (hostname) */
	siteName: string;
	/** Path to SKILL.md */
	skillPath: string;
	/** Path to references directory */
	referencesDir: string;
	/** Path to search database */
	dbPath: string;
	/** Number of chunks indexed */
	chunkCount: number;
}
interface FetchedDoc {
	url: string;
	title: string;
	content: string;
}
//#endregion
export { FetchedDoc, SkillConfig, SkillResult };
//# sourceMappingURL=types.d.mts.map
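
For reference, a config literal matching the SkillConfig shape above, assuming the type is re-exported from the package index as the source map for index.mjs suggests; the url is a placeholder, and the other values simply spell out the defaults that generateSkill applies when they are omitted.

import type { SkillConfig } from "skilld";

// Every optional field written out with the default generateSkill would use anyway.
const config: SkillConfig = {
	url: "https://example-docs.dev",
	outputDir: ".skilld",
	chunkSize: 1000,
	chunkOverlap: 200,
	maxPages: 100,
	skipLlmsTxt: false,
	model: "Xenova/bge-small-en-v1.5",
};
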
package/dist/types.d.mts.map
ADDED
@@ -0,0 +1 @@
{"version":3,"file":"types.d.mts","names":[],"sources":["../src/types.ts"],"mappings":";UAAiB,WAAA;EAAA;EAEf,GAAA;;EAEA,SAAA;EAFA;EAIA,SAAA;EAAA;EAEA,YAAA;EAEA;EAAA,QAAA;EAIA;EAFA,WAAA;EAEK;EAAL,KAAA;AAAA;AAAA,UAGe,WAAA;EAAW;EAE1B,QAAA;EAEA;EAAA,SAAA;EAIA;EAFA,aAAA;EAIU;EAFV,MAAA;EAKe;EAHf,UAAA;AAAA;AAAA,UAGe,UAAA;EACf,GAAA;EACA,KAAA;EACA,OAAA;AAAA"}
package/dist/types.mjs
ADDED
@@ -0,0 +1 @@
export {};
package/package.json
ADDED
@@ -0,0 +1,52 @@
{
	"name": "skilld",
	"type": "module",
	"version": "0.0.1",
	"description": "Generate searchable skills from documentation sites using llms.txt and crawling.",
	"author": {
		"name": "Harlan Wilton",
		"email": "harlan@harlanzw.com",
		"url": "https://harlanzw.com/"
	},
	"license": "MIT",
	"repository": {
		"type": "git",
		"url": "https://github.com/harlan-zw/skilld"
	},
	"exports": {
		".": {
			"types": "./dist/index.d.mts",
			"import": "./dist/index.mjs"
		}
	},
	"main": "./dist/index.mjs",
	"types": "./dist/index.d.mts",
	"bin": {
		"skilld": "./dist/cli.mjs"
	},
	"files": [
		"dist"
	],
	"dependencies": {
		"@huggingface/transformers": "^3.8.1",
		"@mdream/crawl": "^0.15.3",
		"citty": "^0.1.6",
		"consola": "^3.4.2",
		"mdream": "^0.15.3",
		"retriv": "link:../retriv"
	},
	"devDependencies": {
		"@antfu/eslint-config": "^6.7.3",
		"@types/node": "^22.10.0",
		"obuild": "^0.4.14",
		"typescript": "^5.9.3",
		"vitest": "^4.0.16"
	},
	"scripts": {
		"build": "obuild",
		"dev:prepare": "obuild --stub",
		"lint": "eslint .",
		"typecheck": "tsc --noEmit",
		"test": "vitest"
	}
}