skilld 1.7.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/_chunks/add.mjs +66 -0
- package/dist/_chunks/add.mjs.map +1 -0
- package/dist/_chunks/agent-prompt.mjs +88 -0
- package/dist/_chunks/agent-prompt.mjs.map +1 -0
- package/dist/_chunks/agent.mjs +737 -619
- package/dist/_chunks/agent.mjs.map +1 -1
- package/dist/_chunks/args.mjs +42 -0
- package/dist/_chunks/args.mjs.map +1 -0
- package/dist/_chunks/assemble.mjs +11 -8
- package/dist/_chunks/assemble.mjs.map +1 -1
- package/dist/_chunks/author.mjs +77 -131
- package/dist/_chunks/author.mjs.map +1 -1
- package/dist/_chunks/cache.mjs +320 -54
- package/dist/_chunks/cache.mjs.map +1 -1
- package/dist/_chunks/cache2.mjs +7 -6
- package/dist/_chunks/cache2.mjs.map +1 -1
- package/dist/_chunks/client.mjs +117 -0
- package/dist/_chunks/client.mjs.map +1 -0
- package/dist/_chunks/core.mjs +7 -4
- package/dist/_chunks/detect.mjs +54 -44
- package/dist/_chunks/detect.mjs.map +1 -1
- package/dist/_chunks/eject.mjs +69 -0
- package/dist/_chunks/eject.mjs.map +1 -0
- package/dist/_chunks/embedding-cache2.mjs +2 -2
- package/dist/_chunks/env.mjs +19 -0
- package/dist/_chunks/env.mjs.map +1 -0
- package/dist/_chunks/install-many.mjs +376 -0
- package/dist/_chunks/install-many.mjs.map +1 -0
- package/dist/_chunks/install.mjs +86 -371
- package/dist/_chunks/install.mjs.map +1 -1
- package/dist/_chunks/intro.mjs +63 -0
- package/dist/_chunks/intro.mjs.map +1 -0
- package/dist/_chunks/list.mjs +2 -2
- package/dist/_chunks/list.mjs.map +1 -1
- package/dist/_chunks/lockfile.mjs +31 -7
- package/dist/_chunks/lockfile.mjs.map +1 -1
- package/dist/_chunks/login.mjs +233 -0
- package/dist/_chunks/login.mjs.map +1 -0
- package/dist/_chunks/logout.mjs +27 -0
- package/dist/_chunks/logout.mjs.map +1 -0
- package/dist/_chunks/map.mjs +11 -0
- package/dist/_chunks/map.mjs.map +1 -0
- package/dist/_chunks/markdown.mjs +79 -54
- package/dist/_chunks/markdown.mjs.map +1 -1
- package/dist/_chunks/menu.mjs +33 -0
- package/dist/_chunks/menu.mjs.map +1 -0
- package/dist/_chunks/model-picker.mjs +61 -0
- package/dist/_chunks/model-picker.mjs.map +1 -0
- package/dist/_chunks/monorepo.mjs +73 -0
- package/dist/_chunks/monorepo.mjs.map +1 -0
- package/dist/_chunks/package-json.mjs.map +1 -1
- package/dist/_chunks/paths.mjs +47 -0
- package/dist/_chunks/paths.mjs.map +1 -0
- package/dist/_chunks/pipeline.mjs +985 -0
- package/dist/_chunks/pipeline.mjs.map +1 -0
- package/dist/_chunks/pool2.mjs +2 -2
- package/dist/_chunks/portable.mjs +151 -0
- package/dist/_chunks/portable.mjs.map +1 -0
- package/dist/_chunks/prepare-hook.mjs +2 -0
- package/dist/_chunks/prepare-hook2.mjs +61 -0
- package/dist/_chunks/prepare-hook2.mjs.map +1 -0
- package/dist/_chunks/prepare.mjs +47 -3
- package/dist/_chunks/prepare.mjs.map +1 -1
- package/dist/_chunks/prepare2.mjs +9 -8
- package/dist/_chunks/prepare2.mjs.map +1 -1
- package/dist/_chunks/prompts.mjs +784 -26
- package/dist/_chunks/prompts.mjs.map +1 -1
- package/dist/_chunks/pull.mjs +219 -0
- package/dist/_chunks/pull.mjs.map +1 -0
- package/dist/_chunks/regex.mjs +19 -0
- package/dist/_chunks/regex.mjs.map +1 -0
- package/dist/_chunks/retriv.mjs +2 -171
- package/dist/_chunks/retriv2.mjs +159 -0
- package/dist/_chunks/retriv2.mjs.map +1 -0
- package/dist/_chunks/sanitize.mjs +12 -9
- package/dist/_chunks/sanitize.mjs.map +1 -1
- package/dist/_chunks/search-helpers.mjs +9 -8
- package/dist/_chunks/search-helpers.mjs.map +1 -1
- package/dist/_chunks/search-interactive.mjs +23 -20
- package/dist/_chunks/search-interactive.mjs.map +1 -1
- package/dist/_chunks/search.mjs +3 -4
- package/dist/_chunks/search.mjs.map +1 -1
- package/dist/_chunks/{sources.mjs → semver.mjs} +1128 -838
- package/dist/_chunks/semver.mjs.map +1 -0
- package/dist/_chunks/skill-installer.mjs +2 -0
- package/dist/_chunks/skill-installer2.mjs +154 -0
- package/dist/_chunks/skill-installer2.mjs.map +1 -0
- package/dist/_chunks/skills.mjs +12 -12
- package/dist/_chunks/skills.mjs.map +1 -1
- package/dist/_chunks/store.mjs +107 -0
- package/dist/_chunks/store.mjs.map +1 -0
- package/dist/_chunks/sync.mjs +761 -1349
- package/dist/_chunks/sync.mjs.map +1 -1
- package/dist/_chunks/sync2.mjs +2 -3
- package/dist/_chunks/telemetry.mjs +26 -0
- package/dist/_chunks/telemetry.mjs.map +1 -0
- package/dist/_chunks/uninstall.mjs +15 -13
- package/dist/_chunks/uninstall.mjs.map +1 -1
- package/dist/_chunks/update.mjs +171 -0
- package/dist/_chunks/update.mjs.map +1 -0
- package/dist/_chunks/upload.mjs +4 -4
- package/dist/_chunks/validate.mjs +1 -1
- package/dist/_chunks/version.mjs +16 -27
- package/dist/_chunks/version.mjs.map +1 -1
- package/dist/_chunks/whoami.mjs +21 -0
- package/dist/_chunks/whoami.mjs.map +1 -0
- package/dist/_chunks/wizard.mjs +2 -190
- package/dist/_chunks/wizard2.mjs +200 -0
- package/dist/_chunks/wizard2.mjs.map +1 -0
- package/dist/cli.mjs +77 -59
- package/dist/cli.mjs.map +1 -1
- package/dist/prepare.mjs +5 -4
- package/dist/prepare.mjs.map +1 -1
- package/dist/retriv/worker.d.mts +5 -1
- package/dist/retriv/worker.d.mts.map +1 -1
- package/dist/retriv/worker.mjs +1 -1
- package/package.json +20 -29
- package/dist/_chunks/author-group.mjs +0 -17
- package/dist/_chunks/author-group.mjs.map +0 -1
- package/dist/_chunks/cli-helpers.mjs +0 -335
- package/dist/_chunks/cli-helpers.mjs.map +0 -1
- package/dist/_chunks/cli-helpers2.mjs +0 -2
- package/dist/_chunks/config.mjs +0 -122
- package/dist/_chunks/config.mjs.map +0 -1
- package/dist/_chunks/index.d.mts +0 -151
- package/dist/_chunks/index.d.mts.map +0 -1
- package/dist/_chunks/index2.d.mts +0 -44
- package/dist/_chunks/index2.d.mts.map +0 -1
- package/dist/_chunks/index3.d.mts +0 -589
- package/dist/_chunks/index3.d.mts.map +0 -1
- package/dist/_chunks/prefix.mjs +0 -108
- package/dist/_chunks/prefix.mjs.map +0 -1
- package/dist/_chunks/retriv.mjs.map +0 -1
- package/dist/_chunks/setup.mjs +0 -17
- package/dist/_chunks/setup.mjs.map +0 -1
- package/dist/_chunks/shared.mjs +0 -503
- package/dist/_chunks/shared.mjs.map +0 -1
- package/dist/_chunks/skill.mjs +0 -329
- package/dist/_chunks/skill.mjs.map +0 -1
- package/dist/_chunks/sources.mjs.map +0 -1
- package/dist/_chunks/sync-registry.mjs +0 -59
- package/dist/_chunks/sync-registry.mjs.map +0 -1
- package/dist/_chunks/sync-shared.mjs +0 -2
- package/dist/_chunks/sync-shared2.mjs +0 -1020
- package/dist/_chunks/sync-shared2.mjs.map +0 -1
- package/dist/_chunks/types.d.mts +0 -88
- package/dist/_chunks/types.d.mts.map +0 -1
- package/dist/_chunks/wizard.mjs.map +0 -1
- package/dist/agent/index.d.mts +0 -346
- package/dist/agent/index.d.mts.map +0 -1
- package/dist/agent/index.mjs +0 -5
- package/dist/cache/index.d.mts +0 -2
- package/dist/cache/index.mjs +0 -4
- package/dist/index.d.mts +0 -5
- package/dist/index.mjs +0 -5
- package/dist/retriv/index.d.mts +0 -3
- package/dist/retriv/index.mjs +0 -2
- package/dist/sources/index.d.mts +0 -2
- package/dist/sources/index.mjs +0 -3
- package/dist/types.d.mts +0 -4
- package/dist/types.mjs +0 -1
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
const API_CHANGE_BULLET_RE = /^- (?:BREAKING|DEPRECATED|NEW): /m;
|
|
2
|
+
/** Matches one or more consecutive commas and/or whitespace characters. */
const COMMA_OR_WHITESPACE_RE = /[,\s]+/;
|
|
3
|
+
const GIT_PROTOCOL_PREFIX_RE = /^git:\/\//;
|
|
4
|
+
const GIT_SUFFIX_RE = /\.git$/;
|
|
5
|
+
const GITHUB_SSH_URL_PREFIX_RE = /^ssh:\/\/git@github\.com/;
|
|
6
|
+
const GIT_PLUS_PREFIX_RE = /^git\+/;
|
|
7
|
+
const LEADING_SLASH_RE = /^\//;
|
|
8
|
+
const README_FILENAME_RE = /^readme\.md$/i;
|
|
9
|
+
const SECTION_HEADING_RE = /^##\s/m;
|
|
10
|
+
const SEMVER_MAJOR_MINOR_RE = /^(\d+)\.(\d+)/;
|
|
11
|
+
const SOURCE_LINK_RE = /\[source\]/;
|
|
12
|
+
const TRAILING_SLASH_RE = /\/$/;
|
|
13
|
+
const V_PREFIX_RE = /^v/;
|
|
14
|
+
/** Matches a leading run of one or more of the characters ^ ~ > = <. */
const VERSION_RANGE_PREFIX_RE = /^[\^~>=<]+/;
|
|
15
|
+
const NPM_SCOPE_PREFIX_RE = /^@/;
|
|
16
|
+
const NPM_SCOPE_WITH_SLASH_RE = /^@.*\//;
|
|
17
|
+
export { GIT_PROTOCOL_PREFIX_RE as a, NPM_SCOPE_PREFIX_RE as c, SECTION_HEADING_RE as d, SEMVER_MAJOR_MINOR_RE as f, V_PREFIX_RE as g, VERSION_RANGE_PREFIX_RE as h, GIT_PLUS_PREFIX_RE as i, NPM_SCOPE_WITH_SLASH_RE as l, TRAILING_SLASH_RE as m, COMMA_OR_WHITESPACE_RE as n, GIT_SUFFIX_RE as o, SOURCE_LINK_RE as p, GITHUB_SSH_URL_PREFIX_RE as r, LEADING_SLASH_RE as s, API_CHANGE_BULLET_RE as t, README_FILENAME_RE as u };
|
|
18
|
+
|
|
19
|
+
//# sourceMappingURL=regex.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"regex.mjs","names":[],"sources":["../../src/core/regex.ts"],"sourcesContent":["export const API_CHANGE_BULLET_RE = /^- (?:BREAKING|DEPRECATED|NEW): /m\nexport const COMMA_OR_WHITESPACE_RE = /[,\\s]+/\nexport const GIT_PROTOCOL_PREFIX_RE = /^git:\\/\\//\nexport const GIT_SUFFIX_RE = /\\.git$/\nexport const GITHUB_SSH_URL_PREFIX_RE = /^ssh:\\/\\/git@github\\.com/\nexport const GIT_PLUS_PREFIX_RE = /^git\\+/\nexport const LEADING_SLASH_RE = /^\\//\nexport const README_FILENAME_RE = /^readme\\.md$/i\nexport const SECTION_HEADING_RE = /^##\\s/m\nexport const SEMVER_MAJOR_MINOR_RE = /^(\\d+)\\.(\\d+)/\nexport const SOURCE_LINK_RE = /\\[source\\]/\nexport const TRAILING_SLASH_RE = /\\/$/\nexport const V_PREFIX_RE = /^v/\nexport const VERSION_RANGE_PREFIX_RE = /^[\\^~>=<]+/\nexport const NPM_SCOPE_PREFIX_RE = /^@/\nexport const NPM_SCOPE_WITH_SLASH_RE = /^@.*\\//\n"],"mappings":"AAAA,MAAa,uBAAuB;AACpC,MAAa,yBAAyB;AACtC,MAAa,yBAAyB;AACtC,MAAa,gBAAgB;AAC7B,MAAa,2BAA2B;AACxC,MAAa,qBAAqB;AAClC,MAAa,mBAAmB;AAChC,MAAa,qBAAqB;AAClC,MAAa,qBAAqB;AAClC,MAAa,wBAAwB;AACrC,MAAa,iBAAiB;AAC9B,MAAa,oBAAoB;AACjC,MAAa,cAAc;AAC3B,MAAa,0BAA0B;AACvC,MAAa,sBAAsB;AACnC,MAAa,0BAA0B"}
|
package/dist/_chunks/retriv.mjs
CHANGED
|
@@ -1,171 +1,2 @@
|
|
|
1
|
-
import {
|
|
2
|
-
|
|
3
|
-
constructor(cause, message) {
|
|
4
|
-
super(message ?? "Search dependencies unavailable (sqlite-vec or retriv not installed). Search indexing skipped.");
|
|
5
|
-
this.name = "SearchDepsUnavailableError";
|
|
6
|
-
this.cause = cause;
|
|
7
|
-
}
|
|
8
|
-
};
|
|
9
|
-
let _fts5Available = null;
|
|
10
|
-
function checkFts5() {
|
|
11
|
-
if (_fts5Available !== null) return _fts5Available;
|
|
12
|
-
const nodeSqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
|
|
13
|
-
if (!nodeSqlite) {
|
|
14
|
-
_fts5Available = false;
|
|
15
|
-
return false;
|
|
16
|
-
}
|
|
17
|
-
const db = new nodeSqlite.DatabaseSync(":memory:");
|
|
18
|
-
try {
|
|
19
|
-
db.exec("CREATE VIRTUAL TABLE _fts5_probe USING fts5(content)");
|
|
20
|
-
db.exec("DROP TABLE _fts5_probe");
|
|
21
|
-
_fts5Available = true;
|
|
22
|
-
} catch {
|
|
23
|
-
_fts5Available = false;
|
|
24
|
-
} finally {
|
|
25
|
-
db.close();
|
|
26
|
-
}
|
|
27
|
-
return _fts5Available;
|
|
28
|
-
}
|
|
29
|
-
async function getDb(config) {
|
|
30
|
-
if (!checkFts5()) throw new SearchDepsUnavailableError(/* @__PURE__ */ new Error("FTS5 module not available"), "SQLite FTS5 module not available. Search indexing skipped. On Windows, run from WSL where FTS5 is included.");
|
|
31
|
-
let createRetriv, autoChunker, sqliteMod, sqliteVec, transformersJs, cachedEmbeddings;
|
|
32
|
-
try {
|
|
33
|
-
[{createRetriv}, {autoChunker}, sqliteMod, sqliteVec, {transformersJs}, {cachedEmbeddings}] = await Promise.all([
|
|
34
|
-
import("retriv"),
|
|
35
|
-
import("retriv/chunkers/auto"),
|
|
36
|
-
import("retriv/db/sqlite"),
|
|
37
|
-
import("sqlite-vec"),
|
|
38
|
-
import("retriv/embeddings/transformers-js"),
|
|
39
|
-
import("./embedding-cache.mjs")
|
|
40
|
-
]);
|
|
41
|
-
} catch (err) {
|
|
42
|
-
if (err?.code === "ERR_MODULE_NOT_FOUND") throw new SearchDepsUnavailableError(err);
|
|
43
|
-
throw err;
|
|
44
|
-
}
|
|
45
|
-
const embeddings = await cachedEmbeddings(transformersJs());
|
|
46
|
-
return createRetriv({
|
|
47
|
-
driver: sqliteMod.default({
|
|
48
|
-
path: config.dbPath,
|
|
49
|
-
embeddings,
|
|
50
|
-
sqliteVec
|
|
51
|
-
}),
|
|
52
|
-
chunking: autoChunker()
|
|
53
|
-
});
|
|
54
|
-
}
|
|
55
|
-
async function createIndexDirect(documents, config) {
|
|
56
|
-
const db = await getDb(config);
|
|
57
|
-
if (config.removeIds?.length) await db.remove?.(config.removeIds);
|
|
58
|
-
await db.index(documents, { onProgress: config.onProgress });
|
|
59
|
-
await db.close?.();
|
|
60
|
-
}
|
|
61
|
-
async function createIndex(documents, config) {
|
|
62
|
-
const { createIndexInWorker } = await import("./pool.mjs");
|
|
63
|
-
return createIndexInWorker(documents, config);
|
|
64
|
-
}
|
|
65
|
-
async function listIndexIds(config) {
|
|
66
|
-
const nodeSqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
|
|
67
|
-
if (!nodeSqlite) return [];
|
|
68
|
-
const db = new nodeSqlite.DatabaseSync(config.dbPath, {
|
|
69
|
-
open: true,
|
|
70
|
-
readOnly: true
|
|
71
|
-
});
|
|
72
|
-
try {
|
|
73
|
-
return db.prepare("SELECT id FROM documents_meta").all().map((r) => r.id);
|
|
74
|
-
} finally {
|
|
75
|
-
db.close();
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
async function removeFromIndex(ids, config) {
|
|
79
|
-
if (ids.length === 0) return;
|
|
80
|
-
const db = await getDb(config);
|
|
81
|
-
await db.remove?.(ids);
|
|
82
|
-
await db.close?.();
|
|
83
|
-
}
|
|
84
|
-
async function search(query, config, options = {}) {
|
|
85
|
-
const { limit = 10, filter } = options;
|
|
86
|
-
const db = await getDb(config);
|
|
87
|
-
const results = await db.search(query, {
|
|
88
|
-
limit,
|
|
89
|
-
filter,
|
|
90
|
-
returnContent: true,
|
|
91
|
-
returnMetadata: true,
|
|
92
|
-
returnMeta: true
|
|
93
|
-
});
|
|
94
|
-
await db.close?.();
|
|
95
|
-
return results.map((r) => ({
|
|
96
|
-
id: r.id,
|
|
97
|
-
content: r.content ?? "",
|
|
98
|
-
score: r.score,
|
|
99
|
-
metadata: r.metadata ?? {},
|
|
100
|
-
highlights: r._meta?.highlights ?? [],
|
|
101
|
-
lineRange: r._chunk?.lineRange,
|
|
102
|
-
entities: r._chunk?.entities,
|
|
103
|
-
scope: r._chunk?.scope
|
|
104
|
-
}));
|
|
105
|
-
}
|
|
106
|
-
async function searchSnippets(query, config, options = {}) {
|
|
107
|
-
return toSnippets(await search(query, config, options));
|
|
108
|
-
}
|
|
109
|
-
function toSnippets(results) {
|
|
110
|
-
return results.map((r) => {
|
|
111
|
-
const content = stripFrontmatter(r.content);
|
|
112
|
-
const source = r.metadata.source || r.id;
|
|
113
|
-
const lines = content.split("\n").length;
|
|
114
|
-
return {
|
|
115
|
-
package: r.metadata.package || "unknown",
|
|
116
|
-
source,
|
|
117
|
-
lineStart: r.lineRange?.[0] ?? 1,
|
|
118
|
-
lineEnd: r.lineRange?.[1] ?? lines,
|
|
119
|
-
content,
|
|
120
|
-
score: r.score,
|
|
121
|
-
highlights: r.highlights,
|
|
122
|
-
entities: r.entities,
|
|
123
|
-
scope: r.scope
|
|
124
|
-
};
|
|
125
|
-
});
|
|
126
|
-
}
|
|
127
|
-
async function openPool(dbPaths) {
|
|
128
|
-
const pool = /* @__PURE__ */ new Map();
|
|
129
|
-
await Promise.all(dbPaths.map(async (dbPath) => {
|
|
130
|
-
const db = await getDb({ dbPath });
|
|
131
|
-
pool.set(dbPath, db);
|
|
132
|
-
}));
|
|
133
|
-
return pool;
|
|
134
|
-
}
|
|
135
|
-
async function searchPooled(query, pool, options = {}) {
|
|
136
|
-
const { limit = 10, filter } = options;
|
|
137
|
-
const fetchLimit = limit * 2;
|
|
138
|
-
const allResults = await Promise.all(Array.from(pool.values(), async (db) => {
|
|
139
|
-
return (await db.search(query, {
|
|
140
|
-
limit: fetchLimit,
|
|
141
|
-
filter,
|
|
142
|
-
returnContent: true,
|
|
143
|
-
returnMetadata: true,
|
|
144
|
-
returnMeta: true
|
|
145
|
-
})).map((r) => ({
|
|
146
|
-
id: r.id,
|
|
147
|
-
content: r.content ?? "",
|
|
148
|
-
score: r.score,
|
|
149
|
-
metadata: r.metadata ?? {},
|
|
150
|
-
highlights: r._meta?.highlights ?? [],
|
|
151
|
-
lineRange: r._chunk?.lineRange,
|
|
152
|
-
entities: r._chunk?.entities,
|
|
153
|
-
scope: r._chunk?.scope
|
|
154
|
-
}));
|
|
155
|
-
}));
|
|
156
|
-
const seen = /* @__PURE__ */ new Set();
|
|
157
|
-
return toSnippets(allResults.flat().sort((a, b) => b.score - a.score).filter((r) => {
|
|
158
|
-
const lr = r.lineRange;
|
|
159
|
-
const key = `${r.metadata.source || r.id}:${lr?.[0]}-${lr?.[1]}`;
|
|
160
|
-
if (seen.has(key)) return false;
|
|
161
|
-
seen.add(key);
|
|
162
|
-
return true;
|
|
163
|
-
}).slice(0, limit));
|
|
164
|
-
}
|
|
165
|
-
async function closePool(pool) {
|
|
166
|
-
await Promise.all(Array.from(pool.values(), (db) => db.close?.()));
|
|
167
|
-
pool.clear();
|
|
168
|
-
}
|
|
169
|
-
export { getDb as a, removeFromIndex as c, searchSnippets as d, createIndexDirect as i, search as l, closePool as n, listIndexIds as o, createIndex as r, openPool as s, SearchDepsUnavailableError as t, searchPooled as u };
|
|
170
|
-
|
|
171
|
-
//# sourceMappingURL=retriv.mjs.map
|
|
1
|
+
import { i as getDb } from "./retriv2.mjs";
|
|
2
|
+
export { getDb };
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { a as stripFrontmatter } from "./markdown.mjs";
|
|
2
|
+
/**
 * Error type signalling that the search stack cannot be used.
 *
 * @param {unknown} cause - Underlying error; stored on the instance as `cause`.
 * @param {string} [message] - Optional override; when null/undefined a generic
 *   "dependencies unavailable" message is used.
 */
var SearchDepsUnavailableError = class extends Error {
  constructor(cause, message) {
    const text = message ?? "Search dependencies unavailable (sqlite-vec or retriv not installed). Search indexing skipped.";
    super(text);
    // Plain assignments: both fields end up as own, enumerable properties.
    Object.assign(this, { name: "SearchDepsUnavailableError", cause });
  }
};
|
|
9
|
+
// Memoized probe result: null until the first probe, then true/false.
let _fts5Available = null;

/**
 * Reports whether the SQLite FTS5 module can be used in this process.
 *
 * The first call creates and drops an FTS5 virtual table in an in-memory
 * database obtained from the built-in `node:sqlite` module; the outcome is
 * cached in `_fts5Available` and returned by later calls without re-probing.
 *
 * @returns {boolean} false when `node:sqlite` is not available or the probe
 *   statement throws, true otherwise.
 */
function checkFts5() {
  if (_fts5Available === null) {
    const sqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
    if (sqlite) {
      const probe = new sqlite.DatabaseSync(":memory:");
      try {
        probe.exec("CREATE VIRTUAL TABLE _fts5_probe USING fts5(content)");
        probe.exec("DROP TABLE _fts5_probe");
        _fts5Available = true;
      } catch {
        // Any failure of the probe statements is treated as "FTS5 not compiled in".
        _fts5Available = false;
      } finally {
        probe.close();
      }
    } else {
      _fts5Available = false;
    }
  }
  return _fts5Available;
}
|
|
29
|
+
/**
 * Creates a retriv instance using the SQLite driver for `config.dbPath`.
 *
 * All search dependencies are loaded with dynamic `import()` so that a missing
 * optional package surfaces as a catchable error instead of failing at module
 * load time.
 *
 * @param {{ dbPath: string }} config - Only `dbPath` is read here.
 * @returns {Promise<object>} Whatever `createRetriv` returns.
 * @throws {SearchDepsUnavailableError} When the FTS5 probe fails, or when any
 *   of the dynamic imports rejects with code `ERR_MODULE_NOT_FOUND`.
 *   Other import errors are rethrown unchanged.
 */
async function getDb(config) {
  if (!checkFts5()) {
    throw new SearchDepsUnavailableError(
      /* @__PURE__ */ new Error("FTS5 module not available"),
      "SQLite FTS5 module not available. Search indexing skipped. On Windows, run from WSL where FTS5 is included."
    );
  }

  let loaded;
  try {
    loaded = await Promise.all([
      import("retriv"),
      import("retriv/chunkers/auto"),
      import("retriv/db/sqlite"),
      import("sqlite-vec"),
      import("retriv/embeddings/transformers-js"),
      import("./embedding-cache.mjs")
    ]);
  } catch (err) {
    // Only a missing module is translated; anything else is a real failure.
    if (err?.code === "ERR_MODULE_NOT_FOUND") throw new SearchDepsUnavailableError(err);
    throw err;
  }

  const [retrivMod, chunkerMod, sqliteMod, sqliteVec, transformersMod, cacheMod] = loaded;
  const { createRetriv } = retrivMod;
  const { autoChunker } = chunkerMod;
  const { transformersJs } = transformersMod;
  const { cachedEmbeddings } = cacheMod;

  const embeddings = await cachedEmbeddings(transformersJs());
  const driver = sqliteMod.default({
    path: config.dbPath,
    embeddings,
    sqliteVec
  });
  return createRetriv({ driver, chunking: autoChunker() });
}
|
|
55
|
+
/**
 * Indexes `documents` by delegating to `createIndexInWorker` from `./pool.mjs`.
 *
 * The pool module is loaded lazily on each call rather than at module load.
 *
 * @param {Array<object>} documents - Forwarded unchanged.
 * @param {object} config - Forwarded unchanged.
 * @returns {Promise<*>} The result of `createIndexInWorker(documents, config)`.
 */
async function createIndex(documents, config) {
  const poolModule = await import("./pool.mjs");
  const runInWorker = poolModule.createIndexInWorker;
  return runInWorker(documents, config);
}
|
|
59
|
+
/**
 * Lists every `id` stored in the `documents_meta` table of the index database.
 *
 * The database at `config.dbPath` is opened read-only through the built-in
 * `node:sqlite` module and is always closed before returning.
 *
 * @param {{ dbPath: string }} config - Only `dbPath` is read here.
 * @returns {Promise<string[]>} The ids, or an empty array when `node:sqlite`
 *   is not available in this runtime.
 */
async function listIndexIds(config) {
  const sqlite = globalThis.process?.getBuiltinModule?.("node:sqlite");
  if (!sqlite) return [];

  const handle = new sqlite.DatabaseSync(config.dbPath, {
    open: true,
    readOnly: true
  });
  try {
    const rows = handle.prepare("SELECT id FROM documents_meta").all();
    return rows.map((row) => row.id);
  } finally {
    handle.close();
  }
}
|
|
72
|
+
/**
 * Runs a single query against the index at `config.dbPath`.
 *
 * A database handle is opened via `getDb` for the duration of the call and is
 * closed afterwards, whether the query resolves or rejects.
 *
 * @param {string} query - Search text passed to the retriv instance.
 * @param {object} config - Passed to `getDb`.
 * @param {{ limit?: number, filter?: object }} [options] - `limit` defaults to 10.
 * @returns {Promise<Array<object>>} One normalized record per hit with the keys
 *   `id`, `content`, `score`, `metadata`, `highlights`, `lineRange`,
 *   `entities` and `scope`.
 * @throws Rethrows any error from `getDb` or from the underlying search.
 */
async function search(query, config, options = {}) {
  const { limit = 10, filter } = options;
  const db = await getDb(config);
  let results;
  try {
    results = await db.search(query, {
      limit,
      filter,
      returnContent: true,
      returnMetadata: true,
      returnMeta: true
    });
  } finally {
    // Close on the failure path too; previously a rejected search leaked the handle.
    await db.close?.();
  }
  return results.map((r) => ({
    id: r.id,
    content: r.content ?? "",
    score: r.score,
    metadata: r.metadata ?? {},
    highlights: r._meta?.highlights ?? [],
    lineRange: r._chunk?.lineRange,
    entities: r._chunk?.entities,
    scope: r._chunk?.scope
  }));
}
|
|
94
|
+
/**
 * Runs `search` and converts its hits into snippet records via `toSnippets`.
 *
 * @param {string} query - Forwarded to `search`.
 * @param {object} config - Forwarded to `search`.
 * @param {object} [options] - Forwarded to `search`; defaults to `{}`.
 * @returns {Promise<Array<object>>} The value returned by `toSnippets`.
 */
async function searchSnippets(query, config, options = {}) {
  const hits = await search(query, config, options);
  return toSnippets(hits);
}
|
|
97
|
+
/**
 * Converts normalized search hits into snippet records.
 *
 * For each hit the content is passed through `stripFrontmatter`; `source`
 * falls back to the hit id, `package` to "unknown", `lineStart` to 1 and
 * `lineEnd` to the number of lines in the stripped content.
 *
 * @param {Array<object>} results - Hits with `id`, `content`, `score`,
 *   `metadata`, `highlights` and optional `lineRange`, `entities`, `scope`.
 * @returns {Array<object>} One snippet per hit, in the same order.
 */
function toSnippets(results) {
  const snippets = [];
  for (const hit of results) {
    const body = stripFrontmatter(hit.content);
    const origin = hit.metadata.source || hit.id;
    const lineCount = body.split("\n").length;
    const range = hit.lineRange;
    snippets.push({
      package: hit.metadata.package || "unknown",
      source: origin,
      lineStart: range?.[0] ?? 1,
      lineEnd: range?.[1] ?? lineCount,
      content: body,
      score: hit.score,
      highlights: hit.highlights,
      entities: hit.entities,
      scope: hit.scope
    });
  }
  return snippets;
}
|
|
115
|
+
/**
 * Opens one database per path concurrently and returns them keyed by path.
 *
 * If any database fails to open, every database that did open is closed
 * before the error is rethrown, so a partial failure does not leak handles.
 *
 * @param {string[]} dbPaths - Paths passed to `getDb` as `{ dbPath }`.
 * @returns {Promise<Map<string, object>>} Map from path to the opened instance.
 * @throws Rethrows the rejection reason of the first path (in input order)
 *   whose `getDb` call failed.
 */
async function openPool(dbPaths) {
  const pool = /* @__PURE__ */ new Map();
  // allSettled (not all): wait for every open attempt so that successful ones
  // can be cleaned up when a sibling fails.
  const outcomes = await Promise.allSettled(dbPaths.map(async (dbPath) => {
    const db = await getDb({ dbPath });
    pool.set(dbPath, db);
  }));
  const failure = outcomes.find((outcome) => outcome.status === "rejected");
  if (failure) {
    // Best-effort cleanup: a failing close must not mask the original error.
    await Promise.allSettled(Array.from(pool.values(), async (db) => db.close?.()));
    pool.clear();
    throw failure.reason;
  }
  return pool;
}
|
|
123
|
+
/**
 * Queries every database in `pool` concurrently and merges the hits.
 *
 * Each database is asked for `limit * 2` hits. The combined hits are sorted by
 * descending score, de-duplicated on "<source or id>:<lineRange[0]>-<lineRange[1]>"
 * (the highest-scoring hit for a key wins), cut to `limit`, and converted with
 * `toSnippets`. The pool is left open.
 *
 * @param {string} query - Search text passed to each instance.
 * @param {Map<string, object>} pool - Open instances; only the values are used.
 * @param {{ limit?: number, filter?: object }} [options] - `limit` defaults to 10.
 * @returns {Promise<Array<object>>} The value returned by `toSnippets`.
 */
async function searchPooled(query, pool, options = {}) {
  const { limit = 10, filter } = options;
  const fetchLimit = limit * 2;

  // Flattens one raw retriv hit into the normalized record shape.
  const normalize = (raw) => ({
    id: raw.id,
    content: raw.content ?? "",
    score: raw.score,
    metadata: raw.metadata ?? {},
    highlights: raw._meta?.highlights ?? [],
    lineRange: raw._chunk?.lineRange,
    entities: raw._chunk?.entities,
    scope: raw._chunk?.scope
  });

  const perDatabase = await Promise.all(Array.from(pool.values(), async (db) => {
    const hits = await db.search(query, {
      limit: fetchLimit,
      filter,
      returnContent: true,
      returnMetadata: true,
      returnMeta: true
    });
    return hits.map((raw) => normalize(raw));
  }));

  const ranked = perDatabase.flat().sort((a, b) => b.score - a.score);
  const seen = /* @__PURE__ */ new Set();
  const unique = [];
  for (const hit of ranked) {
    const range = hit.lineRange;
    const key = `${hit.metadata.source || hit.id}:${range?.[0]}-${range?.[1]}`;
    if (seen.has(key)) continue;
    seen.add(key);
    unique.push(hit);
  }
  return toSnippets(unique.slice(0, limit));
}
|
|
153
|
+
/**
 * Closes every instance in `pool` concurrently, then empties the map.
 *
 * Instances without a `close` method are skipped. If any close rejects, the
 * rejection propagates and the map is not cleared.
 *
 * @param {Map<string, object>} pool - Pool as returned by `openPool`.
 * @returns {Promise<void>}
 */
async function closePool(pool) {
  const pending = [];
  for (const db of pool.values()) {
    pending.push(db.close?.());
  }
  await Promise.all(pending);
  pool.clear();
}
|
|
157
|
+
export { listIndexIds as a, searchPooled as c, getDb as i, searchSnippets as l, closePool as n, openPool as o, createIndex as r, search as s, SearchDepsUnavailableError as t };
|
|
158
|
+
|
|
159
|
+
//# sourceMappingURL=retriv2.mjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"retriv2.mjs","names":[],"sources":["../../src/retriv/index.ts"],"sourcesContent":["import type { ChunkEntity, Document, IndexConfig, IndexPhase, IndexProgress, SearchFilter, SearchOptions, SearchResult, SearchSnippet } from './types.ts'\nimport { stripFrontmatter } from '../core/markdown.ts'\n\nexport type { ChunkEntity, Document, IndexConfig, IndexPhase, IndexProgress, SearchFilter, SearchOptions, SearchResult, SearchSnippet }\n\ntype RetrivInstance = Awaited<ReturnType<typeof getDb>>\n\nexport class SearchDepsUnavailableError extends Error {\n constructor(cause: unknown, message?: string) {\n super(message ?? 'Search dependencies unavailable (sqlite-vec or retriv not installed). Search indexing skipped.')\n this.name = 'SearchDepsUnavailableError'\n this.cause = cause\n }\n}\n\nlet _fts5Available: boolean | null = null\n\n/**\n * Probe whether SQLite FTS5 module is available.\n * Windows Node.js binaries often ship without FTS5 compiled in.\n */\nfunction checkFts5(): boolean {\n if (_fts5Available !== null)\n return _fts5Available\n const nodeSqlite = globalThis.process?.getBuiltinModule?.('node:sqlite') as typeof import('node:sqlite') | undefined\n if (!nodeSqlite) {\n _fts5Available = false\n return false\n }\n const db = new nodeSqlite.DatabaseSync(':memory:')\n try {\n db.exec('CREATE VIRTUAL TABLE _fts5_probe USING fts5(content)')\n db.exec('DROP TABLE _fts5_probe')\n _fts5Available = true\n }\n catch {\n _fts5Available = false\n }\n finally {\n db.close()\n }\n return _fts5Available\n}\n\n// Dynamic imports: retriv/chunkers/auto eagerly loads typescript which may not be installed (e.g. npx)\nexport async function getDb(config: Pick<IndexConfig, 'dbPath'>) {\n if (!checkFts5())\n throw new SearchDepsUnavailableError(new Error('FTS5 module not available'), 'SQLite FTS5 module not available. Search indexing skipped. 
On Windows, run from WSL where FTS5 is included.')\n\n let createRetriv, autoChunker, sqliteMod, sqliteVec, transformersJs, cachedEmbeddings\n try {\n ;([\n { createRetriv },\n { autoChunker },\n sqliteMod,\n sqliteVec,\n { transformersJs },\n { cachedEmbeddings },\n ] = await Promise.all([\n import('retriv'),\n import('retriv/chunkers/auto'),\n import('retriv/db/sqlite'),\n import('sqlite-vec'),\n import('retriv/embeddings/transformers-js'),\n import('./embedding-cache.ts'),\n ]))\n }\n catch (err: any) {\n if (err?.code === 'ERR_MODULE_NOT_FOUND')\n throw new SearchDepsUnavailableError(err)\n throw err\n }\n const embeddings = await cachedEmbeddings(transformersJs())\n return createRetriv({\n driver: sqliteMod.default({\n path: config.dbPath,\n embeddings,\n sqliteVec,\n }),\n chunking: autoChunker(),\n })\n}\n\n/**\n * Index documents in a background worker thread.\n * Falls back to direct indexing if worker fails to spawn.\n */\nexport async function createIndex(\n documents: Document[],\n config: IndexConfig & { removeIds?: string[] },\n): Promise<void> {\n // Dynamic import justified: search/searchSnippets shouldn't pull in worker_threads\n const { createIndexInWorker } = await import('./pool.ts')\n return createIndexInWorker(documents, config)\n}\n\n/**\n * List all raw document IDs in an existing index.\n * Returns chunk IDs (e.g. 
\"doc-id#chunk-0\") for chunked docs.\n * Queries sqlite directly to bypass createRetriv's parent-ID deduplication,\n * so callers can use these IDs for exact removal and parent-ID grouping.\n */\nexport async function listIndexIds(\n config: Pick<IndexConfig, 'dbPath'>,\n): Promise<string[]> {\n const nodeSqlite = globalThis.process?.getBuiltinModule?.('node:sqlite') as typeof import('node:sqlite') | undefined\n if (!nodeSqlite)\n return []\n const db = new nodeSqlite.DatabaseSync(config.dbPath, { open: true, readOnly: true })\n try {\n const rows = db.prepare('SELECT id FROM documents_meta').all() as Array<{ id: string }>\n return rows.map(r => r.id)\n }\n finally {\n db.close()\n }\n}\n\nexport async function search(\n query: string,\n config: IndexConfig,\n options: SearchOptions = {},\n): Promise<SearchResult[]> {\n const { limit = 10, filter } = options\n const db = await getDb(config)\n const results = await db.search(query, { limit, filter, returnContent: true, returnMetadata: true, returnMeta: true })\n await db.close?.()\n\n return results.map(r => ({\n id: r.id,\n content: r.content ?? '',\n score: r.score,\n metadata: r.metadata ?? {},\n highlights: r._meta?.highlights ?? [],\n lineRange: r._chunk?.lineRange,\n entities: r._chunk?.entities,\n scope: r._chunk?.scope,\n }))\n}\n\n/**\n * Search and return formatted snippets\n */\nexport async function searchSnippets(\n query: string,\n config: IndexConfig,\n options: SearchOptions = {},\n): Promise<SearchSnippet[]> {\n const results = await search(query, config, options)\n return toSnippets(results)\n}\n\nfunction toSnippets(results: SearchResult[]): SearchSnippet[] {\n return results.map((r) => {\n const content = stripFrontmatter(r.content)\n const source = r.metadata.source || r.id\n const lines = content.split('\\n').length\n\n return {\n package: r.metadata.package || 'unknown',\n source,\n lineStart: r.lineRange?.[0] ?? 1,\n lineEnd: r.lineRange?.[1] ?? 
lines,\n content,\n score: r.score,\n highlights: r.highlights,\n entities: r.entities,\n scope: r.scope,\n }\n })\n}\n\n// ── Pooled DB access for interactive search ──\n\nexport async function openPool(dbPaths: string[]): Promise<Map<string, RetrivInstance>> {\n const pool = new Map<string, RetrivInstance>()\n await Promise.all(dbPaths.map(async (dbPath) => {\n const db = await getDb({ dbPath })\n pool.set(dbPath, db)\n }))\n return pool\n}\n\nexport async function searchPooled(\n query: string,\n pool: Map<string, RetrivInstance>,\n options: SearchOptions = {},\n): Promise<SearchSnippet[]> {\n const { limit = 10, filter } = options\n const fetchLimit = limit * 2 // Over-fetch to compensate for dedup\n const allResults = await Promise.all(\n Array.from(pool.values(), async (db) => {\n const results = await db.search(query, { limit: fetchLimit, filter, returnContent: true, returnMetadata: true, returnMeta: true })\n return results.map(r => ({\n id: r.id,\n content: r.content ?? '',\n score: r.score,\n metadata: r.metadata ?? {},\n highlights: r._meta?.highlights ?? 
[],\n lineRange: r._chunk?.lineRange as [number, number] | undefined,\n entities: r._chunk?.entities,\n scope: r._chunk?.scope,\n }))\n }),\n )\n // Deduplicate by source+lineRange (overlapping chunks from same doc)\n const seen = new Set<string>()\n const merged = allResults.flat()\n .sort((a, b) => b.score - a.score)\n .filter((r) => {\n const lr = r.lineRange\n const key = `${r.metadata.source || r.id}:${lr?.[0]}-${lr?.[1]}`\n if (seen.has(key))\n return false\n seen.add(key)\n return true\n })\n .slice(0, limit)\n return toSnippets(merged)\n}\n\nexport async function closePool(pool: Map<string, RetrivInstance>): Promise<void> {\n await Promise.all(Array.from(pool.values(), db => db.close?.()))\n pool.clear()\n}\n"],"mappings":";AAOA,IAAa,6BAAb,cAAgD,MAAM;CACpD,YAAY,OAAgB,SAAkB;EAC5C,MAAM,WAAW,iGAAiG;EAClH,KAAK,OAAO;EACZ,KAAK,QAAQ;;;AAIjB,IAAI,iBAAiC;;;;CAMrC,IAAA,CAAA,YAAS;EACP,iBAAI;EAEJ,OAAM;;OAEJ,KAAA,IAAA,WAAiB,aAAA,WAAA;KACjB;;EAEF,GAAA,KAAM,yBAAoB;EAC1B,iBAAI;SACC;EACH,iBAAQ;WACR;YAEI;;;;;CAMN,IAAA,CAAA,WAAO,EAAA,MAAA,IAAA,2CAAA,IAAA,MAAA,4BAAA,EAAA,8GAAA;;CAIT,IAAA;EACE,CAAA,CAAA,eACE,CAAA,cAAU,WAAA,WAAA,CAAA,iBAA2B,CAAA,qBAAU,MAAA,QAA4B,IAAE;GAE/E,OAAI;GACJ,OAAI;GAEA,OAAE;GAOF,OAAO;GACP,OAAO;GACP,OAAO;GACP,CAAA;UACO,KAAA;MACP,KAAO,SAAA,wBAAA,MAAA,IAAA,2BAAA,IAAA;QACP;;OAGE,aAAc,MAAA,iBAAA,gBACN,CAAA;QACN,aAAA;;GAER,MAAM,OAAA;GACN;GACE;GACE,CAAA;YACA,aAAA;GACA;;eAGF,YAAA,WAAA,QAAA;;;;;CAOJ,MAAA,aAAsB,WACpB,SACA,mBACe,cAAA;CAEf,IAAA,CAAA,YAAQ,OAAA,EAAA;CACR,MAAA,KAAO,IAAA,WAAA,aAA+B,OAAO,QAAA;;;;;;;;;;eAaxC,OACH,OAAS,QAAA,UAAA,EAAA,EAAA;CACX,MAAM,EAAA,QAAS,IAAA,WAAW;OAA8B,KAAM,MAAA,MAAA,OAAA;OAAM,UAAU,MAAA,GAAA,OAAA,OAAA;EAAM;EACpF;EAEE,eADgB;kBAGV;EACN,YAAU;;;CAId,OAAA,QAAsB,KAAA,OACpB;EAIA,IAAA,EAAM;EACN,SAAM,EAAK,WAAY;EACvB,OAAM,EAAA;EAAmC,UAAA,EAAA,YAAA,EAAA;EAAO,YAAA,EAAA,OAAA,cAAA,EAAA;EAAQ,WAAA,EAAA,QAAe;EAAM,UAAA,EAAA,QAAgB;EAAM,OAAA,EAAA,QAAY;EAAM,EAAC;;eAI9G,eAAA,OAAA,QAAA,UAAA,EAAA,EAAA;QACN,WAAW,MAAW,OAAA,OAAA,QAAA,QAAA,CAAA;;SAEtB,WAAY,SAAc;QA
C1B,QAAc,KAAA,MAAO;EACrB,MAAA,UAAa,iBAAQ,EAAA,QAAA;EACrB,MAAA,SAAY,EAAA,SAAQ,UAAA,EAAA;EACpB,MAAA,QAAS,QAAQ,MAAA,KAAA,CAAA;EAClB,OAAE;;;;;GAML;GAME,OAAO,EAAA;;GAGT,UAAS,EAAA;GACP,OAAO,EAAA;GACL;GACA;;eAGO,SAAA,SAAA;OACL,uBAAoB,IAAW,KAAA;OAC/B,QAAA,IAAA,QAAA,IAAA,OAAA,WAAA;QACA,KAAA,MAAa,MAAA,EAAA,QAAkB,CAAA;OAC/B,IAAA,QAAW,GAAA;GACX,CAAA;QACA;;eAEY,aAAA,OAAA,MAAA,UAAA,EAAA,EAAA;OACZ,EAAA,QAAS,IAAA,WAAA;OACV,aAAA,QAAA;OACD,aAAA,MAAA,QAAA,IAAA,MAAA,KAAA,KAAA,QAAA,EAAA,OAAA,OAAA;;GAKJ,OAAA;GACE;GACA,eAAc;GACZ,gBAAiB;GACjB,YAAS;GACT,CAAC,EAAA,KAAA,OAAA;GACH,IAAA,EAAO;;GAGT,OAAA,EAAA;GAKE,UAAQ,EAAA,YAAY,EAAA;GACpB,YAAM,EAAA,OAAa,cAAQ,EAAA;GAC3B,WAAM,EAAA,QAAa;GAGf,UAAO,EAAA,QADkB;GAAgB,OAAO,EAAA,QAAA;GAAY,EAAA;GAAQ,CAAA;OAAqB,uBAAgB,IAAA,KAAA;QAAM,WAAY,WAAA,MAAA,CAAA,MAAA,GAAA,MAAA,EAAA,QAAA,EAAA,MAAA,CAAA,QAAA,MAAA;QAC5G,KAAI,EAAA;QACb,MAAE,GAAA,EAAA,SAAA,UAAA,EAAA,GAAA,GAAA,KAAA,GAAA,GAAA,KAAA;MACN,KAAA,IAAW,IAAA,EAAA,OAAW;OACtB,IAAO,IAAE;SACT;GACA,CAAA,MAAA,GAAA,MAAc,CAAA;;eAEF,UAAQ,MAAA;OACpB,QAAS,IAAA,MAAQ,KAAA,KAAA,QAAA,GAAA,OAAA,GAAA,SAAA,CAAA,CAAA;MAChB,OAAA;;SAgBA,gBAXQ,GAAA,gBACN,GAAG,SAAQ,GAAA,kBACjB,GAAQ,aAAM,GAAA,YAAA,GAAA,eAAA,GAAA,UAAA,GAAA,8BAAA"}
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+
const STATIC_REGEX_1 = /^(`{3,}|~{3,})/;
|
|
2
|
+
const STATIC_REGEX_2 = /^(`{3,}|~{3,})\s*$/;
|
|
3
|
+
const STATIC_REGEX_3 = /^(`{3,}|~{3,})\S/;
|
|
1
4
|
const ZERO_WIDTH_RE = /[\u200B\u200C\uFEFF\u2060\u200D\u061C\u180E\u200E\u200F\u2028\u2029]/gu;
|
|
2
5
|
const HTML_COMMENT_RE = /<!--(?!\s*\/?skilld:)[\s\S]*?-->/g;
|
|
3
6
|
const AGENT_DIRECTIVE_TAGS = [
|
|
@@ -65,7 +68,7 @@ function processOutsideCodeBlocks(content, fn) {
|
|
|
65
68
|
for (const line of lines) {
|
|
66
69
|
const trimmed = line.trimStart();
|
|
67
70
|
if (!inCodeBlock) {
|
|
68
|
-
const match = trimmed.match(
|
|
71
|
+
const match = trimmed.match(STATIC_REGEX_1);
|
|
69
72
|
if (match) {
|
|
70
73
|
flushNonCode();
|
|
71
74
|
inCodeBlock = true;
|
|
@@ -76,7 +79,7 @@ function processOutsideCodeBlocks(content, fn) {
|
|
|
76
79
|
}
|
|
77
80
|
nonCodeBuffer.push(line);
|
|
78
81
|
} else {
|
|
79
|
-
const match = trimmed.match(
|
|
82
|
+
const match = trimmed.match(STATIC_REGEX_2);
|
|
80
83
|
if (match && match[1][0] === fenceChar && match[1].length >= fenceLen) {
|
|
81
84
|
result.push(codeBuffer.join("\n"));
|
|
82
85
|
result.push(line);
|
|
@@ -133,18 +136,18 @@ function closeUnclosedCodeBlocks(content) {
|
|
|
133
136
|
for (const line of lines) {
|
|
134
137
|
const trimmed = line.trimStart();
|
|
135
138
|
if (!inCodeBlock) {
|
|
136
|
-
const match = trimmed.match(
|
|
139
|
+
const match = trimmed.match(STATIC_REGEX_1);
|
|
137
140
|
if (match) {
|
|
138
141
|
inCodeBlock = true;
|
|
139
142
|
fence = match[1][0].repeat(match[1].length);
|
|
140
143
|
}
|
|
141
144
|
} else {
|
|
142
|
-
const match = trimmed.match(
|
|
145
|
+
const match = trimmed.match(STATIC_REGEX_2);
|
|
143
146
|
if (match && match[1][0] === fence[0] && match[1].length >= fence.length) {
|
|
144
147
|
inCodeBlock = false;
|
|
145
148
|
fence = "";
|
|
146
149
|
} else {
|
|
147
|
-
const openMatch = trimmed.match(
|
|
150
|
+
const openMatch = trimmed.match(STATIC_REGEX_3);
|
|
148
151
|
if (openMatch && openMatch[1][0] === fence[0] && openMatch[1].length === fence.length) result.push(fence);
|
|
149
152
|
else if (EMOJI_LINE_START_RE.test(trimmed)) {
|
|
150
153
|
result.push(fence);
|
|
@@ -168,7 +171,7 @@ function cleanupCodeBlocks(content) {
|
|
|
168
171
|
let i = 0;
|
|
169
172
|
while (i < lines.length) {
|
|
170
173
|
const trimmed = lines[i].trimStart();
|
|
171
|
-
const fm = trimmed.match(
|
|
174
|
+
const fm = trimmed.match(STATIC_REGEX_1);
|
|
172
175
|
if (!fm) {
|
|
173
176
|
if (trimmed) prevCodeContent = void 0;
|
|
174
177
|
i++;
|
|
@@ -180,7 +183,7 @@ function cleanupCodeBlocks(content) {
|
|
|
180
183
|
i++;
|
|
181
184
|
let closeIdx = -1;
|
|
182
185
|
while (i < lines.length) {
|
|
183
|
-
const cm = lines[i].trimStart().match(
|
|
186
|
+
const cm = lines[i].trimStart().match(STATIC_REGEX_2);
|
|
184
187
|
if (cm && cm[1][0] === fChar && cm[1].length >= fLen) {
|
|
185
188
|
closeIdx = i;
|
|
186
189
|
i++;
|
|
@@ -205,7 +208,7 @@ function closeUnclosedInlineCode(content) {
|
|
|
205
208
|
return lines.map((line) => {
|
|
206
209
|
const trimmed = line.trimStart();
|
|
207
210
|
if (!inFence) {
|
|
208
|
-
const m = trimmed.match(
|
|
211
|
+
const m = trimmed.match(STATIC_REGEX_1);
|
|
209
212
|
if (m) {
|
|
210
213
|
inFence = true;
|
|
211
214
|
fenceChar = m[1][0];
|
|
@@ -213,7 +216,7 @@ function closeUnclosedInlineCode(content) {
|
|
|
213
216
|
return line;
|
|
214
217
|
}
|
|
215
218
|
} else {
|
|
216
|
-
const m = trimmed.match(
|
|
219
|
+
const m = trimmed.match(STATIC_REGEX_2);
|
|
217
220
|
if (m && m[1][0] === fenceChar && m[1].length >= fenceLen) inFence = false;
|
|
218
221
|
return line;
|
|
219
222
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sanitize.mjs","names":[],"sources":["../../src/core/sanitize.ts"],"sourcesContent":["/**\n * Markdown sanitizer for prompt injection defense.\n *\n * Strips injection vectors from untrusted markdown before it reaches\n * agent-readable files (cached references, SKILL.md, search output).\n *\n * Threat model: agent instruction injection, not browser XSS.\n * Lightweight regex-based — markdown is consumed as text by AI agents.\n */\n\n/** Zero-width and invisible formatting characters used to hide text from human review */\n// eslint-disable-next-line no-misleading-character-class -- intentionally matching individual invisible chars\nconst ZERO_WIDTH_RE = /[\\u200B\\u200C\\uFEFF\\u2060\\u200D\\u061C\\u180E\\u200E\\u200F\\u2028\\u2029]/gu\n\n/** HTML comments (single-line and multi-line), except skilld section markers */\nconst HTML_COMMENT_RE = /<!--(?!\\s*\\/?skilld:)[\\s\\S]*?-->/g\n\n/**\n * Agent directive tags — stripped globally (including inside code blocks).\n * These are never legitimate in any context; they're purely injection vectors.\n */\nconst AGENT_DIRECTIVE_TAGS = [\n 'system',\n 'instructions',\n 'override',\n 'prompt',\n 'context',\n 'role',\n 'user-prompt',\n 'assistant',\n 'tool-use',\n 'tool-result',\n 'tool_call',\n 'tool_response',\n 'tool_result',\n 'system-prompt',\n 'human',\n 'admin',\n]\n\n/**\n * Dangerous HTML tags — stripped only outside fenced code blocks.\n * May appear legitimately in code examples (e.g. 
`<script setup>` in Vue docs).\n */\nconst DANGEROUS_HTML_TAGS = [\n 'script',\n 'iframe',\n 'style',\n 'meta',\n 'object',\n 'embed',\n 'form',\n]\n/**\n * Decode HTML entity-encoded angle brackets so tag stripping catches encoded variants.\n * Only decodes < and > (named, decimal, hex) — minimal to avoid false positives.\n */\nfunction decodeAngleBracketEntities(text: string): string {\n return text\n .replace(/</gi, '<')\n .replace(/>/gi, '>')\n .replace(/�*60;/g, '<')\n .replace(/�*62;/g, '>')\n .replace(/�*3c;/gi, '<')\n .replace(/�*3e;/gi, '>')\n}\n\n/** Strip paired and standalone instances of the given tag names */\nfunction stripTags(text: string, tags: string[]): string {\n if (!tags.length)\n return text\n const tagGroup = tags.join('|')\n // First strip paired tags with content between them\n const pairedRe = new RegExp(`<(${tagGroup})(\\\\s[^>]*)?>([\\\\s\\\\S]*?)<\\\\/\\\\1>`, 'gi')\n let result = text.replace(pairedRe, '')\n // Then strip any remaining standalone open/close/self-closing tags\n const standaloneRe = new RegExp(`<\\\\/?(${tagGroup})(\\\\s[^>]*)?\\\\/?>`, 'gi')\n result = result.replace(standaloneRe, '')\n return result\n}\n\n/** External image markdown:  or  */\nconst EXTERNAL_IMAGE_RE = /!\\[([^\\]]*)\\]\\(https?:\\/\\/[^)]+\\)/gi\n\n/**\n * External link markdown: [text](https://...) or [text](http://...)\n * Preserves relative links and anchors.\n */\nconst EXTERNAL_LINK_RE = /\\[([^\\]]*)\\]\\((https?:\\/\\/[^)]+)\\)/gi\n\n/** Dangerous URI protocols in links/images — match entire [text](protocol:...) 
*/\nconst DANGEROUS_PROTOCOL_RE = /!?\\[([^\\]]*)\\]\\(\\s*(javascript|data|vbscript|file)\\s*:[^)]*\\)/gi\nconst DANGEROUS_PROTOCOL_ENCODED_RE = /!?\\[([^\\]]*)\\]\\(\\s*(?:(?:j|%6a|%4a)(?:a|%61|%41)(?:v|%76|%56)(?:a|%61|%41)(?:s|%73|%53)(?:c|%63|%43)(?:r|%72|%52)(?:i|%69|%49)(?:p|%70|%50)(?:t|%74|%54)|(?:d|%64|%44)(?:a|%61|%41)(?:t|%74|%54)(?:a|%61|%41)|(?:v|%76|%56)(?:b|%62|%42)(?:s|%73|%53)(?:c|%63|%43)(?:r|%72|%52)(?:i|%69|%49)(?:p|%70|%50)(?:t|%74|%54))\\s*:[^)]*\\)/gi\n\n/** Directive-style lines that look like agent instructions */\nconst DIRECTIVE_LINE_RE = /^[ \\t]*(SYSTEM|OVERRIDE|INSTRUCTION|NOTE TO AI|IGNORE PREVIOUS|IGNORE ALL PREVIOUS|DISREGARD|FORGET ALL|NEW INSTRUCTIONS?|IMPORTANT SYSTEM|ADMIN OVERRIDE)\\s*[:>].*/gim\n\n/** Base64 blob: 100+ chars of pure base64 alphabet on a single line */\nconst BASE64_BLOB_RE = /^[A-Z0-9+/=]{100,}$/gim\n\n/** Unicode escape spam: 4+ consecutive \\uXXXX sequences */\nconst UNICODE_ESCAPE_SPAM_RE = /(\\\\u[\\dA-Fa-f]{4}){4,}/g\n\n/**\n * Claude Code dynamic context: !`command` executes shell commands inline when a skill loads.\n * Matches !` followed by content and closing backtick(s) of same length.\n * Stripped globally — never legitimate in generated skills, always a command injection vector.\n */\nconst DYNAMIC_COMMAND_RE = /!(`+)([^`]+)\\1/g\n\n/** Emoji characters — token-inefficient (2-3x cost), distort embeddings, semantically ambiguous for LLMs */\n// Also strips variation selectors (\\uFE0E text, \\uFE0F emoji) which dangle after emoji removal\nconst EMOJI_RE = /[\\p{Extended_Pictographic}\\uFE0E\\uFE0F]/gu\n\n/**\n * Process content outside of fenced code blocks.\n * Uses a line-by-line state machine to properly track fence boundaries,\n * handling nested fences, mismatched lengths, and mixed backtick/tilde fences.\n * Unclosed fences are treated as non-code for security (prevents bypass via malformed fences).\n */\nexport function processOutsideCodeBlocks(content: string, fn: (text: string) => string): 
string {\n const lines = content.split('\\n')\n const result: string[] = []\n let nonCodeBuffer: string[] = []\n let codeBuffer: string[] = []\n let inCodeBlock = false\n let fenceChar = ''\n let fenceLen = 0\n\n function flushNonCode() {\n if (nonCodeBuffer.length > 0) {\n result.push(fn(nonCodeBuffer.join('\\n')))\n nonCodeBuffer = []\n }\n }\n\n for (const line of lines) {\n const trimmed = line.trimStart()\n\n if (!inCodeBlock) {\n const match = trimmed.match(/^(`{3,}|~{3,})/)\n if (match) {\n flushNonCode()\n inCodeBlock = true\n fenceChar = match[1]![0]!\n fenceLen = match[1]!.length\n codeBuffer = [line]\n continue\n }\n nonCodeBuffer.push(line)\n }\n else {\n const match = trimmed.match(/^(`{3,}|~{3,})\\s*$/)\n if (match && match[1]![0] === fenceChar && match[1]!.length >= fenceLen) {\n // Properly closed — emit code block as-is\n result.push(codeBuffer.join('\\n'))\n result.push(line)\n codeBuffer = []\n inCodeBlock = false\n fenceChar = ''\n fenceLen = 0\n continue\n }\n codeBuffer.push(line)\n }\n }\n\n flushNonCode()\n\n // Unclosed fence: treat as non-code so sanitization still applies\n if (inCodeBlock && codeBuffer.length > 0) {\n result.push(fn(codeBuffer.join('\\n')))\n }\n\n return result.join('\\n')\n}\n\n/**\n * Sanitize markdown content to strip prompt injection vectors.\n * Applied at every markdown emission point (cache writes, SKILL.md, search output).\n */\nexport function sanitizeMarkdown(content: string): string {\n if (!content)\n return content\n\n // Layer 1: Strip zero-width characters (global, including in code blocks)\n let result = content.replace(ZERO_WIDTH_RE, '')\n\n // Layer 2: Strip dynamic command placeholders globally (!`command` → command injection vector)\n result = result.replace(DYNAMIC_COMMAND_RE, '')\n\n // Layer 3: Strip agent directive tags globally (never legitimate, even in code blocks)\n result = stripTags(result, AGENT_DIRECTIVE_TAGS)\n\n // Layers 4-10: Only outside fenced code blocks\n result = 
processOutsideCodeBlocks(result, (text) => {\n // Protect inline code spans from tag stripping (e.g. `<script setup>` in Vue docs)\n const inlineCodeSpans: string[] = []\n let t = text.replace(/(`+)([^`]+)\\1/g, (match) => {\n const idx = inlineCodeSpans.length\n inlineCodeSpans.push(match)\n return `\\x00IC${idx}\\x00`\n })\n\n // Layer 4: Strip HTML comments (outside code blocks where they're hidden from review;\n // inside code blocks they render as visible text and are legitimate documentation)\n t = t.replace(HTML_COMMENT_RE, '')\n\n // Layer 5: Decode entities + strip remaining dangerous tags (HTML + entity-encoded agent directives)\n t = decodeAngleBracketEntities(t)\n t = stripTags(t, [...AGENT_DIRECTIVE_TAGS, ...DANGEROUS_HTML_TAGS])\n\n // Layer 6: Strip external images (exfil via query params)\n t = t.replace(EXTERNAL_IMAGE_RE, '')\n\n // Layer 7: Convert external links to plain text\n t = t.replace(EXTERNAL_LINK_RE, '$1')\n\n // Layer 8: Strip dangerous protocols (raw and URL-encoded)\n t = t.replace(DANGEROUS_PROTOCOL_RE, '')\n t = t.replace(DANGEROUS_PROTOCOL_ENCODED_RE, '')\n\n // Layer 9: Strip directive-style lines\n t = t.replace(DIRECTIVE_LINE_RE, '')\n\n // Layer 10: Strip encoded payloads\n t = t.replace(BASE64_BLOB_RE, '')\n t = t.replace(UNICODE_ESCAPE_SPAM_RE, '')\n\n // Layer 11: Strip emoji (token-inefficient, distort embeddings, semantically ambiguous)\n t = t.replace(EMOJI_RE, '')\n\n // Restore inline code spans\n t = t.replace(/\\0IC(\\d+)\\0/g, (_, idx) => inlineCodeSpans[Number(idx)] || '')\n\n return t\n })\n\n return result\n}\n\n// --- Markdown repair ---\n\n/** Heading missing space after #: `##Heading` → `## Heading` */\nconst HEADING_NO_SPACE_RE = /^(#{1,6})([^\\s#])/gm\n\n/** 3+ consecutive blank lines → 2 */\nconst EXCESSIVE_BLANKS_RE = /\\n{4,}/g\n\n/** Trailing whitespace on lines (preserve intentional double-space line breaks) */\nconst TRAILING_WHITESPACE_RE = /[ \\t]+$/gm\n\n/** Emoji at start of line inside a code block 
— LLM forgot to close the block */\nconst EMOJI_LINE_START_RE = /^\\p{Extended_Pictographic}/u\n\n/**\n * Close unclosed fenced code blocks.\n * Walks line-by-line tracking open/close state.\n */\nfunction closeUnclosedCodeBlocks(content: string): string {\n const lines = content.split('\\n')\n const result: string[] = []\n let inCodeBlock = false\n let fence = ''\n\n for (const line of lines) {\n const trimmed = line.trimStart()\n if (!inCodeBlock) {\n const match = trimmed.match(/^(`{3,}|~{3,})/)\n if (match) {\n inCodeBlock = true\n fence = match[1]![0]!.repeat(match[1]!.length)\n }\n }\n else {\n // Check for closing fence (same char, at least same length)\n const match = trimmed.match(/^(`{3,}|~{3,})\\s*$/)\n if (match && match[1]![0] === fence[0] && match[1]!.length >= fence.length) {\n inCodeBlock = false\n fence = ''\n }\n else {\n // New fence opener inside unclosed block (same char, same length, with lang tag)\n // LLMs commonly forget to close a code block before starting a new one\n const openMatch = trimmed.match(/^(`{3,}|~{3,})\\S/)\n if (openMatch && openMatch[1]![0] === fence[0] && openMatch[1]!.length === fence.length) {\n result.push(fence)\n // fence char/length stays the same since both match\n }\n // Emoji at line start → LLM forgot to close code block before markdown content\n else if (EMOJI_LINE_START_RE.test(trimmed)) {\n result.push(fence)\n inCodeBlock = false\n fence = ''\n }\n }\n }\n result.push(line)\n }\n\n // If still inside a code block, close it\n if (inCodeBlock) {\n // Ensure trailing newline before closing fence\n if (result.length > 0 && result.at(-1) !== '')\n result.push('')\n result.push(fence)\n }\n\n return result.join('\\n')\n}\n\n/**\n * Remove empty code blocks and deduplicate consecutive identical code blocks.\n * Empty blocks arise when emoji/fence recovery leaves orphaned fences.\n * Duplicate blocks arise when LLMs repeat the same code example.\n */\nfunction cleanupCodeBlocks(content: string): string {\n const 
lines = content.split('\\n')\n const toRemove = new Set<number>()\n let prevCodeContent: string | undefined\n let i = 0\n\n while (i < lines.length) {\n const trimmed = lines[i]!.trimStart()\n const fm = trimmed.match(/^(`{3,}|~{3,})/)\n if (!fm) {\n // Non-blank text between code blocks resets dedup tracking\n if (trimmed)\n prevCodeContent = undefined\n i++\n continue\n }\n\n const fChar = fm[1]![0]!\n const fLen = fm[1]!.length\n const openIdx = i\n i++\n\n let closeIdx = -1\n while (i < lines.length) {\n const ct = lines[i]!.trimStart()\n const cm = ct.match(/^(`{3,}|~{3,})\\s*$/)\n if (cm && cm[1]![0] === fChar && cm[1]!.length >= fLen) {\n closeIdx = i\n i++\n break\n }\n i++\n }\n\n if (closeIdx === -1)\n continue\n\n const inner = lines.slice(openIdx + 1, closeIdx).join('\\n').trim()\n\n if (!inner) {\n for (let j = openIdx; j <= closeIdx; j++) toRemove.add(j)\n }\n else if (inner === prevCodeContent) {\n for (let j = openIdx; j <= closeIdx; j++) toRemove.add(j)\n }\n else {\n prevCodeContent = inner\n }\n }\n\n if (!toRemove.size)\n return content\n return lines.filter((_, idx) => !toRemove.has(idx)).join('\\n')\n}\n\n/**\n * Close unclosed inline code spans.\n * Scans each line for unmatched backtick(s) and appends closing backtick(s).\n * Tracks fenced code blocks internally to handle any fence length.\n */\nfunction closeUnclosedInlineCode(content: string): string {\n const lines = content.split('\\n')\n let inFence = false\n let fenceChar = ''\n let fenceLen = 0\n\n return lines.map((line) => {\n const trimmed = line.trimStart()\n if (!inFence) {\n const m = trimmed.match(/^(`{3,}|~{3,})/)\n if (m) {\n inFence = true\n fenceChar = m[1]![0]!\n fenceLen = m[1]!.length\n return line\n }\n }\n else {\n const m = trimmed.match(/^(`{3,}|~{3,})\\s*$/)\n if (m && m[1]![0] === fenceChar && m[1]!.length >= fenceLen) {\n inFence = false\n }\n return line\n }\n\n // Outside fenced code blocks — fix unclosed inline backticks\n let i = 0\n while (i < line.length) 
{\n if (line[i] === '`') {\n const seqStart = i\n while (i < line.length && line[i] === '`') i++\n const seqLen = i - seqStart\n let found = false\n let j = i\n while (j < line.length) {\n if (line[j] === '`') {\n const closeStart = j\n while (j < line.length && line[j] === '`') j++\n if (j - closeStart === seqLen) {\n found = true\n i = j\n break\n }\n }\n else {\n j++\n }\n }\n if (!found) {\n line = `${line}${'`'.repeat(seqLen)}`\n i = line.length\n }\n }\n else {\n i++\n }\n }\n return line\n }).join('\\n')\n}\n\n/**\n * Repair broken markdown syntax.\n * Fixes common issues in fetched documentation:\n * - Unclosed fenced code blocks\n * - Unclosed inline code spans\n * - Missing space after heading # markers\n * - Excessive consecutive blank lines\n * - Trailing whitespace\n */\nexport function repairMarkdown(content: string): string {\n if (!content)\n return content\n\n let result = content\n\n // Fix unclosed fenced code blocks (must run before other line-level fixes)\n result = closeUnclosedCodeBlocks(result)\n\n // Remove empty and duplicate code blocks (artifacts from fence recovery)\n result = cleanupCodeBlocks(result)\n\n // Fix unclosed inline code spans\n result = closeUnclosedInlineCode(result)\n\n // Fix heading spacing (only outside code blocks)\n result = processOutsideCodeBlocks(result, text =>\n text.replace(HEADING_NO_SPACE_RE, '$1 $2'))\n\n // Normalize excessive blank lines\n result = result.replace(EXCESSIVE_BLANKS_RE, '\\n\\n\\n')\n\n // Strip trailing whitespace\n result = result.replace(TRAILING_WHITESPACE_RE, '')\n\n return 
result\n}\n"],"mappings":";;;;;;CAYA;;CAGA;;;;;CAMA;CACE;CACA;CACA;CACA;CACA;CACA;MAEA,sBAAA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;;;;AAOF,SAAM,UAAA,MAAA,MAAsB;CAC1B,IAAA,CAAA,KAAA,QAAA,OAAA;CACA,MAAA,WAAA,KAAA,KAAA,IAAA;CACA,MAAA,WAAA,IAAA,OAAA,KAAA,SAAA,oCAAA,KAAA;CACA,IAAA,SAAA,KAAA,QAAA,UAAA,GAAA;CACA,MAAA,eAAA,IAAA,OAAA,SAAA,SAAA,oBAAA,KAAA;CACA,SAAA,OAAA,QAAA,cAAA,GAAA;CACA,OAAA;;;;MAOA,wBACW;;AASb,MAAA,oBAAiC;MAG/B,iBAAsB;MAGlB,yBAAsB;MAG1B,qBAAwB;;AAK1B,SAAM,yBAAoB,SAAA,IAAA;;;;;CAM1B,IAAA,cAAM;;CAGN,IAAA,WAAM;CACN,SAAM,eAAA;;GAGN,OAAM,KAAA,GAAA,cAAoB,KAAA,KAAA,CAAA,CAAA;;;;CAM1B,KAAM,MAAA,QAAA,OAAA;;;;;;IAON,cAAM;;IAIN,WAAM,MAAW,GAAA;;;;;;;GAQjB,IAAA,SAAgB,MAAA,GAAA,OAAA,aAA0C,MAAsC,GAAA,UAAA,UAAA;IAC9F,OAAM,KAAQ,WAAQ,KAAM,KAAK,CAAA;IACjC,OAAM,KAAmB,KAAE;IAC3B,aAAI,EAAA;IACJ,cAA2B;IAC3B,YAAI;IACJ,WAAI;IACJ;;GAGE,WAAI,KAAA,KAAc;;;;;CAMpB,OAAK,OAAM,KAAQ,KAAA;;SAIT,iBAAgB,SAAM;KAC5B,CAAA,SAAW,OAAA;KACT,SAAA,QAAc,QAAA,eAAA,GAAA;UACd,OAAA,QAAc,oBAAA,GAAA;UACd,UAAY,QAAU,qBAAA;UACtB,yBAAqB,SAAA,SAAA;QACrB,kBAAmB,EAAA;MACnB,IAAA,KAAA,QAAA,mBAAA,UAAA;;GAEF,gBAAc,KAAK,MAAK;UAErB,SAAA,IAAA;IACH;MACA,EAAI,QAAS,iBAAiB,GAAA;MAE5B,2BAA4B,EAAA;MAC5B,UAAY,GAAA,CAAA,GAAK,sBAAA,GAAA,oBAAA,CAAA;MACjB,EAAA,QAAA,mBAAe,GAAA;MACf,EAAA,QAAA,kBAAc,KAAA;MACd,EAAA,QAAY,uBAAA,GAAA;MACZ,EAAA,QAAW,+BAAA,GAAA;MACX,EAAA,QAAA,mBAAA,GAAA;;MAEF,EAAA,QAAW,wBAAU,GAAA;;;EAIzB,OAAA;GAGA;CAIA,OAAO;;;;MAQF,yBACI;MAMT,sBAAwB;SAMxB,wBAAS,SAAyB;OAEhC,QAAM,QAAA,MAA8B,KAAA;OAChC,SAAS,EAAA;KACX,cAAY;KACZ,QAAA;MACA,MAAO,QAAS,OAAI;QACpB,UAAA,KAAA,WAAA;EAIF,IAAI,CAAA,aAAU;GAGd,MAAI,QAAA,QAAA,MAAA,iBAA6B;GACjC,IAAI,OAAA;IAGJ,cAAc;IAGd,QAAM,MAAQ,GAAA,GAAA,OAAA,MAAkB,GAAK,OAAA;;SAI/B;GAGN,MAAM,QAAQ,QAAA,MAAA,qBAAsB;GAGpC,IAAI,SAAU,MAAA,GAAA,OAAgB,MAAG,MAAA,MAAA,GAAA,UAAA,MAAA,QAAA;IACjC,cAAc;IAGd,QAAM;UAGA;IAEN,MAAO,YAAA,QAAA,MAAA,mBAAA;IACP,IAAA,aAAA,UAAA,GAAA,OAAA,MAAA,MAAA,UAAA,GAAA,WAAA,MAAA,QAAA,OAAA,KAAA,MAAA;SAEK,IAAA,oBAAA,KAAA,QAAA,EAAA;;;KAMT,QAAM;;;;EAMN,OAAM,KAAA,KAAA;;CAGN,IAAA,aAAM;;;;;;SAQE,kB
AAqB,SAAA;CAC3B,MAAI,QAAA,QAAc,MAAA,KAAA;CAClB,MAAI,2BAAQ,IAAA,KAAA;CAEZ,IAAA;KACE,IAAM;QACD,IAAA,MAAA,QAAa;QAChB,UAAc,MAAA,GAAQ,WAAM;QACxB,KAAA,QAAO,MAAA,iBAAA;MACT,CAAA,IAAA;OACA,SAAQ,kBAAqB,KAAM;;;;QAMjC,QAAS,GAAA,GAAM;QACjB,OAAA,GAAc,GAAA;QACd,UAAQ;;MAKR,WAAM;SACF,IAAA,MAAA,QAAa;SAKZ,KAAI,MAAA,GAAA,WAAoB,CAAA,MAAK,qBAAU;OAC1C,MAAO,GAAA,GAAK,OAAM,SAAA,GAAA,GAAA,UAAA,MAAA;eAClB;;;;;;;EASR,MAAI,QAAA,MAAa,MAAA,UAAA,GAAA,SAAA,CAAA,KAAA,KAAA,CAAA,MAAA;EAEf,IAAI,CAAA,OAAO,KAAA,IAAS,IAAK,SAAO,KAAM,UACpC,KAAO,SAAQ,IAAA,EAAA;OACjB,IAAO,UAAW,iBAAA,KAAA,IAAA,IAAA,SAAA,KAAA,UAAA,KAAA,SAAA,IAAA,EAAA;;;;;;;;CAWtB,IAAA,UAAS;CACP,IAAA,YAAc;CACd,IAAA,WAAM;CACN,OAAI,MAAA,KAAA,SAAA;EACJ,MAAI,UAAI,KAAA,WAAA;EAER,IAAA,CAAA,SAAW;GACT,MAAM,IAAA,QAAU,MAAU,iBAAW;GACrC,IAAA,GAAM;IACN,UAAS;IAEP,YACE,EAAA,GAAA;IACF,WAAA,EAAA,GAAA;IACA,OAAA;;SAGI;GACN,MAAM,IAAA,QAAc,MAAA,qBAAA;GACpB,IAAA,KAAM,EAAA,GAAA,OAAU,aAAA,EAAA,GAAA,UAAA,UAAA,UAAA;GAChB,OAAA;;EAGA,IAAA,IAAO;SAEC,IAAA,KADK,QAAU,IAAA,KACP,OAAM,KAAA;GACpB,MAAI,WAAa;UACf,IAAA,KAAW,UAAA,KAAA,OAAA,KAAA;SACX,SAAA,IAAA;OACA,QAAA;;GAEF,OAAA,IAAA,KAAA,QAAA,IAAA,KAAA,OAAA,KAAA;;IAGF,OAAI,IAAA,KAAa,UACf,KAAA,OAAA,KAAA;IAEF,IAAM,IAAA,eAAc,QAAM;KAE1B,QAAK;SAGA;;;UAQF;GAEL,IAAA,CAAO,OAAM;;;;;;;;SAUT,eAAU,SAAA;CACd,IAAI,CAAA,SAAA,OAAY;CAChB,IAAI,SAAA;CAEJ,SAAO,wBAAoB,OAAA;UACnB,kBAAe,OAAW;UAC3B,wBAAS,OAAA;UACN,yBAAkB,SAAiB,SAAA,KAAA,QAAA,qBAAA,QAAA,CAAA;UAClC,OAAA,QAAA,qBAAA,SAAA;UACL,OAAU,QAAA,wBAAA,GAAA;QACV"}
|
|
1
|
+
{"version":3,"file":"sanitize.mjs","names":[],"sources":["../../src/core/sanitize.ts"],"sourcesContent":["const STATIC_REGEX_1 = /^(`{3,}|~{3,})/\nconst STATIC_REGEX_2 = /^(`{3,}|~{3,})\\s*$/\nconst STATIC_REGEX_3 = /^(`{3,}|~{3,})\\S/\n\n/**\n * Markdown sanitizer for prompt injection defense.\n *\n * Strips injection vectors from untrusted markdown before it reaches\n * agent-readable files (cached references, SKILL.md, search output).\n *\n * Threat model: agent instruction injection, not browser XSS.\n * Lightweight regex-based — markdown is consumed as text by AI agents.\n */\n\n/** Zero-width and invisible formatting characters used to hide text from human review */\n// eslint-disable-next-line no-misleading-character-class -- intentionally matching individual invisible chars\nconst ZERO_WIDTH_RE = /[\\u200B\\u200C\\uFEFF\\u2060\\u200D\\u061C\\u180E\\u200E\\u200F\\u2028\\u2029]/gu\n\n/** HTML comments (single-line and multi-line), except skilld section markers */\nconst HTML_COMMENT_RE = /<!--(?!\\s*\\/?skilld:)[\\s\\S]*?-->/g\n\n/**\n * Agent directive tags — stripped globally (including inside code blocks).\n * These are never legitimate in any context; they're purely injection vectors.\n */\nconst AGENT_DIRECTIVE_TAGS = [\n 'system',\n 'instructions',\n 'override',\n 'prompt',\n 'context',\n 'role',\n 'user-prompt',\n 'assistant',\n 'tool-use',\n 'tool-result',\n 'tool_call',\n 'tool_response',\n 'tool_result',\n 'system-prompt',\n 'human',\n 'admin',\n]\n\n/**\n * Dangerous HTML tags — stripped only outside fenced code blocks.\n * May appear legitimately in code examples (e.g. 
`<script setup>` in Vue docs).\n */\nconst DANGEROUS_HTML_TAGS = [\n 'script',\n 'iframe',\n 'style',\n 'meta',\n 'object',\n 'embed',\n 'form',\n]\n/**\n * Decode HTML entity-encoded angle brackets so tag stripping catches encoded variants.\n * Only decodes < and > (named, decimal, hex) — minimal to avoid false positives.\n */\nfunction decodeAngleBracketEntities(text: string): string {\n return text\n .replace(/</gi, '<')\n .replace(/>/gi, '>')\n .replace(/�*60;/g, '<')\n .replace(/�*62;/g, '>')\n .replace(/�*3c;/gi, '<')\n .replace(/�*3e;/gi, '>')\n}\n\n/** Strip paired and standalone instances of the given tag names */\nfunction stripTags(text: string, tags: string[]): string {\n if (!tags.length)\n return text\n const tagGroup = tags.join('|')\n // First strip paired tags with content between them\n const pairedRe = new RegExp(`<(${tagGroup})(\\\\s[^>]*)?>([\\\\s\\\\S]*?)<\\\\/\\\\1>`, 'gi')\n let result = text.replace(pairedRe, '')\n // Then strip any remaining standalone open/close/self-closing tags\n const standaloneRe = new RegExp(`<\\\\/?(${tagGroup})(\\\\s[^>]*)?\\\\/?>`, 'gi')\n result = result.replace(standaloneRe, '')\n return result\n}\n\n/** External image markdown:  or  */\nconst EXTERNAL_IMAGE_RE = /!\\[([^\\]]*)\\]\\(https?:\\/\\/[^)]+\\)/gi\n\n/**\n * External link markdown: [text](https://...) or [text](http://...)\n * Preserves relative links and anchors.\n */\nconst EXTERNAL_LINK_RE = /\\[([^\\]]*)\\]\\((https?:\\/\\/[^)]+)\\)/gi\n\n/** Dangerous URI protocols in links/images — match entire [text](protocol:...) 
*/\nconst DANGEROUS_PROTOCOL_RE = /!?\\[([^\\]]*)\\]\\(\\s*(javascript|data|vbscript|file)\\s*:[^)]*\\)/gi\nconst DANGEROUS_PROTOCOL_ENCODED_RE = /!?\\[([^\\]]*)\\]\\(\\s*(?:(?:j|%6a|%4a)(?:a|%61|%41)(?:v|%76|%56)(?:a|%61|%41)(?:s|%73|%53)(?:c|%63|%43)(?:r|%72|%52)(?:i|%69|%49)(?:p|%70|%50)(?:t|%74|%54)|(?:d|%64|%44)(?:a|%61|%41)(?:t|%74|%54)(?:a|%61|%41)|(?:v|%76|%56)(?:b|%62|%42)(?:s|%73|%53)(?:c|%63|%43)(?:r|%72|%52)(?:i|%69|%49)(?:p|%70|%50)(?:t|%74|%54))\\s*:[^)]*\\)/gi\n\n/** Directive-style lines that look like agent instructions */\nconst DIRECTIVE_LINE_RE = /^[ \\t]*(SYSTEM|OVERRIDE|INSTRUCTION|NOTE TO AI|IGNORE PREVIOUS|IGNORE ALL PREVIOUS|DISREGARD|FORGET ALL|NEW INSTRUCTIONS?|IMPORTANT SYSTEM|ADMIN OVERRIDE)\\s*[:>].*/gim\n\n/** Base64 blob: 100+ chars of pure base64 alphabet on a single line */\nconst BASE64_BLOB_RE = /^[A-Z0-9+/=]{100,}$/gim\n\n/** Unicode escape spam: 4+ consecutive \\uXXXX sequences */\nconst UNICODE_ESCAPE_SPAM_RE = /(\\\\u[\\dA-Fa-f]{4}){4,}/g\n\n/**\n * Claude Code dynamic context: !`command` executes shell commands inline when a skill loads.\n * Matches !` followed by content and closing backtick(s) of same length.\n * Stripped globally — never legitimate in generated skills, always a command injection vector.\n */\nconst DYNAMIC_COMMAND_RE = /!(`+)([^`]+)\\1/g\n\n/** Emoji characters — token-inefficient (2-3x cost), distort embeddings, semantically ambiguous for LLMs */\n// Also strips variation selectors (\\uFE0E text, \\uFE0F emoji) which dangle after emoji removal\nconst EMOJI_RE = /[\\p{Extended_Pictographic}\\uFE0E\\uFE0F]/gu\n\n/**\n * Process content outside of fenced code blocks.\n * Uses a line-by-line state machine to properly track fence boundaries,\n * handling nested fences, mismatched lengths, and mixed backtick/tilde fences.\n * Unclosed fences are treated as non-code for security (prevents bypass via malformed fences).\n */\nexport function processOutsideCodeBlocks(content: string, fn: (text: string) => string): 
string {\n const lines = content.split('\\n')\n const result: string[] = []\n let nonCodeBuffer: string[] = []\n let codeBuffer: string[] = []\n let inCodeBlock = false\n let fenceChar = ''\n let fenceLen = 0\n\n function flushNonCode() {\n if (nonCodeBuffer.length > 0) {\n result.push(fn(nonCodeBuffer.join('\\n')))\n nonCodeBuffer = []\n }\n }\n\n for (const line of lines) {\n const trimmed = line.trimStart()\n\n if (!inCodeBlock) {\n const match = trimmed.match(STATIC_REGEX_1)\n if (match) {\n flushNonCode()\n inCodeBlock = true\n fenceChar = match[1]![0]!\n fenceLen = match[1]!.length\n codeBuffer = [line]\n continue\n }\n nonCodeBuffer.push(line)\n }\n else {\n const match = trimmed.match(STATIC_REGEX_2)\n if (match && match[1]![0] === fenceChar && match[1]!.length >= fenceLen) {\n // Properly closed — emit code block as-is\n result.push(codeBuffer.join('\\n'))\n result.push(line)\n codeBuffer = []\n inCodeBlock = false\n fenceChar = ''\n fenceLen = 0\n continue\n }\n codeBuffer.push(line)\n }\n }\n\n flushNonCode()\n\n // Unclosed fence: treat as non-code so sanitization still applies\n if (inCodeBlock && codeBuffer.length > 0) {\n result.push(fn(codeBuffer.join('\\n')))\n }\n\n return result.join('\\n')\n}\n\n/**\n * Sanitize markdown content to strip prompt injection vectors.\n * Applied at every markdown emission point (cache writes, SKILL.md, search output).\n */\nexport function sanitizeMarkdown(content: string): string {\n if (!content)\n return content\n\n // Layer 1: Strip zero-width characters (global, including in code blocks)\n let result = content.replace(ZERO_WIDTH_RE, '')\n\n // Layer 2: Strip dynamic command placeholders globally (!`command` → command injection vector)\n result = result.replace(DYNAMIC_COMMAND_RE, '')\n\n // Layer 3: Strip agent directive tags globally (never legitimate, even in code blocks)\n result = stripTags(result, AGENT_DIRECTIVE_TAGS)\n\n // Layers 4-10: Only outside fenced code blocks\n result = 
processOutsideCodeBlocks(result, (text) => {\n // Protect inline code spans from tag stripping (e.g. `<script setup>` in Vue docs)\n const inlineCodeSpans: string[] = []\n let t = text.replace(/(`+)([^`]+)\\1/g, (match) => {\n const idx = inlineCodeSpans.length\n inlineCodeSpans.push(match)\n return `\\x00IC${idx}\\x00`\n })\n\n // Layer 4: Strip HTML comments (outside code blocks where they're hidden from review;\n // inside code blocks they render as visible text and are legitimate documentation)\n t = t.replace(HTML_COMMENT_RE, '')\n\n // Layer 5: Decode entities + strip remaining dangerous tags (HTML + entity-encoded agent directives)\n t = decodeAngleBracketEntities(t)\n t = stripTags(t, [...AGENT_DIRECTIVE_TAGS, ...DANGEROUS_HTML_TAGS])\n\n // Layer 6: Strip external images (exfil via query params)\n t = t.replace(EXTERNAL_IMAGE_RE, '')\n\n // Layer 7: Convert external links to plain text\n t = t.replace(EXTERNAL_LINK_RE, '$1')\n\n // Layer 8: Strip dangerous protocols (raw and URL-encoded)\n t = t.replace(DANGEROUS_PROTOCOL_RE, '')\n t = t.replace(DANGEROUS_PROTOCOL_ENCODED_RE, '')\n\n // Layer 9: Strip directive-style lines\n t = t.replace(DIRECTIVE_LINE_RE, '')\n\n // Layer 10: Strip encoded payloads\n t = t.replace(BASE64_BLOB_RE, '')\n t = t.replace(UNICODE_ESCAPE_SPAM_RE, '')\n\n // Layer 11: Strip emoji (token-inefficient, distort embeddings, semantically ambiguous)\n t = t.replace(EMOJI_RE, '')\n\n // Restore inline code spans\n t = t.replace(/\\0IC(\\d+)\\0/g, (_, idx) => inlineCodeSpans[Number(idx)] || '')\n\n return t\n })\n\n return result\n}\n\n// --- Markdown repair ---\n\n/** Heading missing space after #: `##Heading` → `## Heading` */\nconst HEADING_NO_SPACE_RE = /^(#{1,6})([^\\s#])/gm\n\n/** 3+ consecutive blank lines → 2 */\nconst EXCESSIVE_BLANKS_RE = /\\n{4,}/g\n\n/** Trailing whitespace on lines (preserve intentional double-space line breaks) */\nconst TRAILING_WHITESPACE_RE = /[ \\t]+$/gm\n\n/** Emoji at start of line inside a code block 
— LLM forgot to close the block */\nconst EMOJI_LINE_START_RE = /^\\p{Extended_Pictographic}/u\n\n/**\n * Close unclosed fenced code blocks.\n * Walks line-by-line tracking open/close state.\n */\nfunction closeUnclosedCodeBlocks(content: string): string {\n const lines = content.split('\\n')\n const result: string[] = []\n let inCodeBlock = false\n let fence = ''\n\n for (const line of lines) {\n const trimmed = line.trimStart()\n if (!inCodeBlock) {\n const match = trimmed.match(STATIC_REGEX_1)\n if (match) {\n inCodeBlock = true\n fence = match[1]![0]!.repeat(match[1]!.length)\n }\n }\n else {\n // Check for closing fence (same char, at least same length)\n const match = trimmed.match(STATIC_REGEX_2)\n if (match && match[1]![0] === fence[0] && match[1]!.length >= fence.length) {\n inCodeBlock = false\n fence = ''\n }\n else {\n // New fence opener inside unclosed block (same char, same length, with lang tag)\n // LLMs commonly forget to close a code block before starting a new one\n const openMatch = trimmed.match(STATIC_REGEX_3)\n if (openMatch && openMatch[1]![0] === fence[0] && openMatch[1]!.length === fence.length) {\n result.push(fence)\n // fence char/length stays the same since both match\n }\n // Emoji at line start → LLM forgot to close code block before markdown content\n else if (EMOJI_LINE_START_RE.test(trimmed)) {\n result.push(fence)\n inCodeBlock = false\n fence = ''\n }\n }\n }\n result.push(line)\n }\n\n // If still inside a code block, close it\n if (inCodeBlock) {\n // Ensure trailing newline before closing fence\n if (result.length > 0 && result.at(-1) !== '')\n result.push('')\n result.push(fence)\n }\n\n return result.join('\\n')\n}\n\n/**\n * Remove empty code blocks and deduplicate consecutive identical code blocks.\n * Empty blocks arise when emoji/fence recovery leaves orphaned fences.\n * Duplicate blocks arise when LLMs repeat the same code example.\n */\nfunction cleanupCodeBlocks(content: string): string {\n const lines = 
content.split('\\n')\n const toRemove = new Set<number>()\n let prevCodeContent: string | undefined\n let i = 0\n\n while (i < lines.length) {\n const trimmed = lines[i]!.trimStart()\n const fm = trimmed.match(STATIC_REGEX_1)\n if (!fm) {\n // Non-blank text between code blocks resets dedup tracking\n if (trimmed)\n prevCodeContent = undefined\n i++\n continue\n }\n\n const fChar = fm[1]![0]!\n const fLen = fm[1]!.length\n const openIdx = i\n i++\n\n let closeIdx = -1\n while (i < lines.length) {\n const ct = lines[i]!.trimStart()\n const cm = ct.match(STATIC_REGEX_2)\n if (cm && cm[1]![0] === fChar && cm[1]!.length >= fLen) {\n closeIdx = i\n i++\n break\n }\n i++\n }\n\n if (closeIdx === -1)\n continue\n\n const inner = lines.slice(openIdx + 1, closeIdx).join('\\n').trim()\n\n if (!inner) {\n for (let j = openIdx; j <= closeIdx; j++) toRemove.add(j)\n }\n else if (inner === prevCodeContent) {\n for (let j = openIdx; j <= closeIdx; j++) toRemove.add(j)\n }\n else {\n prevCodeContent = inner\n }\n }\n\n if (!toRemove.size)\n return content\n return lines.filter((_, idx) => !toRemove.has(idx)).join('\\n')\n}\n\n/**\n * Close unclosed inline code spans.\n * Scans each line for unmatched backtick(s) and appends closing backtick(s).\n * Tracks fenced code blocks internally to handle any fence length.\n */\nfunction closeUnclosedInlineCode(content: string): string {\n const lines = content.split('\\n')\n let inFence = false\n let fenceChar = ''\n let fenceLen = 0\n\n return lines.map((line) => {\n const trimmed = line.trimStart()\n if (!inFence) {\n const m = trimmed.match(STATIC_REGEX_1)\n if (m) {\n inFence = true\n fenceChar = m[1]![0]!\n fenceLen = m[1]!.length\n return line\n }\n }\n else {\n const m = trimmed.match(STATIC_REGEX_2)\n if (m && m[1]![0] === fenceChar && m[1]!.length >= fenceLen) {\n inFence = false\n }\n return line\n }\n\n // Outside fenced code blocks — fix unclosed inline backticks\n let i = 0\n while (i < line.length) {\n if (line[i] === '`') {\n 
const seqStart = i\n while (i < line.length && line[i] === '`') i++\n const seqLen = i - seqStart\n let found = false\n let j = i\n while (j < line.length) {\n if (line[j] === '`') {\n const closeStart = j\n while (j < line.length && line[j] === '`') j++\n if (j - closeStart === seqLen) {\n found = true\n i = j\n break\n }\n }\n else {\n j++\n }\n }\n if (!found) {\n line = `${line}${'`'.repeat(seqLen)}`\n i = line.length\n }\n }\n else {\n i++\n }\n }\n return line\n }).join('\\n')\n}\n\n/**\n * Repair broken markdown syntax.\n * Fixes common issues in fetched documentation:\n * - Unclosed fenced code blocks\n * - Unclosed inline code spans\n * - Missing space after heading # markers\n * - Excessive consecutive blank lines\n * - Trailing whitespace\n */\nexport function repairMarkdown(content: string): string {\n if (!content)\n return content\n\n let result = content\n\n // Fix unclosed fenced code blocks (must run before other line-level fixes)\n result = closeUnclosedCodeBlocks(result)\n\n // Remove empty and duplicate code blocks (artifacts from fence recovery)\n result = cleanupCodeBlocks(result)\n\n // Fix unclosed inline code spans\n result = closeUnclosedInlineCode(result)\n\n // Fix heading spacing (only outside code blocks)\n result = processOutsideCodeBlocks(result, text =>\n text.replace(HEADING_NO_SPACE_RE, '$1 $2'))\n\n // Normalize excessive blank lines\n result = result.replace(EXCESSIVE_BLANKS_RE, '\\n\\n\\n')\n\n // Strip trailing whitespace\n result = result.replace(TRAILING_WHITESPACE_RE, '')\n\n return 
result\n}\n"],"mappings":"AAAA,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;;;;;;;CAcvB;;CAGA;;;;;CAMA;CACE;CACA;CACA;CACA;CACA;CACA;MAEA,sBAAA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;CACA;;;;AAOF,SAAM,UAAA,MAAA,MAAsB;CAC1B,IAAA,CAAA,KAAA,QAAA,OAAA;CACA,MAAA,WAAA,KAAA,KAAA,IAAA;CACA,MAAA,WAAA,IAAA,OAAA,KAAA,SAAA,oCAAA,KAAA;CACA,IAAA,SAAA,KAAA,QAAA,UAAA,GAAA;CACA,MAAA,eAAA,IAAA,OAAA,SAAA,SAAA,oBAAA,KAAA;CACA,SAAA,OAAA,QAAA,cAAA,GAAA;CACA,OAAA;;;;MAOA,wBACW;;AASb,MAAA,oBAAiC;MAG/B,iBAAsB;MAGlB,yBAAsB;MAG1B,qBAAwB;;AAK1B,SAAM,yBAAoB,SAAA,IAAA;;;;;CAM1B,IAAA,cAAM;;CAGN,IAAA,WAAM;CACN,SAAM,eAAA;;GAGN,OAAM,KAAA,GAAA,cAAoB,KAAA,KAAA,CAAA,CAAA;;;;CAM1B,KAAM,MAAA,QAAA,OAAA;;;;;;IAON,cAAM;;IAIN,WAAM,MAAW,GAAA;;;;;;;GAQjB,IAAA,SAAgB,MAAA,GAAA,OAAA,aAA0C,MAAsC,GAAA,UAAA,UAAA;IAC9F,OAAM,KAAQ,WAAQ,KAAM,KAAK,CAAA;IACjC,OAAM,KAAmB,KAAE;IAC3B,aAAI,EAAA;IACJ,cAA2B;IAC3B,YAAI;IACJ,WAAI;IACJ;;GAGE,WAAI,KAAA,KAAc;;;;;CAMpB,OAAK,OAAM,KAAQ,KAAA;;SAIT,iBAAgB,SAAM;KAC5B,CAAA,SAAW,OAAA;KACT,SAAA,QAAc,QAAA,eAAA,GAAA;UACd,OAAA,QAAc,oBAAA,GAAA;UACd,UAAY,QAAU,qBAAA;UACtB,yBAAqB,SAAA,SAAA;QACrB,kBAAmB,EAAA;MACnB,IAAA,KAAA,QAAA,mBAAA,UAAA;;GAEF,gBAAc,KAAK,MAAK;UAErB,SAAA,IAAA;IACH;MACA,EAAI,QAAS,iBAAiB,GAAA;MAE5B,2BAA4B,EAAA;MAC5B,UAAY,GAAA,CAAA,GAAK,sBAAA,GAAA,oBAAA,CAAA;MACjB,EAAA,QAAA,mBAAe,GAAA;MACf,EAAA,QAAA,kBAAc,KAAA;MACd,EAAA,QAAY,uBAAA,GAAA;MACZ,EAAA,QAAW,+BAAA,GAAA;MACX,EAAA,QAAA,mBAAA,GAAA;;MAEF,EAAA,QAAW,wBAAU,GAAA;;;EAIzB,OAAA;GAGA;CAIA,OAAO;;;;MAQF,yBACI;MAMT,sBAAwB;SAMxB,wBAAS,SAAyB;OAEhC,QAAM,QAAA,MAA8B,KAAA;OAChC,SAAS,EAAA;KACX,cAAY;KACZ,QAAA;MACA,MAAO,QAAS,OAAI;QACpB,UAAA,KAAA,WAAA;EAIF,IAAI,CAAA,aAAU;GAGd,MAAI,QAAA,QAAA,MAAA,eAA6B;GACjC,IAAI,OAAA;IAGJ,cAAc;IAGd,QAAM,MAAQ,GAAA,GAAA,OAAA,MAAkB,GAAK,OAAA;;SAI/B;GAGN,MAAM,QAAQ,QAAA,MAAA,eAAsB;GAGpC,IAAI,SAAU,MAAA,GAAA,OAAgB,MAAG,MAAA,MAAA,GAAA,UAAA,MAAA,QAAA;IACjC,cAAc;IAGd,QAAM;UAGA;IAEN,MAAO,YAAA,QAAA,MAAA,eAAA;IACP,IAAA,aAAA,UAAA,GAAA,OAAA,MAAA,MAAA,UAAA,GAAA,WAAA,MAAA,QAAA,OAAA,KAAA,MAAA;SAEK,IAAA,oBAAA,KAAA,QAAA,EAAA;;;KAMT,QAAM;
;;;EAMN,OAAM,KAAA,KAAA;;CAGN,IAAA,aAAM;;;;;;SAQE,kBAAqB,SAAA;CAC3B,MAAI,QAAA,QAAc,MAAA,KAAA;CAClB,MAAI,2BAAQ,IAAA,KAAA;CAEZ,IAAA;KACE,IAAM;QACD,IAAA,MAAA,QAAa;QAChB,UAAc,MAAA,GAAQ,WAAM;QACxB,KAAA,QAAO,MAAA,eAAA;MACT,CAAA,IAAA;OACA,SAAQ,kBAAqB,KAAM;;;;QAMjC,QAAS,GAAA,GAAM;QACjB,OAAA,GAAc,GAAA;QACd,UAAQ;;MAKR,WAAM;SACF,IAAA,MAAA,QAAa;SAKZ,KAAI,MAAA,GAAA,WAAoB,CAAA,MAAK,eAAU;OAC1C,MAAO,GAAA,GAAK,OAAM,SAAA,GAAA,GAAA,UAAA,MAAA;eAClB;;;;;;;EASR,MAAI,QAAA,MAAa,MAAA,UAAA,GAAA,SAAA,CAAA,KAAA,KAAA,CAAA,MAAA;EAEf,IAAI,CAAA,OAAO,KAAA,IAAS,IAAK,SAAO,KAAM,UACpC,KAAO,SAAQ,IAAA,EAAA;OACjB,IAAO,UAAW,iBAAA,KAAA,IAAA,IAAA,SAAA,KAAA,UAAA,KAAA,SAAA,IAAA,EAAA;;;;;;;;CAWtB,IAAA,UAAS;CACP,IAAA,YAAc;CACd,IAAA,WAAM;CACN,OAAI,MAAA,KAAA,SAAA;EACJ,MAAI,UAAI,KAAA,WAAA;EAER,IAAA,CAAA,SAAW;GACT,MAAM,IAAA,QAAU,MAAU,eAAW;GACrC,IAAA,GAAM;IACN,UAAS;IAEP,YACE,EAAA,GAAA;IACF,WAAA,EAAA,GAAA;IACA,OAAA;;SAGI;GACN,MAAM,IAAA,QAAc,MAAA,eAAA;GACpB,IAAA,KAAM,EAAA,GAAA,OAAU,aAAA,EAAA,GAAA,UAAA,UAAA,UAAA;GAChB,OAAA;;EAGA,IAAA,IAAO;SAEC,IAAA,KADK,QAAU,IAAA,KACP,OAAM,KAAA;GACpB,MAAI,WAAa;UACf,IAAA,KAAW,UAAA,KAAA,OAAA,KAAA;SACX,SAAA,IAAA;OACA,QAAA;;GAEF,OAAA,IAAA,KAAA,QAAA,IAAA,KAAA,OAAA,KAAA;;IAGF,OAAI,IAAA,KAAa,UACf,KAAA,OAAA,KAAA;IAEF,IAAM,IAAA,eAAc,QAAM;KAE1B,QAAK;SAGA;;;UAQF;GAEL,IAAA,CAAO,OAAM;;;;;;;;SAUT,eAAU,SAAA;CACd,IAAI,CAAA,SAAA,OAAY;CAChB,IAAI,SAAA;CAEJ,SAAO,wBAAoB,OAAA;UACnB,kBAAe,OAAW;UAC3B,wBAAS,OAAA;UACN,yBAAkB,SAAe,SAAA,KAAA,QAAA,qBAAA,QAAA,CAAA;UAChC,OAAA,QAAA,qBAAA,SAAA;UACL,OAAU,QAAA,wBAAA,GAAA;QACV"}
|
|
@@ -1,14 +1,15 @@
|
|
|
1
|
+
import { a as targets, r as detectTargetAgent } from "./detect.mjs";
|
|
1
2
|
import "./agent.mjs";
|
|
2
|
-
import {
|
|
3
|
+
import { f as getSharedSkillsDir, s as REFERENCES_DIR, u as getPackageDbPath } from "./paths.mjs";
|
|
3
4
|
import "./cache.mjs";
|
|
4
|
-
import {
|
|
5
|
-
import {
|
|
6
|
-
import { a as readLock } from "./lockfile.mjs";
|
|
7
|
-
import { r as toStoragePackageName } from "./prefix.mjs";
|
|
5
|
+
import { l as toStoragePackageName } from "./semver.mjs";
|
|
6
|
+
import { c as readLock } from "./lockfile.mjs";
|
|
8
7
|
import "./core.mjs";
|
|
9
|
-
import { join } from "pathe";
|
|
10
8
|
import { existsSync, readdirSync } from "node:fs";
|
|
11
9
|
import * as p from "@clack/prompts";
|
|
10
|
+
import { join } from "pathe";
|
|
11
|
+
const STATIC_REGEX_1 = /[-_/]+/;
|
|
12
|
+
const STATIC_REGEX_2 = /^(issues?|docs?|releases?):(.+)$/i;
|
|
12
13
|
function findPackageDbs(packageFilter) {
|
|
13
14
|
const lock = readProjectLock(process.cwd());
|
|
14
15
|
if (!lock) return [];
|
|
@@ -40,7 +41,7 @@ function listLockPackages(cwd = process.cwd()) {
|
|
|
40
41
|
}
|
|
41
42
|
function filterLockDbs(lock, packageFilter) {
|
|
42
43
|
if (!lock) return [];
|
|
43
|
-
const tokenize = (s) => s.toLowerCase().replace(/@/g, "").split(
|
|
44
|
+
const tokenize = (s) => s.toLowerCase().replace(/@/g, "").split(STATIC_REGEX_1).filter(Boolean);
|
|
44
45
|
return Object.values(lock.skills).filter((info) => {
|
|
45
46
|
if (!info.packageName || !info.version) return false;
|
|
46
47
|
if (!packageFilter) return true;
|
|
@@ -77,7 +78,7 @@ function findAnyPackageDb(name) {
|
|
|
77
78
|
return null;
|
|
78
79
|
}
|
|
79
80
|
function parseFilterPrefix(rawQuery) {
|
|
80
|
-
const prefixMatch = rawQuery.match(
|
|
81
|
+
const prefixMatch = rawQuery.match(STATIC_REGEX_2);
|
|
81
82
|
if (!prefixMatch) return { query: rawQuery };
|
|
82
83
|
const prefix = prefixMatch[1].toLowerCase();
|
|
83
84
|
const query = prefixMatch[2];
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"search-helpers.mjs","names":["agents"],"sources":["../../src/commands/search-helpers.ts"],"sourcesContent":["import type { SearchFilter } from '../retriv/index.ts'\nimport { existsSync, readdirSync } from 'node:fs'\nimport * as p from '@clack/prompts'\nimport { join } from 'pathe'\nimport { agents, detectTargetAgent } from '../agent/index.ts'\nimport { getPackageDbPath, REFERENCES_DIR } from '../cache/index.ts'\nimport { readLock } from '../core/index.ts'\nimport {
|
|
1
|
+
{"version":3,"file":"search-helpers.mjs","names":["agents"],"sources":["../../src/commands/search-helpers.ts"],"sourcesContent":["import type { SearchFilter } from '../retriv/index.ts'\nimport { existsSync, readdirSync } from 'node:fs'\nimport * as p from '@clack/prompts'\nimport { join } from 'pathe'\nimport { agents, detectTargetAgent } from '../agent/index.ts'\nimport { getPackageDbPath, REFERENCES_DIR } from '../cache/index.ts'\nimport { readLock } from '../core/index.ts'\nimport { getSharedSkillsDir } from '../core/paths.ts'\nimport { toStoragePackageName } from '../core/prefix.ts'\n\nconst STATIC_REGEX_1 = /[-_/]+/\nconst STATIC_REGEX_2 = /^(issues?|docs?|releases?):(.+)$/i\n\n/** Collect search.db paths for packages installed in the current project (from skilld-lock.yaml) */\nexport function findPackageDbs(packageFilter?: string): string[] {\n const cwd = process.cwd()\n const lock = readProjectLock(cwd)\n if (!lock)\n return []\n return filterLockDbs(lock, packageFilter)\n}\n\n/** Build package name → version map from the project lockfile */\nexport function getPackageVersions(cwd: string = process.cwd()): Map<string, string> {\n const lock = readProjectLock(cwd)\n const map = new Map<string, string>()\n if (!lock)\n return map\n for (const s of Object.values(lock.skills)) {\n if (s.packageName && s.version)\n map.set(s.packageName, s.version)\n }\n return map\n}\n\n/** Read the project's skilld-lock.yaml (shared dir or agent skills dir) */\nfunction readProjectLock(cwd: string): ReturnType<typeof readLock> {\n const shared = getSharedSkillsDir(cwd)\n if (shared) {\n const lock = readLock(shared)\n if (lock)\n return lock\n }\n const agent = detectTargetAgent()\n if (!agent)\n return null\n return readLock(`${cwd}/${agents[agent].skillsDir}`)\n}\n\n/** List installed packages with versions from the project lockfile */\nexport function listLockPackages(cwd: string = process.cwd()): string[] {\n const lock = readProjectLock(cwd)\n if (!lock)\n return []\n 
const seen = new Map<string, string>()\n for (const s of Object.values(lock.skills)) {\n if (s.packageName && s.version)\n seen.set(s.packageName, s.version)\n }\n return Array.from(seen, ([name, version]) => `${name}@${version}`)\n}\n\nfunction filterLockDbs(lock: ReturnType<typeof readLock>, packageFilter?: string): string[] {\n if (!lock)\n return []\n const tokenize = (s: string) => s.toLowerCase().replace(/@/g, '').split(STATIC_REGEX_1).filter(Boolean)\n\n return Object.values(lock.skills)\n .filter((info) => {\n if (!info.packageName || !info.version)\n return false\n if (!packageFilter)\n return true\n const filterTokens = tokenize(packageFilter)\n const nameTokens = tokenize(info.packageName)\n return filterTokens.every(ft => nameTokens.some(nt => nt.includes(ft) || ft.includes(nt)))\n })\n .map((info) => {\n const storageName = toStoragePackageName(info.packageName!)\n const exact = getPackageDbPath(storageName, info.version!)\n if (existsSync(exact))\n return exact\n const fallback = findAnyPackageDb(storageName)\n if (fallback)\n p.log.warn(`Using cached search index for ${info.packageName} (v${info.version} not indexed). 
Run \\`skilld update ${info.packageName}\\` to re-index.`)\n return fallback\n })\n .filter((db): db is string => !!db)\n}\n\n/** Find any search.db for a package when exact version cache is missing */\nfunction findAnyPackageDb(name: string): string | null {\n if (!existsSync(REFERENCES_DIR))\n return null\n\n const prefix = `${name}@`\n\n if (name.startsWith('@')) {\n const [scope, pkg] = name.split('/')\n const scopeDir = join(REFERENCES_DIR, scope!)\n if (!existsSync(scopeDir))\n return null\n const scopePrefix = `${pkg}@`\n for (const entry of readdirSync(scopeDir)) {\n if (entry.startsWith(scopePrefix)) {\n const db = join(scopeDir, entry, 'search.db')\n if (existsSync(db))\n return db\n }\n }\n return null\n }\n\n for (const entry of readdirSync(REFERENCES_DIR)) {\n if (entry.startsWith(prefix)) {\n const db = join(REFERENCES_DIR, entry, 'search.db')\n if (existsSync(db))\n return db\n }\n }\n return null\n}\n\n/** Parse filter prefix (e.g., \"issues:bug\" -> filter by type=issue, query=\"bug\") */\nexport function parseFilterPrefix(rawQuery: string): { query: string, filter?: SearchFilter } {\n const prefixMatch = rawQuery.match(STATIC_REGEX_2)\n if (!prefixMatch)\n return { query: rawQuery }\n\n const prefix = prefixMatch[1]!.toLowerCase()\n const query = prefixMatch[2]!\n if (prefix.startsWith('issue'))\n return { query, filter: { type: 'issue' } }\n if (prefix.startsWith('release'))\n return { query, filter: { type: 'release' } }\n return { query, filter: { type: { $in: ['doc', 'docs'] } } 
}\n}\n"],"mappings":";;;;;;;;;;AAUA,MAAM,iBAAiB;AACvB,MAAM,iBAAiB;AAGvB,SAAgB,eAAe,eAAkC;CAE/D,MAAM,OAAO,gBADD,QAAQ,KACY,CAAC;CACjC,IAAI,CAAC,MACH,OAAO,EAAE;CACX,OAAO,cAAc,MAAM,cAAc;;AAI3C,SAAgB,mBAAmB,MAAc,QAAQ,KAAK,EAAuB;CACnF,MAAM,OAAO,gBAAgB,IAAI;CACjC,MAAM,sBAAM,IAAI,KAAqB;CACrC,IAAI,CAAC,MACH,OAAO;CACT,KAAK,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EACxC,IAAI,EAAE,eAAe,EAAE,SACrB,IAAI,IAAI,EAAE,aAAa,EAAE,QAAQ;CAErC,OAAO;;AAIT,SAAS,gBAAgB,KAA0C;CACjE,MAAM,SAAS,mBAAmB,IAAI;CACtC,IAAI,QAAQ;EACV,MAAM,OAAO,SAAS,OAAO;EAC7B,IAAI,MACF,OAAO;;CAEX,MAAM,QAAQ,mBAAmB;CACjC,IAAI,CAAC,OACH,OAAO;CACT,OAAO,SAAS,GAAG,IAAI,GAAGA,QAAO,OAAO,YAAY;;AAItD,SAAgB,iBAAiB,MAAc,QAAQ,KAAK,EAAY;CACtE,MAAM,OAAO,gBAAgB,IAAI;CACjC,IAAI,CAAC,MACH,OAAO,EAAE;CACX,MAAM,uBAAO,IAAI,KAAqB;CACtC,KAAK,MAAM,KAAK,OAAO,OAAO,KAAK,OAAO,EACxC,IAAI,EAAE,eAAe,EAAE,SACrB,KAAK,IAAI,EAAE,aAAa,EAAE,QAAQ;CAEtC,OAAO,MAAM,KAAK,OAAO,CAAC,MAAM,aAAa,GAAG,KAAK,GAAG,UAAU;;AAGpE,SAAS,cAAc,MAAmC,eAAkC;CAC1F,IAAI,CAAC,MACH,OAAO,EAAE;CACX,MAAM,YAAY,MAAc,EAAE,aAAa,CAAC,QAAQ,MAAM,GAAG,CAAC,MAAM,eAAe,CAAC,OAAO,QAAQ;CAEvG,OAAO,OAAO,OAAO,KAAK,OAAO,CAC9B,QAAQ,SAAS;EAChB,IAAI,CAAC,KAAK,eAAe,CAAC,KAAK,SAC7B,OAAO;EACT,IAAI,CAAC,eACH,OAAO;EACT,MAAM,eAAe,SAAS,cAAc;EAC5C,MAAM,aAAa,SAAS,KAAK,YAAY;EAC7C,OAAO,aAAa,OAAM,OAAM,WAAW,MAAK,OAAM,GAAG,SAAS,GAAG,IAAI,GAAG,SAAS,GAAG,CAAC,CAAC;GAC1F,CACD,KAAK,SAAS;EACb,MAAM,cAAc,qBAAqB,KAAK,YAAa;EAC3D,MAAM,QAAQ,iBAAiB,aAAa,KAAK,QAAS;EAC1D,IAAI,WAAW,MAAM,EACnB,OAAO;EACT,MAAM,WAAW,iBAAiB,YAAY;EAC9C,IAAI,UACF,EAAE,IAAI,KAAK,iCAAiC,KAAK,YAAY,KAAK,KAAK,QAAQ,qCAAqC,KAAK,YAAY,iBAAiB;EACxJ,OAAO;GACP,CACD,QAAQ,OAAqB,CAAC,CAAC,GAAG;;AAIvC,SAAS,iBAAiB,MAA6B;CACrD,IAAI,CAAC,WAAW,eAAe,EAC7B,OAAO;CAET,MAAM,SAAS,GAAG,KAAK;CAEvB,IAAI,KAAK,WAAW,IAAI,EAAE;EACxB,MAAM,CAAC,OAAO,OAAO,KAAK,MAAM,IAAI;EACpC,MAAM,WAAW,KAAK,gBAAgB,MAAO;EAC7C,IAAI,CAAC,WAAW,SAAS,EACvB,OAAO;EACT,MAAM,cAAc,GAAG,IAAI;EAC3B,KAAK,MAAM,SAAS,YAAY,SAAS,EACvC,IAAI,MAAM,WAAW,YAAY,EAAE;GACjC,MAAM,KAAK,KAAK,UAAU,OAAO,YAAY;GAC7C,IAAI,WAAW,GAAG,EAChB,OAAO;;EAGb,OAAO;;CA
GT,KAAK,MAAM,SAAS,YAAY,eAAe,EAC7C,IAAI,MAAM,WAAW,OAAO,EAAE;EAC5B,MAAM,KAAK,KAAK,gBAAgB,OAAO,YAAY;EACnD,IAAI,WAAW,GAAG,EAChB,OAAO;;CAGb,OAAO;;AAIT,SAAgB,kBAAkB,UAA4D;CAC5F,MAAM,cAAc,SAAS,MAAM,eAAe;CAClD,IAAI,CAAC,aACH,OAAO,EAAE,OAAO,UAAU;CAE5B,MAAM,SAAS,YAAY,GAAI,aAAa;CAC5C,MAAM,QAAQ,YAAY;CAC1B,IAAI,OAAO,WAAW,QAAQ,EAC5B,OAAO;EAAE;EAAO,QAAQ,EAAE,MAAM,SAAS;EAAE;CAC7C,IAAI,OAAO,WAAW,UAAU,EAC9B,OAAO;EAAE;EAAO,QAAQ,EAAE,MAAM,WAAW;EAAE;CAC/C,OAAO;EAAE;EAAO,QAAQ,EAAE,MAAM,EAAE,KAAK,CAAC,OAAO,OAAO,EAAE,EAAE;EAAE"}
|