akm-cli 0.5.0 → 0.6.0-rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +53 -5
- package/README.md +9 -9
- package/dist/cli.js +379 -1448
- package/dist/{completions.js → commands/completions.js} +1 -1
- package/dist/{config-cli.js → commands/config-cli.js} +109 -11
- package/dist/commands/curate.js +263 -0
- package/dist/{info.js → commands/info.js} +17 -11
- package/dist/{init.js → commands/init.js} +4 -4
- package/dist/{install-audit.js → commands/install-audit.js} +14 -2
- package/dist/{installed-kits.js → commands/installed-stashes.js} +122 -50
- package/dist/commands/migration-help.js +141 -0
- package/dist/{registry-search.js → commands/registry-search.js} +68 -9
- package/dist/commands/remember.js +178 -0
- package/dist/{stash-search.js → commands/search.js} +28 -69
- package/dist/{self-update.js → commands/self-update.js} +3 -3
- package/dist/{stash-show.js → commands/show.js} +106 -81
- package/dist/{stash-add.js → commands/source-add.js} +133 -67
- package/dist/{stash-clone.js → commands/source-clone.js} +15 -13
- package/dist/{stash-source-manage.js → commands/source-manage.js} +24 -24
- package/dist/{vault.js → commands/vault.js} +43 -0
- package/dist/{stash-ref.js → core/asset-ref.js} +4 -4
- package/dist/{asset-registry.js → core/asset-registry.js} +30 -6
- package/dist/{asset-spec.js → core/asset-spec.js} +13 -6
- package/dist/{common.js → core/common.js} +147 -50
- package/dist/{config.js → core/config.js} +288 -29
- package/dist/core/errors.js +90 -0
- package/dist/{frontmatter.js → core/frontmatter.js} +64 -8
- package/dist/{paths.js → core/paths.js} +4 -4
- package/dist/core/write-source.js +280 -0
- package/dist/{local-search.js → indexer/db-search.js} +49 -32
- package/dist/{db.js → indexer/db.js} +210 -81
- package/dist/{file-context.js → indexer/file-context.js} +3 -3
- package/dist/{indexer.js → indexer/indexer.js} +153 -30
- package/dist/{manifest.js → indexer/manifest.js} +10 -10
- package/dist/{matchers.js → indexer/matchers.js} +4 -7
- package/dist/{metadata.js → indexer/metadata.js} +9 -5
- package/dist/{search-source.js → indexer/search-source.js} +97 -55
- package/dist/{semantic-status.js → indexer/semantic-status.js} +2 -2
- package/dist/{walker.js → indexer/walker.js} +1 -1
- package/dist/{lockfile.js → integrations/lockfile.js} +29 -2
- package/dist/{llm.js → llm/client.js} +12 -48
- package/dist/llm/embedder.js +127 -0
- package/dist/llm/embedders/cache.js +47 -0
- package/dist/llm/embedders/local.js +152 -0
- package/dist/llm/embedders/remote.js +121 -0
- package/dist/llm/embedders/types.js +39 -0
- package/dist/llm/metadata-enhance.js +53 -0
- package/dist/output/cli-hints.js +301 -0
- package/dist/output/context.js +95 -0
- package/dist/{renderers.js → output/renderers.js} +57 -61
- package/dist/output/shapes.js +212 -0
- package/dist/output/text.js +520 -0
- package/dist/{registry-build-index.js → registry/build-index.js} +48 -32
- package/dist/{create-provider-registry.js → registry/create-provider-registry.js} +6 -2
- package/dist/registry/factory.js +33 -0
- package/dist/{origin-resolve.js → registry/origin-resolve.js} +1 -1
- package/dist/registry/providers/index.js +11 -0
- package/dist/{providers → registry/providers}/skills-sh.js +60 -4
- package/dist/{providers → registry/providers}/static-index.js +126 -56
- package/dist/registry/providers/types.js +25 -0
- package/dist/{registry-resolve.js → registry/resolve.js} +10 -6
- package/dist/{detect.js → setup/detect.js} +0 -27
- package/dist/{ripgrep-install.js → setup/ripgrep-install.js} +1 -1
- package/dist/{ripgrep-resolve.js → setup/ripgrep-resolve.js} +2 -2
- package/dist/{setup.js → setup/setup.js} +162 -129
- package/dist/setup/steps.js +45 -0
- package/dist/{kit-include.js → sources/include.js} +1 -1
- package/dist/sources/provider-factory.js +36 -0
- package/dist/sources/provider.js +21 -0
- package/dist/sources/providers/filesystem.js +35 -0
- package/dist/{stash-providers → sources/providers}/git.js +218 -28
- package/dist/{stash-providers → sources/providers}/index.js +4 -4
- package/dist/sources/providers/install-types.js +14 -0
- package/dist/sources/providers/npm.js +160 -0
- package/dist/sources/providers/provider-utils.js +173 -0
- package/dist/sources/providers/sync-from-ref.js +45 -0
- package/dist/sources/providers/tar-utils.js +154 -0
- package/dist/{stash-providers → sources/providers}/website.js +60 -20
- package/dist/{stash-resolve.js → sources/resolve.js} +13 -12
- package/dist/{wiki.js → wiki/wiki.js} +18 -17
- package/dist/{workflow-authoring.js → workflows/authoring.js} +48 -17
- package/dist/{workflow-cli.js → workflows/cli.js} +2 -1
- package/dist/{workflow-db.js → workflows/db.js} +1 -1
- package/dist/workflows/document-cache.js +20 -0
- package/dist/workflows/parser.js +379 -0
- package/dist/workflows/renderer.js +78 -0
- package/dist/{workflow-runs.js → workflows/runs.js} +84 -30
- package/dist/workflows/schema.js +11 -0
- package/dist/workflows/validator.js +48 -0
- package/docs/README.md +30 -0
- package/docs/migration/release-notes/0.0.13.md +4 -0
- package/docs/migration/release-notes/0.1.0.md +6 -0
- package/docs/migration/release-notes/0.2.0.md +6 -0
- package/docs/migration/release-notes/0.3.0.md +5 -0
- package/docs/migration/release-notes/0.5.0.md +6 -0
- package/docs/migration/release-notes/0.6.0.md +75 -0
- package/docs/migration/release-notes/README.md +21 -0
- package/package.json +3 -2
- package/dist/embedder.js +0 -351
- package/dist/errors.js +0 -34
- package/dist/migration-help.js +0 -110
- package/dist/registry-factory.js +0 -19
- package/dist/registry-install.js +0 -532
- package/dist/ripgrep.js +0 -2
- package/dist/stash-provider-factory.js +0 -35
- package/dist/stash-provider.js +0 -1
- package/dist/stash-providers/filesystem.js +0 -41
- package/dist/stash-providers/openviking.js +0 -348
- package/dist/stash-providers/provider-utils.js +0 -11
- package/dist/stash-types.js +0 -1
- package/dist/workflow-markdown.js +0 -251
- /package/dist/{markdown.js → core/markdown.js} +0 -0
- /package/dist/{warn.js → core/warn.js} +0 -0
- /package/dist/{search-fields.js → indexer/search-fields.js} +0 -0
- /package/dist/{usage-events.js → indexer/usage-events.js} +0 -0
- /package/dist/{github.js → integrations/github.js} +0 -0
- /package/dist/{registry-provider.js → registry/types.js} +0 -0
- /package/dist/{registry-types.js → sources/types.js} +0 -0
|
@@ -1,9 +1,35 @@
|
|
|
1
1
|
import fs from "node:fs";
|
|
2
2
|
import path from "node:path";
|
|
3
|
-
import { getConfigDir } from "
|
|
3
|
+
import { getConfigDir } from "../core/config";
|
|
4
4
|
// ── Paths ───────────────────────────────────────────────────────────────────
|
|
5
|
+
const LOCKFILE_NAME = "akm.lock";
|
|
6
|
+
const LEGACY_LOCKFILE_NAME = "stash.lock";
|
|
5
7
|
function getLockfilePath() {
|
|
6
|
-
return path.join(getConfigDir(),
|
|
8
|
+
return path.join(getConfigDir(), LOCKFILE_NAME);
|
|
9
|
+
}
|
|
10
|
+
function getLegacyLockfilePath() {
|
|
11
|
+
return path.join(getConfigDir(), LEGACY_LOCKFILE_NAME);
|
|
12
|
+
}
|
|
13
|
+
/**
|
|
14
|
+
* One-time migration: if the new `akm.lock` does not exist but the legacy
|
|
15
|
+
* `stash.lock` does, copy it across so installed-stash tracking survives the
|
|
16
|
+
* rename. Best-effort; failures are silent because the lockfile loader treats
|
|
17
|
+
* a missing file as an empty lockfile.
|
|
18
|
+
*/
|
|
19
|
+
function migrateLegacyLockfileIfNeeded() {
|
|
20
|
+
const newPath = getLockfilePath();
|
|
21
|
+
const legacyPath = getLegacyLockfilePath();
|
|
22
|
+
try {
|
|
23
|
+
if (fs.existsSync(newPath))
|
|
24
|
+
return;
|
|
25
|
+
if (!fs.existsSync(legacyPath))
|
|
26
|
+
return;
|
|
27
|
+
fs.mkdirSync(path.dirname(newPath), { recursive: true });
|
|
28
|
+
fs.copyFileSync(legacyPath, newPath);
|
|
29
|
+
}
|
|
30
|
+
catch {
|
|
31
|
+
/* best-effort — fall through to empty lockfile */
|
|
32
|
+
}
|
|
7
33
|
}
|
|
8
34
|
// ── Lock sentinel ────────────────────────────────────────────────────────────
|
|
9
35
|
const LOCK_MAX_RETRIES = 3;
|
|
@@ -74,6 +100,7 @@ function releaseLockSentinel() {
|
|
|
74
100
|
}
|
|
75
101
|
// ── Read / Write ────────────────────────────────────────────────────────────
|
|
76
102
|
export function readLockfile() {
|
|
103
|
+
migrateLegacyLockfileIfNeeded();
|
|
77
104
|
const lockfilePath = getLockfilePath();
|
|
78
105
|
try {
|
|
79
106
|
const raw = JSON.parse(fs.readFileSync(lockfilePath, "utf8"));
|
|
@@ -1,4 +1,13 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* Low-level OpenAI-compatible chat completions client and capability probing.
|
|
3
|
+
*
|
|
4
|
+
* Split out of `llm.ts` to keep the transport-layer concerns (HTTP request,
|
|
5
|
+
* response parsing, JSON-fence stripping, capability probe, availability
|
|
6
|
+
* check) separate from higher-level metadata-enhancement workflows.
|
|
7
|
+
*
|
|
8
|
+
* `llm.ts` re-exports everything from this module for backward compatibility.
|
|
9
|
+
*/
|
|
10
|
+
import { fetchWithTimeout } from "../core/common";
|
|
2
11
|
export async function chatCompletion(config, messages, options) {
|
|
3
12
|
const headers = { "Content-Type": "application/json" };
|
|
4
13
|
if (config.apiKey) {
|
|
@@ -22,7 +31,7 @@ export async function chatCompletion(config, messages, options) {
|
|
|
22
31
|
return json.choices?.[0]?.message?.content?.trim() ?? "";
|
|
23
32
|
}
|
|
24
33
|
/** Strip leading/trailing markdown code fences from an LLM response. */
|
|
25
|
-
function stripJsonFences(raw) {
|
|
34
|
+
export function stripJsonFences(raw) {
|
|
26
35
|
return raw
|
|
27
36
|
.trim()
|
|
28
37
|
.replace(/^```(?:json)?\s*\n?/i, "")
|
|
@@ -38,52 +47,7 @@ export function parseJsonResponse(raw) {
|
|
|
38
47
|
return undefined;
|
|
39
48
|
}
|
|
40
49
|
}
|
|
41
|
-
// ──
|
|
42
|
-
const SYSTEM_PROMPT = `You are a metadata generator for a developer asset registry. Given a script/skill/command/agent entry, generate improved metadata. Respond with ONLY valid JSON, no markdown fencing.`;
|
|
43
|
-
/**
|
|
44
|
-
* Use an LLM to enhance a stash entry's metadata: improve description,
|
|
45
|
-
* generate searchHints, and suggest tags.
|
|
46
|
-
*/
|
|
47
|
-
export async function enhanceMetadata(config, entry, fileContent) {
|
|
48
|
-
const contextParts = [`Name: ${entry.name}`, `Type: ${entry.type}`];
|
|
49
|
-
if (entry.description)
|
|
50
|
-
contextParts.push(`Current description: ${entry.description}`);
|
|
51
|
-
if (entry.tags?.length)
|
|
52
|
-
contextParts.push(`Current tags: ${entry.tags.join(", ")}`);
|
|
53
|
-
if (fileContent) {
|
|
54
|
-
// Limit content to first 2000 chars to stay within token limits
|
|
55
|
-
const truncated = fileContent.length > 2000 ? `${fileContent.slice(0, 2000)}\n... (truncated)` : fileContent;
|
|
56
|
-
contextParts.push(`File content:\n${truncated}`);
|
|
57
|
-
}
|
|
58
|
-
const userPrompt = `${contextParts.join("\n")}
|
|
59
|
-
|
|
60
|
-
Generate improved metadata for this ${entry.type}. Return JSON with these fields:
|
|
61
|
-
- "description": a clear, concise one-sentence description of what this does
|
|
62
|
-
- "searchHints": an array of 3-6 natural language task phrases an agent might use to find this (e.g. "deploy a docker container", "run database migrations")
|
|
63
|
-
- "tags": an array of 3-8 relevant keyword tags
|
|
64
|
-
|
|
65
|
-
Return ONLY the JSON object, no explanation.`;
|
|
66
|
-
const raw = await chatCompletion(config, [
|
|
67
|
-
{ role: "system", content: SYSTEM_PROMPT },
|
|
68
|
-
{ role: "user", content: userPrompt },
|
|
69
|
-
]);
|
|
70
|
-
const parsed = parseJsonResponse(raw);
|
|
71
|
-
if (!parsed)
|
|
72
|
-
return {};
|
|
73
|
-
const result = {};
|
|
74
|
-
if (typeof parsed.description === "string" && parsed.description) {
|
|
75
|
-
result.description = parsed.description;
|
|
76
|
-
}
|
|
77
|
-
if (Array.isArray(parsed.searchHints)) {
|
|
78
|
-
result.searchHints = parsed.searchHints
|
|
79
|
-
.filter((s) => typeof s === "string" && s.trim().length > 0)
|
|
80
|
-
.slice(0, 8);
|
|
81
|
-
}
|
|
82
|
-
if (Array.isArray(parsed.tags)) {
|
|
83
|
-
result.tags = parsed.tags.filter((s) => typeof s === "string" && s.trim().length > 0).slice(0, 10);
|
|
84
|
-
}
|
|
85
|
-
return result;
|
|
86
|
-
}
|
|
50
|
+
// ── Availability check ──────────────────────────────────────────────────────
|
|
87
51
|
/**
|
|
88
52
|
* Check if the LLM endpoint is reachable.
|
|
89
53
|
*/
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Backward-compatible facade for the embedder module.
|
|
3
|
+
*
|
|
4
|
+
* The implementation has been split into:
|
|
5
|
+
* - `./embedders/types` — `EmbeddingVector`, `Embedder`, `EmbeddingCheckResult`
|
|
6
|
+
* - `./embedders/local` — `LocalEmbedder`, `DEFAULT_LOCAL_MODEL`,
|
|
7
|
+
* `isTransformersAvailable`
|
|
8
|
+
* - `./embedders/remote` — `RemoteEmbedder`, `hasRemoteEndpoint`
|
|
9
|
+
* - `./embedders/cache` — LRU `embedCache`, `clearEmbeddingCache`,
|
|
10
|
+
* `embedCacheKey`
|
|
11
|
+
*
|
|
12
|
+
* This module wires them together: it picks the right implementation from the
|
|
13
|
+
* (optional) embedding config, applies the cache layer, and re-exports the
|
|
14
|
+
* existing public API so call sites (`db-search.ts`, `indexer.ts`, `db.ts`,
|
|
15
|
+
* `setup.ts`, `semantic-status.ts`, tests) keep working unmodified.
|
|
16
|
+
*
|
|
17
|
+
* Tests can construct fresh `LocalEmbedder` / `RemoteEmbedder` instances
|
|
18
|
+
* directly from their submodules to avoid module-level state pollution.
|
|
19
|
+
*/
|
|
20
|
+
import { embedCacheKey, getCachedEmbedding, setCachedEmbedding } from "./embedders/cache";
|
|
21
|
+
import { isTransformersAvailable, LocalEmbedder } from "./embedders/local";
|
|
22
|
+
import { hasRemoteEndpoint, RemoteEmbedder } from "./embedders/remote";
|
|
23
|
+
// ── Re-exports (public API) ─────────────────────────────────────────────────
|
|
24
|
+
export { clearEmbeddingCache } from "./embedders/cache";
|
|
25
|
+
export { DEFAULT_LOCAL_MODEL, isTransformersAvailable } from "./embedders/local";
|
|
26
|
+
// ── Singleton local embedder ────────────────────────────────────────────────
|
|
27
|
+
// `localEmbedder` is an intentional module-level singleton. The underlying
|
|
28
|
+
// @huggingface/transformers pipeline is expensive to initialise (model download
|
|
29
|
+
// + WASM compilation) and is safe to share across calls because it is
|
|
30
|
+
// stateless once created. Storing it here avoids re-initialising on every
|
|
31
|
+
// embed() call.
|
|
32
|
+
const localEmbedder = new LocalEmbedder();
|
|
33
|
+
/**
|
|
34
|
+
* Reset the cached local embedder pipeline. Used by tests that want a fresh
|
|
35
|
+
* pipeline construction (e.g. to assert the dtype-fallback retry logic).
|
|
36
|
+
*/
|
|
37
|
+
export function resetLocalEmbedder() {
|
|
38
|
+
localEmbedder.reset();
|
|
39
|
+
}
|
|
40
|
+
// ── Public API ──────────────────────────────────────────────────────────────
|
|
41
|
+
/**
|
|
42
|
+
* Generate an embedding for the given text.
|
|
43
|
+
* If embeddingConfig has a remote endpoint, uses the configured OpenAI-compatible endpoint.
|
|
44
|
+
* Otherwise falls back to local @huggingface/transformers using the model from
|
|
45
|
+
* `embeddingConfig.localModel` or `DEFAULT_LOCAL_MODEL`.
|
|
46
|
+
*
|
|
47
|
+
* Results are cached in an LRU cache (max ~100 entries) keyed by query text
|
|
48
|
+
* and embedding config. Repeated identical queries return the cached vector.
|
|
49
|
+
*/
|
|
50
|
+
export async function embed(text, embeddingConfig) {
|
|
51
|
+
const key = embedCacheKey(text, embeddingConfig);
|
|
52
|
+
const cached = getCachedEmbedding(key);
|
|
53
|
+
if (cached)
|
|
54
|
+
return cached;
|
|
55
|
+
const result = embeddingConfig && hasRemoteEndpoint(embeddingConfig)
|
|
56
|
+
? await new RemoteEmbedder(embeddingConfig).embed(text)
|
|
57
|
+
: await localEmbedder.embedWithModel(text, embeddingConfig?.localModel);
|
|
58
|
+
setCachedEmbedding(key, result);
|
|
59
|
+
return result;
|
|
60
|
+
}
|
|
61
|
+
/**
|
|
62
|
+
* Generate embeddings for multiple texts in batch.
|
|
63
|
+
* Uses the OpenAI-compatible batch API for remote endpoints (batches of 100).
|
|
64
|
+
* Falls back to sequential embedding for the local transformer pipeline.
|
|
65
|
+
*/
|
|
66
|
+
export async function embedBatch(texts, embeddingConfig) {
|
|
67
|
+
if (texts.length === 0)
|
|
68
|
+
return [];
|
|
69
|
+
if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
|
|
70
|
+
return new RemoteEmbedder(embeddingConfig).embedBatch(texts);
|
|
71
|
+
}
|
|
72
|
+
// Local transformer: process sequentially (pipeline handles one at a time)
|
|
73
|
+
const localModel = embeddingConfig?.localModel;
|
|
74
|
+
const results = [];
|
|
75
|
+
for (const text of texts) {
|
|
76
|
+
results.push(await localEmbedder.embedWithModel(text, localModel));
|
|
77
|
+
}
|
|
78
|
+
return results;
|
|
79
|
+
}
|
|
80
|
+
// ── Similarity ──────────────────────────────────────────────────────────────
|
|
81
|
+
// `cosineSimilarity` was moved to `./embedders/types.ts` so importers
|
|
82
|
+
// (notably `db.ts`) can pull the math function without dragging in this
|
|
83
|
+
// facade and its `@huggingface/transformers` import chain. Re-export
|
|
84
|
+
// preserves the existing public API.
|
|
85
|
+
export { cosineSimilarity } from "./embedders/types";
|
|
86
|
+
// ── Availability check ──────────────────────────────────────────────────────
|
|
87
|
+
/**
|
|
88
|
+
* Check whether embedding is available with a detailed reason on failure.
|
|
89
|
+
*/
|
|
90
|
+
export async function checkEmbeddingAvailability(embeddingConfig) {
|
|
91
|
+
if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
|
|
92
|
+
try {
|
|
93
|
+
await new RemoteEmbedder(embeddingConfig).embed("test");
|
|
94
|
+
return { available: true };
|
|
95
|
+
}
|
|
96
|
+
catch (err) {
|
|
97
|
+
return {
|
|
98
|
+
available: false,
|
|
99
|
+
reason: "remote-unreachable",
|
|
100
|
+
message: err instanceof Error ? err.message : String(err),
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
}
|
|
104
|
+
// Check if the package is importable before attempting the model download.
|
|
105
|
+
if (!isTransformersAvailable()) {
|
|
106
|
+
return {
|
|
107
|
+
available: false,
|
|
108
|
+
reason: "missing-package",
|
|
109
|
+
message: "@huggingface/transformers is not installed.",
|
|
110
|
+
};
|
|
111
|
+
}
|
|
112
|
+
try {
|
|
113
|
+
await localEmbedder.getPipeline(embeddingConfig?.localModel);
|
|
114
|
+
return { available: true };
|
|
115
|
+
}
|
|
116
|
+
catch (err) {
|
|
117
|
+
return {
|
|
118
|
+
available: false,
|
|
119
|
+
reason: "model-download-failed",
|
|
120
|
+
message: err instanceof Error ? err.message : String(err),
|
|
121
|
+
};
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
export async function isEmbeddingAvailable(embeddingConfig) {
|
|
125
|
+
const result = await checkEmbeddingAvailability(embeddingConfig);
|
|
126
|
+
return result.available;
|
|
127
|
+
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LRU embedding cache shared by the embedder facade.
|
|
3
|
+
*
|
|
4
|
+
* Caches query embeddings to avoid redundant computation for repeated
|
|
5
|
+
* queries. Uses a simple Map with LRU eviction (delete + re-insert to move
|
|
6
|
+
* an entry to the most-recently-used end).
|
|
7
|
+
*/
|
|
8
|
+
const EMBED_CACHE_MAX = 100;
|
|
9
|
+
const embedCache = new Map();
|
|
10
|
+
/**
|
|
11
|
+
* Build a cache key from query text and optional config.
|
|
12
|
+
* Different endpoints/models should not share cached embeddings.
|
|
13
|
+
* apiKey deliberately excluded: same endpoint+model produce identical embeddings regardless of auth.
|
|
14
|
+
*/
|
|
15
|
+
export function embedCacheKey(text, config) {
|
|
16
|
+
if (!config)
|
|
17
|
+
return `local::${text}`;
|
|
18
|
+
const endpoint = config.endpoint || "";
|
|
19
|
+
const model = config.model || config.localModel || "";
|
|
20
|
+
return `${endpoint}:${model}:${text}`;
|
|
21
|
+
}
|
|
22
|
+
export function getCachedEmbedding(key) {
|
|
23
|
+
const cached = embedCache.get(key);
|
|
24
|
+
if (cached === undefined)
|
|
25
|
+
return undefined;
|
|
26
|
+
// Move to end (most recently used) for LRU ordering
|
|
27
|
+
embedCache.delete(key);
|
|
28
|
+
embedCache.set(key, cached);
|
|
29
|
+
return cached;
|
|
30
|
+
}
|
|
31
|
+
export function setCachedEmbedding(key, value) {
|
|
32
|
+
// Evict oldest entry if at capacity
|
|
33
|
+
if (embedCache.size >= EMBED_CACHE_MAX) {
|
|
34
|
+
const oldest = embedCache.keys().next().value;
|
|
35
|
+
if (oldest !== undefined) {
|
|
36
|
+
embedCache.delete(oldest);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
embedCache.set(key, value);
|
|
40
|
+
}
|
|
41
|
+
/**
|
|
42
|
+
* Clear the embedding cache. Call when the embedding model changes
|
|
43
|
+
* or when you want to force fresh embeddings.
|
|
44
|
+
*/
|
|
45
|
+
export function clearEmbeddingCache() {
|
|
46
|
+
embedCache.clear();
|
|
47
|
+
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local @huggingface/transformers embedder.
|
|
3
|
+
*
|
|
4
|
+
* Encapsulates the transformer pipeline lifecycle as instance state on a
|
|
5
|
+
* `LocalEmbedder` so tests can construct fresh instances without leaking
|
|
6
|
+
* pipelines across tests. The facade in `../embedder.ts` keeps a single
|
|
7
|
+
* shared instance for the production code path.
|
|
8
|
+
*/
|
|
9
|
+
import path from "node:path";
|
|
10
|
+
import { getCacheDir } from "../../core/paths";
|
|
11
|
+
import { warn } from "../../core/warn";
|
|
12
|
+
/**
|
|
13
|
+
* Default local transformer model for embeddings.
|
|
14
|
+
* `bge-small-en-v1.5` scores higher on MTEB benchmarks than the previous
|
|
15
|
+
* `all-MiniLM-L6-v2` at the same 384-dimension footprint.
|
|
16
|
+
*/
|
|
17
|
+
export const DEFAULT_LOCAL_MODEL = "Xenova/bge-small-en-v1.5";
|
|
18
|
+
const LOCAL_EMBEDDER_DTYPE = "fp32";
|
|
19
|
+
const LOCAL_EMBEDDER_FALLBACK_DTYPE = "auto";
|
|
20
|
+
/**
|
|
21
|
+
* Return the local model name that will be used for embedding.
|
|
22
|
+
* When `overrideModel` is provided it takes precedence; otherwise
|
|
23
|
+
* the default model is returned.
|
|
24
|
+
*/
|
|
25
|
+
function resolveLocalModelName(overrideModel) {
|
|
26
|
+
return overrideModel || DEFAULT_LOCAL_MODEL;
|
|
27
|
+
}
|
|
28
|
+
export class LocalEmbedder {
|
|
29
|
+
defaultModel;
|
|
30
|
+
/**
|
|
31
|
+
* Cache the *promise* (not the resolved result) so concurrent calls share
|
|
32
|
+
* the same initialisation work and never download the model twice. Keyed
|
|
33
|
+
* by model name so switching models gets a fresh pipeline.
|
|
34
|
+
*/
|
|
35
|
+
pipelinePromise;
|
|
36
|
+
pipelineModelName;
|
|
37
|
+
constructor(defaultModel) {
|
|
38
|
+
this.defaultModel = defaultModel;
|
|
39
|
+
}
|
|
40
|
+
/** Reset the cached pipeline (used by tests and by `resetLocalEmbedder()`). */
|
|
41
|
+
reset() {
|
|
42
|
+
this.pipelinePromise = undefined;
|
|
43
|
+
this.pipelineModelName = undefined;
|
|
44
|
+
}
|
|
45
|
+
async embed(text) {
|
|
46
|
+
return this.embedWithModel(text, this.defaultModel);
|
|
47
|
+
}
|
|
48
|
+
async embedBatch(texts) {
|
|
49
|
+
if (texts.length === 0)
|
|
50
|
+
return [];
|
|
51
|
+
const results = [];
|
|
52
|
+
for (const text of texts) {
|
|
53
|
+
results.push(await this.embedWithModel(text, this.defaultModel));
|
|
54
|
+
}
|
|
55
|
+
return results;
|
|
56
|
+
}
|
|
57
|
+
/** Embed using a model name override (used by the facade for per-call model overrides). */
|
|
58
|
+
async embedWithModel(text, modelName) {
|
|
59
|
+
const pipeline = await this.getPipeline(modelName);
|
|
60
|
+
const result = await pipeline(text, { pooling: "mean", normalize: true });
|
|
61
|
+
return Array.from(result.data);
|
|
62
|
+
}
|
|
63
|
+
/**
|
|
64
|
+
* Eagerly load (or return the cached) underlying pipeline. Used by
|
|
65
|
+
* availability checks that want to surface model-download failures
|
|
66
|
+
* without performing a real embed call.
|
|
67
|
+
*/
|
|
68
|
+
async getPipeline(modelName) {
|
|
69
|
+
const resolvedModel = resolveLocalModelName(modelName);
|
|
70
|
+
if (this.pipelinePromise && this.pipelineModelName !== resolvedModel) {
|
|
71
|
+
this.pipelinePromise = undefined;
|
|
72
|
+
this.pipelineModelName = undefined;
|
|
73
|
+
}
|
|
74
|
+
if (!this.pipelinePromise) {
|
|
75
|
+
this.pipelineModelName = resolvedModel;
|
|
76
|
+
this.pipelinePromise = (async () => {
|
|
77
|
+
// Ensure HuggingFace model cache lives in a stable location outside
|
|
78
|
+
// node_modules so it survives package reinstalls.
|
|
79
|
+
if (!process.env.HF_HOME) {
|
|
80
|
+
process.env.HF_HOME = path.join(getCacheDir(), "models");
|
|
81
|
+
}
|
|
82
|
+
let pipeline;
|
|
83
|
+
try {
|
|
84
|
+
const mod = await import("@huggingface/transformers");
|
|
85
|
+
pipeline = mod.pipeline;
|
|
86
|
+
}
|
|
87
|
+
catch (importError) {
|
|
88
|
+
const msg = importError instanceof Error ? importError.message : String(importError);
|
|
89
|
+
if (/Cannot find module|MODULE_NOT_FOUND|Cannot resolve/i.test(msg)) {
|
|
90
|
+
throw new Error("Semantic search requires @huggingface/transformers. Install it with: bun add @huggingface/transformers");
|
|
91
|
+
}
|
|
92
|
+
throw new Error(`Failed to load embedding runtime: ${msg}. Check platform compatibility.`);
|
|
93
|
+
}
|
|
94
|
+
const pipelineFn = pipeline;
|
|
95
|
+
return createLocalPipeline(pipelineFn, resolvedModel);
|
|
96
|
+
})();
|
|
97
|
+
// HI-13: Clear the cached promise on failure so the next call retries
|
|
98
|
+
// instead of permanently rejecting every subsequent call with the same error.
|
|
99
|
+
this.pipelinePromise.catch(() => {
|
|
100
|
+
this.pipelinePromise = undefined;
|
|
101
|
+
this.pipelineModelName = undefined;
|
|
102
|
+
});
|
|
103
|
+
}
|
|
104
|
+
return this.pipelinePromise;
|
|
105
|
+
}
|
|
106
|
+
}
|
|
107
|
+
async function createLocalPipeline(pipelineFn, modelName) {
|
|
108
|
+
try {
|
|
109
|
+
return await pipelineFn("feature-extraction", modelName, { dtype: LOCAL_EMBEDDER_DTYPE });
|
|
110
|
+
}
|
|
111
|
+
catch (error) {
|
|
112
|
+
if (!shouldRetryWithoutExplicitDtype(error)) {
|
|
113
|
+
throw error;
|
|
114
|
+
}
|
|
115
|
+
warn('Local embedding model "%s" rejected explicit dtype "%s"; retrying with explicit fallback dtype "%s".', modelName, LOCAL_EMBEDDER_DTYPE, LOCAL_EMBEDDER_FALLBACK_DTYPE);
|
|
116
|
+
return pipelineFn("feature-extraction", modelName, { dtype: LOCAL_EMBEDDER_FALLBACK_DTYPE });
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
function shouldRetryWithoutExplicitDtype(error) {
|
|
120
|
+
const message = error instanceof Error ? error.message : String(error);
|
|
121
|
+
return /dtype|fp32|precision|quant/i.test(message);
|
|
122
|
+
}
|
|
123
|
+
/**
|
|
124
|
+
* Check whether the `@huggingface/transformers` package can be resolved.
|
|
125
|
+
* Uses `Bun.resolveSync` so we never load the module (which would trigger
|
|
126
|
+
* heavy WASM/model side-effects) just to test availability.
|
|
127
|
+
*
|
|
128
|
+
* Falls back to `require.resolve` when `Bun.resolveSync` is unavailable
|
|
129
|
+
* (e.g. running under Node), so the function still works in mixed runtimes.
|
|
130
|
+
*/
|
|
131
|
+
export function isTransformersAvailable() {
|
|
132
|
+
try {
|
|
133
|
+
if (typeof Bun !== "undefined" && typeof Bun.resolveSync === "function") {
|
|
134
|
+
Bun.resolveSync("@huggingface/transformers", import.meta.dir);
|
|
135
|
+
return true;
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
catch {
|
|
139
|
+
return false;
|
|
140
|
+
}
|
|
141
|
+
try {
|
|
142
|
+
const req = globalThis.require;
|
|
143
|
+
if (req && typeof req.resolve === "function") {
|
|
144
|
+
req.resolve("@huggingface/transformers");
|
|
145
|
+
return true;
|
|
146
|
+
}
|
|
147
|
+
}
|
|
148
|
+
catch {
|
|
149
|
+
return false;
|
|
150
|
+
}
|
|
151
|
+
return false;
|
|
152
|
+
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OpenAI-compatible remote embedder.
|
|
3
|
+
*
|
|
4
|
+
* Calls the configured `/embeddings` endpoint and L2-normalizes the returned
|
|
5
|
+
* vectors so the scoring pipeline's L2-to-cosine conversion is correct.
|
|
6
|
+
*/
|
|
7
|
+
import { fetchWithTimeout, isHttpUrl } from "../../core/common";
|
|
8
|
+
const REMOTE_BATCH_SIZE = 100;
|
|
9
|
+
export class RemoteEmbedder {
|
|
10
|
+
config;
|
|
11
|
+
constructor(config) {
|
|
12
|
+
this.config = config;
|
|
13
|
+
}
|
|
14
|
+
async embed(text) {
|
|
15
|
+
const headers = this.buildHeaders();
|
|
16
|
+
const body = {
|
|
17
|
+
input: text,
|
|
18
|
+
model: this.config.model,
|
|
19
|
+
};
|
|
20
|
+
if (this.config.dimension) {
|
|
21
|
+
body.dimensions = this.config.dimension;
|
|
22
|
+
}
|
|
23
|
+
const response = await fetchWithTimeout(normalizeEmbeddingEndpoint(this.config.endpoint), {
|
|
24
|
+
method: "POST",
|
|
25
|
+
headers,
|
|
26
|
+
body: JSON.stringify(body),
|
|
27
|
+
});
|
|
28
|
+
if (!response.ok) {
|
|
29
|
+
const errBody = await response.text().catch(() => "");
|
|
30
|
+
throw new Error(`Embedding request failed (${response.status}): ${errBody}`);
|
|
31
|
+
}
|
|
32
|
+
const json = (await response.json());
|
|
33
|
+
if (!json.data?.[0]?.embedding) {
|
|
34
|
+
throw new Error(`Unexpected embedding response format: missing data[0].embedding.${embeddingEndpointPathHint(this.config.endpoint)}`);
|
|
35
|
+
}
|
|
36
|
+
return l2Normalize(json.data[0].embedding);
|
|
37
|
+
}
|
|
38
|
+
async embedBatch(texts) {
|
|
39
|
+
if (texts.length === 0)
|
|
40
|
+
return [];
|
|
41
|
+
const results = [];
|
|
42
|
+
const headers = this.buildHeaders();
|
|
43
|
+
for (let i = 0; i < texts.length; i += REMOTE_BATCH_SIZE) {
|
|
44
|
+
const batch = texts.slice(i, i + REMOTE_BATCH_SIZE);
|
|
45
|
+
const body = {
|
|
46
|
+
input: batch,
|
|
47
|
+
model: this.config.model,
|
|
48
|
+
};
|
|
49
|
+
if (this.config.dimension) {
|
|
50
|
+
body.dimensions = this.config.dimension;
|
|
51
|
+
}
|
|
52
|
+
const response = await fetchWithTimeout(normalizeEmbeddingEndpoint(this.config.endpoint), {
|
|
53
|
+
method: "POST",
|
|
54
|
+
headers,
|
|
55
|
+
body: JSON.stringify(body),
|
|
56
|
+
});
|
|
57
|
+
if (!response.ok) {
|
|
58
|
+
const respBody = await response.text().catch(() => "");
|
|
59
|
+
throw new Error(`Embedding batch request failed (${response.status}): ${respBody}`);
|
|
60
|
+
}
|
|
61
|
+
const json = (await response.json());
|
|
62
|
+
if (!json.data || json.data.length !== batch.length) {
|
|
63
|
+
throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}.${embeddingEndpointPathHint(this.config.endpoint)}`);
|
|
64
|
+
}
|
|
65
|
+
// Sort by index to guarantee correct order (OpenAI API doesn't guarantee order)
|
|
66
|
+
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
|
67
|
+
for (const [idx, d] of sorted.entries()) {
|
|
68
|
+
if (!Array.isArray(d.embedding)) {
|
|
69
|
+
throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
|
|
70
|
+
}
|
|
71
|
+
results.push(l2Normalize(d.embedding));
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
return results;
|
|
75
|
+
}
|
|
76
|
+
buildHeaders() {
|
|
77
|
+
const headers = { "Content-Type": "application/json" };
|
|
78
|
+
if (this.config.apiKey) {
|
|
79
|
+
headers.Authorization = `Bearer ${this.config.apiKey}`;
|
|
80
|
+
}
|
|
81
|
+
return headers;
|
|
82
|
+
}
|
|
83
|
+
}
|
|
84
|
+
/**
 * Scale a vector to unit (L2) length.
 *
 * Remote embeddings must be normalized because the scoring pipeline converts
 * L2 distance to cosine similarity via `1 - distance^2/2`, which only holds
 * for unit vectors. (The local embedder normalizes via `normalize: true`.)
 * A zero vector is returned unchanged to avoid division by zero.
 */
function l2Normalize(vec) {
    let sumOfSquares = 0;
    for (const component of vec) {
        sumOfSquares += component * component;
    }
    const magnitude = Math.sqrt(sumOfSquares);
    return magnitude === 0 ? vec : vec.map((component) => component / magnitude);
}
|
|
96
|
+
/**
 * Normalize an embedding endpoint URL so it targets the `/embeddings` path.
 *
 * Strings that do not parse as absolute URLs are returned unchanged rather
 * than guessed at. Otherwise trailing slashes are stripped and `/embeddings`
 * is appended unless the path already ends with it.
 *
 * @param {string} endpoint - User-configured embedding endpoint URL.
 * @returns {string} The endpoint with a canonical `/embeddings` path.
 */
export function normalizeEmbeddingEndpoint(endpoint) {
    let parsed;
    try {
        parsed = new URL(endpoint);
    }
    catch {
        // Not an absolute URL — leave as-is; validation happens elsewhere.
        return endpoint;
    }
    const normalizedPath = parsed.pathname.replace(/\/+$/, "");
    // Fix: write the canonical path back even when it already ends with
    // "/embeddings", so trailing slashes are stripped consistently.
    // Previously ".../embeddings/" was returned with the slash intact.
    if (normalizedPath.endsWith("/embeddings")) {
        parsed.pathname = normalizedPath;
    }
    else {
        parsed.pathname = normalizedPath ? `${normalizedPath}/embeddings` : "/embeddings";
    }
    return parsed.toString();
}
|
|
111
|
+
/**
 * Produce a troubleshooting hint when the configured endpoint differs from
 * its normalized form (i.e. it was missing the `/embeddings` path segment).
 * Returns an empty string when the endpoint is already canonical.
 */
function embeddingEndpointPathHint(endpoint) {
    const canonical = normalizeEmbeddingEndpoint(endpoint);
    if (canonical === endpoint) {
        return "";
    }
    return ` Check that your endpoint includes the full embeddings path (for example "${canonical}", not just "${endpoint}").`;
}
|
|
118
|
+
/** Check whether an EmbeddingConnectionConfig has a valid remote endpoint. */
export function hasRemoteEndpoint(config) {
    const { endpoint } = config;
    return isHttpUrl(endpoint);
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared embedder types.
|
|
3
|
+
*
|
|
4
|
+
* Pulled out of `embedder.ts` so concrete implementations (`local.ts`,
|
|
5
|
+
* `remote.ts`) and the cache layer can depend on a small, stable types
|
|
6
|
+
* module without dragging in the facade or a sibling implementation.
|
|
7
|
+
*/
|
|
8
|
+
/**
 * Cosine similarity between two embedding vectors.
 *
 * Lives next to {@link EmbeddingVector} so importers (notably `db.ts`) can
 * pull just the math without dragging in the embedder facade and its
 * transitive `@huggingface/transformers` import chain.
 *
 * Mismatched dimensions return 0 (with a warning) — silently computing on a
 * truncated view would yield meaningless scores. Empty or zero-magnitude
 * vectors also return 0.
 */
export function cosineSimilarity(a, b) {
    if (a.length !== b.length) {
        warn("cosineSimilarity: vector dimension mismatch (%d vs %d) — re-index recommended", a.length, b.length);
        return 0;
    }
    if (a.length === 0) {
        return 0;
    }
    let dot = 0;
    let sqA = 0;
    let sqB = 0;
    for (const [i, av] of a.entries()) {
        const bv = b[i];
        dot += av * bv;
        sqA += av * av;
        sqB += bv * bv;
    }
    const denom = Math.sqrt(sqA) * Math.sqrt(sqB);
    return denom === 0 ? 0 : dot / denom;
}
|
|
37
|
+
// NOTE: ESM static imports are hoisted, so this bottom-of-file placement does not defer loading;
|
|
38
|
+
// `warn` is a thin printf wrapper so the cost is negligible.
|
|
39
|
+
import { warn } from "../../core/warn";
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LLM-driven metadata enhancement for stash entries.
|
|
3
|
+
*
|
|
4
|
+
* Split out of `llm.ts` so the higher-level workflow (prompting the LLM to
|
|
5
|
+
* improve descriptions/tags/searchHints) lives separately from the low-level
|
|
6
|
+
* transport client in `client.ts`.
|
|
7
|
+
*/
|
|
8
|
+
import { chatCompletion, parseJsonResponse } from "./client";
|
|
9
|
+
const SYSTEM_PROMPT = `You are a metadata generator for a developer asset registry. Given a script/skill/command/agent entry, generate improved metadata. Respond with ONLY valid JSON, no markdown fencing.`;
/**
 * Ask the configured LLM to enrich a stash entry's metadata: an improved
 * description, generated searchHints, and suggested tags. Fields the model
 * omits or returns malformed are simply absent from the result.
 */
export async function enhanceMetadata(config, entry, fileContent) {
    const contextLines = [`Name: ${entry.name}`, `Type: ${entry.type}`];
    if (entry.description) {
        contextLines.push(`Current description: ${entry.description}`);
    }
    if (entry.tags?.length) {
        contextLines.push(`Current tags: ${entry.tags.join(", ")}`);
    }
    if (fileContent) {
        // Cap the excerpt at 2000 chars so the prompt stays within token limits.
        const excerpt = fileContent.length > 2000 ? `${fileContent.slice(0, 2000)}\n... (truncated)` : fileContent;
        contextLines.push(`File content:\n${excerpt}`);
    }
    const userPrompt = `${contextLines.join("\n")}

Generate improved metadata for this ${entry.type}. Return JSON with these fields:
- "description": a clear, concise one-sentence description of what this does
- "searchHints": an array of 3-6 natural language task phrases an agent might use to find this (e.g. "deploy a docker container", "run database migrations")
- "tags": an array of 3-8 relevant keyword tags

Return ONLY the JSON object, no explanation.`;
    const raw = await chatCompletion(config, [
        { role: "system", content: SYSTEM_PROMPT },
        { role: "user", content: userPrompt },
    ]);
    const parsed = parseJsonResponse(raw);
    if (!parsed) {
        return {};
    }
    // Accept only non-blank strings from the model's arrays.
    const isNonBlankString = (s) => typeof s === "string" && s.trim().length > 0;
    const result = {};
    if (typeof parsed.description === "string" && parsed.description) {
        result.description = parsed.description;
    }
    if (Array.isArray(parsed.searchHints)) {
        result.searchHints = parsed.searchHints.filter(isNonBlankString).slice(0, 8);
    }
    if (Array.isArray(parsed.tags)) {
        result.tags = parsed.tags.filter(isNonBlankString).slice(0, 10);
    }
    return result;
}
|