akm-cli 0.5.0 → 0.6.0-rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +32 -5
- package/dist/asset-registry.js +29 -5
- package/dist/asset-spec.js +12 -5
- package/dist/cli-hints.js +300 -0
- package/dist/cli.js +218 -1357
- package/dist/common.js +147 -50
- package/dist/config.js +224 -13
- package/dist/create-provider-registry.js +1 -1
- package/dist/curate.js +258 -0
- package/dist/{local-search.js → db-search.js} +30 -19
- package/dist/db.js +168 -62
- package/dist/embedder.js +49 -273
- package/dist/embedders/cache.js +47 -0
- package/dist/embedders/local.js +152 -0
- package/dist/embedders/remote.js +121 -0
- package/dist/embedders/types.js +39 -0
- package/dist/errors.js +14 -3
- package/dist/frontmatter.js +61 -7
- package/dist/indexer.js +38 -7
- package/dist/info.js +2 -2
- package/dist/install-audit.js +16 -1
- package/dist/{installed-kits.js → installed-stashes.js} +48 -22
- package/dist/llm-client.js +92 -0
- package/dist/llm.js +14 -126
- package/dist/lockfile.js +28 -1
- package/dist/matchers.js +1 -1
- package/dist/metadata-enhance.js +53 -0
- package/dist/migration-help.js +75 -44
- package/dist/output-context.js +77 -0
- package/dist/output-shapes.js +198 -0
- package/dist/output-text.js +520 -0
- package/dist/paths.js +4 -4
- package/dist/providers/index.js +11 -0
- package/dist/providers/skills-sh.js +1 -1
- package/dist/providers/static-index.js +47 -45
- package/dist/registry-build-index.js +36 -29
- package/dist/registry-factory.js +2 -2
- package/dist/registry-resolve.js +8 -4
- package/dist/registry-search.js +62 -5
- package/dist/remember.js +172 -0
- package/dist/renderers.js +52 -0
- package/dist/search-source.js +73 -42
- package/dist/setup-steps.js +45 -0
- package/dist/setup.js +149 -76
- package/dist/stash-add.js +94 -38
- package/dist/stash-clone.js +4 -4
- package/dist/stash-provider-factory.js +2 -2
- package/dist/stash-provider.js +3 -1
- package/dist/stash-providers/filesystem.js +31 -1
- package/dist/stash-providers/git.js +209 -8
- package/dist/stash-providers/index.js +1 -0
- package/dist/stash-providers/npm.js +159 -0
- package/dist/stash-providers/provider-utils.js +162 -0
- package/dist/stash-providers/sync-from-ref.js +45 -0
- package/dist/stash-providers/tar-utils.js +151 -0
- package/dist/stash-providers/website.js +80 -4
- package/dist/stash-resolve.js +5 -5
- package/dist/stash-search.js +4 -4
- package/dist/stash-show.js +3 -3
- package/dist/wiki.js +6 -6
- package/dist/workflow-authoring.js +12 -4
- package/dist/workflow-markdown.js +9 -0
- package/dist/workflow-runs.js +12 -2
- package/docs/README.md +30 -0
- package/docs/migration/release-notes/0.0.13.md +4 -0
- package/docs/migration/release-notes/0.1.0.md +6 -0
- package/docs/migration/release-notes/0.2.0.md +6 -0
- package/docs/migration/release-notes/0.3.0.md +5 -0
- package/docs/migration/release-notes/0.5.0.md +6 -0
- package/docs/migration/release-notes/0.6.0.md +29 -0
- package/docs/migration/release-notes/README.md +21 -0
- package/package.json +3 -2
- package/dist/registry-install.js +0 -532
- /package/dist/{kit-include.js → stash-include.js} +0 -0
package/dist/embedder.js
CHANGED
|
@@ -1,184 +1,41 @@
|
|
|
1
|
-
import path from "node:path";
|
|
2
|
-
import { fetchWithTimeout, isHttpUrl } from "./common";
|
|
3
|
-
import { getCacheDir } from "./paths";
|
|
4
|
-
import { warn } from "./warn";
|
|
5
|
-
// ── Default local model ─────────────────────────────────────────────────────
|
|
6
1
|
/**
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
2
|
+
* Backward-compatible facade for the embedder module.
|
|
3
|
+
*
|
|
4
|
+
* The implementation has been split into:
|
|
5
|
+
* - `./embedders/types` — `EmbeddingVector`, `Embedder`, `EmbeddingCheckResult`
|
|
6
|
+
* - `./embedders/local` — `LocalEmbedder`, `DEFAULT_LOCAL_MODEL`,
|
|
7
|
+
* `isTransformersAvailable`
|
|
8
|
+
* - `./embedders/remote` — `RemoteEmbedder`, `hasRemoteEndpoint`
|
|
9
|
+
* - `./embedders/cache` — LRU `embedCache`, `clearEmbeddingCache`,
|
|
10
|
+
* `embedCacheKey`
|
|
11
|
+
*
|
|
12
|
+
* This module wires them together: it picks the right implementation from the
|
|
13
|
+
* (optional) embedding config, applies the cache layer, and re-exports the
|
|
14
|
+
* existing public API so call sites (`db-search.ts`, `indexer.ts`, `db.ts`,
|
|
15
|
+
* `setup.ts`, `semantic-status.ts`, tests) keep working unmodified.
|
|
16
|
+
*
|
|
17
|
+
* Tests can construct fresh `LocalEmbedder` / `RemoteEmbedder` instances
|
|
18
|
+
* directly from their submodules to avoid module-level state pollution.
|
|
10
19
|
*/
|
|
11
|
-
|
|
20
|
+
import { embedCacheKey, getCachedEmbedding, setCachedEmbedding } from "./embedders/cache";
|
|
21
|
+
import { isTransformersAvailable, LocalEmbedder } from "./embedders/local";
|
|
22
|
+
import { hasRemoteEndpoint, RemoteEmbedder } from "./embedders/remote";
|
|
23
|
+
// ── Re-exports (public API) ─────────────────────────────────────────────────
|
|
24
|
+
export { clearEmbeddingCache } from "./embedders/cache";
|
|
25
|
+
export { DEFAULT_LOCAL_MODEL, isTransformersAvailable } from "./embedders/local";
|
|
26
|
+
// ── Singleton local embedder ────────────────────────────────────────────────
|
|
27
|
+
// `localEmbedder` is an intentional module-level singleton. The underlying
|
|
28
|
+
// @huggingface/transformers pipeline is expensive to initialise (model download
|
|
29
|
+
// + WASM compilation) and is safe to share across calls because it is
|
|
30
|
+
// stateless once created. Storing it here avoids re-initialising on every
|
|
31
|
+
// embed() call.
|
|
32
|
+
const localEmbedder = new LocalEmbedder();
|
|
12
33
|
/**
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* the default model is returned.
|
|
34
|
+
* Reset the cached local embedder pipeline. Used by tests that want a fresh
|
|
35
|
+
* pipeline construction (e.g. to assert the dtype-fallback retry logic).
|
|
16
36
|
*/
|
|
17
|
-
function getLocalModelName(overrideModel) {
|
|
18
|
-
return overrideModel || DEFAULT_LOCAL_MODEL;
|
|
19
|
-
}
|
|
20
|
-
const LOCAL_EMBEDDER_DTYPE = "fp32";
|
|
21
|
-
const LOCAL_EMBEDDER_FALLBACK_DTYPE = "auto";
|
|
22
|
-
// Cache the promise itself (not the resolved result) so concurrent calls share
|
|
23
|
-
// the same initialisation work and never download the model twice.
|
|
24
|
-
// The cache is keyed by model name so switching models gets a fresh pipeline.
|
|
25
|
-
let localEmbedderPromise;
|
|
26
|
-
let localEmbedderModelName;
|
|
27
|
-
async function getLocalEmbedder(modelName) {
|
|
28
|
-
const resolvedModel = getLocalModelName(modelName);
|
|
29
|
-
// If the cached pipeline was created for a different model, discard it.
|
|
30
|
-
if (localEmbedderPromise && localEmbedderModelName !== resolvedModel) {
|
|
31
|
-
localEmbedderPromise = undefined;
|
|
32
|
-
localEmbedderModelName = undefined;
|
|
33
|
-
}
|
|
34
|
-
if (!localEmbedderPromise) {
|
|
35
|
-
localEmbedderModelName = resolvedModel;
|
|
36
|
-
localEmbedderPromise = (async () => {
|
|
37
|
-
// Ensure HuggingFace model cache lives in a stable location outside
|
|
38
|
-
// node_modules so it survives package reinstalls.
|
|
39
|
-
if (!process.env.HF_HOME) {
|
|
40
|
-
process.env.HF_HOME = path.join(getCacheDir(), "models");
|
|
41
|
-
}
|
|
42
|
-
let pipeline;
|
|
43
|
-
try {
|
|
44
|
-
const mod = await import("@huggingface/transformers");
|
|
45
|
-
pipeline = mod.pipeline;
|
|
46
|
-
}
|
|
47
|
-
catch (importError) {
|
|
48
|
-
const msg = importError instanceof Error ? importError.message : String(importError);
|
|
49
|
-
if (/Cannot find module|MODULE_NOT_FOUND|Cannot resolve/i.test(msg)) {
|
|
50
|
-
throw new Error("Semantic search requires @huggingface/transformers. Install it with: bun add @huggingface/transformers");
|
|
51
|
-
}
|
|
52
|
-
throw new Error(`Failed to load embedding runtime: ${msg}. Check platform compatibility.`);
|
|
53
|
-
}
|
|
54
|
-
const pipelineFn = pipeline;
|
|
55
|
-
return createLocalPipeline(pipelineFn, resolvedModel);
|
|
56
|
-
})();
|
|
57
|
-
// HI-13: Clear the cached promise on failure so the next call retries
|
|
58
|
-
// instead of permanently rejecting every subsequent call with the same error.
|
|
59
|
-
localEmbedderPromise.catch(() => {
|
|
60
|
-
localEmbedderPromise = undefined;
|
|
61
|
-
localEmbedderModelName = undefined;
|
|
62
|
-
});
|
|
63
|
-
}
|
|
64
|
-
return localEmbedderPromise;
|
|
65
|
-
}
|
|
66
|
-
async function createLocalPipeline(pipelineFn, modelName) {
|
|
67
|
-
try {
|
|
68
|
-
return await pipelineFn("feature-extraction", modelName, { dtype: LOCAL_EMBEDDER_DTYPE });
|
|
69
|
-
}
|
|
70
|
-
catch (error) {
|
|
71
|
-
if (!shouldRetryWithoutExplicitDtype(error)) {
|
|
72
|
-
throw error;
|
|
73
|
-
}
|
|
74
|
-
warn('Local embedding model "%s" rejected explicit dtype "%s"; retrying with explicit fallback dtype "%s".', modelName, LOCAL_EMBEDDER_DTYPE, LOCAL_EMBEDDER_FALLBACK_DTYPE);
|
|
75
|
-
return pipelineFn("feature-extraction", modelName, { dtype: LOCAL_EMBEDDER_FALLBACK_DTYPE });
|
|
76
|
-
}
|
|
77
|
-
}
|
|
78
|
-
function shouldRetryWithoutExplicitDtype(error) {
|
|
79
|
-
const message = error instanceof Error ? error.message : String(error);
|
|
80
|
-
return /dtype|fp32|precision|quant/i.test(message);
|
|
81
|
-
}
|
|
82
37
|
export function resetLocalEmbedder() {
|
|
83
|
-
|
|
84
|
-
localEmbedderModelName = undefined;
|
|
85
|
-
}
|
|
86
|
-
async function embedLocal(text, modelName) {
|
|
87
|
-
const model = await getLocalEmbedder(modelName);
|
|
88
|
-
const result = await model(text, { pooling: "mean", normalize: true });
|
|
89
|
-
return Array.from(result.data);
|
|
90
|
-
}
|
|
91
|
-
// ── Vector normalization ─────────────────────────────────────────────────────
|
|
92
|
-
/**
|
|
93
|
-
* L2-normalize a vector to unit length.
|
|
94
|
-
* Required for remote embeddings because the scoring pipeline's L2-to-cosine
|
|
95
|
-
* conversion formula (1 - distance^2/2) is only correct for unit vectors.
|
|
96
|
-
* The local embedder already normalizes via `normalize: true`.
|
|
97
|
-
*/
|
|
98
|
-
function l2Normalize(vec) {
|
|
99
|
-
const norm = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
|
|
100
|
-
if (norm === 0)
|
|
101
|
-
return vec;
|
|
102
|
-
return vec.map((v) => v / norm);
|
|
103
|
-
}
|
|
104
|
-
// ── OpenAI-compatible remote embedder ───────────────────────────────────────
|
|
105
|
-
function normalizeEmbeddingEndpoint(endpoint) {
|
|
106
|
-
let parsed;
|
|
107
|
-
try {
|
|
108
|
-
parsed = new URL(endpoint);
|
|
109
|
-
}
|
|
110
|
-
catch {
|
|
111
|
-
return endpoint;
|
|
112
|
-
}
|
|
113
|
-
const normalizedPath = parsed.pathname.replace(/\/+$/, "");
|
|
114
|
-
if (normalizedPath.endsWith("/embeddings")) {
|
|
115
|
-
return parsed.toString();
|
|
116
|
-
}
|
|
117
|
-
parsed.pathname = normalizedPath ? `${normalizedPath}/embeddings` : "/embeddings";
|
|
118
|
-
return parsed.toString();
|
|
119
|
-
}
|
|
120
|
-
function embeddingEndpointPathHint(endpoint) {
|
|
121
|
-
const normalizedEndpoint = normalizeEmbeddingEndpoint(endpoint);
|
|
122
|
-
if (normalizedEndpoint !== endpoint) {
|
|
123
|
-
return ` Check that your endpoint includes the full embeddings path (for example "${normalizedEndpoint}", not just "${endpoint}").`;
|
|
124
|
-
}
|
|
125
|
-
return "";
|
|
126
|
-
}
|
|
127
|
-
async function embedRemote(text, config) {
|
|
128
|
-
const headers = { "Content-Type": "application/json" };
|
|
129
|
-
if (config.apiKey) {
|
|
130
|
-
headers.Authorization = `Bearer ${config.apiKey}`;
|
|
131
|
-
}
|
|
132
|
-
const body = {
|
|
133
|
-
input: text,
|
|
134
|
-
model: config.model,
|
|
135
|
-
};
|
|
136
|
-
if (config.dimension) {
|
|
137
|
-
body.dimensions = config.dimension;
|
|
138
|
-
}
|
|
139
|
-
const response = await fetchWithTimeout(normalizeEmbeddingEndpoint(config.endpoint), {
|
|
140
|
-
method: "POST",
|
|
141
|
-
headers,
|
|
142
|
-
body: JSON.stringify(body),
|
|
143
|
-
});
|
|
144
|
-
if (!response.ok) {
|
|
145
|
-
const body = await response.text().catch(() => "");
|
|
146
|
-
throw new Error(`Embedding request failed (${response.status}): ${body}`);
|
|
147
|
-
}
|
|
148
|
-
const json = (await response.json());
|
|
149
|
-
if (!json.data?.[0]?.embedding) {
|
|
150
|
-
throw new Error(`Unexpected embedding response format: missing data[0].embedding.${embeddingEndpointPathHint(config.endpoint)}`);
|
|
151
|
-
}
|
|
152
|
-
return l2Normalize(json.data[0].embedding);
|
|
153
|
-
}
|
|
154
|
-
// ── Helpers ──────────────────────────────────────────────────────────────────
|
|
155
|
-
/** Check whether an EmbeddingConnectionConfig has a valid remote endpoint. */
|
|
156
|
-
function hasRemoteEndpoint(config) {
|
|
157
|
-
return isHttpUrl(config.endpoint);
|
|
158
|
-
}
|
|
159
|
-
// ── LRU embedding cache ─────────────────────────────────────────────────────
|
|
160
|
-
// Caches query embeddings to avoid redundant computation for repeated queries.
|
|
161
|
-
// Uses a simple Map with LRU eviction (delete + re-insert to move to end).
|
|
162
|
-
const EMBED_CACHE_MAX = 100;
|
|
163
|
-
const embedCache = new Map();
|
|
164
|
-
/**
|
|
165
|
-
* Build a cache key from query text and optional config.
|
|
166
|
-
* Different endpoints/models should not share cached embeddings.
|
|
167
|
-
* apiKey deliberately excluded: same endpoint+model produce identical embeddings regardless of auth
|
|
168
|
-
*/
|
|
169
|
-
function embedCacheKey(text, config) {
|
|
170
|
-
if (!config)
|
|
171
|
-
return `local::${text}`;
|
|
172
|
-
const endpoint = config.endpoint || "";
|
|
173
|
-
const model = config.model || config.localModel || "";
|
|
174
|
-
return `${endpoint}:${model}:${text}`;
|
|
175
|
-
}
|
|
176
|
-
/**
|
|
177
|
-
* Clear the embedding cache. Call when the embedding model changes
|
|
178
|
-
* or when you want to force fresh embeddings.
|
|
179
|
-
*/
|
|
180
|
-
export function clearEmbeddingCache() {
|
|
181
|
-
embedCache.clear();
|
|
38
|
+
localEmbedder.reset();
|
|
182
39
|
}
|
|
183
40
|
// ── Public API ──────────────────────────────────────────────────────────────
|
|
184
41
|
/**
|
|
@@ -192,129 +49,48 @@ export function clearEmbeddingCache() {
|
|
|
192
49
|
*/
|
|
193
50
|
export async function embed(text, embeddingConfig) {
|
|
194
51
|
const key = embedCacheKey(text, embeddingConfig);
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
if (cached) {
|
|
198
|
-
// Move to end (most recently used) for LRU ordering
|
|
199
|
-
embedCache.delete(key);
|
|
200
|
-
embedCache.set(key, cached);
|
|
52
|
+
const cached = getCachedEmbedding(key);
|
|
53
|
+
if (cached)
|
|
201
54
|
return cached;
|
|
202
|
-
}
|
|
203
|
-
// Compute the embedding
|
|
204
55
|
const result = embeddingConfig && hasRemoteEndpoint(embeddingConfig)
|
|
205
|
-
? await
|
|
206
|
-
: await
|
|
207
|
-
|
|
208
|
-
if (embedCache.size >= EMBED_CACHE_MAX) {
|
|
209
|
-
const oldest = embedCache.keys().next().value;
|
|
210
|
-
if (oldest !== undefined) {
|
|
211
|
-
embedCache.delete(oldest);
|
|
212
|
-
}
|
|
213
|
-
}
|
|
214
|
-
embedCache.set(key, result);
|
|
56
|
+
? await new RemoteEmbedder(embeddingConfig).embed(text)
|
|
57
|
+
: await localEmbedder.embedWithModel(text, embeddingConfig?.localModel);
|
|
58
|
+
setCachedEmbedding(key, result);
|
|
215
59
|
return result;
|
|
216
60
|
}
|
|
217
|
-
// ── Batch embedding ─────────────────────────────────────────────────────────
|
|
218
61
|
/**
|
|
219
62
|
* Generate embeddings for multiple texts in batch.
|
|
220
63
|
* Uses the OpenAI-compatible batch API for remote endpoints (batches of 100).
|
|
221
|
-
* Falls back to sequential embedding for local transformer pipeline.
|
|
64
|
+
* Falls back to sequential embedding for the local transformer pipeline.
|
|
222
65
|
*/
|
|
223
66
|
export async function embedBatch(texts, embeddingConfig) {
|
|
224
67
|
if (texts.length === 0)
|
|
225
68
|
return [];
|
|
226
69
|
if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
|
|
227
|
-
return
|
|
70
|
+
return new RemoteEmbedder(embeddingConfig).embedBatch(texts);
|
|
228
71
|
}
|
|
229
72
|
// Local transformer: process sequentially (pipeline handles one at a time)
|
|
230
73
|
const localModel = embeddingConfig?.localModel;
|
|
231
74
|
const results = [];
|
|
232
75
|
for (const text of texts) {
|
|
233
|
-
results.push(await
|
|
234
|
-
}
|
|
235
|
-
return results;
|
|
236
|
-
}
|
|
237
|
-
async function embedRemoteBatch(texts, config) {
|
|
238
|
-
const BATCH_SIZE = 100;
|
|
239
|
-
const results = [];
|
|
240
|
-
const headers = { "Content-Type": "application/json" };
|
|
241
|
-
if (config.apiKey) {
|
|
242
|
-
headers.Authorization = `Bearer ${config.apiKey}`;
|
|
243
|
-
}
|
|
244
|
-
for (let i = 0; i < texts.length; i += BATCH_SIZE) {
|
|
245
|
-
const batch = texts.slice(i, i + BATCH_SIZE);
|
|
246
|
-
const body = {
|
|
247
|
-
input: batch,
|
|
248
|
-
model: config.model,
|
|
249
|
-
};
|
|
250
|
-
if (config.dimension) {
|
|
251
|
-
body.dimensions = config.dimension;
|
|
252
|
-
}
|
|
253
|
-
const response = await fetchWithTimeout(normalizeEmbeddingEndpoint(config.endpoint), {
|
|
254
|
-
method: "POST",
|
|
255
|
-
headers,
|
|
256
|
-
body: JSON.stringify(body),
|
|
257
|
-
});
|
|
258
|
-
if (!response.ok) {
|
|
259
|
-
const respBody = await response.text().catch(() => "");
|
|
260
|
-
throw new Error(`Embedding batch request failed (${response.status}): ${respBody}`);
|
|
261
|
-
}
|
|
262
|
-
const json = (await response.json());
|
|
263
|
-
if (!json.data || json.data.length !== batch.length) {
|
|
264
|
-
throw new Error(`Unexpected embedding batch response: expected ${batch.length} embeddings, got ${json.data?.length ?? 0}.${embeddingEndpointPathHint(config.endpoint)}`);
|
|
265
|
-
}
|
|
266
|
-
// Sort by index to guarantee correct order (OpenAI API doesn't guarantee order)
|
|
267
|
-
const sorted = [...json.data].sort((a, b) => a.index - b.index);
|
|
268
|
-
for (const [idx, d] of sorted.entries()) {
|
|
269
|
-
if (!Array.isArray(d.embedding)) {
|
|
270
|
-
throw new Error(`Unexpected embedding at batch index ${idx}: missing or invalid`);
|
|
271
|
-
}
|
|
272
|
-
results.push(l2Normalize(d.embedding));
|
|
273
|
-
}
|
|
76
|
+
results.push(await localEmbedder.embedWithModel(text, localModel));
|
|
274
77
|
}
|
|
275
78
|
return results;
|
|
276
79
|
}
|
|
277
80
|
// ── Similarity ──────────────────────────────────────────────────────────────
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
return 0;
|
|
284
|
-
}
|
|
285
|
-
const len = a.length;
|
|
286
|
-
if (len === 0)
|
|
287
|
-
return 0;
|
|
288
|
-
let dot = 0, magA = 0, magB = 0;
|
|
289
|
-
for (let i = 0; i < len; i++) {
|
|
290
|
-
dot += a[i] * b[i];
|
|
291
|
-
magA += a[i] * a[i];
|
|
292
|
-
magB += b[i] * b[i];
|
|
293
|
-
}
|
|
294
|
-
const denom = Math.sqrt(magA) * Math.sqrt(magB);
|
|
295
|
-
return denom === 0 ? 0 : dot / denom;
|
|
296
|
-
}
|
|
81
|
+
// `cosineSimilarity` was moved to `./embedders/types.ts` so importers
|
|
82
|
+
// (notably `db.ts`) can pull the math function without dragging in this
|
|
83
|
+
// facade and its `@huggingface/transformers` import chain. Re-export
|
|
84
|
+
// preserves the existing public API.
|
|
85
|
+
export { cosineSimilarity } from "./embedders/types";
|
|
297
86
|
// ── Availability check ──────────────────────────────────────────────────────
|
|
298
|
-
/**
|
|
299
|
-
* Check whether the `@huggingface/transformers` package can be imported.
|
|
300
|
-
* Returns `true` if it can, `false` otherwise.
|
|
301
|
-
*/
|
|
302
|
-
export async function isTransformersAvailable() {
|
|
303
|
-
try {
|
|
304
|
-
await import("@huggingface/transformers");
|
|
305
|
-
return true;
|
|
306
|
-
}
|
|
307
|
-
catch {
|
|
308
|
-
return false;
|
|
309
|
-
}
|
|
310
|
-
}
|
|
311
87
|
/**
|
|
312
88
|
* Check whether embedding is available with a detailed reason on failure.
|
|
313
89
|
*/
|
|
314
90
|
export async function checkEmbeddingAvailability(embeddingConfig) {
|
|
315
91
|
if (embeddingConfig && hasRemoteEndpoint(embeddingConfig)) {
|
|
316
92
|
try {
|
|
317
|
-
await
|
|
93
|
+
await new RemoteEmbedder(embeddingConfig).embed("test");
|
|
318
94
|
return { available: true };
|
|
319
95
|
}
|
|
320
96
|
catch (err) {
|
|
@@ -326,7 +102,7 @@ export async function checkEmbeddingAvailability(embeddingConfig) {
|
|
|
326
102
|
}
|
|
327
103
|
}
|
|
328
104
|
// Check if the package is importable before attempting the model download.
|
|
329
|
-
if (!
|
|
105
|
+
if (!isTransformersAvailable()) {
|
|
330
106
|
return {
|
|
331
107
|
available: false,
|
|
332
108
|
reason: "missing-package",
|
|
@@ -334,7 +110,7 @@ export async function checkEmbeddingAvailability(embeddingConfig) {
|
|
|
334
110
|
};
|
|
335
111
|
}
|
|
336
112
|
try {
|
|
337
|
-
await
|
|
113
|
+
await localEmbedder.getPipeline(embeddingConfig?.localModel);
|
|
338
114
|
return { available: true };
|
|
339
115
|
}
|
|
340
116
|
catch (err) {
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* LRU embedding cache shared by the embedder facade.
|
|
3
|
+
*
|
|
4
|
+
* Caches query embeddings to avoid redundant computation for repeated
|
|
5
|
+
* queries. Uses a simple Map with LRU eviction (delete + re-insert to move
|
|
6
|
+
* an entry to the most-recently-used end).
|
|
7
|
+
*/
|
|
8
|
+
/** Maximum number of embeddings kept in the cache before eviction starts. */
const EMBED_CACHE_MAX = 100;
/**
 * Cache storage. A `Map` iterates in insertion order, so the first key is the
 * least recently used entry and the last key is the most recently used one.
 */
const embedCache = new Map();
/**
 * Build a cache key from query text and optional config.
 * Different endpoints/models should not share cached embeddings.
 * apiKey deliberately excluded: same endpoint+model produce identical embeddings regardless of auth.
 *
 * @param {string} text - Text being embedded.
 * @param {object} [config] - Optional embedding config; `endpoint`, `model`
 *   and `localModel` are the only fields read.
 * @returns {string} Key to use with `getCachedEmbedding` / `setCachedEmbedding`.
 */
export function embedCacheKey(text, config) {
    if (!config)
        return `local::${text}`;
    const endpoint = config.endpoint || "";
    const model = config.model || config.localModel || "";
    return `${endpoint}:${model}:${text}`;
}
/**
 * Look up a cached embedding and mark it as most recently used.
 *
 * @param {string} key - Key produced by `embedCacheKey`.
 * @returns {*} The cached value, or `undefined` on a miss.
 */
export function getCachedEmbedding(key) {
    const cached = embedCache.get(key);
    if (cached === undefined)
        return undefined;
    // Move to end (most recently used) for LRU ordering
    embedCache.delete(key);
    embedCache.set(key, cached);
    return cached;
}
/**
 * Store an embedding as the most recently used entry.
 *
 * Overwriting an existing key replaces its value and refreshes its recency
 * without evicting anything: the entry count does not grow, so dropping
 * another entry would only shrink the cache. A new key evicts the least
 * recently used entry once the cache holds `EMBED_CACHE_MAX` entries.
 *
 * @param {string} key - Key produced by `embedCacheKey`.
 * @param {*} value - Embedding to cache.
 */
export function setCachedEmbedding(key, value) {
    // `Map.set` on an existing key keeps its old position, so remove it first
    // to make the re-insert land at the most-recently-used end.
    const replacedExisting = embedCache.delete(key);
    if (!replacedExisting && embedCache.size >= EMBED_CACHE_MAX) {
        // Evict oldest entry if at capacity
        const oldest = embedCache.keys().next().value;
        if (oldest !== undefined) {
            embedCache.delete(oldest);
        }
    }
    embedCache.set(key, value);
}
/**
 * Clear the embedding cache. Call when the embedding model changes
 * or when you want to force fresh embeddings.
 */
export function clearEmbeddingCache() {
    embedCache.clear();
}
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Local @huggingface/transformers embedder.
|
|
3
|
+
*
|
|
4
|
+
* Encapsulates the transformer pipeline lifecycle as instance state on a
|
|
5
|
+
* `LocalEmbedder` so tests can construct fresh instances without leaking
|
|
6
|
+
* pipelines across tests. The facade in `../embedder.ts` keeps a single
|
|
7
|
+
* shared instance for the production code path.
|
|
8
|
+
*/
|
|
9
|
+
import { createRequire } from "node:module";
import path from "node:path";
import { getCacheDir } from "../paths";
import { warn } from "../warn";
|
|
12
|
+
/**
 * Model used for local embeddings when the caller does not name one.
 * `bge-small-en-v1.5` scores higher on MTEB benchmarks than the previous
 * `all-MiniLM-L6-v2` at the same 384-dimension footprint.
 */
export const DEFAULT_LOCAL_MODEL = "Xenova/bge-small-en-v1.5";
/** dtype requested on the first attempt to build the local pipeline. */
const LOCAL_EMBEDDER_DTYPE = "fp32";
/** dtype requested on the retry when the first attempt is rejected. */
const LOCAL_EMBEDDER_FALLBACK_DTYPE = "auto";
/**
 * Pick the local model name to embed with.
 *
 * @param {string} [overrideModel] - Caller-supplied model name.
 * @returns {string} `overrideModel` when it is truthy, otherwise
 *   `DEFAULT_LOCAL_MODEL`.
 */
function resolveLocalModelName(overrideModel) {
    if (overrideModel) {
        return overrideModel;
    }
    return DEFAULT_LOCAL_MODEL;
}
|
|
28
|
+
/**
 * Embedder backed by a local `@huggingface/transformers` feature-extraction
 * pipeline. The pipeline is created lazily on first use and cached on the
 * instance.
 */
export class LocalEmbedder {
    /** Model name used by `embed()` and `embedBatch()`; may be undefined. */
    defaultModel;
    /**
     * Cache the *promise* (not the resolved result) so concurrent calls share
     * the same initialisation work and never download the model twice. Keyed
     * by model name so switching models gets a fresh pipeline.
     */
    pipelinePromise;
    /** Model name the cached `pipelinePromise` was created for. */
    pipelineModelName;
    /**
     * @param {string} [defaultModel] - Model name passed along by `embed()`
     *   and `embedBatch()`.
     */
    constructor(defaultModel) {
        this.defaultModel = defaultModel;
    }
    /** Reset the cached pipeline (used by tests and by `resetLocalEmbedder()`). */
    reset() {
        this.pipelinePromise = undefined;
        this.pipelineModelName = undefined;
    }
    /**
     * Embed one text with the instance's default model.
     *
     * @param {string} text
     * @returns {Promise<number[]>}
     */
    async embed(text) {
        return this.embedWithModel(text, this.defaultModel);
    }
    /**
     * Embed several texts one after another with the instance's default model.
     *
     * @param {string[]} texts
     * @returns {Promise<number[][]>} One vector per input, in input order.
     */
    async embedBatch(texts) {
        if (texts.length === 0)
            return [];
        const results = [];
        for (const text of texts) {
            results.push(await this.embedWithModel(text, this.defaultModel));
        }
        return results;
    }
    /**
     * Embed using a model name override (used by the facade for per-call model overrides).
     *
     * @param {string} text
     * @param {string} [modelName] - Passed straight to `getPipeline`.
     * @returns {Promise<number[]>} The pipeline output's `data` copied into a plain array.
     */
    async embedWithModel(text, modelName) {
        const pipeline = await this.getPipeline(modelName);
        const result = await pipeline(text, { pooling: "mean", normalize: true });
        return Array.from(result.data);
    }
    /**
     * Eagerly load (or return the cached) underlying pipeline. Used by
     * availability checks that want to surface model-download failures
     * without performing a real embed call.
     *
     * @param {string} [modelName] - Resolved through `resolveLocalModelName`.
     * @returns {Promise<Function>} The pipeline function.
     * @throws {Error} When the runtime cannot be imported or the pipeline
     *   cannot be created; the import failure is attached as `cause`.
     */
    async getPipeline(modelName) {
        const resolvedModel = resolveLocalModelName(modelName);
        if (this.pipelinePromise && this.pipelineModelName !== resolvedModel) {
            this.pipelinePromise = undefined;
            this.pipelineModelName = undefined;
        }
        if (!this.pipelinePromise) {
            const pending = (async () => {
                // Ensure HuggingFace model cache lives in a stable location outside
                // node_modules so it survives package reinstalls.
                if (!process.env.HF_HOME) {
                    process.env.HF_HOME = path.join(getCacheDir(), "models");
                }
                let pipelineFn;
                try {
                    const mod = await import("@huggingface/transformers");
                    pipelineFn = mod.pipeline;
                }
                catch (importError) {
                    const msg = importError instanceof Error ? importError.message : String(importError);
                    const code = importError?.code;
                    // Check the error code as well as the message: the wording
                    // differs between runtimes ("Cannot find module" vs
                    // "Cannot find package"), the code is the stable signal.
                    const packageMissing = code === "ERR_MODULE_NOT_FOUND" ||
                        code === "MODULE_NOT_FOUND" ||
                        /Cannot find (?:module|package)|MODULE_NOT_FOUND|Cannot resolve/i.test(msg);
                    if (packageMissing) {
                        throw new Error("Semantic search requires @huggingface/transformers. Install it with: bun add @huggingface/transformers", { cause: importError });
                    }
                    throw new Error(`Failed to load embedding runtime: ${msg}. Check platform compatibility.`, { cause: importError });
                }
                return createLocalPipeline(pipelineFn, resolvedModel);
            })();
            this.pipelineModelName = resolvedModel;
            this.pipelinePromise = pending;
            // HI-13: Clear the cached promise on failure so the next call retries
            // instead of permanently rejecting every subsequent call with the same error.
            // Only clear when this attempt is still the cached one: after a model
            // switch or reset() a newer promise may be cached, and a late failure
            // of this older attempt must not discard it.
            pending.catch(() => {
                if (this.pipelinePromise === pending) {
                    this.pipelinePromise = undefined;
                    this.pipelineModelName = undefined;
                }
            });
        }
        return this.pipelinePromise;
    }
}
|
|
107
|
+
/**
 * Build the feature-extraction pipeline for `modelName`.
 *
 * The first attempt requests `LOCAL_EMBEDDER_DTYPE`. If it fails with an
 * error that `shouldRetryWithoutExplicitDtype` accepts, a warning is emitted
 * and a second attempt requests `LOCAL_EMBEDDER_FALLBACK_DTYPE`. Any other
 * failure is rethrown unchanged.
 *
 * @param {Function} pipelineFn - Pipeline factory, called as `(task, model, options)`.
 * @param {string} modelName - Model to load.
 * @returns {Promise<*>} Whatever `pipelineFn` resolves to.
 */
async function createLocalPipeline(pipelineFn, modelName) {
    const task = "feature-extraction";
    let firstFailure;
    try {
        const ready = await pipelineFn(task, modelName, { dtype: LOCAL_EMBEDDER_DTYPE });
        return ready;
    }
    catch (error) {
        firstFailure = error;
    }
    if (!shouldRetryWithoutExplicitDtype(firstFailure)) {
        throw firstFailure;
    }
    warn('Local embedding model "%s" rejected explicit dtype "%s"; retrying with explicit fallback dtype "%s".', modelName, LOCAL_EMBEDDER_DTYPE, LOCAL_EMBEDDER_FALLBACK_DTYPE);
    return pipelineFn(task, modelName, { dtype: LOCAL_EMBEDDER_FALLBACK_DTYPE });
}
|
|
119
|
+
/**
 * Decide whether a pipeline-creation failure warrants a retry with the
 * fallback dtype.
 *
 * @param {*} error - The caught value; its `message` is used when it is an
 *   `Error`, otherwise its string form.
 * @returns {boolean} True when the text mentions dtype, fp32, precision or
 *   quant (case-insensitive).
 */
function shouldRetryWithoutExplicitDtype(error) {
    const dtypeHint = /dtype|fp32|precision|quant/i;
    let description;
    if (error instanceof Error) {
        description = error.message;
    }
    else {
        description = String(error);
    }
    return dtypeHint.test(description);
}
|
|
123
|
+
/**
 * Check whether the `@huggingface/transformers` package can be resolved.
 * Only resolution is performed, so the module is never loaded (which would
 * trigger heavy WASM/model side-effects) just to test availability.
 *
 * Uses `Bun.resolveSync` when running under Bun. Otherwise (e.g. running
 * under Node) it resolves through a `require` function built with
 * `createRequire`, because `require` is not available as a global inside
 * Node modules.
 *
 * @param {string} [packageName="@huggingface/transformers"] - Package
 *   specifier to probe; callers normally omit it.
 * @returns {boolean} `true` when the specifier resolves, `false` otherwise.
 */
export function isTransformersAvailable(packageName = "@huggingface/transformers") {
    try {
        if (typeof Bun !== "undefined" && typeof Bun.resolveSync === "function") {
            Bun.resolveSync(packageName, import.meta.dir);
            return true;
        }
        // Resolve relative to this module so the lookup walks the same
        // node_modules chain a real import from here would use.
        createRequire(import.meta.url).resolve(packageName);
        return true;
    }
    catch {
        // Any resolution failure means the package is not usable from here.
        return false;
    }
}
|