@ctxr/skill-llm-wiki 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +134 -0
- package/LICENSE +21 -0
- package/README.md +484 -0
- package/SKILL.md +252 -0
- package/guide/basics/concepts.md +74 -0
- package/guide/basics/index.md +45 -0
- package/guide/basics/schema.md +140 -0
- package/guide/cli.md +256 -0
- package/guide/correctness/index.md +45 -0
- package/guide/correctness/invariants.md +89 -0
- package/guide/correctness/safety.md +96 -0
- package/guide/history/diff.md +110 -0
- package/guide/history/hidden-git.md +130 -0
- package/guide/history/index.md +52 -0
- package/guide/history/remote-sync.md +113 -0
- package/guide/index.md +134 -0
- package/guide/isolation/coexistence.md +134 -0
- package/guide/isolation/index.md +44 -0
- package/guide/isolation/scale.md +251 -0
- package/guide/layout/in-place-mode.md +97 -0
- package/guide/layout/index.md +53 -0
- package/guide/layout/layout-contract.md +131 -0
- package/guide/layout/layout-modes.md +115 -0
- package/guide/operations/index.md +76 -0
- package/guide/operations/ingest/build.md +75 -0
- package/guide/operations/ingest/extend.md +61 -0
- package/guide/operations/ingest/index.md +54 -0
- package/guide/operations/ingest/join.md +65 -0
- package/guide/operations/maintain/fix.md +66 -0
- package/guide/operations/maintain/index.md +47 -0
- package/guide/operations/maintain/rebuild.md +86 -0
- package/guide/operations/validate.md +48 -0
- package/guide/substrate/index.md +47 -0
- package/guide/substrate/operators.md +96 -0
- package/guide/substrate/tiered-ai.md +363 -0
- package/guide/ux/index.md +44 -0
- package/guide/ux/preflight.md +150 -0
- package/guide/ux/user-intent.md +135 -0
- package/package.json +55 -0
- package/scripts/cli.mjs +893 -0
- package/scripts/commands/remote.mjs +93 -0
- package/scripts/commands/review.mjs +253 -0
- package/scripts/commands/sync.mjs +84 -0
- package/scripts/lib/chunk.mjs +421 -0
- package/scripts/lib/cluster-detect.mjs +516 -0
- package/scripts/lib/decision-log.mjs +343 -0
- package/scripts/lib/draft.mjs +158 -0
- package/scripts/lib/embeddings.mjs +366 -0
- package/scripts/lib/frontmatter.mjs +497 -0
- package/scripts/lib/git-commands.mjs +155 -0
- package/scripts/lib/git.mjs +486 -0
- package/scripts/lib/gitignore.mjs +62 -0
- package/scripts/lib/history.mjs +331 -0
- package/scripts/lib/indices.mjs +510 -0
- package/scripts/lib/ingest.mjs +258 -0
- package/scripts/lib/intent.mjs +713 -0
- package/scripts/lib/interactive.mjs +99 -0
- package/scripts/lib/migrate.mjs +126 -0
- package/scripts/lib/nest-applier.mjs +260 -0
- package/scripts/lib/operators.mjs +1365 -0
- package/scripts/lib/orchestrator.mjs +718 -0
- package/scripts/lib/paths.mjs +197 -0
- package/scripts/lib/preflight.mjs +213 -0
- package/scripts/lib/provenance.mjs +672 -0
- package/scripts/lib/quality-metric.mjs +269 -0
- package/scripts/lib/query-fixture.mjs +71 -0
- package/scripts/lib/rollback.mjs +95 -0
- package/scripts/lib/shape-check.mjs +172 -0
- package/scripts/lib/similarity-cache.mjs +126 -0
- package/scripts/lib/similarity.mjs +230 -0
- package/scripts/lib/snapshot.mjs +54 -0
- package/scripts/lib/source-frontmatter.mjs +85 -0
- package/scripts/lib/tier2-protocol.mjs +470 -0
- package/scripts/lib/tiered.mjs +453 -0
- package/scripts/lib/validate.mjs +362 -0
|
@@ -0,0 +1,366 @@
|
|
|
1
|
+
// embeddings.mjs — Tier 1 of the tiered AI ladder.
|
|
2
|
+
//
|
|
3
|
+
// Backed by `@xenova/transformers` (MiniLM-L6-v2, 384 dims) loaded
|
|
4
|
+
// lazily via dynamic import. The dependency is REQUIRED — it is
|
|
5
|
+
// listed in `dependencies` (not devDependencies, not optional) and
|
|
6
|
+
// Tier 1 is the default decision layer after Tier 0 for every
|
|
7
|
+
// mid-band pair.
|
|
8
|
+
//
|
|
9
|
+
// Rationale for "required": Tier 0 TF-IDF on terse technical
|
|
10
|
+
// frontmatter produces mostly decisive-different results (150/151
|
|
11
|
+
// pairs < 0.30 on the skill's own guide/), which leaves Tier 2
|
|
12
|
+
// (sub-agent) as the only remaining decision layer. With no Tier 1
|
|
13
|
+
// at all, every non-trivially-same pair escalates to Tier 2, which
|
|
14
|
+
// is expensive. A real 23 MB sentence-embedding model bridges the
|
|
15
|
+
// gap: it's cheap, local, and shapes the decision space so Tier 2
|
|
16
|
+
// only sees genuinely ambiguous pairs.
|
|
17
|
+
//
|
|
18
|
+
// The `LLM_WIKI_MOCK_TIER1=1` env var is the test escape hatch.
|
|
19
|
+
// When set, `embed()` returns a deterministic hash-based vector
|
|
20
|
+
// instead of loading the real model. This is what the CI test
|
|
21
|
+
// suite uses; no network, no model download, no real model weights
|
|
22
|
+
// involved. The mock is NOT a production fallback — if Tier 1
|
|
23
|
+
// loading fails outside of mock mode, `embed()` throws loudly.
|
|
24
|
+
//
|
|
25
|
+
// Model download behaviour: `@xenova/transformers` downloads the
|
|
26
|
+
// ~23 MB MiniLM model to its HuggingFace cache the first time the
|
|
27
|
+
// extractor is constructed. Preflight warns if the model is not
|
|
28
|
+
// yet cached so the user knows the first run will pay this cost;
|
|
29
|
+
// the download itself is transparent and happens inside the
|
|
30
|
+
// pipeline constructor.
|
|
31
|
+
|
|
32
|
+
import { createHash } from "node:crypto";
|
|
33
|
+
import {
|
|
34
|
+
existsSync,
|
|
35
|
+
mkdirSync,
|
|
36
|
+
readFileSync,
|
|
37
|
+
renameSync,
|
|
38
|
+
writeFileSync,
|
|
39
|
+
} from "node:fs";
|
|
40
|
+
import { dirname, join } from "node:path";
|
|
41
|
+
|
|
42
|
+
// Public thresholds mirror methodology §8.5. `tiered.mjs` reads
|
|
43
|
+
// them via import. These are the Tier 1 (embedding cosine)
|
|
44
|
+
// thresholds, NOT the Tier 0 (TF-IDF) thresholds. They have been
|
|
45
|
+
// left unchanged from the Phase 6 stub values because they were
|
|
46
|
+
// justified from first principles (above 0.80 is functionally
|
|
47
|
+
// paraphrase-level, below 0.45 is topic-level different) and are
|
|
48
|
+
// corpus-independent.
|
|
49
|
+
export const TIER1_DECISIVE_SAME = 0.80; // embedding cosine above this is regarded as paraphrase-level (see rationale above)
|
|
50
|
+
export const TIER1_DECISIVE_DIFFERENT = 0.45; // embedding cosine below this is regarded as topic-level different
|
|
51
|
+
|
|
52
|
+
// Model id + dims. Pinned here so a future model bump is one edit.
|
|
53
|
+
export const MODEL_ID = "Xenova/all-MiniLM-L6-v2"; // handed to `pipeline("feature-extraction", …)` by realEmbed
|
|
54
|
+
export const EMBEDDING_DIMS = 384; // vector length; also sizes the vectors built by mockEmbed
|
|
55
|
+
|
|
56
|
+
// Path helpers.
|
|
57
|
+
//
|
|
58
|
+
// The cache is namespaced by mode: mock-mode vectors live under a
|
|
59
|
+
// separate `mock/` subdirectory from real-model vectors. Without
|
|
60
|
+
// this namespace a `LLM_WIKI_MOCK_TIER1=1` test run would pollute
|
|
61
|
+
// the real-model cache with deterministic-hash vectors that a
|
|
62
|
+
// subsequent real build would blindly consume as free "hits",
|
|
63
|
+
// producing silently-wrong similarity scores. The namespace is
|
|
64
|
+
// absolute: switching modes is equivalent to a fresh cache.
|
|
65
|
+
// Resolve where the cached vector for `textHash` lives under `wikiRoot`:
//   <wikiRoot>/.llmwiki/embedding-cache/<namespace>/<textHash>.f32
// The namespace is "mock" while mock mode is active and "model-minilm"
// otherwise, so vectors from the two modes never share a file.
export function embeddingCachePath(wikiRoot, textHash) {
  const fileName = textHash + ".f32";
  const cacheDir = join(wikiRoot, ".llmwiki", "embedding-cache");
  if (isMockMode()) {
    return join(cacheDir, "mock", fileName);
  }
  return join(cacheDir, "model-minilm", fileName);
}
|
|
69
|
+
|
|
70
|
+
// ── Availability detection ───────────────────────────────────────────
|
|
71
|
+
//
|
|
72
|
+
// In mock mode we short-circuit the dynamic import entirely. In
|
|
73
|
+
// non-mock mode Tier 1 is required: if the dynamic import fails we
|
|
74
|
+
// surface the error to the caller rather than silently degrading.
|
|
75
|
+
|
|
76
|
+
let _tier1Module = null; // set by tryLoadTier1: the imported module, or `{ __mock: true }` in mock mode
|
|
77
|
+
let _tier1LoadError = null; // set by tryLoadTier1 when the dynamic import throws
|
|
78
|
+
// In-flight load promise. Caching the PROMISE (not a boolean) is
|
|
79
|
+
// essential because `tryLoadTier1` is invoked concurrently from
|
|
80
|
+
// multiple call sites — `cluster-detect::computeAffinityMatrix`
|
|
81
|
+
// launches 17+ parallel `embed()` calls via `Promise.all`, and
|
|
82
|
+
// `tiered.mjs` does the same for mid-band pair batches. A boolean
|
|
83
|
+
// flag creates a TOCTOU race: the first caller sets it to `true`,
|
|
84
|
+
// awaits the dynamic import, and every concurrent caller sees
|
|
85
|
+
// "loaded" but reads the module reference BEFORE it lands — then
|
|
86
|
+
// throws "Tier 1 failed to load" even though the import is in
|
|
87
|
+
// flight and will succeed. Caching the promise collapses every
|
|
88
|
+
// concurrent caller onto the same async resolution.
|
|
89
|
+
let _tier1LoadPromise = null; // promise of the first tryLoadTier1() attempt; null until something triggers a load
|
|
90
|
+
|
|
91
|
+
// Reset hook for tests so fresh scenarios get a clean load state.
|
|
92
|
+
// Resets EVERY piece of module state the embeddings module owns so
|
|
93
|
+
// tests that switch wikis, mock modes, or installer outcomes start
|
|
94
|
+
// from a clean slate — including the lazily-constructed model
|
|
95
|
+
// extractor cached by `realEmbed`.
|
|
96
|
+
// Test hook: put every piece of module-level Tier 1 state back to its
// initial `null` value. The four slots are independent of one another,
// so the order of the assignments does not matter.
export function _resetTier1LoadState() {
  // Drop the cached extractor so the next real embed rebuilds it.
  _extractor = null;
  // Forget the memoised load attempt and whatever it produced.
  _tier1LoadPromise = null;
  _tier1LoadError = null;
  _tier1Module = null;
}
|
|
102
|
+
|
|
103
|
+
// Test-only introspection: has anything triggered the Tier 1
|
|
104
|
+
// module loader in this process yet? A `false` return guarantees
|
|
105
|
+
// neither `ensureTier1`, `isAvailable`, nor `embed()`'s cache-miss
|
|
106
|
+
// path has fired — the loader's promise slot is still empty. Used
|
|
107
|
+
// by the `tiered.decide()` warm-cache regression tests to prove the
|
|
108
|
+
// escalation path never touches the loader when the similarity
|
|
109
|
+
// cache already carries the answer.
|
|
110
|
+
// Test-only probe: reports whether a Tier 1 load attempt has been
// started in this process, i.e. whether the loader's promise slot has
// been filled since the last reset.
export function _isTier1LoaderTouched() {
  const loaderSlotEmpty = _tier1LoadPromise === null;
  return !loaderSlotEmpty;
}
|
|
113
|
+
|
|
114
|
+
// True when the LLM_WIKI_MOCK_TIER1 environment variable is exactly
// "1" or "true"; any other value (or an unset variable) means real mode.
export function isMockMode() {
  const flag = process.env.LLM_WIKI_MOCK_TIER1;
  if (flag === "1") return true;
  return flag === "true";
}
|
|
120
|
+
|
|
121
|
+
// Memoised entry point for loading Tier 1. The first call starts the
// load attempt and stores its promise; every later call gets that same
// promise back. Resolves to `{ module, error }` and never rejects.
function tryLoadTier1() {
  if (!_tier1LoadPromise) {
    _tier1LoadPromise = loadTier1Once();
  }
  return _tier1LoadPromise;
}

// Performs one load attempt. In mock mode it installs the
// `{ __mock: true }` sentinel without importing anything; otherwise it
// dynamically imports @xenova/transformers and records either the
// module or the import error in module state.
async function loadTier1Once() {
  // Debug breadcrumb, written BEFORE the import so that a failing
  // import still leaves evidence that the attempt was made.
  if (process.env.LLM_WIKI_TIER1_DEBUG === "1") {
    const label = isMockMode() ? "(mock)" : `(${MODEL_ID})`;
    process.stderr.write(`[tier1-debug] loading Tier 1 model ${label}\n`);
  }

  if (isMockMode()) {
    _tier1Module = { __mock: true };
    return { module: _tier1Module, error: null };
  }

  try {
    const loaded = await import("@xenova/transformers");
    _tier1Module = loaded;
    return { module: loaded, error: null };
  } catch (err) {
    _tier1LoadError = err;
    return { module: null, error: err };
  }
}
|
|
152
|
+
|
|
153
|
+
// Is Tier 1 usable right now? Cheap check — does not run the model.
|
|
154
|
+
// In production mode this reflects whether the @xenova/transformers
|
|
155
|
+
// package is importable, which should always be true since it's a
|
|
156
|
+
// required dependency.
|
|
157
|
+
// Resolves to true when the (memoised) Tier 1 load attempt produced a
// module, false when it did not. Does not run the model itself.
export async function isAvailable() {
  const { module: loaded } = await tryLoadTier1();
  return loaded !== null;
}
|
|
161
|
+
|
|
162
|
+
// ── The ensure-ready contract ────────────────────────────────────────
|
|
163
|
+
//
|
|
164
|
+
// `ensureTier1(wikiRoot, opts)` is the single entry point tiered.mjs
|
|
165
|
+
// uses. Return shape:
|
|
166
|
+
// { available, reason, model?, error? }
|
|
167
|
+
//
|
|
168
|
+
// where reason is one of:
|
|
169
|
+
// "ready" Tier 1 is loaded and ready to embed.
|
|
170
|
+
// "mock" Mock mode — deterministic fake embeddings.
|
|
171
|
+
// "module-load-failed" Dynamic import of @xenova/transformers failed.
|
|
172
|
+
//
|
|
173
|
+
// Tier 1 is a REQUIRED dependency. `ensureTier1` never installs,
|
|
174
|
+
// never prompts, never writes a persistent decline marker. If the
|
|
175
|
+
// module cannot be loaded we return `available: false` with a
|
|
176
|
+
// descriptive reason so the caller can decide whether to raise or
|
|
177
|
+
// degrade (in production the caller should raise).
|
|
178
|
+
// Report whether Tier 1 can be used, without installing or prompting.
//
// Resolves to one of:
//   { available: true,  reason: "ready" | "mock", model }
//   { available: false, reason: "module-load-failed", error }
//
// `wikiRoot` and `opts` are accepted but not used by this function.
export async function ensureTier1(wikiRoot, opts = {}) {
  void wikiRoot;
  void opts;

  const { module: loaded, error } = await tryLoadTier1();
  if (!loaded) {
    return {
      available: false,
      reason: "module-load-failed",
      error,
    };
  }

  const reason = isMockMode() ? "mock" : "ready";
  return { available: true, reason, model: loaded };
}
|
|
195
|
+
|
|
196
|
+
// ── Embedding generation ─────────────────────────────────────────────
|
|
197
|
+
//
|
|
198
|
+
// Given a text, returns a Float32Array embedding. Results are
|
|
199
|
+
// cached on disk at <wiki>/.llmwiki/embedding-cache/<ns>/<sha>.f32.
|
|
200
|
+
// The cache key is the sha256 of the input text — identical texts
|
|
201
|
+
// across entries share a cache entry.
|
|
202
|
+
//
|
|
203
|
+
// In mock mode the "embedding" is a deterministic hash-derived
|
|
204
|
+
// vector: a token-bag blended with a hash vector, normalized to
|
|
205
|
+
// unit length. This gives stable pairwise distances in tests
|
|
206
|
+
// without requiring a real model.
|
|
207
|
+
//
|
|
208
|
+
// In production mode `realEmbed` spins up the MiniLM extractor on
|
|
209
|
+
// first call (downloading the model if not already cached) and
|
|
210
|
+
// reuses it for all subsequent embeddings in this process.
|
|
211
|
+
|
|
212
|
+
// Return the embedding vector for `text`, using the on-disk cache
// under `wikiRoot` when possible.
//
// Parameters:
//   wikiRoot         root directory used to resolve the cache path
//   text             input text; its sha256 hex digest is the cache key
//   opts.moduleHint  optional already-loaded Tier 1 module; when given
//                    (and not in mock mode) the loader is not consulted
//
// Returns the cached vector on a hit. On a miss it computes a fresh
// vector (mock or real, depending on the mode), writes it to the
// cache, and returns it.
//
// Throws when, outside mock mode, no module hint was supplied and the
// Tier 1 module could not be loaded. The original load error, if any,
// is attached as `cause` so its stack is not lost.
export async function embed(wikiRoot, text, opts = {}) {
  const { moduleHint = null } = opts;
  const hash = createHash("sha256").update(text).digest("hex");
  const cachePath = embeddingCachePath(wikiRoot, hash);
  if (existsSync(cachePath)) {
    return readCachedEmbedding(cachePath);
  }

  // Cache miss: leave a breadcrumb (in both modes) so an operator can
  // tell that a fresh vector had to be computed for this hash.
  if (process.env.LLM_WIKI_TIER1_DEBUG === "1") {
    process.stderr.write(
      `[tier1-debug] computing fresh embedding ${
        isMockMode() ? "(mock)" : "(model)"
      } for hash=${hash.slice(0, 12)}\n`,
    );
  }

  let vec;
  if (isMockMode()) {
    vec = mockEmbed(text);
  } else {
    let mod = moduleHint;
    if (!mod) {
      const loadResult = await tryLoadTier1();
      mod = loadResult.module;
      if (!mod) {
        const underlying = loadResult.error
          ? ` Underlying error: ${loadResult.error.message ?? String(loadResult.error)}`
          : "";
        // Chain the original failure instead of flattening it to text
        // only; the message itself is unchanged.
        const errorOptions = loadResult.error
          ? { cause: loadResult.error }
          : undefined;
        throw new Error(
          "embeddings: Tier 1 (@xenova/transformers) failed to load — " +
            "required dependency is missing or broken. Run `npm install` " +
            "in the skill directory to restore it. Set LLM_WIKI_MOCK_TIER1=1 " +
            "only for hermetic test runs, never in production." +
            underlying,
          errorOptions,
        );
      }
    }
    vec = await realEmbed(mod, text);
  }
  writeCachedEmbedding(cachePath, vec);
  return vec;
}
|
|
260
|
+
|
|
261
|
+
// Cosine similarity between two Float32Array embeddings.
|
|
262
|
+
// Cosine similarity of two equal-length numeric vectors.
// Returns 0 when either vector is missing, when the lengths differ, or
// when either vector has zero magnitude.
export function embeddingCosine(a, b) {
  if (!a || !b) return 0;
  if (a.length !== b.length) return 0;

  const size = a.length;
  let dotProduct = 0;
  let sumSquaresA = 0;
  let sumSquaresB = 0;
  let i = 0;
  while (i < size) {
    const x = a[i];
    const y = b[i];
    dotProduct += x * y;
    sumSquaresA += x * x;
    sumSquaresB += y * y;
    i += 1;
  }

  const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
  return magnitude === 0 ? 0 : dotProduct / magnitude;
}
|
|
276
|
+
|
|
277
|
+
// Persist `vec` (a typed array) to `path` as its raw bytes.
// The parent directory is created if needed. The bytes go to a
// uniquely named temp file next to the target, which is then renamed
// over it, so readers never observe a half-written file.
function writeCachedEmbedding(path, vec) {
  mkdirSync(dirname(path), { recursive: true });
  // View exactly the bytes this typed array covers (it may be a subarray).
  const payload = Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength);
  const tmpPath = [path, "tmp", process.pid, Date.now()].join(".");
  writeFileSync(tmpPath, payload);
  renameSync(tmpPath, path);
}
|
|
285
|
+
|
|
286
|
+
// Load a cached embedding from `path` and return it as a Float32Array
// that owns its own memory.
//
// The file's bytes are copied into a freshly allocated array rather
// than viewed in place: a Float32Array view requires a 4-byte-aligned
// byteOffset, which a Buffer does not promise, and a view would also
// alias whatever ArrayBuffer backs the Buffer. Any trailing bytes that
// do not make up a whole 32-bit float are ignored.
function readCachedEmbedding(path) {
  const bytes = readFileSync(path);
  const floatSize = Float32Array.BYTES_PER_ELEMENT;
  const count = Math.floor(bytes.byteLength / floatSize);
  const vec = new Float32Array(count);
  new Uint8Array(vec.buffer).set(bytes.subarray(0, count * floatSize));
  return vec;
}
|
|
290
|
+
|
|
291
|
+
// Deterministic mock embedding: hash the text, then map hash bytes
|
|
292
|
+
// into a unit vector. Identical texts produce identical vectors;
|
|
293
|
+
// similar texts, however, do NOT produce similar vectors from the
|
|
294
|
+
// hash alone: sha256 avalanches, so neighbouring inputs are unrelated.
|
|
295
|
+
//
|
|
296
|
+
// For the mock tests we need texts that SHOULD be similar to look
|
|
297
|
+
// similar, so we blend the hash vector with a simple token-bag
|
|
298
|
+
// signature. This gives us enough structure to drive tiered
|
|
299
|
+
// decision tests without bringing in a real model.
|
|
300
|
+
// Build the deterministic mock embedding for `text`:
//   1. seed every dimension from the sha256 digest of the whole text;
//   2. add, per token, a 0.3-weighted contribution from that token's
//      own sha256 digest, so texts sharing tokens correlate;
//   3. scale the result to unit length.
// Returns a Float32Array of EMBEDDING_DIMS elements.
function mockEmbed(text) {
  // Map a byte to (byte - 128) / 128.
  const centred = (byte) => (byte - 128) / 128;
  const digestOf = (input) => createHash("sha256").update(input).digest();

  // Primary signal: the text's own digest, repeated across the dims.
  const textDigest = digestOf(text);
  const vec = Float32Array.from({ length: EMBEDDING_DIMS }, (_, dim) =>
    centred(textDigest[dim % textDigest.length]),
  );

  // Secondary signal: token bag. Tokens are runs of letters/digits in
  // the lower-cased text; tokens shorter than two characters are skipped.
  const words = text.toLowerCase().split(/[^\p{L}\p{N}]+/u);
  for (const word of words) {
    if (word.length < 2) continue;
    const wordDigest = digestOf(word);
    for (let dim = 0; dim < EMBEDDING_DIMS; dim += 1) {
      vec[dim] += centred(wordDigest[dim % wordDigest.length]) * 0.3;
    }
  }

  // Normalise to unit length (skipped for an all-zero vector).
  const magnitude = Math.sqrt(vec.reduce((sum, v) => sum + v * v, 0));
  if (magnitude > 0) {
    for (let dim = 0; dim < EMBEDDING_DIMS; dim += 1) {
      vec[dim] = vec[dim] / magnitude;
    }
  }
  return vec;
}
|
|
328
|
+
|
|
329
|
+
// Real embed via @xenova/transformers. The API is:
|
|
330
|
+
// const extractor = await pipeline('feature-extraction', MODEL_ID)
|
|
331
|
+
// const output = await extractor(text, { pooling: 'mean', normalize: true })
|
|
332
|
+
// output.data is a Float32Array
|
|
333
|
+
// We lazily construct the extractor and cache it in module state.
|
|
334
|
+
// Lazily-created feature extractor. Holds the PROMISE returned by
// `pipeline()` (or null), not the resolved extractor: callers that
// arrive while construction is still in flight then share that one
// construction instead of each starting their own.
let _extractor = null;

// Compute the embedding of `text` with the real model.
//
// Parameters:
//   mod   the loaded transformers module; only `mod.pipeline` is used
//   text  the text to embed
//
// Returns a new Float32Array copied from the extractor's `output.data`.
// Rejects if building the extractor or running it fails. A failed
// build is not cached, so the next call tries again.
async function realEmbed(mod, text) {
  if (!_extractor) {
    const pending = Promise.resolve(
      mod.pipeline("feature-extraction", MODEL_ID),
    );
    _extractor = pending;
    // On failure, free the slot (only if it still holds this attempt)
    // so a later call can retry. The rejection itself still reaches
    // every caller awaiting `pending` below.
    pending.catch(() => {
      if (_extractor === pending) _extractor = null;
    });
  }
  const extractor = await _extractor;
  const output = await extractor(text, { pooling: "mean", normalize: true });
  return new Float32Array(output.data);
}
|
|
342
|
+
|
|
343
|
+
// Preflight warning helper: inspect the HuggingFace cache directory
|
|
344
|
+
// the transformers library uses and return a string when the model
|
|
345
|
+
// has not yet been downloaded. Intended for preflight.mjs to surface
|
|
346
|
+
// to the user so they understand the first run will pay the ~23 MB
|
|
347
|
+
// download latency. Returns null when the cache is already warm, or
|
|
348
|
+
// when running in mock mode (no model needed), or when the cache
|
|
349
|
+
// directory is unknown on this platform.
|
|
350
|
+
// Preflight helper. Returns a warning string when the
// TRANSFORMERS_CACHE environment variable is set and no
// `<TRANSFORMERS_CACHE>/<MODEL_ID>` entry exists on disk; returns null
// in mock mode, when the variable is unset or empty, or when that
// entry exists.
// NOTE(review): this assumes the library stores the model under
// `<cache>/<MODEL_ID>` and honours TRANSFORMERS_CACHE — confirm against
// the @xenova/transformers version in use.
export function modelDownloadStatus() {
  if (isMockMode()) return null;

  const { TRANSFORMERS_CACHE: cacheRoot } = process.env;
  // Without an explicit cache root there is nothing we can inspect
  // here, so stay quiet.
  if (!cacheRoot) return null;

  const alreadyCached = existsSync(join(cacheRoot, MODEL_ID));
  if (alreadyCached) return null;

  return [
    `Tier 1 embedding model ${MODEL_ID} has not been downloaded yet. `,
    `First run will pay the one-time ~23 MB download cost.`,
  ].join("");
}
|