@hiveai/embeddings 0.4.5 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +63 -1
- package/dist/index.js +132 -0
- package/dist/index.js.map +1 -1
- package/package.json +3 -3
package/dist/index.d.ts
CHANGED
|
@@ -72,4 +72,66 @@ declare function semanticSearch(paths: HaivePaths, query: string, options?: {
|
|
|
72
72
|
index: EmbeddingIndex;
|
|
73
73
|
} | null>;
|
|
74
74
|
|
|
75
|
-
|
|
75
|
+
/**
 * One embedded code symbol as stored in the code semantic-search index.
 */
interface CodeEmbeddingEntry {
    /** Stable id: `${file}#${name}`. */
    id: string;
    /** Code-map file key the symbol was exported from. */
    file: string;
    /** Exported symbol name. */
    name: string;
    /** Symbol kind as reported by the code-map. */
    kind: string;
    /** Line of the symbol as reported by the code-map. */
    line: number;
    /** Optional description of the symbol; omitted when the code-map has none. */
    description?: string;
    /** Truncated SHA-256 (first 32 hex chars) of the embedded text. */
    hash: string;
    /** Embedding vector stored as a plain array so it can be serialized as JSON. */
    vector: Array<number>;
}
|
|
86
|
+
/**
 * On-disk shape of the code semantic-search index.
 */
interface CodeEmbeddingIndex {
    /** Name of the embedding model the vectors were produced with. */
    model: string;
    /** Length of every vector in `entries`. */
    dimension: number;
    /** ISO-8601 timestamp refreshed each time the index is saved. */
    updated_at: string;
    /** `generated_at` value of the code-map this index was built from. */
    source_generated_at: string;
    /** One entry per exported symbol. */
    entries: Array<CodeEmbeddingEntry>;
}
|
|
93
|
+
/** Returns the path of the code embeddings index file (`code-embeddings-index.json`) inside the embeddings cache directory. */
declare function codeIndexPath(paths: HaivePaths): string;
|
|
94
|
+
/** Creates an index with no entries; `model` and `dimension` default to `DEFAULT_MODEL` / `DEFAULT_DIMENSION`, `sourceGeneratedAt` defaults to `""`. */
declare function emptyCodeIndex(model?: string, dimension?: number, sourceGeneratedAt?: string): CodeEmbeddingIndex;
|
|
95
|
+
/** Reads and JSON-parses the code index file; resolves to `null` when the file does not exist. The parsed content is not validated. */
declare function loadCodeIndex(paths: HaivePaths): Promise<CodeEmbeddingIndex | null>;
|
|
96
|
+
/** Writes the index as pretty-printed JSON, creating the cache directory if needed. Mutates `index.updated_at` to the current time. */
declare function saveCodeIndex(paths: HaivePaths, index: CodeEmbeddingIndex): Promise<void>;
|
|
97
|
+
/** Builds the text that gets embedded for a symbol: name, kind, words derived from the file's base name, and the optional description. */
declare function buildCodeEntryText(file: string, name: string, kind: string, description?: string): string;
|
|
98
|
+
|
|
99
|
+
/**
 * Counters describing what a code-index rebuild did.
 */
interface CodeIndexUpdateReport {
    /** Number of entries in the rebuilt index. */
    total: number;
    /** Symbols that had no prior entry. */
    added: number;
    /** Symbols whose prior entry was replaced. */
    updated: number;
    /** Symbols whose prior entry was kept. */
    unchanged: number;
    /** Prior entries whose id no longer appears in the code-map. */
    removed: number;
}
|
|
106
|
+
/**
 * Builds or refreshes the code semantic-search index from the code-map and
 * saves it to disk.
 *
 * Every exported symbol in the code-map becomes exactly one entry, so a search
 * hit points at a precise file / line / name. Prior entries are discarded when
 * the stored model or dimension differs from the embedder's.
 *
 * @param paths - Project paths used to locate the code-map and the cache.
 * @param embedder - Embedder used to encode new or changed entries.
 * @returns The saved index together with counters for the rebuild.
 * @throws Error when no code-map can be loaded.
 */
declare function rebuildCodeIndex(
    paths: HaivePaths,
    embedder: EmbedderLike,
): Promise<{
    index: CodeEmbeddingIndex;
    report: CodeIndexUpdateReport;
}>;
|
|
118
|
+
|
|
119
|
+
/**
 * A single result of a code semantic search.
 */
interface CodeSearchHit {
    /** File the matching symbol lives in. */
    file: string;
    /** Name of the matching symbol. */
    name: string;
    /** Symbol kind copied from the index entry. */
    kind: string;
    /** Line copied from the index entry. */
    line: number;
    /** Description copied from the index entry, when it has one. */
    description?: string;
    /** Cosine similarity between the query vector and the entry vector. */
    score: number;
}
|
|
127
|
+
/**
 * Searches the code index for the symbols most similar to `query`.
 *
 * @param paths - Project paths used to load the index when `options.index` is not given.
 * @param query - Free-text query to embed and compare against the entries.
 * @param options - `limit` (default 5) caps the number of hits, `minScore`
 *   (default 0) drops lower-scoring hits, `embedder` and `index` override the
 *   embedder created from the index model and the index loaded from disk.
 * @returns Hits sorted by descending score plus the index that was searched,
 *   or `null` when there is no index or it has no entries.
 * @throws Error when the embedder dimension differs from the index dimension.
 */
declare function codeSemanticSearch(
    paths: HaivePaths,
    query: string,
    options?: {
        limit?: number;
        minScore?: number;
        embedder?: EmbedderLike;
        index?: CodeEmbeddingIndex;
    },
): Promise<{
    hits: CodeSearchHit[];
    index: CodeEmbeddingIndex;
} | null>;
|
|
136
|
+
|
|
137
|
+
export { type CodeEmbeddingEntry, type CodeEmbeddingIndex, type CodeIndexUpdateReport, type CodeSearchHit, DEFAULT_DIMENSION, DEFAULT_MODEL, Embedder, type EmbedderLike, type EmbeddingEntry, type EmbeddingIndex, type IndexUpdateReport, type SemanticHit, buildCodeEntryText, buildEntryText, cacheDir, codeIndexPath, codeSemanticSearch, cosine, emptyCodeIndex, emptyIndex, hashContent, indexPath, indexStat, loadCodeIndex, loadIndex, rebuildCodeIndex, rebuildIndex, saveCodeIndex, saveIndex, semanticSearch };
|
package/dist/index.js
CHANGED
|
@@ -179,19 +179,151 @@ async function semanticSearch(paths, query, options = {}) {
|
|
|
179
179
|
const scored = index.entries.map((e) => ({ id: e.id, file_path: e.file_path, score: cosine(queryVec, e.vector) })).filter((h) => h.score >= minScore).sort((a, b) => b.score - a.score).slice(0, limit);
|
|
180
180
|
return { hits: scored, index };
|
|
181
181
|
}
|
|
182
|
+
|
|
183
|
+
// src/code-index-cache.ts
|
|
184
|
+
import { mkdir as mkdir2, readFile as readFile2, writeFile as writeFile2 } from "fs/promises";
|
|
185
|
+
import { existsSync as existsSync2 } from "fs";
|
|
186
|
+
import path2 from "path";
|
|
187
|
+
/** File name of the code embeddings index; joined onto the embeddings cache directory by codeIndexPath(). */
var CODE_INDEX_FILE = "code-embeddings-index.json";
|
|
188
|
+
/**
 * Resolve the location of the code embeddings index file.
 *
 * @param paths Project paths object, forwarded to cacheDir().
 * @returns The cache directory joined with CODE_INDEX_FILE.
 */
function codeIndexPath(paths) {
  const directory = cacheDir(paths);
  return path2.join(directory, CODE_INDEX_FILE);
}
|
|
191
|
+
/**
 * Create a code index that contains no entries.
 *
 * @param model Embedding model name (defaults to DEFAULT_MODEL).
 * @param dimension Vector length (defaults to DEFAULT_DIMENSION).
 * @param sourceGeneratedAt `generated_at` of the code-map the index belongs to (defaults to "").
 * @returns A fresh index object whose `updated_at` is the current time in ISO format.
 */
function emptyCodeIndex(model = DEFAULT_MODEL, dimension = DEFAULT_DIMENSION, sourceGeneratedAt = "") {
  const createdAt = new Date().toISOString();
  const blank = {
    model,
    dimension,
    updated_at: createdAt,
    source_generated_at: sourceGeneratedAt,
    entries: []
  };
  return blank;
}
|
|
200
|
+
/**
 * Load the code embeddings index from disk.
 *
 * The file is read directly instead of being probed with a synchronous
 * existence check first: that avoids blocking the event loop and removes the
 * window in which the file could disappear between the check and the read.
 *
 * @param paths Project paths object used to locate the index file.
 * @returns The parsed index, or null when the index file does not exist.
 *   The parsed JSON is returned as-is; its shape is not validated.
 * @throws Any read error other than a missing file, and SyntaxError when the
 *   file is not valid JSON.
 */
async function loadCodeIndex(paths) {
  let raw;
  try {
    raw = await readFile2(codeIndexPath(paths), "utf8");
  } catch (err) {
    // A missing file (or a missing/non-directory path component) means "no index yet".
    if (err instanceof Error && "code" in err && (err.code === "ENOENT" || err.code === "ENOTDIR")) {
      return null;
    }
    throw err;
  }
  return JSON.parse(raw);
}
|
|
205
|
+
/**
 * Persist the code embeddings index.
 *
 * Ensures the cache directory exists, stamps `index.updated_at` with the
 * current time (the passed object is mutated), then writes the index as
 * 2-space-indented JSON.
 *
 * @param paths Project paths object used to locate the cache directory and index file.
 * @param index Index to write.
 */
async function saveCodeIndex(paths, index) {
  await mkdir2(cacheDir(paths), { recursive: true });
  index.updated_at = new Date().toISOString();
  const target = codeIndexPath(paths);
  const serialized = JSON.stringify(index, null, 2);
  await writeFile2(target, serialized, "utf8");
}
|
|
211
|
+
/**
 * Build the text that is embedded (and hashed) for one code symbol.
 *
 * The text is `name kind filenameHints description`, trimmed. The filename
 * hints are the file's base name without its last extension, with runs of
 * `.`, `_` and `-` turned into single spaces (e.g. "auth.controller.ts"
 * becomes "auth controller").
 *
 * The base name is taken after the last `/` or `\` so that Windows-style
 * paths do not put the whole directory path into the hints. Output for
 * `/`-separated paths is unchanged, so existing hashes stay valid.
 *
 * @param file Path of the file that exports the symbol.
 * @param name Symbol name.
 * @param kind Symbol kind.
 * @param description Optional symbol description.
 * @returns The text to embed.
 */
function buildCodeEntryText(file, name, kind, description) {
  const baseName = file.split(/[\\/]/).pop() ?? "";
  const filenameHints = baseName.replace(/\.[^.]+$/, "").replace(/[._-]+/g, " ");
  return `${name} ${kind} ${filenameHints} ${description ?? ""}`.trim();
}
|
|
215
|
+
|
|
216
|
+
// src/code-indexer.ts
|
|
217
|
+
import { loadCodeMap } from "@hiveai/core";
|
|
218
|
+
import { createHash as createHash2 } from "crypto";
|
|
219
|
+
/**
 * Fingerprint an entry's embedded text.
 *
 * @param text Text to hash.
 * @returns The first 32 hex characters of the SHA-256 digest of `text`.
 */
function hashEntry(text) {
  const hasher = createHash2("sha256");
  hasher.update(text);
  const fullHex = hasher.digest("hex");
  return fullHex.substring(0, 32);
}
|
|
222
|
+
/**
 * Build (or refresh) the code semantic-search index from the code-map and
 * save it.
 *
 * Each export listed in the code-map becomes one entry with id
 * `${filePath}#${exportName}`. A prior entry's vector is reused whenever the
 * hash of its embedded text is unchanged, so only new or textually changed
 * symbols are sent to the embedder. The line number is not part of the
 * embedded text, so a symbol that merely moved keeps its vector and only gets
 * its metadata refreshed (still reported as `updated`).
 *
 * If the stored index was built with a different model or dimension, all
 * prior entries are discarded and everything is embedded again.
 *
 * NOTE(review): two exports with the same name in one file would share an id;
 * confirm the code-map cannot emit that.
 *
 * @param paths Project paths object used to locate the code-map and the index.
 * @param embedder Object exposing `model`, `dimension` and `encode(text)`.
 * @returns `{ index, report }`: the saved index and counters
 *   (`total`, `added`, `updated`, `unchanged`, `removed`).
 * @throws Error when no code-map is found.
 */
async function rebuildCodeIndex(paths, embedder) {
  const codeMap = await loadCodeMap(paths);
  if (!codeMap) {
    throw new Error(
      "No code-map found. Run `haive index code` to generate `.ai/code-map.json` first."
    );
  }
  const existing = await loadCodeIndex(paths) ?? emptyCodeIndex(embedder.model, embedder.dimension, codeMap.generated_at);
  if (existing.model !== embedder.model || existing.dimension !== embedder.dimension) {
    // Vectors from another model/dimension are not comparable: start over.
    existing.entries = [];
    existing.model = embedder.model;
    existing.dimension = embedder.dimension;
  }
  const byId = new Map(existing.entries.map((e) => [e.id, e]));
  const nextEntries = [];
  const seenIds = /* @__PURE__ */ new Set();
  let added = 0;
  let updated = 0;
  let unchanged = 0;
  for (const [filePath, fileEntry] of Object.entries(codeMap.files)) {
    for (const exp of fileEntry.exports) {
      const id = `${filePath}#${exp.name}`;
      seenIds.add(id);
      const text = buildCodeEntryText(filePath, exp.name, exp.kind, exp.description);
      const hash = hashEntry(text);
      const prior = byId.get(id);
      const metadata = {
        file: filePath,
        name: exp.name,
        kind: exp.kind,
        line: exp.line,
        ...exp.description ? { description: exp.description } : {}
      };
      if (prior && prior.hash === hash) {
        // Same embedded text: keep the stored vector, refresh metadata only.
        nextEntries.push({ ...prior, ...metadata });
        if (prior.line === exp.line) unchanged++;
        else updated++;
        continue;
      }
      const vector = Array.from(await embedder.encode(text));
      nextEntries.push({ id, ...metadata, hash, vector });
      if (prior) updated++;
      else added++;
    }
  }
  // Prior entries whose id was not produced by the current code-map.
  const removed = existing.entries.filter((e) => !seenIds.has(e.id)).length;
  existing.entries = nextEntries;
  existing.source_generated_at = codeMap.generated_at;
  await saveCodeIndex(paths, existing);
  return {
    index: existing,
    report: {
      total: nextEntries.length,
      added,
      updated,
      unchanged,
      removed
    }
  };
}
|
|
283
|
+
|
|
284
|
+
// src/code-search.ts
|
|
285
|
+
/**
 * Rank the indexed code symbols by similarity to a query.
 *
 * @param paths Project paths object, used to load the index when `options.index` is absent.
 * @param query Text to embed and compare against every entry.
 * @param options Optional `index`, `embedder`, `minScore` (default 0) and `limit` (default 5).
 * @returns `{ hits, index }` with hits ordered by descending cosine score, or
 *   null when no index is available or it has no entries.
 * @throws Error when the embedder dimension differs from the index dimension.
 */
async function codeSemanticSearch(paths, query, options = {}) {
  const index = options.index ?? await loadCodeIndex(paths);
  if (!index || index.entries.length === 0) {
    return null;
  }
  const embedder = options.embedder ?? await Embedder.create(index.model);
  if (embedder.dimension !== index.dimension) {
    throw new Error(
      `Embedder dimension (${embedder.dimension}) differs from code index (${index.dimension}). Re-run \`haive index code-search\`.`
    );
  }
  const queryVec = await embedder.encode(query);
  const minScore = options.minScore ?? 0;
  const limit = options.limit ?? 5;
  // Score every entry first, then filter, so a bad vector fails the same way regardless of minScore.
  const candidates = [];
  for (const entry of index.entries) {
    const hit = {
      file: entry.file,
      name: entry.name,
      kind: entry.kind,
      line: entry.line,
      ...entry.description ? { description: entry.description } : {},
      score: cosine(queryVec, entry.vector)
    };
    if (hit.score >= minScore) {
      candidates.push(hit);
    }
  }
  candidates.sort((left, right) => right.score - left.score);
  const hits = candidates.slice(0, limit);
  return { hits, index };
}
|
|
182
307
|
export {
|
|
183
308
|
DEFAULT_DIMENSION,
|
|
184
309
|
DEFAULT_MODEL,
|
|
185
310
|
Embedder,
|
|
311
|
+
buildCodeEntryText,
|
|
186
312
|
buildEntryText,
|
|
187
313
|
cacheDir,
|
|
314
|
+
codeIndexPath,
|
|
315
|
+
codeSemanticSearch,
|
|
188
316
|
cosine,
|
|
317
|
+
emptyCodeIndex,
|
|
189
318
|
emptyIndex,
|
|
190
319
|
hashContent,
|
|
191
320
|
indexPath,
|
|
192
321
|
indexStat,
|
|
322
|
+
loadCodeIndex,
|
|
193
323
|
loadIndex,
|
|
324
|
+
rebuildCodeIndex,
|
|
194
325
|
rebuildIndex,
|
|
326
|
+
saveCodeIndex,
|
|
195
327
|
saveIndex,
|
|
196
328
|
semanticSearch
|
|
197
329
|
};
|
package/dist/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/embedder.ts","../src/index-cache.ts","../src/indexer.ts","../src/search.ts"],"sourcesContent":["export const DEFAULT_MODEL = \"Xenova/bge-small-en-v1.5\";\nexport const DEFAULT_DIMENSION = 384;\n\nexport interface EmbedderLike {\n readonly model: string;\n readonly dimension: number;\n encode(text: string): Promise<Float32Array>;\n}\n\ninterface FeatureExtractionPipeline {\n (text: string | string[], options: { pooling: \"mean\"; normalize: boolean }): Promise<{\n data: Float32Array | number[];\n dims: number[];\n }>;\n}\n\nlet cachedPipeline: FeatureExtractionPipeline | null = null;\nlet cachedModel: string | null = null;\n\nasync function loadPipeline(model: string): Promise<FeatureExtractionPipeline> {\n if (cachedPipeline && cachedModel === model) return cachedPipeline;\n const { pipeline, env } = await import(\"@xenova/transformers\");\n // Allow remote model download by default; users can pre-cache for offline use.\n env.allowLocalModels = true;\n env.allowRemoteModels = true;\n const pipe = (await pipeline(\"feature-extraction\", model)) as unknown as FeatureExtractionPipeline;\n cachedPipeline = pipe;\n cachedModel = model;\n return pipe;\n}\n\nexport class Embedder implements EmbedderLike {\n private constructor(\n private readonly pipe: FeatureExtractionPipeline,\n public readonly model: string,\n public readonly dimension: number,\n ) {}\n\n static async create(model: string = DEFAULT_MODEL): Promise<Embedder> {\n const pipe = await loadPipeline(model);\n const probe = await pipe(\"dimension probe\", { pooling: \"mean\", normalize: true });\n const dim = probe.data instanceof Float32Array ? probe.data.length : probe.data.length;\n return new Embedder(pipe, model, dim);\n }\n\n async encode(text: string): Promise<Float32Array> {\n const result = await this.pipe(text, { pooling: \"mean\", normalize: true });\n return result.data instanceof Float32Array\n ? 
result.data\n : Float32Array.from(result.data);\n }\n\n async encodeMany(texts: string[]): Promise<Float32Array[]> {\n const out: Float32Array[] = [];\n for (const t of texts) {\n out.push(await this.encode(t));\n }\n return out;\n }\n}\n\nexport function cosine(a: Float32Array | number[], b: Float32Array | number[]): number {\n if (a.length !== b.length) {\n throw new Error(`vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n let dot = 0;\n let na = 0;\n let nb = 0;\n for (let i = 0; i < a.length; i++) {\n const av = a[i] as number;\n const bv = b[i] as number;\n dot += av * bv;\n na += av * av;\n nb += bv * bv;\n }\n if (na === 0 || nb === 0) return 0;\n return dot / (Math.sqrt(na) * Math.sqrt(nb));\n}\n","import { createHash } from \"node:crypto\";\nimport { mkdir, readFile, stat, writeFile } from \"node:fs/promises\";\nimport { existsSync } from \"node:fs\";\nimport path from \"node:path\";\nimport type { HaivePaths } from \"@hiveai/core\";\nimport { DEFAULT_DIMENSION, DEFAULT_MODEL } from \"./embedder.js\";\n\nexport const INDEX_FILE = \"embeddings-index.json\";\n\nexport interface EmbeddingEntry {\n id: string;\n file_path: string;\n hash: string;\n vector: number[];\n}\n\nexport interface EmbeddingIndex {\n model: string;\n dimension: number;\n updated_at: string;\n entries: EmbeddingEntry[];\n}\n\nexport function cacheDir(paths: HaivePaths): string {\n return path.join(paths.haiveDir, \".cache\", \"embeddings\");\n}\n\nexport function indexPath(paths: HaivePaths): string {\n return path.join(cacheDir(paths), INDEX_FILE);\n}\n\nexport function hashContent(text: string): string {\n return createHash(\"sha256\").update(text).digest(\"hex\");\n}\n\nexport function emptyIndex(model = DEFAULT_MODEL, dimension = DEFAULT_DIMENSION): EmbeddingIndex {\n return {\n model,\n dimension,\n updated_at: new Date().toISOString(),\n entries: [],\n };\n}\n\nexport async function loadIndex(paths: HaivePaths): Promise<EmbeddingIndex | null> {\n const file = 
indexPath(paths);\n if (!existsSync(file)) return null;\n const raw = await readFile(file, \"utf8\");\n return JSON.parse(raw) as EmbeddingIndex;\n}\n\nexport async function saveIndex(paths: HaivePaths, index: EmbeddingIndex): Promise<void> {\n const dir = cacheDir(paths);\n await mkdir(dir, { recursive: true });\n index.updated_at = new Date().toISOString();\n await writeFile(indexPath(paths), JSON.stringify(index, null, 2), \"utf8\");\n}\n\nexport async function indexStat(paths: HaivePaths): Promise<{\n exists: boolean;\n count: number;\n model: string | null;\n updatedAt: string | null;\n sizeBytes: number;\n}> {\n const file = indexPath(paths);\n if (!existsSync(file)) {\n return { exists: false, count: 0, model: null, updatedAt: null, sizeBytes: 0 };\n }\n const idx = await loadIndex(paths);\n const st = await stat(file);\n return {\n exists: true,\n count: idx?.entries.length ?? 0,\n model: idx?.model ?? null,\n updatedAt: idx?.updated_at ?? null,\n sizeBytes: st.size,\n };\n}\n\nexport function buildEntryText(id: string, tags: string[], body: string): string {\n // Concatenate id + tags + body so search works on metadata too.\n // Tags are weighted by repetition so they contribute more to the embedding.\n const tagPart = tags.length ? `${tags.join(\" \")} ${tags.join(\" \")} ` : \"\";\n return `${id} ${tagPart}${body}`;\n}\n","import { loadMemoriesFromDir, type HaivePaths } from \"@hiveai/core\";\nimport type { EmbedderLike } from \"./embedder.js\";\nimport {\n buildEntryText,\n emptyIndex,\n hashContent,\n loadIndex,\n saveIndex,\n type EmbeddingEntry,\n type EmbeddingIndex,\n} from \"./index-cache.js\";\n\nexport interface IndexUpdateReport {\n total: number;\n added: number;\n updated: number;\n unchanged: number;\n removed: number;\n}\n\nexport async function rebuildIndex(\n paths: HaivePaths,\n embedder: EmbedderLike,\n): Promise<{ index: EmbeddingIndex; report: IndexUpdateReport }> {\n const existing = (await loadIndex(paths)) ?? 
emptyIndex(embedder.model, embedder.dimension);\n // If model changed, reset.\n if (existing.model !== embedder.model || existing.dimension !== embedder.dimension) {\n existing.entries = [];\n existing.model = embedder.model;\n existing.dimension = embedder.dimension;\n }\n\n const memories = await loadMemoriesFromDir(paths.memoriesDir);\n const byId = new Map(existing.entries.map((e) => [e.id, e]));\n const seenIds = new Set<string>();\n\n let added = 0;\n let updated = 0;\n let unchanged = 0;\n\n const nextEntries: EmbeddingEntry[] = [];\n\n for (const { memory, filePath } of memories) {\n const id = memory.frontmatter.id;\n seenIds.add(id);\n const text = buildEntryText(id, memory.frontmatter.tags, memory.body);\n const hash = hashContent(text);\n const prior = byId.get(id);\n\n if (prior && prior.hash === hash) {\n nextEntries.push({ ...prior, file_path: filePath });\n unchanged++;\n continue;\n }\n\n const vector = Array.from(await embedder.encode(text));\n nextEntries.push({ id, file_path: filePath, hash, vector });\n if (prior) {\n updated++;\n } else {\n added++;\n }\n }\n\n const removed = existing.entries.filter((e) => !seenIds.has(e.id)).length;\n existing.entries = nextEntries;\n await saveIndex(paths, existing);\n\n return {\n index: existing,\n report: {\n total: nextEntries.length,\n added,\n updated,\n unchanged,\n removed,\n },\n };\n}\n","import type { HaivePaths } from \"@hiveai/core\";\nimport { cosine, Embedder, type EmbedderLike } from \"./embedder.js\";\nimport { loadIndex, type EmbeddingIndex } from \"./index-cache.js\";\n\nexport interface SemanticHit {\n id: string;\n file_path: string;\n score: number;\n}\n\nexport async function semanticSearch(\n paths: HaivePaths,\n query: string,\n options: {\n limit?: number;\n minScore?: number;\n embedder?: EmbedderLike;\n index?: EmbeddingIndex;\n } = {},\n): Promise<{ hits: SemanticHit[]; index: EmbeddingIndex } | null> {\n const index = options.index ?? 
(await loadIndex(paths));\n if (!index || index.entries.length === 0) return null;\n\n const embedder = options.embedder ?? (await Embedder.create(index.model));\n if (embedder.dimension !== index.dimension) {\n throw new Error(\n `Embedder dimension (${embedder.dimension}) differs from index (${index.dimension}). Re-run \\`haive embeddings index\\`.`,\n );\n }\n\n const queryVec = await embedder.encode(query);\n const minScore = options.minScore ?? 0;\n const limit = options.limit ?? 10;\n\n const scored = index.entries\n .map((e) => ({ id: e.id, file_path: e.file_path, score: cosine(queryVec, e.vector) }))\n .filter((h) => h.score >= minScore)\n .sort((a, b) => b.score - a.score)\n .slice(0, limit);\n\n return { hits: scored, index };\n}\n"],"mappings":";AAAO,IAAM,gBAAgB;AACtB,IAAM,oBAAoB;AAejC,IAAI,iBAAmD;AACvD,IAAI,cAA6B;AAEjC,eAAe,aAAa,OAAmD;AAC7E,MAAI,kBAAkB,gBAAgB,MAAO,QAAO;AACpD,QAAM,EAAE,UAAU,IAAI,IAAI,MAAM,OAAO,sBAAsB;AAE7D,MAAI,mBAAmB;AACvB,MAAI,oBAAoB;AACxB,QAAM,OAAQ,MAAM,SAAS,sBAAsB,KAAK;AACxD,mBAAiB;AACjB,gBAAc;AACd,SAAO;AACT;AAEO,IAAM,WAAN,MAAM,UAAiC;AAAA,EACpC,YACW,MACD,OACA,WAChB;AAHiB;AACD;AACA;AAAA,EACf;AAAA,EAHgB;AAAA,EACD;AAAA,EACA;AAAA,EAGlB,aAAa,OAAO,QAAgB,eAAkC;AACpE,UAAM,OAAO,MAAM,aAAa,KAAK;AACrC,UAAM,QAAQ,MAAM,KAAK,mBAAmB,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AAChF,UAAM,MAAM,MAAM,gBAAgB,eAAe,MAAM,KAAK,SAAS,MAAM,KAAK;AAChF,WAAO,IAAI,UAAS,MAAM,OAAO,GAAG;AAAA,EACtC;AAAA,EAEA,MAAM,OAAO,MAAqC;AAChD,UAAM,SAAS,MAAM,KAAK,KAAK,MAAM,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AACzE,WAAO,OAAO,gBAAgB,eAC1B,OAAO,OACP,aAAa,KAAK,OAAO,IAAI;AAAA,EACnC;AAAA,EAEA,MAAM,WAAW,OAA0C;AACzD,UAAM,MAAsB,CAAC;AAC7B,eAAW,KAAK,OAAO;AACrB,UAAI,KAAK,MAAM,KAAK,OAAO,CAAC,CAAC;AAAA,IAC/B;AACA,WAAO;AAAA,EACT;AACF;AAEO,SAAS,OAAO,GAA4B,GAAoC;AACrF,MAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,UAAM,IAAI,MAAM,8BAA8B,EAAE,MAAM,OAAO,EAAE,MAAM,EAAE;AAAA,EACzE;AACA,MAAI,MAAM;AACV,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,UAAM,KAAK,EAAE,CAAC;AACd,UAAM,KAAK,EAAE,CAAC;AACd,WAAO,KAAK;AACZ,UAAM,K
AAK;AACX,UAAM,KAAK;AAAA,EACb;AACA,MAAI,OAAO,KAAK,OAAO,EAAG,QAAO;AACjC,SAAO,OAAO,KAAK,KAAK,EAAE,IAAI,KAAK,KAAK,EAAE;AAC5C;;;AC7EA,SAAS,kBAAkB;AAC3B,SAAS,OAAO,UAAU,MAAM,iBAAiB;AACjD,SAAS,kBAAkB;AAC3B,OAAO,UAAU;AAIV,IAAM,aAAa;AAgBnB,SAAS,SAAS,OAA2B;AAClD,SAAO,KAAK,KAAK,MAAM,UAAU,UAAU,YAAY;AACzD;AAEO,SAAS,UAAU,OAA2B;AACnD,SAAO,KAAK,KAAK,SAAS,KAAK,GAAG,UAAU;AAC9C;AAEO,SAAS,YAAY,MAAsB;AAChD,SAAO,WAAW,QAAQ,EAAE,OAAO,IAAI,EAAE,OAAO,KAAK;AACvD;AAEO,SAAS,WAAW,QAAQ,eAAe,YAAY,mBAAmC;AAC/F,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,IACnC,SAAS,CAAC;AAAA,EACZ;AACF;AAEA,eAAsB,UAAU,OAAmD;AACjF,QAAM,OAAO,UAAU,KAAK;AAC5B,MAAI,CAAC,WAAW,IAAI,EAAG,QAAO;AAC9B,QAAM,MAAM,MAAM,SAAS,MAAM,MAAM;AACvC,SAAO,KAAK,MAAM,GAAG;AACvB;AAEA,eAAsB,UAAU,OAAmB,OAAsC;AACvF,QAAM,MAAM,SAAS,KAAK;AAC1B,QAAM,MAAM,KAAK,EAAE,WAAW,KAAK,CAAC;AACpC,QAAM,cAAa,oBAAI,KAAK,GAAE,YAAY;AAC1C,QAAM,UAAU,UAAU,KAAK,GAAG,KAAK,UAAU,OAAO,MAAM,CAAC,GAAG,MAAM;AAC1E;AAEA,eAAsB,UAAU,OAM7B;AACD,QAAM,OAAO,UAAU,KAAK;AAC5B,MAAI,CAAC,WAAW,IAAI,GAAG;AACrB,WAAO,EAAE,QAAQ,OAAO,OAAO,GAAG,OAAO,MAAM,WAAW,MAAM,WAAW,EAAE;AAAA,EAC/E;AACA,QAAM,MAAM,MAAM,UAAU,KAAK;AACjC,QAAM,KAAK,MAAM,KAAK,IAAI;AAC1B,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,OAAO,KAAK,QAAQ,UAAU;AAAA,IAC9B,OAAO,KAAK,SAAS;AAAA,IACrB,WAAW,KAAK,cAAc;AAAA,IAC9B,WAAW,GAAG;AAAA,EAChB;AACF;AAEO,SAAS,eAAe,IAAY,MAAgB,MAAsB;AAG/E,QAAM,UAAU,KAAK,SAAS,GAAG,KAAK,KAAK,GAAG,CAAC,IAAI,KAAK,KAAK,GAAG,CAAC,MAAM;AACvE,SAAO,GAAG,EAAE,IAAI,OAAO,GAAG,IAAI;AAChC;;;ACrFA,SAAS,2BAA4C;AAoBrD,eAAsB,aACpB,OACA,UAC+D;AAC/D,QAAM,WAAY,MAAM,UAAU,KAAK,KAAM,WAAW,SAAS,OAAO,SAAS,SAAS;AAE1F,MAAI,SAAS,UAAU,SAAS,SAAS,SAAS,cAAc,SAAS,WAAW;AAClF,aAAS,UAAU,CAAC;AACpB,aAAS,QAAQ,SAAS;AAC1B,aAAS,YAAY,SAAS;AAAA,EAChC;AAEA,QAAM,WAAW,MAAM,oBAAoB,MAAM,WAAW;AAC5D,QAAM,OAAO,IAAI,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AAC3D,QAAM,UAAU,oBAAI,IAAY;AAEhC,MAAI,QAAQ;AACZ,MAAI,UAAU;AACd,MAAI,YAAY;AAEhB,QAAM,cAAgC,CAAC;AAEvC,aAAW,EAAE,QAAQ,SAAS,KAAK,UAAU;AAC3C,UAAM,KAAK,OAAO,YAAY;AAC9B,YAAQ,IAAI,EAAE;AACd,UAAM,OA
AO,eAAe,IAAI,OAAO,YAAY,MAAM,OAAO,IAAI;AACpE,UAAM,OAAO,YAAY,IAAI;AAC7B,UAAM,QAAQ,KAAK,IAAI,EAAE;AAEzB,QAAI,SAAS,MAAM,SAAS,MAAM;AAChC,kBAAY,KAAK,EAAE,GAAG,OAAO,WAAW,SAAS,CAAC;AAClD;AACA;AAAA,IACF;AAEA,UAAM,SAAS,MAAM,KAAK,MAAM,SAAS,OAAO,IAAI,CAAC;AACrD,gBAAY,KAAK,EAAE,IAAI,WAAW,UAAU,MAAM,OAAO,CAAC;AAC1D,QAAI,OAAO;AACT;AAAA,IACF,OAAO;AACL;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,SAAS,QAAQ,OAAO,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC,EAAE;AACnE,WAAS,UAAU;AACnB,QAAM,UAAU,OAAO,QAAQ;AAE/B,SAAO;AAAA,IACL,OAAO;AAAA,IACP,QAAQ;AAAA,MACN,OAAO,YAAY;AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;ACpEA,eAAsB,eACpB,OACA,OACA,UAKI,CAAC,GAC2D;AAChE,QAAM,QAAQ,QAAQ,SAAU,MAAM,UAAU,KAAK;AACrD,MAAI,CAAC,SAAS,MAAM,QAAQ,WAAW,EAAG,QAAO;AAEjD,QAAM,WAAW,QAAQ,YAAa,MAAM,SAAS,OAAO,MAAM,KAAK;AACvE,MAAI,SAAS,cAAc,MAAM,WAAW;AAC1C,UAAM,IAAI;AAAA,MACR,uBAAuB,SAAS,SAAS,yBAAyB,MAAM,SAAS;AAAA,IACnF;AAAA,EACF;AAEA,QAAM,WAAW,MAAM,SAAS,OAAO,KAAK;AAC5C,QAAM,WAAW,QAAQ,YAAY;AACrC,QAAM,QAAQ,QAAQ,SAAS;AAE/B,QAAM,SAAS,MAAM,QAClB,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,WAAW,EAAE,WAAW,OAAO,OAAO,UAAU,EAAE,MAAM,EAAE,EAAE,EACpF,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ,EACjC,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,MAAM,GAAG,KAAK;AAEjB,SAAO,EAAE,MAAM,QAAQ,MAAM;AAC/B;","names":[]}
|
|
1
|
+
{"version":3,"sources":["../src/embedder.ts","../src/index-cache.ts","../src/indexer.ts","../src/search.ts","../src/code-index-cache.ts","../src/code-indexer.ts","../src/code-search.ts"],"sourcesContent":["export const DEFAULT_MODEL = \"Xenova/bge-small-en-v1.5\";\nexport const DEFAULT_DIMENSION = 384;\n\nexport interface EmbedderLike {\n readonly model: string;\n readonly dimension: number;\n encode(text: string): Promise<Float32Array>;\n}\n\ninterface FeatureExtractionPipeline {\n (text: string | string[], options: { pooling: \"mean\"; normalize: boolean }): Promise<{\n data: Float32Array | number[];\n dims: number[];\n }>;\n}\n\nlet cachedPipeline: FeatureExtractionPipeline | null = null;\nlet cachedModel: string | null = null;\n\nasync function loadPipeline(model: string): Promise<FeatureExtractionPipeline> {\n if (cachedPipeline && cachedModel === model) return cachedPipeline;\n const { pipeline, env } = await import(\"@xenova/transformers\");\n // Allow remote model download by default; users can pre-cache for offline use.\n env.allowLocalModels = true;\n env.allowRemoteModels = true;\n const pipe = (await pipeline(\"feature-extraction\", model)) as unknown as FeatureExtractionPipeline;\n cachedPipeline = pipe;\n cachedModel = model;\n return pipe;\n}\n\nexport class Embedder implements EmbedderLike {\n private constructor(\n private readonly pipe: FeatureExtractionPipeline,\n public readonly model: string,\n public readonly dimension: number,\n ) {}\n\n static async create(model: string = DEFAULT_MODEL): Promise<Embedder> {\n const pipe = await loadPipeline(model);\n const probe = await pipe(\"dimension probe\", { pooling: \"mean\", normalize: true });\n const dim = probe.data instanceof Float32Array ? 
probe.data.length : probe.data.length;\n return new Embedder(pipe, model, dim);\n }\n\n async encode(text: string): Promise<Float32Array> {\n const result = await this.pipe(text, { pooling: \"mean\", normalize: true });\n return result.data instanceof Float32Array\n ? result.data\n : Float32Array.from(result.data);\n }\n\n async encodeMany(texts: string[]): Promise<Float32Array[]> {\n const out: Float32Array[] = [];\n for (const t of texts) {\n out.push(await this.encode(t));\n }\n return out;\n }\n}\n\nexport function cosine(a: Float32Array | number[], b: Float32Array | number[]): number {\n if (a.length !== b.length) {\n throw new Error(`vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n let dot = 0;\n let na = 0;\n let nb = 0;\n for (let i = 0; i < a.length; i++) {\n const av = a[i] as number;\n const bv = b[i] as number;\n dot += av * bv;\n na += av * av;\n nb += bv * bv;\n }\n if (na === 0 || nb === 0) return 0;\n return dot / (Math.sqrt(na) * Math.sqrt(nb));\n}\n","import { createHash } from \"node:crypto\";\nimport { mkdir, readFile, stat, writeFile } from \"node:fs/promises\";\nimport { existsSync } from \"node:fs\";\nimport path from \"node:path\";\nimport type { HaivePaths } from \"@hiveai/core\";\nimport { DEFAULT_DIMENSION, DEFAULT_MODEL } from \"./embedder.js\";\n\nexport const INDEX_FILE = \"embeddings-index.json\";\n\nexport interface EmbeddingEntry {\n id: string;\n file_path: string;\n hash: string;\n vector: number[];\n}\n\nexport interface EmbeddingIndex {\n model: string;\n dimension: number;\n updated_at: string;\n entries: EmbeddingEntry[];\n}\n\nexport function cacheDir(paths: HaivePaths): string {\n return path.join(paths.haiveDir, \".cache\", \"embeddings\");\n}\n\nexport function indexPath(paths: HaivePaths): string {\n return path.join(cacheDir(paths), INDEX_FILE);\n}\n\nexport function hashContent(text: string): string {\n return createHash(\"sha256\").update(text).digest(\"hex\");\n}\n\nexport function emptyIndex(model = 
DEFAULT_MODEL, dimension = DEFAULT_DIMENSION): EmbeddingIndex {\n return {\n model,\n dimension,\n updated_at: new Date().toISOString(),\n entries: [],\n };\n}\n\nexport async function loadIndex(paths: HaivePaths): Promise<EmbeddingIndex | null> {\n const file = indexPath(paths);\n if (!existsSync(file)) return null;\n const raw = await readFile(file, \"utf8\");\n return JSON.parse(raw) as EmbeddingIndex;\n}\n\nexport async function saveIndex(paths: HaivePaths, index: EmbeddingIndex): Promise<void> {\n const dir = cacheDir(paths);\n await mkdir(dir, { recursive: true });\n index.updated_at = new Date().toISOString();\n await writeFile(indexPath(paths), JSON.stringify(index, null, 2), \"utf8\");\n}\n\nexport async function indexStat(paths: HaivePaths): Promise<{\n exists: boolean;\n count: number;\n model: string | null;\n updatedAt: string | null;\n sizeBytes: number;\n}> {\n const file = indexPath(paths);\n if (!existsSync(file)) {\n return { exists: false, count: 0, model: null, updatedAt: null, sizeBytes: 0 };\n }\n const idx = await loadIndex(paths);\n const st = await stat(file);\n return {\n exists: true,\n count: idx?.entries.length ?? 0,\n model: idx?.model ?? null,\n updatedAt: idx?.updated_at ?? null,\n sizeBytes: st.size,\n };\n}\n\nexport function buildEntryText(id: string, tags: string[], body: string): string {\n // Concatenate id + tags + body so search works on metadata too.\n // Tags are weighted by repetition so they contribute more to the embedding.\n const tagPart = tags.length ? 
`${tags.join(\" \")} ${tags.join(\" \")} ` : \"\";\n return `${id} ${tagPart}${body}`;\n}\n","import { loadMemoriesFromDir, type HaivePaths } from \"@hiveai/core\";\nimport type { EmbedderLike } from \"./embedder.js\";\nimport {\n buildEntryText,\n emptyIndex,\n hashContent,\n loadIndex,\n saveIndex,\n type EmbeddingEntry,\n type EmbeddingIndex,\n} from \"./index-cache.js\";\n\nexport interface IndexUpdateReport {\n total: number;\n added: number;\n updated: number;\n unchanged: number;\n removed: number;\n}\n\nexport async function rebuildIndex(\n paths: HaivePaths,\n embedder: EmbedderLike,\n): Promise<{ index: EmbeddingIndex; report: IndexUpdateReport }> {\n const existing = (await loadIndex(paths)) ?? emptyIndex(embedder.model, embedder.dimension);\n // If model changed, reset.\n if (existing.model !== embedder.model || existing.dimension !== embedder.dimension) {\n existing.entries = [];\n existing.model = embedder.model;\n existing.dimension = embedder.dimension;\n }\n\n const memories = await loadMemoriesFromDir(paths.memoriesDir);\n const byId = new Map(existing.entries.map((e) => [e.id, e]));\n const seenIds = new Set<string>();\n\n let added = 0;\n let updated = 0;\n let unchanged = 0;\n\n const nextEntries: EmbeddingEntry[] = [];\n\n for (const { memory, filePath } of memories) {\n const id = memory.frontmatter.id;\n seenIds.add(id);\n const text = buildEntryText(id, memory.frontmatter.tags, memory.body);\n const hash = hashContent(text);\n const prior = byId.get(id);\n\n if (prior && prior.hash === hash) {\n nextEntries.push({ ...prior, file_path: filePath });\n unchanged++;\n continue;\n }\n\n const vector = Array.from(await embedder.encode(text));\n nextEntries.push({ id, file_path: filePath, hash, vector });\n if (prior) {\n updated++;\n } else {\n added++;\n }\n }\n\n const removed = existing.entries.filter((e) => !seenIds.has(e.id)).length;\n existing.entries = nextEntries;\n await saveIndex(paths, existing);\n\n return {\n index: existing,\n 
report: {\n total: nextEntries.length,\n added,\n updated,\n unchanged,\n removed,\n },\n };\n}\n","import type { HaivePaths } from \"@hiveai/core\";\nimport { cosine, Embedder, type EmbedderLike } from \"./embedder.js\";\nimport { loadIndex, type EmbeddingIndex } from \"./index-cache.js\";\n\nexport interface SemanticHit {\n id: string;\n file_path: string;\n score: number;\n}\n\nexport async function semanticSearch(\n paths: HaivePaths,\n query: string,\n options: {\n limit?: number;\n minScore?: number;\n embedder?: EmbedderLike;\n index?: EmbeddingIndex;\n } = {},\n): Promise<{ hits: SemanticHit[]; index: EmbeddingIndex } | null> {\n const index = options.index ?? (await loadIndex(paths));\n if (!index || index.entries.length === 0) return null;\n\n const embedder = options.embedder ?? (await Embedder.create(index.model));\n if (embedder.dimension !== index.dimension) {\n throw new Error(\n `Embedder dimension (${embedder.dimension}) differs from index (${index.dimension}). Re-run \\`haive embeddings index\\`.`,\n );\n }\n\n const queryVec = await embedder.encode(query);\n const minScore = options.minScore ?? 0;\n const limit = options.limit ?? 
10;\n\n const scored = index.entries\n .map((e) => ({ id: e.id, file_path: e.file_path, score: cosine(queryVec, e.vector) }))\n .filter((h) => h.score >= minScore)\n .sort((a, b) => b.score - a.score)\n .slice(0, limit);\n\n return { hits: scored, index };\n}\n","import { mkdir, readFile, writeFile } from \"node:fs/promises\";\nimport { existsSync } from \"node:fs\";\nimport path from \"node:path\";\nimport type { HaivePaths } from \"@hiveai/core\";\nimport { DEFAULT_DIMENSION, DEFAULT_MODEL } from \"./embedder.js\";\nimport { cacheDir } from \"./index-cache.js\";\n\nexport const CODE_INDEX_FILE = \"code-embeddings-index.json\";\n\nexport interface CodeEmbeddingEntry {\n /** stable id: `${file}#${name}` */\n id: string;\n file: string;\n name: string;\n kind: string;\n line: number;\n description?: string;\n hash: string;\n vector: number[];\n}\n\nexport interface CodeEmbeddingIndex {\n model: string;\n dimension: number;\n updated_at: string;\n source_generated_at: string;\n entries: CodeEmbeddingEntry[];\n}\n\nexport function codeIndexPath(paths: HaivePaths): string {\n return path.join(cacheDir(paths), CODE_INDEX_FILE);\n}\n\nexport function emptyCodeIndex(\n model = DEFAULT_MODEL,\n dimension = DEFAULT_DIMENSION,\n sourceGeneratedAt = \"\",\n): CodeEmbeddingIndex {\n return {\n model,\n dimension,\n updated_at: new Date().toISOString(),\n source_generated_at: sourceGeneratedAt,\n entries: [],\n };\n}\n\nexport async function loadCodeIndex(paths: HaivePaths): Promise<CodeEmbeddingIndex | null> {\n const file = codeIndexPath(paths);\n if (!existsSync(file)) return null;\n return JSON.parse(await readFile(file, \"utf8\")) as CodeEmbeddingIndex;\n}\n\nexport async function saveCodeIndex(paths: HaivePaths, index: CodeEmbeddingIndex): Promise<void> {\n const dir = cacheDir(paths);\n await mkdir(dir, { recursive: true });\n index.updated_at = new Date().toISOString();\n await writeFile(codeIndexPath(paths), JSON.stringify(index, null, 2), \"utf8\");\n}\n\nexport 
function buildCodeEntryText(file: string, name: string, kind: string, description?: string): string {\n // The embedded text is what we search against — keep it tight and signal-dense.\n // Filename tokens often carry intent (e.g. \"auth.controller.ts\" → \"auth controller\").\n const filenameHints = file\n .split(\"/\")\n .pop()\n ?.replace(/\\.[^.]+$/, \"\")\n .replace(/[._-]+/g, \" \") ?? \"\";\n return `${name} ${kind} ${filenameHints} ${description ?? \"\"}`.trim();\n}\n","import { loadCodeMap, type HaivePaths } from \"@hiveai/core\";\nimport { createHash } from \"node:crypto\";\nimport type { EmbedderLike } from \"./embedder.js\";\nimport {\n buildCodeEntryText,\n emptyCodeIndex,\n loadCodeIndex,\n saveCodeIndex,\n type CodeEmbeddingEntry,\n type CodeEmbeddingIndex,\n} from \"./code-index-cache.js\";\n\nexport interface CodeIndexUpdateReport {\n total: number;\n added: number;\n updated: number;\n unchanged: number;\n removed: number;\n}\n\nfunction hashEntry(text: string): string {\n return createHash(\"sha256\").update(text).digest(\"hex\").slice(0, 32);\n}\n\n/**\n * Build (or refresh) the code semantic-search index from the code-map.\n * Each exported symbol becomes one embedding entry — granularity stays at the\n * symbol level so search returns a precise file:line:name target.\n *\n * Re-uses entries whose embedded text is unchanged (hash check) so subsequent\n * builds only embed the diff.\n */\nexport async function rebuildCodeIndex(\n paths: HaivePaths,\n embedder: EmbedderLike,\n): Promise<{ index: CodeEmbeddingIndex; report: CodeIndexUpdateReport }> {\n const codeMap = await loadCodeMap(paths);\n if (!codeMap) {\n throw new Error(\n \"No code-map found. 
Run `haive index code` to generate `.ai/code-map.json` first.\",\n );\n }\n\n const existing =\n (await loadCodeIndex(paths)) ??\n emptyCodeIndex(embedder.model, embedder.dimension, codeMap.generated_at);\n\n if (existing.model !== embedder.model || existing.dimension !== embedder.dimension) {\n existing.entries = [];\n existing.model = embedder.model;\n existing.dimension = embedder.dimension;\n }\n\n const byId = new Map(existing.entries.map((e) => [e.id, e]));\n const nextEntries: CodeEmbeddingEntry[] = [];\n const seenIds = new Set<string>();\n let added = 0;\n let updated = 0;\n let unchanged = 0;\n\n for (const [filePath, fileEntry] of Object.entries(codeMap.files)) {\n for (const exp of fileEntry.exports) {\n const id = `${filePath}#${exp.name}`;\n seenIds.add(id);\n const text = buildCodeEntryText(filePath, exp.name, exp.kind, exp.description);\n const hash = hashEntry(text);\n const prior = byId.get(id);\n\n if (prior && prior.hash === hash && prior.line === exp.line) {\n nextEntries.push({ ...prior, file: filePath, name: exp.name, kind: exp.kind, line: exp.line, ...(exp.description ? { description: exp.description } : {}) });\n unchanged++;\n continue;\n }\n\n const vector = Array.from(await embedder.encode(text));\n nextEntries.push({\n id,\n file: filePath,\n name: exp.name,\n kind: exp.kind,\n line: exp.line,\n ...(exp.description ? 
{ description: exp.description } : {}),\n hash,\n vector,\n });\n if (prior) updated++;\n else added++;\n }\n }\n\n const removed = existing.entries.filter((e) => !seenIds.has(e.id)).length;\n existing.entries = nextEntries;\n existing.source_generated_at = codeMap.generated_at;\n await saveCodeIndex(paths, existing);\n\n return {\n index: existing,\n report: {\n total: nextEntries.length,\n added,\n updated,\n unchanged,\n removed,\n },\n };\n}\n","import type { HaivePaths } from \"@hiveai/core\";\nimport { cosine, Embedder, type EmbedderLike } from \"./embedder.js\";\nimport { loadCodeIndex, type CodeEmbeddingIndex } from \"./code-index-cache.js\";\n\nexport interface CodeSearchHit {\n file: string;\n name: string;\n kind: string;\n line: number;\n description?: string;\n score: number;\n}\n\nexport async function codeSemanticSearch(\n paths: HaivePaths,\n query: string,\n options: {\n limit?: number;\n minScore?: number;\n embedder?: EmbedderLike;\n index?: CodeEmbeddingIndex;\n } = {},\n): Promise<{ hits: CodeSearchHit[]; index: CodeEmbeddingIndex } | null> {\n const index = options.index ?? (await loadCodeIndex(paths));\n if (!index || index.entries.length === 0) return null;\n\n const embedder = options.embedder ?? (await Embedder.create(index.model));\n if (embedder.dimension !== index.dimension) {\n throw new Error(\n `Embedder dimension (${embedder.dimension}) differs from code index (${index.dimension}). Re-run \\`haive index code-search\\`.`,\n );\n }\n\n const queryVec = await embedder.encode(query);\n const minScore = options.minScore ?? 0;\n const limit = options.limit ?? 5;\n\n const scored = index.entries\n .map((e) => ({\n file: e.file,\n name: e.name,\n kind: e.kind,\n line: e.line,\n ...(e.description ? 
{ description: e.description } : {}),\n score: cosine(queryVec, e.vector),\n }))\n .filter((h) => h.score >= minScore)\n .sort((a, b) => b.score - a.score)\n .slice(0, limit);\n\n return { hits: scored, index };\n}\n"],"mappings":";AAAO,IAAM,gBAAgB;AACtB,IAAM,oBAAoB;AAejC,IAAI,iBAAmD;AACvD,IAAI,cAA6B;AAEjC,eAAe,aAAa,OAAmD;AAC7E,MAAI,kBAAkB,gBAAgB,MAAO,QAAO;AACpD,QAAM,EAAE,UAAU,IAAI,IAAI,MAAM,OAAO,sBAAsB;AAE7D,MAAI,mBAAmB;AACvB,MAAI,oBAAoB;AACxB,QAAM,OAAQ,MAAM,SAAS,sBAAsB,KAAK;AACxD,mBAAiB;AACjB,gBAAc;AACd,SAAO;AACT;AAEO,IAAM,WAAN,MAAM,UAAiC;AAAA,EACpC,YACW,MACD,OACA,WAChB;AAHiB;AACD;AACA;AAAA,EACf;AAAA,EAHgB;AAAA,EACD;AAAA,EACA;AAAA,EAGlB,aAAa,OAAO,QAAgB,eAAkC;AACpE,UAAM,OAAO,MAAM,aAAa,KAAK;AACrC,UAAM,QAAQ,MAAM,KAAK,mBAAmB,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AAChF,UAAM,MAAM,MAAM,gBAAgB,eAAe,MAAM,KAAK,SAAS,MAAM,KAAK;AAChF,WAAO,IAAI,UAAS,MAAM,OAAO,GAAG;AAAA,EACtC;AAAA,EAEA,MAAM,OAAO,MAAqC;AAChD,UAAM,SAAS,MAAM,KAAK,KAAK,MAAM,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AACzE,WAAO,OAAO,gBAAgB,eAC1B,OAAO,OACP,aAAa,KAAK,OAAO,IAAI;AAAA,EACnC;AAAA,EAEA,MAAM,WAAW,OAA0C;AACzD,UAAM,MAAsB,CAAC;AAC7B,eAAW,KAAK,OAAO;AACrB,UAAI,KAAK,MAAM,KAAK,OAAO,CAAC,CAAC;AAAA,IAC/B;AACA,WAAO;AAAA,EACT;AACF;AAEO,SAAS,OAAO,GAA4B,GAAoC;AACrF,MAAI,EAAE,WAAW,EAAE,QAAQ;AACzB,UAAM,IAAI,MAAM,8BAA8B,EAAE,MAAM,OAAO,EAAE,MAAM,EAAE;AAAA,EACzE;AACA,MAAI,MAAM;AACV,MAAI,KAAK;AACT,MAAI,KAAK;AACT,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AACjC,UAAM,KAAK,EAAE,CAAC;AACd,UAAM,KAAK,EAAE,CAAC;AACd,WAAO,KAAK;AACZ,UAAM,KAAK;AACX,UAAM,KAAK;AAAA,EACb;AACA,MAAI,OAAO,KAAK,OAAO,EAAG,QAAO;AACjC,SAAO,OAAO,KAAK,KAAK,EAAE,IAAI,KAAK,KAAK,EAAE;AAC5C;;;AC7EA,SAAS,kBAAkB;AAC3B,SAAS,OAAO,UAAU,MAAM,iBAAiB;AACjD,SAAS,kBAAkB;AAC3B,OAAO,UAAU;AAIV,IAAM,aAAa;AAgBnB,SAAS,SAAS,OAA2B;AAClD,SAAO,KAAK,KAAK,MAAM,UAAU,UAAU,YAAY;AACzD;AAEO,SAAS,UAAU,OAA2B;AACnD,SAAO,KAAK,KAAK,SAAS,KAAK,GAAG,UAAU;AAC9C;AAEO,SAAS,YAAY,MAAsB;AAChD,SAAO,WAAW,QAAQ,EAAE,OAAO,IAAI,EAAE,OAAO,KAAK;AACvD;AAEO,SAAS,WAAW,QAAQ,eAAe,YAAY,mBAAmC;AAC/F,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,aAAY,oBAAI,KAAK,GA
AE,YAAY;AAAA,IACnC,SAAS,CAAC;AAAA,EACZ;AACF;AAEA,eAAsB,UAAU,OAAmD;AACjF,QAAM,OAAO,UAAU,KAAK;AAC5B,MAAI,CAAC,WAAW,IAAI,EAAG,QAAO;AAC9B,QAAM,MAAM,MAAM,SAAS,MAAM,MAAM;AACvC,SAAO,KAAK,MAAM,GAAG;AACvB;AAEA,eAAsB,UAAU,OAAmB,OAAsC;AACvF,QAAM,MAAM,SAAS,KAAK;AAC1B,QAAM,MAAM,KAAK,EAAE,WAAW,KAAK,CAAC;AACpC,QAAM,cAAa,oBAAI,KAAK,GAAE,YAAY;AAC1C,QAAM,UAAU,UAAU,KAAK,GAAG,KAAK,UAAU,OAAO,MAAM,CAAC,GAAG,MAAM;AAC1E;AAEA,eAAsB,UAAU,OAM7B;AACD,QAAM,OAAO,UAAU,KAAK;AAC5B,MAAI,CAAC,WAAW,IAAI,GAAG;AACrB,WAAO,EAAE,QAAQ,OAAO,OAAO,GAAG,OAAO,MAAM,WAAW,MAAM,WAAW,EAAE;AAAA,EAC/E;AACA,QAAM,MAAM,MAAM,UAAU,KAAK;AACjC,QAAM,KAAK,MAAM,KAAK,IAAI;AAC1B,SAAO;AAAA,IACL,QAAQ;AAAA,IACR,OAAO,KAAK,QAAQ,UAAU;AAAA,IAC9B,OAAO,KAAK,SAAS;AAAA,IACrB,WAAW,KAAK,cAAc;AAAA,IAC9B,WAAW,GAAG;AAAA,EAChB;AACF;AAEO,SAAS,eAAe,IAAY,MAAgB,MAAsB;AAG/E,QAAM,UAAU,KAAK,SAAS,GAAG,KAAK,KAAK,GAAG,CAAC,IAAI,KAAK,KAAK,GAAG,CAAC,MAAM;AACvE,SAAO,GAAG,EAAE,IAAI,OAAO,GAAG,IAAI;AAChC;;;ACrFA,SAAS,2BAA4C;AAoBrD,eAAsB,aACpB,OACA,UAC+D;AAC/D,QAAM,WAAY,MAAM,UAAU,KAAK,KAAM,WAAW,SAAS,OAAO,SAAS,SAAS;AAE1F,MAAI,SAAS,UAAU,SAAS,SAAS,SAAS,cAAc,SAAS,WAAW;AAClF,aAAS,UAAU,CAAC;AACpB,aAAS,QAAQ,SAAS;AAC1B,aAAS,YAAY,SAAS;AAAA,EAChC;AAEA,QAAM,WAAW,MAAM,oBAAoB,MAAM,WAAW;AAC5D,QAAM,OAAO,IAAI,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AAC3D,QAAM,UAAU,oBAAI,IAAY;AAEhC,MAAI,QAAQ;AACZ,MAAI,UAAU;AACd,MAAI,YAAY;AAEhB,QAAM,cAAgC,CAAC;AAEvC,aAAW,EAAE,QAAQ,SAAS,KAAK,UAAU;AAC3C,UAAM,KAAK,OAAO,YAAY;AAC9B,YAAQ,IAAI,EAAE;AACd,UAAM,OAAO,eAAe,IAAI,OAAO,YAAY,MAAM,OAAO,IAAI;AACpE,UAAM,OAAO,YAAY,IAAI;AAC7B,UAAM,QAAQ,KAAK,IAAI,EAAE;AAEzB,QAAI,SAAS,MAAM,SAAS,MAAM;AAChC,kBAAY,KAAK,EAAE,GAAG,OAAO,WAAW,SAAS,CAAC;AAClD;AACA;AAAA,IACF;AAEA,UAAM,SAAS,MAAM,KAAK,MAAM,SAAS,OAAO,IAAI,CAAC;AACrD,gBAAY,KAAK,EAAE,IAAI,WAAW,UAAU,MAAM,OAAO,CAAC;AAC1D,QAAI,OAAO;AACT;AAAA,IACF,OAAO;AACL;AAAA,IACF;AAAA,EACF;AAEA,QAAM,UAAU,SAAS,QAAQ,OAAO,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC,EAAE;AACnE,WAAS,UAAU;AACnB,QAAM,UAAU,OAAO,QAAQ;AAE/B,SAAO;AAAA,IACL,OAAO;AAAA,IACP,QAAQ;AAAA,MACN,OAAO,YAAY;
AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;ACpEA,eAAsB,eACpB,OACA,OACA,UAKI,CAAC,GAC2D;AAChE,QAAM,QAAQ,QAAQ,SAAU,MAAM,UAAU,KAAK;AACrD,MAAI,CAAC,SAAS,MAAM,QAAQ,WAAW,EAAG,QAAO;AAEjD,QAAM,WAAW,QAAQ,YAAa,MAAM,SAAS,OAAO,MAAM,KAAK;AACvE,MAAI,SAAS,cAAc,MAAM,WAAW;AAC1C,UAAM,IAAI;AAAA,MACR,uBAAuB,SAAS,SAAS,yBAAyB,MAAM,SAAS;AAAA,IACnF;AAAA,EACF;AAEA,QAAM,WAAW,MAAM,SAAS,OAAO,KAAK;AAC5C,QAAM,WAAW,QAAQ,YAAY;AACrC,QAAM,QAAQ,QAAQ,SAAS;AAE/B,QAAM,SAAS,MAAM,QAClB,IAAI,CAAC,OAAO,EAAE,IAAI,EAAE,IAAI,WAAW,EAAE,WAAW,OAAO,OAAO,UAAU,EAAE,MAAM,EAAE,EAAE,EACpF,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ,EACjC,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,MAAM,GAAG,KAAK;AAEjB,SAAO,EAAE,MAAM,QAAQ,MAAM;AAC/B;;;ACzCA,SAAS,SAAAA,QAAO,YAAAC,WAAU,aAAAC,kBAAiB;AAC3C,SAAS,cAAAC,mBAAkB;AAC3B,OAAOC,WAAU;AAKV,IAAM,kBAAkB;AAsBxB,SAAS,cAAc,OAA2B;AACvD,SAAOC,MAAK,KAAK,SAAS,KAAK,GAAG,eAAe;AACnD;AAEO,SAAS,eACd,QAAQ,eACR,YAAY,mBACZ,oBAAoB,IACA;AACpB,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,aAAY,oBAAI,KAAK,GAAE,YAAY;AAAA,IACnC,qBAAqB;AAAA,IACrB,SAAS,CAAC;AAAA,EACZ;AACF;AAEA,eAAsB,cAAc,OAAuD;AACzF,QAAM,OAAO,cAAc,KAAK;AAChC,MAAI,CAACC,YAAW,IAAI,EAAG,QAAO;AAC9B,SAAO,KAAK,MAAM,MAAMC,UAAS,MAAM,MAAM,CAAC;AAChD;AAEA,eAAsB,cAAc,OAAmB,OAA0C;AAC/F,QAAM,MAAM,SAAS,KAAK;AAC1B,QAAMC,OAAM,KAAK,EAAE,WAAW,KAAK,CAAC;AACpC,QAAM,cAAa,oBAAI,KAAK,GAAE,YAAY;AAC1C,QAAMC,WAAU,cAAc,KAAK,GAAG,KAAK,UAAU,OAAO,MAAM,CAAC,GAAG,MAAM;AAC9E;AAEO,SAAS,mBAAmB,MAAc,MAAc,MAAc,aAA8B;AAGzG,QAAM,gBAAgB,KACnB,MAAM,GAAG,EACT,IAAI,GACH,QAAQ,YAAY,EAAE,EACvB,QAAQ,WAAW,GAAG,KAAK;AAC9B,SAAO,GAAG,IAAI,IAAI,IAAI,IAAI,aAAa,IAAI,eAAe,EAAE,GAAG,KAAK;AACtE;;;ACrEA,SAAS,mBAAoC;AAC7C,SAAS,cAAAC,mBAAkB;AAmB3B,SAAS,UAAU,MAAsB;AACvC,SAAOC,YAAW,QAAQ,EAAE,OAAO,IAAI,EAAE,OAAO,KAAK,EAAE,MAAM,GAAG,EAAE;AACpE;AAUA,eAAsB,iBACpB,OACA,UACuE;AACvE,QAAM,UAAU,MAAM,YAAY,KAAK;AACvC,MAAI,CAAC,SAAS;AACZ,UAAM,IAAI;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAEA,QAAM,WACH,MAAM,cAAc,KAAK,KAC1B,eAAe,SAAS,OAAO,SAAS,WAAW,QAAQ,YAAY;AAEzE,MAAI,SAAS,UAAU,SAAS,SAAS,SAAS,cAAc,SAAS,WAAW;AAClF,aAAS,UAAU,
CAAC;AACpB,aAAS,QAAQ,SAAS;AAC1B,aAAS,YAAY,SAAS;AAAA,EAChC;AAEA,QAAM,OAAO,IAAI,IAAI,SAAS,QAAQ,IAAI,CAAC,MAAM,CAAC,EAAE,IAAI,CAAC,CAAC,CAAC;AAC3D,QAAM,cAAoC,CAAC;AAC3C,QAAM,UAAU,oBAAI,IAAY;AAChC,MAAI,QAAQ;AACZ,MAAI,UAAU;AACd,MAAI,YAAY;AAEhB,aAAW,CAAC,UAAU,SAAS,KAAK,OAAO,QAAQ,QAAQ,KAAK,GAAG;AACjE,eAAW,OAAO,UAAU,SAAS;AACnC,YAAM,KAAK,GAAG,QAAQ,IAAI,IAAI,IAAI;AAClC,cAAQ,IAAI,EAAE;AACd,YAAM,OAAO,mBAAmB,UAAU,IAAI,MAAM,IAAI,MAAM,IAAI,WAAW;AAC7E,YAAM,OAAO,UAAU,IAAI;AAC3B,YAAM,QAAQ,KAAK,IAAI,EAAE;AAEzB,UAAI,SAAS,MAAM,SAAS,QAAQ,MAAM,SAAS,IAAI,MAAM;AAC3D,oBAAY,KAAK,EAAE,GAAG,OAAO,MAAM,UAAU,MAAM,IAAI,MAAM,MAAM,IAAI,MAAM,MAAM,IAAI,MAAM,GAAI,IAAI,cAAc,EAAE,aAAa,IAAI,YAAY,IAAI,CAAC,EAAG,CAAC;AAC3J;AACA;AAAA,MACF;AAEA,YAAM,SAAS,MAAM,KAAK,MAAM,SAAS,OAAO,IAAI,CAAC;AACrD,kBAAY,KAAK;AAAA,QACf;AAAA,QACA,MAAM;AAAA,QACN,MAAM,IAAI;AAAA,QACV,MAAM,IAAI;AAAA,QACV,MAAM,IAAI;AAAA,QACV,GAAI,IAAI,cAAc,EAAE,aAAa,IAAI,YAAY,IAAI,CAAC;AAAA,QAC1D;AAAA,QACA;AAAA,MACF,CAAC;AACD,UAAI,MAAO;AAAA,UACN;AAAA,IACP;AAAA,EACF;AAEA,QAAM,UAAU,SAAS,QAAQ,OAAO,CAAC,MAAM,CAAC,QAAQ,IAAI,EAAE,EAAE,CAAC,EAAE;AACnE,WAAS,UAAU;AACnB,WAAS,sBAAsB,QAAQ;AACvC,QAAM,cAAc,OAAO,QAAQ;AAEnC,SAAO;AAAA,IACL,OAAO;AAAA,IACP,QAAQ;AAAA,MACN,OAAO,YAAY;AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;AC5FA,eAAsB,mBACpB,OACA,OACA,UAKI,CAAC,GACiE;AACtE,QAAM,QAAQ,QAAQ,SAAU,MAAM,cAAc,KAAK;AACzD,MAAI,CAAC,SAAS,MAAM,QAAQ,WAAW,EAAG,QAAO;AAEjD,QAAM,WAAW,QAAQ,YAAa,MAAM,SAAS,OAAO,MAAM,KAAK;AACvE,MAAI,SAAS,cAAc,MAAM,WAAW;AAC1C,UAAM,IAAI;AAAA,MACR,uBAAuB,SAAS,SAAS,8BAA8B,MAAM,SAAS;AAAA,IACxF;AAAA,EACF;AAEA,QAAM,WAAW,MAAM,SAAS,OAAO,KAAK;AAC5C,QAAM,WAAW,QAAQ,YAAY;AACrC,QAAM,QAAQ,QAAQ,SAAS;AAE/B,QAAM,SAAS,MAAM,QAClB,IAAI,CAAC,OAAO;AAAA,IACX,MAAM,EAAE;AAAA,IACR,MAAM,EAAE;AAAA,IACR,MAAM,EAAE;AAAA,IACR,MAAM,EAAE;AAAA,IACR,GAAI,EAAE,cAAc,EAAE,aAAa,EAAE,YAAY,IAAI,CAAC;AAAA,IACtD,OAAO,OAAO,UAAU,EAAE,MAAM;AAAA,EAClC,EAAE,EACD,OAAO,CAAC,MAAM,EAAE,SAAS,QAAQ,EACjC,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,MAAM,GAAG,KAAK;AAEjB,SAAO,EAAE,MAAM,QAAQ
,MAAM;AAC/B;","names":["mkdir","readFile","writeFile","existsSync","path","path","existsSync","readFile","mkdir","writeFile","createHash","createHash"]}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@hiveai/embeddings",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.6.0",
|
|
4
4
|
"description": "hAIve embeddings — local sentence embeddings via Transformers.js for semantic memory search",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -32,8 +32,8 @@
|
|
|
32
32
|
"LICENSE"
|
|
33
33
|
],
|
|
34
34
|
"dependencies": {
|
|
35
|
-
"@
|
|
36
|
-
"@
|
|
35
|
+
"@xenova/transformers": "^2.17.2",
|
|
36
|
+
"@hiveai/core": "0.6.0"
|
|
37
37
|
},
|
|
38
38
|
"overrides": {
|
|
39
39
|
"protobufjs": "^7.5.5"
|