brainbank 0.1.0-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +155 -0
- package/assets/architecture.png +0 -0
- package/bin/brainbank +18 -0
- package/bin/brainbank-mcp +19 -0
- package/dist/chunk-3YBCD6DI.js +117 -0
- package/dist/chunk-3YBCD6DI.js.map +1 -0
- package/dist/chunk-63GBCDS5.js +3249 -0
- package/dist/chunk-63GBCDS5.js.map +1 -0
- package/dist/chunk-DMFMTOHF.js +123 -0
- package/dist/chunk-DMFMTOHF.js.map +1 -0
- package/dist/chunk-FQYKWB2Q.js +136 -0
- package/dist/chunk-FQYKWB2Q.js.map +1 -0
- package/dist/chunk-IMJJ2VEM.js +74 -0
- package/dist/chunk-IMJJ2VEM.js.map +1 -0
- package/dist/chunk-M744PCJQ.js +43 -0
- package/dist/chunk-M744PCJQ.js.map +1 -0
- package/dist/chunk-O3J6ZIXK.js +82 -0
- package/dist/chunk-O3J6ZIXK.js.map +1 -0
- package/dist/chunk-OPH7GZ7U.js +124 -0
- package/dist/chunk-OPH7GZ7U.js.map +1 -0
- package/dist/chunk-PXEWQMN7.js +89 -0
- package/dist/chunk-PXEWQMN7.js.map +1 -0
- package/dist/chunk-RDQYDLYZ.js +69 -0
- package/dist/chunk-RDQYDLYZ.js.map +1 -0
- package/dist/chunk-VIIHPCC4.js +254 -0
- package/dist/chunk-VIIHPCC4.js.map +1 -0
- package/dist/chunk-WCQVDF3K.js +14 -0
- package/dist/chunk-WCQVDF3K.js.map +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +3076 -0
- package/dist/cli.js.map +1 -0
- package/dist/haiku-expander-YRSIPGKP.js +8 -0
- package/dist/haiku-expander-YRSIPGKP.js.map +1 -0
- package/dist/haiku-pruner-SHAXUPY6.js +8 -0
- package/dist/haiku-pruner-SHAXUPY6.js.map +1 -0
- package/dist/http-server-QUXHLWUM.js +9 -0
- package/dist/http-server-QUXHLWUM.js.map +1 -0
- package/dist/index.d.ts +2161 -0
- package/dist/index.js +357 -0
- package/dist/index.js.map +1 -0
- package/dist/local-embedding-NZQTILGV.js +8 -0
- package/dist/local-embedding-NZQTILGV.js.map +1 -0
- package/dist/mcp.d.ts +2 -0
- package/dist/mcp.js +334 -0
- package/dist/mcp.js.map +1 -0
- package/dist/openai-embedding-ZP5TSUJG.js +8 -0
- package/dist/openai-embedding-ZP5TSUJG.js.map +1 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js +9 -0
- package/dist/perplexity-context-embedding-GI5PHE6X.js.map +1 -0
- package/dist/perplexity-embedding-KZRYGJRC.js +10 -0
- package/dist/perplexity-embedding-KZRYGJRC.js.map +1 -0
- package/dist/plugin-IKQ6IRSJ.js +32 -0
- package/dist/plugin-IKQ6IRSJ.js.map +1 -0
- package/dist/resolve-ASGLBNUC.js +10 -0
- package/dist/resolve-ASGLBNUC.js.map +1 -0
- package/dist/stats-tui-ZY2NQSEA.js +1904 -0
- package/dist/stats-tui-ZY2NQSEA.js.map +1 -0
- package/package.json +96 -0
- package/src/brainbank.ts +617 -0
- package/src/cli/commands/collection.ts +77 -0
- package/src/cli/commands/context.ts +179 -0
- package/src/cli/commands/daemon.ts +100 -0
- package/src/cli/commands/docs.ts +71 -0
- package/src/cli/commands/files.ts +69 -0
- package/src/cli/commands/help.ts +77 -0
- package/src/cli/commands/index.ts +482 -0
- package/src/cli/commands/kv.ts +140 -0
- package/src/cli/commands/mcp-export.ts +273 -0
- package/src/cli/commands/mcp.ts +6 -0
- package/src/cli/commands/reembed.ts +30 -0
- package/src/cli/commands/scan.ts +336 -0
- package/src/cli/commands/search.ts +203 -0
- package/src/cli/commands/stats.ts +68 -0
- package/src/cli/commands/status.ts +47 -0
- package/src/cli/commands/watch.ts +47 -0
- package/src/cli/factory/brain-context.ts +43 -0
- package/src/cli/factory/builtin-registration.ts +87 -0
- package/src/cli/factory/config-loader.ts +77 -0
- package/src/cli/factory/index.ts +69 -0
- package/src/cli/factory/plugin-loader.ts +325 -0
- package/src/cli/index.ts +71 -0
- package/src/cli/server-client.ts +178 -0
- package/src/cli/tui/index-tui.tsx +667 -0
- package/src/cli/tui/stats-data.ts +523 -0
- package/src/cli/tui/stats-search.ts +262 -0
- package/src/cli/tui/stats-tui.tsx +1465 -0
- package/src/cli/tui/tree-scanner.ts +650 -0
- package/src/cli/utils.ts +137 -0
- package/src/config.ts +49 -0
- package/src/constants.ts +21 -0
- package/src/db/adapter.ts +112 -0
- package/src/db/metadata.ts +130 -0
- package/src/db/migrations.ts +66 -0
- package/src/db/sqlite-adapter.ts +218 -0
- package/src/db/tracker.ts +91 -0
- package/src/engine/index-api.ts +81 -0
- package/src/engine/reembed.ts +206 -0
- package/src/engine/search-api.ts +218 -0
- package/src/index.ts +154 -0
- package/src/lib/fts.ts +57 -0
- package/src/lib/languages.ts +180 -0
- package/src/lib/logger.ts +126 -0
- package/src/lib/math.ts +87 -0
- package/src/lib/provider-key.ts +20 -0
- package/src/lib/prune.ts +71 -0
- package/src/lib/rrf.ts +133 -0
- package/src/lib/write-lock.ts +108 -0
- package/src/mcp/mcp-server.ts +195 -0
- package/src/mcp/workspace-factory.ts +68 -0
- package/src/mcp/workspace-pool.ts +224 -0
- package/src/plugin.ts +381 -0
- package/src/providers/embeddings/embedding-worker-thread.ts +95 -0
- package/src/providers/embeddings/embedding-worker.ts +141 -0
- package/src/providers/embeddings/local-embedding.ts +115 -0
- package/src/providers/embeddings/openai-embedding.ts +167 -0
- package/src/providers/embeddings/perplexity-context-embedding.ts +195 -0
- package/src/providers/embeddings/perplexity-embedding.ts +165 -0
- package/src/providers/embeddings/resolve.ts +34 -0
- package/src/providers/pruners/haiku-expander.ts +166 -0
- package/src/providers/pruners/haiku-pruner.ts +112 -0
- package/src/providers/vector/hnsw-index.ts +174 -0
- package/src/providers/vector/hnsw-loader.ts +129 -0
- package/src/search/bm25-boost.ts +69 -0
- package/src/search/context-builder.ts +251 -0
- package/src/search/keyword/composite-bm25-search.ts +47 -0
- package/src/search/types.ts +37 -0
- package/src/search/vector/composite-vector-search.ts +61 -0
- package/src/search/vector/mmr.ts +64 -0
- package/src/services/collection.ts +384 -0
- package/src/services/daemon.ts +87 -0
- package/src/services/http-server.ts +336 -0
- package/src/services/kv-service.ts +64 -0
- package/src/services/plugin-registry.ts +77 -0
- package/src/services/watch.ts +340 -0
- package/src/services/webhook-server.ts +100 -0
- package/src/types.ts +493 -0
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-WCQVDF3K.js";
|
|
4
|
+
|
|
5
|
+
// src/providers/embeddings/openai-embedding.ts
|
|
6
|
+
// Endpoint used when the caller supplies no `baseUrl`.
var API_URL = "https://api.openai.com/v1/embeddings";

// Model used when the caller supplies no `model`.
var DEFAULT_MODEL = "text-embedding-3-small";

// Vector size reported for each known model when the caller supplies no `dims`.
var DEFAULT_DIMS = {
  "text-embedding-3-small": 1536,
  "text-embedding-3-large": 3072,
  "text-embedding-ada-002": 1536
};

// Largest number of texts sent in a single request by `embedBatch`.
var MAX_BATCH = 100;

// Pause (ms) inserted between consecutive batch requests.
var BATCH_DELAY_MS = 100;

// Per-request timeout (ms) used when the caller supplies no `timeout`.
var REQUEST_TIMEOUT_MS = 30000;
|
|
16
|
+
var OpenAIEmbedding = class {
|
|
17
|
+
static {
|
|
18
|
+
__name(this, "OpenAIEmbedding");
|
|
19
|
+
}
|
|
20
|
+
dims;
|
|
21
|
+
_apiKey;
|
|
22
|
+
_model;
|
|
23
|
+
_baseUrl;
|
|
24
|
+
_requestDims;
|
|
25
|
+
_timeout;
|
|
26
|
+
constructor(options = {}) {
|
|
27
|
+
this._apiKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? "";
|
|
28
|
+
this._model = options.model ?? DEFAULT_MODEL;
|
|
29
|
+
this._baseUrl = options.baseUrl ?? API_URL;
|
|
30
|
+
this._timeout = options.timeout ?? REQUEST_TIMEOUT_MS;
|
|
31
|
+
if (options.dims && this._model.startsWith("text-embedding-3")) {
|
|
32
|
+
this._requestDims = options.dims;
|
|
33
|
+
this.dims = options.dims;
|
|
34
|
+
} else {
|
|
35
|
+
this.dims = options.dims ?? DEFAULT_DIMS[this._model] ?? 1536;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
async embed(text) {
|
|
39
|
+
const results = await this._request([text]);
|
|
40
|
+
return results[0];
|
|
41
|
+
}
|
|
42
|
+
async embedBatch(texts) {
|
|
43
|
+
if (texts.length === 0) return [];
|
|
44
|
+
const results = [];
|
|
45
|
+
for (let i = 0; i < texts.length; i += MAX_BATCH) {
|
|
46
|
+
if (i > 0) await sleep(BATCH_DELAY_MS);
|
|
47
|
+
const batch = texts.slice(i, i + MAX_BATCH);
|
|
48
|
+
const embeddings = await this._request(batch);
|
|
49
|
+
results.push(...embeddings);
|
|
50
|
+
}
|
|
51
|
+
return results;
|
|
52
|
+
}
|
|
53
|
+
async close() {
|
|
54
|
+
}
|
|
55
|
+
_isTokenLimitError(errText) {
|
|
56
|
+
return errText.includes("maximum input length") || errText.includes("maximum context length") || errText.includes("too many tokens");
|
|
57
|
+
}
|
|
58
|
+
async _request(input, retryDepth = 0) {
|
|
59
|
+
if (!this._apiKey) {
|
|
60
|
+
throw new Error("OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.");
|
|
61
|
+
}
|
|
62
|
+
const MAX_CHARS = 24e3;
|
|
63
|
+
const safeInput = input.map((t) => t.length > MAX_CHARS ? t.slice(0, MAX_CHARS) : t);
|
|
64
|
+
const body = {
|
|
65
|
+
model: this._model,
|
|
66
|
+
input: safeInput
|
|
67
|
+
};
|
|
68
|
+
if (this._requestDims) body.dimensions = this._requestDims;
|
|
69
|
+
const controller = new AbortController();
|
|
70
|
+
const timer = setTimeout(() => controller.abort(), this._timeout);
|
|
71
|
+
let res;
|
|
72
|
+
try {
|
|
73
|
+
res = await fetch(this._baseUrl, {
|
|
74
|
+
method: "POST",
|
|
75
|
+
headers: {
|
|
76
|
+
"Content-Type": "application/json",
|
|
77
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
78
|
+
},
|
|
79
|
+
body: JSON.stringify(body),
|
|
80
|
+
signal: controller.signal
|
|
81
|
+
});
|
|
82
|
+
} catch (err) {
|
|
83
|
+
clearTimeout(timer);
|
|
84
|
+
if (err instanceof Error && err.name === "AbortError") {
|
|
85
|
+
throw new Error(`OpenAI embedding request timed out after ${this._timeout}ms.`);
|
|
86
|
+
}
|
|
87
|
+
throw err;
|
|
88
|
+
} finally {
|
|
89
|
+
clearTimeout(timer);
|
|
90
|
+
}
|
|
91
|
+
if (!res.ok) {
|
|
92
|
+
return this._handleApiError(res, safeInput, retryDepth);
|
|
93
|
+
}
|
|
94
|
+
const json = await res.json();
|
|
95
|
+
return json.data.sort((a, b) => a.index - b.index).map((d) => new Float32Array(d.embedding));
|
|
96
|
+
}
|
|
97
|
+
/** Handle API errors with token-limit retry logic. */
|
|
98
|
+
async _handleApiError(res, safeInput, retryDepth) {
|
|
99
|
+
const err = await res.text();
|
|
100
|
+
const isTokenLimit = res.status === 400 && this._isTokenLimitError(err);
|
|
101
|
+
if (isTokenLimit && safeInput.length > 1) {
|
|
102
|
+
const results = [];
|
|
103
|
+
for (const text of safeInput) {
|
|
104
|
+
const r = await this._request([text.slice(0, 8e3)]);
|
|
105
|
+
results.push(r[0]);
|
|
106
|
+
}
|
|
107
|
+
return results;
|
|
108
|
+
}
|
|
109
|
+
if (isTokenLimit && safeInput.length === 1 && retryDepth < 1) {
|
|
110
|
+
return this._request([safeInput[0].slice(0, 6e3)], retryDepth + 1);
|
|
111
|
+
}
|
|
112
|
+
throw new Error(`OpenAI embedding API error (${res.status}): ${err}`);
|
|
113
|
+
}
|
|
114
|
+
};
|
|
115
|
+
function sleep(ms) {
|
|
116
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
117
|
+
}
|
|
118
|
+
__name(sleep, "sleep");
|
|
119
|
+
|
|
120
|
+
export {
|
|
121
|
+
OpenAIEmbedding
|
|
122
|
+
};
|
|
123
|
+
//# sourceMappingURL=chunk-DMFMTOHF.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/providers/embeddings/openai-embedding.ts"],"sourcesContent":["/**\n * BrainBank — OpenAI Embedding Provider\n * \n * Uses OpenAI's embedding API via fetch (no SDK dependency).\n * Supports text-embedding-3-small, text-embedding-3-large, and ada-002.\n * \n * Usage:\n * const brain = new BrainBank({\n * embeddingProvider: new OpenAIEmbedding({ model: 'text-embedding-3-small' }),\n * });\n */\n\nimport type { EmbeddingProvider } from '@/types.ts';\n\nconst DEFAULT_MODEL = 'text-embedding-3-small';\nconst DEFAULT_DIMS: Record<string, number> = {\n 'text-embedding-3-small': 1536,\n 'text-embedding-3-large': 3072,\n 'text-embedding-ada-002': 1536,\n};\nconst API_URL = 'https://api.openai.com/v1/embeddings';\nconst MAX_BATCH = 100;\nconst REQUEST_TIMEOUT_MS = 30_000;\nconst BATCH_DELAY_MS = 100;\n\nexport interface OpenAIEmbeddingOptions {\n /** OpenAI API key. Falls back to OPENAI_API_KEY env var. */\n apiKey?: string;\n /** Model name. Default: 'text-embedding-3-small' */\n model?: string;\n /** Vector dimensions. If omitted, uses model default. text-embedding-3-* supports custom dims. */\n dims?: number;\n /** Base URL override (for Azure, proxies, etc.) */\n baseUrl?: string;\n /** Request timeout in ms. Default: 30000 */\n timeout?: number;\n}\n\nexport class OpenAIEmbedding implements EmbeddingProvider {\n readonly dims: number;\n\n private _apiKey: string;\n private _model: string;\n private _baseUrl: string;\n private _requestDims: number | undefined;\n private _timeout: number;\n\n constructor(options: OpenAIEmbeddingOptions = {}) {\n this._apiKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? '';\n this._model = options.model ?? DEFAULT_MODEL;\n this._baseUrl = options.baseUrl ?? API_URL;\n this._timeout = options.timeout ?? 
REQUEST_TIMEOUT_MS;\n\n // Custom dims only supported by text-embedding-3-*\n if (options.dims && this._model.startsWith('text-embedding-3')) {\n this._requestDims = options.dims;\n this.dims = options.dims;\n } else {\n this.dims = options.dims ?? DEFAULT_DIMS[this._model] ?? 1536;\n }\n }\n\n async embed(text: string): Promise<Float32Array> {\n const results = await this._request([text]);\n return results[0];\n }\n\n async embedBatch(texts: string[]): Promise<Float32Array[]> {\n if (texts.length === 0) return [];\n\n const results: Float32Array[] = [];\n\n for (let i = 0; i < texts.length; i += MAX_BATCH) {\n if (i > 0) await sleep(BATCH_DELAY_MS);\n const batch = texts.slice(i, i + MAX_BATCH);\n const embeddings = await this._request(batch);\n results.push(...embeddings);\n }\n\n return results;\n }\n\n async close(): Promise<void> {\n // No resources to release\n }\n\n private _isTokenLimitError(errText: string): boolean {\n return errText.includes('maximum input length') ||\n errText.includes('maximum context length') ||\n errText.includes('too many tokens');\n }\n\n private async _request(input: string[], retryDepth: number = 0): Promise<Float32Array[]> {\n if (!this._apiKey) {\n throw new Error('OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.');\n }\n\n const MAX_CHARS = 24_000;\n const safeInput = input.map(t => t.length > MAX_CHARS ? 
t.slice(0, MAX_CHARS) : t);\n\n const body: { model: string; input: string[]; dimensions?: number } = {\n model: this._model, input: safeInput,\n };\n if (this._requestDims) body.dimensions = this._requestDims;\n\n const controller = new AbortController();\n const timer = setTimeout(() => controller.abort(), this._timeout);\n\n let res: Response;\n try {\n res = await fetch(this._baseUrl, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this._apiKey}`,\n },\n body: JSON.stringify(body),\n signal: controller.signal,\n });\n } catch (err: unknown) {\n clearTimeout(timer);\n if (err instanceof Error && err.name === 'AbortError') {\n throw new Error(`OpenAI embedding request timed out after ${this._timeout}ms.`);\n }\n throw err;\n } finally {\n clearTimeout(timer);\n }\n\n if (!res.ok) {\n return this._handleApiError(res, safeInput, retryDepth);\n }\n\n const json = await res.json() as {\n data: Array<{ embedding: number[]; index: number }>;\n };\n return json.data.sort((a, b) => a.index - b.index).map(d => new Float32Array(d.embedding));\n }\n\n /** Handle API errors with token-limit retry logic. 
*/\n private async _handleApiError(\n res: Response, safeInput: string[], retryDepth: number,\n ): Promise<Float32Array[]> {\n const err = await res.text();\n const isTokenLimit = res.status === 400 && this._isTokenLimitError(err);\n\n // Batch token limit → retry each item individually with aggressive truncation\n if (isTokenLimit && safeInput.length > 1) {\n const results: Float32Array[] = [];\n for (const text of safeInput) {\n const r = await this._request([text.slice(0, 8_000)]);\n results.push(r[0]);\n }\n return results;\n }\n // Single item still failing → truncate to ~2k tokens (max 1 retry)\n if (isTokenLimit && safeInput.length === 1 && retryDepth < 1) {\n return this._request([safeInput[0].slice(0, 6_000)], retryDepth + 1);\n }\n throw new Error(`OpenAI embedding API error (${res.status}): ${err}`);\n }\n}\n\n/** Simple delay helper. */\nfunction sleep(ms: number): Promise<void> {\n return new Promise(resolve => setTimeout(resolve, ms));\n}\n"],"mappings":";;;;;AAcA,IAAM,gBAAgB;AACtB,IAAM,eAAuC;AAAA,EACzC,0BAA0B;AAAA,EAC1B,0BAA0B;AAAA,EAC1B,0BAA0B;AAC9B;AACA,IAAM,UAAU;AAChB,IAAM,YAAY;AAClB,IAAM,qBAAqB;AAC3B,IAAM,iBAAiB;AAehB,IAAM,kBAAN,MAAmD;AAAA,EAtC1D,OAsC0D;AAAA;AAAA;AAAA,EAC7C;AAAA,EAED;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,UAAkC,CAAC,GAAG;AAC9C,SAAK,UAAU,QAAQ,UAAU,QAAQ,IAAI,kBAAkB;AAC/D,SAAK,SAAS,QAAQ,SAAS;AAC/B,SAAK,WAAW,QAAQ,WAAW;AACnC,SAAK,WAAW,QAAQ,WAAW;AAGnC,QAAI,QAAQ,QAAQ,KAAK,OAAO,WAAW,kBAAkB,GAAG;AAC5D,WAAK,eAAe,QAAQ;AAC5B,WAAK,OAAO,QAAQ;AAAA,IACxB,OAAO;AACH,WAAK,OAAO,QAAQ,QAAQ,aAAa,KAAK,MAAM,KAAK;AAAA,IAC7D;AAAA,EACJ;AAAA,EAEA,MAAM,MAAM,MAAqC;AAC7C,UAAM,UAAU,MAAM,KAAK,SAAS,CAAC,IAAI,CAAC;AAC1C,WAAO,QAAQ,CAAC;AAAA,EACpB;AAAA,EAEA,MAAM,WAAW,OAA0C;AACvD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,UAAM,UAA0B,CAAC;AAEjC,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,WAAW;AAC9C,UAAI,IAAI,EAAG,OAAM,MAAM,cAAc;AACrC,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,SAAS;AAC1C,YAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,cAAQ,KAAK,GAAG,UAAU;AAAA,IAC9B;AAEA,WAAO;AAAA,EACX;AAAA,EAEA,
MAAM,QAAuB;AAAA,EAE7B;AAAA,EAEQ,mBAAmB,SAA0B;AACjD,WAAO,QAAQ,SAAS,sBAAsB,KACvC,QAAQ,SAAS,wBAAwB,KACzC,QAAQ,SAAS,iBAAiB;AAAA,EAC7C;AAAA,EAEA,MAAc,SAAS,OAAiB,aAAqB,GAA4B;AACrF,QAAI,CAAC,KAAK,SAAS;AACf,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAChG;AAEA,UAAM,YAAY;AAClB,UAAM,YAAY,MAAM,IAAI,OAAK,EAAE,SAAS,YAAY,EAAE,MAAM,GAAG,SAAS,IAAI,CAAC;AAEjF,UAAM,OAAgE;AAAA,MAClE,OAAO,KAAK;AAAA,MAAQ,OAAO;AAAA,IAC/B;AACA,QAAI,KAAK,aAAc,MAAK,aAAa,KAAK;AAE9C,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,QAAQ,WAAW,MAAM,WAAW,MAAM,GAAG,KAAK,QAAQ;AAEhE,QAAI;AACJ,QAAI;AACA,YAAM,MAAM,MAAM,KAAK,UAAU;AAAA,QAC7B,QAAQ;AAAA,QACR,SAAS;AAAA,UACL,gBAAgB;AAAA,UAChB,iBAAiB,UAAU,KAAK,OAAO;AAAA,QAC3C;AAAA,QACA,MAAM,KAAK,UAAU,IAAI;AAAA,QACzB,QAAQ,WAAW;AAAA,MACvB,CAAC;AAAA,IACL,SAAS,KAAc;AACnB,mBAAa,KAAK;AAClB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACnD,cAAM,IAAI,MAAM,4CAA4C,KAAK,QAAQ,KAAK;AAAA,MAClF;AACA,YAAM;AAAA,IACV,UAAE;AACE,mBAAa,KAAK;AAAA,IACtB;AAEA,QAAI,CAAC,IAAI,IAAI;AACT,aAAO,KAAK,gBAAgB,KAAK,WAAW,UAAU;AAAA,IAC1D;AAEA,UAAM,OAAO,MAAM,IAAI,KAAK;AAG5B,WAAO,KAAK,KAAK,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAAE,IAAI,OAAK,IAAI,aAAa,EAAE,SAAS,CAAC;AAAA,EAC7F;AAAA;AAAA,EAGA,MAAc,gBACV,KAAe,WAAqB,YACb;AACvB,UAAM,MAAM,MAAM,IAAI,KAAK;AAC3B,UAAM,eAAe,IAAI,WAAW,OAAO,KAAK,mBAAmB,GAAG;AAGtE,QAAI,gBAAgB,UAAU,SAAS,GAAG;AACtC,YAAM,UAA0B,CAAC;AACjC,iBAAW,QAAQ,WAAW;AAC1B,cAAM,IAAI,MAAM,KAAK,SAAS,CAAC,KAAK,MAAM,GAAG,GAAK,CAAC,CAAC;AACpD,gBAAQ,KAAK,EAAE,CAAC,CAAC;AAAA,MACrB;AACA,aAAO;AAAA,IACX;AAEA,QAAI,gBAAgB,UAAU,WAAW,KAAK,aAAa,GAAG;AAC1D,aAAO,KAAK,SAAS,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,GAAK,CAAC,GAAG,aAAa,CAAC;AAAA,IACvE;AACA,UAAM,IAAI,MAAM,+BAA+B,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,EACxE;AACJ;AAGA,SAAS,MAAM,IAA2B;AACtC,SAAO,IAAI,QAAQ,aAAW,WAAW,SAAS,EAAE,CAAC;AACzD;AAFS;","names":[]}
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
import {
|
|
2
|
+
decodeBase64Int8
|
|
3
|
+
} from "./chunk-3YBCD6DI.js";
|
|
4
|
+
import {
|
|
5
|
+
__name
|
|
6
|
+
} from "./chunk-WCQVDF3K.js";
|
|
7
|
+
|
|
8
|
+
// src/providers/embeddings/perplexity-context-embedding.ts
|
|
9
|
+
// Endpoint used when the caller supplies no `baseUrl`.
var API_URL = "https://api.perplexity.ai/v1/contextualizedembeddings";

// Model used when the caller supplies no `model`.
var DEFAULT_MODEL = "pplx-embed-context-v1-4b";

// Vector size reported for each known model when the caller supplies no `dims`.
var DEFAULT_DIMS = {
  "pplx-embed-context-v1-0.6b": 1024,
  "pplx-embed-context-v1-4b": 2560
};

// Pause (ms) inserted between consecutive document requests.
var BATCH_DELAY_MS = 100;

// Per-request timeout (ms) used when the caller supplies no `timeout`.
var REQUEST_TIMEOUT_MS = 30000;
|
|
17
|
+
var PerplexityContextEmbedding = class {
|
|
18
|
+
static {
|
|
19
|
+
__name(this, "PerplexityContextEmbedding");
|
|
20
|
+
}
|
|
21
|
+
dims;
|
|
22
|
+
_apiKey;
|
|
23
|
+
_model;
|
|
24
|
+
_baseUrl;
|
|
25
|
+
_requestDims;
|
|
26
|
+
_timeout;
|
|
27
|
+
constructor(options = {}) {
|
|
28
|
+
this._apiKey = options.apiKey ?? process.env.PERPLEXITY_API_KEY ?? "";
|
|
29
|
+
this._model = options.model ?? DEFAULT_MODEL;
|
|
30
|
+
this._baseUrl = options.baseUrl ?? API_URL;
|
|
31
|
+
this._timeout = options.timeout ?? REQUEST_TIMEOUT_MS;
|
|
32
|
+
if (options.dims) {
|
|
33
|
+
this._requestDims = options.dims;
|
|
34
|
+
this.dims = options.dims;
|
|
35
|
+
} else {
|
|
36
|
+
this.dims = DEFAULT_DIMS[this._model] ?? 2560;
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
/** Embed a single text. Wraps as [[text]] for the contextualized API. */
|
|
40
|
+
async embed(text) {
|
|
41
|
+
const results = await this._request([[text]]);
|
|
42
|
+
return results[0];
|
|
43
|
+
}
|
|
44
|
+
/**
|
|
45
|
+
* Embed multiple texts as chunks of contextualized documents.
|
|
46
|
+
* Splits into sub-documents to stay under Perplexity's 32k token/doc limit.
|
|
47
|
+
*/
|
|
48
|
+
async embedBatch(texts) {
|
|
49
|
+
if (texts.length === 0) return [];
|
|
50
|
+
const docs = splitIntoDocuments(texts);
|
|
51
|
+
const results = [];
|
|
52
|
+
for (let i = 0; i < docs.length; i++) {
|
|
53
|
+
if (i > 0) await sleep(BATCH_DELAY_MS);
|
|
54
|
+
const embeddings = await this._request([docs[i]]);
|
|
55
|
+
results.push(...embeddings);
|
|
56
|
+
}
|
|
57
|
+
return results;
|
|
58
|
+
}
|
|
59
|
+
async close() {
|
|
60
|
+
}
|
|
61
|
+
/** Send a contextualized request. Input is string[][] (docs × chunks). */
|
|
62
|
+
async _request(input) {
|
|
63
|
+
if (!this._apiKey) {
|
|
64
|
+
throw new Error(
|
|
65
|
+
"BrainBank: Perplexity API key required. Set PERPLEXITY_API_KEY env var or pass apiKey option."
|
|
66
|
+
);
|
|
67
|
+
}
|
|
68
|
+
const MAX_CHARS = 24e3;
|
|
69
|
+
const safeInput = input.map(
|
|
70
|
+
(doc) => doc.map((chunk) => chunk.length > MAX_CHARS ? chunk.slice(0, MAX_CHARS) : chunk)
|
|
71
|
+
);
|
|
72
|
+
const body = { model: this._model, input: safeInput };
|
|
73
|
+
if (this._requestDims) body.dimensions = this._requestDims;
|
|
74
|
+
const controller = new AbortController();
|
|
75
|
+
const timer = setTimeout(() => controller.abort(), this._timeout);
|
|
76
|
+
let res;
|
|
77
|
+
try {
|
|
78
|
+
res = await fetch(this._baseUrl, {
|
|
79
|
+
method: "POST",
|
|
80
|
+
headers: {
|
|
81
|
+
"Content-Type": "application/json",
|
|
82
|
+
"Authorization": `Bearer ${this._apiKey}`
|
|
83
|
+
},
|
|
84
|
+
body: JSON.stringify(body),
|
|
85
|
+
signal: controller.signal
|
|
86
|
+
});
|
|
87
|
+
} catch (err) {
|
|
88
|
+
clearTimeout(timer);
|
|
89
|
+
if (err instanceof Error && err.name === "AbortError") {
|
|
90
|
+
throw new Error(`BrainBank: Perplexity contextualized embedding request timed out after ${this._timeout}ms.`);
|
|
91
|
+
}
|
|
92
|
+
throw err;
|
|
93
|
+
} finally {
|
|
94
|
+
clearTimeout(timer);
|
|
95
|
+
}
|
|
96
|
+
if (!res.ok) {
|
|
97
|
+
const errText = await res.text();
|
|
98
|
+
throw new Error(`BrainBank: Perplexity contextualized embedding API error (${res.status}): ${errText}`);
|
|
99
|
+
}
|
|
100
|
+
const json = await res.json();
|
|
101
|
+
return flattenContextResponse(json, this.dims);
|
|
102
|
+
}
|
|
103
|
+
};
|
|
104
|
+
function flattenContextResponse(json, dims) {
|
|
105
|
+
return json.data.sort((a, b) => a.index - b.index).flatMap(
|
|
106
|
+
(doc) => doc.data.sort((a, b) => a.index - b.index).map((chunk) => decodeBase64Int8(chunk.embedding, dims))
|
|
107
|
+
);
|
|
108
|
+
}
|
|
109
|
+
__name(flattenContextResponse, "flattenContextResponse");
|
|
110
|
+
/**
 * Group consecutive texts into documents whose combined character count stays
 * at or below 80000. A text that would push the open document past the limit
 * starts a new document; a document is never left empty, so a single text
 * longer than the limit still gets a document of its own.
 */
function splitIntoDocuments(texts) {
  const MAX_CHARS_PER_DOC = 8e4;
  const docs = [];
  let open = [];
  let openChars = 0;
  for (const text of texts) {
    const overflows = open.length > 0 && openChars + text.length > MAX_CHARS_PER_DOC;
    if (overflows) {
      // Close the open document and start the next one with this text.
      docs.push(open);
      open = [text];
      openChars = text.length;
    } else {
      open.push(text);
      openChars += text.length;
    }
  }
  if (open.length !== 0) {
    docs.push(open);
  }
  return docs;
}
|
|
127
|
+
__name(splitIntoDocuments, "splitIntoDocuments");
|
|
128
|
+
function sleep(ms) {
|
|
129
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
130
|
+
}
|
|
131
|
+
__name(sleep, "sleep");
|
|
132
|
+
|
|
133
|
+
export {
|
|
134
|
+
PerplexityContextEmbedding
|
|
135
|
+
};
|
|
136
|
+
//# sourceMappingURL=chunk-FQYKWB2Q.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/providers/embeddings/perplexity-context-embedding.ts"],"sourcesContent":["/**\n * BrainBank — Perplexity Contextualized Embedding Provider\n *\n * Uses Perplexity's contextualized embeddings API for document-aware vectors.\n * Chunks from the same document share context, improving retrieval quality.\n *\n * Models: pplx-embed-context-v1-0.6b (1024d), pplx-embed-context-v1-4b (2560d).\n *\n * Key difference from standard: input is string[][] (docs × chunks) and the\n * response has a nested structure. This provider adapts the flat BrainBank\n * EmbeddingProvider interface to the nested Perplexity API:\n * - embed(text) → wraps as [[text]]\n * - embedBatch(texts) → wraps as [texts] (one \"document\" of related chunks)\n *\n * Usage:\n * const brain = new BrainBank({\n * embeddingProvider: new PerplexityContextEmbedding(),\n * });\n */\n\nimport type { EmbeddingProvider } from '@/types.ts';\nimport { decodeBase64Int8 } from './perplexity-embedding.ts';\n\nconst DEFAULT_MODEL = 'pplx-embed-context-v1-4b';\nconst DEFAULT_DIMS: Record<string, number> = {\n 'pplx-embed-context-v1-0.6b': 1024,\n 'pplx-embed-context-v1-4b': 2560,\n};\nconst API_URL = 'https://api.perplexity.ai/v1/contextualizedembeddings';\nconst MAX_BATCH = 100;\nconst REQUEST_TIMEOUT_MS = 30_000;\nconst BATCH_DELAY_MS = 100;\n\nexport interface PerplexityContextEmbeddingOptions {\n /** Perplexity API key. Falls back to PERPLEXITY_API_KEY env var. */\n apiKey?: string;\n /** Model name. Default: 'pplx-embed-context-v1-4b' */\n model?: string;\n /** Vector dimensions (Matryoshka reduction). If omitted, uses model default. */\n dims?: number;\n /** Base URL override. */\n baseUrl?: string;\n /** Request timeout in ms. 
Default: 30000 */\n timeout?: number;\n}\n\nexport class PerplexityContextEmbedding implements EmbeddingProvider {\n readonly dims: number;\n\n private _apiKey: string;\n private _model: string;\n private _baseUrl: string;\n private _requestDims: number | undefined;\n private _timeout: number;\n\n constructor(options: PerplexityContextEmbeddingOptions = {}) {\n this._apiKey = options.apiKey ?? process.env.PERPLEXITY_API_KEY ?? '';\n this._model = options.model ?? DEFAULT_MODEL;\n this._baseUrl = options.baseUrl ?? API_URL;\n this._timeout = options.timeout ?? REQUEST_TIMEOUT_MS;\n\n if (options.dims) {\n this._requestDims = options.dims;\n this.dims = options.dims;\n } else {\n this.dims = DEFAULT_DIMS[this._model] ?? 2560;\n }\n }\n\n /** Embed a single text. Wraps as [[text]] for the contextualized API. */\n async embed(text: string): Promise<Float32Array> {\n const results = await this._request([[text]]);\n return results[0];\n }\n\n /**\n * Embed multiple texts as chunks of contextualized documents.\n * Splits into sub-documents to stay under Perplexity's 32k token/doc limit.\n */\n async embedBatch(texts: string[]): Promise<Float32Array[]> {\n if (texts.length === 0) return [];\n\n const docs = splitIntoDocuments(texts);\n const results: Float32Array[] = [];\n\n for (let i = 0; i < docs.length; i++) {\n if (i > 0) await sleep(BATCH_DELAY_MS);\n const embeddings = await this._request([docs[i]]);\n results.push(...embeddings);\n }\n\n return results;\n }\n\n async close(): Promise<void> {\n // No resources to release\n }\n\n /** Send a contextualized request. Input is string[][] (docs × chunks). */\n private async _request(input: string[][]): Promise<Float32Array[]> {\n if (!this._apiKey) {\n throw new Error(\n 'BrainBank: Perplexity API key required. Set PERPLEXITY_API_KEY env var or pass apiKey option.',\n );\n }\n\n const MAX_CHARS = 24_000;\n const safeInput = input.map(doc =>\n doc.map(chunk => chunk.length > MAX_CHARS ? 
chunk.slice(0, MAX_CHARS) : chunk),\n );\n\n const body: Record<string, unknown> = { model: this._model, input: safeInput };\n if (this._requestDims) body.dimensions = this._requestDims;\n\n const controller = new AbortController();\n const timer = setTimeout(() => controller.abort(), this._timeout);\n\n let res: Response;\n try {\n res = await fetch(this._baseUrl, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this._apiKey}`,\n },\n body: JSON.stringify(body),\n signal: controller.signal,\n });\n } catch (err: unknown) {\n clearTimeout(timer);\n if (err instanceof Error && err.name === 'AbortError') {\n throw new Error(`BrainBank: Perplexity contextualized embedding request timed out after ${this._timeout}ms.`);\n }\n throw err;\n } finally {\n clearTimeout(timer);\n }\n\n if (!res.ok) {\n const errText = await res.text();\n throw new Error(`BrainBank: Perplexity contextualized embedding API error (${res.status}): ${errText}`);\n }\n\n const json = await res.json() as PerplexityContextResponse;\n return flattenContextResponse(json, this.dims);\n }\n}\n\n\ninterface PerplexityContextResponse {\n data: Array<{\n index: number;\n data: Array<{ index: number; embedding: string }>;\n }>;\n}\n\n/** Flatten nested doc → chunk response into a single flat array. 
*/\nfunction flattenContextResponse(json: PerplexityContextResponse, dims: number): Float32Array[] {\n return json.data\n .sort((a, b) => a.index - b.index)\n .flatMap(doc =>\n doc.data\n .sort((a, b) => a.index - b.index)\n .map(chunk => decodeBase64Int8(chunk.embedding, dims)),\n );\n}\n\n/**\n * Split chunks into sub-documents that each stay under the 32k token limit.\n * Uses ~4 chars/token estimate with safety margin (~80k chars ≈ ~20k tokens).\n */\nfunction splitIntoDocuments(texts: string[]): string[][] {\n const MAX_CHARS_PER_DOC = 80_000;\n const docs: string[][] = [];\n let current: string[] = [];\n let currentChars = 0;\n\n for (const text of texts) {\n if (current.length > 0 && currentChars + text.length > MAX_CHARS_PER_DOC) {\n docs.push(current);\n current = [];\n currentChars = 0;\n }\n current.push(text);\n currentChars += text.length;\n }\n\n if (current.length > 0) docs.push(current);\n return docs;\n}\n\n/** Simple delay helper. */\nfunction sleep(ms: number): Promise<void> {\n return new Promise(resolve => setTimeout(resolve, 
ms));\n}\n"],"mappings":";;;;;;;;AAuBA,IAAM,gBAAgB;AACtB,IAAM,eAAuC;AAAA,EACzC,8BAA8B;AAAA,EAC9B,4BAA4B;AAChC;AACA,IAAM,UAAU;AAEhB,IAAM,qBAAqB;AAC3B,IAAM,iBAAiB;AAehB,IAAM,6BAAN,MAA8D;AAAA,EA9CrE,OA8CqE;AAAA;AAAA;AAAA,EACxD;AAAA,EAED;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAER,YAAY,UAA6C,CAAC,GAAG;AACzD,SAAK,UAAU,QAAQ,UAAU,QAAQ,IAAI,sBAAsB;AACnE,SAAK,SAAS,QAAQ,SAAS;AAC/B,SAAK,WAAW,QAAQ,WAAW;AACnC,SAAK,WAAW,QAAQ,WAAW;AAEnC,QAAI,QAAQ,MAAM;AACd,WAAK,eAAe,QAAQ;AAC5B,WAAK,OAAO,QAAQ;AAAA,IACxB,OAAO;AACH,WAAK,OAAO,aAAa,KAAK,MAAM,KAAK;AAAA,IAC7C;AAAA,EACJ;AAAA;AAAA,EAGA,MAAM,MAAM,MAAqC;AAC7C,UAAM,UAAU,MAAM,KAAK,SAAS,CAAC,CAAC,IAAI,CAAC,CAAC;AAC5C,WAAO,QAAQ,CAAC;AAAA,EACpB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,WAAW,OAA0C;AACvD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,UAAM,OAAO,mBAAmB,KAAK;AACrC,UAAM,UAA0B,CAAC;AAEjC,aAAS,IAAI,GAAG,IAAI,KAAK,QAAQ,KAAK;AAClC,UAAI,IAAI,EAAG,OAAM,MAAM,cAAc;AACrC,YAAM,aAAa,MAAM,KAAK,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;AAChD,cAAQ,KAAK,GAAG,UAAU;AAAA,IAC9B;AAEA,WAAO;AAAA,EACX;AAAA,EAEA,MAAM,QAAuB;AAAA,EAE7B;AAAA;AAAA,EAGA,MAAc,SAAS,OAA4C;AAC/D,QAAI,CAAC,KAAK,SAAS;AACf,YAAM,IAAI;AAAA,QACN;AAAA,MACJ;AAAA,IACJ;AAEA,UAAM,YAAY;AAClB,UAAM,YAAY,MAAM;AAAA,MAAI,SACxB,IAAI,IAAI,WAAS,MAAM,SAAS,YAAY,MAAM,MAAM,GAAG,SAAS,IAAI,KAAK;AAAA,IACjF;AAEA,UAAM,OAAgC,EAAE,OAAO,KAAK,QAAQ,OAAO,UAAU;AAC7E,QAAI,KAAK,aAAc,MAAK,aAAa,KAAK;AAE9C,UAAM,aAAa,IAAI,gBAAgB;AACvC,UAAM,QAAQ,WAAW,MAAM,WAAW,MAAM,GAAG,KAAK,QAAQ;AAEhE,QAAI;AACJ,QAAI;AACA,YAAM,MAAM,MAAM,KAAK,UAAU;AAAA,QAC7B,QAAQ;AAAA,QACR,SAAS;AAAA,UACL,gBAAgB;AAAA,UAChB,iBAAiB,UAAU,KAAK,OAAO;AAAA,QAC3C;AAAA,QACA,MAAM,KAAK,UAAU,IAAI;AAAA,QACzB,QAAQ,WAAW;AAAA,MACvB,CAAC;AAAA,IACL,SAAS,KAAc;AACnB,mBAAa,KAAK;AAClB,UAAI,eAAe,SAAS,IAAI,SAAS,cAAc;AACnD,cAAM,IAAI,MAAM,0EAA0E,KAAK,QAAQ,KAAK;AAAA,MAChH;AACA,YAAM;AAAA,IACV,UAAE;AACE,mBAAa,KAAK;AAAA,IACtB;AAEA,QAAI,CAAC,IAAI,IAAI;AACT,YAAM,UAAU,MAAM,IAAI,KAAK;AAC/B,YAAM,IAAI,MAAM,6DAA6D,IAAI,MAAM,MAAM,OAAO,EAAE;AAAA,IAC1G;AAEA,UAAM,OAAO,MAAM,IAAI,KAAK;AAC5B,WAAO,uBAAuB,MAAM,KAAK,IAAI;AAAA,EACjD
;AACJ;AAWA,SAAS,uBAAuB,MAAiC,MAA8B;AAC3F,SAAO,KAAK,KACP,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC;AAAA,IAAQ,SACL,IAAI,KACC,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK,EAChC,IAAI,WAAS,iBAAiB,MAAM,WAAW,IAAI,CAAC;AAAA,EAC7D;AACR;AARS;AAcT,SAAS,mBAAmB,OAA6B;AACrD,QAAM,oBAAoB;AAC1B,QAAM,OAAmB,CAAC;AAC1B,MAAI,UAAoB,CAAC;AACzB,MAAI,eAAe;AAEnB,aAAW,QAAQ,OAAO;AACtB,QAAI,QAAQ,SAAS,KAAK,eAAe,KAAK,SAAS,mBAAmB;AACtE,WAAK,KAAK,OAAO;AACjB,gBAAU,CAAC;AACX,qBAAe;AAAA,IACnB;AACA,YAAQ,KAAK,IAAI;AACjB,oBAAgB,KAAK;AAAA,EACzB;AAEA,MAAI,QAAQ,SAAS,EAAG,MAAK,KAAK,OAAO;AACzC,SAAO;AACX;AAlBS;AAqBT,SAAS,MAAM,IAA2B;AACtC,SAAO,IAAI,QAAQ,aAAW,WAAW,SAAS,EAAE,CAAC;AACzD;AAFS;","names":[]}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-WCQVDF3K.js";
|
|
4
|
+
|
|
5
|
+
// src/plugin.ts
|
|
6
|
+
/**
 * Check whether a plugin can scan/index content.
 *
 * Nullish input yields `false` rather than raising a TypeError, so the guard
 * is safe to call on values that have not been validated yet.
 *
 * @param i - Candidate plugin object.
 * @returns `true` when `i.index` is a function.
 */
function isIndexable(i) {
  return typeof i?.index === "function";
}
|
|
9
|
+
__name(isIndexable, "isIndexable");
|
|
10
|
+
/**
 * Check whether a plugin can search content.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param i - Candidate plugin object.
 * @returns `true` when `i.search` is a function.
 */
function isSearchable(i) {
  return typeof i?.search === "function";
}
|
|
13
|
+
__name(isSearchable, "isSearchable");
|
|
14
|
+
/**
 * Check whether a plugin can watch its own data source.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param i - Candidate plugin object.
 * @returns `true` when `i.watch` is a function.
 */
function isWatchable(i) {
  return typeof i?.watch === "function";
}
|
|
17
|
+
__name(isWatchable, "isWatchable");
|
|
18
|
+
/**
 * Check whether a plugin manages document collections.
 *
 * Both `addCollection` and `listCollections` must be functions.
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param i - Candidate plugin object.
 * @returns `true` when both collection methods are present.
 */
function isDocsPlugin(i) {
  return (
    typeof i?.addCollection === "function" &&
    typeof i?.listCollections === "function"
  );
}
|
|
21
|
+
__name(isDocsPlugin, "isDocsPlugin");
|
|
22
|
+
/**
 * Check whether a plugin provides co-edit suggestions.
 *
 * The plugin must expose a `coEdits` object whose `suggest` member is a
 * function. Optional chaining is used instead of the `in` operator because
 * `in` throws on nullish and primitive operands; this form returns `false`
 * for those inputs and gives the same answer for ordinary objects.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.coEdits.suggest` is a function.
 */
function isCoEditPlugin(p) {
  return typeof p?.coEdits?.suggest === "function";
}
|
|
25
|
+
__name(isCoEditPlugin, "isCoEditPlugin");
|
|
26
|
+
/**
 * Check whether a plugin supports re-embedding.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.reembedConfig` is a function.
 */
function isReembeddable(p) {
  return typeof p?.reembedConfig === "function";
}
|
|
29
|
+
__name(isReembeddable, "isReembeddable");
|
|
30
|
+
/**
 * Check whether a plugin provides a domain vector search.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.createVectorSearch` is a function.
 */
function isVectorSearchPlugin(p) {
  return typeof p?.createVectorSearch === "function";
}
|
|
33
|
+
__name(isVectorSearchPlugin, "isVectorSearchPlugin");
|
|
34
|
+
/**
 * Check whether a plugin declares context fields.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.contextFields` is a function.
 */
function isContextFieldPlugin(p) {
  return typeof p?.contextFields === "function";
}
|
|
37
|
+
__name(isContextFieldPlugin, "isContextFieldPlugin");
|
|
38
|
+
/**
 * Check whether a plugin provides context formatting.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.formatContext` is a function.
 */
function isContextFormatterPlugin(p) {
  return typeof p?.formatContext === "function";
}
|
|
41
|
+
__name(isContextFormatterPlugin, "isContextFormatterPlugin");
|
|
42
|
+
/**
 * Check whether a plugin supports schema migrations.
 *
 * The plugin must carry a numeric `schemaVersion` and an array `migrations`.
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when both migration properties have the expected types.
 */
function isMigratable(p) {
  return typeof p?.schemaVersion === "number" && Array.isArray(p.migrations);
}
|
|
45
|
+
__name(isMigratable, "isMigratable");
|
|
46
|
+
/**
 * Check whether a plugin provides BM25 keyword search.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.searchBM25` is a function.
 */
function isBM25SearchPlugin(p) {
  return typeof p?.searchBM25 === "function";
}
|
|
49
|
+
__name(isBM25SearchPlugin, "isBM25SearchPlugin");
|
|
50
|
+
/**
 * Check whether a plugin supports context expansion.
 *
 * Both `buildManifest` and `resolveChunks` must be functions.
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when both expansion methods are present.
 */
function isExpandablePlugin(p) {
  return (
    typeof p?.buildManifest === "function" &&
    typeof p?.resolveChunks === "function"
  );
}
|
|
53
|
+
__name(isExpandablePlugin, "isExpandablePlugin");
|
|
54
|
+
/**
 * Check whether a plugin can resolve files directly.
 *
 * Nullish input yields `false` rather than raising a TypeError.
 *
 * @param p - Candidate plugin object.
 * @returns `true` when `p.resolveFiles` is a function.
 */
function isFileResolvable(p) {
  return typeof p?.resolveFiles === "function";
}
|
|
57
|
+
__name(isFileResolvable, "isFileResolvable");
|
|
58
|
+
|
|
59
|
+
export {
|
|
60
|
+
isIndexable,
|
|
61
|
+
isSearchable,
|
|
62
|
+
isWatchable,
|
|
63
|
+
isDocsPlugin,
|
|
64
|
+
isCoEditPlugin,
|
|
65
|
+
isReembeddable,
|
|
66
|
+
isVectorSearchPlugin,
|
|
67
|
+
isContextFieldPlugin,
|
|
68
|
+
isContextFormatterPlugin,
|
|
69
|
+
isMigratable,
|
|
70
|
+
isBM25SearchPlugin,
|
|
71
|
+
isExpandablePlugin,
|
|
72
|
+
isFileResolvable
|
|
73
|
+
};
|
|
74
|
+
//# sourceMappingURL=chunk-IMJJ2VEM.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/plugin.ts"],"sourcesContent":["/**\n * BrainBank — Plugin System\n * \n * Plugins are pluggable strategies that scan external data sources\n * and push content into BrainBank. Built-in plugins handle code,\n * git, and docs. Third-party frameworks (LangChain, etc.)\n * can implement custom plugins.\n * \n * import { BrainBank } from 'brainbank';\n * import { code } from 'brainbank/indexers/code';\n * \n * const brain = new BrainBank()\n * .use(code({ repoPath: '.' }));\n */\n\nimport type { DatabaseAdapter } from './db/adapter.ts';\nimport type { Migration } from './db/migrations.ts';\nimport type { IncrementalTracker } from './db/tracker.ts';\nimport type { HNSWIndex } from './providers/vector/hnsw-index.ts';\nimport type { DomainVectorSearch } from './search/types.ts';\nimport type { WebhookServer } from './services/webhook-server.ts';\nimport type {\n EmbeddingProvider, SearchResult, IndexResult, ProgressCallback,\n ResolvedConfig, DocumentCollection, ICollection,\n WatchEventHandler, WatchHandle, WatchConfig,\n ExpanderManifestItem,\n} from './types.ts';\n\n// Provided to each plugin during initialization.\n\nexport interface PluginContext {\n /** Database adapter (shared across all plugins). */\n db: DatabaseAdapter;\n /** Embedding provider (shared). */\n embedding: EmbeddingProvider;\n /** Resolved BrainBank config. */\n config: ResolvedConfig;\n /**\n * Create and initialize an HNSW index.\n * Pass `name` to enable disk persistence (recommended).\n *\n * **Private vs shared:** Use `getOrCreateSharedHnsw()` for indexes that should be\n * part of the composite search (code, git) and persisted across restarts.\n * Use `createHnsw()` for plugin-local indexes that don't participate in the\n * main search pipeline (e.g. internal similarity lookups).\n */\n createHnsw(maxElements?: number, dims?: number, name?: string): Promise<HNSWIndex>;\n /** Load existing vectors from a SQLite vectors table into an HNSW index + cache. 
*/\n loadVectors(table: string, idCol: string, hnsw: HNSWIndex, cache: Map<number, Float32Array>): void;\n /**\n * Get or create a shared HNSW index by key.\n *\n * **HNSW sharing strategies:**\n * The `type` key determines sharing behavior. Two plugins that pass the\n * same key share one HNSW index; different keys get separate indexes.\n *\n * | Plugin type | Key passed | Sharing behavior |\n * |------------|------------------|------------------------------------------|\n * | git | `'git'` | All `git:*` repos share one HNSW |\n * | docs | `'docs'` | All docs share one HNSW |\n * | code | `this.name` | Each `code:*` repo gets its own HNSW |\n *\n * **Rule of thumb:**\n * - Same key = shared index (saves memory, single search covers all)\n * - Plugin name as key = per-repo index (avoids cross-repo noise)\n *\n * The key is also used for hot-reload (`ensureFresh`) and disk persistence\n * (`hnsw-<key>.index`), so it must match the key used in `bumpVersion()`.\n */\n getOrCreateSharedHnsw(type: string, maxElements?: number, dims?: number): Promise<{ hnsw: HNSWIndex; vecCache: Map<number, Float32Array>; isNew: boolean }>;\n /** Get or create a dynamic collection. */\n collection(name: string): ICollection;\n /**\n * Create an incremental tracker scoped to this plugin.\n * Provides `isUnchanged`, `markIndexed`, `findOrphans`, `remove`, `clear`\n * for standardized add/update/delete detection during indexing.\n */\n createTracker(): IncrementalTracker;\n /** Optional webhook server for push-based watch plugins. undefined if not configured. */\n webhookServer?: WebhookServer;\n}\n\n// Minimal contract: name + initialize. All capabilities are expressed\n// via composed interfaces below.\n\nexport interface Plugin {\n /** Unique plugin name (e.g. 'code', 'git', 'docs'). */\n readonly name: string;\n /** Initialize the plugin (create HNSW, load vectors, etc.). */\n initialize(ctx: PluginContext): Promise<void>;\n /** Return stats for this plugin. 
*/\n stats?(): Record<string, number | string>;\n /** Clean up resources. */\n close?(): void;\n}\n\n// Implemented by plugins that support specific capabilities.\n// Use type guards below to check at runtime.\n\n/** Options accepted by IndexablePlugin.index(). */\nexport interface IndexOptions {\n forceReindex?: boolean;\n depth?: number;\n onProgress?: ProgressCallback;\n}\n\n/** Plugins that can scan and index content (code, git). */\nexport interface IndexablePlugin extends Plugin {\n index(options?: IndexOptions): Promise<IndexResult>;\n /** Incremental: re-index only specific items by ID. Falls back to index() if not implemented. */\n indexItems?(ids: string[]): Promise<IndexResult>;\n}\n\n/** Plugins that can search indexed content (docs). */\nexport interface SearchablePlugin extends Plugin {\n search(query: string, options?: Record<string, unknown>): Promise<SearchResult[]>;\n}\n\n/** Plugins that can watch their own data source for changes. */\nexport interface WatchablePlugin extends Plugin {\n /** Start watching. Plugin controls how (fs.watch, polling, webhook, etc.). */\n watch(onEvent: WatchEventHandler): WatchHandle;\n /** Optional hints for the core (debounce, batching, priority). */\n watchConfig?(): WatchConfig;\n}\n\n\n/** Check if a plugin can scan/index content. */\nexport function isIndexable(i: Plugin): i is IndexablePlugin {\n return typeof (i as IndexablePlugin).index === 'function';\n}\n\n/** Check if a plugin can search content. */\nexport function isSearchable(i: Plugin): i is SearchablePlugin {\n return typeof (i as SearchablePlugin).search === 'function';\n}\n\n/** Check if a plugin can watch its own data source. */\nexport function isWatchable(i: Plugin): i is WatchablePlugin {\n return typeof (i as WatchablePlugin).watch === 'function';\n}\n\n/** Path-specific context metadata for document collections. 
*/\nexport interface PathContext {\n collection: string;\n path: string;\n context: string;\n}\n\n/** Plugins that manage document collections (docs). */\nexport interface DocsPlugin extends SearchablePlugin {\n addCollection(collection: DocumentCollection): void;\n removeCollection(name: string): void;\n listCollections(): DocumentCollection[];\n indexDocs(options?: { onProgress?: (collection: string, file: string, current: number, total: number) => void }): Promise<Record<string, { indexed: number; skipped: number; removed: number; chunks: number }>>;\n addContext(collection: string, path: string, context: string): void;\n listContexts(): PathContext[];\n}\n\n/** Check if a plugin manages document collections. */\nexport function isDocsPlugin(i: Plugin): i is DocsPlugin {\n return typeof (i as DocsPlugin).addCollection === 'function'\n && typeof (i as DocsPlugin).listCollections === 'function';\n}\n\n\n/** Plugin that provides co-edit suggestions (e.g. git). */\nexport interface CoEditPlugin extends Plugin {\n coEdits: {\n suggest(filePath: string, limit: number): { file: string; count: number }[];\n };\n}\n\n/** Check if a plugin provides co-edit suggestions. */\nexport function isCoEditPlugin(p: Plugin): p is CoEditPlugin {\n return 'coEdits' in p && typeof (p as CoEditPlugin).coEdits?.suggest === 'function';\n}\n\n\n/** Table descriptor for re-embedding — maps text rows to vector BLOBs. */\nexport interface ReembedTable {\n /** Human-readable name (for progress). */\n name: string;\n /** Table with text content. */\n textTable: string;\n /** Table with vector BLOBs. */\n vectorTable: string;\n /** PK column in text table. */\n idColumn: string;\n /** FK column in vector table. */\n fkColumn: string;\n /** Build the embedding text from a DB row. */\n textBuilder: (row: Record<string, unknown>) => string;\n}\n\n/** Plugins that own vector tables and can rebuild embedding text from DB rows. 
*/\nexport interface ReembeddablePlugin extends Plugin {\n /** Table descriptor for re-embedding. */\n reembedConfig(): ReembedTable;\n}\n\n/** Check if a plugin supports re-embedding. */\nexport function isReembeddable(p: Plugin): p is ReembeddablePlugin {\n return typeof (p as ReembeddablePlugin).reembedConfig === 'function';\n}\n\n\n/** Plugin that provides a domain-specific vector search strategy. */\nexport interface VectorSearchPlugin extends Plugin {\n /** Create the domain vector search (called during SearchAPI wiring). */\n createVectorSearch(): DomainVectorSearch | undefined;\n}\n\n/** Check if a plugin provides a domain vector search. */\nexport function isVectorSearchPlugin(p: Plugin): p is VectorSearchPlugin {\n return typeof (p as VectorSearchPlugin).createVectorSearch === 'function';\n}\n\n/** Describes a configurable context field that a plugin supports. */\nexport interface ContextFieldDef {\n /** Field name (e.g. 'lines', 'callTree', 'symbols'). Must be unique per plugin. */\n name: string;\n /** Accepted value type. 'object' allows nested config like `{ depth: 3 }`. */\n type: 'boolean' | 'number' | 'object';\n /** Default value (used when not specified in config or query). */\n default: unknown;\n /** Human-readable description for CLI --help and MCP tool descriptions. */\n description: string;\n}\n\n/** Plugin that declares configurable context fields. */\nexport interface ContextFieldPlugin extends Plugin {\n /** Declare available context fields. Called once during setup. */\n contextFields(): ContextFieldDef[];\n}\n\n/** Check if a plugin declares context fields. */\nexport function isContextFieldPlugin(p: Plugin): p is ContextFieldPlugin {\n return typeof (p as ContextFieldPlugin).contextFields === 'function';\n}\n\n/** Plugin that contributes sections to the context builder output. 
*/\nexport interface ContextFormatterPlugin extends Plugin {\n /**\n * Append formatted markdown sections to `parts`.\n * `fields` contains resolved context fields (plugin defaults ← config ← per-query).\n */\n formatContext(results: SearchResult[], parts: string[], fields: Record<string, unknown>): void;\n}\n\n/** Check if a plugin provides context formatting. */\nexport function isContextFormatterPlugin(p: Plugin): p is ContextFormatterPlugin {\n return typeof (p as ContextFormatterPlugin).formatContext === 'function';\n}\n\n\n/** Plugin that owns database tables and supports versioned migrations. */\nexport interface MigratablePlugin extends Plugin {\n /** Current schema version for this plugin. */\n readonly schemaVersion: number;\n /** Ordered list of migrations (version 1, 2, 3, …). */\n readonly migrations: Migration[];\n}\n\n/** Check if a plugin supports schema migrations. */\nexport function isMigratable(p: Plugin): p is MigratablePlugin {\n return typeof (p as MigratablePlugin).schemaVersion === 'number'\n && Array.isArray((p as MigratablePlugin).migrations);\n}\n\n\n/** Plugin that can do FTS5 keyword search on its own tables. */\nexport interface BM25SearchPlugin extends Plugin {\n /** Run BM25 keyword search. Returns scored results. */\n searchBM25(query: string, k: number, minScore?: number): SearchResult[];\n /** Rebuild the FTS5 index from the content table. */\n rebuildFTS?(): void;\n}\n\n/** Check if a plugin provides BM25 keyword search. */\nexport function isBM25SearchPlugin(p: Plugin): p is BM25SearchPlugin {\n return typeof (p as BM25SearchPlugin).searchBM25 === 'function';\n}\n\n/** Plugin that supports context expansion (provides manifest + resolves chunk IDs). 
*/\nexport interface ExpandablePlugin extends Plugin {\n /**\n * Build a manifest of candidate chunks for LLM expansion.\n * Returns chunks from files NOT already in search results.\n * Priority chunks (from import graph neighbors) are marked with `priority: true`.\n *\n * @param excludeFilePaths File paths already present in search results — excluded from manifest.\n * @param excludeIds Chunk IDs already in search results — excluded from manifest.\n * @param resultFilePaths File paths in search results — used to query import graph for priority chunks.\n */\n buildManifest(excludeFilePaths: string[], excludeIds: number[], resultFilePaths?: string[]): ExpanderManifestItem[];\n /**\n * Resolve chunk IDs back into SearchResults.\n * Called after the expander selects additional IDs.\n */\n resolveChunks(ids: number[]): SearchResult[];\n}\n\n/** Check if a plugin supports context expansion. */\nexport function isExpandablePlugin(p: Plugin): p is ExpandablePlugin {\n return typeof (p as ExpandablePlugin).buildManifest === 'function'\n && typeof (p as ExpandablePlugin).resolveChunks === 'function';\n}\n\n\n/** Plugin that can resolve file paths/patterns directly to SearchResults (no search). */\nexport interface FileResolvablePlugin extends Plugin {\n /**\n * Resolve file paths, directories, and glob patterns to SearchResults.\n * Each entry is resolved: exact → directory → glob → fuzzy basename fallback.\n *\n * @param patterns - File paths, directory prefixes (trailing `/`), or glob patterns (`*`).\n */\n resolveFiles(patterns: string[]): SearchResult[];\n}\n\n/** Check if a plugin can resolve files directly. */\nexport function isFileResolvable(p: Plugin): p is FileResolvablePlugin {\n return typeof (p as FileResolvablePlugin).resolveFiles === 'function';\n}\n\n\n// ── Third-party Plugin Discovery (TUI integration) ────────────────\n\n/**\n * Scan info for the TUI sidebar. 
Describes what content a plugin can index.\n * Exported standalone by plugin packages — called BEFORE plugin initialization.\n *\n * @example\n * ```typescript\n * // brainbank-csv/index.ts\n * export function scan(repoPath: string): PluginScanInfo { ... }\n * ```\n */\nexport interface PluginScanInfo {\n /** Plugin name (e.g. 'csv', 'openapi'). */\n name: string;\n /** Whether there's content available to index. */\n available: boolean;\n /** Human-readable summary (e.g. '12 CSV files'). */\n summary: string;\n /** Emoji icon for TUI display. */\n icon: string;\n /** Whether checked by default in the module selector. */\n checked: boolean;\n /** Reason this plugin is disabled (shown when unavailable). */\n disabled?: string;\n /** Detail lines for the scan tree (e.g. per-file breakdown). */\n details?: string[];\n}\n\n/**\n * A single line in the TUI explorer preview panel.\n * Returned by `preview()` — rendered as-is in the right panel.\n *\n * @example\n * ```typescript\n * // brainbank-csv/index.ts\n * export function preview(repoPath: string): PluginPreviewLine[] {\n * return [\n * { text: '📊 3 CSV files', bold: true },\n * { text: ' sales.csv 2.3 MB', color: '#9ECE6A' },\n * ];\n * }\n * ```\n */\nexport interface PluginPreviewLine {\n /** Text content for this line. */\n text: string;\n /** Optional hex color (e.g. '#9ECE6A'). */\n color?: string;\n /** Render bold. */\n bold?: boolean;\n /** Render dimmed. 
*/\n dim?: boolean;\n}\n"],"mappings":";;;;;AAgIO,SAAS,YAAY,GAAiC;AACzD,SAAO,OAAQ,EAAsB,UAAU;AACnD;AAFgB;AAKT,SAAS,aAAa,GAAkC;AAC3D,SAAO,OAAQ,EAAuB,WAAW;AACrD;AAFgB;AAKT,SAAS,YAAY,GAAiC;AACzD,SAAO,OAAQ,EAAsB,UAAU;AACnD;AAFgB;AAsBT,SAAS,aAAa,GAA4B;AACrD,SAAO,OAAQ,EAAiB,kBAAkB,cAC3C,OAAQ,EAAiB,oBAAoB;AACxD;AAHgB;AAcT,SAAS,eAAe,GAA8B;AACzD,SAAO,aAAa,KAAK,OAAQ,EAAmB,SAAS,YAAY;AAC7E;AAFgB;AA4BT,SAAS,eAAe,GAAoC;AAC/D,SAAO,OAAQ,EAAyB,kBAAkB;AAC9D;AAFgB;AAYT,SAAS,qBAAqB,GAAoC;AACrE,SAAO,OAAQ,EAAyB,uBAAuB;AACnE;AAFgB;AAuBT,SAAS,qBAAqB,GAAoC;AACrE,SAAO,OAAQ,EAAyB,kBAAkB;AAC9D;AAFgB;AAcT,SAAS,yBAAyB,GAAwC;AAC7E,SAAO,OAAQ,EAA6B,kBAAkB;AAClE;AAFgB;AAcT,SAAS,aAAa,GAAkC;AAC3D,SAAO,OAAQ,EAAuB,kBAAkB,YACjD,MAAM,QAAS,EAAuB,UAAU;AAC3D;AAHgB;AAeT,SAAS,mBAAmB,GAAkC;AACjE,SAAO,OAAQ,EAAuB,eAAe;AACzD;AAFgB;AAwBT,SAAS,mBAAmB,GAAkC;AACjE,SAAO,OAAQ,EAAuB,kBAAkB,cACjD,OAAQ,EAAuB,kBAAkB;AAC5D;AAHgB;AAkBT,SAAS,iBAAiB,GAAsC;AACnE,SAAO,OAAQ,EAA2B,iBAAiB;AAC/D;AAFgB;","names":[]}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-WCQVDF3K.js";
|
|
4
|
+
|
|
5
|
+
// src/lib/provider-key.ts
|
|
6
|
+
/**
 * Infer the embedding-provider key from an existing provider instance.
 *
 * The decision is based solely on the instance's constructor name; any name
 * that is not one of the three recognised remote providers maps to "local".
 *
 * @param p - Embedding provider instance.
 * @returns One of "openai", "perplexity", "perplexity-context" or "local".
 */
function providerKey(p) {
  // A missing constructor or name collapses to "", which matches no case.
  const ctorName = p.constructor?.name ?? "";
  switch (ctorName) {
    case "OpenAIEmbedding":
      return "openai";
    case "PerplexityEmbedding":
      return "perplexity";
    case "PerplexityContextEmbedding":
      return "perplexity-context";
    default:
      return "local";
  }
}
|
|
13
|
+
__name(providerKey, "providerKey");
|
|
14
|
+
|
|
15
|
+
// src/providers/embeddings/resolve.ts
|
|
16
|
+
/**
 * Resolve an embedding provider from a stored key string.
 *
 * The matching provider chunk is loaded lazily via dynamic `import()` and a
 * fresh instance is constructed with default options. "local" and every
 * unrecognised key resolve to the local provider.
 *
 * @param key - Provider key ("openai", "perplexity", "perplexity-context", "local").
 * @returns A promise for a newly constructed provider instance.
 */
async function resolveEmbedding(key) {
  if (key === "openai") {
    const openaiChunk = await import("./openai-embedding-ZP5TSUJG.js");
    return new openaiChunk.OpenAIEmbedding();
  }
  if (key === "perplexity") {
    const perplexityChunk = await import("./perplexity-embedding-KZRYGJRC.js");
    return new perplexityChunk.PerplexityEmbedding();
  }
  if (key === "perplexity-context") {
    const contextChunk = await import("./perplexity-context-embedding-GI5PHE6X.js");
    return new contextChunk.PerplexityContextEmbedding();
  }
  // "local" and anything unknown share the same fallback.
  const localChunk = await import("./local-embedding-NZQTILGV.js");
  return new localChunk.LocalEmbedding();
}
|
|
37
|
+
__name(resolveEmbedding, "resolveEmbedding");
|
|
38
|
+
|
|
39
|
+
export {
|
|
40
|
+
providerKey,
|
|
41
|
+
resolveEmbedding
|
|
42
|
+
};
|
|
43
|
+
//# sourceMappingURL=chunk-M744PCJQ.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/lib/provider-key.ts","../src/providers/embeddings/resolve.ts"],"sourcesContent":["/**\n * BrainBank — Provider Key\n *\n * Infers a stable key from an existing EmbeddingProvider instance.\n * Lives in lib/ (Layer 0) to avoid db/ → providers/ dependency.\n */\n\nimport type { EmbeddingProvider } from '@/types.ts';\n\n/** Known embedding provider keys. */\nexport type EmbeddingKey = 'local' | 'openai' | 'perplexity' | 'perplexity-context';\n\n/** Infer a stable key from an existing provider instance. */\nexport function providerKey(p: EmbeddingProvider): EmbeddingKey {\n const name = p.constructor?.name ?? '';\n if (name === 'OpenAIEmbedding') return 'openai';\n if (name === 'PerplexityEmbedding') return 'perplexity';\n if (name === 'PerplexityContextEmbedding') return 'perplexity-context';\n return 'local';\n}\n","/**\n * BrainBank — Embedding Provider Resolver\n *\n * Resolves an EmbeddingProvider from a stored key string.\n * Used by the Initializer to auto-resolve from DB config.\n */\n\nimport type { EmbeddingProvider } from '@/types.ts';\n\n/** Re-export providerKey from lib/ (canonical location). */\nexport { providerKey, type EmbeddingKey } from '@/lib/provider-key.ts';\n\n/** Resolve an EmbeddingProvider from a key string. Lazy-loads the provider module. 
*/\nexport async function resolveEmbedding(key: string): Promise<EmbeddingProvider> {\n switch (key) {\n case 'openai': {\n const { OpenAIEmbedding } = await import('./openai-embedding.ts');\n return new OpenAIEmbedding();\n }\n case 'perplexity': {\n const { PerplexityEmbedding } = await import('./perplexity-embedding.ts');\n return new PerplexityEmbedding();\n }\n case 'perplexity-context': {\n const { PerplexityContextEmbedding } = await import('./perplexity-context-embedding.ts');\n return new PerplexityContextEmbedding();\n }\n case 'local':\n default: {\n const { LocalEmbedding } = await import('./local-embedding.ts');\n return new LocalEmbedding();\n }\n }\n}\n"],"mappings":";;;;;AAaO,SAAS,YAAY,GAAoC;AAC5D,QAAM,OAAO,EAAE,aAAa,QAAQ;AACpC,MAAI,SAAS,kBAAmB,QAAO;AACvC,MAAI,SAAS,sBAAuB,QAAO;AAC3C,MAAI,SAAS,6BAA8B,QAAO;AAClD,SAAO;AACX;AANgB;;;ACAhB,eAAsB,iBAAiB,KAAyC;AAC5E,UAAQ,KAAK;AAAA,IACT,KAAK,UAAU;AACX,YAAM,EAAE,gBAAgB,IAAI,MAAM,OAAO,gCAAuB;AAChE,aAAO,IAAI,gBAAgB;AAAA,IAC/B;AAAA,IACA,KAAK,cAAc;AACf,YAAM,EAAE,oBAAoB,IAAI,MAAM,OAAO,oCAA2B;AACxE,aAAO,IAAI,oBAAoB;AAAA,IACnC;AAAA,IACA,KAAK,sBAAsB;AACvB,YAAM,EAAE,2BAA2B,IAAI,MAAM,OAAO,4CAAmC;AACvF,aAAO,IAAI,2BAA2B;AAAA,IAC1C;AAAA,IACA,KAAK;AAAA,IACL,SAAS;AACL,YAAM,EAAE,eAAe,IAAI,MAAM,OAAO,+BAAsB;AAC9D,aAAO,IAAI,eAAe;AAAA,IAC9B;AAAA,EACJ;AACJ;AApBsB;","names":[]}
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import {
|
|
2
|
+
__name
|
|
3
|
+
} from "./chunk-WCQVDF3K.js";
|
|
4
|
+
|
|
5
|
+
// src/providers/embeddings/local-embedding.ts
|
|
6
|
+
/**
 * Local, in-process embedding provider backed by `@xenova/transformers`.
 *
 * Defaults to the `Xenova/all-MiniLM-L6-v2` model and a `.model-cache`
 * cache directory; both can be overridden through the constructor options.
 */
var LocalEmbedding = class {
  static {
    __name(this, "LocalEmbedding");
  }
  /** Advertised vector width (384 for the default model). */
  dims = 384;
  /** Loaded feature-extraction pipeline, or null until first use / after close(). */
  _pipeline = null;
  _modelName;
  _cacheDir;
  /**
   * @param options - Optional `model` name and `cacheDir` overrides.
   */
  constructor(options = {}) {
    this._modelName = options.model ?? "Xenova/all-MiniLM-L6-v2";
    this._cacheDir = options.cacheDir ?? ".model-cache";
  }
  /** In-flight pipeline load, shared by concurrent callers. */
  _pipelinePromise = null;
  /**
   * Lazy-load the transformer pipeline.
   * Singleton — created once and reused.
   * Promise-deduped to prevent concurrent downloads.
   *
   * @returns The loaded feature-extraction pipeline.
   */
  async _getPipeline() {
    if (this._pipeline) return this._pipeline;
    if (this._pipelinePromise) return this._pipelinePromise;
    this._pipelinePromise = (async () => {
      const mod = await import(
        /* webpackIgnore: true */
        "@xenova/transformers"
      );
      const { pipeline, env } = mod;
      env.cacheDir = this._cacheDir;
      env.allowLocalModels = true;
      this._pipeline = await pipeline("feature-extraction", this._modelName, {
        quantized: true
      });
      return this._pipeline;
    })();
    try {
      return await this._pipelinePromise;
    } finally {
      // Cleared on success and failure alike, so a failed load can be retried.
      this._pipelinePromise = null;
    }
  }
  /**
   * Embed a single text string.
   *
   * @param text - Text to embed.
   * @returns A normalized vector (length 384 with the default model).
   */
  async embed(text) {
    const pipe = await this._getPipeline();
    const output = await pipe(text, { pooling: "mean", normalize: true });
    // Return a copy, the same precaution embedBatch takes: the pipeline may
    // reuse the buffer behind output.data on a later call.
    return output.data.slice();
  }
  /**
   * Embed multiple texts using real batch processing.
   * Chunks into groups of BATCH_SIZE to balance throughput vs memory.
   *
   * @param texts - Texts to embed; an empty array yields an empty result.
   * @returns One vector per input text, in input order.
   */
  async embedBatch(texts) {
    if (texts.length === 0) return [];
    const BATCH_SIZE = 32;
    const pipe = await this._getPipeline();
    const results = [];
    for (let i = 0; i < texts.length; i += BATCH_SIZE) {
      const batch = texts.slice(i, i + BATCH_SIZE);
      const output = await pipe(batch, { pooling: "mean", normalize: true });
      // output.data is one flat array holding every row of the batch. Derive
      // the row width from its length so a non-default model (whose width may
      // differ from this.dims) is still split on the right boundaries; fall
      // back to this.dims if the length does not divide evenly.
      const derived = output.data.length / batch.length;
      const width = Number.isInteger(derived) && derived > 0 ? derived : this.dims;
      for (let j = 0; j < batch.length; j++) {
        const start = j * width;
        // slice() copies, so each vector is independent of the pipeline buffer.
        results.push(output.data.slice(start, start + width));
      }
    }
    return results;
  }
  /** Drop the cached pipeline; the next embed call loads it again. */
  async close() {
    this._pipeline = null;
  }
};
|
|
78
|
+
|
|
79
|
+
export {
|
|
80
|
+
LocalEmbedding
|
|
81
|
+
};
|
|
82
|
+
//# sourceMappingURL=chunk-O3J6ZIXK.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/providers/embeddings/local-embedding.ts"],"sourcesContent":["/**\n * BrainBank — Local Embedding Provider\n * \n * Uses @xenova/transformers with all-MiniLM-L6-v2 (384 dims, WASM).\n * Downloads ~23MB on first use, cached locally.\n * No external API calls — runs entirely in-process.\n */\n\nimport type { EmbeddingProvider } from '@/types.ts';\n\n/** Minimal interface for @xenova/transformers pipeline results. */\ninterface XenovaPipelineOutput {\n data: Float32Array;\n}\n\n/** Callable pipeline returned by @xenova/transformers. */\ninterface XenovaPipeline {\n (texts: string | string[], options: { pooling: string; normalize: boolean }): Promise<XenovaPipelineOutput>;\n}\n\n/** Configuration environment of @xenova/transformers. */\ninterface XenovaEnv {\n cacheDir: string;\n allowLocalModels: boolean;\n}\n\n/** Shape of the @xenova/transformers module used here. */\ninterface XenovaModule {\n pipeline(task: string, model: string, options?: { quantized?: boolean }): Promise<XenovaPipeline>;\n env: XenovaEnv;\n}\n\nexport class LocalEmbedding implements EmbeddingProvider {\n readonly dims: number = 384;\n\n private _pipeline: XenovaPipeline | null = null;\n private _modelName: string;\n private _cacheDir: string;\n\n constructor(options: { model?: string; cacheDir?: string } = {}) {\n this._modelName = options.model ?? 'Xenova/all-MiniLM-L6-v2';\n this._cacheDir = options.cacheDir ?? 
'.model-cache';\n }\n\n private _pipelinePromise: Promise<XenovaPipeline> | null = null;\n\n /**\n * Lazy-load the transformer pipeline.\n * Singleton — created once and reused.\n * Promise-deduped to prevent concurrent downloads.\n */\n private async _getPipeline(): Promise<XenovaPipeline> {\n if (this._pipeline) return this._pipeline;\n if (this._pipelinePromise) return this._pipelinePromise;\n\n this._pipelinePromise = (async () => {\n const mod = await import(/* webpackIgnore: true */ '@xenova/transformers' as string) as XenovaModule;\n const { pipeline, env } = mod;\n env.cacheDir = this._cacheDir;\n env.allowLocalModels = true;\n\n this._pipeline = await pipeline('feature-extraction', this._modelName, {\n quantized: true,\n });\n\n return this._pipeline!;\n })();\n\n try {\n return await this._pipelinePromise;\n } finally {\n this._pipelinePromise = null;\n }\n }\n\n /**\n * Embed a single text string.\n * Returns a normalized Float32Array of length 384.\n */\n async embed(text: string): Promise<Float32Array> {\n const pipe = await this._getPipeline();\n const output = await pipe(text, { pooling: 'mean', normalize: true });\n return output.data as Float32Array;\n }\n\n /**\n * Embed multiple texts using real batch processing.\n * Chunks into groups of BATCH_SIZE to balance throughput vs memory.\n */\n async embedBatch(texts: string[]): Promise<Float32Array[]> {\n if (texts.length === 0) return [];\n\n const BATCH_SIZE = 32;\n const pipe = await this._getPipeline();\n const results: Float32Array[] = [];\n\n for (let i = 0; i < texts.length; i += BATCH_SIZE) {\n const batch = texts.slice(i, i + BATCH_SIZE);\n const output = await pipe(batch, { pooling: 'mean', normalize: true });\n\n // output.data is a flat Float32Array — must copy, not view,\n // because the pipeline may reuse the underlying buffer\n for (let j = 0; j < batch.length; j++) {\n const start = j * this.dims;\n results.push(output.data.slice(start, start + this.dims) as Float32Array);\n }\n }\n\n 
return results;\n }\n\n async close(): Promise<void> {\n this._pipeline = null;\n }\n}\n"],"mappings":";;;;;AAgCO,IAAM,iBAAN,MAAkD;AAAA,EAhCzD,OAgCyD;AAAA;AAAA;AAAA,EAC5C,OAAe;AAAA,EAEhB,YAAmC;AAAA,EACnC;AAAA,EACA;AAAA,EAER,YAAY,UAAiD,CAAC,GAAG;AAC7D,SAAK,aAAa,QAAQ,SAAS;AACnC,SAAK,YAAY,QAAQ,YAAY;AAAA,EACzC;AAAA,EAEQ,mBAAmD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAO3D,MAAc,eAAwC;AAClD,QAAI,KAAK,UAAW,QAAO,KAAK;AAChC,QAAI,KAAK,iBAAkB,QAAO,KAAK;AAEvC,SAAK,oBAAoB,YAAY;AACjC,YAAM,MAAM,MAAM;AAAA;AAAA,QAAiC;AAAA,MAAgC;AACnF,YAAM,EAAE,UAAU,IAAI,IAAI;AAC1B,UAAI,WAAW,KAAK;AACpB,UAAI,mBAAmB;AAEvB,WAAK,YAAY,MAAM,SAAS,sBAAsB,KAAK,YAAY;AAAA,QACnE,WAAW;AAAA,MACf,CAAC;AAED,aAAO,KAAK;AAAA,IAChB,GAAG;AAEH,QAAI;AACA,aAAO,MAAM,KAAK;AAAA,IACtB,UAAE;AACE,WAAK,mBAAmB;AAAA,IAC5B;AAAA,EACJ;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,MAAM,MAAqC;AAC7C,UAAM,OAAO,MAAM,KAAK,aAAa;AACrC,UAAM,SAAS,MAAM,KAAK,MAAM,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AACpE,WAAO,OAAO;AAAA,EAClB;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,WAAW,OAA0C;AACvD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,UAAM,aAAa;AACnB,UAAM,OAAO,MAAM,KAAK,aAAa;AACrC,UAAM,UAA0B,CAAC;AAEjC,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,YAAY;AAC/C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,UAAU;AAC3C,YAAM,SAAS,MAAM,KAAK,OAAO,EAAE,SAAS,QAAQ,WAAW,KAAK,CAAC;AAIrE,eAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK;AACnC,cAAM,QAAQ,IAAI,KAAK;AACvB,gBAAQ,KAAK,OAAO,KAAK,MAAM,OAAO,QAAQ,KAAK,IAAI,CAAiB;AAAA,MAC5E;AAAA,IACJ;AAEA,WAAO;AAAA,EACX;AAAA,EAEA,MAAM,QAAuB;AACzB,SAAK,YAAY;AAAA,EACrB;AACJ;","names":[]}
|