brainbank 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +149 -16
- package/dist/{types-Da_zLLOl.d.ts → base-9vfWRHCV.d.ts} +131 -31
- package/dist/{chunk-YGSEUWLV.js → chunk-6MFTQV3O.js} +911 -674
- package/dist/chunk-6MFTQV3O.js.map +1 -0
- package/dist/chunk-7JCEW7LT.js +266 -0
- package/dist/chunk-7JCEW7LT.js.map +1 -0
- package/dist/{chunk-GOUBW7UA.js → chunk-F6SJ3U4H.js} +98 -34
- package/dist/chunk-F6SJ3U4H.js.map +1 -0
- package/dist/{chunk-MJ3Y24H6.js → chunk-FJJY4H2Y.js} +11 -11
- package/dist/chunk-FJJY4H2Y.js.map +1 -0
- package/dist/{chunk-3GAIDXRW.js → chunk-GUT5MSJT.js} +5 -11
- package/dist/chunk-GUT5MSJT.js.map +1 -0
- package/dist/{chunk-2P3EGY6S.js → chunk-QNHBCOKB.js} +2 -2
- package/dist/chunk-QNHBCOKB.js.map +1 -0
- package/dist/{chunk-4ZKBQ33J.js → chunk-V4UJKXPK.js} +23 -5
- package/dist/chunk-V4UJKXPK.js.map +1 -0
- package/dist/chunk-WR4WXKJT.js +723 -0
- package/dist/chunk-WR4WXKJT.js.map +1 -0
- package/dist/{chunk-Z5SU54HP.js → chunk-X6645UVR.js} +3 -3
- package/dist/chunk-X6645UVR.js.map +1 -0
- package/dist/cli.js +150 -100
- package/dist/cli.js.map +1 -1
- package/dist/code.d.ts +5 -5
- package/dist/code.js +1 -1
- package/dist/docs.d.ts +4 -6
- package/dist/docs.js +1 -1
- package/dist/git.d.ts +5 -5
- package/dist/git.js +1 -1
- package/dist/index.d.ts +95 -104
- package/dist/index.js +13 -13
- package/dist/memory.d.ts +5 -7
- package/dist/memory.js +9 -12
- package/dist/memory.js.map +1 -1
- package/dist/notes.d.ts +4 -6
- package/dist/notes.js +7 -10
- package/dist/notes.js.map +1 -1
- package/dist/{openai-PCTYLOWI.js → openai-CYDMYX7X.js} +2 -2
- package/package.json +24 -4
- package/dist/chunk-2P3EGY6S.js.map +0 -1
- package/dist/chunk-3GAIDXRW.js.map +0 -1
- package/dist/chunk-4ZKBQ33J.js.map +0 -1
- package/dist/chunk-EDKSKLX4.js +0 -490
- package/dist/chunk-EDKSKLX4.js.map +0 -1
- package/dist/chunk-GOUBW7UA.js.map +0 -1
- package/dist/chunk-MJ3Y24H6.js.map +0 -1
- package/dist/chunk-N6ZMBFDE.js +0 -224
- package/dist/chunk-N6ZMBFDE.js.map +0 -1
- package/dist/chunk-YGSEUWLV.js.map +0 -1
- package/dist/chunk-Z5SU54HP.js.map +0 -1
- /package/dist/{openai-PCTYLOWI.js.map → openai-CYDMYX7X.js.map} +0 -0
package/dist/notes.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/
|
|
1
|
+
{"version":3,"sources":["../src/indexers/notes/notes-plugin.ts"],"sourcesContent":["/**\n * BrainBank — Notes Module\n * \n * Store structured conversation digests so the agent\n * remembers past discussions.\n * \n * import { notes } from 'brainbank/notes';\n * brain.use(notes());\n */\n\nimport type { Indexer, IndexerContext } from '../base.ts';\nimport type { HNSWIndex } from '../../providers/vector/hnsw.ts';\nimport { NoteStore } from './note-store.ts';\nimport type { NoteDigest, StoredNote, RecallOptions } from './note-store.ts';\n\nclass NotesPlugin implements Indexer {\n readonly name = 'notes';\n hnsw!: HNSWIndex;\n store!: NoteStore;\n vecCache = new Map<number, Float32Array>();\n\n async initialize(ctx: IndexerContext): Promise<void> {\n this.hnsw = await ctx.createHnsw(100_000);\n ctx.loadVectors('note_vectors', 'note_id', this.hnsw, this.vecCache);\n this.store = new NoteStore(ctx.db, ctx.embedding, this.hnsw, this.vecCache);\n }\n\n /** Store a note digest. */\n async remember(digest: NoteDigest): Promise<number> {\n return this.store.remember(digest);\n }\n\n /** Recall relevant notes (hybrid search). */\n async recall(query: string, options?: RecallOptions): Promise<StoredNote[]> {\n return this.store.recall(query, options);\n }\n\n /** List recent notes. */\n list(limit?: number, tier?: 'short' | 'long'): StoredNote[] {\n return this.store.list(limit, tier);\n }\n\n /** Consolidate old short-term → long-term. */\n consolidate(keepRecent?: number): { promoted: number } {\n return this.store.consolidate(keepRecent);\n }\n\n /** Count notes by tier. */\n count(): { total: number; short: number; long: number } {\n return this.store.count();\n }\n\n stats(): Record<string, any> {\n return this.store.count();\n }\n}\n\n/** Create a notes plugin. 
*/\nexport function notes(): Indexer {\n return new NotesPlugin();\n}\n"],"mappings":";;;;;;;;;AAeA,IAAM,cAAN,MAAqC;AAAA,EAfrC,OAeqC;AAAA;AAAA;AAAA,EACxB,OAAO;AAAA,EAChB;AAAA,EACA;AAAA,EACA,WAAW,oBAAI,IAA0B;AAAA,EAEzC,MAAM,WAAW,KAAoC;AACjD,SAAK,OAAO,MAAM,IAAI,WAAW,GAAO;AACxC,QAAI,YAAY,gBAAgB,WAAW,KAAK,MAAM,KAAK,QAAQ;AACnE,SAAK,QAAQ,IAAI,UAAU,IAAI,IAAI,IAAI,WAAW,KAAK,MAAM,KAAK,QAAQ;AAAA,EAC9E;AAAA;AAAA,EAGA,MAAM,SAAS,QAAqC;AAChD,WAAO,KAAK,MAAM,SAAS,MAAM;AAAA,EACrC;AAAA;AAAA,EAGA,MAAM,OAAO,OAAe,SAAgD;AACxE,WAAO,KAAK,MAAM,OAAO,OAAO,OAAO;AAAA,EAC3C;AAAA;AAAA,EAGA,KAAK,OAAgB,MAAuC;AACxD,WAAO,KAAK,MAAM,KAAK,OAAO,IAAI;AAAA,EACtC;AAAA;AAAA,EAGA,YAAY,YAA2C;AACnD,WAAO,KAAK,MAAM,YAAY,UAAU;AAAA,EAC5C;AAAA;AAAA,EAGA,QAAwD;AACpD,WAAO,KAAK,MAAM,MAAM;AAAA,EAC5B;AAAA,EAEA,QAA6B;AACzB,WAAO,KAAK,MAAM,MAAM;AAAA,EAC5B;AACJ;AAGO,SAAS,QAAiB;AAC7B,SAAO,IAAI,YAAY;AAC3B;AAFgB;","names":[]}
|
package/package.json
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "brainbank",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.2.0",
|
|
4
4
|
"description": "Pluggable semantic memory for AI agents — hybrid search (vector + BM25) in a single SQLite file. Built-in code, git, and docs indexers. Bring your own.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"types": "dist/index.d.ts",
|
|
8
8
|
"bin": {
|
|
9
|
-
"brainbank": "./dist/cli.js"
|
|
9
|
+
"brainbank": "./dist/cli/index.js"
|
|
10
10
|
},
|
|
11
11
|
"exports": {
|
|
12
12
|
".": {
|
|
@@ -48,7 +48,7 @@
|
|
|
48
48
|
"test": "node --import tsx test/run.ts",
|
|
49
49
|
"test:integration": "node --import tsx test/run.ts --integration",
|
|
50
50
|
"test:verbose": "node --import tsx test/run.ts --verbose",
|
|
51
|
-
"dev": "tsx src/
|
|
51
|
+
"dev": "tsx src/cli/index.ts"
|
|
52
52
|
},
|
|
53
53
|
"engines": {
|
|
54
54
|
"node": ">=18"
|
|
@@ -71,7 +71,27 @@
|
|
|
71
71
|
"license": "MIT",
|
|
72
72
|
"dependencies": {
|
|
73
73
|
"better-sqlite3": "^11.8.1",
|
|
74
|
-
"hnswlib-node": "^3.0.0"
|
|
74
|
+
"hnswlib-node": "^3.0.0",
|
|
75
|
+
"tree-sitter": "^0.25.0",
|
|
76
|
+
"tree-sitter-bash": "^0.25.1",
|
|
77
|
+
"tree-sitter-c": "^0.24.1",
|
|
78
|
+
"tree-sitter-c-sharp": "^0.23.1",
|
|
79
|
+
"tree-sitter-cpp": "^0.23.4",
|
|
80
|
+
"tree-sitter-css": "^0.25.0",
|
|
81
|
+
"tree-sitter-elixir": "^0.3.5",
|
|
82
|
+
"tree-sitter-go": "^0.25.0",
|
|
83
|
+
"tree-sitter-html": "^0.23.2",
|
|
84
|
+
"tree-sitter-java": "^0.23.5",
|
|
85
|
+
"tree-sitter-javascript": "^0.25.0",
|
|
86
|
+
"tree-sitter-kotlin": "^0.3.8",
|
|
87
|
+
"tree-sitter-lua": "^2.1.3",
|
|
88
|
+
"tree-sitter-php": "^0.24.2",
|
|
89
|
+
"tree-sitter-python": "^0.25.0",
|
|
90
|
+
"tree-sitter-ruby": "^0.23.1",
|
|
91
|
+
"tree-sitter-rust": "^0.24.0",
|
|
92
|
+
"tree-sitter-scala": "^0.24.0",
|
|
93
|
+
"tree-sitter-swift": "^0.7.1",
|
|
94
|
+
"tree-sitter-typescript": "^0.23.2"
|
|
75
95
|
},
|
|
76
96
|
"optionalDependencies": {
|
|
77
97
|
"@xenova/transformers": "^2.17.2",
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/embeddings/math.ts"],"sourcesContent":["/**\n * BrainBank — Math Utilities\n * \n * Pure vector math functions for similarity calculations.\n * No dependencies — works on Float32Array directly.\n */\n\n/**\n * Cosine similarity between two vectors.\n * Assumes vectors are already normalized (unit length).\n * Returns value between -1.0 and 1.0.\n */\nexport function cosineSimilarity(a: Float32Array, b: Float32Array): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n if (a.length === 0) return 0;\n\n let dot = 0;\n for (let i = 0; i < a.length; i++) {\n dot += a[i] * b[i];\n }\n return dot;\n}\n\n/**\n * Full cosine similarity (normalizes first).\n * Use this when vectors may not be pre-normalized.\n */\nexport function cosineSimilarityFull(a: Float32Array, b: Float32Array): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n if (a.length === 0) return 0;\n\n let dot = 0, normA = 0, normB = 0;\n for (let i = 0; i < a.length; i++) {\n dot += a[i] * b[i];\n normA += a[i] * a[i];\n normB += b[i] * b[i];\n }\n const denom = Math.sqrt(normA) * Math.sqrt(normB);\n return denom === 0 ? 
0 : dot / denom;\n}\n\n/**\n * L2-normalize a vector to unit length.\n * Returns a new Float32Array.\n */\nexport function normalize(vec: Float32Array): Float32Array {\n let norm = 0;\n for (let i = 0; i < vec.length; i++) {\n norm += vec[i] * vec[i];\n }\n norm = Math.sqrt(norm);\n if (norm === 0) return new Float32Array(vec.length);\n\n const result = new Float32Array(vec.length);\n for (let i = 0; i < vec.length; i++) {\n result[i] = vec[i] / norm;\n }\n return result;\n}\n\n/**\n * Euclidean distance between two vectors.\n */\nexport function euclideanDistance(a: Float32Array, b: Float32Array): number {\n if (a.length !== b.length) {\n throw new Error(`Vector dimension mismatch: ${a.length} vs ${b.length}`);\n }\n let sum = 0;\n for (let i = 0; i < a.length; i++) {\n const d = a[i] - b[i];\n sum += d * d;\n }\n return Math.sqrt(sum);\n}\n"],"mappings":";;;;;AAYO,SAAS,iBAAiB,GAAiB,GAAyB;AACvE,MAAI,EAAE,WAAW,EAAE,QAAQ;AACvB,UAAM,IAAI,MAAM,8BAA8B,EAAE,MAAM,OAAO,EAAE,MAAM,EAAE;AAAA,EAC3E;AACA,MAAI,EAAE,WAAW,EAAG,QAAO;AAE3B,MAAI,MAAM;AACV,WAAS,IAAI,GAAG,IAAI,EAAE,QAAQ,KAAK;AAC/B,WAAO,EAAE,CAAC,IAAI,EAAE,CAAC;AAAA,EACrB;AACA,SAAO;AACX;AAXgB;AAqCT,SAAS,UAAU,KAAiC;AACvD,MAAI,OAAO;AACX,WAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACjC,YAAQ,IAAI,CAAC,IAAI,IAAI,CAAC;AAAA,EAC1B;AACA,SAAO,KAAK,KAAK,IAAI;AACrB,MAAI,SAAS,EAAG,QAAO,IAAI,aAAa,IAAI,MAAM;AAElD,QAAM,SAAS,IAAI,aAAa,IAAI,MAAM;AAC1C,WAAS,IAAI,GAAG,IAAI,IAAI,QAAQ,KAAK;AACjC,WAAO,CAAC,IAAI,IAAI,CAAC,IAAI;AAAA,EACzB;AACA,SAAO;AACX;AAbgB;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/embeddings/openai.ts"],"sourcesContent":["/**\n * BrainBank — OpenAI Embedding Provider\n * \n * Uses OpenAI's embedding API via fetch (no SDK dependency).\n * Supports text-embedding-3-small, text-embedding-3-large, and ada-002.\n * \n * Usage:\n * const brain = new BrainBank({\n * embeddingProvider: new OpenAIEmbedding({ model: 'text-embedding-3-small' }),\n * });\n */\n\nimport type { EmbeddingProvider } from '../types.ts';\n\nconst DEFAULT_MODEL = 'text-embedding-3-small';\nconst DEFAULT_DIMS: Record<string, number> = {\n 'text-embedding-3-small': 1536,\n 'text-embedding-3-large': 3072,\n 'text-embedding-ada-002': 1536,\n};\nconst API_URL = 'https://api.openai.com/v1/embeddings';\nconst MAX_BATCH = 100; // OpenAI limit per request\n\nexport interface OpenAIEmbeddingOptions {\n /** OpenAI API key. Falls back to OPENAI_API_KEY env var. */\n apiKey?: string;\n /** Model name. Default: 'text-embedding-3-small' */\n model?: string;\n /** Vector dimensions. If omitted, uses model default. text-embedding-3-* supports custom dims. */\n dims?: number;\n /** Base URL override (for Azure, proxies, etc.) */\n baseUrl?: string;\n}\n\nexport class OpenAIEmbedding implements EmbeddingProvider {\n readonly dims: number;\n\n private _apiKey: string;\n private _model: string;\n private _baseUrl: string;\n private _requestDims: number | undefined;\n private _retrying = false;\n\n constructor(options: OpenAIEmbeddingOptions = {}) {\n this._apiKey = options.apiKey ?? process.env.OPENAI_API_KEY ?? '';\n this._model = options.model ?? DEFAULT_MODEL;\n this._baseUrl = options.baseUrl ?? API_URL;\n\n // Custom dims only supported by text-embedding-3-*\n if (options.dims && this._model.startsWith('text-embedding-3')) {\n this._requestDims = options.dims;\n this.dims = options.dims;\n } else {\n this.dims = options.dims ?? DEFAULT_DIMS[this._model] ?? 
1536;\n }\n }\n\n async embed(text: string): Promise<Float32Array> {\n const results = await this._request([text]);\n return results[0];\n }\n\n async embedBatch(texts: string[]): Promise<Float32Array[]> {\n if (texts.length === 0) return [];\n\n const results: Float32Array[] = [];\n\n // Split into chunks of MAX_BATCH\n for (let i = 0; i < texts.length; i += MAX_BATCH) {\n const batch = texts.slice(i, i + MAX_BATCH);\n const embeddings = await this._request(batch);\n results.push(...embeddings);\n }\n\n return results;\n }\n\n async close(): Promise<void> {\n // No resources to release\n }\n\n private _isTokenLimitError(errText: string): boolean {\n return errText.includes('maximum input length') ||\n errText.includes('maximum context length') ||\n errText.includes('too many tokens');\n }\n\n private async _request(input: string[]): Promise<Float32Array[]> {\n if (!this._apiKey) {\n throw new Error('OpenAI API key required. Set OPENAI_API_KEY env var or pass apiKey option.');\n }\n\n // Truncate texts that would exceed token limit (~4 chars per token, 8192 max)\n const MAX_CHARS = 24_000;\n const safeInput = input.map(t => t.length > MAX_CHARS ? 
t.slice(0, MAX_CHARS) : t);\n\n const body: Record<string, any> = {\n model: this._model,\n input: safeInput,\n };\n\n if (this._requestDims) {\n body.dimensions = this._requestDims;\n }\n\n const res = await fetch(this._baseUrl, {\n method: 'POST',\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this._apiKey}`,\n },\n body: JSON.stringify(body),\n });\n\n if (!res.ok) {\n const err = await res.text();\n const isTokenLimit = res.status === 400 && this._isTokenLimitError(err);\n\n // If token limit error in a batch, retry each item individually with more aggressive truncation\n if (isTokenLimit && safeInput.length > 1) {\n const results: Float32Array[] = [];\n for (const text of safeInput) {\n const r = await this._request([text.slice(0, 8_000)]);\n results.push(r[0]);\n }\n return results;\n }\n // Last resort: if single item still fails, truncate to ~2k tokens\n if (isTokenLimit && safeInput.length === 1 && !this._retrying) {\n this._retrying = true;\n try {\n return await this._request([safeInput[0].slice(0, 6_000)]);\n } finally {\n this._retrying = false;\n }\n }\n throw new Error(`OpenAI embedding API error (${res.status}): ${err}`);\n }\n\n const json = await res.json() as {\n data: Array<{ embedding: number[]; index: number }>;\n };\n\n // Sort by index (API may return out of order)\n const sorted = json.data.sort((a, b) => a.index - b.index);\n\n return sorted.map(d => new Float32Array(d.embedding));\n 
}\n}\n"],"mappings":";;;;;AAcA,IAAM,gBAAgB;AACtB,IAAM,eAAuC;AAAA,EACzC,0BAA0B;AAAA,EAC1B,0BAA0B;AAAA,EAC1B,0BAA0B;AAC9B;AACA,IAAM,UAAU;AAChB,IAAM,YAAY;AAaX,IAAM,kBAAN,MAAmD;AAAA,EAlC1D,OAkC0D;AAAA;AAAA;AAAA,EAC7C;AAAA,EAED;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA,YAAY;AAAA,EAEpB,YAAY,UAAkC,CAAC,GAAG;AAC9C,SAAK,UAAU,QAAQ,UAAU,QAAQ,IAAI,kBAAkB;AAC/D,SAAK,SAAS,QAAQ,SAAS;AAC/B,SAAK,WAAW,QAAQ,WAAW;AAGnC,QAAI,QAAQ,QAAQ,KAAK,OAAO,WAAW,kBAAkB,GAAG;AAC5D,WAAK,eAAe,QAAQ;AAC5B,WAAK,OAAO,QAAQ;AAAA,IACxB,OAAO;AACH,WAAK,OAAO,QAAQ,QAAQ,aAAa,KAAK,MAAM,KAAK;AAAA,IAC7D;AAAA,EACJ;AAAA,EAEA,MAAM,MAAM,MAAqC;AAC7C,UAAM,UAAU,MAAM,KAAK,SAAS,CAAC,IAAI,CAAC;AAC1C,WAAO,QAAQ,CAAC;AAAA,EACpB;AAAA,EAEA,MAAM,WAAW,OAA0C;AACvD,QAAI,MAAM,WAAW,EAAG,QAAO,CAAC;AAEhC,UAAM,UAA0B,CAAC;AAGjC,aAAS,IAAI,GAAG,IAAI,MAAM,QAAQ,KAAK,WAAW;AAC9C,YAAM,QAAQ,MAAM,MAAM,GAAG,IAAI,SAAS;AAC1C,YAAM,aAAa,MAAM,KAAK,SAAS,KAAK;AAC5C,cAAQ,KAAK,GAAG,UAAU;AAAA,IAC9B;AAEA,WAAO;AAAA,EACX;AAAA,EAEA,MAAM,QAAuB;AAAA,EAE7B;AAAA,EAEQ,mBAAmB,SAA0B;AACjD,WAAO,QAAQ,SAAS,sBAAsB,KACvC,QAAQ,SAAS,wBAAwB,KACzC,QAAQ,SAAS,iBAAiB;AAAA,EAC7C;AAAA,EAEA,MAAc,SAAS,OAA0C;AAC7D,QAAI,CAAC,KAAK,SAAS;AACf,YAAM,IAAI,MAAM,4EAA4E;AAAA,IAChG;AAGA,UAAM,YAAY;AAClB,UAAM,YAAY,MAAM,IAAI,OAAK,EAAE,SAAS,YAAY,EAAE,MAAM,GAAG,SAAS,IAAI,CAAC;AAEjF,UAAM,OAA4B;AAAA,MAC9B,OAAO,KAAK;AAAA,MACZ,OAAO;AAAA,IACX;AAEA,QAAI,KAAK,cAAc;AACnB,WAAK,aAAa,KAAK;AAAA,IAC3B;AAEA,UAAM,MAAM,MAAM,MAAM,KAAK,UAAU;AAAA,MACnC,QAAQ;AAAA,MACR,SAAS;AAAA,QACL,gBAAgB;AAAA,QAChB,iBAAiB,UAAU,KAAK,OAAO;AAAA,MAC3C;AAAA,MACA,MAAM,KAAK,UAAU,IAAI;AAAA,IAC7B,CAAC;AAED,QAAI,CAAC,IAAI,IAAI;AACT,YAAM,MAAM,MAAM,IAAI,KAAK;AAC3B,YAAM,eAAe,IAAI,WAAW,OAAO,KAAK,mBAAmB,GAAG;AAGtE,UAAI,gBAAgB,UAAU,SAAS,GAAG;AACtC,cAAM,UAA0B,CAAC;AACjC,mBAAW,QAAQ,WAAW;AAC1B,gBAAM,IAAI,MAAM,KAAK,SAAS,CAAC,KAAK,MAAM,GAAG,GAAK,CAAC,CAAC;AACpD,kBAAQ,KAAK,EAAE,CAAC,CAAC;AAAA,QACrB;AACA,eAAO;AAAA,MACX;AAEA,UAAI,gBAAgB,UAAU,WAAW,KAAK,CAAC,KAAK,WAAW;AAC3D,aAAK,YAAY;AACjB,YAAI;AACA,iBAAO,MAAM,KAAK,SAAS,CAAC,UAAU,CAAC,EAAE,MAAM,GAAG,GAAK,CAAC,CAAC;A
AAA,QAC7D,UAAE;AACE,eAAK,YAAY;AAAA,QACrB;AAAA,MACJ;AACA,YAAM,IAAI,MAAM,+BAA+B,IAAI,MAAM,MAAM,GAAG,EAAE;AAAA,IACxE;AAEA,UAAM,OAAO,MAAM,IAAI,KAAK;AAK5B,UAAM,SAAS,KAAK,KAAK,KAAK,CAAC,GAAG,MAAM,EAAE,QAAQ,EAAE,KAAK;AAEzD,WAAO,OAAO,IAAI,OAAK,IAAI,aAAa,EAAE,SAAS,CAAC;AAAA,EACxD;AACJ;","names":[]}
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"sources":["../src/query/rrf.ts"],"sourcesContent":["/**\n * BrainBank — Reciprocal Rank Fusion (RRF)\n * \n * Combines results from multiple search systems (vector + BM25)\n * using the RRF algorithm: score = Σ 1/(k + rank_i)\n * \n * This is the same algorithm used by Elasticsearch, QMD, and most\n * production hybrid search systems. Simple but very effective.\n * \n * Reference: Cormack et al., \"Reciprocal Rank Fusion outperforms\n * Condorcet and individual Rank Learning Methods\" (2009)\n */\n\nimport type { SearchResult } from '../types.ts';\n\n/**\n * Fuse ranked lists from different search systems into a single ranked list.\n * \n * @param resultSets - Arrays of SearchResult from different systems (e.g. vector, BM25)\n * @param k - Smoothing constant. Default: 60 (standard value). Higher = less emphasis on top ranks.\n * @param maxResults - Maximum results to return.\n */\nexport function reciprocalRankFusion(\n resultSets: SearchResult[][],\n k: number = 60,\n maxResults: number = 15,\n): SearchResult[] {\n // Build a map: unique key → { bestResult, rrfScore }\n const fused = new Map<string, { result: SearchResult; rrfScore: number }>();\n\n for (const results of resultSets) {\n for (let rank = 0; rank < results.length; rank++) {\n const r = results[rank];\n const key = resultKey(r);\n const rrfContribution = 1.0 / (k + rank + 1);\n\n const existing = fused.get(key);\n if (existing) {\n existing.rrfScore += rrfContribution;\n // Keep the result with the higher original score\n if (r.score > existing.result.score) {\n existing.result = { ...r };\n }\n } else {\n fused.set(key, {\n result: { ...r },\n rrfScore: rrfContribution,\n });\n }\n }\n }\n\n // Sort by RRF score descending, normalize, and return\n const sorted = Array.from(fused.values())\n .sort((a, b) => b.rrfScore - a.rrfScore)\n .slice(0, maxResults);\n\n // Normalize RRF scores to 0..1 range\n const maxRRF = sorted[0]?.rrfScore ?? 
1;\n return sorted.map(entry => ({\n ...entry.result,\n score: entry.rrfScore / maxRRF,\n metadata: {\n ...entry.result.metadata,\n rrfScore: entry.rrfScore,\n },\n }));\n}\n\n/**\n * Generate a unique key for a search result to detect duplicates across systems.\n */\nfunction resultKey(r: SearchResult): string {\n switch (r.type) {\n case 'code':\n return `code:${r.filePath}:${r.metadata.startLine}-${r.metadata.endLine}`;\n case 'commit':\n return `commit:${r.metadata.hash || r.metadata.shortHash}`;\n case 'pattern':\n return `pattern:${r.metadata.taskType}:${r.content?.slice(0, 60)}`;\n default:\n return `${r.type}:${r.content?.slice(0, 80)}`;\n }\n}\n"],"mappings":";;;;;AAsBO,SAAS,qBACZ,YACA,IAAY,IACZ,aAAqB,IACP;AAEd,QAAM,QAAQ,oBAAI,IAAwD;AAE1E,aAAW,WAAW,YAAY;AAC9B,aAAS,OAAO,GAAG,OAAO,QAAQ,QAAQ,QAAQ;AAC9C,YAAM,IAAI,QAAQ,IAAI;AACtB,YAAM,MAAM,UAAU,CAAC;AACvB,YAAM,kBAAkB,KAAO,IAAI,OAAO;AAE1C,YAAM,WAAW,MAAM,IAAI,GAAG;AAC9B,UAAI,UAAU;AACV,iBAAS,YAAY;AAErB,YAAI,EAAE,QAAQ,SAAS,OAAO,OAAO;AACjC,mBAAS,SAAS,EAAE,GAAG,EAAE;AAAA,QAC7B;AAAA,MACJ,OAAO;AACH,cAAM,IAAI,KAAK;AAAA,UACX,QAAQ,EAAE,GAAG,EAAE;AAAA,UACf,UAAU;AAAA,QACd,CAAC;AAAA,MACL;AAAA,IACJ;AAAA,EACJ;AAGA,QAAM,SAAS,MAAM,KAAK,MAAM,OAAO,CAAC,EACnC,KAAK,CAAC,GAAG,MAAM,EAAE,WAAW,EAAE,QAAQ,EACtC,MAAM,GAAG,UAAU;AAGxB,QAAM,SAAS,OAAO,CAAC,GAAG,YAAY;AACtC,SAAO,OAAO,IAAI,YAAU;AAAA,IACxB,GAAG,MAAM;AAAA,IACT,OAAO,MAAM,WAAW;AAAA,IACxB,UAAU;AAAA,MACN,GAAG,MAAM,OAAO;AAAA,MAChB,UAAU,MAAM;AAAA,IACpB;AAAA,EACJ,EAAE;AACN;AA7CgB;AAkDhB,SAAS,UAAU,GAAyB;AACxC,UAAQ,EAAE,MAAM;AAAA,IACZ,KAAK;AACD,aAAO,QAAQ,EAAE,QAAQ,IAAI,EAAE,SAAS,SAAS,IAAI,EAAE,SAAS,OAAO;AAAA,IAC3E,KAAK;AACD,aAAO,UAAU,EAAE,SAAS,QAAQ,EAAE,SAAS,SAAS;AAAA,IAC5D,KAAK;AACD,aAAO,WAAW,EAAE,SAAS,QAAQ,IAAI,EAAE,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,IACpE;AACI,aAAO,GAAG,EAAE,IAAI,IAAI,EAAE,SAAS,MAAM,GAAG,EAAE,CAAC;AAAA,EACnD;AACJ;AAXS;","names":[]}
|
package/dist/chunk-EDKSKLX4.js
DELETED
|
@@ -1,490 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
__name
|
|
3
|
-
} from "./chunk-7QVYU63E.js";
|
|
4
|
-
|
|
5
|
-
// src/indexers/code-indexer.ts
|
|
6
|
-
import fs from "fs";
|
|
7
|
-
import path2 from "path";
|
|
8
|
-
|
|
9
|
-
// src/indexers/chunker.ts
|
|
10
|
-
var CodeChunker = class {
|
|
11
|
-
static {
|
|
12
|
-
__name(this, "CodeChunker");
|
|
13
|
-
}
|
|
14
|
-
MAX;
|
|
15
|
-
MIN;
|
|
16
|
-
OVERLAP;
|
|
17
|
-
constructor(config = {}) {
|
|
18
|
-
this.MAX = config.maxLines ?? 80;
|
|
19
|
-
this.MIN = config.minLines ?? 3;
|
|
20
|
-
this.OVERLAP = config.overlap ?? 5;
|
|
21
|
-
}
|
|
22
|
-
/**
|
|
23
|
-
* Split file content into semantic chunks.
|
|
24
|
-
* Small files (< maxLines) become a single chunk.
|
|
25
|
-
* For JS/TS/Python: detects functions and classes.
|
|
26
|
-
* For other languages: sliding window with overlap.
|
|
27
|
-
*/
|
|
28
|
-
chunk(filePath, content, language) {
|
|
29
|
-
const lines = content.split("\n");
|
|
30
|
-
if (lines.length <= this.MAX) {
|
|
31
|
-
return [{
|
|
32
|
-
filePath,
|
|
33
|
-
chunkType: "file",
|
|
34
|
-
startLine: 1,
|
|
35
|
-
endLine: lines.length,
|
|
36
|
-
content: content.trim(),
|
|
37
|
-
language
|
|
38
|
-
}];
|
|
39
|
-
}
|
|
40
|
-
switch (language) {
|
|
41
|
-
case "typescript":
|
|
42
|
-
case "javascript":
|
|
43
|
-
return this._chunkJS(filePath, lines, language);
|
|
44
|
-
case "python":
|
|
45
|
-
return this._chunkPython(filePath, lines, language);
|
|
46
|
-
default:
|
|
47
|
-
return this._chunkGeneric(filePath, lines, language);
|
|
48
|
-
}
|
|
49
|
-
}
|
|
50
|
-
// ── JS / TS Strategy ────────────────────────────
|
|
51
|
-
_chunkJS(filePath, lines, language) {
|
|
52
|
-
const chunks = [];
|
|
53
|
-
const funcRe = /^(?:export\s+)?(?:async\s+)?function\s+(\w+)/;
|
|
54
|
-
const constFuncRe = /^(?:export\s+)?(?:const|let|var)\s+(\w+)\s*=\s*(?:async\s+)?\(/;
|
|
55
|
-
const classRe = /^(?:export\s+)?(?:abstract\s+)?class\s+(\w+)/;
|
|
56
|
-
const arrowRe = /^(?:export\s+)?(?:const|let)\s+(\w+)\s*=\s*(?:async\s+)?\([^)]*\)\s*(?::\s*\S+)?\s*=>/;
|
|
57
|
-
const interfaceRe = /^(?:export\s+)?(?:interface|type)\s+(\w+)/;
|
|
58
|
-
let i = 0;
|
|
59
|
-
while (i < lines.length) {
|
|
60
|
-
const line = lines[i].trim();
|
|
61
|
-
const fm = line.match(funcRe) || line.match(constFuncRe) || line.match(arrowRe);
|
|
62
|
-
const cm = line.match(classRe);
|
|
63
|
-
const im = line.match(interfaceRe);
|
|
64
|
-
if (fm || cm || im) {
|
|
65
|
-
const name = fm?.[1] || cm?.[1] || im?.[1] || "default";
|
|
66
|
-
const type = cm ? "class" : im ? "interface" : "function";
|
|
67
|
-
const start = i;
|
|
68
|
-
const end = this._findBlockEnd(lines, i);
|
|
69
|
-
if (end - start >= this.MIN) {
|
|
70
|
-
if (end - start > this.MAX) {
|
|
71
|
-
chunks.push(...this._splitLarge(filePath, lines, start, end, name, type, language));
|
|
72
|
-
} else {
|
|
73
|
-
const content = lines.slice(start, end + 1).join("\n").trim();
|
|
74
|
-
chunks.push({
|
|
75
|
-
filePath,
|
|
76
|
-
chunkType: type,
|
|
77
|
-
name,
|
|
78
|
-
startLine: start + 1,
|
|
79
|
-
endLine: end + 1,
|
|
80
|
-
content,
|
|
81
|
-
language
|
|
82
|
-
});
|
|
83
|
-
}
|
|
84
|
-
i = end + 1;
|
|
85
|
-
continue;
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
i++;
|
|
89
|
-
}
|
|
90
|
-
if (chunks.length > 0) {
|
|
91
|
-
return chunks.filter((c) => c.content.length > 20);
|
|
92
|
-
}
|
|
93
|
-
return this._chunkGeneric(filePath, lines, language);
|
|
94
|
-
}
|
|
95
|
-
// ── Python Strategy ─────────────────────────────
|
|
96
|
-
_chunkPython(filePath, lines, language) {
|
|
97
|
-
const chunks = [];
|
|
98
|
-
let i = 0;
|
|
99
|
-
while (i < lines.length) {
|
|
100
|
-
const funcMatch = lines[i].match(/^(?:async\s+)?def\s+(\w+)/);
|
|
101
|
-
const classMatch = lines[i].match(/^class\s+(\w+)/);
|
|
102
|
-
if (funcMatch || classMatch) {
|
|
103
|
-
const name = funcMatch?.[1] || classMatch?.[1];
|
|
104
|
-
const type = classMatch ? "class" : "function";
|
|
105
|
-
const start = i;
|
|
106
|
-
const baseIndent = (lines[i].match(/^(\s*)/) ?? ["", ""])[1].length;
|
|
107
|
-
let end = i + 1;
|
|
108
|
-
while (end < lines.length) {
|
|
109
|
-
const line = lines[end];
|
|
110
|
-
if (line.trim() !== "") {
|
|
111
|
-
const indent = (line.match(/^(\s*)/) ?? ["", ""])[1].length;
|
|
112
|
-
if (indent <= baseIndent) break;
|
|
113
|
-
}
|
|
114
|
-
end++;
|
|
115
|
-
}
|
|
116
|
-
end = Math.min(end - 1, lines.length - 1);
|
|
117
|
-
if (end - start >= this.MIN) {
|
|
118
|
-
const content = lines.slice(start, end + 1).join("\n").trim();
|
|
119
|
-
chunks.push({
|
|
120
|
-
filePath,
|
|
121
|
-
chunkType: type,
|
|
122
|
-
name,
|
|
123
|
-
startLine: start + 1,
|
|
124
|
-
endLine: end + 1,
|
|
125
|
-
content,
|
|
126
|
-
language
|
|
127
|
-
});
|
|
128
|
-
}
|
|
129
|
-
i = end + 1;
|
|
130
|
-
continue;
|
|
131
|
-
}
|
|
132
|
-
i++;
|
|
133
|
-
}
|
|
134
|
-
return chunks.length > 0 ? chunks : this._chunkGeneric(filePath, lines, language);
|
|
135
|
-
}
|
|
136
|
-
// ── Generic Strategy (sliding window) ───────────
|
|
137
|
-
_chunkGeneric(filePath, lines, language) {
|
|
138
|
-
const chunks = [];
|
|
139
|
-
const step = this.MAX - this.OVERLAP;
|
|
140
|
-
for (let s = 0; s < lines.length; s += step) {
|
|
141
|
-
const e = Math.min(s + this.MAX, lines.length);
|
|
142
|
-
const content = lines.slice(s, e).join("\n").trim();
|
|
143
|
-
if (content.length > 20) {
|
|
144
|
-
chunks.push({
|
|
145
|
-
filePath,
|
|
146
|
-
chunkType: "block",
|
|
147
|
-
startLine: s + 1,
|
|
148
|
-
endLine: e,
|
|
149
|
-
content,
|
|
150
|
-
language
|
|
151
|
-
});
|
|
152
|
-
}
|
|
153
|
-
if (e >= lines.length) break;
|
|
154
|
-
}
|
|
155
|
-
return chunks;
|
|
156
|
-
}
|
|
157
|
-
// ── Block End Detection (brace balance) ─────────
|
|
158
|
-
_findBlockEnd(lines, start) {
|
|
159
|
-
let depth = 0;
|
|
160
|
-
let foundOpen = false;
|
|
161
|
-
for (let i = start; i < lines.length; i++) {
|
|
162
|
-
for (const c of lines[i]) {
|
|
163
|
-
if (c === "{") {
|
|
164
|
-
depth++;
|
|
165
|
-
foundOpen = true;
|
|
166
|
-
}
|
|
167
|
-
if (c === "}") depth--;
|
|
168
|
-
}
|
|
169
|
-
if (foundOpen && depth === 0) return i;
|
|
170
|
-
}
|
|
171
|
-
return Math.min(start + this.MAX, lines.length - 1);
|
|
172
|
-
}
|
|
173
|
-
// ── Split Large Blocks ──────────────────────────
|
|
174
|
-
_splitLarge(filePath, lines, start, end, name, type, language) {
|
|
175
|
-
const chunks = [];
|
|
176
|
-
const step = this.MAX - this.OVERLAP;
|
|
177
|
-
let part = 1;
|
|
178
|
-
for (let s = start; s <= end; s += step) {
|
|
179
|
-
const e = Math.min(s + this.MAX, end + 1);
|
|
180
|
-
const content = lines.slice(s, e).join("\n").trim();
|
|
181
|
-
chunks.push({
|
|
182
|
-
filePath,
|
|
183
|
-
chunkType: type,
|
|
184
|
-
name: `${name} (part ${part++})`,
|
|
185
|
-
startLine: s + 1,
|
|
186
|
-
endLine: e,
|
|
187
|
-
content,
|
|
188
|
-
language
|
|
189
|
-
});
|
|
190
|
-
if (e > end) break;
|
|
191
|
-
}
|
|
192
|
-
return chunks;
|
|
193
|
-
}
|
|
194
|
-
};
|
|
195
|
-
|
|
196
|
-
// src/indexers/languages.ts
|
|
197
|
-
import path from "path";
|
|
198
|
-
var SUPPORTED_EXTENSIONS = {
|
|
199
|
-
// TypeScript / JavaScript
|
|
200
|
-
".ts": "typescript",
|
|
201
|
-
".tsx": "typescript",
|
|
202
|
-
".js": "javascript",
|
|
203
|
-
".jsx": "javascript",
|
|
204
|
-
".mjs": "javascript",
|
|
205
|
-
".cjs": "javascript",
|
|
206
|
-
// Systems
|
|
207
|
-
".go": "go",
|
|
208
|
-
".rs": "rust",
|
|
209
|
-
".cpp": "cpp",
|
|
210
|
-
".cc": "cpp",
|
|
211
|
-
".c": "c",
|
|
212
|
-
".h": "c",
|
|
213
|
-
".hpp": "cpp",
|
|
214
|
-
// JVM
|
|
215
|
-
".java": "java",
|
|
216
|
-
".kt": "kotlin",
|
|
217
|
-
".scala": "scala",
|
|
218
|
-
// Scripting
|
|
219
|
-
".py": "python",
|
|
220
|
-
".rb": "ruby",
|
|
221
|
-
".php": "php",
|
|
222
|
-
".lua": "lua",
|
|
223
|
-
".sh": "bash",
|
|
224
|
-
".bash": "bash",
|
|
225
|
-
".zsh": "bash",
|
|
226
|
-
// Web
|
|
227
|
-
".html": "html",
|
|
228
|
-
".css": "css",
|
|
229
|
-
".scss": "scss",
|
|
230
|
-
".less": "less",
|
|
231
|
-
".svelte": "svelte",
|
|
232
|
-
".vue": "vue",
|
|
233
|
-
// Data / Config
|
|
234
|
-
".json": "json",
|
|
235
|
-
".yaml": "yaml",
|
|
236
|
-
".yml": "yaml",
|
|
237
|
-
".toml": "toml",
|
|
238
|
-
".xml": "xml",
|
|
239
|
-
".graphql": "graphql",
|
|
240
|
-
".gql": "graphql",
|
|
241
|
-
// Docs
|
|
242
|
-
".md": "markdown",
|
|
243
|
-
".mdx": "markdown",
|
|
244
|
-
// Database
|
|
245
|
-
".sql": "sql",
|
|
246
|
-
".prisma": "prisma",
|
|
247
|
-
// Other
|
|
248
|
-
".swift": "swift",
|
|
249
|
-
".dart": "dart",
|
|
250
|
-
".r": "r",
|
|
251
|
-
".ex": "elixir",
|
|
252
|
-
".exs": "elixir",
|
|
253
|
-
".erl": "erlang",
|
|
254
|
-
".zig": "zig"
|
|
255
|
-
};
|
|
256
|
-
var IGNORE_DIRS = /* @__PURE__ */ new Set([
|
|
257
|
-
// Package managers
|
|
258
|
-
"node_modules",
|
|
259
|
-
"bower_components",
|
|
260
|
-
".pnpm",
|
|
261
|
-
// Build output
|
|
262
|
-
"dist",
|
|
263
|
-
"build",
|
|
264
|
-
"out",
|
|
265
|
-
".next",
|
|
266
|
-
".nuxt",
|
|
267
|
-
".output",
|
|
268
|
-
".svelte-kit",
|
|
269
|
-
// Version control
|
|
270
|
-
".git",
|
|
271
|
-
".hg",
|
|
272
|
-
".svn",
|
|
273
|
-
// IDE / Editor
|
|
274
|
-
".idea",
|
|
275
|
-
".vscode",
|
|
276
|
-
// Runtime / Cache
|
|
277
|
-
"__pycache__",
|
|
278
|
-
".pytest_cache",
|
|
279
|
-
"venv",
|
|
280
|
-
".venv",
|
|
281
|
-
".env",
|
|
282
|
-
".tox",
|
|
283
|
-
// Coverage / Test artifacts
|
|
284
|
-
"coverage",
|
|
285
|
-
".nyc_output",
|
|
286
|
-
"htmlcov",
|
|
287
|
-
// Compiled
|
|
288
|
-
"target",
|
|
289
|
-
// Rust, Java
|
|
290
|
-
".cargo",
|
|
291
|
-
"vendor",
|
|
292
|
-
// Go, PHP
|
|
293
|
-
// AI / Model cache
|
|
294
|
-
".model-cache",
|
|
295
|
-
".brainbank",
|
|
296
|
-
// OS
|
|
297
|
-
".DS_Store"
|
|
298
|
-
]);
|
|
299
|
-
var IGNORE_FILES = /* @__PURE__ */ new Set([
|
|
300
|
-
"package-lock.json",
|
|
301
|
-
"yarn.lock",
|
|
302
|
-
"pnpm-lock.yaml",
|
|
303
|
-
"bun.lockb",
|
|
304
|
-
"Cargo.lock",
|
|
305
|
-
"Gemfile.lock",
|
|
306
|
-
"poetry.lock",
|
|
307
|
-
"composer.lock",
|
|
308
|
-
"go.sum"
|
|
309
|
-
]);
|
|
310
|
-
function isSupported(filePath) {
|
|
311
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
312
|
-
return ext in SUPPORTED_EXTENSIONS;
|
|
313
|
-
}
|
|
314
|
-
__name(isSupported, "isSupported");
|
|
315
|
-
function getLanguage(filePath) {
|
|
316
|
-
const ext = path.extname(filePath).toLowerCase();
|
|
317
|
-
return SUPPORTED_EXTENSIONS[ext];
|
|
318
|
-
}
|
|
319
|
-
__name(getLanguage, "getLanguage");
|
|
320
|
-
function isIgnoredDir(dirName) {
|
|
321
|
-
return IGNORE_DIRS.has(dirName) || dirName.startsWith(".");
|
|
322
|
-
}
|
|
323
|
-
__name(isIgnoredDir, "isIgnoredDir");
|
|
324
|
-
function isIgnoredFile(fileName) {
|
|
325
|
-
return IGNORE_FILES.has(fileName);
|
|
326
|
-
}
|
|
327
|
-
__name(isIgnoredFile, "isIgnoredFile");
|
|
328
|
-
|
|
329
|
-
// src/indexers/code-indexer.ts
|
|
330
|
-
/**
 * Walks a repository, chunks every supported source file, embeds each chunk
 * and stores chunk rows plus their vectors.
 *
 * Collaborators are injected through `deps`:
 *   - `db`          – SQLite handle exposing `prepare(sql)` with `get` / `all` / `run`
 *   - `embedding`   – object with an async `embed(text)` returning a typed-array vector
 *   - `hnsw`        – vector index exposing `add(vector, id)`
 *   - `vectorCache` – Map from chunk id to vector
 */
var CodeIndexer = class {
  static {
    __name(this, "CodeIndexer");
  }
  _chunker = new CodeChunker();
  _deps;
  _repoPath;
  _maxFileSize;
  /**
   * @param repoPath    Root directory that is walked by `index()`.
   * @param deps        Injected collaborators (see class comment).
   * @param maxFileSize Files whose stat size exceeds this many bytes are
   *                    skipped by the walker. Defaults to 512 000.
   */
  constructor(repoPath, deps, maxFileSize = 512e3) {
    this._deps = deps;
    this._repoPath = repoPath;
    this._maxFileSize = maxFileSize;
  }
  /**
   * Index all supported files in the repository.
   * Skips unchanged files (same content hash) unless `forceReindex` is set.
   *
   * @param options.forceReindex Re-embed every file even when its hash matches.
   * @param options.onProgress   Called as `(relativePath, position, total)`
   *                             before each file is read.
   * @returns Counts of files indexed, files skipped as unchanged, and chunks written.
   */
  async index(options = {}) {
    const { forceReindex = false, onProgress } = options;
    const { db, embedding, hnsw, vectorCache } = this._deps;
    // Statements are prepared once and reused for every file and chunk.
    const selectFileHash = db.prepare(
      "SELECT file_hash FROM indexed_files WHERE file_path = ?"
    );
    const selectChunkIds = db.prepare(
      "SELECT rowid AS id FROM code_chunks WHERE file_path = ?"
    );
    const deleteVector = db.prepare("DELETE FROM code_vectors WHERE chunk_id = ?");
    const deleteChunks = db.prepare("DELETE FROM code_chunks WHERE file_path = ?");
    const insertChunk = db.prepare(
      `INSERT INTO code_chunks (file_path, chunk_type, name, start_line, end_line, content, language, file_hash)
         VALUES (?, ?, ?, ?, ?, ?, ?, ?)`
    );
    const insertVector = db.prepare(
      "INSERT INTO code_vectors (chunk_id, embedding) VALUES (?, ?)"
    );
    const upsertFile = db.prepare(
      "INSERT OR REPLACE INTO indexed_files (file_path, file_hash) VALUES (?, ?)"
    );
    const files = this._walkRepo(this._repoPath);
    let indexed = 0, skipped = 0, totalChunks = 0;
    for (let i = 0; i < files.length; i++) {
      const filePath = files[i];
      const rel = path2.relative(this._repoPath, filePath);
      onProgress?.(rel, i + 1, files.length);
      let content;
      try {
        content = fs.readFileSync(filePath, "utf-8");
      } catch {
        // Best effort: a file that vanished or cannot be read is simply not indexed.
        continue;
      }
      const hash = this._hash(content);
      const existing = selectFileHash.get(rel);
      if (!forceReindex && existing?.file_hash === hash) {
        skipped++;
        continue;
      }
      // Remove everything previously stored for this path. Vectors are keyed by
      // the chunk's rowid (chunk_id is written from lastInsertRowid below), so
      // they must go before the chunk rows that identify them. This runs even
      // without an indexed_files row so leftovers of an interrupted run are not
      // duplicated.
      // NOTE(review): ids already added to the in-memory HNSW index are not
      // removed here because no delete operation is visible on `hnsw`; they
      // disappear once the index is rebuilt from code_vectors.
      for (const row of selectChunkIds.all(rel)) {
        const staleId = Number(row.id);
        deleteVector.run(staleId);
        vectorCache.delete(staleId);
      }
      deleteChunks.run(rel);
      const ext = path2.extname(filePath).toLowerCase();
      const language = SUPPORTED_EXTENSIONS[ext] ?? "text";
      const chunks = this._chunker.chunk(rel, content, language);
      for (const chunk of chunks) {
        // Text handed to the embedder: file path, chunk header, then the code.
        const text = [
          `File: ${rel}`,
          chunk.name ? `${chunk.chunkType}: ${chunk.name}` : chunk.chunkType,
          chunk.content
        ].join("\n");
        const vec = await embedding.embed(text);
        const result = insertChunk.run(
          rel,
          chunk.chunkType,
          chunk.name ?? null,
          chunk.startLine,
          chunk.endLine,
          chunk.content,
          language,
          hash
        );
        const id = Number(result.lastInsertRowid);
        // Copy only the bytes this vector covers; `vec` may be a view into a
        // larger ArrayBuffer, in which case `vec.buffer` alone would store
        // unrelated data.
        insertVector.run(id, Buffer.from(vec.buffer, vec.byteOffset, vec.byteLength));
        hnsw.add(vec, id);
        vectorCache.set(id, vec);
        totalChunks++;
      }
      upsertFile.run(rel, hash);
      indexed++;
    }
    return { indexed, skipped, chunks: totalChunks };
  }
  // ── File Walker ─────────────────────────────────
  /**
   * Recursively collect absolute paths of indexable files under `dir`.
   *
   * A file is collected when it is a regular file, its name is not ignored,
   * its extension is a key of SUPPORTED_EXTENSIONS and its size does not
   * exceed `_maxFileSize`. Ignored directories are not descended into.
   *
   * @param dir   Directory to scan.
   * @param files Accumulator shared across the recursion; also the return value.
   */
  _walkRepo(dir, files = []) {
    let entries;
    try {
      entries = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory: keep whatever has been collected so far.
      return files;
    }
    for (const entry of entries) {
      if (entry.isDirectory()) {
        if (isIgnoredDir(entry.name)) continue;
        this._walkRepo(path2.join(dir, entry.name), files);
      } else if (entry.isFile()) {
        if (isIgnoredFile(entry.name)) continue;
        const ext = path2.extname(entry.name).toLowerCase();
        if (!(ext in SUPPORTED_EXTENSIONS)) continue;
        const full = path2.join(dir, entry.name);
        try {
          if (fs.statSync(full).size <= this._maxFileSize) {
            files.push(full);
          }
        } catch {
          // The file could not be stat'ed (e.g. removed mid-walk): skip it.
        }
      }
    }
    return files;
  }
  // ── FNV-1a Hash ─────────────────────────────────
  /**
   * Compute a 32-bit FNV-1a style hash of `content` as a lowercase hex string.
   *
   * The hash is only compared against values this same function produced
   * earlier, so determinism is what matters. The multiplication is done in
   * double precision and can exceed 2^53, so results are not guaranteed to
   * match a bit-exact FNV-1a implementation. The arithmetic is intentionally
   * left unchanged: altering it would invalidate every stored file hash.
   *
   * @param content String to hash; each UTF-16 code unit is mixed in once.
   */
  _hash(content) {
    let h = 2166136261;
    for (let i = 0; i < content.length; i++) {
      h ^= content.charCodeAt(i);
      h = h * 16777619 >>> 0;
    }
    return h.toString(16);
  }
};
|
|
438
|
-
|
|
439
|
-
// src/plugins/code.ts
|
|
440
|
-
/**
 * Code-indexing plugin: wires a CodeIndexer to the host context's database,
 * embedding provider and shared "code" HNSW index.
 */
var CodeModuleImpl = class {
  /**
   * @param opts Optional settings read by this class: `name` (defaults to
   *             "code"), `repoPath` and `maxFileSize` (both fall back to the
   *             host context's config during `initialize`).
   */
  constructor(opts = {}) {
    this.opts = opts;
    this.name = opts.name ?? "code";
  }
  static {
    __name(this, "CodeModuleImpl");
  }
  name;
  hnsw;
  indexer;
  vecCache = /* @__PURE__ */ new Map();
  /**
   * Attach to the shared "code" HNSW index and build the indexer.
   *
   * When the shared index was just created (`isNew`), the vectors stored in
   * `code_vectors` are loaded into it and into the vector cache.
   *
   * @param ctx Host context providing `getOrCreateSharedHnsw`, `loadVectors`,
   *            `config`, `db` and `embedding`.
   */
  async initialize(ctx) {
    const shared = await ctx.getOrCreateSharedHnsw("code");
    this.hnsw = shared.hnsw;
    this.vecCache = shared.vecCache;
    if (shared.isNew) {
      ctx.loadVectors("code_vectors", "chunk_id", this.hnsw, this.vecCache);
    }
    const repoPath = this.opts.repoPath ?? ctx.config.repoPath;
    this.indexer = new CodeIndexer(repoPath, {
      db: ctx.db,
      hnsw: this.hnsw,
      vectorCache: this.vecCache,
      embedding: ctx.embedding
    }, this.opts.maxFileSize ?? ctx.config.maxFileSize);
  }
  /**
   * Run the indexer over the configured repository.
   *
   * @param options Passed through to `CodeIndexer.index`.
   * @returns Whatever the indexer reports.
   * @throws Error when called before `initialize()` has completed.
   */
  async index(options = {}) {
    if (!this.indexer) {
      // Fail with an actionable message instead of "cannot read properties of undefined".
      throw new Error(
        `Plugin "${this.name}" is not initialized; call initialize() before index().`
      );
    }
    return this.indexer.index(options);
  }
  /**
   * Report the size of the attached HNSW index.
   *
   * @returns `{ hnswSize }`; 0 when no index has been attached yet.
   */
  stats() {
    return { hnswSize: this.hnsw ? this.hnsw.size : 0 };
  }
};
|
|
474
|
-
/**
 * Factory for the code-indexing plugin.
 *
 * @param opts Optional plugin settings, forwarded unchanged to the constructor.
 * @returns A new CodeModuleImpl instance.
 */
function code(opts) {
  const plugin = new CodeModuleImpl(opts);
  return plugin;
}
|
|
477
|
-
__name(code, "code");
|
|
478
|
-
|
|
479
|
-
export {
|
|
480
|
-
SUPPORTED_EXTENSIONS,
|
|
481
|
-
IGNORE_DIRS,
|
|
482
|
-
isSupported,
|
|
483
|
-
getLanguage,
|
|
484
|
-
isIgnoredDir,
|
|
485
|
-
isIgnoredFile,
|
|
486
|
-
CodeChunker,
|
|
487
|
-
CodeIndexer,
|
|
488
|
-
code
|
|
489
|
-
};
|
|
490
|
-
//# sourceMappingURL=chunk-EDKSKLX4.js.map
|