@csuwl/opencode-memory-plugin 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,456 @@
1
+ /**
2
+ * Search Mode Implementations for OpenCode Memory Plugin
3
+ *
4
+ * This file contains the implementation for different search modes:
5
+ * - hybrid: Vector + BM25 (best quality)
6
+ * - vector: Vector-only (semantic search)
7
+ * - bm25: BM25-only (keyword search)
8
+ * - hash: Hash-based embeddings (fallback)
9
+ */
10
+
11
+ import path from "path"
12
+ import Database from "better-sqlite3"
13
+ import { readFile } from "fs/promises"
14
+ import { cosineSimilarity } from "./vector-memory"
15
+ import { loadConfig, type MemoryConfig } from "./config"
16
+
17
+ const VECTOR_DB_PATH = path.join(process.env.HOME || "", ".opencode", "memory", "vector-index.db")
18
+ const MEMORY_DIR = path.join(process.env.HOME || "", ".opencode", "memory")
19
+
20
+ /**
21
+ * BM25-only search (fast keyword matching)
22
+ */
23
+ export async function bm25OnlySearch(
24
+ args: any,
25
+ limit: number,
26
+ config: MemoryConfig
27
+ ): Promise<string> {
28
+ await ensureVectorIndex()
29
+
30
+ const daysToSearch = args.days || 7
31
+ const query = args.query
32
+
33
+ const db = new Database(VECTOR_DB_PATH, { readonly: true })
34
+
35
+ try {
36
+ const filesToSearch = getFilesToSearch(args, daysToSearch, config)
37
+
38
+ if (filesToSearch.length === 0) {
39
+ return "No files found matching the specified scope."
40
+ }
41
+
42
+ const results: {
43
+ file: string
44
+ snippet: string
45
+ score: number
46
+ line_start: number
47
+ line_end: number
48
+ }[] = []
49
+
50
+ // Pure BM25 search (FTS5)
51
+ const ftsResults = db
52
+ .prepare(
53
+ `
54
+ SELECT memory_chunks.file_path, memory_chunks.chunk, memory_chunks.line_start, memory_chunks.line_end, bm25(memory_fts) as bm25_score
55
+ FROM memory_fts
56
+ JOIN memory_chunks ON memory_chunks.id = memory_fts.rowid
57
+ WHERE memory_fts MATCH ? AND memory_chunks.file_path IN (${filesToSearch
58
+ .map(() => "?")
59
+ .join(", ")})
60
+ ORDER BY bm25_score
61
+ LIMIT ?
62
+ `
63
+ )
64
+ .all(query, ...filesToSearch, limit)
65
+
66
+ for (const row of ftsResults) {
67
+ results.push({
68
+ file: path.basename(row.file_path as string),
69
+ snippet: (row.chunk as string).substring(0, 700),
70
+ score: 1.0, // BM25 already ranked
71
+ line_start: row.line_start as number,
72
+ line_end: row.line_end as number
73
+ })
74
+ }
75
+
76
+ return formatResults(results, query, "BM25 Keyword Search")
77
+ } finally {
78
+ db.close()
79
+ }
80
+ }
81
+
82
+ /**
83
+ * Vector-only search (pure semantic search)
84
+ */
85
+ export async function vectorOnlySearch(
86
+ args: any,
87
+ limit: number,
88
+ config: MemoryConfig
89
+ ): Promise<string> {
90
+ if (!config.embedding.enabled) {
91
+ return "Error: Vector search requires embeddings to be enabled. Set embedding.enabled=true in config or use search_mode='bm25'."
92
+ }
93
+
94
+ await ensureVectorIndex()
95
+ const daysToSearch = args.days || 7
96
+ const query = args.query
97
+
98
+ const db = new Database(VECTOR_DB_PATH, { readonly: true })
99
+
100
+ try {
101
+ const chunkCount = db.prepare("SELECT COUNT(*) as count FROM memory_chunks").get() as {
102
+ count: number
103
+ }
104
+
105
+ if (chunkCount.count === 0) {
106
+ db.close()
107
+ return "Vector index is empty. Write some memories first, then try searching."
108
+ }
109
+
110
+ // Import getEmbedding dynamically to avoid circular dependency
111
+ const { getEmbedding } = await import("./vector-memory")
112
+ const queryEmbedding = await getEmbedding(query)
113
+
114
+ const filesToSearch = getFilesToSearch(args, daysToSearch, config)
115
+ const results: {
116
+ file: string
117
+ snippet: string
118
+ score: number
119
+ line_start: number
120
+ line_end: number
121
+ }[] = []
122
+
123
+ // Pure vector similarity search
124
+ const vectorResults = db
125
+ .prepare(
126
+ `
127
+ SELECT file_path, chunk, line_start, line_end, embedding
128
+ FROM memory_chunks
129
+ WHERE file_path IN (${filesToSearch.map(() => "?").join(", ")})
130
+ `
131
+ )
132
+ .all(...filesToSearch)
133
+
134
+ for (const row of vectorResults) {
135
+ const similarity = cosineSimilarity(queryEmbedding, JSON.parse(row.embedding as string))
136
+ results.push({
137
+ file: path.basename(row.file_path as string),
138
+ snippet: (row.chunk as string).substring(0, 700),
139
+ score: similarity,
140
+ line_start: row.line_start as number,
141
+ line_end: row.line_end as number
142
+ })
143
+ }
144
+
145
+ results.sort((a, b) => b.score - a.score)
146
+ const topResults = results.slice(0, limit)
147
+
148
+ return formatResults(topResults, query, "Vector Semantic Search")
149
+ } finally {
150
+ db.close()
151
+ }
152
+ }
153
+
154
+ /**
155
+ * Hash-based search (emergency fallback, no model needed)
156
+ */
157
+ export async function hashOnlySearch(
158
+ args: any,
159
+ limit: number,
160
+ config: MemoryConfig
161
+ ): Promise<string> {
162
+ await ensureVectorIndex()
163
+ const daysToSearch = args.days || 7
164
+ const query = args.query
165
+
166
+ const db = new Database(VECTOR_DB_PATH, { readonly: true })
167
+
168
+ try {
169
+ const chunkCount = db.prepare("SELECT COUNT(*) as count FROM memory_chunks").get() as {
170
+ count: number
171
+ }
172
+
173
+ if (chunkCount.count === 0) {
174
+ db.close()
175
+ return "Vector index is empty. Write some memories first, then try searching."
176
+ }
177
+
178
+ // Use hash-based embedding (no model)
179
+ const modelInfo = config.models.available["Xenova/all-MiniLM-L6-v2"]
180
+ const dimensions = modelInfo?.dimensions || 384
181
+
182
+ const words = query.toLowerCase().split(/\s+/)
183
+ const queryEmbedding: number[] = []
184
+
185
+ for (let i = 0; i < dimensions; i++) {
186
+ let hash = 0
187
+ for (const word of words) {
188
+ for (let k = 0; k < word.length; k++) {
189
+ hash = ((hash << 5) - hash) + word.charCodeAt(k)
190
+ hash |= 0
191
+ }
192
+ }
193
+ queryEmbedding.push((hash % 1000) / 1000)
194
+ }
195
+
196
+ const filesToSearch = getFilesToSearch(args, daysToSearch, config)
197
+ const results: {
198
+ file: string
199
+ snippet: string
200
+ score: number
201
+ line_start: number
202
+ line_end: number
203
+ }[] = []
204
+
205
+ const vectorResults = db
206
+ .prepare(
207
+ `
208
+ SELECT file_path, chunk, line_start, line_end, embedding
209
+ FROM memory_chunks
210
+ WHERE file_path IN (${filesToSearch.map(() => "?").join(", ")})
211
+ `
212
+ )
213
+ .all(...filesToSearch)
214
+
215
+ for (const row of vectorResults) {
216
+ const similarity = cosineSimilarity(queryEmbedding, JSON.parse(row.embedding as string))
217
+ results.push({
218
+ file: path.basename(row.file_path as string),
219
+ snippet: (row.chunk as string).substring(0, 700),
220
+ score: similarity,
221
+ line_start: row.line_start as number,
222
+ line_end: row.line_end as number
223
+ })
224
+ }
225
+
226
+ results.sort((a, b) => b.score - a.score)
227
+ const topResults = results.slice(0, limit)
228
+
229
+ return formatResults(topResults, query, "Hash-based Search (low quality)")
230
+ } finally {
231
+ db.close()
232
+ }
233
+ }
234
+
235
+ /**
236
+ * Hybrid search (vector + BM25, best quality)
237
+ */
238
+ export async function hybridSearch(
239
+ args: any,
240
+ limit: number,
241
+ config: MemoryConfig
242
+ ): Promise<string> {
243
+ const useHybrid = args.hybrid !== false
244
+ const vectorWeight = config.search.options.hybrid?.vectorWeight || 0.7
245
+ const bm25Weight = config.search.options.hybrid?.bm25Weight || 0.3
246
+
247
+ if (!config.embedding.enabled) {
248
+ console.warn("Embeddings disabled, falling back to BM25-only search")
249
+ return bm25OnlySearch(args, limit, config)
250
+ }
251
+
252
+ await ensureVectorIndex()
253
+ const daysToSearch = args.days || 7
254
+ const query = args.query
255
+
256
+ const db = new Database(VECTOR_DB_PATH, { readonly: true })
257
+
258
+ try {
259
+ const chunkCount = db.prepare("SELECT COUNT(*) as count FROM memory_chunks").get() as {
260
+ count: number
261
+ }
262
+
263
+ if (chunkCount.count === 0) {
264
+ db.close()
265
+ return "Vector index is empty. Write some memories first, then try searching."
266
+ }
267
+
268
+ // Import getEmbedding dynamically
269
+ const { getEmbedding } = await import("./vector-memory")
270
+ const queryEmbedding = await getEmbedding(query)
271
+
272
+ const filesToSearch = getFilesToSearch(args, daysToSearch, config)
273
+ const results: {
274
+ file: string
275
+ snippet: string
276
+ score: number
277
+ line_start: number
278
+ line_end: number
279
+ }[] = []
280
+
281
+ // Vector similarity search
282
+ const vectorResults = db
283
+ .prepare(
284
+ `
285
+ SELECT file_path, chunk, line_start, line_end, embedding
286
+ FROM memory_chunks
287
+ WHERE file_path IN (${filesToSearch.map(() => "?").join(", ")})
288
+ `
289
+ )
290
+ .all(...filesToSearch)
291
+
292
+ for (const row of vectorResults) {
293
+ const similarity = cosineSimilarity(queryEmbedding, JSON.parse(row.embedding as string))
294
+ results.push({
295
+ file: path.basename(row.file_path as string),
296
+ snippet: (row.chunk as string).substring(0, 700),
297
+ score: similarity,
298
+ line_start: row.line_start as number,
299
+ line_end: row.line_end as number
300
+ })
301
+ }
302
+
303
+ // Hybrid search with BM25 if enabled
304
+ if (useHybrid) {
305
+ const ftsResults = db
306
+ .prepare(
307
+ `
308
+ SELECT memory_chunks.file_path, memory_chunks.chunk, memory_chunks.line_start, memory_chunks.line_end, bm25(memory_fts) as bm25_score
309
+ FROM memory_fts
310
+ JOIN memory_chunks ON memory_chunks.id = memory_fts.rowid
311
+ WHERE memory_fts MATCH ? AND memory_chunks.file_path IN (${filesToSearch
312
+ .map(() => "?")
313
+ .join(", ")})
314
+ ORDER BY bm25_score
315
+ LIMIT ${limit * 2}
316
+ `
317
+ )
318
+ .all(query, ...filesToSearch)
319
+
320
+ const vectorScores = new Map<string, number>()
321
+ for (const r of results) {
322
+ vectorScores.set(`${r.file}:${r.line_start}`, r.score)
323
+ }
324
+
325
+ for (const row of ftsResults) {
326
+ const key = `${path.basename(row.file_path as string)}:${row.line_start as number}`
327
+ const vectorScore = vectorScores.get(key) || 0
328
+ const bm25Score = 1 / (1 + (row.bm25_score as number))
329
+
330
+ const hybridScore = vectorWeight * vectorScore + bm25Weight * bm25Score
331
+
332
+ const existingIndex = results.findIndex(
333
+ (r) => r.file === path.basename(row.file_path as string) && r.line_start === row.line_start
334
+ )
335
+ if (existingIndex >= 0) {
336
+ if (hybridScore > results[existingIndex].score) {
337
+ results[existingIndex].score = hybridScore
338
+ }
339
+ } else if (hybridScore > 0.3) {
340
+ results.push({
341
+ file: path.basename(row.file_path as string),
342
+ snippet: (row.chunk as string).substring(0, 700),
343
+ score: hybridScore,
344
+ line_start: row.line_start as number,
345
+ line_end: row.line_end as number
346
+ })
347
+ }
348
+ }
349
+ }
350
+
351
+ results.sort((a, b) => b.score - a.score)
352
+ const topResults = results.slice(0, limit)
353
+
354
+ const searchType = useHybrid
355
+ ? `Hybrid Search (${Math.round(vectorWeight * 100)}% vector + ${Math.round(bm25Weight * 100)}% BM25)`
356
+ : "Vector Search"
357
+
358
+ return formatResults(topResults, query, searchType)
359
+ } finally {
360
+ db.close()
361
+ }
362
+ }
363
+
364
+ /**
365
+ * Get list of files to search based on scope
366
+ */
367
+ export function getFilesToSearch(args: any, daysToSearch: number, config: MemoryConfig): string[] {
368
+ const filesToSearch: string[] = []
369
+
370
+ if (args.scope === "all" || args.scope === "long-term") {
371
+ filesToSearch.push(path.join(MEMORY_DIR, "MEMORY.md"))
372
+ }
373
+ if (args.scope === "all" || args.scope === "preference") {
374
+ filesToSearch.push(path.join(MEMORY_DIR, "PREFERENCES.md"))
375
+ }
376
+ if (args.scope === "all" || args.scope === "personality") {
377
+ filesToSearch.push(path.join(MEMORY_DIR, "SOUL.md"))
378
+ }
379
+ if (args.scope === "all" || args.scope === "context") {
380
+ filesToSearch.push(path.join(MEMORY_DIR, "CONTEXT.md"))
381
+ }
382
+ if (args.scope === "all" || args.scope === "tools") {
383
+ filesToSearch.push(path.join(MEMORY_DIR, "TOOLS.md"))
384
+ }
385
+ if (args.scope === "all" || args.scope === "identity") {
386
+ filesToSearch.push(path.join(MEMORY_DIR, "IDENTITY.md"))
387
+ }
388
+ if (args.scope === "all" || args.scope === "user") {
389
+ filesToSearch.push(path.join(MEMORY_DIR, "USER.md"))
390
+ }
391
+ if (args.scope === "all" || args.scope === "daily") {
392
+ const dailyDir = path.join(MEMORY_DIR, "daily")
393
+ for (let i = 0; i < daysToSearch; i++) {
394
+ const date = new Date()
395
+ date.setDate(date.getDate() - i)
396
+ const dateStr = date.toISOString().split("T")[0]
397
+ filesToSearch.push(path.join(dailyDir, `${dateStr}.md`))
398
+ }
399
+ }
400
+
401
+ return filesToSearch
402
+ }
403
+
404
+ /**
405
+ * Format search results for display
406
+ */
407
+ export function formatResults(results: any[], query: string, searchType: string): string {
408
+ if (results.length === 0) {
409
+ return `No relevant memories found for: "${query}"\n\nTry different keywords or check if memories have been indexed.`
410
+ }
411
+
412
+ let output = `🔍 ${searchType} Results for: "${query}"\n\n`
413
+ for (const r of results) {
414
+ const scorePercent = (r.score * 100).toFixed(1)
415
+ output += `### ${r.file} (Lines ${r.line_start}-${r.line_end})\n`
416
+ output += `**Relevance**: ${scorePercent}%\n`
417
+ output += `${r.snippet}${r.snippet.length >= 700 ? "..." : ""}\n\n`
418
+ }
419
+
420
+ return output.trim()
421
+ }
422
+
423
+ /**
424
+ * Ensure vector index exists
425
+ */
426
+ async function ensureVectorIndex(): Promise<void> {
427
+ const { ensureDir } = await import("fs/promises")
428
+ await ensureDir(MEMORY_DIR)
429
+
430
+ const db = new Database(VECTOR_DB_PATH)
431
+
432
+ try {
433
+ db.exec(`
434
+ CREATE TABLE IF NOT EXISTS memory_chunks (
435
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
436
+ file_path TEXT NOT NULL,
437
+ chunk TEXT NOT NULL,
438
+ line_start INTEGER NOT NULL,
439
+ line_end INTEGER NOT NULL,
440
+ embedding BLOB,
441
+ created_at TEXT DEFAULT CURRENT_TIMESTAMP,
442
+ metadata TEXT
443
+ )
444
+ `)
445
+
446
+ db.exec(`
447
+ CREATE VIRTUAL TABLE IF NOT EXISTS memory_fts USING fts5(
448
+ chunk,
449
+ content='memory_chunks',
450
+ content_rowid='id'
451
+ )
452
+ `)
453
+ } finally {
454
+ db.close()
455
+ }
456
+ }
@@ -1,4 +1,32 @@
1
1
  import { tool } from "@opencode-ai/plugin"
2
+ // (duplicate `path`, `fs/promises`, and `better-sqlite3` imports removed —
+ // the original file already imports them below at lines 30-32)
5
+ import { pipeline, env } from "@huggingface/transformers"
6
+ import {
7
+ loadConfig,
8
+ getSearchMode,
9
+ getEmbeddingModel,
10
+ isEmbeddingEnabled,
11
+ getConfig
12
+ } from "./config"
13
+
14
+ // Configure Transformers.js for local use (no external calls)
15
+ env.allowLocalModels = true
16
+ env.allowRemoteModels = true
17
+ env.useBrowserCache = false
18
+ // (duplicated import + Transformers.js env-configuration block removed —
+ // it repeated lines 2-17 verbatim and caused duplicate-identifier errors)
2
30
  import path from "path"
3
31
  import { readFile, exists, mkdir } from "fs/promises"
4
32
  import Database from "better-sqlite3"
@@ -47,29 +75,124 @@ async function ensureVectorIndex() {
47
75
  db.close()
48
76
  }
49
77
 
78
+ // Helper: Initialize embedding model (lazy load)
79
+ let embeddingModel: any = null
80
+ let currentModelName: string | null = null
81
+
82
+ async function ensureEmbeddingModel() {
83
+ // Check if embeddings are enabled
84
+ if (!(await isEmbeddingEnabled())) {
85
+ throw new Error("Embeddings are disabled in configuration")
86
+ }
87
+
88
+ // Get configured model
89
+ const modelName = await getEmbeddingModel()
90
+
91
+ // Return if model already loaded
92
+ if (embeddingModelReady && currentModelName === modelName) {
93
+ return
94
+ }
95
+
96
+ // Reload if different model
97
+ if (currentModelName && currentModelName !== modelName) {
98
+ console.log(`Switching embedding model: ${currentModelName} → ${modelName}`)
99
+ embeddingModel = null
100
+ embeddingModelReady = false
101
+ }
102
+
103
+ try {
104
+ console.log(`Loading embedding model: ${modelName}`)
105
+
106
+ // Load the embedding model (configurable)
107
+ embeddingModel = await pipeline('feature-extraction', modelName, {
108
+ progress_callback: (progress: any) => {
109
+ // Only log significant progress to avoid spam
110
+ if (progress.status === 'downloading' && progress.progress !== undefined) {
111
+ if (Math.floor(progress.progress * 100) % 25 === 0) {
112
+ console.log(` Downloading model: ${Math.floor(progress.progress * 100)}%`)
113
+ }
114
+ }
115
+ }
116
+ })
117
+
118
+ currentModelName = modelName
119
+ embeddingModelReady = true
120
+ console.log(`✓ Model loaded: ${modelName}`)
121
+ } catch (error) {
122
+ console.error('Failed to load embedding model:', error)
123
+ throw error
124
+ }
125
+ }
126
+
50
127
  // Helper: Get text embedding using local model
51
128
  async function getEmbedding(text: string): Promise<number[]> {
52
- // This would use node-llama-cpp for local embeddings
53
- // For now, return a simple hash-based embedding as fallback
54
- // TODO: Integrate with node-llama-cpp
55
-
56
- const words = text.toLowerCase().split(/\s+/)
57
- const embedding: number[] = []
129
+ // Check if we should use embeddings
130
+ if (!(await isEmbeddingEnabled())) {
131
+ throw new Error("Embeddings are disabled")
132
+ }
58
133
 
59
- // Create a simple 384-dimension embedding (typical for small models)
60
- for (let i = 0; i < 384; i++) {
61
- let hash = 0
62
- for (let j = 0; j < words.length; j++) {
63
- const word = words[j]
64
- for (let k = 0; k < word.length; k++) {
65
- hash = ((hash << 5) - hash) + word.charCodeAt(k)
66
- hash |= 0
134
+ try {
135
+ await ensureEmbeddingModel()
136
+
137
+ // Get model info to determine dimensions
138
+ const config = await getConfig()
139
+ const modelInfo = config.models.available[currentModelName!]
140
+ const dimensions = modelInfo?.dimensions || 384
141
+
142
+ // Generate embedding using Transformers.js
143
+ const output = await embeddingModel(text, {
144
+ pooling: 'mean',
145
+ normalize: true
146
+ })
147
+
148
+ // Convert Tensor to number array
149
+ const embedding = Array.from(output.data as Float32Array)
150
+
151
+ return embedding
152
+ } catch (error) {
153
+ console.error('Embedding generation failed, using fallback:', error)
154
+
155
+ // Get fallback mode from config
156
+ const config = await getConfig()
157
+ const fallbackMode = config.embedding.fallbackMode
158
+
159
+ if (fallbackMode === 'error') {
160
+ throw new Error(`Embedding generation failed: ${error}`)
161
+ }
162
+
163
+ if (fallbackMode === 'bm25') {
164
+ throw new Error('BM25_FALLBACK') // Signal to use BM25-only search
165
+ }
166
+
167
+ // Default: hash-based fallback
168
+ const config = await getConfig()
169
+ const modelInfo = config.models.available[currentModelName!]
170
+ const dimensions = modelInfo?.dimensions || 384
171
+
172
+ const words = text.toLowerCase().split(/\s+/)
173
+ const fallbackEmbedding: number[] = []
174
+
175
+ for (let i = 0; i < dimensions; i++) {
176
+ let hash = 0
177
+ for (const word of words) {
178
+ for (let k = 0; k < word.length; k++) {
179
+ hash = ((hash << 5) - hash) + word.charCodeAt(k)
180
+ hash |= 0
181
+ }
67
182
  }
183
+ fallbackEmbedding.push((hash % 1000) / 1000)
68
184
  }
69
- embedding.push((hash % 1000) / 1000) // Normalize to 0-1
185
+
186
+ return fallbackEmbedding
70
187
  }
71
-
72
- return embedding
188
+ }
189
+
190
+ // Helper: Initialize embedding model asynchronously (call during startup)
191
+ export async function initEmbeddingModel() {
192
+ // Pre-load the model in the background
193
+ ensureEmbeddingModel().catch(err => {
194
+ console.warn('Failed to pre-load embedding model:', err)
195
+ })
73
196
  }
74
197
 
75
198
  // Helper: Split text into chunks