claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236) hide show
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,633 +1,692 @@
1
- /**
2
- * FTS5 Search Service — Phase 26
3
- * Full-text search over the observations table using SQLite FTS5.
4
- * Replaces ChromaDB as the primary search backend.
5
- */
6
-
7
- import { randomUUID } from 'crypto'
8
- import type { Database } from 'bun:sqlite'
9
- import type { Logger } from 'pino'
10
- import { expandQuery } from '@/retrieval/query/expander'
11
- import { embeddingToBuffer, bufferToEmbedding, cosineSimilarity } from './embedding-utils'
12
- import type { EmbeddingService } from './embeddings'
13
-
14
- export type ObservationCategory = 'decision' | 'pattern' | 'correction' | 'insight' | 'preference'
15
-
16
- export interface NewObservation {
17
- project: string
18
- category: ObservationCategory
19
- content: string
20
- reasoning?: string
21
- context?: string
22
- confidence?: number
23
- source?: string
24
- tags?: string[]
25
- file_paths?: string[]
26
- symbols?: string[]
27
- }
28
-
29
- export interface ObservationResult {
30
- id: string
31
- project: string
32
- category: ObservationCategory
33
- content: string
34
- reasoning: string | null
35
- context: string | null
36
- confidence: number
37
- source: string
38
- tags: string[]
39
- file_paths: string[]
40
- symbols: string[]
41
- access_count: number
42
- last_accessed: string | null
43
- created_at: string
44
- updated_at: string
45
- archived: boolean
46
- }
47
-
48
- export interface ScoredResult extends ObservationResult {
49
- score: number
50
- }
51
-
52
- export interface DuplicateResult {
53
- id: string
54
- content: string
55
- score: number
56
- }
57
-
58
- export class FTS5Search {
59
- private db: Database
60
- private logger: Logger
61
-
62
- constructor(db: Database, logger: Logger) {
63
- this.db = db
64
- this.logger = logger.child({ component: 'fts5-search' })
65
- }
66
-
67
- /**
68
- * Search observations via FTS5 full-text search.
69
- */
70
- search(query: string, project?: string, limit: number = 10): ObservationResult[] {
71
- if (!query.trim()) return []
72
-
73
- const ftsQuery = this.buildFTSQuery(query)
74
-
75
- try {
76
- let sql: string
77
- const params: any[] = []
78
-
79
- if (project) {
80
- sql = `
81
- SELECT o.*, rank
82
- FROM observations o
83
- JOIN observations_fts fts ON o.rowid = fts.rowid
84
- WHERE observations_fts MATCH ? AND o.project = ? AND o.archived = 0
85
- ORDER BY rank
86
- LIMIT ?
87
- `
88
- params.push(ftsQuery, project, limit)
89
- } else {
90
- sql = `
91
- SELECT o.*, rank
92
- FROM observations o
93
- JOIN observations_fts fts ON o.rowid = fts.rowid
94
- WHERE observations_fts MATCH ? AND o.archived = 0
95
- ORDER BY rank
96
- LIMIT ?
97
- `
98
- params.push(ftsQuery, limit)
99
- }
100
-
101
- const rows = this.db.prepare(sql).all(...params) as any[]
102
- return rows.map(row => this.rowToResult(row))
103
- } catch (error) {
104
- this.logger.warn({ error, query, ftsQuery }, 'FTS5 search failed, trying fallback')
105
- return this.fallbackSearch(query, project, limit)
106
- }
107
- }
108
-
109
- /**
110
- * Search with BM25 ranking and confidence scoring.
111
- * Returns results with a normalized score between 0 and 1.
112
- */
113
- searchWithConfidence(query: string, project?: string, limit: number = 10): ScoredResult[] {
114
- if (!query.trim()) return []
115
-
116
- const ftsQuery = this.buildFTSQuery(query)
117
- const queryLower = query.toLowerCase()
118
-
119
- try {
120
- let sql: string
121
- const params: any[] = []
122
-
123
- if (project) {
124
- sql = `
125
- SELECT o.*, bm25(observations_fts) as bm25_score
126
- FROM observations o
127
- JOIN observations_fts fts ON o.rowid = fts.rowid
128
- WHERE observations_fts MATCH ? AND o.project = ? AND o.archived = 0
129
- ORDER BY bm25_score
130
- LIMIT ?
131
- `
132
- params.push(ftsQuery, project, limit)
133
- } else {
134
- sql = `
135
- SELECT o.*, bm25(observations_fts) as bm25_score
136
- FROM observations o
137
- JOIN observations_fts fts ON o.rowid = fts.rowid
138
- WHERE observations_fts MATCH ? AND o.archived = 0
139
- ORDER BY bm25_score
140
- LIMIT ?
141
- `
142
- params.push(ftsQuery, limit)
143
- }
144
-
145
- const rows = this.db.prepare(sql).all(...params) as any[]
146
-
147
- return rows.map(row => {
148
- const result = this.rowToResult(row)
149
- const bm25 = Math.abs(row.bm25_score as number)
150
-
151
- // Compute confidence score
152
- let score = this.normalizeBM25(bm25)
153
-
154
- // Boost for exact content match
155
- if (result.content.toLowerCase().includes(queryLower)) {
156
- score = Math.min(1.0, score + 0.15)
157
- }
158
-
159
- // Boost for tag match
160
- if (result.tags.some(t => queryLower.includes(t.toLowerCase()))) {
161
- score = Math.min(1.0, score + 0.1)
162
- }
163
-
164
- // Boost for project match
165
- if (project && result.project === project) {
166
- score = Math.min(1.0, score + 0.05)
167
- }
168
-
169
- return { ...result, score }
170
- }).sort((a, b) => b.score - a.score)
171
- } catch (error) {
172
- this.logger.warn({ error, query, ftsQuery }, 'FTS5 confidence search failed, trying fallback')
173
- return this.fallbackSearch(query, project, limit).map(r => ({ ...r, score: 0.5 }))
174
- }
175
- }
176
-
177
- /**
178
- * Store a new observation. Returns the generated ID.
179
- */
180
- store(observation: NewObservation, providedId?: string): string {
181
- const id = providedId || randomUUID()
182
- const now = new Date().toISOString()
183
-
184
- const stmt = this.db.prepare(`
185
- INSERT INTO observations (id, project, category, content, reasoning, context, confidence, source, tags, file_paths, symbols, access_count, last_accessed, created_at, updated_at, archived)
186
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, NULL, ?, ?, 0)
187
- `)
188
-
189
- stmt.run(
190
- id,
191
- observation.project,
192
- observation.category,
193
- observation.content,
194
- observation.reasoning || null,
195
- observation.context || null,
196
- observation.confidence ?? 0.8,
197
- observation.source || 'explicit',
198
- observation.tags ? JSON.stringify(observation.tags) : null,
199
- observation.file_paths ? JSON.stringify(observation.file_paths) : null,
200
- observation.symbols ? JSON.stringify(observation.symbols) : null,
201
- now,
202
- now
203
- )
204
-
205
- this.logger.debug({ id, category: observation.category, project: observation.project }, 'Observation stored')
206
- return id
207
- }
208
-
209
- /**
210
- * Check for duplicates via FTS5 text similarity.
211
- * Returns the best matching duplicate if above threshold, null otherwise.
212
- */
213
- searchForDuplicates(content: string, project: string, threshold: number = 0.85): DuplicateResult | null {
214
- const ftsQuery = this.buildFTSQuery(content)
215
- if (!ftsQuery.trim()) return null
216
-
217
- try {
218
- const rows = this.db.prepare(`
219
- SELECT o.id, o.content, bm25(observations_fts) as bm25_score
220
- FROM observations o
221
- JOIN observations_fts fts ON o.rowid = fts.rowid
222
- WHERE observations_fts MATCH ? AND o.project = ? AND o.archived = 0
223
- ORDER BY bm25_score
224
- LIMIT 3
225
- `).all(ftsQuery, project) as any[]
226
-
227
- if (rows.length === 0) return null
228
-
229
- // Use word overlap as a proxy for semantic similarity
230
- for (const row of rows) {
231
- const similarity = this.wordOverlap(content, row.content as string)
232
- if (similarity >= threshold) {
233
- return {
234
- id: row.id as string,
235
- content: row.content as string,
236
- score: similarity
237
- }
238
- }
239
- }
240
-
241
- return null
242
- } catch (error) {
243
- this.logger.warn({ error }, 'Duplicate check failed')
244
- return null
245
- }
246
- }
247
-
248
- /**
249
- * Fetch all observations for a project, optionally filtered by category.
250
- */
251
- fetchAll(project?: string, category?: ObservationCategory): ObservationResult[] {
252
- let sql: string
253
- const params: any[] = []
254
-
255
- if (project && category) {
256
- sql = `SELECT * FROM observations WHERE project = ? AND category = ? AND archived = 0 ORDER BY created_at DESC`
257
- params.push(project, category)
258
- } else if (project) {
259
- sql = `SELECT * FROM observations WHERE project = ? AND archived = 0 ORDER BY created_at DESC`
260
- params.push(project)
261
- } else if (category) {
262
- sql = `SELECT * FROM observations WHERE category = ? AND archived = 0 ORDER BY created_at DESC`
263
- params.push(category)
264
- } else {
265
- sql = `SELECT * FROM observations WHERE archived = 0 ORDER BY created_at DESC`
266
- }
267
-
268
- const rows = this.db.prepare(sql).all(...params) as any[]
269
- return rows.map(row => this.rowToResult(row))
270
- }
271
-
272
- /**
273
- * Update an observation's fields.
274
- */
275
- update(id: string, updates: Partial<NewObservation>): void {
276
- const fields: string[] = []
277
- const values: any[] = []
278
-
279
- if (updates.content !== undefined) { fields.push('content = ?'); values.push(updates.content) }
280
- if (updates.reasoning !== undefined) { fields.push('reasoning = ?'); values.push(updates.reasoning) }
281
- if (updates.context !== undefined) { fields.push('context = ?'); values.push(updates.context) }
282
- if (updates.confidence !== undefined) { fields.push('confidence = ?'); values.push(updates.confidence) }
283
- if (updates.source !== undefined) { fields.push('source = ?'); values.push(updates.source) }
284
- if (updates.tags !== undefined) { fields.push('tags = ?'); values.push(JSON.stringify(updates.tags)) }
285
- if (updates.file_paths !== undefined) { fields.push('file_paths = ?'); values.push(JSON.stringify(updates.file_paths)) }
286
- if (updates.symbols !== undefined) { fields.push('symbols = ?'); values.push(JSON.stringify(updates.symbols)) }
287
-
288
- if (fields.length === 0) return
289
-
290
- fields.push('updated_at = ?')
291
- values.push(new Date().toISOString())
292
- values.push(id)
293
-
294
- this.db.prepare(`UPDATE observations SET ${fields.join(', ')} WHERE id = ?`).run(...values)
295
- }
296
-
297
- /**
298
- * Delete an observation by ID.
299
- */
300
- delete(id: string): void {
301
- this.db.prepare('DELETE FROM observations WHERE id = ?').run(id)
302
- }
303
-
304
- /**
305
- * Record an access to bump access_count and last_accessed.
306
- */
307
- recordAccess(id: string): void {
308
- this.db.prepare(`
309
- UPDATE observations SET access_count = access_count + 1, last_accessed = ? WHERE id = ?
310
- `).run(new Date().toISOString(), id)
311
- }
312
-
313
- /**
314
- * Get a single observation by ID.
315
- */
316
- getById(id: string): ObservationResult | null {
317
- const row = this.db.prepare('SELECT * FROM observations WHERE id = ?').get(id) as any
318
- if (!row) return null
319
- return this.rowToResult(row)
320
- }
321
-
322
- /**
323
- * Phase 27: Fetch observations in a date range for a project.
324
- * Ordered by created_at descending (most recent first).
325
- */
326
- fetchByTimeRange(project: string, start: Date, end: Date, limit: number = 50): ObservationResult[] {
327
- const startStr = start.toISOString()
328
- const endStr = end.toISOString()
329
-
330
- const rows = this.db.prepare(`
331
- SELECT * FROM observations
332
- WHERE project = ? AND archived = 0
333
- AND created_at >= ? AND created_at <= ?
334
- ORDER BY created_at DESC
335
- LIMIT ?
336
- `).all(project, startStr, endStr, limit) as any[]
337
-
338
- return rows.map(row => this.rowToResult(row))
339
- }
340
-
341
- /**
342
- * Phase 27: Increment access count and update last_accessed timestamp.
343
- * Alias for recordAccess for API consistency.
344
- */
345
- incrementAccess(id: string): void {
346
- this.recordAccess(id)
347
- }
348
-
349
- /**
350
- * BUG-002: Search within a specific category, optionally scoped to a project.
351
- * Returns results ordered by creation date (most recent first).
352
- */
353
- searchByCategory(category: ObservationCategory, project?: string, limit: number = 10): ObservationResult[] {
354
- let sql: string
355
- const params: any[] = []
356
-
357
- if (project) {
358
- sql = `SELECT * FROM observations WHERE category = ? AND project = ? AND archived = 0 ORDER BY created_at DESC LIMIT ?`
359
- params.push(category, project, limit)
360
- } else {
361
- sql = `SELECT * FROM observations WHERE category = ? AND archived = 0 ORDER BY created_at DESC LIMIT ?`
362
- params.push(category, limit)
363
- }
364
-
365
- const rows = this.db.prepare(sql).all(...params) as any[]
366
- return rows.map(row => this.rowToResult(row))
367
- }
368
-
369
- // --- Embedding-based semantic search ---
370
-
371
- /**
372
- * Store an embedding for an observation.
373
- * Called after storing an observation to enable semantic search.
374
- */
375
- storeEmbedding(observationId: string, embedding: number[]): void {
376
- const buffer = embeddingToBuffer(embedding)
377
- const now = new Date().toISOString()
378
-
379
- try {
380
- this.db.prepare(`
381
- INSERT OR REPLACE INTO observation_embeddings (observation_id, embedding, created_at)
382
- VALUES (?, ?, ?)
383
- `).run(observationId, buffer, now)
384
- } catch (error) {
385
- this.logger.warn({ error, observationId }, 'Failed to store embedding')
386
- }
387
- }
388
-
389
- /**
390
- * Delete an embedding for an observation.
391
- */
392
- deleteEmbedding(observationId: string): void {
393
- try {
394
- this.db.prepare('DELETE FROM observation_embeddings WHERE observation_id = ?').run(observationId)
395
- } catch (error) {
396
- this.logger.warn({ error, observationId }, 'Failed to delete embedding')
397
- }
398
- }
399
-
400
- /**
401
- * Semantic search using stored embeddings.
402
- * Generates a query embedding, then computes cosine similarity against all
403
- * stored observation embeddings. Returns top matches above threshold.
404
- */
405
- async semanticSearch(
406
- queryEmbedding: number[],
407
- project?: string,
408
- limit: number = 10,
409
- minSimilarity: number = 0.3
410
- ): Promise<ScoredResult[]> {
411
- try {
412
- // Load all embeddings with their observation IDs
413
- let sql: string
414
- const params: any[] = []
415
-
416
- if (project) {
417
- sql = `
418
- SELECT oe.observation_id, oe.embedding, o.*
419
- FROM observation_embeddings oe
420
- JOIN observations o ON oe.observation_id = o.id
421
- WHERE o.archived = 0 AND o.project = ?
422
- `
423
- params.push(project)
424
- } else {
425
- sql = `
426
- SELECT oe.observation_id, oe.embedding, o.*
427
- FROM observation_embeddings oe
428
- JOIN observations o ON oe.observation_id = o.id
429
- WHERE o.archived = 0
430
- `
431
- }
432
-
433
- const rows = this.db.prepare(sql).all(...params) as any[]
434
-
435
- if (rows.length === 0) {
436
- this.logger.debug('No embeddings found for semantic search')
437
- return []
438
- }
439
-
440
- // Compute cosine similarity for each stored embedding
441
- const scored: { row: any; similarity: number }[] = []
442
- for (const row of rows) {
443
- try {
444
- const storedEmbedding = bufferToEmbedding(row.embedding)
445
- const similarity = cosineSimilarity(queryEmbedding, storedEmbedding)
446
- if (similarity >= minSimilarity) {
447
- scored.push({ row, similarity })
448
- }
449
- } catch {
450
- // Skip invalid embeddings
451
- }
452
- }
453
-
454
- // Sort by similarity descending and take top results
455
- scored.sort((a, b) => b.similarity - a.similarity)
456
- const topResults = scored.slice(0, limit)
457
-
458
- return topResults.map(({ row, similarity }) => ({
459
- ...this.rowToResult(row),
460
- score: similarity
461
- }))
462
- } catch (error) {
463
- this.logger.warn({ error }, 'Semantic search failed')
464
- return []
465
- }
466
- }
467
-
468
- /**
469
- * Check if there are any stored embeddings.
470
- */
471
- hasEmbeddings(): boolean {
472
- try {
473
- const row = this.db.prepare('SELECT COUNT(*) as count FROM observation_embeddings').get() as any
474
- return (row?.count || 0) > 0
475
- } catch {
476
- return false
477
- }
478
- }
479
-
480
- /**
481
- * Backfill embeddings for observations that don't have them yet.
482
- * Used for migrating existing data.
483
- */
484
- async backfillEmbeddings(embeddingService: EmbeddingService, batchSize: number = 50): Promise<number> {
485
- try {
486
- const rows = this.db.prepare(`
487
- SELECT o.id, o.content, o.reasoning, o.context
488
- FROM observations o
489
- LEFT JOIN observation_embeddings oe ON o.id = oe.observation_id
490
- WHERE oe.observation_id IS NULL AND o.archived = 0
491
- LIMIT ?
492
- `).all(batchSize) as any[]
493
-
494
- if (rows.length === 0) return 0
495
-
496
- let count = 0
497
- for (const row of rows) {
498
- try {
499
- const text = [row.content, row.reasoning, row.context].filter(Boolean).join(' ')
500
- const embedding = await embeddingService.generateEmbedding(text)
501
- this.storeEmbedding(row.id, embedding)
502
- count++
503
- } catch {
504
- // Skip failures, continue with next
505
- }
506
- }
507
-
508
- this.logger.info({ backfilled: count, total: rows.length }, 'Embedding backfill complete')
509
- return count
510
- } catch (error) {
511
- this.logger.warn({ error }, 'Embedding backfill failed')
512
- return 0
513
- }
514
- }
515
-
516
- // --- Private helpers ---
517
-
518
- /**
519
- * Build an FTS5 query from a natural language query.
520
- * Tokenizes words and joins with OR for broad matching.
521
- */
522
- private buildFTSQuery(query: string): string {
523
- // Remove special FTS5 characters that could cause syntax errors
524
- const cleaned = query.replace(/[":*^~(){}[\]]/g, ' ').trim()
525
- if (!cleaned) return ''
526
-
527
- // Expand query with synonyms (e.g., "database" → also search "storage", "persistence")
528
- const expanded = expandQuery(cleaned, { useSynonyms: true, maxExpansions: 8 })
529
- const allWords = expanded.combined.split(/\s+/).filter(w => w.length >= 2)
530
- const unique = [...new Set(allWords)]
531
-
532
- if (unique.length === 0) return ''
533
- return unique.map(w => `"${w}"`).join(' OR ')
534
- }
535
-
536
- /**
537
- * Normalize BM25 score to 0-1 range.
538
- * BM25 returns negative scores in SQLite (lower = better match).
539
- * Typical range: -20 (excellent) to 0 (poor).
540
- */
541
- private normalizeBM25(score: number): number {
542
- // Map BM25 range to 0.2-0.9 (wider range, allows low scores to be filtered)
543
- const normalized = Math.min(1, Math.max(0, score / 20))
544
- return 0.2 + normalized * 0.7
545
- }
546
-
547
- /**
548
- * Compute word overlap between two strings as a similarity proxy.
549
- * Returns a value between 0 and 1.
550
- */
551
- private wordOverlap(a: string, b: string): number {
552
- const wordsA = new Set(a.toLowerCase().split(/\s+/).filter(w => w.length >= 3))
553
- const wordsB = new Set(b.toLowerCase().split(/\s+/).filter(w => w.length >= 3))
554
-
555
- if (wordsA.size === 0 || wordsB.size === 0) return 0
556
-
557
- let overlap = 0
558
- for (const word of wordsA) {
559
- if (wordsB.has(word)) overlap++
560
- }
561
-
562
- // Jaccard similarity
563
- const union = new Set([...wordsA, ...wordsB]).size
564
- return overlap / union
565
- }
566
-
567
/**
 * Substring search used when the FTS5 query cannot be executed.
 *
 * Looks for `query` as a literal substring of `content`, `reasoning`,
 * `context` or `tags` among non-archived observations, newest first.
 * LIKE metacharacters in the query (`%`, `_`) and the escape character
 * itself are escaped so that, for example, "50%_off" only matches that exact
 * text instead of acting as a wildcard pattern.
 *
 * @param query   Text to look for (used verbatim, not trimmed).
 * @param project When truthy, restricts results to this project.
 * @param limit   Maximum number of rows to return.
 * @returns Matching observations converted via `rowToResult`.
 */
private fallbackSearch(query: string, project?: string, limit: number = 10): ObservationResult[] {
  const escaped = query.replace(/[\\%_]/g, ch => `\\${ch}`)
  const pattern = `%${escaped}%`

  const params: (string | number)[] = [pattern, pattern, pattern, pattern]
  let projectFilter = ''
  if (project) {
    projectFilter = 'AND project = ?'
    params.push(project)
  }
  params.push(limit)

  // ESCAPE '\' makes the backslashes inserted above literal-izing prefixes
  const sql = `
    SELECT * FROM observations
    WHERE (content LIKE ? ESCAPE '\\' OR reasoning LIKE ? ESCAPE '\\' OR context LIKE ? ESCAPE '\\' OR tags LIKE ? ESCAPE '\\')
    ${projectFilter} AND archived = 0
    ORDER BY created_at DESC
    LIMIT ?
  `

  const rows = this.db.prepare(sql).all(...params) as unknown[]
  return rows.map(row => this.rowToResult(row))
}
599
-
600
/**
 * Shape a raw `observations` row into the public ObservationResult form.
 *
 * Empty or missing text columns become `null`, missing `confidence` falls
 * back to 0.8, missing `source` to 'explicit', list columns are decoded with
 * `parseJsonArray` (empty list when absent) and the numeric `archived` flag
 * becomes a boolean.
 */
private rowToResult(row: any): ObservationResult {
  // List columns are stored as text; absent values decode to an empty list
  const decodeList = (raw: string | null | undefined): string[] =>
    raw ? this.parseJsonArray(raw) : []

  const result: ObservationResult = {
    id: row.id,
    project: row.project,
    category: row.category,
    content: row.content,
    reasoning: row.reasoning || null,
    context: row.context || null,
    confidence: row.confidence ?? 0.8,
    source: row.source || 'explicit',
    tags: decodeList(row.tags),
    file_paths: decodeList(row.file_paths),
    symbols: decodeList(row.symbols),
    access_count: row.access_count ?? 0,
    last_accessed: row.last_accessed || null,
    created_at: row.created_at,
    updated_at: row.updated_at,
    archived: row.archived === 1
  }
  return result
}
623
-
624
/**
 * Decode a list column into an array of strings.
 *
 * - Valid JSON array: its string elements are returned; elements of any
 *   other type are dropped so the declared `string[]` return type holds.
 * - Valid JSON that is not an array: empty list.
 * - Not valid JSON: treated as a comma-separated list; entries are trimmed
 *   and empty entries removed.
 *
 * @param value Raw column text.
 */
private parseJsonArray(value: string): string[] {
  let parsed: unknown
  try {
    parsed = JSON.parse(value)
  } catch {
    // Handle comma-separated strings as fallback
    return value.split(',').map(s => s.trim()).filter(Boolean)
  }

  if (!Array.isArray(parsed)) return []
  return parsed.filter((item): item is string => typeof item === 'string')
}
633
- }
1
+ /**
2
+ * FTS5 Search Service — Phase 26
3
+ * Full-text search over the observations table using SQLite FTS5.
4
+ * Replaces ChromaDB as the primary search backend.
5
+ */
6
+
7
+ import { randomUUID } from 'crypto'
8
+ import type { Database } from 'bun:sqlite'
9
+ import type { Logger } from 'pino'
10
+ import { expandQuery } from '@/retrieval/query/expander'
11
+ import { embeddingToBuffer, bufferToEmbedding, cosineSimilarity } from './embedding-utils'
12
+ import type { EmbeddingService } from './embeddings'
13
+
14
/** Kinds of observation the store accepts. */
export type ObservationCategory =
  | 'decision'
  | 'pattern'
  | 'correction'
  | 'insight'
  | 'preference'

/** Input accepted by `FTS5Search.store()` (and, partially, by `update()`). */
export interface NewObservation {
  /** Project the observation belongs to. */
  project: string
  category: ObservationCategory
  /** Main text of the observation. */
  content: string
  /** Optional rationale; persisted as NULL when omitted or empty. */
  reasoning?: string
  /** Optional surrounding context; persisted as NULL when omitted or empty. */
  context?: string
  /** Stored as 0.8 when omitted. */
  confidence?: number
  /** Stored as 'explicit' when omitted or empty. */
  source?: string
  /** Persisted as a JSON-encoded array; NULL when omitted. */
  tags?: string[]
  /** Persisted as a JSON-encoded array; NULL when omitted. */
  file_paths?: string[]
  /** Persisted as a JSON-encoded array; NULL when omitted. */
  symbols?: string[]
}

/** An observation as returned to callers, with list columns already decoded. */
export interface ObservationResult {
  id: string
  project: string
  category: ObservationCategory
  content: string
  reasoning: string | null
  context: string | null
  confidence: number
  source: string
  tags: string[]
  file_paths: string[]
  symbols: string[]
  /** Number of recorded accesses. */
  access_count: number
  /** Timestamp of the most recent recorded access, or null if never accessed. */
  last_accessed: string | null
  created_at: string
  updated_at: string
  /** True when the row's `archived` column equals 1. */
  archived: boolean
}

/** An observation paired with a relevance score; the scale depends on the search method that produced it. */
export interface ScoredResult extends ObservationResult {
  score: number
}

/** Best existing match reported by the duplicate check. */
export interface DuplicateResult {
  id: string
  content: string
  /** Word-overlap similarity between the candidate text and this observation. */
  score: number
}
57
+
58
/**
 * One row of the `observations` table exactly as the driver returns it.
 * List-valued columns are still serialized text and `archived` is numeric.
 */
interface ObservationRow {
  id: string
  project: string
  category: string
  content: string
  reasoning: string | null
  context: string | null
  /** Serialized list (JSON array text), or null. */
  tags: string | null
  confidence: number
  source: string | null
  /** Serialized list (JSON array text), or null. */
  file_paths: string | null
  /** Serialized list (JSON array text), or null. */
  symbols: string | null
  /** 0 or 1; converted to a boolean when mapped to a result. */
  archived: number
  access_count: number
  last_accessed: string | null
  created_at: string
  updated_at: string
}

/** Observation row plus the `bm25(observations_fts)` value selected as `bm25_score`. */
interface ObservationWithBM25Row extends ObservationRow {
  bm25_score: number
}

/** Observation row joined with its `observation_embeddings` columns. */
interface ObservationWithEmbeddingRow extends ObservationRow {
  observation_id: string
  /** Binary embedding blob; decoded with `bufferToEmbedding`. */
  embedding: Buffer | Uint8Array
}

/** Columns selected by the duplicate-check query. */
interface DuplicateCheckRow {
  id: string
  content: string
  bm25_score: number
}

/** Columns selected when looking for observations that still lack an embedding. */
interface BackfillRow {
  id: string
  content: string
  reasoning: string | null
  context: string | null
}

/** Result shape of a `SELECT COUNT(*) as count` query. */
interface CountRow {
  count: number
}
108
+
109
/**
 * SQLite-backed observation store with FTS5 full-text search and optional
 * embedding-based semantic search.
 *
 * Every statement runs against the injected database handle. The tables
 * used here (`observations`, `observations_fts`, `observation_embeddings`)
 * must already exist — this class never creates them.
 */
export class FTS5Search {
  /** Columns that `update()` is allowed to write. */
  private static readonly UPDATABLE_FIELDS: ReadonlySet<string> = new Set([
    'content', 'reasoning', 'context', 'confidence', 'source', 'tags', 'file_paths', 'symbols'
  ])

  private readonly db: Database
  private readonly logger: Logger

  /**
   * @param db     Open database handle.
   * @param logger Parent logger; a child tagged `component: 'fts5-search'` is used internally.
   */
  constructor(db: Database, logger: Logger) {
    this.db = db
    this.logger = logger.child({ component: 'fts5-search' })
  }

  /**
   * Search non-archived observations via FTS5, best `rank` first.
   *
   * Falls back to a LIKE substring search when the query yields no usable
   * FTS term or when the FTS statement throws.
   *
   * @param query   Free-form text; blank input returns an empty list.
   * @param project When truthy, restricts results to this project.
   * @param limit   Maximum number of results.
   */
  search(query: string, project?: string, limit: number = 10): ObservationResult[] {
    if (!query.trim()) return []

    const ftsQuery = this.buildFTSQuery(query)
    // An empty MATCH expression is rejected by FTS5; skip straight to the
    // fallback instead of provoking (and logging) that error.
    if (!ftsQuery) return this.fallbackSearch(query, project, limit)

    try {
      const rows = this.matchRows<ObservationRow>('rank', 'rank', ftsQuery, project, limit)
      return rows.map(row => this.rowToResult(row))
    } catch (error) {
      this.logger.warn({ error, query, ftsQuery }, 'FTS5 search failed, trying fallback')
      return this.fallbackSearch(query, project, limit)
    }
  }

  /**
   * Search with BM25 ranking and a heuristic relevance score.
   *
   * The score starts from the normalized BM25 magnitude (0.2–0.9) and is
   * boosted, capped at 1.0, by:
   * +0.15 when the content contains the whole query (case-insensitive),
   * +0.10 when the query contains one of the observation's non-empty tags,
   * +0.05 when a project was requested and matches.
   * Results are returned highest score first. Fallback results (no usable
   * FTS term, or the FTS statement threw) all carry a score of 0.5.
   */
  searchWithConfidence(query: string, project?: string, limit: number = 10): ScoredResult[] {
    if (!query.trim()) return []

    const ftsQuery = this.buildFTSQuery(query)
    const scoredFallback = (): ScoredResult[] =>
      this.fallbackSearch(query, project, limit).map(r => ({ ...r, score: 0.5 }))

    // Same reasoning as in search(): never send an empty MATCH expression
    if (!ftsQuery) return scoredFallback()

    const queryLower = query.toLowerCase()

    try {
      const rows = this.matchRows<ObservationWithBM25Row>(
        'bm25(observations_fts) as bm25_score',
        'bm25_score',
        ftsQuery,
        project,
        limit
      )

      return rows
        .map(row => {
          const result = this.rowToResult(row)

          // bm25() is negative for matches; work with its magnitude
          let score = this.normalizeBM25(Math.abs(row.bm25_score))

          // Boost for exact content match
          if (result.content.toLowerCase().includes(queryLower)) {
            score = Math.min(1.0, score + 0.15)
          }

          // Boost for tag match; an empty tag would match every query, so skip it
          const tagHit = result.tags.some(
            tag => tag.length > 0 && queryLower.includes(tag.toLowerCase())
          )
          if (tagHit) {
            score = Math.min(1.0, score + 0.1)
          }

          // Boost for project match
          if (project && result.project === project) {
            score = Math.min(1.0, score + 0.05)
          }

          return { ...result, score }
        })
        .sort((a, b) => b.score - a.score)
    } catch (error) {
      this.logger.warn({ error, query, ftsQuery }, 'FTS5 confidence search failed, trying fallback')
      return scoredFallback()
    }
  }

  /**
   * Insert a new observation.
   *
   * @param observation Fields to persist; list fields are JSON-encoded,
   *                    `confidence` defaults to 0.8 and `source` to 'explicit'.
   * @param providedId  Optional ID to use instead of a freshly generated UUID.
   * @returns The ID of the inserted row.
   */
  store(observation: NewObservation, providedId?: string): string {
    const id = providedId || randomUUID()
    const now = new Date().toISOString()
    const encodeList = (list?: string[]): string | null => (list ? JSON.stringify(list) : null)

    this.db.prepare(`
      INSERT INTO observations (id, project, category, content, reasoning, context, confidence, source, tags, file_paths, symbols, access_count, last_accessed, created_at, updated_at, archived)
      VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, NULL, ?, ?, 0)
    `).run(
      id,
      observation.project,
      observation.category,
      observation.content,
      observation.reasoning || null,
      observation.context || null,
      observation.confidence ?? 0.8,
      observation.source || 'explicit',
      encodeList(observation.tags),
      encodeList(observation.file_paths),
      encodeList(observation.symbols),
      now,
      now
    )

    this.logger.debug({ id, category: observation.category, project: observation.project }, 'Observation stored')
    return id
  }

  /**
   * Look for an existing observation that duplicates `content`.
   *
   * The three best FTS5 candidates in the project are compared with the
   * input using word-overlap similarity; the first one reaching `threshold`
   * is returned.
   *
   * @returns The duplicate, or null when none qualifies or the check fails.
   */
  searchForDuplicates(content: string, project: string, threshold: number = 0.85): DuplicateResult | null {
    const ftsQuery = this.buildFTSQuery(content)
    if (!ftsQuery.trim()) return null

    try {
      const candidates = this.db.prepare(`
        SELECT o.id, o.content, bm25(observations_fts) as bm25_score
        FROM observations o
        JOIN observations_fts fts ON o.rowid = fts.rowid
        WHERE observations_fts MATCH ? AND o.project = ? AND o.archived = 0
        ORDER BY bm25_score
        LIMIT 3
      `).all(ftsQuery, project) as DuplicateCheckRow[]

      // Use word overlap as a proxy for semantic similarity
      for (const candidate of candidates) {
        const similarity = this.wordOverlap(content, candidate.content)
        if (similarity >= threshold) {
          return { id: candidate.id, content: candidate.content, score: similarity }
        }
      }
      return null
    } catch (error) {
      this.logger.warn({ error }, 'Duplicate check failed')
      return null
    }
  }

  /**
   * Fetch every non-archived observation, newest first, optionally filtered
   * by project and/or category.
   */
  fetchAll(project?: string, category?: ObservationCategory): ObservationResult[] {
    const conditions: string[] = []
    const params: string[] = []

    if (project) {
      conditions.push('project = ?')
      params.push(project)
    }
    if (category) {
      conditions.push('category = ?')
      params.push(category)
    }
    conditions.push('archived = 0')

    const sql = `SELECT * FROM observations WHERE ${conditions.join(' AND ')} ORDER BY created_at DESC`
    const rows = this.db.prepare(sql).all(...params) as ObservationRow[]
    return rows.map(row => this.rowToResult(row))
  }

  /**
   * Update selected fields of an observation and refresh `updated_at`.
   *
   * Only whitelisted columns are written; any other key is logged and
   * ignored, and `undefined` values are skipped. Array values are
   * JSON-encoded. Nothing is executed when no writable field remains.
   */
  update(id: string, updates: Partial<NewObservation>): void {
    const assignments: string[] = []
    const values: (string | number | null)[] = []

    for (const [key, value] of Object.entries(updates)) {
      if (value === undefined) continue
      if (!FTS5Search.UPDATABLE_FIELDS.has(key)) {
        this.logger.warn({ field: key }, 'Attempted to update non-whitelisted field')
        continue
      }

      // `key` is safe to interpolate: it passed the whitelist above
      assignments.push(`${key} = ?`)
      values.push(Array.isArray(value) ? JSON.stringify(value) : value)
    }

    if (assignments.length === 0) return

    assignments.push('updated_at = ?')
    values.push(new Date().toISOString(), id)

    this.db.prepare(`UPDATE observations SET ${assignments.join(', ')} WHERE id = ?`).run(...values)
  }

  /**
   * Delete an observation by ID.
   */
  delete(id: string): void {
    this.db.prepare('DELETE FROM observations WHERE id = ?').run(id)
  }

  /**
   * Record an access: increments `access_count` and sets `last_accessed` to now.
   */
  recordAccess(id: string): void {
    this.db.prepare(`
      UPDATE observations SET access_count = access_count + 1, last_accessed = ? WHERE id = ?
    `).run(new Date().toISOString(), id)
  }

  /**
   * Get a single observation by ID (archived rows included).
   *
   * @returns The observation, or null when no row has that ID.
   */
  getById(id: string): ObservationResult | null {
    const row = this.db.prepare('SELECT * FROM observations WHERE id = ?').get(id) as ObservationRow | null
    return row ? this.rowToResult(row) : null
  }

  /**
   * Phase 27: Fetch non-archived observations of a project created within
   * [start, end] (both inclusive), most recent first.
   */
  fetchByTimeRange(project: string, start: Date, end: Date, limit: number = 50): ObservationResult[] {
    const rows = this.db.prepare(`
      SELECT * FROM observations
      WHERE project = ? AND archived = 0
        AND created_at >= ? AND created_at <= ?
      ORDER BY created_at DESC
      LIMIT ?
    `).all(project, start.toISOString(), end.toISOString(), limit) as ObservationRow[]

    return rows.map(row => this.rowToResult(row))
  }

  /**
   * Phase 27: Increment access count and update last_accessed timestamp.
   * Alias for recordAccess for API consistency.
   */
  incrementAccess(id: string): void {
    this.recordAccess(id)
  }

  /**
   * BUG-002: List non-archived observations of one category, optionally
   * scoped to a project, most recent first.
   */
  searchByCategory(category: ObservationCategory, project?: string, limit: number = 10): ObservationResult[] {
    const params: (string | number)[] = [category]
    let projectFilter = ''
    if (project) {
      projectFilter = ' AND project = ?'
      params.push(project)
    }
    params.push(limit)

    const sql = `SELECT * FROM observations WHERE category = ?${projectFilter} AND archived = 0 ORDER BY created_at DESC LIMIT ?`
    const rows = this.db.prepare(sql).all(...params) as ObservationRow[]
    return rows.map(row => this.rowToResult(row))
  }

  // --- Embedding-based semantic search ---

  /**
   * Store (or replace) the embedding of an observation.
   * Database failures are logged and swallowed; this method is best-effort.
   */
  storeEmbedding(observationId: string, embedding: number[]): void {
    this.persistEmbedding(observationId, embedding)
  }

  /**
   * Delete the embedding of an observation. Failures are logged and swallowed.
   */
  deleteEmbedding(observationId: string): void {
    try {
      this.db.prepare('DELETE FROM observation_embeddings WHERE observation_id = ?').run(observationId)
    } catch (error) {
      this.logger.warn({ error, observationId }, 'Failed to delete embedding')
    }
  }

  /**
   * Semantic search over stored embeddings.
   *
   * Loads every embedding of the non-archived observations in scope, computes
   * the cosine similarity with `queryEmbedding`, and returns the `limit` best
   * matches whose similarity is at least `minSimilarity`, best first.
   * Embeddings that cannot be decoded or compared are skipped.
   *
   * @returns Scored observations (score = similarity); empty on any failure.
   */
  async semanticSearch(
    queryEmbedding: number[],
    project?: string,
    limit: number = 10,
    minSimilarity: number = 0.3
  ): Promise<ScoredResult[]> {
    try {
      const params: string[] = []
      let projectFilter = ''
      if (project) {
        projectFilter = 'AND o.project = ?'
        params.push(project)
      }

      const rows = this.db.prepare(`
        SELECT oe.observation_id, oe.embedding, o.*
        FROM observation_embeddings oe
        JOIN observations o ON oe.observation_id = o.id
        WHERE o.archived = 0 ${projectFilter}
      `).all(...params) as ObservationWithEmbeddingRow[]

      if (rows.length === 0) {
        this.logger.debug('No embeddings found for semantic search')
        return []
      }

      const scored: { row: ObservationWithEmbeddingRow; similarity: number }[] = []
      let skipped = 0
      for (const row of rows) {
        try {
          const similarity = cosineSimilarity(queryEmbedding, bufferToEmbedding(row.embedding))
          if (similarity >= minSimilarity) {
            scored.push({ row, similarity })
          }
        } catch {
          // One bad embedding must not fail the whole search
          skipped++
        }
      }
      if (skipped > 0) {
        this.logger.debug({ skipped }, 'Skipped invalid embeddings during semantic search')
      }

      scored.sort((a, b) => b.similarity - a.similarity)

      return scored.slice(0, limit).map(({ row, similarity }) => ({
        ...this.rowToResult(row),
        score: similarity
      }))
    } catch (error) {
      this.logger.warn({ error }, 'Semantic search failed')
      return []
    }
  }

  /**
   * Check if there are any stored embeddings. Returns false when the query fails.
   */
  hasEmbeddings(): boolean {
    try {
      const row = this.db.prepare('SELECT COUNT(*) as count FROM observation_embeddings').get() as CountRow | null
      return (row?.count ?? 0) > 0
    } catch {
      return false
    }
  }

  /**
   * Generate embeddings for up to `batchSize` non-archived observations that
   * do not have one yet. Used for migrating existing data.
   *
   * Rows whose embedding cannot be generated or stored are skipped.
   *
   * @returns The number of embeddings actually stored in this batch.
   */
  async backfillEmbeddings(embeddingService: EmbeddingService, batchSize: number = 50): Promise<number> {
    try {
      const rows = this.db.prepare(`
        SELECT o.id, o.content, o.reasoning, o.context
        FROM observations o
        LEFT JOIN observation_embeddings oe ON o.id = oe.observation_id
        WHERE oe.observation_id IS NULL AND o.archived = 0
        LIMIT ?
      `).all(batchSize) as BackfillRow[]

      if (rows.length === 0) return 0

      let count = 0
      for (const row of rows) {
        try {
          const text = [row.content, row.reasoning, row.context].filter(Boolean).join(' ')
          const embedding = await embeddingService.generateEmbedding(text)
          // Count only rows that really reached the database
          if (this.persistEmbedding(row.id, embedding)) count++
        } catch (error) {
          // Skip this row and continue with the next
          this.logger.debug({ error, observationId: row.id }, 'Skipped observation during embedding backfill')
        }
      }

      this.logger.info({ backfilled: count, total: rows.length }, 'Embedding backfill complete')
      return count
    } catch (error) {
      this.logger.warn({ error }, 'Embedding backfill failed')
      return 0
    }
  }

  // --- Private helpers ---

  /** Runs an FTS5 MATCH query joined to `observations`, optionally scoped to one project. */
  private matchRows<T extends ObservationRow>(
    extraColumn: string,
    orderBy: string,
    ftsQuery: string,
    project: string | undefined,
    limit: number
  ): T[] {
    const params: (string | number)[] = [ftsQuery]
    let projectFilter = ''
    if (project) {
      projectFilter = 'AND o.project = ?'
      params.push(project)
    }
    params.push(limit)

    // extraColumn / orderBy are fixed strings supplied by this class, never user input
    const sql = `
      SELECT o.*, ${extraColumn}
      FROM observations o
      JOIN observations_fts fts ON o.rowid = fts.rowid
      WHERE observations_fts MATCH ? ${projectFilter} AND o.archived = 0
      ORDER BY ${orderBy}
      LIMIT ?
    `
    return this.db.prepare(sql).all(...params) as T[]
  }

  /** Writes one embedding row; returns false (after logging) when the database rejects it. */
  private persistEmbedding(observationId: string, embedding: number[]): boolean {
    const buffer = embeddingToBuffer(embedding)
    const now = new Date().toISOString()

    try {
      this.db.prepare(`
        INSERT OR REPLACE INTO observation_embeddings (observation_id, embedding, created_at)
        VALUES (?, ?, ?)
      `).run(observationId, buffer, now)
      return true
    } catch (error) {
      this.logger.warn({ error, observationId }, 'Failed to store embedding')
      return false
    }
  }

  /**
   * Build an FTS5 MATCH expression from a natural language query.
   *
   * FTS5 syntax characters are blanked out, the text is expanded with
   * synonyms, and each distinct token of at least two characters becomes a
   * quoted term; terms are joined with OR for broad matching.
   *
   * @returns The expression, or '' when no usable token remains.
   */
  private buildFTSQuery(query: string): string {
    // Remove special FTS5 characters that could cause syntax errors
    const cleaned = query.replace(/[":*^~(){}[\]]/g, ' ').trim()
    if (!cleaned) return ''

    // Expand query with synonyms (e.g., "database" → also search "storage", "persistence")
    const expanded = expandQuery(cleaned, { useSynonyms: true, maxExpansions: 8 })
    const unique = new Set(expanded.combined.split(/\s+/).filter(w => w.length >= 2))

    if (unique.size === 0) return ''
    return [...unique].map(w => `"${w}"`).join(' OR ')
  }

  /**
   * Map a BM25 magnitude onto the 0.2–0.9 band.
   *
   * SQLite's bm25() is negative for matches (more negative = better), so
   * callers pass its absolute value. The input is divided by 20 and clamped
   * to [0, 1]: 0 maps to 0.2, 20 or more maps to 0.9.
   */
  private normalizeBM25(score: number): number {
    const normalized = Math.min(1, Math.max(0, score / 20))
    return 0.2 + normalized * 0.7
  }

  /**
   * Jaccard similarity of the two strings' word sets (lower-cased,
   * whitespace-split, words shorter than three characters ignored).
   *
   * @returns A value between 0 and 1; 0 when either side has no usable word.
   */
  private wordOverlap(a: string, b: string): number {
    const tokenize = (text: string): Set<string> =>
      new Set(text.toLowerCase().split(/\s+/).filter(w => w.length >= 3))

    const wordsA = tokenize(a)
    const wordsB = tokenize(b)
    if (wordsA.size === 0 || wordsB.size === 0) return 0

    let overlap = 0
    for (const word of wordsA) {
      if (wordsB.has(word)) overlap++
    }

    // |A ∪ B| = |A| + |B| - |A ∩ B|
    return overlap / (wordsA.size + wordsB.size - overlap)
  }

  /**
   * Fallback LIKE-based search used when FTS5 cannot serve the query.
   *
   * Matches `query` as a literal substring of content, reasoning, context or
   * tags among non-archived observations, newest first. LIKE wildcards in the
   * query are escaped so `%` and `_` only match themselves.
   */
  private fallbackSearch(query: string, project?: string, limit: number = 10): ObservationResult[] {
    const escaped = query.replace(/[\\%_]/g, ch => `\\${ch}`)
    const pattern = `%${escaped}%`

    const params: (string | number)[] = [pattern, pattern, pattern, pattern]
    let projectFilter = ''
    if (project) {
      projectFilter = 'AND project = ?'
      params.push(project)
    }
    params.push(limit)

    // ESCAPE '\' tells SQLite that the backslashes added above quote the next character
    const sql = `
      SELECT * FROM observations
      WHERE (content LIKE ? ESCAPE '\\' OR reasoning LIKE ? ESCAPE '\\' OR context LIKE ? ESCAPE '\\' OR tags LIKE ? ESCAPE '\\')
      ${projectFilter} AND archived = 0
      ORDER BY created_at DESC
      LIMIT ?
    `

    const rows = this.db.prepare(sql).all(...params) as ObservationRow[]
    return rows.map(row => this.rowToResult(row))
  }

  /**
   * Convert a raw database row to an ObservationResult: empty text columns
   * become null, list columns are decoded, and `archived` becomes a boolean.
   */
  private rowToResult(row: ObservationRow): ObservationResult {
    const decodeList = (raw: string | null): string[] => (raw ? this.parseJsonArray(raw) : [])

    return {
      id: row.id,
      project: row.project,
      // The column is plain text; the value is passed through unvalidated.
      category: row.category as ObservationCategory,
      content: row.content,
      reasoning: row.reasoning || null,
      context: row.context || null,
      confidence: row.confidence ?? 0.8,
      source: row.source || 'explicit',
      tags: decodeList(row.tags),
      file_paths: decodeList(row.file_paths),
      symbols: decodeList(row.symbols),
      access_count: row.access_count ?? 0,
      last_accessed: row.last_accessed || null,
      created_at: row.created_at,
      updated_at: row.updated_at,
      archived: row.archived === 1
    }
  }

  /**
   * Decode a list column. A JSON array yields its string elements (other
   * element types are dropped), any other valid JSON yields an empty list,
   * and non-JSON text is treated as a comma-separated list.
   */
  private parseJsonArray(value: string): string[] {
    let parsed: unknown
    try {
      parsed = JSON.parse(value)
    } catch {
      // Handle comma-separated strings as fallback
      return value.split(',').map(s => s.trim()).filter(Boolean)
    }

    if (!Array.isArray(parsed)) return []
    return parsed.filter((item): item is string => typeof item === 'string')
  }
}