claude-brain 0.30.2 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (236)
  1. package/README.md +241 -191
  2. package/VERSION +1 -1
  3. package/assets/CLAUDE-unified.md +11 -11
  4. package/assets/CLAUDE.md +29 -29
  5. package/package.json +7 -3
  6. package/packs/backend/node.json +173 -173
  7. package/packs/core/javascript.json +176 -176
  8. package/packs/core/typescript.json +222 -222
  9. package/packs/frontend/react.json +254 -254
  10. package/packs/meta/testing.json +172 -172
  11. package/scripts/postinstall.mjs +531 -531
  12. package/src/automation/decision-detector.ts +452 -452
  13. package/src/automation/phase12-manager.ts +456 -456
  14. package/src/automation/proactive-recall.ts +373 -373
  15. package/src/automation/project-detector.ts +310 -310
  16. package/src/automation/repo-scanner.ts +210 -205
  17. package/src/cli/auto-setup.ts +75 -75
  18. package/src/cli/auto-start.ts +266 -266
  19. package/src/cli/bin.ts +264 -264
  20. package/src/cli/commands/autostart.ts +90 -90
  21. package/src/cli/commands/chroma.ts +578 -577
  22. package/src/cli/commands/export-training.ts +70 -70
  23. package/src/cli/commands/export.ts +130 -130
  24. package/src/cli/commands/git-hook.ts +183 -183
  25. package/src/cli/commands/hooks.ts +217 -217
  26. package/src/cli/commands/init.ts +123 -123
  27. package/src/cli/commands/install-mcp.ts +122 -111
  28. package/src/cli/commands/models.ts +979 -979
  29. package/src/cli/commands/pack.ts +200 -200
  30. package/src/cli/commands/refresh.ts +344 -339
  31. package/src/cli/commands/reindex.ts +120 -120
  32. package/src/cli/commands/serve.ts +466 -463
  33. package/src/cli/commands/start.ts +44 -44
  34. package/src/cli/commands/status.ts +220 -203
  35. package/src/cli/commands/uninstall-mcp.ts +45 -41
  36. package/src/cli/commands/update.ts +130 -124
  37. package/src/cli/migrate-chroma.ts +106 -106
  38. package/src/cli/ui/animations.ts +80 -80
  39. package/src/cli/ui/components.ts +82 -82
  40. package/src/cli/ui/index.ts +4 -4
  41. package/src/cli/ui/logo.ts +36 -36
  42. package/src/cli/ui/theme.ts +55 -55
  43. package/src/code-intelligence/indexer.ts +352 -352
  44. package/src/code-intelligence/linker.ts +178 -178
  45. package/src/code-intelligence/parser.ts +484 -484
  46. package/src/code-intelligence/query.ts +291 -291
  47. package/src/code-intelligence/schema.ts +83 -83
  48. package/src/code-intelligence/types.ts +95 -95
  49. package/src/config/defaults.ts +52 -52
  50. package/src/config/home.ts +56 -56
  51. package/src/config/index.ts +5 -5
  52. package/src/config/loader.ts +192 -192
  53. package/src/config/schema.ts +446 -415
  54. package/src/config/validator.ts +182 -182
  55. package/src/context/assembler.ts +407 -400
  56. package/src/context/index.ts +79 -79
  57. package/src/context/progress-tracker.ts +174 -174
  58. package/src/context/standards-manager.ts +287 -287
  59. package/src/context/validator.ts +58 -58
  60. package/src/diagnostics/index.ts +122 -121
  61. package/src/health/index.ts +233 -232
  62. package/src/hooks/brain-hook.ts +134 -131
  63. package/src/hooks/capture.ts +168 -168
  64. package/src/hooks/claude-code-mastery.md +112 -112
  65. package/src/hooks/context-hook.ts +260 -245
  66. package/src/hooks/deduplicator.ts +72 -72
  67. package/src/hooks/git-capture.ts +109 -109
  68. package/src/hooks/git-hook-installer.ts +211 -207
  69. package/src/hooks/index.ts +20 -20
  70. package/src/hooks/installer.ts +306 -288
  71. package/src/hooks/interceptor-hook.ts +204 -201
  72. package/src/hooks/passive-classifier.ts +397 -397
  73. package/src/hooks/queue.ts +160 -129
  74. package/src/hooks/session-tracker.ts +312 -312
  75. package/src/hooks/types.ts +52 -52
  76. package/src/index.ts +7 -7
  77. package/src/intelligence/cross-project/generalizer.ts +283 -283
  78. package/src/intelligence/cross-project/index.ts +7 -7
  79. package/src/intelligence/hf-downloader.ts +222 -222
  80. package/src/intelligence/hf-manifest.json +78 -78
  81. package/src/intelligence/index.ts +24 -24
  82. package/src/intelligence/inference-router.ts +762 -762
  83. package/src/intelligence/model-manager.ts +263 -245
  84. package/src/intelligence/optimization/index.ts +10 -10
  85. package/src/intelligence/optimization/precompute.ts +202 -202
  86. package/src/intelligence/optimization/semantic-cache.ts +213 -207
  87. package/src/intelligence/prediction/index.ts +7 -7
  88. package/src/intelligence/prediction/recommender.ts +276 -268
  89. package/src/intelligence/reasoning/chain-retrieval.ts +243 -247
  90. package/src/intelligence/reasoning/index.ts +7 -7
  91. package/src/intelligence/temporal/evolution.ts +193 -197
  92. package/src/intelligence/temporal/index.ts +16 -16
  93. package/src/intelligence/temporal/query-processor.ts +190 -190
  94. package/src/intelligence/temporal/timeline.ts +272 -259
  95. package/src/intelligence/temporal/trends.ts +263 -263
  96. package/src/intelligence/tokenizer.ts +118 -118
  97. package/src/knowledge/entity-extractor.ts +447 -443
  98. package/src/knowledge/graph/builder.ts +185 -185
  99. package/src/knowledge/graph/linker.ts +201 -201
  100. package/src/knowledge/graph/memory-graph.ts +359 -359
  101. package/src/knowledge/graph/schema.ts +99 -99
  102. package/src/knowledge/graph/search.ts +166 -166
  103. package/src/knowledge/relationship-extractor.ts +108 -108
  104. package/src/memory/chroma/client.ts +211 -192
  105. package/src/memory/chroma/collection-manager.ts +92 -92
  106. package/src/memory/chroma/config.ts +57 -57
  107. package/src/memory/chroma/embeddings.ts +177 -175
  108. package/src/memory/chroma/index.ts +82 -82
  109. package/src/memory/chroma/migration.ts +270 -270
  110. package/src/memory/chroma/schemas.ts +69 -69
  111. package/src/memory/chroma/search.ts +319 -315
  112. package/src/memory/chroma/store.ts +755 -747
  113. package/src/memory/compression.ts +121 -121
  114. package/src/memory/consolidation/archiver.ts +162 -165
  115. package/src/memory/consolidation/merger.ts +182 -186
  116. package/src/memory/consolidation/scorer.ts +136 -136
  117. package/src/memory/database.ts +9 -0
  118. package/src/memory/dual-write.ts +145 -0
  119. package/src/memory/embeddings.ts +226 -226
  120. package/src/memory/episodic/detector.ts +108 -108
  121. package/src/memory/episodic/manager.ts +347 -351
  122. package/src/memory/episodic/summarizer.ts +179 -179
  123. package/src/memory/episodic/types.ts +52 -52
  124. package/src/memory/fts5-search.ts +692 -633
  125. package/src/memory/index.ts +943 -1060
  126. package/src/memory/migrations/add-fts5.ts +118 -108
  127. package/src/memory/patterns.ts +438 -438
  128. package/src/memory/pruning.ts +60 -60
  129. package/src/memory/schema.ts +88 -88
  130. package/src/memory/store.ts +911 -787
  131. package/src/orchestrator/handlers/decision-handler.ts +204 -204
  132. package/src/packs/index.ts +9 -9
  133. package/src/packs/loader.ts +134 -134
  134. package/src/packs/manager.ts +204 -204
  135. package/src/packs/ranker.ts +78 -78
  136. package/src/packs/types.ts +81 -81
  137. package/src/phase12/index.ts +5 -5
  138. package/src/retrieval/bm25/index.ts +300 -297
  139. package/src/retrieval/bm25/tokenizer.ts +184 -184
  140. package/src/retrieval/feedback/adaptive.ts +221 -221
  141. package/src/retrieval/feedback/index.ts +16 -16
  142. package/src/retrieval/feedback/metrics.ts +221 -221
  143. package/src/retrieval/feedback/store.ts +283 -283
  144. package/src/retrieval/fusion/index.ts +194 -194
  145. package/src/retrieval/fusion/rrf.ts +165 -165
  146. package/src/retrieval/index.ts +12 -12
  147. package/src/retrieval/pipeline.ts +375 -375
  148. package/src/retrieval/query/expander.ts +203 -203
  149. package/src/retrieval/query/index.ts +27 -27
  150. package/src/retrieval/query/intent-classifier.ts +252 -252
  151. package/src/retrieval/query/temporal-parser.ts +295 -295
  152. package/src/retrieval/reranker/index.ts +189 -188
  153. package/src/retrieval/reranker/model.ts +99 -95
  154. package/src/retrieval/service.ts +125 -125
  155. package/src/retrieval/types.ts +162 -162
  156. package/src/routing/entity-extractor.ts +454 -454
  157. package/src/routing/handlers/exploration-handler.ts +369 -0
  158. package/src/routing/handlers/index.ts +19 -0
  159. package/src/routing/handlers/memory-handler.ts +273 -0
  160. package/src/routing/handlers/mutation-handler.ts +241 -0
  161. package/src/routing/handlers/recall-handler.ts +642 -0
  162. package/src/routing/handlers/shared.ts +515 -0
  163. package/src/routing/handlers/types.ts +48 -0
  164. package/src/routing/intent-classifier.ts +552 -552
  165. package/src/routing/response-filter.ts +399 -391
  166. package/src/routing/router.ts +245 -2193
  167. package/src/routing/search-engine.ts +521 -514
  168. package/src/routing/types.ts +104 -94
  169. package/src/scripts/health-check.ts +118 -118
  170. package/src/scripts/setup.ts +122 -122
  171. package/src/server/auto-updater.ts +283 -276
  172. package/src/server/handlers/call-tool.ts +159 -159
  173. package/src/server/handlers/list-tools.ts +35 -35
  174. package/src/server/handlers/tools/auto-remember.ts +165 -165
  175. package/src/server/handlers/tools/brain.ts +86 -86
  176. package/src/server/handlers/tools/create-project.ts +135 -135
  177. package/src/server/handlers/tools/get-code-standards.ts +123 -123
  178. package/src/server/handlers/tools/get-corrections.ts +152 -152
  179. package/src/server/handlers/tools/get-patterns.ts +156 -156
  180. package/src/server/handlers/tools/get-project-context.ts +75 -75
  181. package/src/server/handlers/tools/index.ts +30 -30
  182. package/src/server/handlers/tools/init-project.ts +756 -756
  183. package/src/server/handlers/tools/list-projects.ts +126 -126
  184. package/src/server/handlers/tools/recall-similar.ts +87 -87
  185. package/src/server/handlers/tools/recognize-pattern.ts +132 -132
  186. package/src/server/handlers/tools/record-correction.ts +131 -131
  187. package/src/server/handlers/tools/remember-decision.ts +168 -168
  188. package/src/server/handlers/tools/schemas.ts +179 -179
  189. package/src/server/handlers/tools/search-code.ts +122 -122
  190. package/src/server/handlers/tools/smart-context.ts +146 -146
  191. package/src/server/handlers/tools/update-progress.ts +131 -131
  192. package/src/server/http-api.ts +215 -1229
  193. package/src/server/mcp-proxy.ts +85 -84
  194. package/src/server/mcp-server.ts +285 -284
  195. package/src/server/middleware/auth.ts +39 -0
  196. package/src/server/middleware/error-handler.ts +37 -0
  197. package/src/server/middleware/rate-limit.ts +53 -0
  198. package/src/server/middleware/validate.ts +42 -0
  199. package/src/server/pid-manager.ts +137 -136
  200. package/src/server/providers/resources.ts +581 -581
  201. package/src/server/routes/code.ts +228 -0
  202. package/src/server/routes/context.ts +26 -0
  203. package/src/server/routes/health.ts +19 -0
  204. package/src/server/routes/helpers.ts +100 -0
  205. package/src/server/routes/hooks.ts +197 -0
  206. package/src/server/routes/mcp.ts +47 -0
  207. package/src/server/routes/memory.ts +397 -0
  208. package/src/server/routes/models.ts +96 -0
  209. package/src/server/routes/projects.ts +89 -0
  210. package/src/server/routes/types.ts +21 -0
  211. package/src/server/schemas/api-schemas.ts +202 -0
  212. package/src/server/services.ts +720 -720
  213. package/src/server/utils/memory-indicator.ts +84 -84
  214. package/src/server/utils/response-formatter.ts +129 -129
  215. package/src/server/web-viewer.ts +1145 -1115
  216. package/src/setup/index.ts +38 -38
  217. package/src/tools/registry.ts +115 -115
  218. package/src/tools/schemas.ts +666 -666
  219. package/src/tools/types.ts +412 -412
  220. package/src/training/data-store.ts +320 -298
  221. package/src/training/retrain-pipeline.ts +399 -394
  222. package/src/utils/error-handler.ts +136 -136
  223. package/src/utils/index.ts +58 -58
  224. package/src/utils/kill-port.ts +55 -53
  225. package/src/utils/phase12-helper.ts +56 -56
  226. package/src/utils/safe-path.ts +43 -0
  227. package/src/utils/timing.ts +47 -47
  228. package/src/utils/transaction.ts +63 -63
  229. package/src/vault/index.ts +4 -3
  230. package/src/vault/paths.ts +106 -106
  231. package/src/vault/query.ts +4 -1
  232. package/src/vault/reader.ts +44 -1
  233. package/src/vault/watcher.ts +24 -1
  234. package/src/vault/writer.ts +487 -413
  235. package/skills/persistent-memory/SKILL.md +0 -148
  236. package/skills/persistent-memory/references/tool-reference.md +0 -90
@@ -1,298 +1,320 @@
1
- /**
2
- * Training Data Store — Phase 1A (SLM Upgrade)
3
- * Logs classification decisions to SQLite for model training.
4
- * Async, non-blocking — never impacts main request path.
5
- *
6
- * Table: training_data in ~/.claude-brain/data/memory.db
7
- */
8
-
9
- import { Database } from 'bun:sqlite'
10
- import { join } from 'node:path'
11
- import { existsSync, mkdirSync } from 'node:fs'
12
- import { getClaudeBrainHome } from '@/config/home'
13
-
14
- export type TrainingTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern'
15
-
16
- export interface TrainingEntry {
17
- task: TrainingTask
18
- input: string
19
- output: string // JSON-encoded: label, entities array, summary, etc.
20
- metadata?: string // JSON-encoded: confidence, scores, timing
21
- }
22
-
23
- export interface ModelFeedbackEntry {
24
- task: string
25
- input: string
26
- modelPrediction: string
27
- modelConfidence: number
28
- regexPrediction: string
29
- actualLabel?: string
30
- }
31
-
32
- let db: Database | null = null
33
- let insertStmt: ReturnType<Database['prepare']> | null = null
34
- let feedbackInsertStmt: ReturnType<Database['prepare']> | null = null
35
-
36
- function getDb(): Database | null {
37
- if (db) return db
38
- try {
39
- const dataDir = join(getClaudeBrainHome(), 'data')
40
- if (!existsSync(dataDir)) {
41
- mkdirSync(dataDir, { recursive: true })
42
- }
43
- const dbPath = join(dataDir, 'memory.db')
44
- db = new Database(dbPath)
45
- db.run('PRAGMA journal_mode = WAL')
46
- ensureTable(db)
47
- insertStmt = db.prepare(
48
- 'INSERT INTO training_data (task, input, output, metadata) VALUES (?, ?, ?, ?)'
49
- )
50
- feedbackInsertStmt = db.prepare(
51
- 'INSERT INTO model_feedback (task, input, model_prediction, model_confidence, regex_prediction, actual_label) VALUES (?, ?, ?, ?, ?, ?)'
52
- )
53
- return db
54
- } catch {
55
- return null
56
- }
57
- }
58
-
59
- function ensureTable(database: Database): void {
60
- database.run(`
61
- CREATE TABLE IF NOT EXISTS training_data (
62
- id INTEGER PRIMARY KEY AUTOINCREMENT,
63
- task TEXT NOT NULL,
64
- input TEXT NOT NULL,
65
- output TEXT NOT NULL,
66
- metadata TEXT,
67
- verified INTEGER DEFAULT 0,
68
- created_at TEXT DEFAULT (datetime('now'))
69
- )
70
- `)
71
- // Indexes for efficient querying
72
- database.run('CREATE INDEX IF NOT EXISTS idx_training_task ON training_data(task)')
73
- database.run('CREATE INDEX IF NOT EXISTS idx_training_verified ON training_data(verified)')
74
-
75
- // Phase 6A: Model feedback table for continuous learning loop
76
- database.run(`
77
- CREATE TABLE IF NOT EXISTS model_feedback (
78
- id INTEGER PRIMARY KEY AUTOINCREMENT,
79
- task TEXT NOT NULL,
80
- input TEXT NOT NULL,
81
- model_prediction TEXT NOT NULL,
82
- model_confidence REAL NOT NULL,
83
- regex_prediction TEXT NOT NULL,
84
- actual_label TEXT,
85
- created_at TEXT DEFAULT (datetime('now'))
86
- )
87
- `)
88
- database.run('CREATE INDEX IF NOT EXISTS idx_feedback_task ON model_feedback(task)')
89
- }
90
-
91
- /**
92
- * Log a training example. Fire-and-forget — errors are silently swallowed.
93
- */
94
- export function logTrainingData(entry: TrainingEntry): void {
95
- setImmediate(() => {
96
- try {
97
- const database = getDb()
98
- if (!database || !insertStmt) return
99
- insertStmt.run(entry.task, entry.input, entry.output, entry.metadata || null)
100
- } catch {
101
- // Never block or crash the main path
102
- }
103
- })
104
- }
105
-
106
- /**
107
- * Export training data as JSONL lines for a specific task.
108
- */
109
- export function exportTrainingData(
110
- task: TrainingTask,
111
- options?: { verifiedOnly?: boolean; limit?: number }
112
- ): string[] {
113
- const database = getDb()
114
- if (!database) return []
115
-
116
- let sql = 'SELECT input, output, metadata, verified, created_at FROM training_data WHERE task = ?'
117
- const params: any[] = [task]
118
-
119
- if (options?.verifiedOnly) {
120
- sql += ' AND verified = 1'
121
- }
122
-
123
- sql += ' ORDER BY created_at DESC'
124
-
125
- if (options?.limit) {
126
- sql += ' LIMIT ?'
127
- params.push(options.limit)
128
- }
129
-
130
- const rows = database.prepare(sql).all(...params) as any[]
131
- return rows.map(row => JSON.stringify({
132
- input: row.input,
133
- output: JSON.parse(row.output),
134
- metadata: row.metadata ? JSON.parse(row.metadata) : null,
135
- verified: row.verified === 1,
136
- created_at: row.created_at,
137
- }))
138
- }
139
-
140
- /**
141
- * Get count of training examples per task.
142
- */
143
- export function getTrainingStats(): Record<TrainingTask, { total: number; verified: number }> {
144
- const database = getDb()
145
- const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
146
- const stats = {} as Record<TrainingTask, { total: number; verified: number }>
147
-
148
- for (const task of tasks) {
149
- if (!database) {
150
- stats[task] = { total: 0, verified: 0 }
151
- continue
152
- }
153
- const total = (database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ?').get(task) as any)?.c || 0
154
- const verified = (database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ? AND verified = 1').get(task) as any)?.c || 0
155
- stats[task] = { total, verified }
156
- }
157
-
158
- return stats
159
- }
160
-
161
- // ── Phase 6A: Model Feedback Functions ──────────────────────────────
162
-
163
- /**
164
- * Log a model vs regex comparison. Fire-and-forget — errors are silently swallowed.
165
- */
166
- export function logModelFeedback(entry: ModelFeedbackEntry): void {
167
- setImmediate(() => {
168
- try {
169
- const database = getDb()
170
- if (!database || !feedbackInsertStmt) return
171
- feedbackInsertStmt.run(
172
- entry.task,
173
- entry.input,
174
- entry.modelPrediction,
175
- entry.modelConfidence,
176
- entry.regexPrediction,
177
- entry.actualLabel || null
178
- )
179
- } catch {
180
- // Never block or crash the main path
181
- }
182
- })
183
- }
184
-
185
- /**
186
- * Get per-task feedback stats: total, agreements, disagreements, disagreement rate, reviewed count.
187
- */
188
- export function getModelFeedbackStats(): Record<string, {
189
- total: number
190
- agreements: number
191
- disagreements: number
192
- disagreementRate: number
193
- reviewed: number
194
- }> {
195
- const database = getDb()
196
- const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
197
- const stats = {} as Record<string, {
198
- total: number
199
- agreements: number
200
- disagreements: number
201
- disagreementRate: number
202
- reviewed: number
203
- }>
204
-
205
- for (const task of tasks) {
206
- if (!database) {
207
- stats[task] = { total: 0, agreements: 0, disagreements: 0, disagreementRate: 0, reviewed: 0 }
208
- continue
209
- }
210
- const total = (database.prepare(
211
- 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ?'
212
- ).get(task) as any)?.c || 0
213
-
214
- const agreements = (database.prepare(
215
- 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND model_prediction = regex_prediction'
216
- ).get(task) as any)?.c || 0
217
-
218
- const disagreements = total - agreements
219
-
220
- const reviewed = (database.prepare(
221
- 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND actual_label IS NOT NULL'
222
- ).get(task) as any)?.c || 0
223
-
224
- stats[task] = {
225
- total,
226
- agreements,
227
- disagreements,
228
- disagreementRate: total > 0 ? disagreements / total : 0,
229
- reviewed,
230
- }
231
- }
232
-
233
- return stats
234
- }
235
-
236
- /**
237
- * Export feedback as JSONL lines for a specific task.
238
- */
239
- export function exportModelFeedback(
240
- task: string,
241
- options?: { limit?: number }
242
- ): string[] {
243
- const database = getDb()
244
- if (!database) return []
245
-
246
- let sql = 'SELECT input, model_prediction, model_confidence, regex_prediction, actual_label, created_at FROM model_feedback WHERE task = ?'
247
- const params: any[] = [task]
248
-
249
- sql += ' ORDER BY created_at DESC'
250
-
251
- if (options?.limit) {
252
- sql += ' LIMIT ?'
253
- params.push(options.limit)
254
- }
255
-
256
- const rows = database.prepare(sql).all(...params) as any[]
257
- return rows.map(row => JSON.stringify({
258
- input: row.input,
259
- modelPrediction: row.model_prediction,
260
- modelConfidence: row.model_confidence,
261
- regexPrediction: row.regex_prediction,
262
- actualLabel: row.actual_label,
263
- createdAt: row.created_at,
264
- }))
265
- }
266
-
267
- /**
268
- * Get the most recent disagreements for human review.
269
- */
270
- export function getDisagreements(
271
- task: string,
272
- limit: number = 50
273
- ): Array<{
274
- input: string
275
- modelPrediction: string
276
- modelConfidence: number
277
- regexPrediction: string
278
- createdAt: string
279
- }> {
280
- const database = getDb()
281
- if (!database) return []
282
-
283
- const rows = database.prepare(`
284
- SELECT input, model_prediction, model_confidence, regex_prediction, created_at
285
- FROM model_feedback
286
- WHERE task = ? AND model_prediction != regex_prediction
287
- ORDER BY created_at DESC
288
- LIMIT ?
289
- `).all(task, limit) as any[]
290
-
291
- return rows.map(row => ({
292
- input: row.input,
293
- modelPrediction: row.model_prediction,
294
- modelConfidence: row.model_confidence,
295
- regexPrediction: row.regex_prediction,
296
- createdAt: row.created_at,
297
- }))
298
- }
1
+ /**
2
+ * Training Data Store — Phase 1A (SLM Upgrade)
3
+ * Logs classification decisions to SQLite for model training.
4
+ * Async, non-blocking — never impacts main request path.
5
+ *
6
+ * Table: training_data in ~/.claude-brain/data/memory.db
7
+ */
8
+
9
+ import { Database } from 'bun:sqlite'
10
+ import { join } from 'node:path'
11
+ import { existsSync, mkdirSync } from 'node:fs'
12
+ import { getClaudeBrainHome } from '@/config/home'
13
+
14
+ /** Row from training_data table */
15
+ interface TrainingDataRow {
16
+ input: string
17
+ output: string
18
+ metadata: string | null
19
+ verified: number
20
+ created_at: string
21
+ }
22
+
23
+ /** Count query result */
24
+ interface CountResult { c: number }
25
+
26
+ /** Row from model_feedback table */
27
+ interface FeedbackRow {
28
+ input: string
29
+ model_prediction: string
30
+ model_confidence: number
31
+ regex_prediction: string
32
+ actual_label: string | null
33
+ created_at: string
34
+ }
35
+
36
+ export type TrainingTask = 'intent' | 'entity' | 'query' | 'knowledge' | 'compress' | 'pattern'
37
+
38
+ export interface TrainingEntry {
39
+ task: TrainingTask
40
+ input: string
41
+ output: string // JSON-encoded: label, entities array, summary, etc.
42
+ metadata?: string // JSON-encoded: confidence, scores, timing
43
+ }
44
+
45
+ export interface ModelFeedbackEntry {
46
+ task: string
47
+ input: string
48
+ modelPrediction: string
49
+ modelConfidence: number
50
+ regexPrediction: string
51
+ actualLabel?: string
52
+ }
53
+
54
+ let db: Database | null = null
55
+ let insertStmt: ReturnType<Database['prepare']> | null = null
56
+ let feedbackInsertStmt: ReturnType<Database['prepare']> | null = null
57
+
58
+ function getDb(): Database | null {
59
+ if (db) return db
60
+ try {
61
+ const dataDir = join(getClaudeBrainHome(), 'data')
62
+ if (!existsSync(dataDir)) {
63
+ mkdirSync(dataDir, { recursive: true })
64
+ }
65
+ const dbPath = join(dataDir, 'memory.db')
66
+ db = new Database(dbPath)
67
+ db.run('PRAGMA journal_mode = WAL')
68
+ ensureTable(db)
69
+ insertStmt = db.prepare(
70
+ 'INSERT INTO training_data (task, input, output, metadata) VALUES (?, ?, ?, ?)'
71
+ )
72
+ feedbackInsertStmt = db.prepare(
73
+ 'INSERT INTO model_feedback (task, input, model_prediction, model_confidence, regex_prediction, actual_label) VALUES (?, ?, ?, ?, ?, ?)'
74
+ )
75
+ return db
76
+ } catch {
77
+ return null
78
+ }
79
+ }
80
+
81
+ function ensureTable(database: Database): void {
82
+ database.run(`
83
+ CREATE TABLE IF NOT EXISTS training_data (
84
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
85
+ task TEXT NOT NULL,
86
+ input TEXT NOT NULL,
87
+ output TEXT NOT NULL,
88
+ metadata TEXT,
89
+ verified INTEGER DEFAULT 0,
90
+ created_at TEXT DEFAULT (datetime('now'))
91
+ )
92
+ `)
93
+ // Indexes for efficient querying
94
+ database.run('CREATE INDEX IF NOT EXISTS idx_training_task ON training_data(task)')
95
+ database.run('CREATE INDEX IF NOT EXISTS idx_training_verified ON training_data(verified)')
96
+
97
+ // Phase 6A: Model feedback table for continuous learning loop
98
+ database.run(`
99
+ CREATE TABLE IF NOT EXISTS model_feedback (
100
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
101
+ task TEXT NOT NULL,
102
+ input TEXT NOT NULL,
103
+ model_prediction TEXT NOT NULL,
104
+ model_confidence REAL NOT NULL,
105
+ regex_prediction TEXT NOT NULL,
106
+ actual_label TEXT,
107
+ created_at TEXT DEFAULT (datetime('now'))
108
+ )
109
+ `)
110
+ database.run('CREATE INDEX IF NOT EXISTS idx_feedback_task ON model_feedback(task)')
111
+ }
112
+
113
+ /**
114
+ * Log a training example. Fire-and-forget — errors are silently swallowed.
115
+ */
116
+ export function logTrainingData(entry: TrainingEntry): void {
117
+ setImmediate(() => {
118
+ try {
119
+ const database = getDb()
120
+ if (!database || !insertStmt) return
121
+ insertStmt.run(entry.task, entry.input, entry.output, entry.metadata || null)
122
+ } catch {
123
+ // Never block or crash the main path
124
+ }
125
+ })
126
+ }
127
+
128
+ /**
129
+ * Export training data as JSONL lines for a specific task.
130
+ */
131
+ export function exportTrainingData(
132
+ task: TrainingTask,
133
+ options?: { verifiedOnly?: boolean; limit?: number }
134
+ ): string[] {
135
+ const database = getDb()
136
+ if (!database) return []
137
+
138
+ let sql = 'SELECT input, output, metadata, verified, created_at FROM training_data WHERE task = ?'
139
+ const params: (string | number)[] = [task]
140
+
141
+ if (options?.verifiedOnly) {
142
+ sql += ' AND verified = 1'
143
+ }
144
+
145
+ sql += ' ORDER BY created_at DESC'
146
+
147
+ if (options?.limit) {
148
+ sql += ' LIMIT ?'
149
+ params.push(options.limit)
150
+ }
151
+
152
+ const rows = database.prepare(sql).all(...params) as TrainingDataRow[]
153
+ return rows.map(row => JSON.stringify({
154
+ input: row.input,
155
+ output: JSON.parse(row.output),
156
+ metadata: row.metadata ? JSON.parse(row.metadata) : null,
157
+ verified: row.verified === 1,
158
+ created_at: row.created_at,
159
+ }))
160
+ }
161
+
162
+ /**
163
+ * Get count of training examples per task.
164
+ */
165
+ export function getTrainingStats(): Record<TrainingTask, { total: number; verified: number }> {
166
+ const database = getDb()
167
+ const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
168
+ const stats = {} as Record<TrainingTask, { total: number; verified: number }>
169
+
170
+ for (const task of tasks) {
171
+ if (!database) {
172
+ stats[task] = { total: 0, verified: 0 }
173
+ continue
174
+ }
175
+ const total = (database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ?').get(task) as CountResult | null)?.c || 0
176
+ const verified = (database.prepare('SELECT COUNT(*) as c FROM training_data WHERE task = ? AND verified = 1').get(task) as CountResult | null)?.c || 0
177
+ stats[task] = { total, verified }
178
+ }
179
+
180
+ return stats
181
+ }
182
+
183
+ // ── Phase 6A: Model Feedback Functions ──────────────────────────────
184
+
185
+ /**
186
+ * Log a model vs regex comparison. Fire-and-forget errors are silently swallowed.
187
+ */
188
+ export function logModelFeedback(entry: ModelFeedbackEntry): void {
189
+ setImmediate(() => {
190
+ try {
191
+ const database = getDb()
192
+ if (!database || !feedbackInsertStmt) return
193
+ feedbackInsertStmt.run(
194
+ entry.task,
195
+ entry.input,
196
+ entry.modelPrediction,
197
+ entry.modelConfidence,
198
+ entry.regexPrediction,
199
+ entry.actualLabel || null
200
+ )
201
+ } catch {
202
+ // Never block or crash the main path
203
+ }
204
+ })
205
+ }
206
+
207
+ /**
208
+ * Get per-task feedback stats: total, agreements, disagreements, disagreement rate, reviewed count.
209
+ */
210
+ export function getModelFeedbackStats(): Record<string, {
211
+ total: number
212
+ agreements: number
213
+ disagreements: number
214
+ disagreementRate: number
215
+ reviewed: number
216
+ }> {
217
+ const database = getDb()
218
+ const tasks: TrainingTask[] = ['intent', 'entity', 'query', 'knowledge', 'compress', 'pattern']
219
+ const stats = {} as Record<string, {
220
+ total: number
221
+ agreements: number
222
+ disagreements: number
223
+ disagreementRate: number
224
+ reviewed: number
225
+ }>
226
+
227
+ for (const task of tasks) {
228
+ if (!database) {
229
+ stats[task] = { total: 0, agreements: 0, disagreements: 0, disagreementRate: 0, reviewed: 0 }
230
+ continue
231
+ }
232
+ const total = (database.prepare(
233
+ 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ?'
234
+ ).get(task) as CountResult | null)?.c || 0
235
+
236
+ const agreements = (database.prepare(
237
+ 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND model_prediction = regex_prediction'
238
+ ).get(task) as CountResult | null)?.c || 0
239
+
240
+ const disagreements = total - agreements
241
+
242
+ const reviewed = (database.prepare(
243
+ 'SELECT COUNT(*) as c FROM model_feedback WHERE task = ? AND actual_label IS NOT NULL'
244
+ ).get(task) as CountResult | null)?.c || 0
245
+
246
+ stats[task] = {
247
+ total,
248
+ agreements,
249
+ disagreements,
250
+ disagreementRate: total > 0 ? disagreements / total : 0,
251
+ reviewed,
252
+ }
253
+ }
254
+
255
+ return stats
256
+ }
257
+
258
+ /**
259
+ * Export feedback as JSONL lines for a specific task.
260
+ */
261
+ export function exportModelFeedback(
262
+ task: string,
263
+ options?: { limit?: number }
264
+ ): string[] {
265
+ const database = getDb()
266
+ if (!database) return []
267
+
268
+ let sql = 'SELECT input, model_prediction, model_confidence, regex_prediction, actual_label, created_at FROM model_feedback WHERE task = ?'
269
+ const params: (string | number)[] = [task]
270
+
271
+ sql += ' ORDER BY created_at DESC'
272
+
273
+ if (options?.limit) {
274
+ sql += ' LIMIT ?'
275
+ params.push(options.limit)
276
+ }
277
+
278
+ const rows = database.prepare(sql).all(...params) as FeedbackRow[]
279
+ return rows.map(row => JSON.stringify({
280
+ input: row.input,
281
+ modelPrediction: row.model_prediction,
282
+ modelConfidence: row.model_confidence,
283
+ regexPrediction: row.regex_prediction,
284
+ actualLabel: row.actual_label,
285
+ createdAt: row.created_at,
286
+ }))
287
+ }
288
+
289
+ /**
290
+ * Get the most recent disagreements for human review.
291
+ */
292
+ export function getDisagreements(
293
+ task: string,
294
+ limit: number = 50
295
+ ): Array<{
296
+ input: string
297
+ modelPrediction: string
298
+ modelConfidence: number
299
+ regexPrediction: string
300
+ createdAt: string
301
+ }> {
302
+ const database = getDb()
303
+ if (!database) return []
304
+
305
+ const rows = database.prepare(`
306
+ SELECT input, model_prediction, model_confidence, regex_prediction, created_at
307
+ FROM model_feedback
308
+ WHERE task = ? AND model_prediction != regex_prediction
309
+ ORDER BY created_at DESC
310
+ LIMIT ?
311
+ `).all(task, limit) as FeedbackRow[]
312
+
313
+ return rows.map(row => ({
314
+ input: row.input,
315
+ modelPrediction: row.model_prediction,
316
+ modelConfidence: row.model_confidence,
317
+ regexPrediction: row.regex_prediction,
318
+ createdAt: row.created_at,
319
+ }))
320
+ }