claude-brain 0.5.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/VERSION +1 -1
  2. package/assets/CLAUDE-unified.md +11 -0
  3. package/package.json +5 -1
  4. package/packs/backend/node.json +173 -0
  5. package/packs/core/javascript.json +176 -0
  6. package/packs/core/typescript.json +222 -0
  7. package/packs/frontend/react.json +254 -0
  8. package/packs/meta/testing.json +172 -0
  9. package/src/cli/bin.ts +14 -0
  10. package/src/cli/commands/hooks.ts +214 -0
  11. package/src/cli/commands/pack.ts +197 -0
  12. package/src/cli/commands/serve.ts +34 -0
  13. package/src/config/defaults.ts +1 -1
  14. package/src/config/schema.ts +85 -2
  15. package/src/hooks/brain-hook.ts +110 -0
  16. package/src/hooks/capture.ts +161 -0
  17. package/src/hooks/deduplicator.ts +72 -0
  18. package/src/hooks/index.ts +19 -0
  19. package/src/hooks/installer.ts +181 -0
  20. package/src/hooks/passive-classifier.ts +366 -0
  21. package/src/hooks/queue.ts +122 -0
  22. package/src/hooks/session-tracker.ts +199 -0
  23. package/src/hooks/types.ts +47 -0
  24. package/src/memory/chroma/store.ts +2 -1
  25. package/src/memory/index.ts +1 -0
  26. package/src/memory/store.ts +1 -0
  27. package/src/packs/index.ts +9 -0
  28. package/src/packs/loader.ts +134 -0
  29. package/src/packs/manager.ts +204 -0
  30. package/src/packs/ranker.ts +78 -0
  31. package/src/packs/types.ts +81 -0
  32. package/src/routing/entity-extractor.ts +410 -0
  33. package/src/routing/intent-classifier.ts +229 -0
  34. package/src/routing/response-filter.ts +221 -0
  35. package/src/routing/router.ts +671 -0
  36. package/src/server/handlers/call-tool.ts +7 -0
  37. package/src/server/handlers/list-tools.ts +22 -5
  38. package/src/server/handlers/tools/brain.ts +85 -0
  39. package/src/server/handlers/tools/init-project.ts +47 -0
  40. package/src/server/handlers/tools/schemas.ts +12 -0
  41. package/src/server/http-api.ts +188 -0
  42. package/src/tools/registry.ts +9 -0
  43. package/src/tools/schemas.ts +33 -1
@@ -0,0 +1,197 @@
1
+ /**
2
+ * Phase 18: CLI Pack Command
3
+ * Manages knowledge packs (list/status/reload)
4
+ */
5
+
6
import {
  renderLogo, theme, heading, successText, warningText, errorText, dimText,
  box, summaryPanel, withSpinner,
} from '@/cli/ui/index.js'
import { readFileSync } from 'node:fs'
import { homedir } from 'node:os'
import { resolve, dirname, join } from 'node:path'
import { fileURLToPath } from 'node:url'
import { PackManager } from '@/packs/manager'
14
+
15
+ const __filename = fileURLToPath(import.meta.url)
16
+ const __dirname = dirname(__filename)
17
+ const PACKAGE_ROOT = resolve(__dirname, '..', '..', '..')
18
+
19
/**
 * Resolve the directory where claude-brain keeps its data files.
 *
 * Resolution order for the base directory:
 *   1. `CLAUDE_BRAIN_HOME`, when set and non-empty
 *   2. `<home>/.claude-brain`, where `<home>` is `HOME` when set, otherwise
 *      the platform home directory reported by `os.homedir()`
 *
 * @returns The base directory joined with `data`.
 */
function getDataDir(): string {
  // Node never expands a literal '~', so fall back to os.homedir() instead
  // (this also covers platforms where HOME is not defined, e.g. Windows).
  const home = process.env.HOME || homedir()
  const base = process.env.CLAUDE_BRAIN_HOME || join(home, '.claude-brain')
  return join(base, 'data')
}
25
+
26
/**
 * Build the pack configuration used by the CLI.
 * A fresh object is returned on every call so callers may mutate it freely.
 */
function getDefaultConfig() {
  const cliPackConfig = {
    enabled: true,
    packsDir: 'packs',
    alwaysLoadCore: true,
    alwaysLoadMeta: true,
    communityConfidenceMultiplier: 0.8,
    personalBoost: 1.2,
    projectBoost: 1.15,
  }
  return cliPackConfig
}
37
+
38
/**
 * Construct a PackManager for CLI use.
 * The manager is given a silent logger (every log method is a no-op and
 * `child()` returns the same logger), the CLI default pack config, the
 * package root and the resolved data directory.
 */
function createPackManager(): PackManager {
  const noop = () => {}
  // Lightweight logger for CLI
  const silentLogger: any = {
    debug: noop,
    info: noop,
    warn: noop,
    error: noop,
  }
  silentLogger.child = () => silentLogger

  const packConfig = getDefaultConfig()
  const dataDir = getDataDir()
  return new PackManager(silentLogger, packConfig, PACKAGE_ROOT, dataDir)
}
50
+
51
/**
 * Entry point for `claude-brain pack [subcommand]`.
 * Reads the subcommand from `process.argv[3]` (defaulting to `list`) and runs
 * the matching handler. An unrecognised subcommand prints an error plus the
 * pack help and exits the process with code 1.
 */
export async function runPack() {
  const requested = process.argv[3] || 'list'

  // A Map (rather than a plain object) so only the listed names can match.
  const handlers = new Map<string, () => Promise<void>>([
    ['list', handleList],
    ['status', handleStatus],
    ['reload', handleReload],
  ])

  const handler = handlers.get(requested)
  if (handler) {
    await handler()
    return
  }

  console.log()
  console.log(errorText(`Unknown pack subcommand: ${requested}`))
  printPackHelp()
  process.exit(1)
}
71
+
72
/**
 * `pack list`: print every available knowledge pack with its id, name,
 * description, version and entry count, followed by a totals line.
 * Prints a warning instead when no packs are found.
 */
async function handleList() {
  type PackSummary = { id: string; name: string; description: string; entries: number; version: string }
  const blank = () => console.log()

  blank()
  console.log(renderLogo())
  blank()
  console.log(heading('Available Knowledge Packs'))
  blank()

  const packManager = createPackManager()

  // Filled in by the spinner callback below.
  let available: PackSummary[] = []
  await withSpinner('Scanning packs directory', async () => {
    available = await packManager.listAvailablePacks()
  })

  if (available.length === 0) {
    console.log(warningText(' No packs found in packs/ directory'))
    blank()
    return
  }

  // Continuation lines are aligned under the name column (id is padded to 24).
  const gutter = ''.padEnd(24)
  let entryTotal = 0

  blank()
  for (const { id, name, description, version, entries } of available) {
    console.log(` ${theme.primary(id.padEnd(24))} ${theme.bold(name)}`)
    console.log(` ${gutter} ${dimText(description)}`)
    console.log(` ${gutter} ${dimText(`v${version} - ${entries} entries`)}`)
    blank()
    entryTotal += entries
  }

  console.log(dimText(` Total: ${available.length} packs, ${entryTotal} entries`))
  blank()
}
104
+
105
/**
 * `pack status <project-name>`: show which packs the project's manifest
 * records as loaded. Prints usage when no project name is given, and a hint
 * when the manifest lists no packs.
 */
async function handleStatus() {
  const blank = () => console.log()

  blank()
  console.log(renderLogo())
  blank()
  console.log(heading('Pack Status'))
  blank()

  const projectName = process.argv[4]
  if (!projectName) {
    console.log(warningText(' Usage: claude-brain pack status <project-name>'))
    console.log(dimText(' Example: claude-brain pack status my-app'))
    blank()
    return
  }

  const manifest = await createPackManager().getManifest(projectName)
  const loadedPacks = manifest.packs

  if (loadedPacks.length === 0) {
    console.log(dimText(` No packs loaded for project "${projectName}"`))
    console.log(dimText(' Packs are loaded automatically when you run init_project'))
    blank()
    return
  }

  // One summary row per loaded pack: id → version, entry count, load date.
  const rows = loadedPacks.map(entry => {
    const loadedOn = new Date(entry.loadedAt).toLocaleDateString()
    return {
      label: entry.packId,
      value: `v${entry.version} (${entry.entriesLoaded} entries, ${loadedOn})`,
      status: 'success' as const,
    }
  })

  console.log(summaryPanel(`Packs for ${projectName}`, rows))
  blank()
  console.log(dimText(` Last updated: ${manifest.lastUpdated}`))
  blank()
}
141
+
142
/**
 * `pack reload <project-name>`: delete the project's pack manifest and print
 * a confirmation box. Prints usage when no project name is given.
 */
async function handleReload() {
  const blank = () => console.log()

  blank()
  console.log(renderLogo())
  blank()
  console.log(heading('Reload Knowledge Packs'))
  blank()

  const projectName = process.argv[4]
  if (!projectName) {
    console.log(warningText(' Usage: claude-brain pack reload <project-name>'))
    console.log(dimText(' This clears the manifest so packs are re-loaded on next init_project'))
    blank()
    return
  }

  const packManager = createPackManager()
  await withSpinner(`Clearing manifest for "${projectName}"`, async () => {
    await packManager.deleteManifest(projectName)
  })

  const confirmation = [
    successText(`Manifest cleared for "${projectName}"`),
    '',
    dimText('Packs will be re-loaded on the next init_project call.'),
    dimText('Or use the init_project MCP tool to reload now.'),
  ].join('\n')

  blank()
  console.log(box(confirmation, 'Reload'))
  blank()
}
172
+
173
/**
 * Print the help box for the `pack` command: usage line, the list of
 * subcommands with descriptions, and example invocations.
 */
function printPackHelp() {
  console.log()

  // Typed as [name, description] tuples so destructuring yields `string`
  // rather than `string | undefined`, removing the need for `!` assertions.
  const commands: ReadonlyArray<readonly [string, string]> = [
    ['list', 'Show all available knowledge packs'],
    ['status', 'Show loaded packs for a project'],
    ['reload', 'Clear manifest to force pack reload'],
  ]

  const cmdLines = commands
    .map(([cmd, desc]) => ` ${theme.primary(cmd.padEnd(14))} ${dimText(desc)}`)
    .join('\n')

  console.log(box([
    theme.bold('Usage:') + ' ' + dimText('claude-brain pack [subcommand]'),
    '',
    theme.bold('Subcommands:'),
    cmdLines,
    '',
    theme.bold('Examples:'),
    ` ${dimText('claude-brain pack list')}`,
    ` ${dimText('claude-brain pack status my-project')}`,
    ` ${dimText('claude-brain pack reload my-project')}`,
  ].join('\n'), 'Pack Help'))
  console.log()
}
@@ -110,11 +110,45 @@ export async function runServe() {
110
110
  await httpServer.stop()
111
111
  })
112
112
 
113
+ // Phase 17: Initialize hook session tracker + queue drain
114
+ let hookSessionTracker: any = null
115
+ if (config.hooks?.enabled !== false) {
116
+ try {
117
+ const { HookSessionTracker } = await import('@/hooks/session-tracker')
118
+ const { getEpisodeService } = await import('@/server/services')
119
+ const episodeManager = getEpisodeService()
120
+ hookSessionTracker = new HookSessionTracker(logger, episodeManager, config.hooks?.sessions)
121
+ httpServer.setSessionTracker(hookSessionTracker)
122
+
123
+ cleanup.register(async () => {
124
+ if (hookSessionTracker) {
125
+ await hookSessionTracker.endAllSessions()
126
+ mainLogger.info('Hook session tracker shut down')
127
+ }
128
+ })
129
+
130
+ mainLogger.info('Hook session tracker initialized')
131
+ } catch (error) {
132
+ mainLogger.warn({ error }, 'Failed to initialize hook session tracker, continuing without passive learning')
133
+ }
134
+ }
135
+
113
136
  // Start HTTP server after MCP server is ready
114
137
  setTimeout(async () => {
115
138
  try {
116
139
  await httpServer.start()
117
140
  mainLogger.info({ port: config.port }, 'HTTP API server started')
141
+
142
+ // Drain hook queue after HTTP server is ready
143
+ try {
144
+ const { drainQueue } = await import('@/hooks/queue')
145
+ const drained = await drainQueue(config.port)
146
+ if (drained > 0) {
147
+ mainLogger.info({ drained }, 'Drained hook queue')
148
+ }
149
+ } catch (error) {
150
+ mainLogger.debug({ error }, 'No hook queue to drain')
151
+ }
118
152
  } catch (error) {
119
153
  mainLogger.error({ error }, 'Failed to start HTTP API server')
120
154
  }
@@ -3,7 +3,7 @@ import type { PartialConfig } from './schema'
3
3
  /** Default configuration values for Claude Brain */
4
4
  export const defaultConfig: PartialConfig = {
5
5
  serverName: 'claude-brain',
6
- serverVersion: '0.5.1',
6
+ serverVersion: '0.9.0',
7
7
  logLevel: 'info',
8
8
  logFilePath: './logs/claude-brain.log',
9
9
  dbPath: './data/memory.db',
@@ -131,6 +131,80 @@ export const KnowledgeConfigSchema = z.object({
131
131
 
132
132
  export type KnowledgeConfig = z.infer<typeof KnowledgeConfigSchema>
133
133
 
134
/** What to capture from tool calls */
const hooksCaptureSchema = z.object({
  /** Capture knowledge from tool use (Edit/Write) */
  toolUse: z.boolean().default(true),
  /** Capture knowledge from file edits */
  fileEdits: z.boolean().default(true),
  /** Capture knowledge from bash commands */
  bashCommands: z.boolean().default(true),
  /** Capture knowledge from user messages */
  userMessages: z.boolean().default(true)
})

/** Privacy filters */
const hooksPrivacySchema = z.object({
  /** File paths to ignore (glob patterns) */
  ignorePaths: z.array(z.string()).default([]),
  /** Projects to ignore */
  ignoreProjects: z.array(z.string()).default([]),
  /** Minimum confidence (0..1) to store captured knowledge */
  minConfidence: z.number().min(0).max(1).default(0.7)
})

/** Session tracking settings */
const hooksSessionsSchema = z.object({
  /** Enable session tracking */
  enabled: z.boolean().default(true),
  /** Idle timeout in minutes before summarizing */
  idleTimeoutMinutes: z.number().int().min(1).default(30),
  /** Minimum events required before generating summary */
  minEventsForSummary: z.number().int().min(1).default(3)
})

/** Deduplication thresholds */
const hooksDeduplicationSchema = z.object({
  /** Similarity above this skips storage (exact duplicate) */
  skipThreshold: z.number().min(0).max(1).default(0.95),
  /** Similarity above this merges instead of creating new */
  mergeThreshold: z.number().min(0).max(1).default(0.85)
})

/** Phase 17: Passive Learning via Hooks configuration */
export const HooksConfigSchema = z.object({
  /** Master switch for hooks passive learning */
  enabled: z.boolean().default(false),
  capture: hooksCaptureSchema.default({}),
  privacy: hooksPrivacySchema.default({}),
  sessions: hooksSessionsSchema.default({}),
  deduplication: hooksDeduplicationSchema.default({})
})
179
+
180
+ export type HooksConfig = z.infer<typeof HooksConfigSchema>
181
+
182
/** Field definitions for the knowledge packs configuration. */
const packsConfigShape = {
  /** Master switch for knowledge packs */
  enabled: z.boolean().default(true),

  /** Directory name for pack files (relative to package root) */
  packsDir: z.string().default('packs'),

  /** Always load core/ packs regardless of tech stack */
  alwaysLoadCore: z.boolean().default(true),

  /** Always load meta/ packs regardless of tech stack */
  alwaysLoadMeta: z.boolean().default(true),

  /** Confidence multiplier (0..1) for community (pack) entries (dampens scores) */
  communityConfidenceMultiplier: z.number().min(0).max(1).default(0.8),

  /** Search result boost (1..2) for personal (user-created) entries */
  personalBoost: z.number().min(1).max(2).default(1.2),

  /** Search result boost (1..2) for project-specific entries */
  projectBoost: z.number().min(1).max(2).default(1.15)
}

/** Phase 18: Knowledge Packs configuration */
export const PacksConfigSchema = z.object(packsConfigShape)
205
+
206
+ export type PacksConfig = z.infer<typeof PacksConfigSchema>
207
+
134
208
  /** Phase 15: Advanced Intelligence configuration */
135
209
  export const AdvancedIntelligenceConfigSchema = z.object({
136
210
  /** Enable advanced intelligence features (Phase 15) */
@@ -196,7 +270,7 @@ export const ConfigSchema = z.object({
196
270
  serverName: z.string().default('claude-brain'),
197
271
 
198
272
  /** Server version in semver format */
199
- serverVersion: z.string().regex(/^\d+\.\d+\.\d+$/, 'Version must be semver format').default('0.5.1'),
273
+ serverVersion: z.string().regex(/^\d+\.\d+\.\d+$/, 'Version must be semver format').default('0.8.0'),
200
274
 
201
275
  /** Logging level */
202
276
  logLevel: LogLevelSchema.default('info'),
@@ -248,7 +322,16 @@ export const ConfigSchema = z.object({
248
322
  knowledge: KnowledgeConfigSchema.default({}),
249
323
 
250
324
  /** Advanced intelligence configuration (Phase 15) */
251
- advancedIntelligence: AdvancedIntelligenceConfigSchema.default({})
325
+ advancedIntelligence: AdvancedIntelligenceConfigSchema.default({}),
326
+
327
+ /** Phase 16: Unified tool mode — expose only brain() instead of all 25 tools */
328
+ unifiedToolMode: z.boolean().default(true),
329
+
330
+ /** Phase 17: Passive learning hooks configuration */
331
+ hooks: HooksConfigSchema.default({}),
332
+
333
+ /** Phase 18: Knowledge packs configuration */
334
+ packs: PacksConfigSchema.default({})
252
335
  })
253
336
 
254
337
  export type Config = z.infer<typeof ConfigSchema>
@@ -0,0 +1,110 @@
1
+ #!/usr/bin/env bun
2
+ /**
3
+ * Phase 17: Brain Hook Worker Script
4
+ * Standalone entry point executed by Claude Code on every tool call.
5
+ *
6
+ * CRITICAL CONSTRAINTS:
7
+ * - Must complete in <200ms (no heavy imports like ChromaDB/embeddings)
8
+ * - NEVER write to stdout (would corrupt Claude Code JSON-RPC)
9
+ * - All errors silently caught with process.exit(0)
10
+ * - Reads stdin JSON from Claude Code, classifies, POSTs to HTTP API
11
+ */
12
+
13
+ import { BrainCapture } from './capture'
14
+ import { appendToQueue } from './queue'
15
+ import type { HookInput } from './types'
16
+
17
/**
 * Hook worker entry point.
 *
 * Reads one JSON hook event from stdin, runs it through BrainCapture and, if
 * something was captured, POSTs it to the local HTTP API. When the server is
 * unreachable or answers with a non-OK status, the captured knowledge is
 * appended to the offline queue instead. Every path ends in
 * `process.exit(0)`; nothing is written to stdout.
 *
 * Environment:
 *   - `CLAUDE_BRAIN_HOOKS_ENABLED`: the literal value `false` disables the hook
 *   - `CLAUDE_BRAIN_PORT`: HTTP API port (default 3000)
 */
async function main(): Promise<void> {
  // Parse --event arg
  const eventIdx = process.argv.indexOf('--event')
  const eventName = eventIdx >= 0 ? process.argv[eventIdx + 1] : undefined

  // Read stdin JSON from Claude Code
  let rawInput: string
  try {
    rawInput = await readStdin()
  } catch {
    process.exit(0)
    return
  }

  if (!rawInput.trim()) {
    process.exit(0)
    return
  }

  let input: HookInput
  try {
    input = JSON.parse(rawInput)
  } catch {
    process.exit(0)
    return
  }

  // Valid JSON that is not an object (null, a number, a string, ...) cannot
  // be a hook event; bail out explicitly instead of relying on a TypeError
  // further down.
  if (typeof input !== 'object' || input === null) {
    process.exit(0)
    return
  }

  // Override event name from arg if provided
  if (eventName) {
    input.hook_event_name = eventName as HookInput['hook_event_name']
  }

  // Load minimal config from env vars
  if (process.env.CLAUDE_BRAIN_HOOKS_ENABLED === 'false') {
    process.exit(0)
    return
  }

  const port = parseInt(process.env.CLAUDE_BRAIN_PORT || '3000', 10)

  // Process the hook event
  const capture = new BrainCapture({ enabled: true })
  const knowledge = capture.process(input)

  if (!knowledge) {
    process.exit(0)
    return
  }

  // POST to HTTP API server. Only the network call sits inside the try, so a
  // failure while writing the queue cannot trigger a second append.
  let delivered = false
  try {
    const res = await fetch(`http://localhost:${port}/api/hooks/ingest`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        knowledge: [knowledge],
        sessionId: input.session_id,
      }),
      signal: AbortSignal.timeout(3000),
    })
    delivered = res.ok
  } catch {
    // Server unreachable or request timed out
    delivered = false
  }

  if (!delivered) {
    // Server returned an error or could not be reached — queue for later.
    // NOTE(review): appendToQueue is called without await, as in the original;
    // confirm it is synchronous, otherwise the exit below could cut it short.
    appendToQueue([knowledge])
  }

  process.exit(0)
}
90
+
91
/**
 * Read all of stdin as a UTF-8 string.
 *
 * Resolves with the collected input when stdin ends. If stdin has not ended
 * after 2 seconds, stdin is destroyed and whatever was collected so far is
 * returned. Rejects if the stream emits an error first.
 *
 * @returns The stdin contents decoded as UTF-8.
 */
function readStdin(): Promise<string> {
  return new Promise((resolve, reject) => {
    const chunks: Buffer[] = []
    const stdin = process.stdin
    const collected = () => Buffer.concat(chunks).toString('utf-8')

    // Timeout after 2 seconds
    const timer = setTimeout(() => {
      stdin.destroy()
      resolve(collected())
    }, 2000)

    stdin.on('data', (chunk: Buffer) => chunks.push(chunk))
    stdin.on('end', () => {
      // Cancel the timeout so no stray timer is left pending once the
      // stream has finished normally.
      clearTimeout(timer)
      resolve(collected())
    })
    stdin.on('error', (err: Error) => {
      clearTimeout(timer)
      reject(err)
    })
  })
}
108
+
109
// Execute — all errors caught silently.
// Any rejection from main() is swallowed here and the process still exits
// with code 0.
main().catch(() => process.exit(0))
@@ -0,0 +1,161 @@
1
+ /**
2
+ * Phase 17: Capture Engine
3
+ * Orchestrates passive classification + entity extraction with privacy filters.
4
+ */
5
+
6
+ import type { HookInput, CapturedKnowledge } from './types'
7
+ import type { HooksConfig } from '@/config/schema'
8
+ import { PassiveClassifier } from './passive-classifier'
9
+
10
// Tech dictionaries used to enrich captured knowledge with technology names.
// NOTE(review): the original comment says these are reused from the entity
// extractor, but this file defines its own copies — presumably they mirror
// that module; verify they are kept in sync.

/** Known technology names, all lowercase. */
const COMMON_TECH: Set<string> = new Set([
  'typescript', 'javascript', 'python', 'rust', 'go', 'java', 'ruby', 'php', 'swift', 'kotlin',
  'react', 'vue', 'angular', 'svelte', 'nextjs', 'nuxt', 'remix', 'astro', 'solid',
  'express', 'fastify', 'hono', 'nestjs', 'django', 'flask', 'fastapi', 'rails', 'spring',
  'mongodb', 'redis', 'postgresql', 'postgres', 'mysql', 'sqlite', 'dynamodb', 'firebase', 'supabase',
  'prisma', 'drizzle', 'typeorm', 'sequelize', 'chromadb', 'pinecone',
  'docker', 'kubernetes', 'aws', 'gcp', 'azure', 'vercel', 'netlify',
  'webpack', 'vite', 'esbuild', 'bun', 'deno', 'node', 'npm', 'yarn', 'pnpm',
  'jest', 'vitest', 'cypress', 'playwright',
  'tailwind', 'bootstrap', 'zod', 'trpc', 'graphql', 'rest',
  'jwt', 'oauth', 'openai', 'anthropic', 'langchain',
  'git', 'github', 'gitlab', 'eslint', 'prettier',
  'zustand', 'redux', 'pinia', 'mobx', 'jotai', 'recoil',
  'storybook', 'turborepo', 'nx',
  'microservices', 'serverless', 'monolith', 'ssr', 'ssg', 'spa', 'pwa', 'mcp', 'rag'
])

/** Maps lowercase alternative spellings and abbreviations to a normalized technology name. */
const TECH_ALIASES: Record<string, string> = {
  'ts': 'typescript', 'js': 'javascript', 'py': 'python',
  'react.js': 'react', 'reactjs': 'react', 'vue.js': 'vue', 'vuejs': 'vue',
  'next.js': 'nextjs', 'nuxt.js': 'nuxt', 'nest.js': 'nestjs',
  'express.js': 'express', 'node.js': 'node', 'nodejs': 'node',
  'mongo': 'mongodb', 'pg': 'postgresql', 'k8s': 'kubernetes',
  'tailwindcss': 'tailwind', 'tailwind-css': 'tailwind',
  'gql': 'graphql', 'golang': 'go',
}
37
+
38
/**
 * Capture engine for hook events.
 *
 * Applies the privacy and capture-toggle filters from the hooks config, runs
 * the passive classifier on the event, enriches the result with technology
 * names found in its content, and drops results below the configured
 * minimum confidence.
 */
export class BrainCapture {
  private classifier: PassiveClassifier
  private config: HooksConfig

  /**
   * @param config Partial hooks configuration. Every missing field falls back
   *   to its default; note that `enabled` defaults to `false`.
   */
  constructor(config?: Partial<HooksConfig>) {
    this.config = {
      enabled: config?.enabled ?? false,
      capture: {
        toolUse: config?.capture?.toolUse ?? true,
        fileEdits: config?.capture?.fileEdits ?? true,
        bashCommands: config?.capture?.bashCommands ?? true,
        userMessages: config?.capture?.userMessages ?? true,
      },
      privacy: {
        ignorePaths: config?.privacy?.ignorePaths ?? [],
        ignoreProjects: config?.privacy?.ignoreProjects ?? [],
        minConfidence: config?.privacy?.minConfidence ?? 0.7,
      },
      sessions: {
        enabled: config?.sessions?.enabled ?? true,
        idleTimeoutMinutes: config?.sessions?.idleTimeoutMinutes ?? 30,
        minEventsForSummary: config?.sessions?.minEventsForSummary ?? 3,
      },
      deduplication: {
        skipThreshold: config?.deduplication?.skipThreshold ?? 0.95,
        mergeThreshold: config?.deduplication?.mergeThreshold ?? 0.85,
      },
    }
    this.classifier = new PassiveClassifier()
  }

  /**
   * Process a hook input event.
   *
   * @param input The hook event to inspect.
   * @returns Captured knowledge, or null if nothing worth capturing (hooks
   *   disabled, ignored path or project, capture toggle off, classifier
   *   produced nothing, or confidence below the configured minimum).
   */
  process(input: HookInput): CapturedKnowledge | null {
    if (!this.config.enabled) return null

    // Privacy: check ignored paths
    if (this.isPathIgnored(input.cwd)) return null

    // Check capture toggles by tool type
    if (!this.shouldCapture(input)) return null

    // Classify tool output
    const knowledge = this.classifier.classify(input)
    if (!knowledge) return null

    // Privacy: check ignored projects
    if (knowledge.project && this.config.privacy.ignoreProjects.includes(knowledge.project)) {
      return null
    }

    // Enrich with additional technology detection from content
    knowledge.technologies = this.enrichTechnologies(knowledge)

    // Filter by minimum confidence
    if (knowledge.confidence < this.config.privacy.minConfidence) {
      return null
    }

    return knowledge
  }

  /**
   * Check if a path matches any ignore pattern (case-insensitive).
   *
   * A pattern without `*` matches when it appears anywhere in the path. In a
   * pattern with `*`, each `*` matches any run of characters and everything
   * else is matched literally; the match is unanchored.
   */
  private isPathIgnored(cwd: string): boolean {
    if (!cwd || this.config.privacy.ignorePaths.length === 0) return false
    const lowerCwd = cwd.toLowerCase()
    return this.config.privacy.ignorePaths.some(pattern => {
      const lowerPattern = pattern.toLowerCase()
      if (lowerPattern.includes('*')) {
        // Escape regex metacharacters in the literal parts so that patterns
        // such as "c++/*" do not throw and "." only matches a real dot.
        const source = lowerPattern
          .split('*')
          .map(part => part.replace(/[.+?^${}()|[\]\\]/g, '\\$&'))
          .join('.*')
        return new RegExp(source).test(lowerCwd)
      }
      return lowerCwd.includes(lowerPattern)
    })
  }

  /** Check if this tool type should be captured based on config */
  private shouldCapture(input: HookInput): boolean {
    const toolName = input.tool_name?.toLowerCase()
    if (!toolName) return false

    switch (toolName) {
      case 'edit':
      case 'write':
        return this.config.capture.fileEdits
      case 'bash':
        return this.config.capture.bashCommands
      default:
        return this.config.capture.toolUse
    }
  }

  /**
   * Enrich technologies by scanning content text for known tech names.
   *
   * @returns The existing technologies followed by any newly detected ones,
   *   without duplicates.
   */
  private enrichTechnologies(knowledge: CapturedKnowledge): string[] {
    const existing = new Set(knowledge.technologies)
    const lower = knowledge.content.toLowerCase()
    const words = lower.split(/[\s,;:()[\]{}"'`|/\\]+/)

    for (const word of words) {
      const cleaned = word.replace(/^[^a-z0-9]+|[^a-z0-9]+$/g, '')
      if (cleaned.length < 2) continue

      if (COMMON_TECH.has(cleaned)) {
        existing.add(cleaned)
      }
      // Own-property check: a plain lookup would also hit inherited keys, so
      // the word "constructor" would add the Object function as a technology.
      if (Object.prototype.hasOwnProperty.call(TECH_ALIASES, cleaned)) {
        const alias = TECH_ALIASES[cleaned]
        if (alias) existing.add(alias)
      }
    }

    // Check multi-word aliases
    for (const [alias, normalized] of Object.entries(TECH_ALIASES)) {
      if ((alias.includes('.') || alias.includes('-')) && lower.includes(alias)) {
        existing.add(normalized)
      }
    }

    return Array.from(existing)
  }
}