@rlabs-inc/memory 0.3.5 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,7 @@
1
1
  // ============================================================================
2
- // RETRIEVAL ENGINE - 10-Dimensional Scoring Algorithm
3
- // EXACT PORT from Python retrieval_strategies.py
4
- // Preserving the working formula for consciousness continuity
2
+ // RETRIEVAL ENGINE - Activation Signal Algorithm
3
+ // Philosophy: Quality over quantity. Silence over noise.
4
+ // Return ONLY memories that are truly relevant to the current context.
5
5
  // ============================================================================
6
6
 
7
7
  import type { StoredMemory, RetrievalResult } from '../types/memory.ts'
@@ -19,51 +19,339 @@ export interface SessionContext {
19
19
  }
20
20
 
21
21
  /**
22
- * Scoring components breakdown
22
+ * Activation signals - binary indicators of relevance
23
23
  */
24
- interface ScoringComponents {
25
- trigger: number
26
- vector: number
27
- importance: number
28
- temporal: number
29
- context: number
30
- tags: number
31
- question: number
32
- emotion: number
33
- problem: number
34
- action: number
24
interface ActivationSignals {
  /** A trigger phrase matched the message. */
  trigger: boolean
  /** Enough semantic tags were found in the message (one suffices when the memory has two tags or fewer, otherwise two). */
  tags: boolean
  /** The memory's domain word was found in the message. */
  domain: boolean
  /** The memory's feature word was found in the message. */
  feature: boolean
  /** Key content words of the memory were found in the message. */
  content: boolean
  /** Total number of signals that fired; the retrieval loop also counts a high vector similarity here. */
  count: number
  /** Strength of the best trigger-phrase match (0-1). */
  triggerStrength: number
  /** Number of semantic tags that matched. */
  tagCount: number
  /** Semantic similarity between query and memory embeddings (0-1). */
  vectorSimilarity: number
}
36
35
 
37
36
  /**
38
- * Internal scored memory during retrieval
37
+ * Scored memory with activation signals
39
38
  */
40
- interface ScoredMemory {
39
interface ActivatedMemory {
  /** The stored memory that passed the relevance gate. */
  memory: StoredMemory
  /** Which activation signals fired for this memory. */
  signals: ActivationSignals
  /** Ranking score used to order memories that are already considered relevant. */
  importanceScore: number
  /** True when the memory's scope or project id is 'global'. */
  isGlobal: boolean
}
48
45
 
49
46
  /**
50
- * Extended result with components for logging
47
+ * Global memory type priority (lower = higher priority)
51
48
  */
52
- interface ExtendedRetrievalResult extends RetrievalResult {
53
- reasoning: string
54
- components: ScoringComponents
49
const GLOBAL_TYPE_PRIORITY: Record<string, number> = Object.assign(
  // Null-prototype table: the lookup key is a free-form context type, so an
  // inherited name such as 'constructor' or 'toString' must resolve to
  // undefined (and fall through to the caller's default) rather than to a
  // function from Object.prototype.
  Object.create(null) as Record<string, number>,
  {
    // Lower number = higher priority when ranking global memories.
    technical: 1,
    preference: 2,
    architectural: 3,
    workflow: 4,
    decision: 5,
    breakthrough: 6,
    philosophy: 7,
    personal: 8,
  }
)
56
59
 
60
// A memory must fire at least this many activation signals to be considered relevant.
const MIN_ACTIVATION_SIGNALS = 2

/**
 * Common English words that are ignored during word extraction.
 */
const STOPWORDS = new Set<string>([
  // articles, copulas, conjunctions, prepositions, question words
  'the', 'is', 'are', 'was', 'were',
  'to', 'a', 'an', 'and', 'or',
  'but', 'in', 'on', 'at', 'for',
  'with', 'about', 'when', 'how',
  'what', 'why', 'where', 'this', 'that',
  'it', 'of', 'be', 'have',
  // auxiliaries and modals
  'do', 'does', 'did', 'will', 'would',
  'could', 'should', 'can',
  'may', 'might', 'must', 'shall',
  'has', 'had', 'been', 'being',
  // pronouns and possessives
  'i', 'you', 'we', 'they', 'he',
  'she', 'my', 'your', 'our',
  'its', 'his', 'her', 'their',
  // connectives
  'if', 'then', 'else', 'so', 'as',
  'from', 'by', 'into', 'through',
  'during', 'before', 'after',
  // filler adverbs and very common verbs
  'also', 'now', 'back', 'get', 'go',
  'come', 'let', 'like', 'just',
  'know', 'think', 'see', 'look',
  'make', 'take', 'want', 'need',
])
78
+
79
+
80
+ /**
81
+ * Activation Signal Retrieval
82
+ *
83
+ * Phase 1: Count activation signals (binary relevance indicators)
84
+ * Phase 2: Among relevant memories, rank by importance
59
85
  *
60
- * This is the innovation: combining vector similarity with rich
61
- * semantic metadata from the curator to make smart decisions WITHOUT
62
- * needing to call Claude for every message.
86
+ * Philosophy: A memory is relevant if multiple signals agree it should activate.
87
+ * Not coincidence - intentionally crafted metadata matching intentional queries.
63
88
  */
64
89
  export class SmartVectorRetrieval {
90
+
91
/**
 * Tokenise text into its set of significant words.
 *
 * The text is lower-cased, every character other than a-z, 0-9, whitespace
 * and '-' is replaced by a space, and the result is split on whitespace.
 * Tokens of two characters or fewer and stopwords are dropped.
 *
 * @param text Raw text to tokenise.
 * @returns The distinct significant tokens, in first-seen order.
 */
private _extractSignificantWords(text: string): Set<string> {
  const significant = new Set<string>()
  const normalized = text.toLowerCase().replace(/[^a-z0-9\s-]/g, ' ')

  for (const token of normalized.split(/\s+/)) {
    if (token.length <= 2) continue
    if (STOPWORDS.has(token)) continue
    significant.add(token)
  }

  return significant
}
101
+
102
/**
 * Pre-filter: binary exclusions applied before any activation signal is checked.
 *
 * A memory is dropped when any of the following holds:
 *  - it has a status other than 'active',
 *  - it is flagged exclude_from_retrieval,
 *  - it has been superseded,
 *  - it is neither global nor owned by the current project,
 *  - one of its anti-triggers occurs in the message.
 *
 * @param memories         All candidate memories.
 * @param currentProjectId Project id of the current session.
 * @param messageLower     The current message, already lower-cased.
 * @returns The memories that survive every exclusion, in their original order.
 */
private _preFilter(
  memories: StoredMemory[],
  currentProjectId: string,
  messageLower: string
): StoredMemory[] {
  return memories.filter(memory => {
    if (memory.status && memory.status !== 'active') return false
    if (memory.exclude_from_retrieval === true) return false
    if (memory.superseded_by) return false

    const isGlobal = memory.scope === 'global' || memory.project_id === 'global'
    if (!isGlobal && memory.project_id !== currentProjectId) return false

    // Anti-triggers are trimmed like tags and trigger phrases, and blank
    // entries are skipped: String.includes('') is always true, so an empty
    // anti-trigger would otherwise veto the memory for every message.
    const vetoed = (memory.anti_triggers ?? []).some(antiTrigger => {
      const needle = antiTrigger.trim().toLowerCase()
      return needle.length > 0 && messageLower.includes(needle)
    })

    return !vetoed
  })
}
124
+
125
/**
 * Decide whether any trigger phrase activates for the message.
 *
 * Each phrase is reduced to its keywords (stopwords and tokens of two
 * characters or fewer removed). A keyword found as-is earns 1 point; failing
 * that, a keyword found with a trailing 's' removed or appended earns 0.8.
 * A phrase's strength is its points divided by its keyword count, and the
 * strongest phrase wins.
 *
 * @param messageLower   The current message, lower-cased.
 * @param messageWords   Significant words of the current message.
 * @param triggerPhrases Trigger phrases attached to the memory.
 * @returns `strength` is the best phrase strength (0-1); `activated` is true
 *          when that strength is at least 0.5.
 */
private _checkTriggerActivation(
  messageLower: string,
  messageWords: Set<string>,
  triggerPhrases: string[]
): { activated: boolean; strength: number } {
  // A term counts as mentioned when it is a significant word of the message
  // or occurs anywhere in the raw text (substring match).
  const mentions = (term: string): boolean =>
    messageWords.has(term) || messageLower.includes(term)

  let best = 0

  for (const rawPhrase of triggerPhrases) {
    const keywords = rawPhrase
      .trim()
      .toLowerCase()
      .split(/\s+/)
      .filter(token => token.length > 2 && !STOPWORDS.has(token))

    // Phrases made only of stopwords / short tokens cannot match anything.
    if (keywords.length === 0) continue

    let credit = 0
    for (const keyword of keywords) {
      if (mentions(keyword)) {
        credit += 1
      } else if (mentions(keyword.replace(/s$/, '')) || mentions(keyword + 's')) {
        // Singular/plural variant: partial credit.
        credit += 0.8
      }
    }

    best = Math.max(best, credit / keywords.length)
  }

  return { activated: best >= 0.5, strength: best }
}
163
+
164
/**
 * Decide whether the memory's semantic tags activate for the message.
 *
 * Tags are trimmed and lower-cased; blank tags are ignored because an empty
 * string is a substring of every message and would count as a match. A tag
 * matches when it is a significant word of the message or occurs anywhere in
 * the raw text. Memories with one or two usable tags need one match; memories
 * with more need two.
 *
 * @param messageLower The current message, lower-cased.
 * @param messageWords Significant words of the current message.
 * @param tags         Semantic tags attached to the memory.
 * @returns `count` is the number of matching tags; `activated` is true when
 *          the count reaches the threshold.
 */
private _checkTagActivation(
  messageLower: string,
  messageWords: Set<string>,
  tags: string[]
): { activated: boolean; count: number } {
  const usableTags = tags
    .map(tag => tag.trim().toLowerCase())
    .filter(tag => tag.length > 0)

  if (!usableTags.length) return { activated: false, count: 0 }

  let matchCount = 0
  for (const tag of usableTags) {
    if (messageWords.has(tag) || messageLower.includes(tag)) {
      matchCount++
    }
  }

  // Sparse tag lists cannot reasonably be asked for two matches.
  const threshold = usableTags.length <= 2 ? 1 : 2
  return { activated: matchCount >= threshold, count: matchCount }
}
185
+
186
/**
 * Decide whether the memory's domain activates for the message.
 *
 * The domain is trimmed and lower-cased before matching. A missing, empty or
 * whitespace-only domain never activates: an empty string is a substring of
 * every message and would otherwise fire this signal unconditionally.
 *
 * @param messageLower The current message, lower-cased.
 * @param messageWords Significant words of the current message.
 * @param domain       Domain attached to the memory, if any.
 * @returns True when the domain is a significant word of the message or
 *          occurs anywhere in the raw text.
 */
private _checkDomainActivation(
  messageLower: string,
  messageWords: Set<string>,
  domain: string | undefined
): boolean {
  const domainLower = domain?.trim().toLowerCase()
  if (!domainLower) return false
  return messageWords.has(domainLower) || messageLower.includes(domainLower)
}
198
+
199
/**
 * Decide whether the memory's feature activates for the message.
 *
 * The feature is trimmed and lower-cased before matching. A missing, empty or
 * whitespace-only feature never activates: an empty string is a substring of
 * every message and would otherwise fire this signal unconditionally.
 *
 * @param messageLower The current message, lower-cased.
 * @param messageWords Significant words of the current message.
 * @param feature      Feature attached to the memory, if any.
 * @returns True when the feature is a significant word of the message or
 *          occurs anywhere in the raw text.
 */
private _checkFeatureActivation(
  messageLower: string,
  messageWords: Set<string>,
  feature: string | undefined
): boolean {
  const featureLower = feature?.trim().toLowerCase()
  if (!featureLower) return false
  return messageWords.has(featureLower) || messageLower.includes(featureLower)
}
211
+
212
/**
 * Decide whether the memory's content activates for the message.
 *
 * Only the first 200 characters of the content are considered. The signal
 * fires when at least three significant message words also appear among the
 * significant words of that preview.
 *
 * @param messageWords Significant words of the current message.
 * @param memory       The memory whose content is compared.
 * @returns True when three or more words overlap.
 */
private _checkContentActivation(
  messageWords: Set<string>,
  memory: StoredMemory
): boolean {
  const previewWords = this._extractSignificantWords(memory.content.slice(0, 200))

  let shared = 0
  messageWords.forEach(word => {
    if (previewWords.has(word)) shared += 1
  })

  return shared >= 3
}
229
+
230
/**
 * Similarity values recorded for debug logging. At most 20 are kept per
 * retrieval; _logVectorStats() reports and clears them.
 */
private _vectorDebugSamples: number[] = []

/**
 * Calculate the similarity between two embedding vectors.
 *
 * Plain number arrays are converted to Float32Array before being handed to
 * the cosineSimilarity helper (defined outside this block). The result is
 * also recorded as a debug sample while fewer than 20 have been collected.
 *
 * @param vec1 First embedding, or undefined when unavailable.
 * @param vec2 Second embedding, or undefined when unavailable.
 * @returns The helper's similarity value, or 0 when either vector is missing.
 */
private _calculateVectorSimilarity(
  vec1: Float32Array | number[] | undefined,
  vec2: Float32Array | number[] | undefined
): number {
  if (!vec1 || !vec2) return 0.0

  const toFloat32 = (vector: Float32Array | number[]): Float32Array =>
    vector instanceof Float32Array ? vector : new Float32Array(vector)

  const score = cosineSimilarity(toFloat32(vec1), toFloat32(vec2))

  // Keep a bounded number of samples so the similarity range can be inspected.
  if (this._vectorDebugSamples.length < 20) this._vectorDebugSamples.push(score)

  return score
}
253
+
254
/**
 * Print min / max / average of the collected similarity samples as
 * percentages, then clear the samples for the next retrieval.
 * Does nothing when no samples were collected.
 */
private _logVectorStats(): void {
  const samples = this._vectorDebugSamples
  if (samples.length === 0) return

  const asPercent = (ratio: number): string => (ratio * 100).toFixed(1)

  let total = 0
  for (const value of samples) total += value

  const lowest = asPercent(Math.min(...samples))
  const highest = asPercent(Math.max(...samples))
  const mean = asPercent(total / samples.length)

  console.log(`[DEBUG] Vector similarities: min=${lowest}% max=${highest}% avg=${mean}% (${samples.length} samples)`)

  // Start the next retrieval with a fresh sample list.
  this._vectorDebugSamples = []
}
266
+
267
/**
 * Message keywords that hint at each memory context type.
 * Held in a Map so that an arbitrary context_type string can never resolve to
 * an inherited Object.prototype member, and so the table is built only once.
 */
private static readonly _CONTEXT_KEYWORDS: ReadonlyMap<string, readonly string[]> =
  new Map<string, readonly string[]>([
    ['debugging', ['debug', 'bug', 'error', 'fix', 'issue', 'problem', 'broken']],
    ['decision', ['decide', 'decision', 'choose', 'choice', 'option', 'should']],
    ['architectural', ['architect', 'design', 'structure', 'pattern', 'how']],
    ['breakthrough', ['insight', 'realize', 'understand', 'discover', 'why']],
    ['technical', ['implement', 'code', 'function', 'method', 'api']],
    ['workflow', ['process', 'workflow', 'step', 'flow', 'pipeline']],
    ['philosophy', ['philosophy', 'principle', 'belief', 'approach', 'think']],
  ])

/** Message keywords that hint at each emotional resonance. */
private static readonly _EMOTION_KEYWORDS: ReadonlyMap<string, readonly string[]> =
  new Map<string, readonly string[]>([
    ['frustration', ['frustrated', 'annoying', 'stuck', 'ugh', 'damn', 'hate']],
    ['excitement', ['excited', 'awesome', 'amazing', 'love', 'great', 'wow']],
    ['curiosity', ['wonder', 'curious', 'interesting', 'how', 'why', 'what if']],
    ['satisfaction', ['done', 'finished', 'complete', 'works', 'solved', 'finally']],
    ['discovery', ['found', 'realized', 'understand', 'insight', 'breakthrough']],
  ])

/** Message keywords suggesting the user is facing a problem. */
private static readonly _PROBLEM_WORDS: readonly string[] = [
  'error', 'bug', 'issue', 'problem', 'wrong', 'fail', 'broken', 'help', 'stuck',
]

/**
 * Calculate the importance score used to rank memories that already passed
 * the relevance gate.
 *
 * The score is a sum of discrete bonuses (no averaging, no clamping):
 *  - base: importance_weight, defaulting to 0.5
 *  - signal boost: +0.2 for four or more signals, +0.1 for three
 *  - unfinished work: +0.15 awaiting implementation, +0.1 awaiting decision
 *  - context type: +0.1 when the message mentions a keyword of the memory's type
 *  - problem/solution pair: +0.1 when the message mentions a problem word
 *  - temporal class: +0.1 eternal, +0.05 long_term, +0.1 ephemeral that is
 *    at most one session old
 *  - confidence: -0.1 when confidence_score is below 0.5 (default 0.7)
 *  - emotional resonance: +0.05 when the message mentions a matching keyword
 *
 * Keyword checks use substring matching on the raw message, so short
 * keywords such as 'how' also match inside longer words such as 'show'.
 *
 * @param memory       The memory being ranked.
 * @param signalCount  Number of activation signals that fired for it.
 * @param messageLower The current message, lower-cased.
 * @param messageWords Significant words of the current message.
 * @returns The summed score; it may exceed 1.
 */
private _calculateImportanceScore(
  memory: StoredMemory,
  signalCount: number,
  messageLower: string,
  messageWords: Set<string>
): number {
  // True when at least one of the terms appears in the message.
  const mentionsAny = (terms: readonly string[]): boolean =>
    terms.some(term => messageWords.has(term) || messageLower.includes(term))

  // BASE: curator-assigned importance weight (0-1)
  let score = memory.importance_weight ?? 0.5

  // SIGNAL BOOST: reward strong relevance match
  if (signalCount >= 4) score += 0.2
  else if (signalCount >= 3) score += 0.1

  // AWAITING: unfinished work needs attention
  if (memory.awaiting_implementation) score += 0.15
  if (memory.awaiting_decision) score += 0.1

  // CONTEXT TYPE MATCH: one boost at most, however many keywords match
  const contextType = memory.context_type?.toLowerCase() ?? ''
  const contextKeywords = SmartVectorRetrieval._CONTEXT_KEYWORDS.get(contextType) ?? []
  if (mentionsAny(contextKeywords)) score += 0.1

  // PROBLEM/SOLUTION: boost if the user seems to have a problem
  if (memory.problem_solution_pair && mentionsAny(SmartVectorRetrieval._PROBLEM_WORDS)) {
    score += 0.1
  }

  // TEMPORAL CLASS: eternal truths matter more
  const temporalClass = memory.temporal_class ?? 'medium_term'
  if (temporalClass === 'eternal') {
    score += 0.1
  } else if (temporalClass === 'long_term') {
    score += 0.05
  } else if (temporalClass === 'ephemeral') {
    // Ephemeral memories only count while fresh; a missing counter is treated as 0.
    if ((memory.sessions_since_surfaced ?? 0) <= 1) score += 0.1
  }

  // CONFIDENCE PENALTY: low-confidence memories are less reliable
  const confidence = memory.confidence_score ?? 0.7
  if (confidence < 0.5) score -= 0.1

  // EMOTIONAL RESONANCE: one small boost when the emotional context matches
  const emotion = memory.emotional_resonance?.toLowerCase() ?? ''
  const emotionKeywords = SmartVectorRetrieval._EMOTION_KEYWORDS.get(emotion) ?? []
  if (mentionsAny(emotionKeywords)) score += 0.05

  return score
}
352
+
65
353
  /**
66
- * Retrieve relevant memories using 10-dimensional scoring
354
+ * Main retrieval - Activation Signal Algorithm
67
355
  */
68
356
  retrieveRelevantMemories(
69
357
  allMemories: StoredMemory[],
@@ -71,475 +359,314 @@ export class SmartVectorRetrieval {
71
359
  queryEmbedding: Float32Array | number[],
72
360
  sessionContext: SessionContext,
73
361
  maxMemories: number = 5,
74
- alreadyInjectedCount: number = 0
362
+ alreadyInjectedCount: number = 0,
363
+ maxGlobalMemories: number = 2
75
364
  ): RetrievalResult[] {
365
+ const startTime = performance.now()
366
+
76
367
  if (!allMemories.length) {
77
368
  return []
78
369
  }
79
370
 
80
- const scoredMemories: ScoredMemory[] = []
81
-
82
- for (const memory of allMemories) {
83
- // ================================================================
84
- // THE 10 DIMENSIONS
85
- // ================================================================
371
+ const messageLower = currentMessage.toLowerCase()
372
+ const messageWords = this._extractSignificantWords(currentMessage)
86
373
 
87
- // 1. Vector similarity score (0-1)
88
- const vectorScore = this._calculateVectorSimilarity(
89
- queryEmbedding,
90
- memory.embedding
91
- )
374
+ // ================================================================
375
+ // PHASE 0: PRE-FILTER (Binary exclusions)
376
+ // ================================================================
377
+ const candidates = this._preFilter(allMemories, sessionContext.project_id, messageLower)
378
+ if (!candidates.length) {
379
+ return []
380
+ }
92
381
 
93
- // 2. Importance weight from curator (0-1)
94
- const importance = memory.importance_weight ?? 0.5
382
+ // ================================================================
383
+ // PHASE 1: ACTIVATION SIGNALS
384
+ // Count how many signals agree this memory should activate
385
+ // A memory is relevant if >= MIN_ACTIVATION_SIGNALS fire
386
+ // ================================================================
387
+ const activatedMemories: ActivatedMemory[] = []
388
+ let rejectedCount = 0
95
389
 
96
- // 3. Temporal relevance scoring
97
- const temporalScore = this._scoreTemporalRelevance(
98
- memory.temporal_relevance ?? 'persistent',
99
- sessionContext
100
- )
390
+ for (const memory of candidates) {
391
+ const isGlobal = memory.scope === 'global' || memory.project_id === 'global'
101
392
 
102
- // 4. Context type alignment
103
- const contextScore = this._scoreContextAlignment(
104
- currentMessage,
105
- memory.context_type ?? 'general'
393
+ // Check each activation signal
394
+ const triggerResult = this._checkTriggerActivation(
395
+ messageLower, messageWords, memory.trigger_phrases ?? []
106
396
  )
107
-
108
- // 5. Action required boost
109
- const actionBoost = memory.action_required ? 0.3 : 0.0
110
-
111
- // 6. Semantic tag matching
112
- const tagScore = this._scoreSemanticTags(
113
- currentMessage,
114
- memory.semantic_tags ?? []
397
+ const tagResult = this._checkTagActivation(
398
+ messageLower, messageWords, memory.semantic_tags ?? []
115
399
  )
116
-
117
- // 7. Trigger phrase matching (highest priority)
118
- const triggerScore = this._scoreTriggerPhrases(
119
- currentMessage,
120
- memory.trigger_phrases ?? []
400
+ const domainActivated = this._checkDomainActivation(
401
+ messageLower, messageWords, memory.domain
121
402
  )
122
-
123
- // 8. Question type matching
124
- const questionScore = this._scoreQuestionTypes(
125
- currentMessage,
126
- memory.question_types ?? []
403
+ const featureActivated = this._checkFeatureActivation(
404
+ messageLower, messageWords, memory.feature
127
405
  )
128
-
129
- // 9. Emotional resonance
130
- const emotionScore = this._scoreEmotionalContext(
131
- currentMessage,
132
- memory.emotional_resonance ?? ''
133
- )
134
-
135
- // 10. Problem-solution patterns
136
- const problemScore = this._scoreProblemSolution(
137
- currentMessage,
138
- memory.problem_solution_pair ?? false
139
- )
140
-
141
- // Get confidence score
142
- const confidenceScore = memory.confidence_score ?? 0.8
143
-
144
- // ================================================================
145
- // THE RELEVANCE GATEKEEPER SYSTEM
146
- // ================================================================
147
-
148
- // Calculate relevance score (gatekeeper - max 0.3)
149
- const relevanceScore = (
150
- triggerScore * 0.10 + // Trigger match
151
- vectorScore * 0.10 + // Semantic similarity
152
- tagScore * 0.05 + // Tag matching
153
- questionScore * 0.05 // Question match
154
- ) // Max = 0.30
155
-
156
- // Calculate importance/value score (max 0.7)
157
- const valueScore = (
158
- importance * 0.20 + // Curator's importance
159
- temporalScore * 0.10 + // Time relevance
160
- contextScore * 0.10 + // Context alignment
161
- confidenceScore * 0.10 + // Confidence
162
- emotionScore * 0.10 + // Emotional resonance
163
- problemScore * 0.05 + // Problem-solution
164
- actionBoost * 0.05 // Action priority
165
- ) // Max = 0.70
166
-
167
- // Relevance unlocks the full score!
168
- const finalScore = valueScore + relevanceScore // Max = 1.0
169
-
170
- // GATEKEEPER CHECK: Must have minimum relevance AND total score
171
- if (relevanceScore < 0.05 || finalScore < 0.3) {
172
- // Skip this memory - not relevant enough
173
- continue
406
+ const contentActivated = this._checkContentActivation(messageWords, memory)
407
+ const vectorSimilarity = this._calculateVectorSimilarity(queryEmbedding, memory.embedding)
408
+
409
+ // Count activated signals
410
+ let signalCount = 0
411
+ if (triggerResult.activated) signalCount++
412
+ if (tagResult.activated) signalCount++
413
+ if (domainActivated) signalCount++
414
+ if (featureActivated) signalCount++
415
+ if (contentActivated) signalCount++
416
+ // Vector similarity as bonus signal only if very high
417
+ if (vectorSimilarity >= 0.40) signalCount++
418
+
419
+ const signals: ActivationSignals = {
420
+ trigger: triggerResult.activated,
421
+ tags: tagResult.activated,
422
+ domain: domainActivated,
423
+ feature: featureActivated,
424
+ content: contentActivated,
425
+ count: signalCount,
426
+ triggerStrength: triggerResult.strength,
427
+ tagCount: tagResult.count,
428
+ vectorSimilarity,
174
429
  }
175
430
 
176
- // Add reasoning for why this was selected
177
- const components: ScoringComponents = {
178
- trigger: triggerScore,
179
- vector: vectorScore,
180
- importance,
181
- temporal: temporalScore,
182
- context: contextScore,
183
- tags: tagScore,
184
- question: questionScore,
185
- emotion: emotionScore,
186
- problem: problemScore,
187
- action: actionBoost
431
+ // RELEVANCE GATE: Must have at least MIN_ACTIVATION_SIGNALS
432
+ if (signalCount < MIN_ACTIVATION_SIGNALS) {
433
+ rejectedCount++
434
+ continue
188
435
  }
189
436
 
190
- const reasoning = this._generateSelectionReasoning(components)
437
+ // Calculate importance for ranking (Phase 2) - uses ALL rich metadata
438
+ const importanceScore = this._calculateImportanceScore(memory, signalCount, messageLower, messageWords)
191
439
 
192
- scoredMemories.push({
440
+ activatedMemories.push({
193
441
  memory,
194
- score: finalScore,
195
- relevance_score: relevanceScore,
196
- value_score: valueScore,
197
- reasoning,
198
- components
442
+ signals,
443
+ importanceScore,
444
+ isGlobal,
199
445
  })
200
446
  }
201
447
 
202
- // Sort by score
203
- scoredMemories.sort((a, b) => b.score - a.score)
448
+ // Log diagnostics
449
+ this._logActivationDistribution(activatedMemories, candidates.length, rejectedCount)
450
+ this._logVectorStats()
451
+
452
+ // If nothing activated, return empty - silence over noise
453
+ if (!activatedMemories.length) {
454
+ const durationMs = performance.now() - startTime
455
+ logger.logRetrievalScoring({
456
+ totalMemories: allMemories.length,
457
+ currentMessage,
458
+ alreadyInjected: alreadyInjectedCount,
459
+ preFiltered: allMemories.length - candidates.length,
460
+ globalCount: 0,
461
+ projectCount: 0,
462
+ finalCount: 0,
463
+ durationMs,
464
+ selectedMemories: [],
465
+ })
466
+ return []
467
+ }
204
468
 
205
469
  // ================================================================
206
- // MULTI-TIER SELECTION STRATEGY
207
- // Like how human memory floods in
470
+ // PHASE 2: IMPORTANCE RANKING (Among Relevant)
471
+ // Sort by: signal count (primary), then importance (secondary)
208
472
  // ================================================================
473
+ activatedMemories.sort((a, b) => {
474
+ // First by signal count (more signals = more certainly relevant)
475
+ if (b.signals.count !== a.signals.count) {
476
+ return b.signals.count - a.signals.count
477
+ }
478
+ // Then by importance score
479
+ return b.importanceScore - a.importanceScore
480
+ })
209
481
 
210
- const selected: ScoredMemory[] = []
482
+ // ================================================================
483
+ // PHASE 3: SELECTION
484
+ // Separate global/project, respect limits
485
+ // ================================================================
486
+ const selected: ActivatedMemory[] = []
211
487
  const selectedIds = new Set<string>()
212
488
 
213
- // Tier 1: MUST include (trigger phrases, high importance, action required)
214
- const mustInclude = scoredMemories.filter(m =>
215
- m.score > 0.8 || // Very high combined score
216
- m.components.importance > 0.9 || // Critical importance
217
- m.components.action > 0 || // Action required
218
- Object.values(m.components).some(v => v > 0.9) // Any perfect match
219
- )
489
+ const globalMemories = activatedMemories.filter(m => m.isGlobal)
490
+ const projectMemories = activatedMemories.filter(m => !m.isGlobal)
491
+
492
+ // --- GLOBAL MEMORIES (max 2) ---
493
+ const globalsSorted = globalMemories.sort((a, b) => {
494
+ const aPriority = GLOBAL_TYPE_PRIORITY[a.memory.context_type ?? 'personal'] ?? 8
495
+ const bPriority = GLOBAL_TYPE_PRIORITY[b.memory.context_type ?? 'personal'] ?? 8
496
+ if (aPriority !== bPriority) return aPriority - bPriority
497
+ if (b.signals.count !== a.signals.count) return b.signals.count - a.signals.count
498
+ return b.importanceScore - a.importanceScore
499
+ })
220
500
 
221
- for (const item of mustInclude.slice(0, maxMemories)) {
501
+ for (const item of globalsSorted.slice(0, maxGlobalMemories)) {
222
502
  if (!selectedIds.has(item.memory.id)) {
223
503
  selected.push(item)
224
504
  selectedIds.add(item.memory.id)
225
505
  }
226
506
  }
227
507
 
228
- // Tier 2: SHOULD include (high scores, diverse perspectives)
229
- const remainingSlots = Math.max(maxMemories - selected.length, 0)
230
- if (remainingSlots > 0 && selected.length < maxMemories * 1.5) {
231
- const typesIncluded = new Set<string>()
232
-
233
- for (const item of scoredMemories) {
234
- if (selected.length >= maxMemories * 1.5) break
235
- if (selectedIds.has(item.memory.id)) continue
236
-
237
- const memoryType = item.memory.context_type ?? 'general'
508
+ // --- PROJECT MEMORIES ---
509
+ // Prioritize: action_required, high signal count, high importance
510
+ const projectsSorted = [...projectMemories].sort((a, b) => {
511
+ const aAction = a.memory.action_required ? 1 : 0
512
+ const bAction = b.memory.action_required ? 1 : 0
513
+ if (bAction !== aAction) return bAction - aAction
514
+ if (b.signals.count !== a.signals.count) return b.signals.count - a.signals.count
515
+ return b.importanceScore - a.importanceScore
516
+ })
238
517
 
239
- // Include if: high score OR new perspective OR emotional resonance
240
- if (item.score > 0.5 ||
241
- !typesIncluded.has(memoryType) ||
242
- item.memory.emotional_resonance) {
243
- selected.push(item)
244
- selectedIds.add(item.memory.id)
245
- typesIncluded.add(memoryType)
246
- }
247
- }
518
+ // Debug: show top 15 candidates with calculated scores
519
+ console.log(`[DEBUG] Top 15 candidates (sorted):`)
520
+ for (let i = 0; i < Math.min(15, projectsSorted.length); i++) {
521
+ const m = projectsSorted[i]
522
+ const action = m.memory.action_required ? '⚡' : ''
523
+ console.log(` ${i+1}. [${m.signals.count}sig] score=${m.importanceScore.toFixed(2)} ${action} ${m.memory.content.slice(0, 45)}...`)
248
524
  }
249
525
 
250
- // Tier 3: CONTEXT enrichment (related but not directly relevant)
251
- // These provide ambient context like peripheral vision
252
- if (selected.length < maxMemories * 2) {
253
- const currentTags = new Set<string>()
254
- const currentDomains = new Set<string>()
526
+ for (const item of projectsSorted) {
527
+ if (selected.length >= maxMemories) break
528
+ if (selectedIds.has(item.memory.id)) continue
529
+ selected.push(item)
530
+ selectedIds.add(item.memory.id)
531
+ }
255
532
 
533
+ // PHASE 4: RELATED MEMORIES (if space remains)
534
+ if (selected.length < maxMemories) {
535
+ const relatedIds = new Set<string>()
256
536
  for (const item of selected) {
257
- for (const tag of item.memory.semantic_tags ?? []) {
258
- if (tag.trim()) currentTags.add(tag.trim().toLowerCase())
259
- }
260
- if (item.memory.knowledge_domain) {
261
- currentDomains.add(item.memory.knowledge_domain)
537
+ for (const relatedId of item.memory.related_to ?? []) {
538
+ if (!selectedIds.has(relatedId)) {
539
+ relatedIds.add(relatedId)
540
+ }
262
541
  }
263
542
  }
264
543
 
265
- for (const item of scoredMemories) {
266
- if (selected.length >= maxMemories * 2) break
544
+ for (const item of activatedMemories) {
545
+ if (selected.length >= maxMemories) break
267
546
  if (selectedIds.has(item.memory.id)) continue
268
-
269
- const memoryTags = new Set(
270
- (item.memory.semantic_tags ?? []).map(t => t.trim().toLowerCase())
271
- )
272
- const memoryDomain = item.memory.knowledge_domain ?? ''
273
-
274
- // Include if shares context with already selected memories
275
- const hasSharedTags = [...memoryTags].some(t => currentTags.has(t))
276
- const hasSharedDomain = currentDomains.has(memoryDomain)
277
-
278
- if (hasSharedTags || hasSharedDomain) {
547
+ if (relatedIds.has(item.memory.id)) {
279
548
  selected.push(item)
280
549
  selectedIds.add(item.memory.id)
281
550
  }
282
551
  }
283
552
  }
284
553
 
285
- // Respect the max_memories limit strictly
286
- const finalSelected = selected.slice(0, maxMemories)
554
+ const durationMs = performance.now() - startTime
287
555
 
288
- // Log the retrieval scoring details
556
+ // Log the final selection
289
557
  logger.logRetrievalScoring({
290
558
  totalMemories: allMemories.length,
291
559
  currentMessage,
292
560
  alreadyInjected: alreadyInjectedCount,
293
- mustIncludeCount: mustInclude.length,
294
- remainingSlots: remainingSlots,
295
- finalCount: finalSelected.length,
296
- selectedMemories: finalSelected.map(item => ({
561
+ preFiltered: allMemories.length - candidates.length,
562
+ globalCount: globalMemories.length,
563
+ projectCount: projectMemories.length,
564
+ finalCount: selected.length,
565
+ durationMs,
566
+ selectedMemories: selected.map(item => ({
297
567
  content: item.memory.content,
298
- reasoning: item.reasoning,
299
- score: item.score,
300
- relevance_score: item.relevance_score,
568
+ reasoning: this._generateActivationReasoning(item.signals),
569
+ signalCount: item.signals.count,
301
570
  importance_weight: item.memory.importance_weight ?? 0.5,
302
571
  context_type: item.memory.context_type ?? 'general',
303
572
  semantic_tags: item.memory.semantic_tags ?? [],
304
- components: item.components,
573
+ isGlobal: item.isGlobal,
574
+ signals: {
575
+ trigger: item.signals.trigger,
576
+ triggerStrength: item.signals.triggerStrength,
577
+ tags: item.signals.tags,
578
+ tagCount: item.signals.tagCount,
579
+ domain: item.signals.domain,
580
+ feature: item.signals.feature,
581
+ content: item.signals.content,
582
+ vector: item.signals.vectorSimilarity >= 0.40,
583
+ vectorSimilarity: item.signals.vectorSimilarity,
584
+ },
305
585
  })),
306
586
  })
307
587
 
308
588
  // Convert to RetrievalResult format
309
- return finalSelected.map(item => ({
589
+ return selected.map(item => ({
310
590
  ...item.memory,
311
- score: item.score,
312
- relevance_score: item.relevance_score,
313
- value_score: item.value_score,
591
+ score: item.signals.count / 6,
592
+ relevance_score: item.signals.count / 6,
593
+ value_score: item.importanceScore,
314
594
  }))
315
595
  }
316
596
 
317
- // ================================================================
318
- // SCORING FUNCTIONS - Exact match to Python
319
- // ================================================================
320
-
321
- private _calculateVectorSimilarity(
322
- vec1: Float32Array | number[] | undefined,
323
- vec2: Float32Array | undefined
324
- ): number {
325
- if (!vec1 || !vec2) return 0.0
326
-
327
- // Use FatherStateDB's optimized cosine similarity
328
- const v1 = vec1 instanceof Float32Array ? vec1 : new Float32Array(vec1)
329
- return cosineSimilarity(v1, vec2)
330
- }
331
-
332
- private _scoreTemporalRelevance(
333
- temporalType: string,
334
- _sessionContext: SessionContext
335
- ): number {
336
- const scores: Record<string, number> = {
337
- 'persistent': 0.8, // Always relevant
338
- 'session': 0.6, // Session-specific
339
- 'temporary': 0.3, // Short-term
340
- 'archived': 0.1 // Historical
341
- }
342
- return scores[temporalType] ?? 0.5
343
- }
344
-
345
- private _scoreContextAlignment(message: string, contextType: string): number {
346
- const messageLower = message.toLowerCase()
347
-
348
- // Keywords that suggest different contexts
349
- const contextIndicators: Record<string, string[]> = {
350
- 'technical_state': ['bug', 'error', 'fix', 'implement', 'code', 'function'],
351
- 'breakthrough': ['idea', 'realized', 'discovered', 'insight', 'solution'],
352
- 'project_context': ['project', 'building', 'architecture', 'system'],
353
- 'personal': ['dear friend', 'thank', 'appreciate', 'feel'],
354
- 'unresolved': ['todo', 'need to', 'should', 'must', 'problem'],
355
- 'decision': ['decided', 'chose', 'will use', 'approach', 'strategy']
356
- }
357
-
358
- const indicators = contextIndicators[contextType] ?? []
359
- const matches = indicators.filter(word => messageLower.includes(word)).length
360
-
361
- if (matches > 0) {
362
- return Math.min(0.3 + (matches * 0.2), 1.0)
363
- }
364
- return 0.1
365
- }
366
-
367
- private _scoreSemanticTags(message: string, tags: string[]): number {
368
- if (!tags.length) return 0.0
369
-
370
- const messageLower = message.toLowerCase()
371
- const matches = tags.filter(tag =>
372
- messageLower.includes(tag.trim().toLowerCase())
373
- ).length
374
-
375
- if (matches > 0) {
376
- return Math.min(0.3 + (matches * 0.3), 1.0)
377
- }
378
- return 0.0
379
- }
380
-
381
- private _scoreTriggerPhrases(message: string, triggerPhrases: string[]): number {
382
- if (!triggerPhrases.length) return 0.0
383
-
384
- const messageLower = message.toLowerCase()
385
- const stopWords = new Set([
386
- 'the', 'is', 'are', 'was', 'were', 'to', 'a', 'an', 'and', 'or',
387
- 'but', 'in', 'on', 'at', 'for', 'with', 'about', 'when', 'how',
388
- 'what', 'why'
389
- ])
390
-
391
- let maxScore = 0.0
392
-
393
- for (const pattern of triggerPhrases) {
394
- const patternLower = pattern.trim().toLowerCase()
395
-
396
- // Strategy 1: Key concept matching (individual important words)
397
- const patternWords = patternLower
398
- .split(/\s+/)
399
- .filter(w => !stopWords.has(w) && w.length > 2)
400
-
401
- if (patternWords.length) {
402
- let matches = 0
403
- for (const word of patternWords) {
404
- // Direct match
405
- if (messageLower.includes(word)) {
406
- matches += 1
407
- }
408
- // Plural/singular variations
409
- else if (messageLower.includes(word.replace(/s$/, '')) ||
410
- messageLower.includes(word + 's')) {
411
- matches += 0.9
412
- }
413
- // Substring match for compound words
414
- else if (messageLower.split(/\s+/).some(msgWord => msgWord.includes(word))) {
415
- matches += 0.7
416
- }
417
- }
418
-
419
- // Score based on percentage of concepts found
420
- let conceptScore = patternWords.length ? matches / patternWords.length : 0
421
-
422
- // Strategy 2: Contextual pattern matching
423
- const situationalIndicators = [
424
- 'when', 'during', 'while', 'asking about', 'working on', 'debugging', 'trying to'
425
- ]
426
- if (situationalIndicators.some(ind => patternLower.includes(ind))) {
427
- // This is a situational pattern - be more flexible
428
- if (patternWords.some(keyWord => messageLower.includes(keyWord))) {
429
- conceptScore = Math.max(conceptScore, 0.7) // Boost for situational match
430
- }
431
- }
432
-
433
- maxScore = Math.max(maxScore, conceptScore)
434
- }
435
- }
436
-
437
- return Math.min(maxScore, 1.0)
438
- }
439
-
440
- private _scoreQuestionTypes(message: string, questionTypes: string[]): number {
441
- if (!questionTypes.length) return 0.0
442
-
443
- const messageLower = message.toLowerCase()
444
- const questionWords = ['how', 'why', 'what', 'when', 'where']
445
-
446
- for (const qtype of questionTypes) {
447
- const qtypeLower = qtype.trim().toLowerCase()
448
-
449
- if (messageLower.includes(qtypeLower)) {
450
- return 0.8
451
- }
452
-
453
- // Partial matching for question words
454
- const messageHasQuestion = questionWords.some(qw => messageLower.includes(qw))
455
- const typeHasQuestion = questionWords.some(qw => qtypeLower.includes(qw))
597
+ /**
598
+ * Generate reasoning string from activation signals
599
+ */
600
+ private _generateActivationReasoning(signals: ActivationSignals): string {
601
+ const reasons: string[] = []
456
602
 
457
- if (messageHasQuestion && typeHasQuestion) {
458
- return 0.5
459
- }
460
- }
603
+ if (signals.trigger) reasons.push(`trigger:${(signals.triggerStrength * 100).toFixed(0)}%`)
604
+ if (signals.tags) reasons.push(`tags:${signals.tagCount}`)
605
+ if (signals.domain) reasons.push('domain')
606
+ if (signals.feature) reasons.push('feature')
607
+ if (signals.content) reasons.push('content')
608
+ if (signals.vectorSimilarity >= 0.40) reasons.push(`vector:${(signals.vectorSimilarity * 100).toFixed(0)}%`)
461
609
 
462
- return 0.0
610
+ return reasons.length
611
+ ? `Activated: ${reasons.join(', ')} (${signals.count} signals)`
612
+ : 'No signals'
463
613
  }
464
614
 
465
- private _scoreEmotionalContext(message: string, emotion: string): number {
466
- if (!emotion) return 0.0
467
-
468
- const messageLower = message.toLowerCase()
469
-
470
- // Emotion indicators
471
- const emotionPatterns: Record<string, string[]> = {
472
- 'joy': ['happy', 'excited', 'love', 'wonderful', 'great', 'awesome'],
473
- 'frustration': ['stuck', 'confused', 'help', 'issue', 'problem', 'why'],
474
- 'discovery': ['realized', 'found', 'discovered', 'aha', 'insight'],
475
- 'gratitude': ['thank', 'appreciate', 'grateful', 'dear friend']
615
+ /**
616
+ * Log activation distribution for diagnostics
617
+ */
618
+ private _logActivationDistribution(
619
+ activated: ActivatedMemory[],
620
+ totalCandidates: number,
621
+ rejectedCount: number
622
+ ): void {
623
+ const signalBuckets: Record<string, number> = {
624
+ '2 signals': 0, '3 signals': 0, '4 signals': 0, '5 signals': 0, '6 signals': 0
476
625
  }
477
-
478
- const patterns = emotionPatterns[emotion.toLowerCase()] ?? []
479
- if (patterns.some(pattern => messageLower.includes(pattern))) {
480
- return 0.7
626
+ for (const mem of activated) {
627
+ const key = `${Math.min(mem.signals.count, 6)} signals`
628
+ signalBuckets[key] = (signalBuckets[key] ?? 0) + 1
481
629
  }
482
630
 
483
- return 0.0
484
- }
485
-
486
- private _scoreProblemSolution(message: string, isProblemSolution: boolean): number {
487
- if (!isProblemSolution) return 0.0
488
-
489
- const messageLower = message.toLowerCase()
490
-
491
- // Problem indicators
492
- const problemWords = [
493
- 'error', 'issue', 'problem', 'stuck', 'help', 'fix', 'solve', 'debug'
494
- ]
495
-
496
- if (problemWords.some(word => messageLower.includes(word))) {
497
- return 0.8
631
+ let triggerCount = 0, tagCount = 0, domainCount = 0, featureCount = 0, contentCount = 0, vectorCount = 0
632
+ for (const mem of activated) {
633
+ if (mem.signals.trigger) triggerCount++
634
+ if (mem.signals.tags) tagCount++
635
+ if (mem.signals.domain) domainCount++
636
+ if (mem.signals.feature) featureCount++
637
+ if (mem.signals.content) contentCount++
638
+ if (mem.signals.vectorSimilarity >= 0.40) vectorCount++
498
639
  }
499
640
 
500
- return 0.0
641
+ logger.logScoreDistribution({
642
+ totalCandidates,
643
+ passedGatekeeper: activated.length,
644
+ rejectedByGatekeeper: rejectedCount,
645
+ buckets: signalBuckets,
646
+ stats: {
647
+ min: activated.length ? Math.min(...activated.map(m => m.signals.count)) : 0,
648
+ max: activated.length ? Math.max(...activated.map(m => m.signals.count)) : 0,
649
+ mean: activated.length ? Math.round(activated.reduce((s, m) => s + m.signals.count, 0) / activated.length * 10) / 10 : 0,
650
+ stdev: 0,
651
+ spread: activated.length ? Math.max(...activated.map(m => m.signals.count)) - Math.min(...activated.map(m => m.signals.count)) : 0,
652
+ },
653
+ percentiles: {},
654
+ compressionWarning: false,
655
+ signalBreakdown: {
656
+ trigger: triggerCount,
657
+ tags: tagCount,
658
+ domain: domainCount,
659
+ feature: featureCount,
660
+ content: contentCount,
661
+ vector: vectorCount,
662
+ total: activated.length,
663
+ },
664
+ })
501
665
  }
502
666
 
503
- private _generateSelectionReasoning(components: ScoringComponents): string {
504
- const scores: [string, number][] = [
505
- ['trigger phrase match', components.trigger],
506
- ['semantic similarity', components.vector],
507
- ['high importance', components.importance],
508
- ['question type match', components.question],
509
- ['context alignment', components.context],
510
- ['temporal relevance', components.temporal],
511
- ['tag match', components.tags],
512
- ['emotional resonance', components.emotion],
513
- ['problem-solution', components.problem],
514
- ['action required', components.action]
515
- ]
516
-
517
- // Sort by score
518
- scores.sort((a, b) => b[1] - a[1])
519
-
520
- const reasons: string[] = []
521
-
522
- // Build reasoning
523
- const primary = scores[0]!
524
- if (primary[1] > 0.5) {
525
- reasons.push(`Strong ${primary[0]} (${primary[1].toFixed(2)})`)
526
- } else if (primary[1] > 0.3) {
527
- reasons.push(`${primary[0]} (${primary[1].toFixed(2)})`)
528
- }
529
-
530
- // Add secondary reasons
531
- for (const [reason, score] of scores.slice(1, 3)) {
532
- if (score > 0.3) {
533
- reasons.push(`${reason} (${score.toFixed(2)})`)
534
- }
535
- }
536
-
537
- return reasons.length
538
- ? 'Selected due to: ' + reasons.join(', ')
539
- : 'Selected based on combined factors'
540
- }
541
667
  }
542
668
 
669
+
543
670
  /**
544
671
  * Create a new SmartVectorRetrieval instance
545
672
  */