@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,523 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Memory Extension Detector Service
|
|
3
|
+
*
|
|
4
|
+
* LLM-based detection of whether a new memory extends/enriches an existing memory.
|
|
5
|
+
* Replaces length-based heuristics for TODO-003 in memory.service.ts
|
|
6
|
+
*
|
|
7
|
+
* Cost optimization:
|
|
8
|
+
* - Similarity-based caching
|
|
9
|
+
* - Prompt optimization
|
|
10
|
+
* - Batch processing support
|
|
11
|
+
* - Fallback to heuristic matching
|
|
12
|
+
*
|
|
13
|
+
* Target: <$0.60/month with typical usage
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
import { getLogger } from '../../utils/logger.js'
|
|
17
|
+
import { createHash } from 'crypto'
|
|
18
|
+
import type { Memory } from '../../types/index.js'
|
|
19
|
+
import { getLLMProvider, isLLMAvailable } from './index.js'
|
|
20
|
+
import { LLMError } from './base.js'
|
|
21
|
+
|
|
22
|
+
// Module-level logger shared by everything in this file, created under the name 'ExtensionDetector'.
const logger = getLogger('ExtensionDetector')
|
|
23
|
+
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// Prompt Templates
|
|
26
|
+
// ============================================================================
|
|
27
|
+
|
|
28
|
+
/**
 * System prompt sent to the LLM for extension detection.
 *
 * It defines what counts as an extension (NEW adds detail to OLD without
 * contradicting, replacing, or merely repeating it) and instructs the model to
 * answer with a single JSON object carrying `isExtension`, `confidence` and `reason`.
 */
export const EXTENSION_DETECTOR_SYSTEM_PROMPT = `You are an expert at determining if one statement extends or adds detail to another.

Compare two statements and determine:
1. Does the NEW statement add detail, elaboration, or context to the OLD statement?
2. Do they NOT contradict each other?
3. What is your confidence (0.0-1.0)?

Extension criteria:
- NEW provides additional details about the same topic as OLD
- NEW elaborates on aspects mentioned in OLD
- NEW adds context without contradicting OLD
- NEW is NOT just a subset of OLD (already contained)

NOT an extension if:
- NEW contradicts OLD
- NEW is about a different topic
- NEW is already fully contained in OLD
- NEW replaces OLD entirely

Respond with ONLY a JSON object:
{
"isExtension": boolean,
"confidence": 0.0-1.0,
"reason": "brief explanation"
}`
|
|
53
|
+
|
|
54
|
+
/**
 * Build the user message that asks the LLM whether NEW extends OLD.
 *
 * Both statements are quoted with `JSON.stringify`, so embedded double quotes,
 * backslashes and line breaks are escaped instead of being spliced in raw. Memory
 * text is arbitrary content; unescaped, a value containing `"` plus a newline could
 * forge its own `OLD:` / `NEW:` line. For plain text the output is identical to
 * simply wrapping the value in double quotes.
 *
 * @param newContent - Text of the memory being added (rendered as NEW)
 * @param existingContent - Text of the memory already stored (rendered as OLD)
 * @returns The complete user prompt
 */
export function buildExtensionUserPrompt(newContent: string, existingContent: string): string {
  const quotedOld = JSON.stringify(existingContent)
  const quotedNew = JSON.stringify(newContent)
  return `Compare these statements:\n\nOLD: ${quotedOld}\nNEW: ${quotedNew}\n\nDoes NEW extend OLD? Respond with JSON only.`
}
|
|
57
|
+
|
|
58
|
+
// ============================================================================
|
|
59
|
+
// Types
|
|
60
|
+
// ============================================================================
|
|
61
|
+
|
|
62
|
+
/**
 * Outcome of one extension check, together with how the verdict was produced.
 */
export interface ExtensionResult {
  /** True when the new memory is judged to extend the existing one. */
  isExtension: boolean
  /** Confidence attached to the verdict; the LLM prompt asks for a value in 0.0-1.0. */
  confidence: number
  /** Short human-readable explanation of the verdict. */
  reason: string
  /** True when the verdict was served from the in-memory cache. */
  cached: boolean
  /** True when the verdict came from a fresh LLM call (false for cache hits, filters and heuristics). */
  usedLLM: boolean
}
|
|
69
|
+
|
|
70
|
+
/**
 * Tuning options for the extension detector. Every field is optional; the
 * detector's constructor fills in the defaults noted below.
 */
export interface ExtensionDetectorConfig {
  /**
   * Minimum confidence for extension (0-1). Defaults to 0.65.
   * In this file it is only consulted when deciding whether an LLM verdict is cached.
   */
  minConfidence?: number
  /** Whether to enable caching of LLM verdicts. Defaults to true. */
  enableCache?: boolean
  /** Cache TTL in milliseconds. Defaults to 30 minutes. */
  cacheTTLMs?: number
  /** Maximum number of cached verdicts. Defaults to 500. */
  maxCacheSize?: number
  /**
   * Whether to fall back to heuristics when the LLM call fails. Defaults to true.
   * When false, the LLM error is rethrown to the caller.
   */
  fallbackToHeuristics?: boolean
  /**
   * Minimum word overlap ratio (0-1) required before any further check runs.
   * Pairs below this ratio are reported as "not an extension". Defaults to 0.15.
   */
  minOverlapForCheck?: number
}
|
|
84
|
+
|
|
85
|
+
/**
 * A cached verdict for one pair of memory contents.
 */
interface CacheEntry {
  /** Cached verdict. */
  isExtension: boolean
  /** Confidence that accompanied the verdict. */
  confidence: number
  /** Explanation that accompanied the verdict. */
  reason: string
  /** Epoch milliseconds (`Date.now()`) when the entry was stored; compared against the TTL on lookup. */
  timestamp: number
}
|
|
91
|
+
|
|
92
|
+
// ============================================================================
|
|
93
|
+
// Heuristic Patterns
|
|
94
|
+
// ============================================================================
|
|
95
|
+
|
|
96
|
+
/**
 * Phrases whose presence in the NEW text hints that it adds to an earlier statement.
 * Only used by the heuristic fallback, via `RegExp.test`.
 *
 * NOTE(review): the bare word "and" is on the list, so most multi-clause sentences
 * count as carrying an indicator - confirm that this breadth is intended.
 */
const EXTENSION_INDICATORS = [
  /\b(also|additionally|furthermore|moreover|in addition|plus|and|as well)\b/i,
  /\b(more specifically|more detail|to elaborate|to expand|to clarify)\b/i,
  // "e.g." ends in a non-word character, so a trailing \b only matches when a word
  // character follows immediately; it is therefore kept outside the \b( ... )\b group.
  /\b(including|such as|for example|specifically)\b|\be\.g\./i,
]
|
|
101
|
+
|
|
102
|
+
// ============================================================================
|
|
103
|
+
// Memory Extension Detector Service
|
|
104
|
+
// ============================================================================
|
|
105
|
+
|
|
106
|
+
/**
 * Decides whether a newly added memory extends (adds detail to) an existing memory.
 *
 * A check runs through, in order: a word-overlap filter, a literal containment
 * filter, an in-memory TTL cache, the configured LLM provider, and finally a
 * pattern/length heuristic that is used when no LLM is available or the LLM call fails.
 */
export class MemoryExtensionDetectorService {
  private config: Required<ExtensionDetectorConfig>
  private cache: Map<string, CacheEntry> = new Map()
  private stats = {
    totalChecks: 0,
    llmChecks: 0,
    heuristicChecks: 0,
    cacheHits: 0,
    extensionsFound: 0,
    errors: 0,
    totalCost: 0,
  }

  /**
   * @param config - Optional overrides; any omitted field falls back to the default below
   */
  constructor(config: ExtensionDetectorConfig = {}) {
    this.config = {
      minConfidence: config.minConfidence ?? 0.65,
      enableCache: config.enableCache ?? true,
      cacheTTLMs: config.cacheTTLMs ?? 30 * 60 * 1000, // 30 minutes
      maxCacheSize: config.maxCacheSize ?? 500,
      fallbackToHeuristics: config.fallbackToHeuristics ?? true,
      minOverlapForCheck: config.minOverlapForCheck ?? 0.15,
    }

    logger.info('Extension detector initialized', {
      cacheEnabled: this.config.enableCache,
      fallbackEnabled: this.config.fallbackToHeuristics,
    })
  }

  // ============================================================================
  // Public API
  // ============================================================================

  /**
   * Check if a new memory extends/enriches an existing memory.
   *
   * The check is directional: "NEW extends OLD" is a different question from
   * "OLD extends NEW", and the cache treats the two as distinct.
   *
   * @param newMemory - The new memory being added
   * @param existingMemory - The existing memory to compare against
   * @returns Extension detection result
   * @throws Rethrows the LLM failure when `fallbackToHeuristics` is disabled
   */
  async checkExtension(newMemory: Memory, existingMemory: Memory): Promise<ExtensionResult> {
    this.stats.totalChecks++

    // Quick filter: unrelated texts are rejected without consulting cache or LLM.
    const overlap = this.calculateWordOverlap(newMemory.content, existingMemory.content)
    if (overlap < this.config.minOverlapForCheck) {
      logger.debug('Skipping extension check due to low overlap', { overlap })
      return {
        isExtension: false,
        confidence: 0,
        reason: 'Insufficient content overlap',
        cached: false,
        usedLLM: false,
      }
    }

    // Quick filter: text that already appears verbatim in the old memory adds nothing.
    if (this.isSubstring(newMemory.content, existingMemory.content)) {
      logger.debug('New content is substring of old, not an extension')
      return {
        isExtension: false,
        confidence: 0.8,
        reason: 'New content is already contained in existing memory',
        cached: false,
        usedLLM: false,
      }
    }

    if (this.config.enableCache) {
      const cached = this.getCached(newMemory.content, existingMemory.content)
      if (cached) {
        this.stats.cacheHits++
        logger.debug('Cache hit for extension check')
        // Copy fields explicitly so the entry's internal `timestamp` does not leak to callers.
        return {
          isExtension: cached.isExtension,
          confidence: cached.confidence,
          reason: cached.reason,
          cached: true,
          usedLLM: false,
        }
      }
    }

    if (isLLMAvailable()) {
      try {
        const result = await this.detectWithLLM(newMemory, existingMemory)
        this.stats.llmChecks++

        if (result.isExtension) {
          this.stats.extensionsFound++
        }

        // Only verdicts the model is reasonably sure about are worth reusing.
        if (this.config.enableCache && result.confidence >= this.config.minConfidence) {
          this.setCached(newMemory.content, existingMemory.content, {
            isExtension: result.isExtension,
            confidence: result.confidence,
            reason: result.reason,
            timestamp: Date.now(),
          })
        }

        return {
          ...result,
          cached: false,
          usedLLM: true,
        }
      } catch (error) {
        this.stats.errors++
        logger.warn('LLM extension detection failed, falling back to heuristics', {
          error: error instanceof Error ? error.message : String(error),
        })

        if (!this.config.fallbackToHeuristics) {
          throw error
        }
      }
    }

    // Reached when no LLM is available, or the LLM call failed and fallback is enabled.
    const heuristicResult = this.detectWithHeuristics(newMemory, existingMemory)
    this.stats.heuristicChecks++

    if (heuristicResult.isExtension) {
      this.stats.extensionsFound++
    }

    return {
      ...heuristicResult,
      cached: false,
      usedLLM: false,
    }
  }

  /**
   * Get detection statistics.
   *
   * @returns The raw counters plus cache-hit and extension rates (percentages of
   *          `totalChecks`, rounded to two decimals) and the current cache size
   */
  getStats() {
    const cacheHitRate = this.stats.totalChecks > 0 ? (this.stats.cacheHits / this.stats.totalChecks) * 100 : 0

    const extensionRate = this.stats.totalChecks > 0 ? (this.stats.extensionsFound / this.stats.totalChecks) * 100 : 0

    return {
      ...this.stats,
      cacheHitRate: parseFloat(cacheHitRate.toFixed(2)),
      extensionRate: parseFloat(extensionRate.toFixed(2)),
      cacheSize: this.cache.size,
    }
  }

  /**
   * Clear the cache. Statistics counters are left untouched.
   */
  clearCache(): void {
    this.cache.clear()
    logger.info('Extension cache cleared')
  }

  // ============================================================================
  // LLM Detection
  // ============================================================================

  /**
   * Ask the LLM provider for a verdict and add the estimated cost to the stats.
   *
   * @throws LLMError as raised by the provider or the response parser; any other
   *         failure is wrapped in a plain Error
   */
  private async detectWithLLM(
    newMemory: Memory,
    existingMemory: Memory
  ): Promise<{
    isExtension: boolean
    confidence: number
    reason: string
  }> {
    const provider = getLLMProvider()

    try {
      const response = await provider.generateJson(
        EXTENSION_DETECTOR_SYSTEM_PROMPT,
        buildExtensionUserPrompt(newMemory.content, existingMemory.content)
      )

      const parsed = this.parseJsonResponse(response.rawResponse, response.provider)

      // Estimate cost: 0.25 per million prompt tokens, 1.25 per million completion tokens.
      // NOTE(review): the rates are hard-coded and presumably USD for one specific model - confirm.
      const inputCost = ((response.tokensUsed?.prompt ?? 0) / 1000000) * 0.25
      const outputCost = ((response.tokensUsed?.completion ?? 0) / 1000000) * 1.25
      this.stats.totalCost += inputCost + outputCost

      logger.debug('LLM extension detection successful', {
        isExtension: parsed.isExtension,
        confidence: parsed.confidence,
        tokensUsed: response.tokensUsed?.total ?? 0,
        cost: inputCost + outputCost,
      })

      return parsed
    } catch (error) {
      if (error instanceof LLMError) {
        throw error
      }
      throw new Error(`LLM extension detection failed: ${error instanceof Error ? error.message : String(error)}`)
    }
  }

  /**
   * Extract and validate the JSON verdict from a raw LLM response.
   *
   * Text before or after the JSON object (prose, code fences) is tolerated. The
   * confidence must be a finite number and is clamped into [0, 1].
   *
   * @throws LLMError (invalid response) when no object is found, the JSON is
   *         malformed, a field is missing, or a field has the wrong type
   */
  private parseJsonResponse(
    rawResponse: string,
    provider: 'openai' | 'anthropic' | 'mock'
  ): {
    isExtension: boolean
    confidence: number
    reason: string
  } {
    const trimmed = rawResponse.trim()
    // Greedy match from the first '{' to the last '}' drops surrounding text, including
    // trailing prose after an object that starts the response. An unterminated object is
    // still handed to JSON.parse so that it is reported as invalid JSON.
    const jsonText = trimmed.match(/\{[\s\S]*\}/)?.[0] ?? (trimmed.startsWith('{') ? trimmed : undefined)
    if (!jsonText) {
      throw LLMError.invalidResponse(provider, 'No JSON object found in response')
    }

    let parsed: unknown
    try {
      parsed = JSON.parse(jsonText)
    } catch {
      throw LLMError.invalidResponse(provider, 'Invalid JSON response')
    }

    if (
      !parsed ||
      typeof parsed !== 'object' ||
      !('isExtension' in parsed) ||
      !('confidence' in parsed) ||
      !('reason' in parsed)
    ) {
      throw LLMError.invalidResponse(provider, 'Missing required fields in JSON response')
    }

    const { isExtension, confidence, reason } = parsed as Record<string, unknown>

    if (typeof isExtension !== 'boolean') {
      throw LLMError.invalidResponse(provider, 'Invalid isExtension in response')
    }
    // JSON.parse yields Infinity for literals such as 1e999, so NaN is not the only bad number.
    if (typeof confidence !== 'number' || !Number.isFinite(confidence)) {
      throw LLMError.invalidResponse(provider, 'Invalid confidence in response')
    }
    if (typeof reason !== 'string') {
      throw LLMError.invalidResponse(provider, 'Invalid reason in response')
    }

    return { isExtension, confidence: Math.min(1, Math.max(0, confidence)), reason }
  }

  // ============================================================================
  // Heuristic Detection
  // ============================================================================

  /**
   * Pattern/length based verdict used when the LLM cannot be consulted.
   *
   * NEW counts as an extension when it shares enough words with OLD, is not
   * literally contained in OLD, and either carries an extension indicator phrase
   * or is comparably long without being a near duplicate. Confidence is capped at 0.65.
   */
  private detectWithHeuristics(
    newMemory: Memory,
    existingMemory: Memory
  ): {
    isExtension: boolean
    confidence: number
    reason: string
  } {
    const newLower = newMemory.content.toLowerCase()
    const existingLower = existingMemory.content.toLowerCase()

    // Calculate metrics
    const overlap = this.calculateWordOverlap(newLower, existingLower)
    const hasMoreDetail = newMemory.content.length > existingMemory.content.length * 0.8
    // Whole-text containment: a shared prefix alone must not count, because
    // "OLD plus extra detail" is exactly what an extension looks like.
    const newContentInOld = this.isSubstring(newMemory.content, existingMemory.content)

    // Check for extension indicators
    const hasExtensionIndicator = EXTENSION_INDICATORS.some((pattern) => pattern.test(newLower))

    // Decision logic
    // Allow high overlap if there are extension indicators or more detail
    // The duplicate check (overlap < 0.9) is skipped when extension indicators are present
    const isExtension =
      overlap > 0.2 && // Sufficient overlap
      !newContentInOld && // Not contained
      ((overlap < 0.9 && hasMoreDetail) || // More detail with reasonable overlap
        hasExtensionIndicator) // Extension indicators override overlap threshold

    const confidence = isExtension ? Math.min(0.65, overlap + 0.2) : 0.3

    let reason = 'No extension detected via heuristics'
    if (isExtension) {
      if (hasExtensionIndicator) {
        reason = 'Contains extension indicators and adds detail (via pattern matching)'
      } else {
        reason = 'Adds detail without contradicting (via pattern matching)'
      }
    } else if (newContentInOld) {
      reason = 'New content already contained in existing memory'
    } else if (overlap < 0.2) {
      reason = 'Insufficient overlap between memories'
    }

    logger.debug('Heuristic extension detection', {
      isExtension,
      confidence,
      overlap,
      hasMoreDetail,
    })

    return {
      isExtension,
      confidence,
      reason,
    }
  }

  // ============================================================================
  // Helpers
  // ============================================================================

  /**
   * Jaccard similarity (|intersection| / |union|) of the two texts' word sets.
   * Words are lower-cased, split on whitespace, and kept only when longer than
   * three characters. Returns 0 when neither text has such a word.
   */
  private calculateWordOverlap(text1: string, text2: string): number {
    const words1 = new Set(
      text1
        .toLowerCase()
        .split(/\s+/)
        .filter((w) => w.length > 3)
    )
    const words2 = new Set(
      text2
        .toLowerCase()
        .split(/\s+/)
        .filter((w) => w.length > 3)
    )

    const intersection = new Set([...words1].filter((x) => words2.has(x)))
    const union = new Set([...words1, ...words2])

    return union.size > 0 ? intersection.size / union.size : 0
  }

  /**
   * True when the whole of `shorter` occurs inside `longer`, ignoring case and
   * surrounding whitespace.
   *
   * The comparison covers the entire text. Matching only a leading slice would
   * misclassify "existing text plus new detail" as already contained whenever
   * the shared prefix is long enough.
   */
  private isSubstring(shorter: string, longer: string): boolean {
    const shortNorm = shorter.trim().toLowerCase()
    const longNorm = longer.trim().toLowerCase()

    return longNorm.includes(shortNorm)
  }

  // ============================================================================
  // Caching
  // ============================================================================

  /**
   * Cache key for an ordered (new, existing) content pair.
   *
   * The key preserves argument order because the verdict is directional, and it
   * hashes the full normalized texts so long memories sharing a prefix do not
   * collide. Length prefixes keep the concatenation unambiguous.
   */
  private getCacheKey(content1: string, content2: string): string {
    const first = content1.trim().toLowerCase()
    const second = content2.trim().toLowerCase()
    return createHash('sha256').update(`${first.length}:${first}|${second.length}:${second}`).digest('hex')
  }

  /**
   * Look up a cached verdict; expired entries are removed and reported as misses.
   */
  private getCached(content1: string, content2: string): CacheEntry | null {
    const key = this.getCacheKey(content1, content2)
    const entry = this.cache.get(key)

    if (!entry) {
      return null
    }

    // Check if expired
    const age = Date.now() - entry.timestamp
    if (age > this.config.cacheTTLMs) {
      this.cache.delete(key)
      return null
    }

    return entry
  }

  /**
   * Store a verdict, evicting the oldest entries first when the cache is full.
   *
   * About 10% of the capacity is evicted at a time, and always at least one entry,
   * so small caches stay bounded too. A non-positive `maxCacheSize` disables storing.
   */
  private setCached(content1: string, content2: string, entry: CacheEntry): void {
    if (this.config.maxCacheSize <= 0) {
      return
    }

    const key = this.getCacheKey(content1, content2)

    // Overwriting an existing key does not grow the map, so no eviction is needed then.
    if (!this.cache.has(key) && this.cache.size >= this.config.maxCacheSize) {
      const evictCount = Math.max(1, Math.floor(this.config.maxCacheSize * 0.1))
      const oldestFirst = Array.from(this.cache.entries()).sort((a, b) => a[1].timestamp - b[1].timestamp)
      for (const [staleKey] of oldestFirst.slice(0, evictCount)) {
        this.cache.delete(staleKey)
      }
    }

    this.cache.set(key, entry)
  }
}
|
|
501
|
+
|
|
502
|
+
// ============================================================================
|
|
503
|
+
// Singleton Instance
|
|
504
|
+
// ============================================================================
|
|
505
|
+
|
|
506
|
+
// Lazily created shared detector; null until first requested or after a reset.
let _instance: MemoryExtensionDetectorService | null = null

/**
 * Get the singleton instance, creating it on first use.
 *
 * @param config - Applied only when this call creates the instance; it is ignored
 *                 once an instance already exists
 * @returns The shared detector
 */
export function getMemoryExtensionDetector(config?: ExtensionDetectorConfig): MemoryExtensionDetectorService {
  const existing = _instance
  if (existing !== null) {
    return existing
  }

  const created = new MemoryExtensionDetectorService(config)
  _instance = created
  return created
}

/**
 * Reset the singleton (for testing); the next get call builds a fresh instance.
 */
export function resetMemoryExtensionDetector(): void {
  _instance = null
}
|