@twelvehart/supermemory-runtime 1.0.0-next.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. package/.env.example +57 -0
  2. package/README.md +374 -0
  3. package/dist/index.js +189 -0
  4. package/dist/mcp/index.js +1132 -0
  5. package/docker-compose.prod.yml +91 -0
  6. package/docker-compose.yml +358 -0
  7. package/drizzle/0000_dapper_the_professor.sql +159 -0
  8. package/drizzle/0001_api_keys.sql +51 -0
  9. package/drizzle/meta/0000_snapshot.json +1532 -0
  10. package/drizzle/meta/_journal.json +13 -0
  11. package/drizzle.config.ts +20 -0
  12. package/package.json +114 -0
  13. package/scripts/add-extraction-job.ts +122 -0
  14. package/scripts/benchmark-pgvector.ts +122 -0
  15. package/scripts/bootstrap.sh +209 -0
  16. package/scripts/check-runtime-pack.ts +111 -0
  17. package/scripts/claude-mcp-config.ts +336 -0
  18. package/scripts/docker-entrypoint.sh +183 -0
  19. package/scripts/doctor.ts +377 -0
  20. package/scripts/init-db.sql +33 -0
  21. package/scripts/install.sh +1110 -0
  22. package/scripts/mcp-setup.ts +271 -0
  23. package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
  24. package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
  25. package/scripts/migrations/003_create_hnsw_index.sql +94 -0
  26. package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
  27. package/scripts/migrations/005_create_chunks_table.sql +95 -0
  28. package/scripts/migrations/006_create_processing_queue.sql +45 -0
  29. package/scripts/migrations/generate_test_data.sql +42 -0
  30. package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
  31. package/scripts/migrations/run_migrations.sh +286 -0
  32. package/scripts/migrations/test_hnsw_index.sql +255 -0
  33. package/scripts/pre-commit-secrets +282 -0
  34. package/scripts/run-extraction-worker.ts +46 -0
  35. package/scripts/run-phase1-tests.sh +291 -0
  36. package/scripts/setup.ts +222 -0
  37. package/scripts/smoke-install.sh +12 -0
  38. package/scripts/test-health-endpoint.sh +328 -0
  39. package/src/api/index.ts +2 -0
  40. package/src/api/middleware/auth.ts +80 -0
  41. package/src/api/middleware/csrf.ts +308 -0
  42. package/src/api/middleware/errorHandler.ts +166 -0
  43. package/src/api/middleware/rateLimit.ts +360 -0
  44. package/src/api/middleware/validation.ts +514 -0
  45. package/src/api/routes/documents.ts +286 -0
  46. package/src/api/routes/profiles.ts +237 -0
  47. package/src/api/routes/search.ts +71 -0
  48. package/src/api/stores/index.ts +58 -0
  49. package/src/config/bootstrap-env.ts +3 -0
  50. package/src/config/env.ts +71 -0
  51. package/src/config/feature-flags.ts +25 -0
  52. package/src/config/index.ts +140 -0
  53. package/src/config/secrets.config.ts +291 -0
  54. package/src/db/client.ts +92 -0
  55. package/src/db/index.ts +73 -0
  56. package/src/db/postgres.ts +72 -0
  57. package/src/db/schema/chunks.schema.ts +31 -0
  58. package/src/db/schema/containers.schema.ts +46 -0
  59. package/src/db/schema/documents.schema.ts +49 -0
  60. package/src/db/schema/embeddings.schema.ts +32 -0
  61. package/src/db/schema/index.ts +11 -0
  62. package/src/db/schema/memories.schema.ts +72 -0
  63. package/src/db/schema/profiles.schema.ts +34 -0
  64. package/src/db/schema/queue.schema.ts +59 -0
  65. package/src/db/schema/relationships.schema.ts +42 -0
  66. package/src/db/schema.ts +223 -0
  67. package/src/db/worker-connection.ts +47 -0
  68. package/src/index.ts +235 -0
  69. package/src/mcp/CLAUDE.md +1 -0
  70. package/src/mcp/index.ts +1380 -0
  71. package/src/mcp/legacyState.ts +22 -0
  72. package/src/mcp/rateLimit.ts +358 -0
  73. package/src/mcp/resources.ts +309 -0
  74. package/src/mcp/results.ts +104 -0
  75. package/src/mcp/tools.ts +401 -0
  76. package/src/queues/config.ts +119 -0
  77. package/src/queues/index.ts +289 -0
  78. package/src/sdk/client.ts +225 -0
  79. package/src/sdk/errors.ts +266 -0
  80. package/src/sdk/http.ts +560 -0
  81. package/src/sdk/index.ts +244 -0
  82. package/src/sdk/resources/base.ts +65 -0
  83. package/src/sdk/resources/connections.ts +204 -0
  84. package/src/sdk/resources/documents.ts +163 -0
  85. package/src/sdk/resources/index.ts +10 -0
  86. package/src/sdk/resources/memories.ts +150 -0
  87. package/src/sdk/resources/search.ts +60 -0
  88. package/src/sdk/resources/settings.ts +36 -0
  89. package/src/sdk/types.ts +674 -0
  90. package/src/services/chunking/index.ts +451 -0
  91. package/src/services/chunking.service.ts +650 -0
  92. package/src/services/csrf.service.ts +252 -0
  93. package/src/services/documents.repository.ts +219 -0
  94. package/src/services/documents.service.ts +191 -0
  95. package/src/services/embedding.service.ts +404 -0
  96. package/src/services/extraction.service.ts +300 -0
  97. package/src/services/extractors/code.extractor.ts +451 -0
  98. package/src/services/extractors/index.ts +9 -0
  99. package/src/services/extractors/markdown.extractor.ts +461 -0
  100. package/src/services/extractors/pdf.extractor.ts +315 -0
  101. package/src/services/extractors/text.extractor.ts +118 -0
  102. package/src/services/extractors/url.extractor.ts +243 -0
  103. package/src/services/index.ts +235 -0
  104. package/src/services/ingestion.service.ts +177 -0
  105. package/src/services/llm/anthropic.ts +400 -0
  106. package/src/services/llm/base.ts +460 -0
  107. package/src/services/llm/contradiction-detector.service.ts +526 -0
  108. package/src/services/llm/heuristics.ts +148 -0
  109. package/src/services/llm/index.ts +309 -0
  110. package/src/services/llm/memory-classifier.service.ts +383 -0
  111. package/src/services/llm/memory-extension-detector.service.ts +523 -0
  112. package/src/services/llm/mock.ts +470 -0
  113. package/src/services/llm/openai.ts +398 -0
  114. package/src/services/llm/prompts.ts +438 -0
  115. package/src/services/llm/types.ts +373 -0
  116. package/src/services/memory.repository.ts +1769 -0
  117. package/src/services/memory.service.ts +1338 -0
  118. package/src/services/memory.types.ts +234 -0
  119. package/src/services/persistence/index.ts +295 -0
  120. package/src/services/pipeline.service.ts +509 -0
  121. package/src/services/profile.repository.ts +436 -0
  122. package/src/services/profile.service.ts +560 -0
  123. package/src/services/profile.types.ts +270 -0
  124. package/src/services/relationships/detector.ts +1128 -0
  125. package/src/services/relationships/index.ts +268 -0
  126. package/src/services/relationships/memory-integration.ts +459 -0
  127. package/src/services/relationships/strategies.ts +132 -0
  128. package/src/services/relationships/types.ts +370 -0
  129. package/src/services/search.service.ts +761 -0
  130. package/src/services/search.types.ts +220 -0
  131. package/src/services/secrets.service.ts +384 -0
  132. package/src/services/vectorstore/base.ts +327 -0
  133. package/src/services/vectorstore/index.ts +444 -0
  134. package/src/services/vectorstore/memory.ts +286 -0
  135. package/src/services/vectorstore/migration.ts +295 -0
  136. package/src/services/vectorstore/mock.ts +403 -0
  137. package/src/services/vectorstore/pgvector.ts +695 -0
  138. package/src/services/vectorstore/types.ts +247 -0
  139. package/src/startup.ts +389 -0
  140. package/src/types/api.types.ts +193 -0
  141. package/src/types/document.types.ts +103 -0
  142. package/src/types/index.ts +241 -0
  143. package/src/types/profile.base.ts +133 -0
  144. package/src/utils/errors.ts +447 -0
  145. package/src/utils/id.ts +15 -0
  146. package/src/utils/index.ts +101 -0
  147. package/src/utils/logger.ts +313 -0
  148. package/src/utils/sanitization.ts +501 -0
  149. package/src/utils/secret-validation.ts +273 -0
  150. package/src/utils/synonyms.ts +188 -0
  151. package/src/utils/validation.ts +581 -0
  152. package/src/workers/chunking.worker.ts +242 -0
  153. package/src/workers/embedding.worker.ts +358 -0
  154. package/src/workers/extraction.worker.ts +346 -0
  155. package/src/workers/indexing.worker.ts +505 -0
  156. package/tsconfig.json +38 -0
@@ -0,0 +1,523 @@
1
+ /**
2
+ * Memory Extension Detector Service
3
+ *
4
+ * LLM-based detection of whether a new memory extends/enriches an existing memory.
5
+ * Replaces length-based heuristics for TODO-003 in memory.service.ts
6
+ *
7
+ * Cost optimization:
8
+ * - Similarity-based caching
9
+ * - Prompt optimization
10
+ * - Batch processing support
11
+ * - Fallback to heuristic matching
12
+ *
13
+ * Target: <$0.60/month with typical usage
14
+ */
15
+
16
+ import { getLogger } from '../../utils/logger.js'
17
+ import { createHash } from 'crypto'
18
+ import type { Memory } from '../../types/index.js'
19
+ import { getLLMProvider, isLLMAvailable } from './index.js'
20
+ import { LLMError } from './base.js'
21
+
22
+ const logger = getLogger('ExtensionDetector')
23
+
24
+ // ============================================================================
25
+ // Prompt Templates
26
+ // ============================================================================
27
+
28
/**
 * System prompt for LLM-based extension detection.
 *
 * MemoryExtensionDetectorService.detectWithLLM passes this string as the first
 * argument to provider.generateJson. It instructs the model to answer with a
 * single JSON object holding isExtension, confidence and reason, which are the
 * three fields parseJsonResponse requires and type-checks.
 */
+ export const EXTENSION_DETECTOR_SYSTEM_PROMPT = `You are an expert at determining if one statement extends or adds detail to another.
29
+
30
+ Compare two statements and determine:
31
+ 1. Does the NEW statement add detail, elaboration, or context to the OLD statement?
32
+ 2. Do they NOT contradict each other?
33
+ 3. What is your confidence (0.0-1.0)?
34
+
35
+ Extension criteria:
36
+ - NEW provides additional details about the same topic as OLD
37
+ - NEW elaborates on aspects mentioned in OLD
38
+ - NEW adds context without contradicting OLD
39
+ - NEW is NOT just a subset of OLD (already contained)
40
+
41
+ NOT an extension if:
42
+ - NEW contradicts OLD
43
+ - NEW is about a different topic
44
+ - NEW is already fully contained in OLD
45
+ - NEW replaces OLD entirely
46
+
47
+ Respond with ONLY a JSON object:
48
+ {
49
+ "isExtension": boolean,
50
+ "confidence": 0.0-1.0,
51
+ "reason": "brief explanation"
52
+ }`
53
+
54
/**
 * Build the user prompt that asks the model whether NEW extends OLD.
 *
 * Both statements are embedded as JSON string literals rather than being
 * pasted between hand-written quotes. For plain text the output is identical
 * to simple quoting, but embedded double quotes, backslashes and line breaks
 * are escaped, so memory content cannot terminate its own field and forge an
 * additional `OLD:` / `NEW:` line in the prompt.
 *
 * @param newContent - Text of the memory being added (rendered as NEW).
 * @param existingContent - Text of the stored memory (rendered as OLD).
 * @returns The complete user prompt.
 */
export function buildExtensionUserPrompt(newContent: string, existingContent: string): string {
  const oldLiteral = JSON.stringify(existingContent)
  const newLiteral = JSON.stringify(newContent)
  return `Compare these statements:\n\nOLD: ${oldLiteral}\nNEW: ${newLiteral}\n\nDoes NEW extend OLD? Respond with JSON only.`
}
57
+
58
+ // ============================================================================
59
+ // Types
60
+ // ============================================================================
61
+
62
+ export interface ExtensionResult { // Outcome of one checkExtension call
63
+ isExtension: boolean // true when the new memory is judged to extend the existing one
64
+ confidence: number // 0 for the low-overlap early exit, 0.8 for the containment early exit; otherwise taken from the LLM or the heuristics
65
+ reason: string // short human-readable explanation of the verdict
66
+ cached: boolean // true only when the verdict was served from the in-memory cache
67
+ usedLLM: boolean // true only when this call obtained its verdict from the LLM provider
68
+ }
69
+
70
+ export interface ExtensionDetectorConfig { // Optional tuning knobs; the service constructor fills every omitted field with a default
71
+ /** Minimum confidence for extension (0-1) */
72
+ minConfidence?: number // default 0.65; LLM verdicts below this value are not written to the cache
73
+ /** Whether to enable caching */
74
+ enableCache?: boolean // default true
75
+ /** Cache TTL in milliseconds */
76
+ cacheTTLMs?: number // default 30 * 60 * 1000 (30 minutes)
77
+ /** Maximum cache size */
78
+ maxCacheSize?: number // default 500; once reached, the oldest entries are evicted before inserting
79
+ /** Whether to fallback to heuristics on errors */
80
+ fallbackToHeuristics?: boolean // default true; when false, an LLM failure is rethrown from checkExtension
81
+ /** Minimum word overlap ratio to even check (0-1) */
82
+ minOverlapForCheck?: number // default 0.15; pairs below this overlap are reported as not an extension without further checks
83
+ }
84
+
85
+ interface CacheEntry { // Verdict stored in the service's in-memory cache
86
+ isExtension: boolean // cached verdict
87
+ confidence: number // cached confidence
88
+ reason: string // cached explanation
89
+ timestamp: number // Date.now() at insertion; compared against cacheTTLMs for expiry and used to pick eviction victims
90
+ }
91
+
92
+ // ============================================================================
93
+ // Heuristic Patterns
94
+ // ============================================================================
95
+
96
/*
 * Phrase patterns used by the heuristic fallback (detectWithHeuristics): if any
 * pattern matches the lower-cased new content, the pair is treated as carrying
 * an "extension indicator". The three groups cover additive connectives,
 * elaboration phrases, and example/enumeration phrases. None of the patterns
 * uses the g flag, so RegExp.test keeps no state between calls.
 * NOTE(review): the first pattern includes the bare word "and", which matches
 * a very large share of ordinary sentences - confirm this breadth is intended.
 */
+ const EXTENSION_INDICATORS = [
97
+ /\b(also|additionally|furthermore|moreover|in addition|plus|and|as well)\b/i,
98
+ /\b(more specifically|more detail|to elaborate|to expand|to clarify)\b/i,
99
+ /\b(including|such as|for example|e\.g\.|specifically)\b/i,
100
+ ]
101
+
102
+ // ============================================================================
103
+ // Memory Extension Detector Service
104
+ // ============================================================================
105
+
106
/**
 * Decides whether a new memory extends (adds detail to) an existing memory.
 *
 * A check runs through these stages, returning at the first one that yields a
 * verdict:
 *   1. word-overlap pre-filter (too little shared vocabulary -> not an extension)
 *   2. containment pre-filter (new text already inside old text -> not an extension)
 *   3. in-memory cache lookup
 *   4. LLM detection, when `isLLMAvailable()` reports a provider
 *   5. pattern/length heuristics
 *
 * The instance also accumulates counters and an estimated LLM cost, exposed
 * through `getStats()`.
 */
export class MemoryExtensionDetectorService {
  private config: Required<ExtensionDetectorConfig>
  private cache: Map<string, CacheEntry> = new Map()
  private stats = {
    totalChecks: 0,
    llmChecks: 0,
    heuristicChecks: 0,
    cacheHits: 0,
    extensionsFound: 0,
    errors: 0,
    totalCost: 0,
  }

  /**
   * @param config - Optional overrides; every omitted field gets the default
   *   shown below.
   */
  constructor(config: ExtensionDetectorConfig = {}) {
    this.config = {
      minConfidence: config.minConfidence ?? 0.65,
      enableCache: config.enableCache ?? true,
      cacheTTLMs: config.cacheTTLMs ?? 30 * 60 * 1000, // 30 minutes
      maxCacheSize: config.maxCacheSize ?? 500,
      fallbackToHeuristics: config.fallbackToHeuristics ?? true,
      minOverlapForCheck: config.minOverlapForCheck ?? 0.15,
    }

    logger.info('Extension detector initialized', {
      cacheEnabled: this.config.enableCache,
      fallbackEnabled: this.config.fallbackToHeuristics,
    })
  }

  // ============================================================================
  // Public API
  // ============================================================================

  /**
   * Check if a new memory extends/enriches an existing memory.
   *
   * @param newMemory - The new memory being added
   * @param existingMemory - The existing memory to compare against
   * @returns Extension detection result
   * @throws Whatever the LLM path threw, but only when `fallbackToHeuristics`
   *   is disabled; otherwise LLM failures fall through to the heuristics.
   */
  async checkExtension(newMemory: Memory, existingMemory: Memory): Promise<ExtensionResult> {
    this.stats.totalChecks++

    const newContent = newMemory.content
    const existingContent = existingMemory.content

    // Quick filter: texts that share almost no vocabulary are never extensions.
    const overlap = this.calculateWordOverlap(newContent, existingContent)
    if (overlap < this.config.minOverlapForCheck) {
      logger.debug('Skipping extension check due to low overlap', { overlap })
      return {
        isExtension: false,
        confidence: 0,
        reason: 'Insufficient content overlap',
        cached: false,
        usedLLM: false,
      }
    }

    // Quick filter: content that is already present verbatim adds nothing.
    if (this.isSubstring(newContent, existingContent)) {
      logger.debug('New content is substring of old, not an extension')
      return {
        isExtension: false,
        confidence: 0.8,
        reason: 'New content is already contained in existing memory',
        cached: false,
        usedLLM: false,
      }
    }

    if (this.config.enableCache) {
      const hit = this.getCached(newContent, existingContent)
      if (hit) {
        this.stats.cacheHits++
        logger.debug('Cache hit for extension check')
        // Copy the verdict fields explicitly so the cache-internal `timestamp`
        // does not leak into the public result object.
        return {
          isExtension: hit.isExtension,
          confidence: hit.confidence,
          reason: hit.reason,
          cached: true,
          usedLLM: false,
        }
      }
    }

    if (isLLMAvailable()) {
      try {
        const verdict = await this.detectWithLLM(newMemory, existingMemory)
        this.stats.llmChecks++

        if (verdict.isExtension) {
          this.stats.extensionsFound++
        }

        // Only verdicts the model is reasonably sure about are worth reusing.
        if (this.config.enableCache && verdict.confidence >= this.config.minConfidence) {
          this.setCached(newContent, existingContent, {
            isExtension: verdict.isExtension,
            confidence: verdict.confidence,
            reason: verdict.reason,
            timestamp: Date.now(),
          })
        }

        return {
          ...verdict,
          cached: false,
          usedLLM: true,
        }
      } catch (error) {
        this.stats.errors++
        const message = error instanceof Error ? error.message : String(error)

        if (!this.config.fallbackToHeuristics) {
          logger.warn('LLM extension detection failed and heuristic fallback is disabled', {
            error: message,
          })
          throw error
        }

        logger.warn('LLM extension detection failed, falling back to heuristics', {
          error: message,
        })
      }
    }

    // No LLM available, or it failed and fallback is allowed.
    const heuristicResult = this.detectWithHeuristics(newMemory, existingMemory)
    this.stats.heuristicChecks++

    if (heuristicResult.isExtension) {
      this.stats.extensionsFound++
    }

    return {
      ...heuristicResult,
      cached: false,
      usedLLM: false,
    }
  }

  /**
   * Get detection statistics.
   *
   * @returns The raw counters plus `cacheHitRate` and `extensionRate`
   *   (percentages of `totalChecks`, rounded to two decimals) and the current
   *   `cacheSize`.
   */
  getStats() {
    const total = this.stats.totalChecks
    const cacheHitRate = total > 0 ? (this.stats.cacheHits / total) * 100 : 0
    const extensionRate = total > 0 ? (this.stats.extensionsFound / total) * 100 : 0

    return {
      ...this.stats,
      cacheHitRate: parseFloat(cacheHitRate.toFixed(2)),
      extensionRate: parseFloat(extensionRate.toFixed(2)),
      cacheSize: this.cache.size,
    }
  }

  /**
   * Clear the cache
   */
  clearCache(): void {
    this.cache.clear()
    logger.info('Extension cache cleared')
  }

  // ============================================================================
  // LLM Detection
  // ============================================================================

  /**
   * Ask the configured LLM provider for a verdict and record its estimated cost.
   *
   * @throws LLMError unchanged when the provider or the response parser raised
   *   one; any other failure is wrapped in a plain Error.
   */
  private async detectWithLLM(
    newMemory: Memory,
    existingMemory: Memory
  ): Promise<Pick<ExtensionResult, 'isExtension' | 'confidence' | 'reason'>> {
    const provider = getLLMProvider()

    try {
      const response = await provider.generateJson(
        EXTENSION_DETECTOR_SYSTEM_PROMPT,
        buildExtensionUserPrompt(newMemory.content, existingMemory.content)
      )

      const parsed = this.parseJsonResponse(response.rawResponse, response.provider)

      // Cost estimate with fixed per-million-token rates (0.25 input, 1.25
      // output). NOTE(review): the rates are hard-coded and not tied to the
      // active provider/model - confirm they match what is actually billed.
      const inputCost = ((response.tokensUsed?.prompt ?? 0) / 1000000) * 0.25
      const outputCost = ((response.tokensUsed?.completion ?? 0) / 1000000) * 1.25
      this.stats.totalCost += inputCost + outputCost

      logger.debug('LLM extension detection successful', {
        isExtension: parsed.isExtension,
        confidence: parsed.confidence,
        tokensUsed: response.tokensUsed?.total ?? 0,
        cost: inputCost + outputCost,
      })

      return parsed
    } catch (error) {
      if (error instanceof LLMError) {
        throw error
      }
      throw new Error(`LLM extension detection failed: ${error instanceof Error ? error.message : String(error)}`)
    }
  }

  /**
   * Extract and validate the verdict object from a raw model response.
   *
   * The JSON object is taken as the span from the first `{` to the last `}`,
   * so surrounding prose or code fences (before or after the object) are
   * tolerated. `confidence` must be a finite number and is clamped to [0, 1].
   *
   * @throws LLMError (via `LLMError.invalidResponse`) when no object is found,
   *   the JSON is malformed, or a field is missing or has the wrong type.
   */
  private parseJsonResponse(
    rawResponse: string,
    provider: 'openai' | 'anthropic' | 'mock'
  ): Pick<ExtensionResult, 'isExtension' | 'confidence' | 'reason'> {
    const start = rawResponse.indexOf('{')
    const end = rawResponse.lastIndexOf('}')
    if (start === -1 || end < start) {
      throw LLMError.invalidResponse(provider, 'No JSON object found in response')
    }

    let parsed: unknown
    try {
      parsed = JSON.parse(rawResponse.slice(start, end + 1))
    } catch {
      throw LLMError.invalidResponse(provider, 'Invalid JSON response')
    }

    if (
      typeof parsed !== 'object' ||
      parsed === null ||
      !('isExtension' in parsed) ||
      !('confidence' in parsed) ||
      !('reason' in parsed)
    ) {
      throw LLMError.invalidResponse(provider, 'Missing required fields in JSON response')
    }

    const { isExtension, confidence, reason } = parsed as Record<string, unknown>

    if (typeof isExtension !== 'boolean') {
      throw LLMError.invalidResponse(provider, 'Invalid isExtension in response')
    }
    // JSON.parse can yield Infinity (e.g. "1e999"), so check finiteness, not just NaN.
    if (typeof confidence !== 'number' || !Number.isFinite(confidence)) {
      throw LLMError.invalidResponse(provider, 'Invalid confidence in response')
    }
    if (typeof reason !== 'string') {
      throw LLMError.invalidResponse(provider, 'Invalid reason in response')
    }

    // Models occasionally answer slightly outside the requested 0.0-1.0 range.
    const boundedConfidence = Math.min(1, Math.max(0, confidence))

    return { isExtension, confidence: boundedConfidence, reason }
  }

  // ============================================================================
  // Heuristic Detection
  // ============================================================================

  /**
   * Pattern/length based verdict used when no LLM is available or it failed.
   *
   * A pair counts as an extension when the texts overlap enough (> 0.2), the
   * new text is not already contained in the old one, and either the new text
   * carries an extension indicator phrase or it is comparably long
   * (> 80% of the old length) without being a near-duplicate (overlap < 0.9).
   * Heuristic confidence never exceeds 0.65.
   */
  private detectWithHeuristics(
    newMemory: Memory,
    existingMemory: Memory
  ): Pick<ExtensionResult, 'isExtension' | 'confidence' | 'reason'> {
    const newLower = newMemory.content.toLowerCase()
    const existingLower = existingMemory.content.toLowerCase()

    const overlap = this.calculateWordOverlap(newLower, existingLower)
    const hasMoreDetail = newMemory.content.length > existingMemory.content.length * 0.8
    // Full containment, not a shared prefix: "OLD + more detail" starts with
    // OLD's text yet is exactly the case this detector exists to recognise.
    const newContentInOld = this.isSubstring(newLower, existingLower)
    const hasExtensionIndicator = EXTENSION_INDICATORS.some((pattern) => pattern.test(newLower))

    // Indicator phrases override the near-duplicate (overlap < 0.9) guard.
    const isExtension = overlap > 0.2 && !newContentInOld && ((overlap < 0.9 && hasMoreDetail) || hasExtensionIndicator)

    const confidence = isExtension ? Math.min(0.65, overlap + 0.2) : 0.3

    let reason = 'No extension detected via heuristics'
    if (isExtension) {
      reason = hasExtensionIndicator
        ? 'Contains extension indicators and adds detail (via pattern matching)'
        : 'Adds detail without contradicting (via pattern matching)'
    } else if (newContentInOld) {
      reason = 'New content already contained in existing memory'
    } else if (overlap < 0.2) {
      reason = 'Insufficient overlap between memories'
    }

    logger.debug('Heuristic extension detection', {
      isExtension,
      confidence,
      overlap,
      hasMoreDetail,
    })

    return {
      isExtension,
      confidence,
      reason,
    }
  }

  // ============================================================================
  // Helpers
  // ============================================================================

  /**
   * Jaccard similarity (|intersection| / |union|) of the two texts' word sets.
   * Words are lower-cased, split on whitespace, and kept only when longer than
   * three characters. Returns 0 when neither text has such a word.
   */
  private calculateWordOverlap(text1: string, text2: string): number {
    const significantWords = (text: string): Set<string> =>
      new Set(
        text
          .toLowerCase()
          .split(/\s+/)
          .filter((word) => word.length > 3)
      )

    const words1 = significantWords(text1)
    const words2 = significantWords(text2)

    let shared = 0
    for (const word of words1) {
      if (words2.has(word)) {
        shared++
      }
    }
    const unionSize = words1.size + words2.size - shared

    return unionSize > 0 ? shared / unionSize : 0
  }

  /**
   * Whether `shorter` occurs verbatim inside `longer`, ignoring case and
   * surrounding whitespace.
   *
   * The whole candidate is compared. Matching only a leading slice would flag
   * "OLD plus extra detail" as already contained, because such text begins
   * with OLD's own words.
   */
  private isSubstring(shorter: string, longer: string): boolean {
    const needle = shorter.trim().toLowerCase()
    const haystack = longer.trim().toLowerCase()
    return haystack.includes(needle)
  }

  // ============================================================================
  // Caching
  // ============================================================================

  /**
   * Cache key for an ordered (new, existing) pair.
   *
   * Order is part of the key because "A extends B" and "B extends A" are
   * different questions. The full normalised texts are hashed, so long
   * contents that merely share a prefix cannot collide. JSON encoding of the
   * pair gives an unambiguous boundary between the two texts.
   */
  private getCacheKey(content1: string, content2: string): string {
    const normalizedPair = [content1, content2].map((text) => text.trim().toLowerCase())
    return createHash('sha256').update(JSON.stringify(normalizedPair)).digest('hex')
  }

  /**
   * Look up a cached verdict; expired entries are removed and reported as a miss.
   */
  private getCached(content1: string, content2: string): CacheEntry | null {
    const key = this.getCacheKey(content1, content2)
    const entry = this.cache.get(key)

    if (!entry) {
      return null
    }

    if (Date.now() - entry.timestamp > this.config.cacheTTLMs) {
      this.cache.delete(key)
      return null
    }

    return entry
  }

  /**
   * Store a verdict, evicting the oldest entries first when the cache is full.
   *
   * Roughly 10% of `maxCacheSize` is evicted per overflow, but always at least
   * one entry so that small limits are honoured too. A non-positive limit
   * disables storing altogether.
   */
  private setCached(content1: string, content2: string, entry: CacheEntry): void {
    const limit = this.config.maxCacheSize
    if (limit <= 0) {
      return
    }

    const key = this.getCacheKey(content1, content2)

    // Overwriting an existing key does not grow the map, so no eviction is needed.
    if (!this.cache.has(key) && this.cache.size >= limit) {
      const evictCount = Math.max(1, Math.floor(limit * 0.1))
      const oldestFirst = Array.from(this.cache.entries()).sort((a, b) => a[1].timestamp - b[1].timestamp)
      for (const [staleKey] of oldestFirst.slice(0, evictCount)) {
        this.cache.delete(staleKey)
      }
    }

    this.cache.set(key, entry)
  }
}
501
+
502
+ // ============================================================================
503
+ // Singleton Instance
504
+ // ============================================================================
505
+
506
// Lazily created shared detector; null until first requested or after a reset.
let _instance: MemoryExtensionDetectorService | null = null

/**
 * Get the singleton instance, creating it on first use.
 *
 * @param config - Applied only when this call creates the instance. If the
 *   instance already exists the argument is ignored (a debug message is
 *   logged); call `resetMemoryExtensionDetector()` first to reconfigure.
 * @returns The shared detector.
 */
export function getMemoryExtensionDetector(config?: ExtensionDetectorConfig): MemoryExtensionDetectorService {
  if (_instance === null) {
    _instance = new MemoryExtensionDetectorService(config)
  } else if (config !== undefined) {
    logger.debug('Extension detector already initialized; ignoring supplied config')
  }
  return _instance
}

/**
 * Reset the singleton (for testing). The next call to
 * `getMemoryExtensionDetector` builds a fresh instance with an empty cache
 * and zeroed statistics.
 */
export function resetMemoryExtensionDetector(): void {
  _instance = null
}