@twelvehart/supermemory-runtime 1.0.0-next.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +57 -0
- package/README.md +374 -0
- package/dist/index.js +189 -0
- package/dist/mcp/index.js +1132 -0
- package/docker-compose.prod.yml +91 -0
- package/docker-compose.yml +358 -0
- package/drizzle/0000_dapper_the_professor.sql +159 -0
- package/drizzle/0001_api_keys.sql +51 -0
- package/drizzle/meta/0000_snapshot.json +1532 -0
- package/drizzle/meta/_journal.json +13 -0
- package/drizzle.config.ts +20 -0
- package/package.json +114 -0
- package/scripts/add-extraction-job.ts +122 -0
- package/scripts/benchmark-pgvector.ts +122 -0
- package/scripts/bootstrap.sh +209 -0
- package/scripts/check-runtime-pack.ts +111 -0
- package/scripts/claude-mcp-config.ts +336 -0
- package/scripts/docker-entrypoint.sh +183 -0
- package/scripts/doctor.ts +377 -0
- package/scripts/init-db.sql +33 -0
- package/scripts/install.sh +1110 -0
- package/scripts/mcp-setup.ts +271 -0
- package/scripts/migrations/001_create_pgvector_extension.sql +31 -0
- package/scripts/migrations/002_create_memory_embeddings_table.sql +75 -0
- package/scripts/migrations/003_create_hnsw_index.sql +94 -0
- package/scripts/migrations/004_create_memory_embeddings_standalone.sql +70 -0
- package/scripts/migrations/005_create_chunks_table.sql +95 -0
- package/scripts/migrations/006_create_processing_queue.sql +45 -0
- package/scripts/migrations/generate_test_data.sql +42 -0
- package/scripts/migrations/phase1_comprehensive_test.sql +204 -0
- package/scripts/migrations/run_migrations.sh +286 -0
- package/scripts/migrations/test_hnsw_index.sql +255 -0
- package/scripts/pre-commit-secrets +282 -0
- package/scripts/run-extraction-worker.ts +46 -0
- package/scripts/run-phase1-tests.sh +291 -0
- package/scripts/setup.ts +222 -0
- package/scripts/smoke-install.sh +12 -0
- package/scripts/test-health-endpoint.sh +328 -0
- package/src/api/index.ts +2 -0
- package/src/api/middleware/auth.ts +80 -0
- package/src/api/middleware/csrf.ts +308 -0
- package/src/api/middleware/errorHandler.ts +166 -0
- package/src/api/middleware/rateLimit.ts +360 -0
- package/src/api/middleware/validation.ts +514 -0
- package/src/api/routes/documents.ts +286 -0
- package/src/api/routes/profiles.ts +237 -0
- package/src/api/routes/search.ts +71 -0
- package/src/api/stores/index.ts +58 -0
- package/src/config/bootstrap-env.ts +3 -0
- package/src/config/env.ts +71 -0
- package/src/config/feature-flags.ts +25 -0
- package/src/config/index.ts +140 -0
- package/src/config/secrets.config.ts +291 -0
- package/src/db/client.ts +92 -0
- package/src/db/index.ts +73 -0
- package/src/db/postgres.ts +72 -0
- package/src/db/schema/chunks.schema.ts +31 -0
- package/src/db/schema/containers.schema.ts +46 -0
- package/src/db/schema/documents.schema.ts +49 -0
- package/src/db/schema/embeddings.schema.ts +32 -0
- package/src/db/schema/index.ts +11 -0
- package/src/db/schema/memories.schema.ts +72 -0
- package/src/db/schema/profiles.schema.ts +34 -0
- package/src/db/schema/queue.schema.ts +59 -0
- package/src/db/schema/relationships.schema.ts +42 -0
- package/src/db/schema.ts +223 -0
- package/src/db/worker-connection.ts +47 -0
- package/src/index.ts +235 -0
- package/src/mcp/CLAUDE.md +1 -0
- package/src/mcp/index.ts +1380 -0
- package/src/mcp/legacyState.ts +22 -0
- package/src/mcp/rateLimit.ts +358 -0
- package/src/mcp/resources.ts +309 -0
- package/src/mcp/results.ts +104 -0
- package/src/mcp/tools.ts +401 -0
- package/src/queues/config.ts +119 -0
- package/src/queues/index.ts +289 -0
- package/src/sdk/client.ts +225 -0
- package/src/sdk/errors.ts +266 -0
- package/src/sdk/http.ts +560 -0
- package/src/sdk/index.ts +244 -0
- package/src/sdk/resources/base.ts +65 -0
- package/src/sdk/resources/connections.ts +204 -0
- package/src/sdk/resources/documents.ts +163 -0
- package/src/sdk/resources/index.ts +10 -0
- package/src/sdk/resources/memories.ts +150 -0
- package/src/sdk/resources/search.ts +60 -0
- package/src/sdk/resources/settings.ts +36 -0
- package/src/sdk/types.ts +674 -0
- package/src/services/chunking/index.ts +451 -0
- package/src/services/chunking.service.ts +650 -0
- package/src/services/csrf.service.ts +252 -0
- package/src/services/documents.repository.ts +219 -0
- package/src/services/documents.service.ts +191 -0
- package/src/services/embedding.service.ts +404 -0
- package/src/services/extraction.service.ts +300 -0
- package/src/services/extractors/code.extractor.ts +451 -0
- package/src/services/extractors/index.ts +9 -0
- package/src/services/extractors/markdown.extractor.ts +461 -0
- package/src/services/extractors/pdf.extractor.ts +315 -0
- package/src/services/extractors/text.extractor.ts +118 -0
- package/src/services/extractors/url.extractor.ts +243 -0
- package/src/services/index.ts +235 -0
- package/src/services/ingestion.service.ts +177 -0
- package/src/services/llm/anthropic.ts +400 -0
- package/src/services/llm/base.ts +460 -0
- package/src/services/llm/contradiction-detector.service.ts +526 -0
- package/src/services/llm/heuristics.ts +148 -0
- package/src/services/llm/index.ts +309 -0
- package/src/services/llm/memory-classifier.service.ts +383 -0
- package/src/services/llm/memory-extension-detector.service.ts +523 -0
- package/src/services/llm/mock.ts +470 -0
- package/src/services/llm/openai.ts +398 -0
- package/src/services/llm/prompts.ts +438 -0
- package/src/services/llm/types.ts +373 -0
- package/src/services/memory.repository.ts +1769 -0
- package/src/services/memory.service.ts +1338 -0
- package/src/services/memory.types.ts +234 -0
- package/src/services/persistence/index.ts +295 -0
- package/src/services/pipeline.service.ts +509 -0
- package/src/services/profile.repository.ts +436 -0
- package/src/services/profile.service.ts +560 -0
- package/src/services/profile.types.ts +270 -0
- package/src/services/relationships/detector.ts +1128 -0
- package/src/services/relationships/index.ts +268 -0
- package/src/services/relationships/memory-integration.ts +459 -0
- package/src/services/relationships/strategies.ts +132 -0
- package/src/services/relationships/types.ts +370 -0
- package/src/services/search.service.ts +761 -0
- package/src/services/search.types.ts +220 -0
- package/src/services/secrets.service.ts +384 -0
- package/src/services/vectorstore/base.ts +327 -0
- package/src/services/vectorstore/index.ts +444 -0
- package/src/services/vectorstore/memory.ts +286 -0
- package/src/services/vectorstore/migration.ts +295 -0
- package/src/services/vectorstore/mock.ts +403 -0
- package/src/services/vectorstore/pgvector.ts +695 -0
- package/src/services/vectorstore/types.ts +247 -0
- package/src/startup.ts +389 -0
- package/src/types/api.types.ts +193 -0
- package/src/types/document.types.ts +103 -0
- package/src/types/index.ts +241 -0
- package/src/types/profile.base.ts +133 -0
- package/src/utils/errors.ts +447 -0
- package/src/utils/id.ts +15 -0
- package/src/utils/index.ts +101 -0
- package/src/utils/logger.ts +313 -0
- package/src/utils/sanitization.ts +501 -0
- package/src/utils/secret-validation.ts +273 -0
- package/src/utils/synonyms.ts +188 -0
- package/src/utils/validation.ts +581 -0
- package/src/workers/chunking.worker.ts +242 -0
- package/src/workers/embedding.worker.ts +358 -0
- package/src/workers/extraction.worker.ts +346 -0
- package/src/workers/indexing.worker.ts +505 -0
- package/tsconfig.json +38 -0
|
@@ -0,0 +1,470 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Mock LLM Provider
|
|
3
|
+
*
|
|
4
|
+
* A testing provider that returns predefined responses or generates
|
|
5
|
+
* simple rule-based extractions. Useful for testing without API calls.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { getLogger } from '../../utils/logger.js'
|
|
9
|
+
import { BaseLLMProvider, LLMError } from './base.js'
|
|
10
|
+
import type {
|
|
11
|
+
MockLLMConfig,
|
|
12
|
+
LLMProviderType,
|
|
13
|
+
ExtractedMemory,
|
|
14
|
+
DetectedRelationship,
|
|
15
|
+
ExtractionOptions,
|
|
16
|
+
RelationshipDetectionOptions,
|
|
17
|
+
} from './types.js'
|
|
18
|
+
import { LLMErrorCode } from './types.js'
|
|
19
|
+
import type { MemoryType, Entity } from '../../types/index.js'
|
|
20
|
+
import { classifyMemoryTypeHeuristically, countMemoryTypeMatches, calculateHeuristicConfidence } from './heuristics.js'
|
|
21
|
+
|
|
22
|
+
// Logger used by this module, obtained from getLogger under the name 'MockProvider'.
const logger = getLogger('MockProvider')
|
|
23
|
+
|
|
24
|
+
// ============================================================================
|
|
25
|
+
// Default Configuration
|
|
26
|
+
// ============================================================================
|
|
27
|
+
|
|
28
|
+
/**
 * Baseline settings for the mock provider.
 *
 * The provider spreads these first and the caller's config second, so any
 * key the caller supplies replaces the value listed here.
 */
const DEFAULT_MOCK_CONFIG: MockLLMConfig = {
  // Generic provider settings.
  maxTokens: 2000,
  temperature: 0.1,
  timeoutMs: 1000,
  maxRetries: 1,
  retryDelayMs: 100,

  // Mock-only knobs: artificial delay per call and optional random failures.
  simulatedLatencyMs: 100,
  simulateErrors: false,
  errorRate: 0.1,
}
|
|
38
|
+
|
|
39
|
+
// ============================================================================
|
|
40
|
+
// Rule-Based Extraction Patterns
|
|
41
|
+
// ============================================================================
|
|
42
|
+
|
|
43
|
+
/**
 * Regex heuristics for tagging entities in a sentence, listed in priority
 * order.
 *
 * Order matters: the entity extractor in this file de-duplicates by
 * lower-cased name and keeps the first hit, so specific patterns (known
 * organizations, known places, dates) must come before the generic
 * "two capitalised words" person heuristic. Otherwise "New York" or
 * "San Francisco" would always be tagged as a person.
 *
 * Every pattern carries the `g` flag because it is consumed via `matchAll`.
 */
const ENTITY_PATTERNS: Array<{ pattern: RegExp; type: Entity['type'] }> = [
  // Honorific followed by one or more capitalised words, e.g. "Dr. Jane Doe".
  { pattern: /\b(?:Mr\.|Mrs\.|Ms\.|Dr\.)\s*[A-Z][a-z]+(?:\s+[A-Z][a-z]+)*/g, type: 'person' },

  // Corporate suffixes. The dotted forms must NOT be followed by `\b`: a dot
  // followed by a space or end of text is not a word boundary, so a trailing
  // `\b` would stop "Inc." / "Corp." / "Ltd." from matching in normal prose.
  { pattern: /\b(?:(?:Inc|Corp|Ltd)\.|(?:LLC|Company)\b)/gi, type: 'organization' },

  // A small fixed list of well-known company names.
  { pattern: /\b(?:Google|Microsoft|Apple|Amazon|Meta)\b/gi, type: 'organization' },

  // A small fixed list of well-known cities.
  { pattern: /\b(?:New York|London|Paris|Tokyo|San Francisco)\b/gi, type: 'place' },

  // Numeric dates such as 1/2/2024 or 01-02-24.
  { pattern: /\b\d{1,2}[/-]\d{1,2}[/-]\d{2,4}\b/g, type: 'date' },

  // Generic fallback: any two consecutive capitalised words are assumed to be
  // a person's name. Kept last so the specific patterns above take precedence.
  { pattern: /\b[A-Z][a-z]+\s+[A-Z][a-z]+\b/g, type: 'person' },
]
|
|
51
|
+
|
|
52
|
+
// ============================================================================
|
|
53
|
+
// Mock Provider Implementation
|
|
54
|
+
// ============================================================================
|
|
55
|
+
|
|
56
|
+
/**
 * Mock LLM provider for tests.
 *
 * Returns either caller-supplied canned responses (cycled in order) or
 * simple rule-based results derived from the input text, so no network call
 * is made. It can also add an artificial delay and fail at random with a
 * configurable probability.
 */
export class MockLLMProvider extends BaseLLMProvider {
  readonly type: LLMProviderType = 'mock'

  /**
   * Words suggesting the new memory revises an earlier one. Word boundaries
   * keep "now" from matching inside unrelated words such as "know" or "snow".
   */
  private static readonly UPDATE_CUES = /\b(?:now|actually|instead|changed|updated)\b/i

  /** Words suggesting the new memory adds detail to an earlier one. */
  private static readonly EXTENSION_CUES = /\b(?:also|additionally|furthermore|moreover)\b/i

  /** Common English words ignored when picking keywords. Built once, not per call. */
  private static readonly KEYWORD_STOP_WORDS: ReadonlySet<string> = new Set([
    'the',
    'a',
    'an',
    'and',
    'or',
    'but',
    'in',
    'on',
    'at',
    'to',
    'for',
    'of',
    'with',
    'by',
    'from',
    'as',
    'is',
    'was',
    'are',
    'were',
    'been',
    'be',
    'have',
    'has',
    'had',
    'do',
    'does',
    'did',
    'will',
    'would',
    'could',
    'should',
    'may',
    'might',
    'must',
    'shall',
    'can',
    'need',
    'it',
    'this',
    'that',
    'these',
    'those',
    'i',
    'you',
    'he',
    'she',
    'we',
    'they',
    'my',
    'your',
    'his',
    'her',
    'our',
    'their',
    'its',
  ])

  // Not readonly: setMockResponses() replaces the canned responses after construction.
  private mockResponses?: ExtractedMemory[][]
  private readonly mockJsonResponses?: Array<string | Record<string, unknown>>
  private readonly simulatedLatencyMs: number
  private readonly simulateErrors: boolean
  private readonly errorRate: number
  private responseIndex: number = 0
  private jsonResponseIndex: number = 0
  private lastJsonTask?: { systemPrompt: string; userPrompt: string }

  /**
   * @param config - Optional overrides. Values are layered on top of
   *   DEFAULT_MOCK_CONFIG before being handed to the base class; the
   *   mock-specific settings fall back to the defaults when undefined.
   */
  constructor(config: MockLLMConfig = {}) {
    super({
      ...DEFAULT_MOCK_CONFIG,
      ...config,
    })

    this.mockResponses = config.mockResponses
    this.mockJsonResponses = config.mockJsonResponses
    this.simulatedLatencyMs = config.simulatedLatencyMs ?? DEFAULT_MOCK_CONFIG.simulatedLatencyMs!
    this.simulateErrors = config.simulateErrors ?? DEFAULT_MOCK_CONFIG.simulateErrors!
    this.errorRate = config.errorRate ?? DEFAULT_MOCK_CONFIG.errorRate!

    logger.debug('Mock provider initialized', {
      hasMockResponses: Boolean(this.mockResponses),
      simulatedLatencyMs: this.simulatedLatencyMs,
      simulateErrors: this.simulateErrors,
    })
  }

  // ============================================================================
  // Availability Check
  // ============================================================================

  /** The mock needs no credentials or network, so it always reports available. */
  isAvailable(): boolean {
    return true
  }

  // ============================================================================
  // Memory Extraction
  // ============================================================================

  /**
   * Produce memories for `text`.
   *
   * When canned responses are configured they are returned in rotation
   * (wrapping around) with fixed token counts. Otherwise memories are derived
   * from the text by the rule-based extractor and token usage is estimated
   * from the text length and the number of memories.
   *
   * @param text - Source text to extract from.
   * @param options - Extraction limits (`maxMemories`, `minConfidence`).
   * @returns The memories, their JSON serialisation, and token usage figures.
   * @throws LLMError with code PROVIDER_UNAVAILABLE when a simulated failure fires.
   */
  protected async doExtractMemories(
    text: string,
    options: ExtractionOptions
  ): Promise<{
    memories: ExtractedMemory[]
    rawResponse?: string
    tokensUsed?: { prompt: number; completion: number; total: number }
  }> {
    await this.simulateLatency()
    this.throwIfSimulatedError()

    if (this.mockResponses && this.mockResponses.length > 0) {
      const memories = this.mockResponses[this.responseIndex % this.mockResponses.length]!
      this.responseIndex++
      return {
        memories,
        rawResponse: JSON.stringify({ memories }),
        tokensUsed: { prompt: 100, completion: 50, total: 150 },
      }
    }

    const memories = this.ruleBasedExtraction(text, options)
    // Rough estimate: about four characters per prompt token, 50 tokens per memory.
    const promptTokens = Math.ceil(text.length / 4)
    const completionTokens = memories.length * 50

    return {
      memories,
      rawResponse: JSON.stringify({ memories }),
      tokensUsed: {
        prompt: promptTokens,
        completion: completionTokens,
        total: promptTokens + completionTokens,
      },
    }
  }

  // ============================================================================
  // Relationship Detection
  // ============================================================================

  /**
   * Relate `newMemory` to each existing memory using word-overlap similarity
   * and cue words in the new memory's content.
   *
   * Per existing memory:
   * - similarity below 0.3: ignored;
   * - update cue present and similarity above 0.5: `updates`;
   * - extension cue present and similarity above 0.4: `extends`;
   * - similarity at or above `options.minConfidence` (default 0.5): `related`.
   *
   * The result is truncated to `options.maxRelationships` (negative values
   * are treated as 0). `supersededMemoryIds` is derived from the `updates`
   * relationships that survive truncation, so it never names a memory whose
   * relationship was cut.
   *
   * @param newMemory - The memory being added.
   * @param existingMemories - Candidates to compare against, in caller order.
   * @param options - `minConfidence` and `maxRelationships` limits.
   * @returns Detected relationships and the ids they supersede.
   * @throws LLMError with code PROVIDER_UNAVAILABLE when a simulated failure fires.
   */
  protected async doDetectRelationships(
    newMemory: { id: string; content: string; type: MemoryType },
    existingMemories: Array<{ id: string; content: string; type: MemoryType }>,
    options: RelationshipDetectionOptions
  ): Promise<{
    relationships: DetectedRelationship[]
    supersededMemoryIds: string[]
  }> {
    await this.simulateLatency()
    this.throwIfSimulatedError()

    // The cues depend only on the new memory, so evaluate them once up front.
    const looksLikeUpdate = MockLLMProvider.UPDATE_CUES.test(newMemory.content)
    const looksLikeExtension = MockLLMProvider.EXTENSION_CUES.test(newMemory.content)
    const relatedThreshold = options.minConfidence ?? 0.5

    const candidates: DetectedRelationship[] = []

    for (const existing of existingMemories) {
      const similarity = this.calculateSimilarity(newMemory.content, existing.content)

      if (similarity < 0.3) {
        continue
      }

      if (looksLikeUpdate && similarity > 0.5) {
        candidates.push({
          sourceMemoryId: newMemory.id,
          targetMemoryId: existing.id,
          type: 'updates',
          confidence: similarity,
          reason: 'Content suggests update to existing information',
        })
      } else if (looksLikeExtension && similarity > 0.4) {
        candidates.push({
          sourceMemoryId: newMemory.id,
          targetMemoryId: existing.id,
          type: 'extends',
          confidence: similarity,
          reason: 'Content extends existing information',
        })
      } else if (similarity >= relatedThreshold) {
        candidates.push({
          sourceMemoryId: newMemory.id,
          targetMemoryId: existing.id,
          type: 'related',
          confidence: similarity,
          reason: 'Semantically related content',
        })
      }
    }

    // Clamp so a negative limit yields an empty list rather than slice()'s
    // "drop from the end" behaviour.
    const limit = Math.max(0, options.maxRelationships ?? candidates.length)
    const relationships = candidates.slice(0, limit)
    const supersededMemoryIds = relationships
      .filter((relationship) => relationship.type === 'updates')
      .map((relationship) => relationship.targetMemoryId)

    return { relationships, supersededMemoryIds }
  }

  // ============================================================================
  // Generic JSON Task
  // ============================================================================

  /**
   * Return a JSON string for a generic prompt pair.
   *
   * The prompts are recorded (see getLastJsonTask) once the simulated-error
   * check has passed. Canned JSON responses are returned in rotation; object
   * entries are serialised, string entries are returned verbatim. With no
   * canned responses the result is `{"ok":true}`.
   *
   * @param systemPrompt - System prompt; only recorded, not interpreted.
   * @param userPrompt - User prompt; only recorded, not interpreted.
   * @returns The raw JSON text and fixed token usage figures.
   * @throws LLMError with code PROVIDER_UNAVAILABLE when a simulated failure fires.
   */
  protected async doGenerateJson(
    systemPrompt: string,
    userPrompt: string
  ): Promise<{
    rawResponse: string
    tokensUsed?: { prompt: number; completion: number; total: number }
  }> {
    await this.simulateLatency()
    this.throwIfSimulatedError()

    this.lastJsonTask = { systemPrompt, userPrompt }

    let rawResponse = JSON.stringify({ ok: true })
    if (this.mockJsonResponses && this.mockJsonResponses.length > 0) {
      const response = this.mockJsonResponses[this.jsonResponseIndex % this.mockJsonResponses.length]!
      this.jsonResponseIndex++
      rawResponse = typeof response === 'string' ? response : JSON.stringify(response)
    }

    return {
      rawResponse,
      tokensUsed: { prompt: 80, completion: 40, total: 120 },
    }
  }

  // ============================================================================
  // Helper Methods
  // ============================================================================

  /** Wait for the configured artificial delay; no-op when it is zero or negative. */
  private async simulateLatency(): Promise<void> {
    if (this.simulatedLatencyMs <= 0) {
      return
    }
    await new Promise<void>((resolve) => setTimeout(resolve, this.simulatedLatencyMs))
  }

  /**
   * Throw a retryable PROVIDER_UNAVAILABLE error with probability `errorRate`
   * when error simulation is enabled. Math.random is only consulted when
   * simulation is switched on.
   */
  private throwIfSimulatedError(): void {
    if (this.simulateErrors && Math.random() < this.errorRate) {
      throw new LLMError('Simulated error', LLMErrorCode.PROVIDER_UNAVAILABLE, 'mock', true)
    }
  }

  /**
   * Turn each sufficiently long sentence of `text` into a memory.
   *
   * Sentences are split after `.`, `!` or `?` followed by whitespace, and
   * those shorter than 10 characters (after trimming) are dropped. Extraction
   * stops once `options.maxMemories` (default 10) memories are collected.
   * Sentences scoring below `options.minConfidence` are skipped.
   */
  private ruleBasedExtraction(text: string, options: ExtractionOptions): ExtractedMemory[] {
    const sentences = text.split(/(?<=[.!?])\s+/).filter((s) => s.trim().length >= 10)
    const maxMemories = options.maxMemories ?? 10
    // A missing threshold behaves like 0: nothing is filtered out.
    const minConfidence = options.minConfidence ?? 0

    const memories: ExtractedMemory[] = []

    for (const sentence of sentences) {
      if (memories.length >= maxMemories) {
        break
      }

      const type = this.classifyType(sentence)
      const confidence = this.calculateConfidence(sentence, type)

      // Filter before the entity/keyword work so rejected sentences cost nothing more.
      if (confidence < minConfidence) {
        continue
      }

      memories.push({
        content: sentence.trim(),
        type,
        confidence,
        entities: this.extractEntities(sentence),
        keywords: this.extractKeywords(sentence),
      })
    }

    return memories
  }

  /** Classify a sentence using the shared heuristic classifier. */
  private classifyType(text: string): MemoryType {
    return classifyMemoryTypeHeuristically(text).type
  }

  /**
   * Collect entities matched by ENTITY_PATTERNS.
   *
   * Names are de-duplicated case-insensitively; the first pattern to match a
   * name decides its type. Single-character matches are ignored. `mentions`
   * is always reported as 1.
   */
  private extractEntities(text: string): Entity[] {
    const entities: Entity[] = []
    const seen = new Set<string>()

    for (const { pattern, type } of ENTITY_PATTERNS) {
      // Work on a copy so the shared module-level regex objects are never touched.
      const matcher = new RegExp(pattern.source, pattern.flags)

      for (const match of text.matchAll(matcher)) {
        const name = match[0].trim()
        const normalized = name.toLowerCase()

        if (name.length <= 1 || seen.has(normalized)) {
          continue
        }

        seen.add(normalized)
        entities.push({ name, type, mentions: 1 })
      }
    }

    return entities
  }

  /**
   * Return up to 10 distinct keywords: lower-cased runs of at least three
   * ASCII letters that are not stop words, in order of first appearance.
   */
  private extractKeywords(text: string): string[] {
    const words = text.toLowerCase().match(/\b[a-z]{3,}\b/g) ?? []
    const keywords = words.filter((word) => !MockLLMProvider.KEYWORD_STOP_WORDS.has(word))

    return [...new Set(keywords)].slice(0, 10)
  }

  /**
   * Score a sentence between 0.6 and 0.95.
   *
   * The score is the larger of a length-based value (0.6, plus 0.1 above 100
   * characters, plus another 0.1 above 200) and the heuristic confidence
   * computed from how many type-specific patterns the text matches.
   */
  private calculateConfidence(text: string, type: MemoryType): number {
    let lengthBased = 0.6
    if (text.length > 100) lengthBased += 0.1
    if (text.length > 200) lengthBased += 0.1

    const matchCount = countMemoryTypeMatches(text, type)
    const heuristic = calculateHeuristicConfidence(matchCount, {
      base: 0.6,
      perMatch: 0.1,
      max: 0.95,
      defaultConfidence: 0.6,
    })

    return Math.min(Math.max(lengthBased, heuristic), 0.95)
  }

  /**
   * Jaccard similarity of the two texts' lower-cased, whitespace-separated
   * word sets.
   *
   * Empty tokens are discarded: `''.split(/\s+/)` yields `['']`, which would
   * otherwise make two empty strings score 1 and let leading or trailing
   * whitespace count as a shared "word". Returns 0 when neither text has words.
   */
  private calculateSimilarity(text1: string, text2: string): number {
    const tokenize = (text: string): Set<string> =>
      new Set(
        text
          .toLowerCase()
          .split(/\s+/)
          .filter((token) => token.length > 0)
      )

    const words1 = tokenize(text1)
    const words2 = tokenize(text2)

    const unionSize = new Set([...words1, ...words2]).size
    if (unionSize === 0) return 0

    let shared = 0
    for (const word of words1) {
      if (words2.has(word)) shared++
    }

    return shared / unionSize
  }

  // ============================================================================
  // Test Helpers
  // ============================================================================

  /**
   * Replace the canned extraction responses and restart their rotation.
   */
  setMockResponses(responses: ExtractedMemory[][]): void {
    this.mockResponses = responses
    this.responseIndex = 0
  }

  /**
   * Restart rotation of both the extraction and the JSON canned responses.
   */
  resetResponseIndex(): void {
    this.responseIndex = 0
    this.jsonResponseIndex = 0
  }

  /**
   * Number of canned extraction responses served since the last reset.
   */
  getResponseIndex(): number {
    return this.responseIndex
  }

  /**
   * Prompts passed to the most recent JSON task that got past the
   * simulated-error check, or undefined if there has been none.
   */
  getLastJsonTask(): { systemPrompt: string; userPrompt: string } | undefined {
    return this.lastJsonTask
  }
}
|
|
460
|
+
|
|
461
|
+
// ============================================================================
|
|
462
|
+
// Factory Function
|
|
463
|
+
// ============================================================================
|
|
464
|
+
|
|
465
|
+
/**
 * Factory wrapper around the MockLLMProvider constructor.
 *
 * @param config - Optional mock settings; an empty object is used when omitted.
 * @returns A newly constructed MockLLMProvider.
 */
export function createMockProvider(config: MockLLMConfig = {}): MockLLMProvider {
  const provider = new MockLLMProvider(config)
  return provider
}
|