alma-memory 0.5.1__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents exactly as they appear in that public registry.
- alma/__init__.py +296 -226
- alma/compression/__init__.py +33 -0
- alma/compression/pipeline.py +980 -0
- alma/confidence/__init__.py +47 -47
- alma/confidence/engine.py +540 -540
- alma/confidence/types.py +351 -351
- alma/config/loader.py +157 -157
- alma/consolidation/__init__.py +23 -23
- alma/consolidation/engine.py +678 -678
- alma/consolidation/prompts.py +84 -84
- alma/core.py +1189 -430
- alma/domains/__init__.py +30 -30
- alma/domains/factory.py +359 -359
- alma/domains/schemas.py +448 -448
- alma/domains/types.py +272 -272
- alma/events/__init__.py +75 -75
- alma/events/emitter.py +285 -284
- alma/events/storage_mixin.py +246 -246
- alma/events/types.py +126 -126
- alma/events/webhook.py +425 -425
- alma/exceptions.py +49 -49
- alma/extraction/__init__.py +31 -31
- alma/extraction/auto_learner.py +265 -265
- alma/extraction/extractor.py +420 -420
- alma/graph/__init__.py +106 -106
- alma/graph/backends/__init__.py +32 -32
- alma/graph/backends/kuzu.py +624 -624
- alma/graph/backends/memgraph.py +432 -432
- alma/graph/backends/memory.py +236 -236
- alma/graph/backends/neo4j.py +417 -417
- alma/graph/base.py +159 -159
- alma/graph/extraction.py +198 -198
- alma/graph/store.py +860 -860
- alma/harness/__init__.py +35 -35
- alma/harness/base.py +386 -386
- alma/harness/domains.py +705 -705
- alma/initializer/__init__.py +37 -37
- alma/initializer/initializer.py +418 -418
- alma/initializer/types.py +250 -250
- alma/integration/__init__.py +62 -62
- alma/integration/claude_agents.py +444 -444
- alma/integration/helena.py +423 -423
- alma/integration/victor.py +471 -471
- alma/learning/__init__.py +101 -86
- alma/learning/decay.py +878 -0
- alma/learning/forgetting.py +1446 -1446
- alma/learning/heuristic_extractor.py +390 -390
- alma/learning/protocols.py +374 -374
- alma/learning/validation.py +346 -346
- alma/mcp/__init__.py +123 -45
- alma/mcp/__main__.py +156 -156
- alma/mcp/resources.py +122 -122
- alma/mcp/server.py +955 -591
- alma/mcp/tools.py +3254 -509
- alma/observability/__init__.py +91 -84
- alma/observability/config.py +302 -302
- alma/observability/guidelines.py +170 -0
- alma/observability/logging.py +424 -424
- alma/observability/metrics.py +583 -583
- alma/observability/tracing.py +440 -440
- alma/progress/__init__.py +21 -21
- alma/progress/tracker.py +607 -607
- alma/progress/types.py +250 -250
- alma/retrieval/__init__.py +134 -53
- alma/retrieval/budget.py +525 -0
- alma/retrieval/cache.py +1304 -1061
- alma/retrieval/embeddings.py +202 -202
- alma/retrieval/engine.py +850 -427
- alma/retrieval/modes.py +365 -0
- alma/retrieval/progressive.py +560 -0
- alma/retrieval/scoring.py +344 -344
- alma/retrieval/trust_scoring.py +637 -0
- alma/retrieval/verification.py +797 -0
- alma/session/__init__.py +19 -19
- alma/session/manager.py +442 -399
- alma/session/types.py +288 -288
- alma/storage/__init__.py +101 -90
- alma/storage/archive.py +233 -0
- alma/storage/azure_cosmos.py +1259 -1259
- alma/storage/base.py +1083 -583
- alma/storage/chroma.py +1443 -1443
- alma/storage/constants.py +103 -103
- alma/storage/file_based.py +614 -614
- alma/storage/migrations/__init__.py +21 -21
- alma/storage/migrations/base.py +321 -321
- alma/storage/migrations/runner.py +323 -323
- alma/storage/migrations/version_stores.py +337 -337
- alma/storage/migrations/versions/__init__.py +11 -11
- alma/storage/migrations/versions/v1_0_0.py +373 -373
- alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
- alma/storage/pinecone.py +1080 -1080
- alma/storage/postgresql.py +1948 -1559
- alma/storage/qdrant.py +1306 -1306
- alma/storage/sqlite_local.py +3041 -1457
- alma/testing/__init__.py +46 -46
- alma/testing/factories.py +301 -301
- alma/testing/mocks.py +389 -389
- alma/types.py +292 -264
- alma/utils/__init__.py +19 -0
- alma/utils/tokenizer.py +521 -0
- alma/workflow/__init__.py +83 -0
- alma/workflow/artifacts.py +170 -0
- alma/workflow/checkpoint.py +311 -0
- alma/workflow/context.py +228 -0
- alma/workflow/outcomes.py +189 -0
- alma/workflow/reducers.py +393 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/METADATA +210 -72
- alma_memory-0.7.0.dist-info/RECORD +112 -0
- alma_memory-0.5.1.dist-info/RECORD +0 -93
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
- {alma_memory-0.5.1.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
alma/compression/pipeline.py (new file)
@@ -0,0 +1,980 @@
"""
ALMA Memory Compression Pipeline.

Provides intelligent compression of verbose content into structured, efficient memories.
Supports both LLM-based intelligent extraction and rule-based fallback.

Target compression ratios:
- LIGHT: 1.5x (remove redundancy)
- MEDIUM: 3x (extract key points)
- AGGRESSIVE: 5x+ (maximum compression)
"""

import logging
import re
import time
from dataclasses import dataclass
from enum import Enum
from typing import Any, Dict, List, Optional, Protocol

logger = logging.getLogger(__name__)


class CompressionLevel(Enum):
    """Compression level for memory content."""

    NONE = "none"  # No compression, preserve original
    LIGHT = "light"  # Remove redundancy only (~1.5x)
    MEDIUM = "medium"  # Extract key points (~3x)
    AGGRESSIVE = "aggressive"  # Maximum compression (~5x+)


@dataclass
class CompressedMemory:
    """
    Result of compressing verbose content.

    Attributes:
        original_length: Length of original content in characters
        compressed_length: Length of compressed summary
        compression_ratio: Ratio of original to compressed length
        key_facts: Essential facts extracted from content
        constraints: Limitations or requirements discovered
        patterns: Reusable patterns identified
        summary: Compressed content for storage
        full_content: Original content preserved for verification
    """

    original_length: int
    compressed_length: int
    compression_ratio: float
    key_facts: List[str]
    constraints: List[str]
    patterns: List[str]
    summary: str
    full_content: str

    def to_metadata(self) -> Dict[str, Any]:
        """Generate metadata to store with memory."""
        return {
            "compressed": True,
            "compression_ratio": round(self.compression_ratio, 2),
            "original_length": self.original_length,
            "compressed_length": self.compressed_length,
            "key_facts": self.key_facts,
            "constraints": self.constraints,
            "patterns": self.patterns,
        }

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "original_length": self.original_length,
            "compressed_length": self.compressed_length,
            "compression_ratio": self.compression_ratio,
            "key_facts": self.key_facts,
            "constraints": self.constraints,
            "patterns": self.patterns,
            "summary": self.summary,
            "full_content": self.full_content,
        }

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CompressedMemory":
        """Create from dictionary."""
        return cls(
            original_length=data["original_length"],
            compressed_length=data["compressed_length"],
            compression_ratio=data["compression_ratio"],
            key_facts=data.get("key_facts", []),
            constraints=data.get("constraints", []),
            patterns=data.get("patterns", []),
            summary=data["summary"],
            full_content=data["full_content"],
        )
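
# Illustrative sketch (editorial, not part of the shipped file): because
# CompressedMemory is a plain dataclass, persistence is a dict round-trip,
# and to_metadata() yields the subset a memory store would index. store()
# below is a hypothetical sink, not an ALMA API.
#
#     mem = CompressedMemory(
#         original_length=600, compressed_length=200, compression_ratio=3.0,
#         key_facts=["API rate limit is 100 req/min"],
#         constraints=["Must retry with backoff"], patterns=[],
#         summary="Hit rate limits; added backoff.", full_content="...",
#     )
#     assert CompressedMemory.from_dict(mem.to_dict()) == mem
#     store(mem.summary, metadata=mem.to_metadata())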


@dataclass
class CompressionResult:
    """
    Full result of a compression operation.

    Attributes:
        compressed: The compressed memory
        level: Compression level used
        method: Method used (llm or rule_based)
        compression_time_ms: Time taken for compression
        success: Whether compression succeeded
        error: Error message if failed
    """

    compressed: Optional[CompressedMemory]
    level: CompressionLevel
    method: str  # "llm" or "rule_based"
    compression_time_ms: int = 0
    success: bool = True
    error: Optional[str] = None

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "compressed": self.compressed.to_dict() if self.compressed else None,
            "level": self.level.value,
            "method": self.method,
            "compression_time_ms": self.compression_time_ms,
            "success": self.success,
            "error": self.error,
        }


@dataclass
class CompressionConfig:
    """
    Configuration for compression behavior.

    Attributes:
        default_level: Default compression level
        min_length_for_compression: Minimum content length to compress
        max_key_facts: Maximum number of key facts to extract
        max_constraints: Maximum number of constraints to extract
        max_patterns: Maximum number of patterns to extract
        preserve_full_content: Whether to preserve original content
        llm_timeout_seconds: Timeout for LLM-based compression
    """

    default_level: CompressionLevel = CompressionLevel.MEDIUM
    min_length_for_compression: int = 200
    max_key_facts: int = 5
    max_constraints: int = 3
    max_patterns: int = 3
    preserve_full_content: bool = True
    llm_timeout_seconds: float = 10.0

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "CompressionConfig":
        """Create from dictionary."""
        level_str = data.get("default_level", "medium")
        return cls(
            default_level=CompressionLevel(level_str),
            min_length_for_compression=data.get("min_length_for_compression", 200),
            max_key_facts=data.get("max_key_facts", 5),
            max_constraints=data.get("max_constraints", 3),
            max_patterns=data.get("max_patterns", 3),
            preserve_full_content=data.get("preserve_full_content", True),
            llm_timeout_seconds=data.get("llm_timeout_seconds", 10.0),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary."""
        return {
            "default_level": self.default_level.value,
            "min_length_for_compression": self.min_length_for_compression,
            "max_key_facts": self.max_key_facts,
            "max_constraints": self.max_constraints,
            "max_patterns": self.max_patterns,
            "preserve_full_content": self.preserve_full_content,
            "llm_timeout_seconds": self.llm_timeout_seconds,
        }
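
# Illustrative sketch (editorial, not part of the shipped file): from_dict
# accepts the kind of mapping a YAML/JSON settings loader would produce;
# missing keys fall back to the defaults above, while an unknown level string
# raises ValueError from the CompressionLevel() call.
#
#     cfg = CompressionConfig.from_dict(
#         {"default_level": "aggressive", "max_key_facts": 3}
#     )
#     assert cfg.default_level is CompressionLevel.AGGRESSIVE
#     assert cfg.llm_timeout_seconds == 10.0  # default preserved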


class LLMClient(Protocol):
    """Protocol for LLM clients used in compression."""

    def complete(self, prompt: str, timeout: Optional[float] = None) -> str:
        """Complete a prompt and return the response."""
        ...
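
# Illustrative sketch (editorial, not part of the shipped file): LLMClient is
# a structural Protocol, so any object with a matching complete() signature
# qualifies - no inheritance or registration needed. A minimal test stub:
#
#     class EchoClient:
#         def complete(self, prompt: str, timeout: Optional[float] = None) -> str:
#             return "SUMMARY: stub reply\nKEY_FACTS:\n- stub fact"
#
#     compressor = MemoryCompressor(llm_client=EchoClient())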


class MemoryCompressor:
    """
    Intelligent compression of verbose content into structured memories.

    Supports both LLM-based intelligent extraction and rule-based fallback.
    Achieves 3-5x compression ratio while preserving essential information.

    Example:
        compressor = MemoryCompressor(llm_client=llm)
        result = compressor.compress_outcome(
            "Long verbose task outcome description...",
            level=CompressionLevel.MEDIUM
        )
        print(f"Compressed {result.compression_ratio:.1f}x")
        print(f"Key facts: {result.key_facts}")
    """

    def __init__(
        self,
        llm_client: Optional[LLMClient] = None,
        config: Optional[CompressionConfig] = None,
    ):
        """
        Initialize compressor.

        Args:
            llm_client: Optional LLM client for intelligent compression
            config: Compression configuration
        """
        self.llm = llm_client
        self.config = config or CompressionConfig()

    def compress(
        self,
        content: str,
        level: Optional[CompressionLevel] = None,
        content_type: str = "general",
    ) -> CompressionResult:
        """
        Compress content using the appropriate method.

        Args:
            content: Content to compress
            level: Compression level (default from config)
            content_type: Type of content (outcome, conversation, general)

        Returns:
            CompressionResult with compressed memory and metadata
        """
        level = level or self.config.default_level
        start_time = time.time()

        try:
            # Skip compression for short content
            if len(content) < self.config.min_length_for_compression:
                compressed = self._no_compression(content)
                return CompressionResult(
                    compressed=compressed,
                    level=CompressionLevel.NONE,
                    method="skip",
                    compression_time_ms=int((time.time() - start_time) * 1000),
                )

            # No compression requested
            if level == CompressionLevel.NONE:
                compressed = self._no_compression(content)
                return CompressionResult(
                    compressed=compressed,
                    level=level,
                    method="none",
                    compression_time_ms=int((time.time() - start_time) * 1000),
                )

            # Try LLM-based compression
            if self.llm:
                try:
                    if content_type == "outcome":
                        compressed = self._llm_compress_outcome(content, level)
                    elif content_type == "conversation":
                        compressed = self._llm_compress_conversation(content, level)
                    else:
                        compressed = self._llm_compress_general(content, level)

                    return CompressionResult(
                        compressed=compressed,
                        level=level,
                        method="llm",
                        compression_time_ms=int((time.time() - start_time) * 1000),
                    )
                except Exception as e:
                    logger.warning(f"LLM compression failed, falling back: {e}")
                    # Fall through to rule-based

            # Rule-based fallback
            compressed = self._rule_based_compression(content, level)
            return CompressionResult(
                compressed=compressed,
                level=level,
                method="rule_based",
                compression_time_ms=int((time.time() - start_time) * 1000),
            )

        except Exception as e:
            logger.error(f"Compression failed: {e}")
            return CompressionResult(
                compressed=None,
                level=level,
                method="error",
                compression_time_ms=int((time.time() - start_time) * 1000),
                success=False,
                error=str(e),
            )
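
    # Illustrative sketch (editorial, not part of the shipped file): compress()
    # degrades gracefully - short input is skipped, an LLM failure falls
    # through to the rule-based path, and only an unexpected error yields
    # success=False.
    #
    #     compressor = MemoryCompressor()            # no LLM client configured
    #     result = compressor.compress("long text..." * 50)
    #     assert result.method == "rule_based"
    #     tiny = compressor.compress("short note")   # under min_length_for_compression
    #     assert tiny.method == "skip" and tiny.level is CompressionLevel.NONE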

    def compress_outcome(
        self,
        verbose_outcome: str,
        level: Optional[CompressionLevel] = None,
    ) -> CompressedMemory:
        """
        Compress a task outcome into structured memory.

        Args:
            verbose_outcome: Verbose task outcome description
            level: Compression level

        Returns:
            CompressedMemory with extracted key information
        """
        result = self.compress(verbose_outcome, level, content_type="outcome")
        if result.compressed:
            return result.compressed
        # Fallback to no compression on error
        return self._no_compression(verbose_outcome)

    def compress_conversation(
        self,
        conversation: str,
        focus: Optional[str] = None,
        level: Optional[CompressionLevel] = None,
    ) -> CompressedMemory:
        """
        Extract learnable knowledge from a conversation.

        Args:
            conversation: Conversation content
            focus: Optional focus area for extraction
            level: Compression level

        Returns:
            CompressedMemory with extracted knowledge
        """
        level = level or self.config.default_level

        if level == CompressionLevel.NONE:
            return self._no_compression(conversation)

        if self.llm:
            try:
                return self._llm_compress_conversation(conversation, level, focus)
            except Exception as e:
                logger.warning(f"LLM conversation compression failed: {e}")

        return self._rule_based_compression(conversation, level)

    def extract_heuristic(
        self,
        experiences: List[str],
        min_experiences: int = 3,
    ) -> Optional[str]:
        """
        Extract a general rule from multiple similar experiences.

        Args:
            experiences: List of similar experience descriptions
            min_experiences: Minimum number of experiences required

        Returns:
            Extracted heuristic rule, or None if no pattern found
        """
        if len(experiences) < min_experiences:
            logger.debug(
                f"Not enough experiences ({len(experiences)}) for heuristic extraction"
            )
            return None

        if not self.llm:
            # Try simple rule-based extraction
            return self._rule_based_heuristic(experiences)

        prompt = f"""Analyze these {len(experiences)} similar experiences and extract a general rule.

EXPERIENCES:
{chr(10).join(f"{i + 1}. {e}" for i, e in enumerate(experiences[:10]))}

If a clear, actionable pattern exists, state it as:
"When [specific situation], then [specific action] because [brief reason]."

Requirements:
- The pattern must apply to ALL experiences
- Be specific and actionable, not vague
- Keep it to ONE sentence

If no clear pattern exists or experiences are too different, respond exactly: NO_PATTERN"""

        try:
            response = self.llm.complete(
                prompt, timeout=self.config.llm_timeout_seconds
            ).strip()

            if "NO_PATTERN" in response.upper():
                return None

            # Clean up response
            response = response.strip('"').strip()
            if response and len(response) > 10:
                return response
            return None

        except Exception as e:
            logger.warning(f"Heuristic extraction failed: {e}")
            return self._rule_based_heuristic(experiences)
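
    # Illustrative sketch (editorial, not part of the shipped file): without
    # an LLM, extract_heuristic() falls back to _rule_based_heuristic() below,
    # which keys on action words shared by most of the experiences.
    #
    #     rule = compressor.extract_heuristic([
    #         "Always verify the config before deploying",
    #         "Deploys failed until we verify the config",
    #         "We verify the config to avoid bad deploys",
    #     ])
    #     # "verify" is common to all three, so the LLM-free path returns
    #     # "Consider using 'verify' approach based on 3 similar experiences."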

    def deduplicate_knowledge(
        self,
        new_knowledge: str,
        existing_knowledge: List[str],
    ) -> Optional[str]:
        """
        Merge new knowledge with existing, removing redundancy.

        Args:
            new_knowledge: New knowledge to potentially add
            existing_knowledge: List of existing knowledge items

        Returns:
            - new_knowledge if unique
            - Merged statement if overlapping
            - None if duplicate
        """
        if not existing_knowledge:
            return new_knowledge

        # Find similar existing knowledge
        similar = self._find_similar(new_knowledge, existing_knowledge)
        if not similar:
            return new_knowledge

        if not self.llm:
            # Simple dedup: check for high overlap
            for existing in similar:
                if self._is_duplicate(new_knowledge, existing):
                    return None
            return new_knowledge

        prompt = f"""Compare new knowledge with existing similar items and decide how to handle.

NEW KNOWLEDGE:
{new_knowledge}

EXISTING SIMILAR KNOWLEDGE:
{chr(10).join(f"- {k}" for k in similar[:5])}

Decide:
1. If new knowledge is completely redundant (says nothing new): respond "DUPLICATE"
2. If new adds information: respond with a merged statement that combines both
3. If new contradicts existing: respond "CONTRADICTION: [brief explanation]"

Keep merged statements concise (1-2 sentences max)."""

        try:
            response = self.llm.complete(
                prompt, timeout=self.config.llm_timeout_seconds
            ).strip()

            if "DUPLICATE" in response.upper():
                return None
            if response.upper().startswith("CONTRADICTION"):
                # Log but return new knowledge (let caller decide)
                logger.warning(f"Knowledge contradiction detected: {response}")
                return new_knowledge

            # Return merged statement
            return response if response else new_knowledge

        except Exception as e:
            logger.warning(f"Deduplication failed: {e}")
            return new_knowledge

    def batch_compress(
        self,
        contents: List[str],
        level: Optional[CompressionLevel] = None,
    ) -> List[CompressionResult]:
        """
        Compress multiple content items.

        Args:
            contents: List of content strings to compress
            level: Compression level for all items

        Returns:
            List of CompressionResult objects
        """
        return [self.compress(content, level) for content in contents]
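
    # Illustrative sketch (editorial, not part of the shipped file): without
    # an LLM, deduplicate_knowledge() reduces to the overlap checks defined
    # below (_find_similar, then _is_duplicate at a 0.8 threshold).
    #
    #     kept = compressor.deduplicate_knowledge(
    #         "The staging API limits requests to 100 per minute",
    #         ["The staging API limits requests to 100 per minute"],
    #     )
    #     assert kept is None  # near-total overlap -> treated as a duplicate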

    # ==================== LLM-BASED COMPRESSION ====================

    def _llm_compress_outcome(
        self,
        content: str,
        level: CompressionLevel,
    ) -> CompressedMemory:
        """LLM-based outcome compression."""
        level_instruction = self._get_level_instruction(level)

        prompt = f"""Compress this task outcome into essential information only.
{level_instruction}

TASK OUTCOME:
{content}

Extract in this exact format:
SUMMARY: [1-2 sentence compressed summary]
KEY_FACTS:
- [fact 1]
- [fact 2]
(max 5 facts)
CONSTRAINTS:
- [constraint 1]
(max 3 constraints, or "None" if none)
PATTERNS:
- [reusable pattern]
(max 3 patterns, or "None" if none)

Every word must earn its place. Be extremely concise."""

        response = self.llm.complete(prompt, timeout=self.config.llm_timeout_seconds)
        return self._parse_llm_response(response, content)

    def _llm_compress_general(
        self,
        content: str,
        level: CompressionLevel,
    ) -> CompressedMemory:
        """LLM-based general content compression."""
        level_instruction = self._get_level_instruction(level)

        prompt = f"""Compress this content, extracting only essential information.
{level_instruction}

CONTENT:
{content}

Extract in this exact format:
SUMMARY: [Compressed content - be concise]
KEY_FACTS:
- [fact 1]
- [fact 2]
(max 5 most important facts)
CONSTRAINTS:
- [limitation or requirement]
(max 3, or "None")
PATTERNS:
- [reusable insight]
(max 3, or "None")"""

        response = self.llm.complete(prompt, timeout=self.config.llm_timeout_seconds)
        return self._parse_llm_response(response, content)

    def _llm_compress_conversation(
        self,
        content: str,
        level: CompressionLevel,
        focus: Optional[str] = None,
    ) -> CompressedMemory:
        """LLM-based conversation compression."""
        level_instruction = self._get_level_instruction(level)
        focus_clause = f"\nFocus specifically on: {focus}" if focus else ""

        prompt = f"""Extract learnable knowledge from this conversation.
{level_instruction}{focus_clause}

Skip pleasantries, tangents, and filler. Extract only actionable knowledge.

CONVERSATION:
{content}

Extract in this exact format:
SUMMARY: [Key takeaways in 1-2 sentences]
KEY_FACTS:
- [Confirmed fact or decision]
(max 5 most important)
CONSTRAINTS:
- [Limitation or requirement discovered]
(max 3, or "None")
PATTERNS:
- [Reusable pattern or rule identified]
(max 3, or "None")"""

        response = self.llm.complete(prompt, timeout=self.config.llm_timeout_seconds)
        return self._parse_llm_response(response, content)

    def _get_level_instruction(self, level: CompressionLevel) -> str:
        """Get compression instruction based on level."""
        if level == CompressionLevel.LIGHT:
            return "COMPRESSION: Light - Remove redundancy but preserve detail."
        elif level == CompressionLevel.MEDIUM:
            return (
                "COMPRESSION: Medium - Extract key points only. Target 3x compression."
            )
        elif level == CompressionLevel.AGGRESSIVE:
            return "COMPRESSION: Aggressive - Maximum compression. Only absolute essentials. Target 5x+ compression."
        return ""

    def _parse_llm_response(
        self,
        response: str,
        original_content: str,
    ) -> CompressedMemory:
        """Parse LLM response into CompressedMemory."""
        lines = response.strip().split("\n")

        summary = ""
        key_facts: List[str] = []
        constraints: List[str] = []
        patterns: List[str] = []

        current_section = None

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # Check for section headers
            upper_line = line.upper()
            if upper_line.startswith("SUMMARY:"):
                summary = line.split(":", 1)[1].strip() if ":" in line else ""
                current_section = "summary"
            elif upper_line.startswith("KEY_FACTS:") or upper_line.startswith(
                "KEY FACTS:"
            ):
                current_section = "facts"
            elif upper_line.startswith("CONSTRAINTS:"):
                current_section = "constraints"
            elif upper_line.startswith("PATTERNS:"):
                current_section = "patterns"
            elif line.startswith("-") or line.startswith("•"):
                # Bullet point
                item = line.lstrip("-•").strip()
                if item.lower() == "none" or not item:
                    continue
                if (
                    current_section == "facts"
                    and len(key_facts) < self.config.max_key_facts
                ):
                    key_facts.append(item)
                elif (
                    current_section == "constraints"
                    and len(constraints) < self.config.max_constraints
                ):
                    constraints.append(item)
                elif (
                    current_section == "patterns"
                    and len(patterns) < self.config.max_patterns
                ):
                    patterns.append(item)
            elif current_section == "summary" and not summary:
                # Continuation of summary
                summary = line

        # Fallback if no summary extracted
        if not summary:
            summary = (
                original_content[:500] + "..."
                if len(original_content) > 500
                else original_content
            )

        compressed_length = len(summary)
        original_length = len(original_content)

        return CompressedMemory(
            original_length=original_length,
            compressed_length=compressed_length,
            compression_ratio=original_length / compressed_length
            if compressed_length > 0
            else 1.0,
            key_facts=key_facts,
            constraints=constraints,
            patterns=patterns,
            summary=summary,
            full_content=original_content if self.config.preserve_full_content else "",
        )
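
    # Illustrative sketch (editorial, not part of the shipped file): the
    # parser above is line-oriented, so any client whose reply follows the
    # prompted layout round-trips cleanly; "None" bullets are dropped. The
    # call below uses a private method purely for illustration.
    #
    #     reply = (
    #         "SUMMARY: Migration finished after fixing the lock timeout.\n"
    #         "KEY_FACTS:\n- The migration takes ~4 minutes\n"
    #         "CONSTRAINTS:\n- Must run outside business hours\n"
    #         "PATTERNS:\n- None\n"
    #     )
    #     mem = compressor._parse_llm_response(reply, original_content="...")
    #     assert mem.constraints == ["Must run outside business hours"]
    #     assert mem.patterns == []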

    # ==================== RULE-BASED COMPRESSION ====================

    def _no_compression(self, content: str) -> CompressedMemory:
        """Return content without compression."""
        return CompressedMemory(
            original_length=len(content),
            compressed_length=len(content),
            compression_ratio=1.0,
            key_facts=[],
            constraints=[],
            patterns=[],
            summary=content,
            full_content=content,
        )

    def _rule_based_compression(
        self,
        text: str,
        level: CompressionLevel,
    ) -> CompressedMemory:
        """
        Rule-based compression fallback when LLM unavailable.

        Uses sentence-level heuristics to extract key information.
        """
        # Split into sentences
        sentences = self._split_sentences(text)

        if level == CompressionLevel.LIGHT:
            summary = self._light_compression(sentences)
        elif level == CompressionLevel.MEDIUM:
            summary = self._medium_compression(sentences)
        else:  # AGGRESSIVE
            summary = self._aggressive_compression(sentences, text)

        # Extract key facts using indicators
        key_facts = self._extract_key_facts(sentences)

        # Extract constraints
        constraints = self._extract_constraints(sentences)

        # No pattern extraction without LLM
        patterns: List[str] = []

        compressed_length = len(summary)
        original_length = len(text)

        return CompressedMemory(
            original_length=original_length,
            compressed_length=compressed_length,
            compression_ratio=original_length / compressed_length
            if compressed_length > 0
            else 1.0,
            key_facts=key_facts[: self.config.max_key_facts],
            constraints=constraints[: self.config.max_constraints],
            patterns=patterns,
            summary=summary,
            full_content=text if self.config.preserve_full_content else "",
        )

    def _split_sentences(self, text: str) -> List[str]:
        """Split text into sentences."""
        # Simple sentence splitting
        sentences = re.split(r"(?<=[.!?])\s+", text)
        return [s.strip() for s in sentences if s.strip()]

    def _light_compression(self, sentences: List[str]) -> str:
        """Light compression: remove duplicate/similar sentences."""
        if not sentences:
            return ""

        unique_sentences: List[str] = []
        seen_normalized: set = set()

        for sentence in sentences:
            normalized = self._normalize_sentence(sentence)
            if normalized not in seen_normalized:
                seen_normalized.add(normalized)
                unique_sentences.append(sentence)

        return " ".join(unique_sentences)

    def _medium_compression(self, sentences: List[str]) -> str:
        """Medium compression: extract key sentences."""
        if not sentences:
            return ""

        # Indicators of important sentences
        importance_indicators = [
            " is ",
            " are ",
            " was ",
            " were ",
            " should ",
            " must ",
            " need ",
            " because ",
            " therefore ",
            " however ",
            " key ",
            " important ",
            " essential ",
            " result ",
            " conclusion ",
            " found ",
        ]

        # Score sentences by importance
        scored: List[tuple] = []
        for i, sentence in enumerate(sentences):
            lower = sentence.lower()
            score = sum(1 for ind in importance_indicators if ind in lower)
            # Boost first and last sentences
            if i == 0:
                score += 2
            if i == len(sentences) - 1:
                score += 1
            scored.append((score, i, sentence))

        # Sort by score descending, then by position
        scored.sort(key=lambda x: (-x[0], x[1]))

        # Take top sentences, maintaining original order
        target_count = max(2, len(sentences) // 3)
        selected_indices = sorted([s[1] for s in scored[:target_count]])
        selected_sentences = [sentences[i] for i in selected_indices]

        return " ".join(selected_sentences)

    def _aggressive_compression(self, sentences: List[str], original: str) -> str:
        """Aggressive compression: minimal content."""
        if not sentences:
            return original[:200] + "..." if len(original) > 200 else original

        if len(sentences) == 1:
            # Single sentence - truncate if needed
            return (
                sentences[0][:300] + "..." if len(sentences[0]) > 300 else sentences[0]
            )

        # First sentence (context) + most important + last (conclusion)
        first = sentences[0]
        last = sentences[-1] if len(sentences) > 1 else ""

        # Find most important middle sentence
        middle_sentences = sentences[1:-1] if len(sentences) > 2 else []
        best_middle = ""
        if middle_sentences:
            importance_words = [
                "must",
                "should",
                "key",
                "important",
                "because",
                "therefore",
            ]
            for sentence in middle_sentences:
                if any(w in sentence.lower() for w in importance_words):
                    best_middle = sentence
                    break

        parts = [first]
        if best_middle and best_middle != last:
            parts.append(best_middle)
        if last and last != first:
            parts.append(last)

        return " ".join(parts)

    def _extract_key_facts(self, sentences: List[str]) -> List[str]:
        """Extract sentences that appear to be stating facts."""
        fact_indicators = [" is ", " are ", " has ", " have ", " was ", " were "]
        facts = []

        for sentence in sentences:
            lower = sentence.lower()
            if any(ind in lower for ind in fact_indicators):
                # Clean up the sentence
                clean = sentence.strip()
                if clean and len(clean) > 10:
                    facts.append(clean)

        return facts[: self.config.max_key_facts]

    def _extract_constraints(self, sentences: List[str]) -> List[str]:
        """Extract sentences that appear to describe constraints."""
        constraint_indicators = [
            " must ",
            " cannot ",
            " should not ",
            " shouldn't ",
            " limit ",
            " require ",
            " only ",
            " never ",
            " avoid ",
            " prevent ",
            " restrict ",
        ]
        constraints = []

        for sentence in sentences:
            lower = sentence.lower()
            if any(ind in lower for ind in constraint_indicators):
                clean = sentence.strip()
                if clean and len(clean) > 10:
                    constraints.append(clean)

        return constraints[: self.config.max_constraints]

    def _normalize_sentence(self, sentence: str) -> str:
        """Normalize sentence for deduplication."""
        # Lowercase, remove extra whitespace, remove punctuation
        normalized = sentence.lower()
        normalized = re.sub(r"[^\w\s]", "", normalized)
        normalized = " ".join(normalized.split())
        return normalized

    def _rule_based_heuristic(self, experiences: List[str]) -> Optional[str]:
        """Try to extract heuristic using simple pattern matching."""
        # Look for common words across experiences
        word_counts: Dict[str, int] = {}
        for exp in experiences:
            words = set(exp.lower().split())
            for word in words:
                if len(word) > 3:  # Skip short words
                    word_counts[word] = word_counts.get(word, 0) + 1

        # Find words common to most experiences
        threshold = len(experiences) * 0.6
        common_words = [w for w, c in word_counts.items() if c >= threshold]

        if not common_words:
            return None

        # Very basic pattern: if common action words found
        action_words = ["use", "apply", "check", "verify", "add", "remove", "update"]
        found_actions = [w for w in common_words if w in action_words]

        if found_actions:
            return f"Consider using '{found_actions[0]}' approach based on {len(experiences)} similar experiences."

        return None

    def _find_similar(
        self,
        query: str,
        candidates: List[str],
        threshold: float = 0.3,
    ) -> List[str]:
        """Find candidates similar to query using word overlap."""
        query_words = set(query.lower().split())
        similar = []

        for candidate in candidates:
            cand_words = set(candidate.lower().split())
            if not query_words or not cand_words:
                continue

            # Jaccard similarity
            intersection = len(query_words & cand_words)
            union = len(query_words | cand_words)
            similarity = intersection / union if union > 0 else 0

            if similarity >= threshold:
                similar.append(candidate)

        return similar[:5]

    def _is_duplicate(self, new: str, existing: str, threshold: float = 0.8) -> bool:
        """Check if new content is duplicate of existing."""
        new_words = set(new.lower().split())
        existing_words = set(existing.lower().split())

        if not new_words or not existing_words:
            return False

        intersection = len(new_words & existing_words)
        smaller = min(len(new_words), len(existing_words))

        return intersection / smaller >= threshold if smaller > 0 else False
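
    # Illustrative note (editorial, not part of the shipped file): the two
    # thresholds are deliberately asymmetric. _find_similar uses Jaccard
    # (intersection/union >= 0.3) to cast a wide net, while _is_duplicate
    # divides by the smaller set (>= 0.8) so a short sentence contained in a
    # longer one still registers. E.g. "use retries on timeout" vs "always
    # use retries on timeout errors": intersection 4 words, union 6 ->
    # Jaccard 0.67 (similar); 4 / min(4, 6) = 1.0 -> duplicate.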


def create_compressor(
    llm_client: Optional[LLMClient] = None,
    config: Optional[CompressionConfig] = None,
) -> MemoryCompressor:
    """
    Factory function to create a MemoryCompressor.

    Args:
        llm_client: Optional LLM client for intelligent compression
        config: Compression configuration

    Returns:
        Configured MemoryCompressor
    """
    return MemoryCompressor(llm_client=llm_client, config=config)
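
# Illustrative sketch (editorial, not part of the shipped file): end-to-end
# use of the factory. The compressor works without an LLM client; passing one
# only upgrades extraction quality. verbose_log_text is a hypothetical string.
#
#     compressor = create_compressor(
#         config=CompressionConfig(default_level=CompressionLevel.AGGRESSIVE)
#     )
#     result = compressor.compress(verbose_log_text, content_type="outcome")
#     if result.success and result.compressed:
#         print(result.compressed.summary, result.compressed.to_metadata())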