alma-memory 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111) hide show
  1. alma/__init__.py +296 -194
  2. alma/compression/__init__.py +33 -0
  3. alma/compression/pipeline.py +980 -0
  4. alma/confidence/__init__.py +47 -47
  5. alma/confidence/engine.py +540 -540
  6. alma/confidence/types.py +351 -351
  7. alma/config/loader.py +157 -157
  8. alma/consolidation/__init__.py +23 -23
  9. alma/consolidation/engine.py +678 -678
  10. alma/consolidation/prompts.py +84 -84
  11. alma/core.py +1189 -322
  12. alma/domains/__init__.py +30 -30
  13. alma/domains/factory.py +359 -359
  14. alma/domains/schemas.py +448 -448
  15. alma/domains/types.py +272 -272
  16. alma/events/__init__.py +75 -75
  17. alma/events/emitter.py +285 -284
  18. alma/events/storage_mixin.py +246 -246
  19. alma/events/types.py +126 -126
  20. alma/events/webhook.py +425 -425
  21. alma/exceptions.py +49 -49
  22. alma/extraction/__init__.py +31 -31
  23. alma/extraction/auto_learner.py +265 -264
  24. alma/extraction/extractor.py +420 -420
  25. alma/graph/__init__.py +106 -81
  26. alma/graph/backends/__init__.py +32 -18
  27. alma/graph/backends/kuzu.py +624 -0
  28. alma/graph/backends/memgraph.py +432 -0
  29. alma/graph/backends/memory.py +236 -236
  30. alma/graph/backends/neo4j.py +417 -417
  31. alma/graph/base.py +159 -159
  32. alma/graph/extraction.py +198 -198
  33. alma/graph/store.py +860 -860
  34. alma/harness/__init__.py +35 -35
  35. alma/harness/base.py +386 -386
  36. alma/harness/domains.py +705 -705
  37. alma/initializer/__init__.py +37 -37
  38. alma/initializer/initializer.py +418 -418
  39. alma/initializer/types.py +250 -250
  40. alma/integration/__init__.py +62 -62
  41. alma/integration/claude_agents.py +444 -432
  42. alma/integration/helena.py +423 -423
  43. alma/integration/victor.py +471 -471
  44. alma/learning/__init__.py +101 -86
  45. alma/learning/decay.py +878 -0
  46. alma/learning/forgetting.py +1446 -1446
  47. alma/learning/heuristic_extractor.py +390 -390
  48. alma/learning/protocols.py +374 -374
  49. alma/learning/validation.py +346 -346
  50. alma/mcp/__init__.py +123 -45
  51. alma/mcp/__main__.py +156 -156
  52. alma/mcp/resources.py +122 -122
  53. alma/mcp/server.py +955 -591
  54. alma/mcp/tools.py +3254 -511
  55. alma/observability/__init__.py +91 -0
  56. alma/observability/config.py +302 -0
  57. alma/observability/guidelines.py +170 -0
  58. alma/observability/logging.py +424 -0
  59. alma/observability/metrics.py +583 -0
  60. alma/observability/tracing.py +440 -0
  61. alma/progress/__init__.py +21 -21
  62. alma/progress/tracker.py +607 -607
  63. alma/progress/types.py +250 -250
  64. alma/retrieval/__init__.py +134 -53
  65. alma/retrieval/budget.py +525 -0
  66. alma/retrieval/cache.py +1304 -1061
  67. alma/retrieval/embeddings.py +202 -202
  68. alma/retrieval/engine.py +850 -366
  69. alma/retrieval/modes.py +365 -0
  70. alma/retrieval/progressive.py +560 -0
  71. alma/retrieval/scoring.py +344 -344
  72. alma/retrieval/trust_scoring.py +637 -0
  73. alma/retrieval/verification.py +797 -0
  74. alma/session/__init__.py +19 -19
  75. alma/session/manager.py +442 -399
  76. alma/session/types.py +288 -288
  77. alma/storage/__init__.py +101 -61
  78. alma/storage/archive.py +233 -0
  79. alma/storage/azure_cosmos.py +1259 -1048
  80. alma/storage/base.py +1083 -525
  81. alma/storage/chroma.py +1443 -1443
  82. alma/storage/constants.py +103 -0
  83. alma/storage/file_based.py +614 -619
  84. alma/storage/migrations/__init__.py +21 -0
  85. alma/storage/migrations/base.py +321 -0
  86. alma/storage/migrations/runner.py +323 -0
  87. alma/storage/migrations/version_stores.py +337 -0
  88. alma/storage/migrations/versions/__init__.py +11 -0
  89. alma/storage/migrations/versions/v1_0_0.py +373 -0
  90. alma/storage/migrations/versions/v1_1_0_workflow_context.py +551 -0
  91. alma/storage/pinecone.py +1080 -1080
  92. alma/storage/postgresql.py +1948 -1452
  93. alma/storage/qdrant.py +1306 -1306
  94. alma/storage/sqlite_local.py +3041 -1358
  95. alma/testing/__init__.py +46 -0
  96. alma/testing/factories.py +301 -0
  97. alma/testing/mocks.py +389 -0
  98. alma/types.py +292 -264
  99. alma/utils/__init__.py +19 -0
  100. alma/utils/tokenizer.py +521 -0
  101. alma/workflow/__init__.py +83 -0
  102. alma/workflow/artifacts.py +170 -0
  103. alma/workflow/checkpoint.py +311 -0
  104. alma/workflow/context.py +228 -0
  105. alma/workflow/outcomes.py +189 -0
  106. alma/workflow/reducers.py +393 -0
  107. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/METADATA +244 -72
  108. alma_memory-0.7.0.dist-info/RECORD +112 -0
  109. alma_memory-0.5.0.dist-info/RECORD +0 -76
  110. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/WHEEL +0 -0
  111. {alma_memory-0.5.0.dist-info → alma_memory-0.7.0.dist-info}/top_level.txt +0 -0
@@ -1,619 +1,614 @@
1
- """
2
- ALMA File-Based Storage Backend.
3
-
4
- Simple JSON file storage for testing and fallback scenarios.
5
- No vector search - uses basic text matching for retrieval.
6
- """
7
-
8
- import json
9
- import logging
10
- from datetime import datetime, timezone
11
- from pathlib import Path
12
- from typing import Any, Dict, List, Optional
13
-
14
- from alma.storage.base import StorageBackend
15
- from alma.types import (
16
- AntiPattern,
17
- DomainKnowledge,
18
- Heuristic,
19
- Outcome,
20
- UserPreference,
21
- )
22
-
23
- logger = logging.getLogger(__name__)
24
-
25
-
26
- class FileBasedStorage(StorageBackend):
27
- """
28
- File-based storage using JSON files.
29
-
30
- Structure:
31
- .alma/
32
- ├── heuristics.json
33
- ├── outcomes.json
34
- ├── preferences.json
35
- ├── domain_knowledge.json
36
- └── anti_patterns.json
37
-
38
- Note: This backend does NOT support vector search.
39
- Use SQLiteStorage or AzureCosmosStorage for semantic retrieval.
40
- """
41
-
42
- def __init__(self, storage_dir: Path):
43
- """
44
- Initialize file-based storage.
45
-
46
- Args:
47
- storage_dir: Directory to store JSON files
48
- """
49
- self.storage_dir = Path(storage_dir)
50
- self.storage_dir.mkdir(parents=True, exist_ok=True)
51
-
52
- # File paths
53
- self._files = {
54
- "heuristics": self.storage_dir / "heuristics.json",
55
- "outcomes": self.storage_dir / "outcomes.json",
56
- "preferences": self.storage_dir / "preferences.json",
57
- "domain_knowledge": self.storage_dir / "domain_knowledge.json",
58
- "anti_patterns": self.storage_dir / "anti_patterns.json",
59
- }
60
-
61
- # Initialize empty files if they don't exist
62
- for file_path in self._files.values():
63
- if not file_path.exists():
64
- self._write_json(file_path, [])
65
-
66
- @classmethod
67
- def from_config(cls, config: Dict[str, Any]) -> "FileBasedStorage":
68
- """Create instance from configuration."""
69
- storage_dir = config.get("storage_dir", ".alma")
70
- return cls(storage_dir=Path(storage_dir))
71
-
72
- # ==================== WRITE OPERATIONS ====================
73
-
74
- def save_heuristic(self, heuristic: Heuristic) -> str:
75
- """Save a heuristic (UPSERT - update if exists, insert if new)."""
76
- data = self._read_json(self._files["heuristics"])
77
- record = self._to_dict(heuristic)
78
- # Find and replace existing, or append new
79
- found = False
80
- for i, existing in enumerate(data):
81
- if existing.get("id") == record["id"]:
82
- data[i] = record
83
- found = True
84
- break
85
- if not found:
86
- data.append(record)
87
- self._write_json(self._files["heuristics"], data)
88
- logger.debug(f"Saved heuristic: {heuristic.id}")
89
- return heuristic.id
90
-
91
- def save_outcome(self, outcome: Outcome) -> str:
92
- """Save an outcome (UPSERT - update if exists, insert if new)."""
93
- data = self._read_json(self._files["outcomes"])
94
- record = self._to_dict(outcome)
95
- # Find and replace existing, or append new
96
- found = False
97
- for i, existing in enumerate(data):
98
- if existing.get("id") == record["id"]:
99
- data[i] = record
100
- found = True
101
- break
102
- if not found:
103
- data.append(record)
104
- self._write_json(self._files["outcomes"], data)
105
- logger.debug(f"Saved outcome: {outcome.id}")
106
- return outcome.id
107
-
108
- def save_user_preference(self, preference: UserPreference) -> str:
109
- """Save a user preference (UPSERT - update if exists, insert if new)."""
110
- data = self._read_json(self._files["preferences"])
111
- record = self._to_dict(preference)
112
- # Find and replace existing, or append new
113
- found = False
114
- for i, existing in enumerate(data):
115
- if existing.get("id") == record["id"]:
116
- data[i] = record
117
- found = True
118
- break
119
- if not found:
120
- data.append(record)
121
- self._write_json(self._files["preferences"], data)
122
- logger.debug(f"Saved preference: {preference.id}")
123
- return preference.id
124
-
125
- def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
126
- """Save domain knowledge (UPSERT - update if exists, insert if new)."""
127
- data = self._read_json(self._files["domain_knowledge"])
128
- record = self._to_dict(knowledge)
129
- # Find and replace existing, or append new
130
- found = False
131
- for i, existing in enumerate(data):
132
- if existing.get("id") == record["id"]:
133
- data[i] = record
134
- found = True
135
- break
136
- if not found:
137
- data.append(record)
138
- self._write_json(self._files["domain_knowledge"], data)
139
- logger.debug(f"Saved domain knowledge: {knowledge.id}")
140
- return knowledge.id
141
-
142
- def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
143
- """Save an anti-pattern (UPSERT - update if exists, insert if new)."""
144
- data = self._read_json(self._files["anti_patterns"])
145
- record = self._to_dict(anti_pattern)
146
- # Find and replace existing, or append new
147
- found = False
148
- for i, existing in enumerate(data):
149
- if existing.get("id") == record["id"]:
150
- data[i] = record
151
- found = True
152
- break
153
- if not found:
154
- data.append(record)
155
- self._write_json(self._files["anti_patterns"], data)
156
- logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
157
- return anti_pattern.id
158
-
159
- # ==================== READ OPERATIONS ====================
160
-
161
- def get_heuristics(
162
- self,
163
- project_id: str,
164
- agent: Optional[str] = None,
165
- embedding: Optional[List[float]] = None,
166
- top_k: int = 5,
167
- min_confidence: float = 0.0,
168
- ) -> List[Heuristic]:
169
- """Get heuristics (no vector search - returns all matching filters)."""
170
- data = self._read_json(self._files["heuristics"])
171
-
172
- # Filter
173
- results = []
174
- for record in data:
175
- if record.get("project_id") != project_id:
176
- continue
177
- if agent and record.get("agent") != agent:
178
- continue
179
- if record.get("confidence", 0) < min_confidence:
180
- continue
181
- results.append(self._to_heuristic(record))
182
-
183
- # Sort by confidence and return top_k
184
- results.sort(key=lambda x: -x.confidence)
185
- return results[:top_k]
186
-
187
- def get_outcomes(
188
- self,
189
- project_id: str,
190
- agent: Optional[str] = None,
191
- task_type: Optional[str] = None,
192
- embedding: Optional[List[float]] = None,
193
- top_k: int = 5,
194
- success_only: bool = False,
195
- ) -> List[Outcome]:
196
- """Get outcomes (no vector search)."""
197
- data = self._read_json(self._files["outcomes"])
198
-
199
- results = []
200
- for record in data:
201
- if record.get("project_id") != project_id:
202
- continue
203
- if agent and record.get("agent") != agent:
204
- continue
205
- if task_type and record.get("task_type") != task_type:
206
- continue
207
- if success_only and not record.get("success"):
208
- continue
209
- results.append(self._to_outcome(record))
210
-
211
- # Sort by timestamp (most recent first) and return top_k
212
- results.sort(key=lambda x: x.timestamp, reverse=True)
213
- return results[:top_k]
214
-
215
- def get_user_preferences(
216
- self,
217
- user_id: str,
218
- category: Optional[str] = None,
219
- ) -> List[UserPreference]:
220
- """Get user preferences."""
221
- data = self._read_json(self._files["preferences"])
222
-
223
- results = []
224
- for record in data:
225
- if record.get("user_id") != user_id:
226
- continue
227
- if category and record.get("category") != category:
228
- continue
229
- results.append(self._to_user_preference(record))
230
-
231
- return results
232
-
233
- def get_domain_knowledge(
234
- self,
235
- project_id: str,
236
- agent: Optional[str] = None,
237
- domain: Optional[str] = None,
238
- embedding: Optional[List[float]] = None,
239
- top_k: int = 5,
240
- ) -> List[DomainKnowledge]:
241
- """Get domain knowledge (no vector search)."""
242
- data = self._read_json(self._files["domain_knowledge"])
243
-
244
- results = []
245
- for record in data:
246
- if record.get("project_id") != project_id:
247
- continue
248
- if agent and record.get("agent") != agent:
249
- continue
250
- if domain and record.get("domain") != domain:
251
- continue
252
- results.append(self._to_domain_knowledge(record))
253
-
254
- # Sort by confidence and return top_k
255
- results.sort(key=lambda x: -x.confidence)
256
- return results[:top_k]
257
-
258
- def get_anti_patterns(
259
- self,
260
- project_id: str,
261
- agent: Optional[str] = None,
262
- embedding: Optional[List[float]] = None,
263
- top_k: int = 5,
264
- ) -> List[AntiPattern]:
265
- """Get anti-patterns (no vector search)."""
266
- data = self._read_json(self._files["anti_patterns"])
267
-
268
- results = []
269
- for record in data:
270
- if record.get("project_id") != project_id:
271
- continue
272
- if agent and record.get("agent") != agent:
273
- continue
274
- results.append(self._to_anti_pattern(record))
275
-
276
- # Sort by occurrence count and return top_k
277
- results.sort(key=lambda x: -x.occurrence_count)
278
- return results[:top_k]
279
-
280
- # ==================== UPDATE OPERATIONS ====================
281
-
282
- def update_heuristic(
283
- self,
284
- heuristic_id: str,
285
- updates: Dict[str, Any],
286
- ) -> bool:
287
- """Update a heuristic's fields."""
288
- data = self._read_json(self._files["heuristics"])
289
-
290
- for i, record in enumerate(data):
291
- if record.get("id") == heuristic_id:
292
- data[i].update(updates)
293
- self._write_json(self._files["heuristics"], data)
294
- return True
295
-
296
- return False
297
-
298
- def increment_heuristic_occurrence(
299
- self,
300
- heuristic_id: str,
301
- success: bool,
302
- ) -> bool:
303
- """Increment heuristic occurrence count."""
304
- data = self._read_json(self._files["heuristics"])
305
-
306
- for i, record in enumerate(data):
307
- if record.get("id") == heuristic_id:
308
- data[i]["occurrence_count"] = record.get("occurrence_count", 0) + 1
309
- if success:
310
- data[i]["success_count"] = record.get("success_count", 0) + 1
311
- data[i]["last_validated"] = datetime.now(timezone.utc).isoformat()
312
- self._write_json(self._files["heuristics"], data)
313
- return True
314
-
315
- return False
316
-
317
- # ==================== UPDATE CONFIDENCE OPERATIONS ====================
318
-
319
- def update_heuristic_confidence(
320
- self,
321
- heuristic_id: str,
322
- new_confidence: float,
323
- ) -> bool:
324
- """Update a heuristic's confidence score."""
325
- data = self._read_json(self._files["heuristics"])
326
-
327
- for i, record in enumerate(data):
328
- if record.get("id") == heuristic_id:
329
- data[i]["confidence"] = new_confidence
330
- data[i]["last_validated"] = datetime.now(timezone.utc).isoformat()
331
- self._write_json(self._files["heuristics"], data)
332
- return True
333
-
334
- return False
335
-
336
- def update_knowledge_confidence(
337
- self,
338
- knowledge_id: str,
339
- new_confidence: float,
340
- ) -> bool:
341
- """Update domain knowledge confidence score."""
342
- data = self._read_json(self._files["domain_knowledge"])
343
-
344
- for i, record in enumerate(data):
345
- if record.get("id") == knowledge_id:
346
- data[i]["confidence"] = new_confidence
347
- data[i]["last_verified"] = datetime.now(timezone.utc).isoformat()
348
- self._write_json(self._files["domain_knowledge"], data)
349
- return True
350
-
351
- return False
352
-
353
- # ==================== DELETE OPERATIONS ====================
354
-
355
- def delete_heuristic(self, heuristic_id: str) -> bool:
356
- """Delete a single heuristic by ID."""
357
- data = self._read_json(self._files["heuristics"])
358
- original_count = len(data)
359
-
360
- filtered = [r for r in data if r.get("id") != heuristic_id]
361
- self._write_json(self._files["heuristics"], filtered)
362
-
363
- deleted = original_count != len(filtered)
364
- if deleted:
365
- logger.debug(f"Deleted heuristic: {heuristic_id}")
366
- return deleted
367
-
368
- def delete_outcome(self, outcome_id: str) -> bool:
369
- """Delete a single outcome by ID."""
370
- data = self._read_json(self._files["outcomes"])
371
- original_count = len(data)
372
-
373
- filtered = [r for r in data if r.get("id") != outcome_id]
374
- self._write_json(self._files["outcomes"], filtered)
375
-
376
- deleted = original_count != len(filtered)
377
- if deleted:
378
- logger.debug(f"Deleted outcome: {outcome_id}")
379
- return deleted
380
-
381
- def delete_domain_knowledge(self, knowledge_id: str) -> bool:
382
- """Delete a single domain knowledge entry by ID."""
383
- data = self._read_json(self._files["domain_knowledge"])
384
- original_count = len(data)
385
-
386
- filtered = [r for r in data if r.get("id") != knowledge_id]
387
- self._write_json(self._files["domain_knowledge"], filtered)
388
-
389
- deleted = original_count != len(filtered)
390
- if deleted:
391
- logger.debug(f"Deleted domain knowledge: {knowledge_id}")
392
- return deleted
393
-
394
- def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
395
- """Delete a single anti-pattern by ID."""
396
- data = self._read_json(self._files["anti_patterns"])
397
- original_count = len(data)
398
-
399
- filtered = [r for r in data if r.get("id") != anti_pattern_id]
400
- self._write_json(self._files["anti_patterns"], filtered)
401
-
402
- deleted = original_count != len(filtered)
403
- if deleted:
404
- logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
405
- return deleted
406
-
407
- def delete_outcomes_older_than(
408
- self,
409
- project_id: str,
410
- older_than: datetime,
411
- agent: Optional[str] = None,
412
- ) -> int:
413
- """Delete old outcomes."""
414
- data = self._read_json(self._files["outcomes"])
415
- original_count = len(data)
416
-
417
- filtered = []
418
- for record in data:
419
- if record.get("project_id") != project_id:
420
- filtered.append(record)
421
- continue
422
- if agent and record.get("agent") != agent:
423
- filtered.append(record)
424
- continue
425
-
426
- timestamp = self._parse_datetime(record.get("timestamp"))
427
- if timestamp and timestamp >= older_than:
428
- filtered.append(record)
429
-
430
- self._write_json(self._files["outcomes"], filtered)
431
- deleted = original_count - len(filtered)
432
- logger.info(f"Deleted {deleted} old outcomes")
433
- return deleted
434
-
435
- def delete_low_confidence_heuristics(
436
- self,
437
- project_id: str,
438
- below_confidence: float,
439
- agent: Optional[str] = None,
440
- ) -> int:
441
- """Delete low-confidence heuristics."""
442
- data = self._read_json(self._files["heuristics"])
443
- original_count = len(data)
444
-
445
- filtered = []
446
- for record in data:
447
- if record.get("project_id") != project_id:
448
- filtered.append(record)
449
- continue
450
- if agent and record.get("agent") != agent:
451
- filtered.append(record)
452
- continue
453
-
454
- if record.get("confidence", 0) >= below_confidence:
455
- filtered.append(record)
456
-
457
- self._write_json(self._files["heuristics"], filtered)
458
- deleted = original_count - len(filtered)
459
- logger.info(f"Deleted {deleted} low-confidence heuristics")
460
- return deleted
461
-
462
- # ==================== STATS ====================
463
-
464
- def get_stats(
465
- self,
466
- project_id: str,
467
- agent: Optional[str] = None,
468
- ) -> Dict[str, Any]:
469
- """Get memory statistics."""
470
- stats = {
471
- "project_id": project_id,
472
- "agent": agent,
473
- "heuristics_count": 0,
474
- "outcomes_count": 0,
475
- "preferences_count": 0,
476
- "domain_knowledge_count": 0,
477
- "anti_patterns_count": 0,
478
- }
479
-
480
- for name, file_path in self._files.items():
481
- data = self._read_json(file_path)
482
- count = 0
483
- for record in data:
484
- if name == "preferences":
485
- # Preferences don't have project_id
486
- count += 1
487
- elif record.get("project_id") == project_id:
488
- if agent is None or record.get("agent") == agent:
489
- count += 1
490
- stats[f"{name}_count"] = count
491
-
492
- stats["total_count"] = sum(stats[k] for k in stats if k.endswith("_count"))
493
-
494
- return stats
495
-
496
- # ==================== HELPERS ====================
497
-
498
- def _read_json(self, file_path: Path) -> List[Dict]:
499
- """Read JSON file."""
500
- try:
501
- with open(file_path, "r") as f:
502
- return json.load(f)
503
- except (json.JSONDecodeError, FileNotFoundError):
504
- return []
505
-
506
- def _write_json(self, file_path: Path, data: List[Dict]):
507
- """Write JSON file."""
508
- with open(file_path, "w") as f:
509
- json.dump(data, f, indent=2, default=str)
510
-
511
- def _to_dict(self, obj: Any) -> Dict:
512
- """Convert dataclass to dict with datetime handling."""
513
- if hasattr(obj, "__dataclass_fields__"):
514
- result = {}
515
- for field_name in obj.__dataclass_fields__:
516
- value = getattr(obj, field_name)
517
- if isinstance(value, datetime):
518
- result[field_name] = value.isoformat()
519
- elif value is not None:
520
- result[field_name] = value
521
- return result
522
- return dict(obj)
523
-
524
- def _parse_datetime(self, value: Any) -> Optional[datetime]:
525
- """Parse datetime from string or return as-is."""
526
- if value is None:
527
- return None
528
- if isinstance(value, datetime):
529
- return value
530
- try:
531
- return datetime.fromisoformat(value.replace("Z", "+00:00"))
532
- except (ValueError, AttributeError):
533
- return None
534
-
535
- def _to_heuristic(self, record: Dict) -> Heuristic:
536
- """Convert dict to Heuristic."""
537
- return Heuristic(
538
- id=record["id"],
539
- agent=record["agent"],
540
- project_id=record["project_id"],
541
- condition=record["condition"],
542
- strategy=record["strategy"],
543
- confidence=record.get("confidence", 0.0),
544
- occurrence_count=record.get("occurrence_count", 0),
545
- success_count=record.get("success_count", 0),
546
- last_validated=self._parse_datetime(record.get("last_validated"))
547
- or datetime.now(timezone.utc),
548
- created_at=self._parse_datetime(record.get("created_at"))
549
- or datetime.now(timezone.utc),
550
- embedding=record.get("embedding"),
551
- metadata=record.get("metadata", {}),
552
- )
553
-
554
- def _to_outcome(self, record: Dict) -> Outcome:
555
- """Convert dict to Outcome."""
556
- return Outcome(
557
- id=record["id"],
558
- agent=record["agent"],
559
- project_id=record["project_id"],
560
- task_type=record.get("task_type", "general"),
561
- task_description=record["task_description"],
562
- success=record.get("success", False),
563
- strategy_used=record.get("strategy_used", ""),
564
- duration_ms=record.get("duration_ms"),
565
- error_message=record.get("error_message"),
566
- user_feedback=record.get("user_feedback"),
567
- timestamp=self._parse_datetime(record.get("timestamp"))
568
- or datetime.now(timezone.utc),
569
- embedding=record.get("embedding"),
570
- metadata=record.get("metadata", {}),
571
- )
572
-
573
- def _to_user_preference(self, record: Dict) -> UserPreference:
574
- """Convert dict to UserPreference."""
575
- return UserPreference(
576
- id=record["id"],
577
- user_id=record["user_id"],
578
- category=record.get("category", "general"),
579
- preference=record["preference"],
580
- source=record.get("source", "unknown"),
581
- confidence=record.get("confidence", 1.0),
582
- timestamp=self._parse_datetime(record.get("timestamp"))
583
- or datetime.now(timezone.utc),
584
- metadata=record.get("metadata", {}),
585
- )
586
-
587
- def _to_domain_knowledge(self, record: Dict) -> DomainKnowledge:
588
- """Convert dict to DomainKnowledge."""
589
- return DomainKnowledge(
590
- id=record["id"],
591
- agent=record["agent"],
592
- project_id=record["project_id"],
593
- domain=record.get("domain", "general"),
594
- fact=record["fact"],
595
- source=record.get("source", "unknown"),
596
- confidence=record.get("confidence", 1.0),
597
- last_verified=self._parse_datetime(record.get("last_verified"))
598
- or datetime.now(timezone.utc),
599
- embedding=record.get("embedding"),
600
- metadata=record.get("metadata", {}),
601
- )
602
-
603
- def _to_anti_pattern(self, record: Dict) -> AntiPattern:
604
- """Convert dict to AntiPattern."""
605
- return AntiPattern(
606
- id=record["id"],
607
- agent=record["agent"],
608
- project_id=record["project_id"],
609
- pattern=record["pattern"],
610
- why_bad=record.get("why_bad", ""),
611
- better_alternative=record.get("better_alternative", ""),
612
- occurrence_count=record.get("occurrence_count", 1),
613
- last_seen=self._parse_datetime(record.get("last_seen"))
614
- or datetime.now(timezone.utc),
615
- created_at=self._parse_datetime(record.get("created_at"))
616
- or datetime.now(timezone.utc),
617
- embedding=record.get("embedding"),
618
- metadata=record.get("metadata", {}),
619
- )
1
+ """
2
+ ALMA File-Based Storage Backend.
3
+
4
+ Simple JSON file storage for testing and fallback scenarios.
5
+ No vector search - uses basic text matching for retrieval.
6
+ """
7
+
8
+ import json
9
+ import logging
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional
13
+
14
+ from alma.storage.base import StorageBackend
15
+ from alma.storage.constants import MemoryType
16
+ from alma.types import (
17
+ AntiPattern,
18
+ DomainKnowledge,
19
+ Heuristic,
20
+ Outcome,
21
+ UserPreference,
22
+ )
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
+ class FileBasedStorage(StorageBackend):
28
+ """
29
+ File-based storage using JSON files.
30
+
31
+ Structure:
32
+ .alma/
33
+ ├── heuristics.json
34
+ ├── outcomes.json
35
+ ├── preferences.json
36
+ ├── domain_knowledge.json
37
+ └── anti_patterns.json
38
+
39
+ Note: This backend does NOT support vector search.
40
+ Use SQLiteStorage or AzureCosmosStorage for semantic retrieval.
41
+ """
42
+
43
+ def __init__(self, storage_dir: Path):
44
+ """
45
+ Initialize file-based storage.
46
+
47
+ Args:
48
+ storage_dir: Directory to store JSON files
49
+ """
50
+ self.storage_dir = Path(storage_dir)
51
+ self.storage_dir.mkdir(parents=True, exist_ok=True)
52
+
53
+ # File paths (using canonical memory type names)
54
+ self._files = {mt: self.storage_dir / f"{mt}.json" for mt in MemoryType.ALL}
55
+
56
+ # Initialize empty files if they don't exist
57
+ for file_path in self._files.values():
58
+ if not file_path.exists():
59
+ self._write_json(file_path, [])
60
+
61
+ @classmethod
62
+ def from_config(cls, config: Dict[str, Any]) -> "FileBasedStorage":
63
+ """Create instance from configuration."""
64
+ storage_dir = config.get("storage_dir", ".alma")
65
+ return cls(storage_dir=Path(storage_dir))
66
+
67
+ # ==================== WRITE OPERATIONS ====================
68
+
69
+ def save_heuristic(self, heuristic: Heuristic) -> str:
70
+ """Save a heuristic (UPSERT - update if exists, insert if new)."""
71
+ data = self._read_json(self._files["heuristics"])
72
+ record = self._to_dict(heuristic)
73
+ # Find and replace existing, or append new
74
+ found = False
75
+ for i, existing in enumerate(data):
76
+ if existing.get("id") == record["id"]:
77
+ data[i] = record
78
+ found = True
79
+ break
80
+ if not found:
81
+ data.append(record)
82
+ self._write_json(self._files["heuristics"], data)
83
+ logger.debug(f"Saved heuristic: {heuristic.id}")
84
+ return heuristic.id
85
+
86
+ def save_outcome(self, outcome: Outcome) -> str:
87
+ """Save an outcome (UPSERT - update if exists, insert if new)."""
88
+ data = self._read_json(self._files["outcomes"])
89
+ record = self._to_dict(outcome)
90
+ # Find and replace existing, or append new
91
+ found = False
92
+ for i, existing in enumerate(data):
93
+ if existing.get("id") == record["id"]:
94
+ data[i] = record
95
+ found = True
96
+ break
97
+ if not found:
98
+ data.append(record)
99
+ self._write_json(self._files["outcomes"], data)
100
+ logger.debug(f"Saved outcome: {outcome.id}")
101
+ return outcome.id
102
+
103
+ def save_user_preference(self, preference: UserPreference) -> str:
104
+ """Save a user preference (UPSERT - update if exists, insert if new)."""
105
+ data = self._read_json(self._files["preferences"])
106
+ record = self._to_dict(preference)
107
+ # Find and replace existing, or append new
108
+ found = False
109
+ for i, existing in enumerate(data):
110
+ if existing.get("id") == record["id"]:
111
+ data[i] = record
112
+ found = True
113
+ break
114
+ if not found:
115
+ data.append(record)
116
+ self._write_json(self._files["preferences"], data)
117
+ logger.debug(f"Saved preference: {preference.id}")
118
+ return preference.id
119
+
120
+ def save_domain_knowledge(self, knowledge: DomainKnowledge) -> str:
121
+ """Save domain knowledge (UPSERT - update if exists, insert if new)."""
122
+ data = self._read_json(self._files["domain_knowledge"])
123
+ record = self._to_dict(knowledge)
124
+ # Find and replace existing, or append new
125
+ found = False
126
+ for i, existing in enumerate(data):
127
+ if existing.get("id") == record["id"]:
128
+ data[i] = record
129
+ found = True
130
+ break
131
+ if not found:
132
+ data.append(record)
133
+ self._write_json(self._files["domain_knowledge"], data)
134
+ logger.debug(f"Saved domain knowledge: {knowledge.id}")
135
+ return knowledge.id
136
+
137
+ def save_anti_pattern(self, anti_pattern: AntiPattern) -> str:
138
+ """Save an anti-pattern (UPSERT - update if exists, insert if new)."""
139
+ data = self._read_json(self._files["anti_patterns"])
140
+ record = self._to_dict(anti_pattern)
141
+ # Find and replace existing, or append new
142
+ found = False
143
+ for i, existing in enumerate(data):
144
+ if existing.get("id") == record["id"]:
145
+ data[i] = record
146
+ found = True
147
+ break
148
+ if not found:
149
+ data.append(record)
150
+ self._write_json(self._files["anti_patterns"], data)
151
+ logger.debug(f"Saved anti-pattern: {anti_pattern.id}")
152
+ return anti_pattern.id
153
+
154
+ # ==================== READ OPERATIONS ====================
155
+
156
+ def get_heuristics(
157
+ self,
158
+ project_id: str,
159
+ agent: Optional[str] = None,
160
+ embedding: Optional[List[float]] = None,
161
+ top_k: int = 5,
162
+ min_confidence: float = 0.0,
163
+ ) -> List[Heuristic]:
164
+ """Get heuristics (no vector search - returns all matching filters)."""
165
+ data = self._read_json(self._files["heuristics"])
166
+
167
+ # Filter
168
+ results = []
169
+ for record in data:
170
+ if record.get("project_id") != project_id:
171
+ continue
172
+ if agent and record.get("agent") != agent:
173
+ continue
174
+ if record.get("confidence", 0) < min_confidence:
175
+ continue
176
+ results.append(self._to_heuristic(record))
177
+
178
+ # Sort by confidence and return top_k
179
+ results.sort(key=lambda x: -x.confidence)
180
+ return results[:top_k]
181
+
182
def get_outcomes(
    self,
    project_id: str,
    agent: Optional[str] = None,
    task_type: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
    success_only: bool = False,
) -> List[Outcome]:
    """Return the most recent outcomes that pass the filters.

    No vector search is performed; ``embedding`` is accepted but not used.

    Args:
        project_id: Only records with this ``project_id`` are returned.
        agent: When truthy, only records with this ``agent`` are returned.
        task_type: When truthy, only records with this ``task_type`` are
            returned.
        embedding: Ignored.
        top_k: Maximum number of results.
        success_only: When true, records with a falsy ``success`` are dropped.

    Returns:
        Up to ``top_k`` outcomes, newest timestamp first.
    """

    def _selected(row):
        return (
            row.get("project_id") == project_id
            and not (agent and row.get("agent") != agent)
            and not (task_type and row.get("task_type") != task_type)
            and not (success_only and not row.get("success"))
        )

    rows = self._read_json(self._files["outcomes"])
    matches = [self._to_outcome(row) for row in rows if _selected(row)]
    newest_first = sorted(matches, key=lambda item: item.timestamp, reverse=True)
    return newest_first[:top_k]
209
+
210
def get_user_preferences(
    self,
    user_id: str,
    category: Optional[str] = None,
) -> List[UserPreference]:
    """Return every stored preference belonging to ``user_id``.

    Args:
        user_id: Only records with this ``user_id`` are returned.
        category: When truthy, only records with this ``category`` are
            returned.

    Returns:
        Matching preferences in file order (no sorting, no limit).
    """
    rows = self._read_json(self._files["preferences"])
    return [
        self._to_user_preference(row)
        for row in rows
        if row.get("user_id") == user_id
        and not (category and row.get("category") != category)
    ]
227
+
228
def get_domain_knowledge(
    self,
    project_id: str,
    agent: Optional[str] = None,
    domain: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[DomainKnowledge]:
    """Return the highest-confidence domain knowledge that passes the filters.

    No vector search is performed; ``embedding`` is accepted but not used.

    Args:
        project_id: Only records with this ``project_id`` are returned.
        agent: When truthy, only records with this ``agent`` are returned.
        domain: When truthy, only records with this ``domain`` are returned.
        embedding: Ignored.
        top_k: Maximum number of results.

    Returns:
        Up to ``top_k`` entries, highest confidence first.
    """

    def _wanted(row):
        if row.get("project_id") != project_id:
            return False
        if agent and row.get("agent") != agent:
            return False
        return not (domain and row.get("domain") != domain)

    rows = self._read_json(self._files["domain_knowledge"])
    ranked = sorted(
        map(self._to_domain_knowledge, filter(_wanted, rows)),
        key=lambda item: -item.confidence,
    )
    return ranked[:top_k]
252
+
253
def get_anti_patterns(
    self,
    project_id: str,
    agent: Optional[str] = None,
    embedding: Optional[List[float]] = None,
    top_k: int = 5,
) -> List[AntiPattern]:
    """Return the most frequently seen anti-patterns that pass the filters.

    No vector search is performed; ``embedding`` is accepted but not used.

    Args:
        project_id: Only records with this ``project_id`` are returned.
        agent: When truthy, only records with this ``agent`` are returned.
        embedding: Ignored.
        top_k: Maximum number of results.

    Returns:
        Up to ``top_k`` anti-patterns, highest ``occurrence_count`` first.
    """
    selected = []
    for row in self._read_json(self._files["anti_patterns"]):
        in_project = row.get("project_id") == project_id
        if in_project and (not agent or row.get("agent") == agent):
            selected.append(self._to_anti_pattern(row))

    selected.sort(key=lambda item: -item.occurrence_count)
    return selected[:top_k]
274
+
275
+ # ==================== UPDATE OPERATIONS ====================
276
+
277
def update_heuristic(
    self,
    heuristic_id: str,
    updates: Dict[str, Any],
) -> bool:
    """Merge ``updates`` into the first stored heuristic with the given id.

    Args:
        heuristic_id: ``id`` of the record to modify.
        updates: Key/value pairs written over the stored record.

    Returns:
        True if a record was found and the file rewritten, False otherwise
        (in which case nothing is written).
    """
    rows = self._read_json(self._files["heuristics"])
    target = next((row for row in rows if row.get("id") == heuristic_id), None)
    if target is None:
        return False

    target.update(updates)
    self._write_json(self._files["heuristics"], rows)
    return True
292
+
293
def increment_heuristic_occurrence(
    self,
    heuristic_id: str,
    success: bool,
) -> bool:
    """Record one more occurrence of a heuristic.

    The first stored record with the given id gets ``occurrence_count``
    bumped by one, ``success_count`` bumped by one when ``success`` is
    truthy, and ``last_validated`` set to the current UTC time (ISO string).

    Args:
        heuristic_id: ``id`` of the record to modify.
        success: Whether this occurrence counts as a success.

    Returns:
        True if a record was found and the file rewritten, False otherwise.
    """
    path = self._files["heuristics"]
    rows = self._read_json(path)

    for row in rows:
        if row.get("id") != heuristic_id:
            continue
        row["occurrence_count"] = row.get("occurrence_count", 0) + 1
        if success:
            row["success_count"] = row.get("success_count", 0) + 1
        row["last_validated"] = datetime.now(timezone.utc).isoformat()
        self._write_json(path, rows)
        return True

    return False
311
+
312
+ # ==================== UPDATE CONFIDENCE OPERATIONS ====================
313
+
314
def update_heuristic_confidence(
    self,
    heuristic_id: str,
    new_confidence: float,
) -> bool:
    """Set the confidence of the first stored heuristic with the given id.

    ``last_validated`` is set to the current UTC time (ISO string) at the
    same time.

    Args:
        heuristic_id: ``id`` of the record to modify.
        new_confidence: Value stored under ``confidence``.

    Returns:
        True if a record was found and the file rewritten, False otherwise.
    """
    path = self._files["heuristics"]
    rows = self._read_json(path)

    for row in rows:
        if row.get("id") != heuristic_id:
            continue
        row["confidence"] = new_confidence
        row["last_validated"] = datetime.now(timezone.utc).isoformat()
        self._write_json(path, rows)
        return True

    return False
330
+
331
def update_knowledge_confidence(
    self,
    knowledge_id: str,
    new_confidence: float,
) -> bool:
    """Set the confidence of the first stored domain-knowledge entry with the given id.

    ``last_verified`` is set to the current UTC time (ISO string) at the
    same time.

    Args:
        knowledge_id: ``id`` of the record to modify.
        new_confidence: Value stored under ``confidence``.

    Returns:
        True if a record was found and the file rewritten, False otherwise.
    """
    rows = self._read_json(self._files["domain_knowledge"])
    target = next((row for row in rows if row.get("id") == knowledge_id), None)
    if target is None:
        return False

    target["confidence"] = new_confidence
    target["last_verified"] = datetime.now(timezone.utc).isoformat()
    self._write_json(self._files["domain_knowledge"], rows)
    return True
347
+
348
+ # ==================== DELETE OPERATIONS ====================
349
+
350
def delete_heuristic(self, heuristic_id: str) -> bool:
    """Delete a single heuristic by ID.

    Args:
        heuristic_id: ``id`` value of the record(s) to remove.

    Returns:
        True if at least one stored record was removed, False otherwise.
    """
    data = self._read_json(self._files["heuristics"])
    remaining = [r for r in data if r.get("id") != heuristic_id]

    if len(remaining) == len(data):
        # Nothing matched: leave the file untouched, like the update_*
        # methods do. Besides saving a write, this avoids replacing a file
        # that _read_json could not parse (it returns [] then) with "[]".
        return False

    self._write_json(self._files["heuristics"], remaining)
    logger.debug(f"Deleted heuristic: {heuristic_id}")
    return True
362
+
363
def delete_outcome(self, outcome_id: str) -> bool:
    """Delete a single outcome by ID.

    Args:
        outcome_id: ``id`` value of the record(s) to remove.

    Returns:
        True if at least one stored record was removed, False otherwise.
    """
    data = self._read_json(self._files["outcomes"])
    remaining = [r for r in data if r.get("id") != outcome_id]

    if len(remaining) == len(data):
        # Nothing matched: leave the file untouched, like the update_*
        # methods do. Besides saving a write, this avoids replacing a file
        # that _read_json could not parse (it returns [] then) with "[]".
        return False

    self._write_json(self._files["outcomes"], remaining)
    logger.debug(f"Deleted outcome: {outcome_id}")
    return True
375
+
376
def delete_domain_knowledge(self, knowledge_id: str) -> bool:
    """Delete a single domain knowledge entry by ID.

    Args:
        knowledge_id: ``id`` value of the record(s) to remove.

    Returns:
        True if at least one stored record was removed, False otherwise.
    """
    data = self._read_json(self._files["domain_knowledge"])
    remaining = [r for r in data if r.get("id") != knowledge_id]

    if len(remaining) == len(data):
        # Nothing matched: leave the file untouched, like the update_*
        # methods do. Besides saving a write, this avoids replacing a file
        # that _read_json could not parse (it returns [] then) with "[]".
        return False

    self._write_json(self._files["domain_knowledge"], remaining)
    logger.debug(f"Deleted domain knowledge: {knowledge_id}")
    return True
388
+
389
def delete_anti_pattern(self, anti_pattern_id: str) -> bool:
    """Delete a single anti-pattern by ID.

    Args:
        anti_pattern_id: ``id`` value of the record(s) to remove.

    Returns:
        True if at least one stored record was removed, False otherwise.
    """
    data = self._read_json(self._files["anti_patterns"])
    remaining = [r for r in data if r.get("id") != anti_pattern_id]

    if len(remaining) == len(data):
        # Nothing matched: leave the file untouched, like the update_*
        # methods do. Besides saving a write, this avoids replacing a file
        # that _read_json could not parse (it returns [] then) with "[]".
        return False

    self._write_json(self._files["anti_patterns"], remaining)
    logger.debug(f"Deleted anti-pattern: {anti_pattern_id}")
    return True
401
+
402
def delete_outcomes_older_than(
    self,
    project_id: str,
    older_than: datetime,
    agent: Optional[str] = None,
) -> int:
    """Delete outcomes of a project whose timestamp is before ``older_than``.

    Records of other projects, and of other agents when ``agent`` is
    truthy, are never touched. A record whose timestamp is missing or
    cannot be parsed is kept: its age is unknown, and ``_to_outcome``
    treats such records as timestamped "now" when reading them.

    Args:
        project_id: Only records with this ``project_id`` are considered.
        older_than: Cutoff; records strictly older than this are removed.
        agent: When truthy, only records with this ``agent`` are considered.

    Returns:
        The number of records removed. The file is rewritten only when
        that number is non-zero.
    """
    data = self._read_json(self._files["outcomes"])

    def _expired(record):
        if record.get("project_id") != project_id:
            return False
        if agent and record.get("agent") != agent:
            return False

        timestamp = self._parse_datetime(record.get("timestamp"))
        if timestamp is None:
            return False

        cutoff = older_than
        # Comparing a naive with an aware datetime raises TypeError.
        # NOTE(review): the naive side is assumed to be UTC, matching the
        # timezone.utc timestamps this backend writes; confirm for legacy data.
        if (timestamp.tzinfo is None) != (cutoff.tzinfo is None):
            if timestamp.tzinfo is None:
                timestamp = timestamp.replace(tzinfo=timezone.utc)
            else:
                cutoff = cutoff.replace(tzinfo=timezone.utc)
        return timestamp < cutoff

    kept = [record for record in data if not _expired(record)]
    deleted = len(data) - len(kept)
    if deleted:
        self._write_json(self._files["outcomes"], kept)
    logger.info(f"Deleted {deleted} old outcomes")
    return deleted
429
+
430
def delete_low_confidence_heuristics(
    self,
    project_id: str,
    below_confidence: float,
    agent: Optional[str] = None,
) -> int:
    """Delete heuristics of a project whose confidence is below a threshold.

    Records of other projects, and of other agents when ``agent`` is
    truthy, are never touched. A record without a ``confidence`` key is
    treated as confidence 0.

    Args:
        project_id: Only records with this ``project_id`` are considered.
        below_confidence: Records with confidence strictly below this value
            are removed.
        agent: When truthy, only records with this ``agent`` are considered.

    Returns:
        The number of records removed. The file is rewritten only when
        that number is non-zero.
    """
    data = self._read_json(self._files["heuristics"])

    def _doomed(record):
        if record.get("project_id") != project_id:
            return False
        if agent and record.get("agent") != agent:
            return False
        return not (record.get("confidence", 0) >= below_confidence)

    kept = [record for record in data if not _doomed(record)]
    deleted = len(data) - len(kept)
    if deleted:
        # Skip the rewrite when nothing was removed, like the update_*
        # methods do when no record matches.
        self._write_json(self._files["heuristics"], kept)
    logger.info(f"Deleted {deleted} low-confidence heuristics")
    return deleted
456
+
457
+ # ==================== STATS ====================
458
+
459
def get_stats(
    self,
    project_id: str,
    agent: Optional[str] = None,
) -> Dict[str, Any]:
    """Count stored records per memory type for a project.

    Every file in ``self._files`` contributes a ``"<name>_count"`` entry.
    Preferences are counted in full because they carry no ``project_id``;
    all other types count only records of ``project_id`` and, when ``agent``
    is not None, of that agent.

    Args:
        project_id: Project whose records are counted.
        agent: Optional agent restriction (None means all agents).

    Returns:
        A dict with ``project_id``, ``agent``, the per-type counts and
        ``total_count`` (the sum of all ``*_count`` entries).
    """
    stats = {
        "project_id": project_id,
        "agent": agent,
        "heuristics_count": 0,
        "outcomes_count": 0,
        "preferences_count": 0,
        "domain_knowledge_count": 0,
        "anti_patterns_count": 0,
    }

    def _counted(name, row):
        if name == "preferences":
            return True
        if row.get("project_id") != project_id:
            return False
        return agent is None or row.get("agent") == agent

    for name, file_path in self._files.items():
        rows = self._read_json(file_path)
        stats[f"{name}_count"] = sum(1 for row in rows if _counted(name, row))

    # total_count is added after the sum is computed, so it never counts itself.
    stats["total_count"] = sum(
        value for key, value in stats.items() if key.endswith("_count")
    )
    return stats
490
+
491
+ # ==================== HELPERS ====================
492
+
493
def _read_json(self, file_path: Path) -> List[Dict]:
    """Load and return the JSON content of ``file_path``.

    Returns an empty list when the file does not exist or its content is
    not valid JSON; any other error propagates to the caller.
    """
    try:
        handle = open(file_path, "r")
    except FileNotFoundError:
        return []

    with handle:
        try:
            return json.load(handle)
        except json.JSONDecodeError:
            return []
500
+
501
def _write_json(self, file_path: Path, data: List[Dict]):
    """Write ``data`` to ``file_path`` as indented JSON without corrupting it on failure.

    The JSON is first written to a sibling ``<file_path>.tmp`` file, which
    is then moved over the target with ``os.replace``. If serialization or
    the write fails part-way, the previous content of ``file_path`` is left
    intact instead of being truncated (a truncated file would be read back
    as an empty list by ``_read_json``).

    Args:
        file_path: Destination file.
        data: Records to serialize; values json cannot encode are passed
            through ``str``.

    Raises:
        Whatever ``open``, ``json.dump`` or ``os.replace`` raise; the
        temporary file is removed before the error propagates.
    """
    import os  # local import: only this helper needs it

    tmp_path = f"{os.fspath(file_path)}.tmp"
    try:
        with open(tmp_path, "w") as f:
            json.dump(data, f, indent=2, default=str)
        os.replace(tmp_path, file_path)
    except BaseException:
        # Best-effort cleanup so a failed write leaves no stray temp file;
        # the original error is what the caller needs to see.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
505
+
506
def _to_dict(self, obj: Any) -> Dict:
    """Turn a dataclass instance into a plain dict ready for JSON storage.

    For dataclasses, ``datetime`` fields become ISO-8601 strings and fields
    whose value is None are left out. Anything that is not a dataclass is
    passed to ``dict()``.
    """
    if not hasattr(obj, "__dataclass_fields__"):
        return dict(obj)

    pairs = ((name, getattr(obj, name)) for name in obj.__dataclass_fields__)
    return {
        name: value.isoformat() if isinstance(value, datetime) else value
        for name, value in pairs
        if value is not None
    }
518
+
519
def _parse_datetime(self, value: Any) -> Optional[datetime]:
    """Coerce a stored timestamp into a ``datetime``.

    None and ``datetime`` instances are returned unchanged. Otherwise the
    value is treated as an ISO-8601 string (``"Z"`` is rewritten to
    ``"+00:00"`` before parsing). Returns None when the value has no
    ``replace`` method or is not a valid ISO string.
    """
    if value is None or isinstance(value, datetime):
        return value

    try:
        normalized = value.replace("Z", "+00:00")
        return datetime.fromisoformat(normalized)
    except (ValueError, AttributeError):
        return None
529
+
530
def _to_heuristic(self, record: Dict) -> Heuristic:
    """Build a Heuristic from a stored record dict.

    ``id``, ``agent``, ``project_id``, ``condition`` and ``strategy`` must
    be present (KeyError otherwise); the remaining fields fall back to
    defaults. A timestamp for which ``_parse_datetime`` yields nothing is
    replaced by the current UTC time.
    """

    def _when(key):
        # Stored timestamp for `key`, or "now" when it is absent/unparseable.
        return self._parse_datetime(record.get(key)) or datetime.now(timezone.utc)

    fields = {
        "id": record["id"],
        "agent": record["agent"],
        "project_id": record["project_id"],
        "condition": record["condition"],
        "strategy": record["strategy"],
        "confidence": record.get("confidence", 0.0),
        "occurrence_count": record.get("occurrence_count", 0),
        "success_count": record.get("success_count", 0),
        "last_validated": _when("last_validated"),
        "created_at": _when("created_at"),
        "embedding": record.get("embedding"),
        "metadata": record.get("metadata", {}),
    }
    return Heuristic(**fields)
548
+
549
def _to_outcome(self, record: Dict) -> Outcome:
    """Build an Outcome from a stored record dict.

    ``id``, ``agent``, ``project_id`` and ``task_description`` must be
    present (KeyError otherwise); the remaining fields fall back to
    defaults. A timestamp for which ``_parse_datetime`` yields nothing is
    replaced by the current UTC time.
    """
    fields = {
        "id": record["id"],
        "agent": record["agent"],
        "project_id": record["project_id"],
        "task_type": record.get("task_type", "general"),
        "task_description": record["task_description"],
        "success": record.get("success", False),
        "strategy_used": record.get("strategy_used", ""),
        "duration_ms": record.get("duration_ms"),
        "error_message": record.get("error_message"),
        "user_feedback": record.get("user_feedback"),
    }

    recorded_at = self._parse_datetime(record.get("timestamp"))
    if not recorded_at:
        recorded_at = datetime.now(timezone.utc)
    fields["timestamp"] = recorded_at

    fields["embedding"] = record.get("embedding")
    fields["metadata"] = record.get("metadata", {})
    return Outcome(**fields)
567
+
568
def _to_user_preference(self, record: Dict) -> UserPreference:
    """Build a UserPreference from a stored record dict.

    ``id``, ``user_id`` and ``preference`` must be present (KeyError
    otherwise); the remaining fields fall back to defaults. A timestamp for
    which ``_parse_datetime`` yields nothing is replaced by the current UTC
    time.
    """
    fields = {
        "id": record["id"],
        "user_id": record["user_id"],
        "category": record.get("category", "general"),
        "preference": record["preference"],
        "source": record.get("source", "unknown"),
        "confidence": record.get("confidence", 1.0),
    }

    recorded_at = self._parse_datetime(record.get("timestamp"))
    if not recorded_at:
        recorded_at = datetime.now(timezone.utc)
    fields["timestamp"] = recorded_at

    fields["metadata"] = record.get("metadata", {})
    return UserPreference(**fields)
581
+
582
def _to_domain_knowledge(self, record: Dict) -> DomainKnowledge:
    """Build a DomainKnowledge from a stored record dict.

    ``id``, ``agent``, ``project_id`` and ``fact`` must be present (KeyError
    otherwise); the remaining fields fall back to defaults. A
    ``last_verified`` value for which ``_parse_datetime`` yields nothing is
    replaced by the current UTC time.
    """
    fields = {
        "id": record["id"],
        "agent": record["agent"],
        "project_id": record["project_id"],
        "domain": record.get("domain", "general"),
        "fact": record["fact"],
        "source": record.get("source", "unknown"),
        "confidence": record.get("confidence", 1.0),
    }

    verified_at = self._parse_datetime(record.get("last_verified"))
    if not verified_at:
        verified_at = datetime.now(timezone.utc)
    fields["last_verified"] = verified_at

    fields["embedding"] = record.get("embedding")
    fields["metadata"] = record.get("metadata", {})
    return DomainKnowledge(**fields)
597
+
598
def _to_anti_pattern(self, record: Dict) -> AntiPattern:
    """Build an AntiPattern from a stored record dict.

    ``id``, ``agent``, ``project_id`` and ``pattern`` must be present
    (KeyError otherwise); the remaining fields fall back to defaults. A
    timestamp for which ``_parse_datetime`` yields nothing is replaced by
    the current UTC time.
    """

    def _when(key):
        # Stored timestamp for `key`, or "now" when it is absent/unparseable.
        return self._parse_datetime(record.get(key)) or datetime.now(timezone.utc)

    fields = {
        "id": record["id"],
        "agent": record["agent"],
        "project_id": record["project_id"],
        "pattern": record["pattern"],
        "why_bad": record.get("why_bad", ""),
        "better_alternative": record.get("better_alternative", ""),
        "occurrence_count": record.get("occurrence_count", 1),
        "last_seen": _when("last_seen"),
        "created_at": _when("created_at"),
        "embedding": record.get("embedding"),
        "metadata": record.get("metadata", {}),
    }
    return AntiPattern(**fields)