agno 2.3.24__py3-none-any.whl → 2.3.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. agno/agent/agent.py +297 -11
  2. agno/db/base.py +214 -0
  3. agno/db/dynamo/dynamo.py +47 -0
  4. agno/db/firestore/firestore.py +47 -0
  5. agno/db/gcs_json/gcs_json_db.py +47 -0
  6. agno/db/in_memory/in_memory_db.py +47 -0
  7. agno/db/json/json_db.py +47 -0
  8. agno/db/mongo/async_mongo.py +229 -0
  9. agno/db/mongo/mongo.py +47 -0
  10. agno/db/mongo/schemas.py +16 -0
  11. agno/db/mysql/async_mysql.py +47 -0
  12. agno/db/mysql/mysql.py +47 -0
  13. agno/db/postgres/async_postgres.py +231 -0
  14. agno/db/postgres/postgres.py +239 -0
  15. agno/db/postgres/schemas.py +19 -0
  16. agno/db/redis/redis.py +47 -0
  17. agno/db/singlestore/singlestore.py +47 -0
  18. agno/db/sqlite/async_sqlite.py +242 -0
  19. agno/db/sqlite/schemas.py +18 -0
  20. agno/db/sqlite/sqlite.py +239 -0
  21. agno/db/surrealdb/surrealdb.py +47 -0
  22. agno/knowledge/chunking/code.py +90 -0
  23. agno/knowledge/chunking/document.py +62 -2
  24. agno/knowledge/chunking/strategy.py +14 -0
  25. agno/knowledge/knowledge.py +7 -1
  26. agno/knowledge/reader/arxiv_reader.py +1 -0
  27. agno/knowledge/reader/csv_reader.py +1 -0
  28. agno/knowledge/reader/docx_reader.py +1 -0
  29. agno/knowledge/reader/firecrawl_reader.py +1 -0
  30. agno/knowledge/reader/json_reader.py +1 -0
  31. agno/knowledge/reader/markdown_reader.py +1 -0
  32. agno/knowledge/reader/pdf_reader.py +1 -0
  33. agno/knowledge/reader/pptx_reader.py +1 -0
  34. agno/knowledge/reader/s3_reader.py +1 -0
  35. agno/knowledge/reader/tavily_reader.py +1 -0
  36. agno/knowledge/reader/text_reader.py +1 -0
  37. agno/knowledge/reader/web_search_reader.py +1 -0
  38. agno/knowledge/reader/website_reader.py +1 -0
  39. agno/knowledge/reader/wikipedia_reader.py +1 -0
  40. agno/knowledge/reader/youtube_reader.py +1 -0
  41. agno/knowledge/utils.py +1 -0
  42. agno/learn/__init__.py +65 -0
  43. agno/learn/config.py +463 -0
  44. agno/learn/curate.py +185 -0
  45. agno/learn/machine.py +690 -0
  46. agno/learn/schemas.py +1043 -0
  47. agno/learn/stores/__init__.py +35 -0
  48. agno/learn/stores/entity_memory.py +3275 -0
  49. agno/learn/stores/learned_knowledge.py +1583 -0
  50. agno/learn/stores/protocol.py +117 -0
  51. agno/learn/stores/session_context.py +1217 -0
  52. agno/learn/stores/user_memory.py +1495 -0
  53. agno/learn/stores/user_profile.py +1220 -0
  54. agno/learn/utils.py +209 -0
  55. agno/models/base.py +59 -0
  56. agno/os/routers/knowledge/knowledge.py +7 -0
  57. agno/tools/browserbase.py +78 -6
  58. agno/tools/google_bigquery.py +11 -2
  59. agno/utils/agent.py +30 -1
  60. {agno-2.3.24.dist-info → agno-2.3.25.dist-info}/METADATA +24 -2
  61. {agno-2.3.24.dist-info → agno-2.3.25.dist-info}/RECORD +64 -50
  62. {agno-2.3.24.dist-info → agno-2.3.25.dist-info}/WHEEL +0 -0
  63. {agno-2.3.24.dist-info → agno-2.3.25.dist-info}/licenses/LICENSE +0 -0
  64. {agno-2.3.24.dist-info → agno-2.3.25.dist-info}/top_level.txt +0 -0
agno/learn/config.py ADDED
@@ -0,0 +1,463 @@
1
+ """
2
+ LearningMachine Configuration
3
+ =============================
4
+ Enums and configuration classes for the unified learning system.
5
+
6
+ Uses dataclasses instead of Pydantic BaseModels to avoid runtime
7
+ overhead and validation errors that could break agents mid-run.
8
+
9
+ Configurations:
10
+ - LearningMode: How learning is extracted (ALWAYS, AGENTIC, PROPOSE, HITL)
11
+ - UserProfileConfig: Config for user profile learning
12
+ - UserMemoryConfig: Config for user memory learning (MemoriesConfig is kept as an alias)
13
+ - SessionContextConfig: Config for session context learning
14
+ - LearnedKnowledgeConfig: Config for learned knowledge
15
+ - EntityMemoryConfig: Config for entity memory
16
+ """
17
+
18
+ from dataclasses import dataclass
19
+ from enum import Enum
20
+ from typing import TYPE_CHECKING, Any, Optional, Type, Union
21
+
22
+ if TYPE_CHECKING:
23
+ from agno.db.base import AsyncBaseDb, BaseDb
24
+ from agno.models.base import Model
25
+
26
+
27
+ # =============================================================================
28
+ # Enums
29
+ # =============================================================================
30
+
31
+
32
class LearningMode(Enum):
    """Strategy that decides how learnings are extracted and persisted.

    Members:
        ALWAYS: Extraction runs automatically after each response.
        AGENTIC: The agent chooses when to learn, using tools.
        PROPOSE: The agent proposes a learning and a human confirms it.
        HITL: Human-in-the-Loop; reserved for future use.
    """

    # Automatic extraction after every response.
    ALWAYS = "always"
    # Tool-driven: the agent decides when to learn.
    AGENTIC = "agentic"
    # Agent proposes, human confirms.
    PROPOSE = "propose"
    # Human-in-the-Loop (reserved for future use).
    HITL = "hitl"
45
+
46
+
47
+ # =============================================================================
48
+ # Learning Type Configurations
49
+ # =============================================================================
50
+
51
+
52
@dataclass
class UserProfileConfig:
    """Settings for the User Profile learning type.

    A user profile holds long-term, structured fields about a user (name,
    preferred_name, plus any custom fields supplied by an extended schema).
    Unstructured observations belong in UserMemoryConfig instead.

    Scope: USER (fixed) - data is stored and retrieved by user_id.

    Attributes:
        db: Database backend used for storage.
        model: Model used for extraction (required for ALWAYS mode).
        mode: How learning is extracted. Default: ALWAYS.
        schema: Custom schema for user profile data. Default: UserProfile.
        enable_update_profile: Allow extraction to update profile fields.
        enable_agent_tools: Expose tools to the agent.
        agent_can_update_profile: When agent tools are enabled, expose the
            profile-update tool.
        instructions: Custom instructions for what to capture.
        additional_instructions: Extra instructions appended to the default.
        system_message: Full override for the extraction system message.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Operations permitted during extraction.
    enable_update_profile: bool = True

    # Tools exposed to the agent.
    enable_agent_tools: bool = False
    agent_can_update_profile: bool = True

    # Prompt customization.
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None
    system_message: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the mode and the agent-tools flag."""
        parts = (
            f"mode={self.mode.value}",
            f"enable_agent_tools={self.enable_agent_tools}",
        )
        return f"UserProfileConfig({', '.join(parts)})"
105
+
106
+
107
@dataclass
class UserMemoryConfig:
    """Settings for the User Memory learning type.

    User memories are unstructured observations about a user that do not fit
    into structured profile fields. They are long-term and persist across
    sessions.

    Scope: USER (fixed) - data is stored and retrieved by user_id.

    Attributes:
        db: Database backend used for storage.
        model: Model used for extraction (required for ALWAYS mode).
        mode: How learning is extracted. Default: ALWAYS.
        schema: Custom schema for memories data. Default: Memories.
        enable_add_memory: Allow adding new memories during extraction.
        enable_update_memory: Allow updating existing memories.
        enable_delete_memory: Allow deleting memories.
        enable_clear_memories: Allow clearing all memories (dangerous).
        enable_agent_tools: Expose tools to the agent.
        agent_can_update_memories: When agent tools are enabled, expose the
            memory-update tool.
        instructions: Custom instructions for what to capture.
        additional_instructions: Extra instructions appended to the default.
        system_message: Full override for the extraction system message.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Operations permitted during extraction.
    enable_add_memory: bool = True
    enable_update_memory: bool = True
    enable_delete_memory: bool = True
    # Clearing wipes every memory, so it is off unless explicitly enabled.
    enable_clear_memories: bool = False

    # Tools exposed to the agent.
    enable_agent_tools: bool = False
    agent_can_update_memories: bool = True

    # Prompt customization.
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None
    system_message: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the mode and the agent-tools flag."""
        parts = (
            f"mode={self.mode.value}",
            f"enable_agent_tools={self.enable_agent_tools}",
        )
        return f"UserMemoryConfig({', '.join(parts)})"


# Old name retained so existing imports of MemoriesConfig keep working.
MemoriesConfig = UserMemoryConfig
168
+
169
+
170
@dataclass
class SessionContextConfig:
    """Settings for the Session Context learning type.

    Session context captures the state and summary of the current session:
    what has happened, goals, plans, and progress.

    Scope: SESSION (fixed) - data is stored and retrieved by session_id.

    Key behavior: each extraction receives the previous context and updates
    it rather than starting from scratch, which keeps continuity even when
    the message history has been truncated.

    Attributes:
        db: Database backend used for storage.
        model: Model used for extraction (required for ALWAYS mode).
        mode: How learning is extracted. Default: ALWAYS.
        schema: Custom schema for session context. Default: SessionContext.
        enable_planning: Track goal, plan, and progress (not just summary).
        enable_add_context: Allow creating new context.
        enable_update_context: Allow updating existing context.
        enable_delete_context: Allow deleting context.
        enable_clear_context: Allow clearing context.
        instructions: Custom instructions for extraction.
        additional_instructions: Extra instructions appended to the default.
        system_message: Full override for the extraction system message.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Planning mode.
    enable_planning: bool = False

    # Operations permitted during extraction.
    enable_add_context: bool = True
    enable_update_context: bool = True
    enable_delete_context: bool = True
    enable_clear_context: bool = False

    # Prompt customization.
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None
    system_message: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the mode and the planning flag."""
        parts = (
            f"mode={self.mode.value}",
            f"enable_planning={self.enable_planning}",
        )
        return f"SessionContextConfig({', '.join(parts)})"
225
+
226
+
227
@dataclass
class LearnedKnowledgeConfig:
    """Settings for the Learned Knowledge learning type.

    Learned knowledge captures reusable insights and patterns that can be
    shared across users and agents.

    Scope: `namespace` + KNOWLEDGE (fixed). Namespace values:
        - "user": Private learned knowledge per user
        - "global": Shared with everyone (default)
        - Custom string: Explicit grouping (e.g., "engineering", "sales_west")

    IMPORTANT: A knowledge base is required for learnings to work. Provide
    it here or pass it to LearningMachine directly.

    Attributes:
        knowledge: Knowledge base instance (vector store) used for storage.
            REQUIRED - learnings cannot be saved or searched without it.
        model: Model used for extraction (if using ALWAYS mode).
        mode: How learning is extracted. Default: AGENTIC.
        schema: Custom schema for learning data. Default: LearnedKnowledge.
        namespace: Sharing boundary ("user", "global", or custom).
        enable_agent_tools: Expose tools to the agent.
        agent_can_save: When agent tools are enabled, provide the
            save_learning tool.
        agent_can_search: When agent tools are enabled, provide the
            search_learnings tool.
        instructions: Custom instructions for what makes a good learning.
        additional_instructions: Extra instructions appended to the default.
        system_message: Full override for the extraction system message.
    """

    # Knowledge base (agno.knowledge.Knowledge) - needed for learnings to work.
    knowledge: Optional[Any] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.AGENTIC
    schema: Optional[Type[Any]] = None

    # Sharing boundary.
    namespace: str = "global"

    # Tools exposed to the agent.
    enable_agent_tools: bool = True
    agent_can_save: bool = True
    agent_can_search: bool = True

    # Prompt customization.
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None
    system_message: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary; `knowledge` is shown only as a presence flag."""
        parts = (
            f"mode={self.mode.value}",
            f"knowledge={self.knowledge is not None}",
            f"enable_agent_tools={self.enable_agent_tools}",
        )
        return f"LearnedKnowledgeConfig({', '.join(parts)})"
287
+
288
+
289
@dataclass
class EntityMemoryConfig:
    """Settings for the EntityMemory learning type.

    EntityMemory stores facts about third-party entities such as companies,
    projects, people, systems, and products - like UserProfile, but for
    things that are not the user.

    Each entity has:
        - Core properties (name, description, key-value properties)
        - Facts (semantic memory - "Acme uses PostgreSQL")
        - Events (episodic memory - "Acme launched v2 on Jan 15")
        - Relationships (graph edges - "Bob is CEO of Acme")

    Scope is controlled by `namespace`:
        - "user": Private entity graph per user
        - "global": Shared with everyone (default)
        - Custom string: Explicit grouping (e.g., "sales_west")

    Attributes:
        db: Database backend used for storage.
        model: Model used for extraction (required for ALWAYS mode).
        mode: How learning is extracted. Default: ALWAYS.
        schema: Custom schema for entity memory data. Default: EntityMemory.
        namespace: Sharing boundary ("user", "global", or custom).
        enable_create_entity: Allow creating new entities.
        enable_update_entity: Allow updating entity properties.
        enable_add_fact: Allow adding facts to entities.
        enable_update_fact: Allow updating existing facts.
        enable_delete_fact: Allow deleting facts.
        enable_add_event: Allow adding events to entities.
        enable_add_relationship: Allow adding relationships.
        enable_agent_tools: Expose tools to the agent.
        agent_can_create_entity: When agent tools are enabled, provide the
            create_entity tool.
        agent_can_update_entity: When agent tools are enabled, provide the
            update_entity tool.
        agent_can_search_entities: When agent tools are enabled, provide the
            search_entities tool.
        instructions: Custom instructions for entity extraction.
        additional_instructions: Extra instructions appended to the default.
        system_message: Full override for the extraction system message.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Sharing boundary.
    namespace: str = "global"

    # Operations permitted during extraction.
    enable_create_entity: bool = True
    enable_update_entity: bool = True
    enable_add_fact: bool = True
    enable_update_fact: bool = True
    enable_delete_fact: bool = True
    enable_add_event: bool = True
    enable_add_relationship: bool = True

    # Tools exposed to the agent.
    enable_agent_tools: bool = False
    agent_can_create_entity: bool = True
    agent_can_update_entity: bool = True
    agent_can_search_entities: bool = True

    # Prompt customization.
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None
    system_message: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing mode, namespace and agent-tools flag."""
        parts = (
            f"mode={self.mode.value}",
            f"namespace={self.namespace}",
            f"enable_agent_tools={self.enable_agent_tools}",
        )
        return f"EntityMemoryConfig({', '.join(parts)})"
371
+
372
+
373
+ # =============================================================================
374
+ # Phase 2 and Phase 3 Configurations (Placeholders)
375
+ # =============================================================================
376
+
377
+
378
@dataclass
class DecisionLogConfig:
    """Settings for the Decision Logs learning type.

    Decision logs record decisions made by the agent together with their
    reasoning and context, which is useful for auditing and for learning
    from past decisions.

    Scope: AGENT (fixed) - data is stored and retrieved by agent_id.

    Note: Deferred to Phase 2.

    Attributes:
        db: Database backend; defaults to None.
        model: Model; defaults to None.
        mode: Learning mode. Default: ALWAYS.
        schema: Optional custom schema type; defaults to None.
        enable_agent_tools: Agent-tools flag. Default: True.
        agent_can_save: Save-tool flag. Default: True.
        agent_can_search: Search-tool flag. Default: True.
        system_message: Optional system message text; defaults to None.
        instructions: Optional instructions text; defaults to None.
        additional_instructions: Optional extra instructions; defaults to None.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Extraction mode and optional custom schema.
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Tools exposed to the agent.
    enable_agent_tools: bool = True
    agent_can_save: bool = True
    agent_can_search: bool = True

    # Prompt customization.
    system_message: Optional[str] = None
    instructions: Optional[str] = None
    additional_instructions: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the configured mode."""
        mode_value = self.mode.value
        return f"DecisionLogConfig(mode={mode_value})"
410
+
411
+
412
@dataclass
class FeedbackConfig:
    """Configuration for Behavioral Feedback learning type.

    Behavioral Feedback captures signals about what worked and what
    didn't: thumbs up/down, corrections, regeneration requests.

    Scope: AGENT (fixed) - Stored and retrieved by agent_id.

    Note: Deferred to Phase 2.

    Attributes:
        db: Database backend; defaults to None.
        model: Model; defaults to None.
        mode: Learning mode. Default: ALWAYS.
        schema: Optional custom schema type; defaults to None.
        instructions: Optional instructions text; defaults to None.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Mode and extraction
    mode: LearningMode = LearningMode.ALWAYS
    schema: Optional[Type[Any]] = None

    # Prompt customization
    instructions: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the configured mode.

        The mode is read from the instance rather than hard-coded, so a
        non-default mode is reported correctly and the format matches the
        other config classes (``mode=<LearningMode value>``).
        """
        return f"FeedbackConfig(mode={self.mode.value})"
437
+
438
+
439
@dataclass
class SelfImprovementConfig:
    """Configuration for Self-Improvement learning type.

    Self-Improvement proposes updates to agent instructions based
    on feedback patterns and successful interactions.

    Scope: AGENT (fixed) - Stored and retrieved by agent_id.

    Note: Deferred to Phase 3.

    Attributes:
        db: Database backend; defaults to None.
        model: Model; defaults to None.
        mode: Learning mode. Default: HITL.
        schema: Optional custom schema type; defaults to None.
        instructions: Optional instructions text; defaults to None.
    """

    # Storage backend and extraction model (both default to None).
    db: Optional[Union["BaseDb", "AsyncBaseDb"]] = None
    model: Optional["Model"] = None

    # Mode and extraction
    mode: LearningMode = LearningMode.HITL
    schema: Optional[Type[Any]] = None

    # Prompt customization
    instructions: Optional[str] = None

    def __repr__(self) -> str:
        """Return a short summary showing the configured mode.

        The mode is read from the instance rather than hard-coded, so a
        non-default mode is reported correctly and the format matches the
        other config classes (``mode=<LearningMode value>``).
        """
        return f"SelfImprovementConfig(mode={self.mode.value})"
agno/learn/curate.py ADDED
@@ -0,0 +1,185 @@
1
+ """
2
+ Curator
3
+ =======
4
+ Memory maintenance for LearningMachine.
5
+
6
+ Keeps memories tidy through:
7
+ - Pruning: Remove old memories
8
+ - Deduplication: Remove exact/near-exact duplicates
9
+
10
+ Usage:
11
+ >>> learning = LearningMachine(db=db, model=model, user_profile=True)
12
+ >>>
13
+ >>> # Remove memories older than 90 days, keep max 100
14
+ >>> removed = learning.curator.prune(user_id="alice", max_age_days=90, max_count=100)
15
+ >>>
16
+ >>> # Remove duplicate memories
17
+ >>> deduped = learning.curator.deduplicate(user_id="alice")
18
+ """
19
+
20
+ from dataclasses import dataclass
21
+ from datetime import datetime, timedelta, timezone
22
+ from typing import Any, List
23
+
24
+ from agno.utils.log import log_debug
25
+
26
+
27
@dataclass
class Curator:
    """Memory maintenance. Keeps things tidy.

    Currently supports user_profile store only: every operation looks up
    ``machine.stores["user_profile"]`` and returns 0 when it is missing.

    Attributes:
        machine: The owning LearningMachine; only ``machine.stores`` is read.
    """

    machine: Any  # LearningMachine

    def prune(
        self,
        user_id: str,
        max_age_days: int = 0,
        max_count: int = 0,
    ) -> int:
        """Remove old memories from user profile.

        The age filter runs first, then the count cap. The profile is saved
        only when at least one memory was actually removed.

        Args:
            user_id: User to prune memories for.
            max_age_days: Remove memories older than this (0 = disabled).
            max_count: Keep at most this many memories (0 = disabled).

        Returns:
            Number of memories removed.
        """
        store = self.machine.stores.get("user_profile")
        if not store:
            return 0

        profile = store.get(user_id=user_id)
        if not profile or not hasattr(profile, "memories"):
            return 0

        memories = profile.memories
        if not memories:
            return 0

        original_count = len(memories)

        # Age filter
        if max_age_days > 0:
            cutoff = datetime.now(timezone.utc) - timedelta(days=max_age_days)
            memories = self._filter_by_age(memories=memories, cutoff=cutoff)

        # Count filter (keep newest)
        if max_count > 0 and len(memories) > max_count:
            memories = self._keep_newest(memories=memories, count=max_count)

        removed = original_count - len(memories)

        if removed > 0:
            profile.memories = memories
            store.save(user_id=user_id, profile=profile)
            log_debug(f"Curator.prune: removed {removed} memories for user_id={user_id}")

        return removed

    def deduplicate(
        self,
        user_id: str,
    ) -> int:
        """Remove duplicate memories from user profile.

        Uses exact and near-exact string matching (case, punctuation and
        whitespace differences are ignored). The first occurrence is kept.

        Args:
            user_id: User to deduplicate memories for.

        Returns:
            Number of duplicate memories removed.
        """
        store = self.machine.stores.get("user_profile")
        if not store:
            return 0

        profile = store.get(user_id=user_id)
        if not profile or not hasattr(profile, "memories"):
            return 0

        memories = profile.memories
        # `not memories` also covers a profile whose memories attribute is
        # None, which would otherwise make len() raise.
        if not memories or len(memories) < 2:
            return 0

        original_count = len(memories)
        unique_memories = self._remove_duplicates(memories=memories)
        removed = original_count - len(unique_memories)

        if removed > 0:
            profile.memories = unique_memories
            store.save(user_id=user_id, profile=profile)
            log_debug(f"Curator.deduplicate: removed {removed} duplicates for user_id={user_id}")

        return removed

    # =========================================================================
    # Helpers
    # =========================================================================

    def _filter_by_age(
        self,
        memories: List[dict],
        cutoff: datetime,
    ) -> List[dict]:
        """Keep memories newer than cutoff.

        Memories with a missing, non-string or unparseable ``created_at``
        are kept, so bad data is never a reason to delete.

        Args:
            memories: Memory dicts; ``created_at`` is read as an ISO-8601 string.
            cutoff: Oldest timestamp that is still kept (inclusive).

        Returns:
            The memories that survive the age filter, in their original order.
        """
        result = []
        for m in memories:
            created_at = m.get("created_at")
            if not created_at or not isinstance(created_at, str):
                result.append(m)  # Keep if no usable timestamp
                continue

            try:
                created = datetime.fromisoformat(created_at.replace("Z", "+00:00"))
                # NOTE(review): naive timestamps are assumed to be UTC so they
                # can be compared with the aware cutoff built in prune();
                # previously the comparison raised and such memories were
                # never pruned. Confirm against how created_at is written.
                if created.tzinfo is None:
                    created = created.replace(tzinfo=timezone.utc)
                keep = created >= cutoff
            except (ValueError, TypeError):
                keep = True  # Keep if unparseable or not comparable

            if keep:
                result.append(m)

        return result

    def _keep_newest(
        self,
        memories: List[dict],
        count: int,
    ) -> List[dict]:
        """Keep the N newest memories.

        Ordering is by the ``created_at`` string, newest first. A missing or
        None timestamp sorts as the oldest, so those entries are dropped
        first.

        Args:
            memories: Memory dicts to choose from.
            count: Maximum number of memories to return.

        Returns:
            Up to ``count`` memories, newest first.
        """

        def sort_key(m: dict) -> str:
            # Coerce None / non-string values so sorting never compares
            # mismatched types.
            return str(m.get("created_at") or "")

        return sorted(memories, key=sort_key, reverse=True)[:count]

    def _remove_duplicates(
        self,
        memories: List[dict],
    ) -> List[dict]:
        """Remove exact and near-exact duplicate memories.

        Two memories are duplicates when their normalized ``content`` is
        equal. The first occurrence wins and input order is preserved.
        Missing or None content is treated as an empty string.
        """
        seen = set()
        unique = []

        for m in memories:
            content = m.get("content")
            normalized = self._normalize("" if content is None else str(content))

            if normalized not in seen:
                seen.add(normalized)
                unique.append(m)

        return unique

    def _normalize(self, text: str) -> str:
        """Normalize text for comparison.

        Lower-cases and strips the text, removes every character that is
        neither a word character nor whitespace, then collapses whitespace
        runs to a single space.
        """
        import re

        text = text.lower().strip()
        text = re.sub(r"[^\w\s]", "", text)
        text = re.sub(r"\s+", " ", text)
        return text