flock-core 0.4.519__py3-none-any.whl → 0.5.0b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of flock-core might be problematic.

Files changed (103)
  1. flock/cli/manage_agents.py +3 -3
  2. flock/components/__init__.py +28 -0
  3. flock/components/evaluation/__init__.py +9 -0
  4. flock/components/evaluation/declarative_evaluation_component.py +198 -0
  5. flock/components/routing/__init__.py +15 -0
  6. flock/{routers/conditional/conditional_router.py → components/routing/conditional_routing_component.py} +60 -49
  7. flock/components/routing/default_routing_component.py +103 -0
  8. flock/components/routing/llm_routing_component.py +208 -0
  9. flock/components/utility/__init__.py +15 -0
  10. flock/{modules/enterprise_memory/enterprise_memory_module.py → components/utility/memory_utility_component.py} +195 -173
  11. flock/{modules/performance/metrics_module.py → components/utility/metrics_utility_component.py} +101 -86
  12. flock/{modules/output/output_module.py → components/utility/output_utility_component.py} +49 -49
  13. flock/core/__init__.py +2 -8
  14. flock/core/agent/__init__.py +16 -0
  15. flock/core/agent/flock_agent_components.py +104 -0
  16. flock/core/agent/flock_agent_execution.py +101 -0
  17. flock/core/agent/flock_agent_integration.py +147 -0
  18. flock/core/agent/flock_agent_lifecycle.py +177 -0
  19. flock/core/agent/flock_agent_serialization.py +378 -0
  20. flock/core/component/__init__.py +15 -0
  21. flock/core/{flock_module.py → component/agent_component_base.py} +136 -35
  22. flock/core/component/evaluation_component_base.py +56 -0
  23. flock/core/component/routing_component_base.py +75 -0
  24. flock/core/component/utility_component_base.py +69 -0
  25. flock/core/config/flock_agent_config.py +49 -2
  26. flock/core/evaluation/utils.py +1 -1
  27. flock/core/execution/evaluation_executor.py +1 -1
  28. flock/core/flock.py +137 -483
  29. flock/core/flock_agent.py +151 -1018
  30. flock/core/flock_factory.py +94 -73
  31. flock/core/{flock_registry.py → flock_registry.py.backup} +3 -17
  32. flock/core/logging/logging.py +1 -0
  33. flock/core/mcp/flock_mcp_server.py +42 -37
  34. flock/core/mixin/dspy_integration.py +5 -5
  35. flock/core/orchestration/__init__.py +18 -0
  36. flock/core/orchestration/flock_batch_processor.py +94 -0
  37. flock/core/orchestration/flock_evaluator.py +113 -0
  38. flock/core/orchestration/flock_execution.py +288 -0
  39. flock/core/orchestration/flock_initialization.py +125 -0
  40. flock/core/orchestration/flock_server_manager.py +65 -0
  41. flock/core/orchestration/flock_web_server.py +117 -0
  42. flock/core/registry/__init__.py +39 -0
  43. flock/core/registry/agent_registry.py +69 -0
  44. flock/core/registry/callable_registry.py +139 -0
  45. flock/core/registry/component_discovery.py +142 -0
  46. flock/core/registry/component_registry.py +64 -0
  47. flock/core/registry/config_mapping.py +64 -0
  48. flock/core/registry/decorators.py +137 -0
  49. flock/core/registry/registry_hub.py +202 -0
  50. flock/core/registry/server_registry.py +57 -0
  51. flock/core/registry/type_registry.py +86 -0
  52. flock/core/serialization/flock_serializer.py +33 -30
  53. flock/core/serialization/serialization_utils.py +28 -25
  54. flock/core/util/input_resolver.py +29 -2
  55. flock/platform/docker_tools.py +3 -3
  56. flock/tools/markdown_tools.py +1 -2
  57. flock/tools/text_tools.py +1 -2
  58. flock/webapp/app/main.py +9 -5
  59. flock/workflow/activities.py +59 -84
  60. flock/workflow/activities_unified.py +230 -0
  61. flock/workflow/agent_execution_activity.py +6 -6
  62. flock/workflow/flock_workflow.py +1 -1
  63. {flock_core-0.4.519.dist-info → flock_core-0.5.0b1.dist-info}/METADATA +4 -4
  64. {flock_core-0.4.519.dist-info → flock_core-0.5.0b1.dist-info}/RECORD +67 -68
  65. flock/core/flock_evaluator.py +0 -60
  66. flock/core/flock_router.py +0 -83
  67. flock/evaluators/__init__.py +0 -1
  68. flock/evaluators/declarative/__init__.py +0 -1
  69. flock/evaluators/declarative/declarative_evaluator.py +0 -194
  70. flock/evaluators/memory/memory_evaluator.py +0 -90
  71. flock/evaluators/test/test_case_evaluator.py +0 -38
  72. flock/evaluators/zep/zep_evaluator.py +0 -59
  73. flock/modules/__init__.py +0 -1
  74. flock/modules/assertion/__init__.py +0 -1
  75. flock/modules/assertion/assertion_module.py +0 -286
  76. flock/modules/callback/__init__.py +0 -1
  77. flock/modules/callback/callback_module.py +0 -91
  78. flock/modules/enterprise_memory/README.md +0 -99
  79. flock/modules/mem0/__init__.py +0 -1
  80. flock/modules/mem0/mem0_module.py +0 -126
  81. flock/modules/mem0_async/__init__.py +0 -1
  82. flock/modules/mem0_async/async_mem0_module.py +0 -126
  83. flock/modules/memory/__init__.py +0 -1
  84. flock/modules/memory/memory_module.py +0 -429
  85. flock/modules/memory/memory_parser.py +0 -125
  86. flock/modules/memory/memory_storage.py +0 -736
  87. flock/modules/output/__init__.py +0 -1
  88. flock/modules/performance/__init__.py +0 -1
  89. flock/modules/zep/__init__.py +0 -1
  90. flock/modules/zep/zep_module.py +0 -192
  91. flock/routers/__init__.py +0 -1
  92. flock/routers/agent/__init__.py +0 -1
  93. flock/routers/agent/agent_router.py +0 -236
  94. flock/routers/agent/handoff_agent.py +0 -58
  95. flock/routers/default/__init__.py +0 -1
  96. flock/routers/default/default_router.py +0 -80
  97. flock/routers/feedback/feedback_router.py +0 -114
  98. flock/routers/list_generator/list_generator_router.py +0 -166
  99. flock/routers/llm/__init__.py +0 -1
  100. flock/routers/llm/llm_router.py +0 -365
  101. {flock_core-0.4.519.dist-info → flock_core-0.5.0b1.dist-info}/WHEEL +0 -0
  102. {flock_core-0.4.519.dist-info → flock_core-0.5.0b1.dist-info}/entry_points.txt +0 -0
  103. {flock_core-0.4.519.dist-info → flock_core-0.5.0b1.dist-info}/licenses/LICENSE +0 -0
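Most of the restructuring above moves code out of flock/modules/ and flock/routers/ into the new flock/components/ package. Purely as an illustration of what the renames imply for import paths (the module paths are taken from the file list above; the public class names inside those modules are not shown in this diff and may differ):

# Hypothetical import-path migration inferred from the renames above.
# flock-core 0.4.519
import flock.routers.conditional.conditional_router
import flock.modules.output.output_module

# flock-core 0.5.0b1
import flock.components.routing.conditional_routing_component
import flock.components.utility.output_utility_component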
flock/modules/memory/memory_storage.py (deleted)
@@ -1,736 +0,0 @@
- """Flock memory storage with short-term and long-term memory, concept graph, and clustering.
-
- Based on concept graph spreading activation and embedding-based semantic search.
- """
-
- import json
- from datetime import datetime
- from enum import Enum
- from typing import Any, Literal
-
- import networkx as nx
- import numpy as np
- from networkx.readwrite import json_graph
- from opentelemetry import trace
- from pydantic import BaseModel, Field, PrivateAttr
-
- # Import SentenceTransformer for production-grade embeddings.
- from sentence_transformers import SentenceTransformer
-
- # Import the Flock logger.
- from flock.core.logging.logging import get_logger
-
- tracer = trace.get_tracer(__name__)
- logger = get_logger("memory")
-
-
- class MemoryScope(Enum):
-     LOCAL = "local"
-     GLOBAL = "global"
-     BOTH = "both"
-
-
- class MemoryOperation(BaseModel):
-     """Base class for memory operations."""
-
-     type: str
-     scope: MemoryScope = MemoryScope.BOTH
-
-
- class CombineOperation(MemoryOperation):
-     """Combine results from multiple operations using weighted scoring."""
-
-     type: Literal["combine"] = "combine"
-     weights: dict[str, float] = Field(
-         default_factory=lambda: {"semantic": 0.7, "exact": 0.3}
-     )
-
-
- class SemanticOperation(MemoryOperation):
-     """Semantic search operation."""
-
-     type: Literal["semantic"] = "semantic"
-     threshold: float = 0.5
-     max_results: int = 10
-     recency_filter: str | None = None  # e.g., "7d", "24h"
-
-
- class ExactOperation(MemoryOperation):
-     """Exact matching operation."""
-
-     type: Literal["exact"] = "exact"
-     keys: list[str] = Field(default_factory=list)
-
-
- class ChunkOperation(MemoryOperation):
-     """Operation for handling chunked entries."""
-
-     type: Literal["chunk"] = "chunk"
-     reconstruct: bool = True
-
-
- class EnrichOperation(MemoryOperation):
-     """Enrich memory with tool results."""
-
-     type: Literal["enrich"] = "enrich"
-     tools: list[str]
-     strategy: Literal["comprehensive", "quick", "validated"] = "comprehensive"
-
-
- class FilterOperation(MemoryOperation):
-     """Filter memory results."""
-
-     type: Literal["filter"] = "filter"
-     recency: str | None = None
-     relevance: float | None = None
-     metadata: dict[str, Any] = Field(default_factory=dict)
-
-
- class SortOperation(MemoryOperation):
-     """Sort memory results."""
-
-     type: Literal["sort"] = "sort"
-     by: Literal["relevance", "recency", "access_count"] = "relevance"
-     ascending: bool = False
-
-
- class MemoryEntry(BaseModel):
-     """A single memory entry."""
-
-     id: str
-     content: str
-     embedding: list[float] | None = None
-     timestamp: datetime = Field(default_factory=datetime.now)
-     access_count: int = Field(default=0)
-     concepts: set[str] = Field(default_factory=set)
-     decay_factor: float = Field(default=1.0)
-
-
- class MemoryGraph(BaseModel):
-     """Graph representation of concept relationships.
-
-     The graph is stored as a JSON string for serialization, while a private attribute holds the actual NetworkX graph.
-     """
-
-     # JSON representation using the node-link format with explicit edges="links" to avoid warnings.
-     graph_json: str = Field(
-         default_factory=lambda: json.dumps(
-             json_graph.node_link_data(nx.Graph(), edges="links")
-         )
-     )
-     # Private attribute for the actual NetworkX graph.
-     _graph: nx.Graph = PrivateAttr()
-
-     def __init__(self, **data):
-         """Initialize the MemoryGraph with a NetworkX graph from JSON data."""
-         super().__init__(**data)
-         try:
-             data_graph = json.loads(self.graph_json)
-             self._graph = json_graph.node_link_graph(data_graph, edges="links")
-             logger.debug(
-                 f"MemoryGraph initialized from JSON with {len(self._graph.nodes())} nodes."
-             )
-         except Exception as e:
-             logger.error(f"Failed to load MemoryGraph from JSON: {e}")
-             self._graph = nx.Graph()
-
-     @property
-     def graph(self) -> nx.Graph:
-         """Provides access to the internal NetworkX graph."""
-         return self._graph
-
-     def update_graph_json(self) -> None:
-         """Update the JSON representation based on the current state of the graph."""
-         self.graph_json = json.dumps(
-             json_graph.node_link_data(self._graph, edges="links")
-         )
-         logger.debug("MemoryGraph JSON updated.")
-
-     def add_concepts(self, concepts: set[str]) -> None:
-         """Add a set of concepts to the graph and update their associations."""
-         concept_list = list(concepts)
-         logger.debug(f"Adding concepts: {concept_list}")
-         for concept in concepts:
-             self._graph.add_node(concept)
-         for c1 in concepts:
-             for c2 in concepts:
-                 if c1 != c2:
-                     if self._graph.has_edge(c1, c2):
-                         self._graph[c1][c2]["weight"] += 1
-                     else:
-                         self._graph.add_edge(c1, c2, weight=1)
-         self.update_graph_json()
-
-     def spread_activation(
-         self, initial_concepts: set[str], decay_factor: float = 0.5
-     ) -> dict[str, float]:
-         """Spread activation through the concept graph.
-
-         Args:
-             initial_concepts: The starting set of concepts.
-             decay_factor: How much the activation decays at each step.
-
-         Returns:
-             A dictionary mapping each concept to its activation level.
-         """
-         logger.debug(f"Spreading activation from concepts: {initial_concepts}")
-         activated = {concept: 1.0 for concept in initial_concepts}
-         frontier = list(initial_concepts)
-
-         while frontier:
-             current = frontier.pop(0)
-             current_activation = activated[current]
-             for neighbor in self._graph.neighbors(current):
-                 weight = self._graph[current][neighbor]["weight"]
-                 new_activation = current_activation * decay_factor * weight
-                 if (
-                     neighbor not in activated
-                     or activated[neighbor] < new_activation
-                 ):
-                     activated[neighbor] = new_activation
-                     frontier.append(neighbor)
-
-         logger.debug(f"Activation levels: {activated}")
-         return activated
-
-     def save_as_image(self, filename: str = "memory_graph.png") -> None:
-         """Visualize the concept graph and save it as a PNG image with improved readability.
-
-         This method uses matplotlib to create a clear and readable visualization by:
-         - Using a larger figure size
-         - Implementing better node spacing
-         - Adding adjustable text labels
-         - Using a more visually appealing color scheme
-         - Adding edge weight visualization
-
-         Args:
-             filename: The path (including .png) where the image will be saved.
-         """
-         import matplotlib
-
-         matplotlib.use("Agg")
-         import matplotlib.pyplot as plt
-
-         logger.info(f"Saving MemoryGraph visualization to '{filename}'")
-
-         if self._graph.number_of_nodes() == 0:
-             logger.warning("MemoryGraph is empty; skipping image creation.")
-             return
-
-         try:
-             # Create a larger figure with higher DPI
-             plt.figure(figsize=(16, 12), dpi=100)
-
-             # Use Kamada-Kawai layout for better node distribution
-             pos = nx.kamada_kawai_layout(self._graph)
-
-             # Calculate node sizes based on degree
-             node_degrees = dict(self._graph.degree())
-             node_sizes = [
-                 2000 * (1 + node_degrees[node] * 0.2)
-                 for node in self._graph.nodes()
-             ]
-
-             # Calculate edge weights for width and transparency
-             edge_weights = [
-                 d["weight"] for (u, v, d) in self._graph.edges(data=True)
-             ]
-             max_weight = max(edge_weights) if edge_weights else 1
-             edge_widths = [1 + (w / max_weight) * 3 for w in edge_weights]
-             edge_alphas = [0.2 + (w / max_weight) * 0.8 for w in edge_weights]
-
-             # Draw the network with custom styling
-             # Nodes
-             nx.draw_networkx_nodes(
-                 self._graph,
-                 pos,
-                 node_size=node_sizes,
-                 node_color="#5fa4d4",  # Lighter blue
-                 alpha=0.7,
-                 edgecolors="white",
-             )
-
-             # Edges with varying width and transparency
-             for (u, v, d), width, alpha in zip(
-                 self._graph.edges(data=True), edge_widths, edge_alphas
-             ):
-                 nx.draw_networkx_edges(
-                     self._graph,
-                     pos,
-                     edgelist=[(u, v)],
-                     width=width,
-                     alpha=alpha,
-                     edge_color="#2c3e50",  # Darker blue-grey
-                 )
-
-             # Add labels with better positioning and background
-             labels = nx.get_node_attributes(self._graph, "name") or {
-                 node: node for node in self._graph.nodes()
-             }
-             label_pos = {
-                 node: (x, y + 0.02) for node, (x, y) in pos.items()
-             }  # Slightly offset labels
-
-             # Draw labels with white background for better readability
-             for node, (x, y) in label_pos.items():
-                 plt.text(
-                     x,
-                     y,
-                     labels[node],
-                     horizontalalignment="center",
-                     verticalalignment="center",
-                     fontsize=8,
-                     fontweight="bold",
-                     bbox=dict(
-                         facecolor="white", edgecolor="none", alpha=0.7, pad=2.0
-                     ),
-                 )
-
-             # Add edge weight labels for significant weights
-             edge_labels = nx.get_edge_attributes(self._graph, "weight")
-             significant_edges = {
-                 (u, v): w
-                 for (u, v), w in edge_labels.items()
-                 if w > max_weight * 0.3
-             }
-             if significant_edges:
-                 nx.draw_networkx_edge_labels(
-                     self._graph,
-                     pos,
-                     edge_labels=significant_edges,
-                     font_size=6,
-                     bbox=dict(facecolor="white", edgecolor="none", alpha=0.7),
-                 )
-
-             # Improve layout
-             plt.title("Memory Concept Graph", fontsize=16, pad=20)
-             plt.axis("off")
-
-             # Add padding and save
-             plt.tight_layout(pad=2.0)
-             plt.savefig(filename, bbox_inches="tight", facecolor="white")
-             plt.close()
-
-             logger.info(f"MemoryGraph image saved successfully to '{filename}'")
-
-         except Exception as e:
-             logger.error(f"Failed to save MemoryGraph image: {e}")
-             plt.close()
-
-
- class FlockMemoryStore(BaseModel):
-     """Enhanced Flock memory storage with short-term and long-term memory.
-
-     including embedding-based semantic search, exact matching, and result combination.
-     """
-
-     short_term: list[MemoryEntry] = Field(default_factory=list)
-     long_term: list[MemoryEntry] = Field(default_factory=list)
-     concept_graph: MemoryGraph = Field(default_factory=MemoryGraph)
-     clusters: dict[int, list[MemoryEntry]] = Field(default_factory=dict)
-     # Instead of np.ndarray, store centroids as lists of floats.
-     cluster_centroids: dict[int, list[float]] = Field(default_factory=dict)
-     # The embedding model is stored as a private attribute, as it's not serializable.
-     _embedding_model: SentenceTransformer | None = PrivateAttr(default=None)
-
-     @classmethod
-     def load_from_file(cls, file_path: str | None = None) -> "FlockMemoryStore":
-         """Load a memory store from a JSON file.
-
-         Args:
-             file_path: Path to the JSON file containing the serialized memory store.
-                 If None, returns an empty memory store.
-
-         Returns:
-             FlockMemoryStore: A new memory store instance with loaded data.
-
-         Raises:
-             FileNotFoundError: If the specified file doesn't exist
-             JSONDecodeError: If the file contains invalid JSON
-             ValueError: If the JSON structure is invalid
-         """
-         if file_path is None:
-             logger.debug("No file path provided, creating new memory store")
-             return cls()
-
-         try:
-             logger.info(f"Loading memory store from {file_path}")
-             with open(file_path) as f:
-                 data = json.load(f)
-
-             # Initialize a new store
-             store = cls()
-
-             # Load short-term memory entries
-             store.short_term = [
-                 MemoryEntry(
-                     id=entry["id"],
-                     content=entry["content"],
-                     embedding=entry.get("embedding"),
-                     timestamp=datetime.fromisoformat(entry["timestamp"]),
-                     access_count=entry.get("access_count", 0),
-                     concepts=set(entry.get("concepts", [])),
-                     decay_factor=entry.get("decay_factor", 1.0),
-                 )
-                 for entry in data.get("short_term", [])
-             ]
-
-             # Load long-term memory entries
-             store.long_term = [
-                 MemoryEntry(
-                     id=entry["id"],
-                     content=entry["content"],
-                     embedding=entry.get("embedding"),
-                     timestamp=datetime.fromisoformat(entry["timestamp"]),
-                     access_count=entry.get("access_count", 0),
-                     concepts=set(entry.get("concepts", [])),
-                     decay_factor=entry.get("decay_factor", 1.0),
-                 )
-                 for entry in data.get("long_term", [])
-             ]
-
-             # Load concept graph
-             if "concept_graph" in data:
-                 graph_data = json.loads(data["concept_graph"]["graph_json"])
-                 store.concept_graph = MemoryGraph(
-                     graph_json=json.dumps(graph_data)
-                 )
-
-             # Load clusters
-             if "clusters" in data:
-                 store.clusters = {
-                     int(k): [
-                         MemoryEntry(
-                             id=entry["id"],
-                             content=entry["content"],
-                             embedding=entry.get("embedding"),
-                             timestamp=datetime.fromisoformat(
-                                 entry["timestamp"]
-                             ),
-                             access_count=entry.get("access_count", 0),
-                             concepts=set(entry.get("concepts", [])),
-                             decay_factor=entry.get("decay_factor", 1.0),
-                         )
-                         for entry in v
-                     ]
-                     for k, v in data["clusters"].items()
-                 }
-
-             # Load cluster centroids
-             if "cluster_centroids" in data:
-                 store.cluster_centroids = {
-                     int(k): v for k, v in data["cluster_centroids"].items()
-                 }
-
-             # Initialize the embedding model
-             store._embedding_model = None  # Will be lazy-loaded when needed
-
-             logger.info(
-                 f"Successfully loaded memory store with "
-                 f"{len(store.short_term)} short-term and "
-                 f"{len(store.long_term)} long-term entries"
-             )
-             return store
-
-         except FileNotFoundError:
-             logger.warning(
-                 f"Memory file {file_path} not found, creating new store"
-             )
-             return cls()
-         except json.JSONDecodeError as e:
-             logger.error(f"Invalid JSON in memory file: {e}")
-             raise
-         except Exception as e:
-             logger.error(f"Error loading memory store: {e}")
-             raise ValueError(f"Failed to load memory store: {e}")
-
-     @classmethod
-     def merge_stores(
-         cls, stores: list["FlockMemoryStore"]
-     ) -> "FlockMemoryStore":
-         """Merge multiple memory stores into a single store.
-
-         Args:
-             stores: List of FlockMemoryStore instances to merge
-
-         Returns:
-             FlockMemoryStore: A new memory store containing merged data
-         """
-         merged = cls()
-
-         # Merge short-term and long-term memories
-         for store in stores:
-             merged.short_term.extend(store.short_term)
-             merged.long_term.extend(store.long_term)
-
-         # Merge concept graphs
-         merged_graph = nx.Graph()
-         for store in stores:
-             if store.concept_graph and store.concept_graph.graph:
-                 merged_graph = nx.compose(
-                     merged_graph, store.concept_graph.graph
-                 )
-
-         merged.concept_graph = MemoryGraph(
-             graph_json=json.dumps(
-                 nx.node_link_data(merged_graph, edges="links")
-             )
-         )
-
-         # Recompute clusters for the merged data
-         if merged.short_term:
-             merged._update_clusters()
-
-         return merged
-
-     def get_embedding_model(self) -> SentenceTransformer:
-         """Initialize and return the SentenceTransformer model.
-
-         Uses "all-MiniLM-L6-v2" as the default model.
-         """
-         if self._embedding_model is None:
-             try:
-                 logger.debug(
-                     "Loading SentenceTransformer model 'all-MiniLM-L6-v2'."
-                 )
-                 self._embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-             except Exception as e:
-                 logger.error(f"Failed to load embedding model: {e}")
-                 raise RuntimeError(f"Failed to load embedding model: {e}")
-         return self._embedding_model
-
-     def compute_embedding(self, text: str) -> np.ndarray:
-         """Compute and return the embedding for the provided text as a NumPy array."""
-         logger.debug(
-             f"Computing embedding for text: {text[:100].replace('{', '{{').replace('}', '}}')}..."
-         )  # Log first 30 chars for brevity.
-         model = self.get_embedding_model()
-         try:
-             embedding = model.encode(text, convert_to_numpy=True)
-             return embedding
-         except Exception as e:
-             logger.error(f"Error computing embedding: {e}")
-             raise RuntimeError(f"Error computing embedding: {e}")
-
-     def _calculate_similarity(
-         self, query_embedding: np.ndarray, entry_embedding: np.ndarray
-     ) -> float:
-         """Compute the cosine similarity between two embeddings.
-
-         Returns a float between 0 and 1.
-         """
-         try:
-             norm_query = np.linalg.norm(query_embedding)
-             norm_entry = np.linalg.norm(entry_embedding)
-             if norm_query == 0 or norm_entry == 0:
-                 return 0.0
-             similarity = float(
-                 np.dot(query_embedding, entry_embedding)
-                 / (norm_query * norm_entry)
-             )
-             return similarity
-         except Exception as e:
-             logger.error(f"Error computing similarity: {e}")
-             raise RuntimeError(f"Error computing similarity: {e}")
-
-     def exact_match(self, inputs: dict[str, Any]) -> list[MemoryEntry]:
-         """Perform an exact key-based lookup in short-term memory.
-
-         Returns entries where all provided key-value pairs exist in the entry's inputs.
-         """
-         logger.debug(f"Performing exact match lookup with inputs: {inputs}")
-         matches = []
-         for entry in self.short_term:
-             if all(item in entry.inputs.items() for item in inputs.items()):
-                 matches.append(entry)
-         logger.debug(f"Exact match found {len(matches)} entries.")
-         return matches
-
-     def combine_results(
-         self, inputs: dict[str, Any], weights: dict[str, float]
-     ) -> dict[str, Any]:
-         """Combine semantic and exact match results using the provided weights.
-
-         Args:
-             inputs: Input dictionary to search memory.
-             weights: Dictionary with keys "semantic" and "exact" for weighting.
-
-         Returns:
-             A dictionary with "combined_results" as a sorted list of memory entries.
-         """
-         logger.debug(
-             f"Combining results for inputs: {inputs} with weights: {weights}"
-         )
-         query_text = " ".join(str(value) for value in inputs.values())
-         query_embedding = self.compute_embedding(query_text)
-
-         semantic_matches = self.retrieve(
-             query_embedding, set(inputs.values()), similarity_threshold=0.8
-         )
-         exact_matches = self.exact_match(inputs)
-
-         combined: dict[str, dict[str, Any]] = {}
-         for entry in semantic_matches:
-             if entry.embedding is None:
-                 continue
-             semantic_score = self._calculate_similarity(
-                 query_embedding, np.array(entry.embedding)
-             )
-             combined[entry.id] = {
-                 "entry": entry,
-                 "semantic_score": semantic_score * weights.get("semantic", 0.7),
-                 "exact_score": 0.0,
-             }
-         for entry in exact_matches:
-             if entry.id in combined:
-                 combined[entry.id]["exact_score"] = 1.0 * weights.get(
-                     "exact", 0.3
-                 )
-             else:
-                 combined[entry.id] = {
-                     "entry": entry,
-                     "semantic_score": 0.0,
-                     "exact_score": 1.0 * weights.get("exact", 0.3),
-                 }
-         results: list[tuple[float, MemoryEntry]] = []
-         for data in combined.values():
-             total_score = data["semantic_score"] + data["exact_score"]
-             results.append((total_score, data["entry"]))
-         results.sort(key=lambda x: x[0], reverse=True)
-         logger.debug(f"Combined results count: {len(results)}")
-         return {"combined_results": [entry for score, entry in results]}
-
-     def add_entry(self, entry: MemoryEntry) -> None:
-         """Add a new memory entry to short-term memory, update the concept graph and clusters.
-
-         and check for promotion to long-term memory.
-         """
-         with tracer.start_as_current_span("memory.add_entry") as span:
-             logger.info(f"Adding memory entry with id: {entry.id}")
-             span.set_attribute("entry.id", entry.id)
-             self.short_term.append(entry)
-             self.concept_graph.add_concepts(entry.concepts)
-             self._update_clusters()
-             if entry.access_count > 10:
-                 self._promote_to_long_term(entry)
-
-     def _promote_to_long_term(self, entry: MemoryEntry) -> None:
-         """Promote an entry to long-term memory."""
-         logger.info(f"Promoting entry {entry.id} to long-term memory.")
-         if entry not in self.long_term:
-             self.long_term.append(entry)
-
-     def retrieve(
-         self,
-         query_embedding: np.ndarray,
-         query_concepts: set[str],
-         similarity_threshold: float = 0.8,
-         exclude_last_n: int = 0,
-     ) -> list[MemoryEntry]:
-         """Retrieve memory entries using semantic similarity and concept-based activation."""
-         with tracer.start_as_current_span("memory.retrieve") as span:
-             logger.debug("Retrieving memory entries...")
-             results = []
-             current_time = datetime.now()
-             decay_rate = 0.0001
-             norm_query = query_embedding / (
-                 np.linalg.norm(query_embedding) + 1e-8
-             )
-
-             entries = (
-                 self.short_term[:-exclude_last_n]
-                 if exclude_last_n > 0
-                 else self.short_term
-             )
-
-             for entry in entries:
-                 if entry.embedding is None:
-                     continue
-
-                 # Calculate base similarity
-                 entry_embedding = np.array(entry.embedding)
-                 norm_entry = entry_embedding / (
-                     np.linalg.norm(entry_embedding) + 1e-8
-                 )
-                 similarity = float(np.dot(norm_query, norm_entry))
-
-                 # Calculate modifiers
-                 time_diff = (current_time - entry.timestamp).total_seconds()
-                 decay = np.exp(-decay_rate * time_diff)
-                 # Add 1 to base score so new entries aren't zeroed out
-                 reinforcement = 1.0 + np.log1p(entry.access_count)
-
-                 # Calculate final score
-                 final_score = (
-                     similarity * decay * reinforcement * entry.decay_factor
-                 )
-
-                 span.add_event(
-                     "memory score",
-                     attributes={
-                         "entry_id": entry.id,
-                         "similarity": similarity,
-                         "final_score": final_score,
-                     },
-                 )
-
-                 # If base similarity passes threshold, include in results
-                 if similarity >= similarity_threshold:
-                     results.append((final_score, entry))
-
-             # Update access counts and decay for retrieved entries
-             for _, entry in results:
-                 entry.access_count += 1
-                 self._update_decay_factors(entry)
-
-             # Sort by final score
-             results.sort(key=lambda x: x[0], reverse=True)
-             logger.debug(f"Retrieved {len(results)} memory entries.")
-             return [entry for score, entry in results]
-
-     def _update_decay_factors(self, retrieved_entry: MemoryEntry) -> None:
-         """Update decay factors: increase for the retrieved entry and decrease for others."""
-         logger.debug(f"Updating decay factor for entry {retrieved_entry.id}")
-         retrieved_entry.decay_factor *= 1.1
-         for entry in self.short_term:
-             if entry != retrieved_entry:
-                 entry.decay_factor *= 0.9
-
-     def _update_clusters(self) -> None:
-         """Update memory clusters using k-means clustering on entry embeddings."""
-         logger.debug("Updating memory clusters...")
-         if len(self.short_term) < 2:
-             logger.debug("Not enough entries for clustering.")
-             return
-
-         valid_entries = [
-             entry for entry in self.short_term if entry.embedding is not None
-         ]
-         if not valid_entries:
-             logger.debug(
-                 "No valid entries with embeddings found for clustering."
-             )
-             return
-
-         embeddings = [np.array(entry.embedding) for entry in valid_entries]
-         embeddings_matrix = np.vstack(embeddings)
-
-         from sklearn.cluster import KMeans
-
-         n_clusters = min(10, len(embeddings))
-         kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-         labels = kmeans.fit_predict(embeddings_matrix)
-
-         self.clusters.clear()
-         self.cluster_centroids.clear()
-
-         for i in range(n_clusters):
-             cluster_entries = [
-                 entry
-                 for entry, label in zip(valid_entries, labels)
-                 if label == i
-             ]
-             self.clusters[i] = cluster_entries
-             # Convert the centroid (np.ndarray) to a list of floats.
-             self.cluster_centroids[i] = kmeans.cluster_centers_[i].tolist()
-         logger.debug(f"Clustering complete with {n_clusters} clusters.")