flock-core 0.4.527__py3-none-any.whl → 0.5.0b0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic.
- flock/cli/execute_flock.py +1 -1
- flock/cli/manage_agents.py +6 -6
- flock/components/__init__.py +30 -0
- flock/components/evaluation/__init__.py +9 -0
- flock/components/evaluation/declarative_evaluation_component.py +222 -0
- flock/components/routing/__init__.py +15 -0
- flock/{routers/conditional/conditional_router.py → components/routing/conditional_routing_component.py} +61 -53
- flock/components/routing/default_routing_component.py +103 -0
- flock/components/routing/llm_routing_component.py +206 -0
- flock/components/utility/__init__.py +15 -0
- flock/{modules/enterprise_memory/enterprise_memory_module.py → components/utility/memory_utility_component.py} +195 -173
- flock/{modules/performance/metrics_module.py → components/utility/metrics_utility_component.py} +110 -95
- flock/{modules/output/output_module.py → components/utility/output_utility_component.py} +47 -45
- flock/core/__init__.py +26 -18
- flock/core/agent/__init__.py +16 -0
- flock/core/agent/flock_agent_components.py +104 -0
- flock/core/agent/flock_agent_execution.py +101 -0
- flock/core/agent/flock_agent_integration.py +206 -0
- flock/core/agent/flock_agent_lifecycle.py +177 -0
- flock/core/agent/flock_agent_serialization.py +381 -0
- flock/core/api/endpoints.py +2 -2
- flock/core/api/service.py +2 -2
- flock/core/component/__init__.py +15 -0
- flock/core/{flock_module.py → component/agent_component_base.py} +136 -34
- flock/core/component/evaluation_component.py +56 -0
- flock/core/component/routing_component.py +74 -0
- flock/core/component/utility_component.py +69 -0
- flock/core/config/flock_agent_config.py +49 -2
- flock/core/evaluation/utils.py +3 -2
- flock/core/execution/batch_executor.py +1 -1
- flock/core/execution/evaluation_executor.py +2 -2
- flock/core/execution/opik_executor.py +1 -1
- flock/core/flock.py +147 -493
- flock/core/flock_agent.py +195 -1032
- flock/core/flock_factory.py +114 -90
- flock/core/flock_scheduler.py +1 -1
- flock/core/flock_server_manager.py +8 -8
- flock/core/logging/logging.py +1 -0
- flock/core/mcp/flock_mcp_server.py +53 -48
- flock/core/mcp/{flock_mcp_tool_base.py → flock_mcp_tool.py} +2 -2
- flock/core/mcp/mcp_client.py +9 -9
- flock/core/mcp/mcp_client_manager.py +9 -9
- flock/core/mcp/mcp_config.py +24 -24
- flock/core/mixin/dspy_integration.py +5 -5
- flock/core/orchestration/__init__.py +18 -0
- flock/core/orchestration/flock_batch_processor.py +94 -0
- flock/core/orchestration/flock_evaluator.py +113 -0
- flock/core/orchestration/flock_execution.py +288 -0
- flock/core/orchestration/flock_initialization.py +125 -0
- flock/core/orchestration/flock_server_manager.py +67 -0
- flock/core/orchestration/flock_web_server.py +117 -0
- flock/core/registry/__init__.py +45 -0
- flock/core/registry/agent_registry.py +69 -0
- flock/core/registry/callable_registry.py +139 -0
- flock/core/registry/component_discovery.py +142 -0
- flock/core/registry/component_registry.py +64 -0
- flock/core/registry/config_mapping.py +64 -0
- flock/core/registry/decorators.py +137 -0
- flock/core/registry/registry_hub.py +205 -0
- flock/core/registry/server_registry.py +57 -0
- flock/core/registry/type_registry.py +86 -0
- flock/core/serialization/flock_serializer.py +36 -32
- flock/core/serialization/serialization_utils.py +28 -25
- flock/core/util/hydrator.py +1 -1
- flock/core/util/input_resolver.py +29 -2
- flock/mcp/servers/sse/flock_sse_server.py +10 -10
- flock/mcp/servers/stdio/flock_stdio_server.py +10 -10
- flock/mcp/servers/streamable_http/flock_streamable_http_server.py +10 -10
- flock/mcp/servers/websockets/flock_websocket_server.py +10 -10
- flock/platform/docker_tools.py +3 -3
- flock/webapp/app/chat.py +1 -1
- flock/webapp/app/main.py +9 -5
- flock/webapp/app/services/flock_service.py +1 -1
- flock/webapp/app/services/sharing_store.py +1 -0
- flock/workflow/activities.py +67 -92
- flock/workflow/agent_execution_activity.py +6 -6
- flock/workflow/flock_workflow.py +1 -1
- flock_core-0.5.0b0.dist-info/METADATA +272 -0
- {flock_core-0.4.527.dist-info → flock_core-0.5.0b0.dist-info}/RECORD +82 -95
- flock/core/flock_evaluator.py +0 -60
- flock/core/flock_registry.py +0 -702
- flock/core/flock_router.py +0 -83
- flock/evaluators/__init__.py +0 -1
- flock/evaluators/declarative/__init__.py +0 -1
- flock/evaluators/declarative/declarative_evaluator.py +0 -217
- flock/evaluators/memory/memory_evaluator.py +0 -90
- flock/evaluators/test/test_case_evaluator.py +0 -38
- flock/evaluators/zep/zep_evaluator.py +0 -59
- flock/modules/__init__.py +0 -1
- flock/modules/assertion/__init__.py +0 -1
- flock/modules/assertion/assertion_module.py +0 -286
- flock/modules/callback/__init__.py +0 -1
- flock/modules/callback/callback_module.py +0 -91
- flock/modules/enterprise_memory/README.md +0 -99
- flock/modules/mem0/__init__.py +0 -1
- flock/modules/mem0/mem0_module.py +0 -126
- flock/modules/mem0_async/__init__.py +0 -1
- flock/modules/mem0_async/async_mem0_module.py +0 -126
- flock/modules/memory/__init__.py +0 -1
- flock/modules/memory/memory_module.py +0 -429
- flock/modules/memory/memory_parser.py +0 -125
- flock/modules/memory/memory_storage.py +0 -736
- flock/modules/output/__init__.py +0 -1
- flock/modules/performance/__init__.py +0 -1
- flock/modules/zep/__init__.py +0 -1
- flock/modules/zep/zep_module.py +0 -192
- flock/routers/__init__.py +0 -1
- flock/routers/agent/__init__.py +0 -1
- flock/routers/agent/agent_router.py +0 -236
- flock/routers/agent/handoff_agent.py +0 -58
- flock/routers/default/__init__.py +0 -1
- flock/routers/default/default_router.py +0 -80
- flock/routers/feedback/feedback_router.py +0 -114
- flock/routers/list_generator/list_generator_router.py +0 -166
- flock/routers/llm/__init__.py +0 -1
- flock/routers/llm/llm_router.py +0 -365
- flock/tools/__init__.py +0 -0
- flock/tools/azure_tools.py +0 -781
- flock/tools/code_tools.py +0 -167
- flock/tools/file_tools.py +0 -149
- flock/tools/github_tools.py +0 -157
- flock/tools/markdown_tools.py +0 -205
- flock/tools/system_tools.py +0 -9
- flock/tools/text_tools.py +0 -810
- flock/tools/web_tools.py +0 -90
- flock/tools/zendesk_tools.py +0 -147
- flock_core-0.4.527.dist-info/METADATA +0 -674
- {flock_core-0.4.527.dist-info → flock_core-0.5.0b0.dist-info}/WHEEL +0 -0
- {flock_core-0.4.527.dist-info → flock_core-0.5.0b0.dist-info}/entry_points.txt +0 -0
- {flock_core-0.4.527.dist-info → flock_core-0.5.0b0.dist-info}/licenses/LICENSE +0 -0
flock/modules/memory/memory_storage.py
@@ -1,736 +0,0 @@
-"""Flock memory storage with short-term and long-term memory, concept graph, and clustering.
-
-Based on concept graph spreading activation and embedding-based semantic search.
-"""
-
-import json
-from datetime import datetime
-from enum import Enum
-from typing import Any, Literal
-
-import networkx as nx
-import numpy as np
-from networkx.readwrite import json_graph
-from opentelemetry import trace
-from pydantic import BaseModel, Field, PrivateAttr
-
-# Import SentenceTransformer for production-grade embeddings.
-from sentence_transformers import SentenceTransformer
-
-# Import the Flock logger.
-from flock.core.logging.logging import get_logger
-
-tracer = trace.get_tracer(__name__)
-logger = get_logger("memory")
-
-
-class MemoryScope(Enum):
-    LOCAL = "local"
-    GLOBAL = "global"
-    BOTH = "both"
-
-
-class MemoryOperation(BaseModel):
-    """Base class for memory operations."""
-
-    type: str
-    scope: MemoryScope = MemoryScope.BOTH
-
-
-class CombineOperation(MemoryOperation):
-    """Combine results from multiple operations using weighted scoring."""
-
-    type: Literal["combine"] = "combine"
-    weights: dict[str, float] = Field(
-        default_factory=lambda: {"semantic": 0.7, "exact": 0.3}
-    )
-
-
-class SemanticOperation(MemoryOperation):
-    """Semantic search operation."""
-
-    type: Literal["semantic"] = "semantic"
-    threshold: float = 0.5
-    max_results: int = 10
-    recency_filter: str | None = None  # e.g., "7d", "24h"
-
-
-class ExactOperation(MemoryOperation):
-    """Exact matching operation."""
-
-    type: Literal["exact"] = "exact"
-    keys: list[str] = Field(default_factory=list)
-
-
-class ChunkOperation(MemoryOperation):
-    """Operation for handling chunked entries."""
-
-    type: Literal["chunk"] = "chunk"
-    reconstruct: bool = True
-
-
-class EnrichOperation(MemoryOperation):
-    """Enrich memory with tool results."""
-
-    type: Literal["enrich"] = "enrich"
-    tools: list[str]
-    strategy: Literal["comprehensive", "quick", "validated"] = "comprehensive"
-
-
-class FilterOperation(MemoryOperation):
-    """Filter memory results."""
-
-    type: Literal["filter"] = "filter"
-    recency: str | None = None
-    relevance: float | None = None
-    metadata: dict[str, Any] = Field(default_factory=dict)
-
-
-class SortOperation(MemoryOperation):
-    """Sort memory results."""
-
-    type: Literal["sort"] = "sort"
-    by: Literal["relevance", "recency", "access_count"] = "relevance"
-    ascending: bool = False
-
-
-class MemoryEntry(BaseModel):
-    """A single memory entry."""
-
-    id: str
-    content: str
-    embedding: list[float] | None = None
-    timestamp: datetime = Field(default_factory=datetime.now)
-    access_count: int = Field(default=0)
-    concepts: set[str] = Field(default_factory=set)
-    decay_factor: float = Field(default=1.0)
-
-
-class MemoryGraph(BaseModel):
-    """Graph representation of concept relationships.
-
-    The graph is stored as a JSON string for serialization, while a private attribute holds the actual NetworkX graph.
-    """
-
-    # JSON representation using the node-link format with explicit edges="links" to avoid warnings.
-    graph_json: str = Field(
-        default_factory=lambda: json.dumps(
-            json_graph.node_link_data(nx.Graph(), edges="links")
-        )
-    )
-    # Private attribute for the actual NetworkX graph.
-    _graph: nx.Graph = PrivateAttr()
-
-    def __init__(self, **data):
-        """Initialize the MemoryGraph with a NetworkX graph from JSON data."""
-        super().__init__(**data)
-        try:
-            data_graph = json.loads(self.graph_json)
-            self._graph = json_graph.node_link_graph(data_graph, edges="links")
-            logger.debug(
-                f"MemoryGraph initialized from JSON with {len(self._graph.nodes())} nodes."
-            )
-        except Exception as e:
-            logger.error(f"Failed to load MemoryGraph from JSON: {e}")
-            self._graph = nx.Graph()
-
-    @property
-    def graph(self) -> nx.Graph:
-        """Provides access to the internal NetworkX graph."""
-        return self._graph
-
-    def update_graph_json(self) -> None:
-        """Update the JSON representation based on the current state of the graph."""
-        self.graph_json = json.dumps(
-            json_graph.node_link_data(self._graph, edges="links")
-        )
-        logger.debug("MemoryGraph JSON updated.")
-
-    def add_concepts(self, concepts: set[str]) -> None:
-        """Add a set of concepts to the graph and update their associations."""
-        concept_list = list(concepts)
-        logger.debug(f"Adding concepts: {concept_list}")
-        for concept in concepts:
-            self._graph.add_node(concept)
-        for c1 in concepts:
-            for c2 in concepts:
-                if c1 != c2:
-                    if self._graph.has_edge(c1, c2):
-                        self._graph[c1][c2]["weight"] += 1
-                    else:
-                        self._graph.add_edge(c1, c2, weight=1)
-        self.update_graph_json()
-
-    def spread_activation(
-        self, initial_concepts: set[str], decay_factor: float = 0.5
-    ) -> dict[str, float]:
-        """Spread activation through the concept graph.
-
-        Args:
-            initial_concepts: The starting set of concepts.
-            decay_factor: How much the activation decays at each step.
-
-        Returns:
-            A dictionary mapping each concept to its activation level.
-        """
-        logger.debug(f"Spreading activation from concepts: {initial_concepts}")
-        activated = {concept: 1.0 for concept in initial_concepts}
-        frontier = list(initial_concepts)
-
-        while frontier:
-            current = frontier.pop(0)
-            current_activation = activated[current]
-            for neighbor in self._graph.neighbors(current):
-                weight = self._graph[current][neighbor]["weight"]
-                new_activation = current_activation * decay_factor * weight
-                if (
-                    neighbor not in activated
-                    or activated[neighbor] < new_activation
-                ):
-                    activated[neighbor] = new_activation
-                    frontier.append(neighbor)
-
-        logger.debug(f"Activation levels: {activated}")
-        return activated
-
-    def save_as_image(self, filename: str = "memory_graph.png") -> None:
-        """Visualize the concept graph and save it as a PNG image with improved readability.
-
-        This method uses matplotlib to create a clear and readable visualization by:
-        - Using a larger figure size
-        - Implementing better node spacing
-        - Adding adjustable text labels
-        - Using a more visually appealing color scheme
-        - Adding edge weight visualization
-
-        Args:
-            filename: The path (including .png) where the image will be saved.
-        """
-        import matplotlib
-
-        matplotlib.use("Agg")
-        import matplotlib.pyplot as plt
-
-        logger.info(f"Saving MemoryGraph visualization to '{filename}'")
-
-        if self._graph.number_of_nodes() == 0:
-            logger.warning("MemoryGraph is empty; skipping image creation.")
-            return
-
-        try:
-            # Create a larger figure with higher DPI
-            plt.figure(figsize=(16, 12), dpi=100)
-
-            # Use Kamada-Kawai layout for better node distribution
-            pos = nx.kamada_kawai_layout(self._graph)
-
-            # Calculate node sizes based on degree
-            node_degrees = dict(self._graph.degree())
-            node_sizes = [
-                2000 * (1 + node_degrees[node] * 0.2)
-                for node in self._graph.nodes()
-            ]
-
-            # Calculate edge weights for width and transparency
-            edge_weights = [
-                d["weight"] for (u, v, d) in self._graph.edges(data=True)
-            ]
-            max_weight = max(edge_weights) if edge_weights else 1
-            edge_widths = [1 + (w / max_weight) * 3 for w in edge_weights]
-            edge_alphas = [0.2 + (w / max_weight) * 0.8 for w in edge_weights]
-
-            # Draw the network with custom styling
-            # Nodes
-            nx.draw_networkx_nodes(
-                self._graph,
-                pos,
-                node_size=node_sizes,
-                node_color="#5fa4d4",  # Lighter blue
-                alpha=0.7,
-                edgecolors="white",
-            )
-
-            # Edges with varying width and transparency
-            for (u, v, d), width, alpha in zip(
-                self._graph.edges(data=True), edge_widths, edge_alphas
-            ):
-                nx.draw_networkx_edges(
-                    self._graph,
-                    pos,
-                    edgelist=[(u, v)],
-                    width=width,
-                    alpha=alpha,
-                    edge_color="#2c3e50",  # Darker blue-grey
-                )
-
-            # Add labels with better positioning and background
-            labels = nx.get_node_attributes(self._graph, "name") or {
-                node: node for node in self._graph.nodes()
-            }
-            label_pos = {
-                node: (x, y + 0.02) for node, (x, y) in pos.items()
-            }  # Slightly offset labels
-
-            # Draw labels with white background for better readability
-            for node, (x, y) in label_pos.items():
-                plt.text(
-                    x,
-                    y,
-                    labels[node],
-                    horizontalalignment="center",
-                    verticalalignment="center",
-                    fontsize=8,
-                    fontweight="bold",
-                    bbox=dict(
-                        facecolor="white", edgecolor="none", alpha=0.7, pad=2.0
-                    ),
-                )
-
-            # Add edge weight labels for significant weights
-            edge_labels = nx.get_edge_attributes(self._graph, "weight")
-            significant_edges = {
-                (u, v): w
-                for (u, v), w in edge_labels.items()
-                if w > max_weight * 0.3
-            }
-            if significant_edges:
-                nx.draw_networkx_edge_labels(
-                    self._graph,
-                    pos,
-                    edge_labels=significant_edges,
-                    font_size=6,
-                    bbox=dict(facecolor="white", edgecolor="none", alpha=0.7),
-                )
-
-            # Improve layout
-            plt.title("Memory Concept Graph", fontsize=16, pad=20)
-            plt.axis("off")
-
-            # Add padding and save
-            plt.tight_layout(pad=2.0)
-            plt.savefig(filename, bbox_inches="tight", facecolor="white")
-            plt.close()
-
-            logger.info(f"MemoryGraph image saved successfully to '{filename}'")
-
-        except Exception as e:
-            logger.error(f"Failed to save MemoryGraph image: {e}")
-            plt.close()
-
-
-class FlockMemoryStore(BaseModel):
-    """Enhanced Flock memory storage with short-term and long-term memory.
-
-    including embedding-based semantic search, exact matching, and result combination.
-    """
-
-    short_term: list[MemoryEntry] = Field(default_factory=list)
-    long_term: list[MemoryEntry] = Field(default_factory=list)
-    concept_graph: MemoryGraph = Field(default_factory=MemoryGraph)
-    clusters: dict[int, list[MemoryEntry]] = Field(default_factory=dict)
-    # Instead of np.ndarray, store centroids as lists of floats.
-    cluster_centroids: dict[int, list[float]] = Field(default_factory=dict)
-    # The embedding model is stored as a private attribute, as it's not serializable.
-    _embedding_model: SentenceTransformer | None = PrivateAttr(default=None)
-
-    @classmethod
-    def load_from_file(cls, file_path: str | None = None) -> "FlockMemoryStore":
-        """Load a memory store from a JSON file.
-
-        Args:
-            file_path: Path to the JSON file containing the serialized memory store.
-                If None, returns an empty memory store.
-
-        Returns:
-            FlockMemoryStore: A new memory store instance with loaded data.
-
-        Raises:
-            FileNotFoundError: If the specified file doesn't exist
-            JSONDecodeError: If the file contains invalid JSON
-            ValueError: If the JSON structure is invalid
-        """
-        if file_path is None:
-            logger.debug("No file path provided, creating new memory store")
-            return cls()
-
-        try:
-            logger.info(f"Loading memory store from {file_path}")
-            with open(file_path) as f:
-                data = json.load(f)
-
-            # Initialize a new store
-            store = cls()
-
-            # Load short-term memory entries
-            store.short_term = [
-                MemoryEntry(
-                    id=entry["id"],
-                    content=entry["content"],
-                    embedding=entry.get("embedding"),
-                    timestamp=datetime.fromisoformat(entry["timestamp"]),
-                    access_count=entry.get("access_count", 0),
-                    concepts=set(entry.get("concepts", [])),
-                    decay_factor=entry.get("decay_factor", 1.0),
-                )
-                for entry in data.get("short_term", [])
-            ]
-
-            # Load long-term memory entries
-            store.long_term = [
-                MemoryEntry(
-                    id=entry["id"],
-                    content=entry["content"],
-                    embedding=entry.get("embedding"),
-                    timestamp=datetime.fromisoformat(entry["timestamp"]),
-                    access_count=entry.get("access_count", 0),
-                    concepts=set(entry.get("concepts", [])),
-                    decay_factor=entry.get("decay_factor", 1.0),
-                )
-                for entry in data.get("long_term", [])
-            ]
-
-            # Load concept graph
-            if "concept_graph" in data:
-                graph_data = json.loads(data["concept_graph"]["graph_json"])
-                store.concept_graph = MemoryGraph(
-                    graph_json=json.dumps(graph_data)
-                )
-
-            # Load clusters
-            if "clusters" in data:
-                store.clusters = {
-                    int(k): [
-                        MemoryEntry(
-                            id=entry["id"],
-                            content=entry["content"],
-                            embedding=entry.get("embedding"),
-                            timestamp=datetime.fromisoformat(
-                                entry["timestamp"]
-                            ),
-                            access_count=entry.get("access_count", 0),
-                            concepts=set(entry.get("concepts", [])),
-                            decay_factor=entry.get("decay_factor", 1.0),
-                        )
-                        for entry in v
-                    ]
-                    for k, v in data["clusters"].items()
-                }
-
-            # Load cluster centroids
-            if "cluster_centroids" in data:
-                store.cluster_centroids = {
-                    int(k): v for k, v in data["cluster_centroids"].items()
-                }
-
-            # Initialize the embedding model
-            store._embedding_model = None  # Will be lazy-loaded when needed
-
-            logger.info(
-                f"Successfully loaded memory store with "
-                f"{len(store.short_term)} short-term and "
-                f"{len(store.long_term)} long-term entries"
-            )
-            return store
-
-        except FileNotFoundError:
-            logger.warning(
-                f"Memory file {file_path} not found, creating new store"
-            )
-            return cls()
-        except json.JSONDecodeError as e:
-            logger.error(f"Invalid JSON in memory file: {e}")
-            raise
-        except Exception as e:
-            logger.error(f"Error loading memory store: {e}")
-            raise ValueError(f"Failed to load memory store: {e}")
-
-    @classmethod
-    def merge_stores(
-        cls, stores: list["FlockMemoryStore"]
-    ) -> "FlockMemoryStore":
-        """Merge multiple memory stores into a single store.
-
-        Args:
-            stores: List of FlockMemoryStore instances to merge
-
-        Returns:
-            FlockMemoryStore: A new memory store containing merged data
-        """
-        merged = cls()
-
-        # Merge short-term and long-term memories
-        for store in stores:
-            merged.short_term.extend(store.short_term)
-            merged.long_term.extend(store.long_term)
-
-        # Merge concept graphs
-        merged_graph = nx.Graph()
-        for store in stores:
-            if store.concept_graph and store.concept_graph.graph:
-                merged_graph = nx.compose(
-                    merged_graph, store.concept_graph.graph
-                )
-
-        merged.concept_graph = MemoryGraph(
-            graph_json=json.dumps(
-                nx.node_link_data(merged_graph, edges="links")
-            )
-        )
-
-        # Recompute clusters for the merged data
-        if merged.short_term:
-            merged._update_clusters()
-
-        return merged
-
-    def get_embedding_model(self) -> SentenceTransformer:
-        """Initialize and return the SentenceTransformer model.
-
-        Uses "all-MiniLM-L6-v2" as the default model.
-        """
-        if self._embedding_model is None:
-            try:
-                logger.debug(
-                    "Loading SentenceTransformer model 'all-MiniLM-L6-v2'."
-                )
-                self._embedding_model = SentenceTransformer("all-MiniLM-L6-v2")
-            except Exception as e:
-                logger.error(f"Failed to load embedding model: {e}")
-                raise RuntimeError(f"Failed to load embedding model: {e}")
-        return self._embedding_model
-
-    def compute_embedding(self, text: str) -> np.ndarray:
-        """Compute and return the embedding for the provided text as a NumPy array."""
-        logger.debug(
-            f"Computing embedding for text: {text[:100].replace('{', '{{').replace('}', '}}')}..."
-        )  # Log first 30 chars for brevity.
-        model = self.get_embedding_model()
-        try:
-            embedding = model.encode(text, convert_to_numpy=True)
-            return embedding
-        except Exception as e:
-            logger.error(f"Error computing embedding: {e}")
-            raise RuntimeError(f"Error computing embedding: {e}")
-
-    def _calculate_similarity(
-        self, query_embedding: np.ndarray, entry_embedding: np.ndarray
-    ) -> float:
-        """Compute the cosine similarity between two embeddings.
-
-        Returns a float between 0 and 1.
-        """
-        try:
-            norm_query = np.linalg.norm(query_embedding)
-            norm_entry = np.linalg.norm(entry_embedding)
-            if norm_query == 0 or norm_entry == 0:
-                return 0.0
-            similarity = float(
-                np.dot(query_embedding, entry_embedding)
-                / (norm_query * norm_entry)
-            )
-            return similarity
-        except Exception as e:
-            logger.error(f"Error computing similarity: {e}")
-            raise RuntimeError(f"Error computing similarity: {e}")
-
-    def exact_match(self, inputs: dict[str, Any]) -> list[MemoryEntry]:
-        """Perform an exact key-based lookup in short-term memory.
-
-        Returns entries where all provided key-value pairs exist in the entry's inputs.
-        """
-        logger.debug(f"Performing exact match lookup with inputs: {inputs}")
-        matches = []
-        for entry in self.short_term:
-            if all(item in entry.inputs.items() for item in inputs.items()):
-                matches.append(entry)
-        logger.debug(f"Exact match found {len(matches)} entries.")
-        return matches
-
-    def combine_results(
-        self, inputs: dict[str, Any], weights: dict[str, float]
-    ) -> dict[str, Any]:
-        """Combine semantic and exact match results using the provided weights.
-
-        Args:
-            inputs: Input dictionary to search memory.
-            weights: Dictionary with keys "semantic" and "exact" for weighting.
-
-        Returns:
-            A dictionary with "combined_results" as a sorted list of memory entries.
-        """
-        logger.debug(
-            f"Combining results for inputs: {inputs} with weights: {weights}"
-        )
-        query_text = " ".join(str(value) for value in inputs.values())
-        query_embedding = self.compute_embedding(query_text)
-
-        semantic_matches = self.retrieve(
-            query_embedding, set(inputs.values()), similarity_threshold=0.8
-        )
-        exact_matches = self.exact_match(inputs)
-
-        combined: dict[str, dict[str, Any]] = {}
-        for entry in semantic_matches:
-            if entry.embedding is None:
-                continue
-            semantic_score = self._calculate_similarity(
-                query_embedding, np.array(entry.embedding)
-            )
-            combined[entry.id] = {
-                "entry": entry,
-                "semantic_score": semantic_score * weights.get("semantic", 0.7),
-                "exact_score": 0.0,
-            }
-        for entry in exact_matches:
-            if entry.id in combined:
-                combined[entry.id]["exact_score"] = 1.0 * weights.get(
-                    "exact", 0.3
-                )
-            else:
-                combined[entry.id] = {
-                    "entry": entry,
-                    "semantic_score": 0.0,
-                    "exact_score": 1.0 * weights.get("exact", 0.3),
-                }
-        results: list[tuple[float, MemoryEntry]] = []
-        for data in combined.values():
-            total_score = data["semantic_score"] + data["exact_score"]
-            results.append((total_score, data["entry"]))
-        results.sort(key=lambda x: x[0], reverse=True)
-        logger.debug(f"Combined results count: {len(results)}")
-        return {"combined_results": [entry for score, entry in results]}
-
-    def add_entry(self, entry: MemoryEntry) -> None:
-        """Add a new memory entry to short-term memory, update the concept graph and clusters.
-
-        and check for promotion to long-term memory.
-        """
-        with tracer.start_as_current_span("memory.add_entry") as span:
-            logger.info(f"Adding memory entry with id: {entry.id}")
-            span.set_attribute("entry.id", entry.id)
-            self.short_term.append(entry)
-            self.concept_graph.add_concepts(entry.concepts)
-            self._update_clusters()
-            if entry.access_count > 10:
-                self._promote_to_long_term(entry)
-
-    def _promote_to_long_term(self, entry: MemoryEntry) -> None:
-        """Promote an entry to long-term memory."""
-        logger.info(f"Promoting entry {entry.id} to long-term memory.")
-        if entry not in self.long_term:
-            self.long_term.append(entry)
-
-    def retrieve(
-        self,
-        query_embedding: np.ndarray,
-        query_concepts: set[str],
-        similarity_threshold: float = 0.8,
-        exclude_last_n: int = 0,
-    ) -> list[MemoryEntry]:
-        """Retrieve memory entries using semantic similarity and concept-based activation."""
-        with tracer.start_as_current_span("memory.retrieve") as span:
-            logger.debug("Retrieving memory entries...")
-            results = []
-            current_time = datetime.now()
-            decay_rate = 0.0001
-            norm_query = query_embedding / (
-                np.linalg.norm(query_embedding) + 1e-8
-            )
-
-            entries = (
-                self.short_term[:-exclude_last_n]
-                if exclude_last_n > 0
-                else self.short_term
-            )
-
-            for entry in entries:
-                if entry.embedding is None:
-                    continue
-
-                # Calculate base similarity
-                entry_embedding = np.array(entry.embedding)
-                norm_entry = entry_embedding / (
-                    np.linalg.norm(entry_embedding) + 1e-8
-                )
-                similarity = float(np.dot(norm_query, norm_entry))
-
-                # Calculate modifiers
-                time_diff = (current_time - entry.timestamp).total_seconds()
-                decay = np.exp(-decay_rate * time_diff)
-                # Add 1 to base score so new entries aren't zeroed out
-                reinforcement = 1.0 + np.log1p(entry.access_count)
-
-                # Calculate final score
-                final_score = (
-                    similarity * decay * reinforcement * entry.decay_factor
-                )
-
-                span.add_event(
-                    "memory score",
-                    attributes={
-                        "entry_id": entry.id,
-                        "similarity": similarity,
-                        "final_score": final_score,
-                    },
-                )
-
-                # If base similarity passes threshold, include in results
-                if similarity >= similarity_threshold:
-                    results.append((final_score, entry))
-
-            # Update access counts and decay for retrieved entries
-            for _, entry in results:
-                entry.access_count += 1
-                self._update_decay_factors(entry)
-
-            # Sort by final score
-            results.sort(key=lambda x: x[0], reverse=True)
-            logger.debug(f"Retrieved {len(results)} memory entries.")
-            return [entry for score, entry in results]
-
-    def _update_decay_factors(self, retrieved_entry: MemoryEntry) -> None:
-        """Update decay factors: increase for the retrieved entry and decrease for others."""
-        logger.debug(f"Updating decay factor for entry {retrieved_entry.id}")
-        retrieved_entry.decay_factor *= 1.1
-        for entry in self.short_term:
-            if entry != retrieved_entry:
-                entry.decay_factor *= 0.9
-
-    def _update_clusters(self) -> None:
-        """Update memory clusters using k-means clustering on entry embeddings."""
-        logger.debug("Updating memory clusters...")
-        if len(self.short_term) < 2:
-            logger.debug("Not enough entries for clustering.")
-            return
-
-        valid_entries = [
-            entry for entry in self.short_term if entry.embedding is not None
-        ]
-        if not valid_entries:
-            logger.debug(
-                "No valid entries with embeddings found for clustering."
-            )
-            return
-
-        embeddings = [np.array(entry.embedding) for entry in valid_entries]
-        embeddings_matrix = np.vstack(embeddings)
-
-        from sklearn.cluster import KMeans
-
-        n_clusters = min(10, len(embeddings))
-        kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-        labels = kmeans.fit_predict(embeddings_matrix)
-
-        self.clusters.clear()
-        self.cluster_centroids.clear()
-
-        for i in range(n_clusters):
-            cluster_entries = [
-                entry
-                for entry, label in zip(valid_entries, labels)
-                if label == i
-            ]
-            self.clusters[i] = cluster_entries
-            # Convert the centroid (np.ndarray) to a list of floats.
-            self.cluster_centroids[i] = kmeans.cluster_centers_[i].tolist()
-        logger.debug(f"Clustering complete with {n_clusters} clusters.")