flock_core-0.3.6-py3-none-any.whl → flock_core-0.3.10-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of flock-core might be problematic.
- flock/cli/assets/release_notes.md +37 -9
- flock/core/context/context_manager.py +3 -0
- flock/core/context/context_vars.py +1 -0
- flock/core/flock.py +2 -1
- flock/core/flock_agent.py +10 -23
- flock/core/flock_factory.py +9 -6
- flock/core/flock_module.py +3 -1
- flock/core/flock_router.py +70 -0
- flock/core/util/cli_helper.py +1 -3
- flock/evaluators/zep/zep_evaluator.py +2 -0
- flock/modules/performance/metrics_module.py +185 -189
- flock/routers/__init__.py +1 -0
- flock/routers/agent/__init__.py +1 -0
- flock/routers/agent/agent_router.py +234 -0
- flock/routers/agent/handoff_agent.py +58 -0
- flock/routers/default/__init__.py +1 -0
- flock/routers/default/default_router.py +76 -0
- flock/routers/llm/__init__.py +1 -0
- flock/routers/llm/llm_router.py +363 -0
- flock/workflow/activities.py +77 -34
- {flock_core-0.3.6.dist-info → flock_core-0.3.10.dist-info}/METADATA +6 -6
- {flock_core-0.3.6.dist-info → flock_core-0.3.10.dist-info}/RECORD +25 -16
- {flock_core-0.3.6.dist-info → flock_core-0.3.10.dist-info}/WHEEL +0 -0
- {flock_core-0.3.6.dist-info → flock_core-0.3.10.dist-info}/entry_points.txt +0 -0
- {flock_core-0.3.6.dist-info → flock_core-0.3.10.dist-info}/licenses/LICENSE +0 -0
flock/modules/performance/metrics_module.py (+185 -189):

```diff
@@ -1,15 +1,15 @@
 """Performance and metrics tracking for Flock agents."""
 
-import time
 import json
 import os
-
-from typing import Any, Dict, List, Optional, Union, Literal
+import time
 from collections import defaultdict
+from datetime import datetime
+from typing import Any, Literal
 
+import numpy as np
 import psutil
 from pydantic import BaseModel, Field, validator
-import numpy as np
 
 from flock.core.flock_agent import FlockAgent
 from flock.core.flock_module import FlockModule, FlockModuleConfig
```
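The import changes above are the key to reading the rest of the diff: 0.3.10 drops the `typing` generics (`Dict`, `List`, `Optional`, `Union`) in favor of the builtin generics and `X | Y` unions of PEP 585/604, which assume Python 3.9/3.10+. A minimal illustration of the pattern (the `lookup_*` functions are hypothetical, not package code):

```python
from typing import Dict, List, Optional, Union

# 0.3.6 style, as removed in this diff:
def lookup_old(name: Optional[str] = None) -> Dict[str, List[Union[int, float]]]:
    return {}

# 0.3.10 style, as added (no typing imports needed on Python 3.10+):
def lookup_new(name: str | None = None) -> dict[str, list[int | float]]:
    return {}
```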
```diff
@@ -17,146 +17,144 @@ from flock.core.flock_module import FlockModule, FlockModuleConfig
 
 class MetricPoint(BaseModel):
     """Single metric measurement."""
+
     timestamp: datetime
-    value: Union[int, float, str]
-    tags: Dict[str, str] = {}
+    value: int | float | str
+    tags: dict[str, str] = {}
+
 
 class MetricsModuleConfig(FlockModuleConfig):
     """Configuration for performance metrics collection."""
-
+
     # Collection settings
     collect_timing: bool = Field(
-        default=True,
-        description="Collect timing metrics"
+        default=True, description="Collect timing metrics"
     )
     collect_memory: bool = Field(
-        default=True,
-        description="Collect memory usage"
+        default=True, description="Collect memory usage"
     )
     collect_token_usage: bool = Field(
-        default=True,
-        description="Collect token usage stats"
+        default=True, description="Collect token usage stats"
    )
-    collect_cpu: bool = Field(
-        default=True,
-        description="Collect CPU usage"
-    )
-
+    collect_cpu: bool = Field(default=True, description="Collect CPU usage")
+
     # Storage settings
     storage_type: Literal["json", "prometheus", "memory"] = Field(
-        default="json",
-        description="Where to store metrics"
+        default="json", description="Where to store metrics"
     )
     metrics_dir: str = Field(
-        default="metrics/",
-        description="Directory for metrics storage"
+        default="metrics/", description="Directory for metrics storage"
     )
-
+
     # Aggregation settings
     aggregation_interval: str = Field(
-        default="1h",
-        description="Interval for metric aggregation"
-    )
-    retention_days: int = Field(
-        default=30,
-        description="Days to keep metrics"
+        default="1h", description="Interval for metric aggregation"
     )
-
+    retention_days: int = Field(default=30, description="Days to keep metrics")
+
     # Alerting settings
     alert_on_high_latency: bool = Field(
-        default=True,
-        description="Alert on high latency"
+        default=True, description="Alert on high latency"
     )
     latency_threshold_ms: int = Field(
-        default=1000,
-        description="Threshold for latency alerts"
+        default=1000, description="Threshold for latency alerts"
     )
-
+
     @validator("aggregation_interval")
     def validate_interval(cls, v):
         """Validate time interval format."""
-        if v[-1] not in ['s', 'm', 'h', 'd']:
+        if v[-1] not in ["s", "m", "h", "d"]:
             raise ValueError("Interval must end with s, m, h, or d")
         return v
 
+
 class MetricsModule(FlockModule):
     """Module for collecting and analyzing agent performance metrics."""
-
+
     name: str = "performance_metrics"
     config: MetricsModuleConfig = Field(
         default_factory=MetricsModuleConfig,
-        description="Performance metrics configuration"
+        description="Performance metrics configuration",
     )
-
+
     def __init__(self, name, config):
         super().__init__(name=name, config=config)
         self._metrics = defaultdict(list)
-        self._start_time: Optional[float] = None
-        self._start_memory: Optional[int] = None
-
+        self._start_time: float | None = None
+        self._start_memory: int | None = None
+
         # Set up storage
         if self.config.storage_type == "json":
             os.makedirs(self.config.metrics_dir, exist_ok=True)
-
+
         # Set up prometheus if needed
         if self.config.storage_type == "prometheus":
             try:
                 from prometheus_client import Counter, Gauge, Histogram
-
+
                 self._prom_latency = Histogram(
-                    "flock_agent_latency_seconds",
-                    "Time taken for agent evaluation",
-                    ["agent_name"]
+                    "flock_agent_latency_seconds",
+                    "Time taken for agent evaluation",
+                    ["agent_name"],
                 )
                 self._prom_memory = Gauge(
-                    "flock_agent_memory_bytes",
-                    "Memory usage by agent",
-                    ["agent_name"]
+                    "flock_agent_memory_bytes",
+                    "Memory usage by agent",
+                    ["agent_name"],
                 )
                 self._prom_tokens = Counter(
-                    "flock_agent_tokens_total",
-                    "Token usage by agent",
-                    ["agent_name", "type"]
+                    "flock_agent_tokens_total",
+                    "Token usage by agent",
+                    ["agent_name", "type"],
                 )
                 self._prom_errors = Counter(
-                    "flock_agent_errors_total",
-                    "Error count by agent",
-                    ["agent_name", "error_type"]
+                    "flock_agent_errors_total",
+                    "Error count by agent",
+                    ["agent_name", "error_type"],
                 )
             except ImportError:
                 self.config.storage_type = "json"
 
     """Fixes for metrics summary calculation."""
 
-    def _load_metrics_from_files(self, metric_name: str = None) -> Dict[str, List[MetricPoint]]:
+    def _load_metrics_from_files(
+        self, metric_name: str = None
+    ) -> dict[str, list[MetricPoint]]:
         """Load metrics from JSON files."""
         metrics = defaultdict(list)
-
+
         try:
             # Get all metric files
-            files = [f for f in os.listdir(self.config.metrics_dir)
-                     if f.endswith(".json")
-                     and not f.startswith("summary_")]
+            files = [
+                f
+                for f in os.listdir(self.config.metrics_dir)
+                if f.endswith(".json") and not f.startswith("summary_")
+            ]
+
             # Filter by metric name if specified
             if metric_name:
                 files = [f for f in files if f.startswith(f"{metric_name}_")]
-
+
             for filename in files:
                 filepath = os.path.join(self.config.metrics_dir, filename)
-                with open(filepath, "r") as f:
+                with open(filepath) as f:
                     for line in f:
                         try:
                             data = json.loads(line)
                             point = MetricPoint(
-                                timestamp=datetime.fromisoformat(data["timestamp"]),
-                                value=data["value"],
-                                tags=data["tags"]
+                                timestamp=datetime.fromisoformat(
+                                    data["timestamp"]
+                                ),
+                                value=data["value"],
+                                tags=data["tags"],
                             )
-                            name = filename.split("_")[0]  # Get metric name from filename
+                            name = filename.split("_")[
+                                0
+                            ]  # Get metric name from filename
                             metrics[name].append(point)
                         except json.JSONDecodeError:
                             continue
-
+
             return dict(metrics)
         except Exception as e:
             print(f"Error loading metrics from files: {e}")
```
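The hunk above mostly reflows `Field(...)` declarations onto single lines, so the configuration surface and defaults are unchanged between versions. A minimal construction sketch using the fields shown (values are the defaults from the diff; the import path follows the file listing at the top, and it assumes the inherited `FlockModuleConfig` fields all have defaults):

```python
from flock.modules.performance.metrics_module import (
    MetricsModule,
    MetricsModuleConfig,
)

config = MetricsModuleConfig(
    collect_timing=True,
    collect_memory=True,
    collect_token_usage=True,
    collect_cpu=True,
    storage_type="json",        # "json", "prometheus", or "memory"
    metrics_dir="metrics/",
    aggregation_interval="1h",  # validator requires an s/m/h/d suffix
    retention_days=30,
    alert_on_high_latency=True,
    latency_threshold_ms=1000,
)
module = MetricsModule(name="performance_metrics", config=config)
```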
```diff
@@ -164,10 +162,10 @@ class MetricsModule(FlockModule):
 
     def get_metrics(
         self,
-        metric_name: Optional[str] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None,
-    ) -> Dict[str, List[MetricPoint]]:
+        metric_name: str | None = None,
+        start_time: datetime | None = None,
+        end_time: datetime | None = None,
+    ) -> dict[str, list[MetricPoint]]:
         """Get recorded metrics with optional filtering."""
         # Get metrics from appropriate source
         if self.config.storage_type == "json":
@@ -176,97 +174,95 @@ class MetricsModule(FlockModule):
             metrics = self._metrics
             if metric_name:
                 metrics = {metric_name: metrics[metric_name]}
-
+
         # Apply time filtering if needed
         if start_time or end_time:
             filtered_metrics = defaultdict(list)
             for name, points in metrics.items():
                 filtered_points = [
-                    p for p in points
-                    if (not start_time or p.timestamp >= start_time) and
-                    (not end_time or p.timestamp <= end_time)
+                    p
+                    for p in points
+                    if (not start_time or p.timestamp >= start_time)
+                    and (not end_time or p.timestamp <= end_time)
                 ]
                 filtered_metrics[name] = filtered_points
             metrics = filtered_metrics
-
+
         return dict(metrics)
 
     def get_statistics(
-        self,
-        metric_name: str,
-        percentiles: List[float] = [50, 90, 95, 99]
-    ) -> Dict[str, float]:
+        self, metric_name: str, percentiles: list[float] = [50, 90, 95, 99]
+    ) -> dict[str, float]:
         """Calculate statistics for a metric."""
         # Get all points for this metric
         metrics = self.get_metrics(metric_name=metric_name)
         points = metrics.get(metric_name, [])
-
+
         if not points:
             return {}
-
+
         values = [p.value for p in points if isinstance(p.value, (int, float))]
         if not values:
             return {}
-
+
         stats = {
             "min": min(values),
             "max": max(values),
-            "mean": float(np.mean(values)),  # Convert to float for JSON serialization
+            "mean": float(
+                np.mean(values)
+            ),  # Convert to float for JSON serialization
             "std": float(np.std(values)),
             "count": len(values),
-            "last_value": values[-1]
+            "last_value": values[-1],
         }
-
+
         for p in percentiles:
             stats[f"p{p}"] = float(np.percentile(values, p))
-
+
         return stats
 
-    async def terminate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> None:
+    async def terminate(
+        self, agent: FlockAgent, inputs: dict[str, Any], result: dict[str, Any]
+    ) -> None:
         """Clean up and final metric recording."""
         if self.config.storage_type == "json":
             # Save aggregated metrics
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             summary_file = os.path.join(
                 self.config.metrics_dir,
-                f"summary_{agent.name}_{timestamp}.json"
+                f"summary_{agent.name}_{timestamp}.json",
             )
-
+
             # Calculate summary for all metrics
             summary = {
                 "agent": agent.name,
                 "timestamp": timestamp,
-                "metrics": {}
+                "metrics": {},
             }
-
+
             # Get all unique metric names from files
             all_metrics = self._load_metrics_from_files()
-
+
             for metric_name in all_metrics.keys():
                 stats = self.get_statistics(metric_name)
                 if stats:  # Only include metrics that have data
                     summary["metrics"][metric_name] = stats
-
+
             with open(summary_file, "w") as f:
                 json.dump(summary, f, indent=2)
-
+
     def _record_metric(
-        self,
-        name: str,
-        value: Union[int, float, str],
-        tags: Dict[str, str] = None
+        self, name: str, value: int | float | str, tags: dict[str, str] = None
     ) -> None:
         """Record a single metric point."""
         point = MetricPoint(
-            timestamp=datetime.now(),
-            value=value,
-            tags=tags or {}
+            timestamp=datetime.now(), value=value, tags=tags or {}
         )
-
+
         # Store metric
         if self.config.storage_type == "memory":
             self._metrics[name].append(point)
-
+
         elif self.config.storage_type == "prometheus":
             if name == "latency":
                 self._prom_latency.labels(**tags).observe(value)
```
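`get_statistics` above reduces a metric's numeric points to a summary dict with numpy. The same computation in isolation, using made-up latency samples in seconds:

```python
import numpy as np

values = [0.12, 0.09, 0.31, 0.08, 0.10]  # hypothetical latency samples
stats = {
    "min": min(values),
    "max": max(values),
    "mean": float(np.mean(values)),  # float() keeps numpy scalars JSON-serializable
    "std": float(np.std(values)),
    "count": len(values),
    "last_value": values[-1],
}
for p in [50, 90, 95, 99]:
    stats[f"p{p}"] = float(np.percentile(values, p))
print(stats)
```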
```diff
@@ -274,47 +270,47 @@ class MetricsModule(FlockModule):
                 self._prom_memory.labels(**tags).set(value)
             elif name == "tokens":
                 self._prom_tokens.labels(**tags).inc(value)
-
+
         elif self.config.storage_type == "json":
             self._save_metric_to_file(name, point)
-
+
     def _save_metric_to_file(self, name: str, point: MetricPoint) -> None:
         """Save metric to JSON file."""
         filename = f"{name}_{point.timestamp.strftime('%Y%m')}.json"
         filepath = os.path.join(self.config.metrics_dir, filename)
-
+
         data = {
             "timestamp": point.timestamp.isoformat(),
             "value": point.value,
-            "tags": point.tags
+            "tags": point.tags,
         }
-
+
         # Append to file
         with open(filepath, "a") as f:
             f.write(json.dumps(data) + "\n")
-
+
     def _get_tokenizer(self, model: str):
         """Get the appropriate tokenizer for the model."""
         try:
             import tiktoken
-
+
             # Handle different model naming conventions
-            if model.startswith('openai/'):
+            if model.startswith("openai/"):
                 model = model[7:]  # Strip 'openai/' prefix
-
+
             try:
                 return tiktoken.encoding_for_model(model)
             except KeyError:
                 # Fallback to cl100k_base for unknown models
                 return tiktoken.get_encoding("cl100k_base")
-
+
         except ImportError:
             return None
-
+
     def _calculate_token_usage(self, text: str, model: str = "gpt-4") -> int:
         """Calculate token count using tiktoken when available."""
         tokenizer = self._get_tokenizer(model)
-
+
         if tokenizer:
             # Use tiktoken for accurate count
             return len(tokenizer.encode(text))
```
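`_save_metric_to_file` above shards metrics into one append-only JSON-lines file per metric per month; `_load_metrics_from_files` relies on that naming when it recovers the metric name via `filename.split("_")[0]` and skips `summary_` files. A small sketch of the scheme (the timestamp is made up):

```python
from datetime import datetime

name = "latency"
ts = datetime(2024, 5, 1, 12, 30, 0)  # hypothetical MetricPoint timestamp
print(f"{name}_{ts.strftime('%Y%m')}.json")  # -> latency_202405.json

# Each appended line is one serialized point, e.g.:
# {"timestamp": "2024-05-01T12:30:00", "value": 0.42, "tags": {"agent": "a"}}
```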
```diff
@@ -322,156 +318,156 @@ class MetricsModule(FlockModule):
         # Fallback to estimation if tiktoken not available
         # Simple estimation - words / 0.75 for average tokens per word
         token_estimate = int(len(text.split()) / 0.75)
-
+
         # Log warning about estimation
-        print(
-            f"Warning: Using estimated token count. Install tiktoken for accurate counting.")
+        print(
+            f"Warning: Using estimated token count. Install tiktoken for accurate counting."
+        )
+
     def _should_alert(self, metric: str, value: float) -> bool:
         """Check if metric should trigger alert."""
         if metric == "latency" and self.config.alert_on_high_latency:
             return value * 1000 > self.config.latency_threshold_ms
         return False
-
-
 
-    async def initialize(self, agent: FlockAgent,
-                         inputs: Dict[str, Any]) -> None:
+    async def initialize(
+        self, agent: FlockAgent, inputs: dict[str, Any]
+    ) -> None:
         """Initialize metrics collection."""
         self._start_time = time.time()
-
+
         if self.config.collect_memory:
             self._start_memory = psutil.Process().memory_info().rss
             self._record_metric(
                 "memory",
                 self._start_memory,
-                {"agent": agent.name, "phase": "start"}
+                {"agent": agent.name, "phase": "start"},
             )
 
-
     def _calculate_cost(
-        self,
-        text: str,
-        model: str,
-        is_completion: bool = False
+        self, text: str, model: str, is_completion: bool = False
     ) -> tuple[int, float]:
         """Calculate both token count and cost."""
         # Get token count
-        token_count = self._calculate_token_usage(text, model)
-
-        from litellm import cost_per_token
+        try:
+            from litellm import cost_per_token
 
-        # Calculate total cost
-        if is_completion:
-            total_cost = token_count * cost_per_token(model, completion_tokens=token_count)
-        else:
-            total_cost = token_count * cost_per_token(model, prompt_tokens=token_count)
-
-        return token_count, total_cost
-
-    async def pre_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any]) -> Dict[str, Any]:
+            token_count = self._calculate_token_usage(text, model)
+            # Calculate total cost
+            if is_completion:
+                total_cost = token_count * cost_per_token(
+                    model, completion_tokens=token_count
+                )
+            else:
+                total_cost = token_count * cost_per_token(
+                    model, prompt_tokens=token_count
+                )
+
+            return token_count, total_cost
+        except Exception:
+            token_count = 0
+            total_cost = 0.0
+            return token_count, total_cost
+
+    async def pre_evaluate(
+        self, agent: FlockAgent, inputs: dict[str, Any]
+    ) -> dict[str, Any]:
         """Record pre-evaluation metrics."""
         if self.config.collect_token_usage:
             # Calculate input tokens and cost
             total_input_tokens = 0
             total_input_cost = 0.0
-
+
             for v in inputs.values():
                 tokens, cost = self._calculate_cost(
-                    str(v),
-                    agent.model,
-                    is_completion=False
+                    str(v), agent.model, is_completion=False
                 )
                 total_input_tokens += tokens
-                total_input_cost += cost
-
+                if isinstance(cost, float):
+                    total_input_cost += cost
+                else:
+                    total_input_cost += cost[1]
+
             self._record_metric(
                 "tokens",
                 total_input_tokens,
-                {"agent": agent.name, "type": "input"}
+                {"agent": agent.name, "type": "input"},
             )
             self._record_metric(
-                "cost",
-                total_input_cost,
-                {"agent": agent.name, "type": "input"}
+                "cost", total_input_cost, {"agent": agent.name, "type": "input"}
             )
-
+
         if self.config.collect_cpu:
             cpu_percent = psutil.Process().cpu_percent()
             self._record_metric(
                 "cpu",
                 cpu_percent,
-                {"agent": agent.name, "phase": "pre_evaluate"}
+                {"agent": agent.name, "phase": "pre_evaluate"},
             )
-
+
         return inputs
-
-    async def post_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def post_evaluate(
+        self, agent: FlockAgent, inputs: dict[str, Any], result: dict[str, Any]
+    ) -> dict[str, Any]:
         """Record post-evaluation metrics."""
         if self.config.collect_timing and self._start_time:
             latency = time.time() - self._start_time
-            self._record_metric(
-                "latency",
-                latency,
-                {"agent": agent.name}
-            )
-
+            self._record_metric("latency", latency, {"agent": agent.name})
+
             # Check for alerts
             if self._should_alert("latency", latency):
                 # In practice, you'd want to integrate with a proper alerting system
-                print(f"ALERT: High latency detected: {latency*1000:.2f}ms")
-
+                print(f"ALERT: High latency detected: {latency * 1000:.2f}ms")
+
         if self.config.collect_token_usage:
             # Calculate output tokens and cost
             total_output_tokens = 0
             total_output_cost = 0.0
-
+
             for v in result.values():
                 tokens, cost = self._calculate_cost(
-                    str(v),
-                    agent.model,
-                    is_completion=True
+                    str(v), agent.model, is_completion=True
                 )
                 total_output_tokens += tokens
-                total_output_cost += cost
-
+                if isinstance(cost, float):
+                    total_output_cost += cost
+                else:
+                    total_output_cost += cost[1]
+
             self._record_metric(
                 "tokens",
                 total_output_tokens,
-                {"agent": agent.name, "type": "output"}
+                {"agent": agent.name, "type": "output"},
             )
             self._record_metric(
                 "cost",
                 total_output_cost,
-                {"agent": agent.name, "type": "output"}
+                {"agent": agent.name, "type": "output"},
             )
-
+
             # Record total cost for this operation
             self._record_metric(
                 "total_cost",
                 total_output_cost + total_output_cost,
-                {"agent": agent.name}
+                {"agent": agent.name},
             )
-
+
         if self.config.collect_memory and self._start_memory:
             current_memory = psutil.Process().memory_info().rss
             memory_diff = current_memory - self._start_memory
             self._record_metric(
-                "memory",
-                memory_diff,
-                {"agent": agent.name, "phase": "end"}
+                "memory", memory_diff, {"agent": agent.name, "phase": "end"}
             )
-
+
         return result
-
 
-    async def on_error(self, agent: FlockAgent, error: Exception,
-                       inputs: Dict[str, Any]) -> None:
+    async def on_error(
+        self, agent: FlockAgent, error: Exception, inputs: dict[str, Any]
+    ) -> None:
         """Record error metrics."""
         self._record_metric(
             "errors",
             1,
-            {
-                "agent": agent.name,
-                "error_type": type(error).__name__
-            }
-        )
+            {"agent": agent.name, "error_type": type(error).__name__},
+        )
```
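The hooks above (`initialize`, `pre_evaluate`, `post_evaluate`, `on_error`, `terminate`) bracket a single agent evaluation. Below is a self-contained sketch of driving them directly with in-memory storage, assuming they can be invoked outside the agent runtime; the `SimpleNamespace` agent is a stand-in exposing only the `name` and `model` attributes the module reads, where the framework would pass a real `FlockAgent`:

```python
import asyncio
from types import SimpleNamespace

from flock.modules.performance.metrics_module import (
    MetricsModule,
    MetricsModuleConfig,
)

agent = SimpleNamespace(name="demo_agent", model="gpt-4")  # stand-in, not a real FlockAgent
module = MetricsModule(
    name="performance_metrics",
    config=MetricsModuleConfig(storage_type="memory"),
)

async def main():
    inputs = {"query": "hello"}
    await module.initialize(agent, inputs)                      # start timer, baseline memory
    inputs = await module.pre_evaluate(agent, inputs)           # input tokens/cost, CPU
    result = {"answer": "world"}                                # stand-in for the evaluation
    result = await module.post_evaluate(agent, inputs, result)  # latency, output tokens, memory
    await module.terminate(agent, inputs, result)               # summary file (json storage only)
    print(module.get_statistics("latency"))

asyncio.run(main())
```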
flock/routers/__init__.py (new file):

```diff
@@ -0,0 +1 @@
+"""Routers for the Flock framework."""
```

flock/routers/agent/__init__.py (new file):

```diff
@@ -0,0 +1 @@
+"""Agent-based router implementation for the Flock framework."""
```
|