flock-core 0.2.18-py3-none-any.whl → 0.3.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of flock-core might be problematic.
- flock/__init__.py +39 -29
- flock/cli/assets/release_notes.md +111 -0
- flock/cli/constants.py +1 -0
- flock/cli/load_release_notes.py +23 -0
- flock/core/__init__.py +12 -1
- flock/core/context/context.py +10 -5
- flock/core/flock.py +61 -21
- flock/core/flock_agent.py +112 -442
- flock/core/flock_evaluator.py +49 -0
- flock/core/flock_factory.py +73 -0
- flock/core/flock_module.py +77 -0
- flock/{interpreter → core/interpreter}/python_interpreter.py +9 -1
- flock/core/logging/formatters/themes.py +1 -1
- flock/core/logging/logging.py +119 -15
- flock/core/mixin/dspy_integration.py +11 -8
- flock/core/registry/agent_registry.py +4 -2
- flock/core/tools/basic_tools.py +1 -1
- flock/core/util/cli_helper.py +41 -3
- flock/evaluators/declarative/declarative_evaluator.py +52 -0
- flock/evaluators/natural_language/natural_language_evaluator.py +66 -0
- flock/modules/callback/callback_module.py +86 -0
- flock/modules/memory/memory_module.py +235 -0
- flock/modules/memory/memory_parser.py +125 -0
- flock/modules/memory/memory_storage.py +736 -0
- flock/modules/output/output_module.py +194 -0
- flock/modules/performance/metrics_module.py +477 -0
- flock/themes/aardvark-blue.toml +1 -1
- {flock_core-0.2.18.dist-info → flock_core-0.3.1.dist-info}/METADATA +48 -2
- {flock_core-0.2.18.dist-info → flock_core-0.3.1.dist-info}/RECORD +32 -19
- {flock_core-0.2.18.dist-info → flock_core-0.3.1.dist-info}/WHEEL +0 -0
- {flock_core-0.2.18.dist-info → flock_core-0.3.1.dist-info}/entry_points.txt +0 -0
- {flock_core-0.2.18.dist-info → flock_core-0.3.1.dist-info}/licenses/LICENSE +0 -0
flock/modules/output/output_module.py
ADDED
@@ -0,0 +1,194 @@
"""Output formatting and display functionality for agents."""

import json
import os
from datetime import datetime
from typing import Any

from pydantic import Field

from flock.core import FlockAgent
from flock.core.flock_module import FlockModule, FlockModuleConfig
from flock.core.logging.formatters.themed_formatter import (
    ThemedAgentResultFormatter,
)
from flock.core.logging.formatters.themes import OutputTheme
from flock.core.logging.logging import get_logger

logger = get_logger("module.output")


class OutputModuleConfig(FlockModuleConfig):
    """Configuration for output formatting and display."""

    theme: OutputTheme = Field(
        default=OutputTheme.afterglow, description="Theme for output formatting"
    )
    render_table: bool = Field(
        default=False, description="Whether to render output as a table"
    )
    max_length: int = Field(
        default=1000, description="Maximum length for displayed output"
    )
    wait_for_input: bool = Field(
        default=False,
        description="Whether to wait for user input after display",
    )
    write_to_file: bool = Field(
        default=False, description="Whether to save output to file"
    )
    output_dir: str = Field(
        default="output/", description="Directory for saving output files"
    )
    truncate_long_values: bool = Field(
        default=True, description="Whether to truncate long values in display"
    )
    show_metadata: bool = Field(
        default=True, description="Whether to show metadata like timestamps"
    )
    format_code_blocks: bool = Field(
        default=True,
        description="Whether to apply syntax highlighting to code blocks",
    )
    custom_formatters: dict[str, str] = Field(
        default_factory=dict,
        description="Custom formatters for specific output types",
    )


class OutputModule(FlockModule):
    """Module that handles output formatting and display."""

    name: str = "output"
    config: OutputModuleConfig = Field(
        default_factory=OutputModuleConfig, description="Output configuration"
    )

    def __init__(self, name: str, config: OutputModuleConfig):
        super().__init__(name=name, config=config)
        if self.config.write_to_file:
            os.makedirs(self.config.output_dir, exist_ok=True)
        self._formatter = ThemedAgentResultFormatter(
            theme=self.config.theme,
            max_length=self.config.max_length,
            render_table=self.config.render_table,
            wait_for_input=self.config.wait_for_input,
            write_to_file=self.config.write_to_file,
        )

    def _format_value(self, value: Any, key: str) -> str:
        """Format a single value based on its type and configuration."""
        # Check for custom formatter
        if key in self.config.custom_formatters:
            formatter_name = self.config.custom_formatters[key]
            if hasattr(self, f"_format_{formatter_name}"):
                return getattr(self, f"_format_{formatter_name}")(value)

        # Default formatting based on type
        if isinstance(value, dict):
            return self._format_dict(value)
        elif isinstance(value, list):
            return self._format_list(value)
        elif isinstance(value, str) and self.config.format_code_blocks:
            return self._format_potential_code(value)
        else:
            return str(value)

    def _format_dict(self, d: dict[str, Any], indent: int = 0) -> str:
        """Format a dictionary with proper indentation."""
        lines = []
        for k, v in d.items():
            formatted_value = self._format_value(v, k)
            if (
                self.config.truncate_long_values
                and len(formatted_value) > self.config.max_length
            ):
                formatted_value = (
                    formatted_value[: self.config.max_length] + "..."
                )
            lines.append(f"{' ' * indent}{k}: {formatted_value}")
        return "\n".join(lines)

    def _format_list(self, lst: list[Any]) -> str:
        """Format a list with proper indentation."""
        return "\n".join(f"- {self._format_value(item, '')}" for item in lst)

    def _format_potential_code(self, text: str) -> str:
        """Format text that might contain code blocks."""
        import re

        def replace_code_block(match):
            code = match.group(2)
            lang = match.group(1) if match.group(1) else ""
            # Here you could add syntax highlighting
            return f"```{lang}\n{code}\n```"

        # Replace code blocks with formatted versions
        text = re.sub(
            r"```(\w+)?\n(.*?)\n```", replace_code_block, text, flags=re.DOTALL
        )
        return text

    def _save_output(self, agent_name: str, result: dict[str, Any]) -> None:
        """Save output to file if configured."""
        if not self.config.write_to_file:
            return

        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{agent_name}_output_{timestamp}.json"
        filepath = os.path.join(self.config.output_dir, filename)

        output_data = {
            "agent": agent_name,
            "timestamp": timestamp,
            "result": result,
        }

        if self.config.show_metadata:
            output_data["metadata"] = {
                "formatted_at": datetime.now().isoformat(),
                "theme": self.config.theme.value,
                "max_length": self.config.max_length,
            }

        with open(filepath, "w") as f:
            json.dump(output_data, f, indent=2)

    async def post_evaluate(
        self, agent: FlockAgent, inputs: dict[str, Any], result: dict[str, Any]
    ) -> dict[str, Any]:
        """Format and display the output."""
        logger.debug("Formatting and displaying output")
        # Display the result using the formatter
        self._formatter.display_result(result, agent.name)

        # Save to file if configured
        self._save_output(agent.name, result)

        return result

    def update_theme(self, new_theme: OutputTheme) -> None:
        """Update the output theme."""
        self.config.theme = new_theme
        self._formatter = ThemedAgentResultFormatter(
            theme=self.config.theme,
            max_length=self.config.max_length,
            render_table=self.config.render_table,
            wait_for_input=self.config.wait_for_input,
            write_to_file=self.config.write_to_file,
        )

    def add_custom_formatter(self, key: str, formatter_name: str) -> None:
        """Add a custom formatter for a specific output key."""
        self.config.custom_formatters[key] = formatter_name

    def get_output_files(self) -> list[str]:
        """Get list of saved output files."""
        if not self.config.write_to_file:
            return []

        return [
            f
            for f in os.listdir(self.config.output_dir)
            if f.endswith("_output.json")
        ]
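The hunk above contains the complete output module, so a usage sketch can be read straight off it. The following is a minimal sketch and not code from the release: it assumes the inherited FlockModuleConfig fields all have defaults and that attaching the module to an agent is handled elsewhere (that wiring is not part of this diff). The "code" key is a hypothetical result-field name; "potential_code" resolves to the _format_potential_code method defined above.

from flock.core.logging.formatters.themes import OutputTheme
from flock.modules.output.output_module import OutputModule, OutputModuleConfig

# Build a config using only fields declared in OutputModuleConfig above.
config = OutputModuleConfig(
    theme=OutputTheme.afterglow,  # the declared default theme
    render_table=True,
    write_to_file=True,           # __init__ then creates output_dir
    output_dir="output/",
)
module = OutputModule(name="output", config=config)

# Helper methods defined in the hunk above; "code" is a hypothetical key.
module.add_custom_formatter("code", "potential_code")
module.update_theme(OutputTheme.afterglow)
print(module.get_output_files())  # [] here, since nothing has been saved yet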
flock/modules/performance/metrics_module.py
ADDED
@@ -0,0 +1,477 @@
"""Performance and metrics tracking for Flock agents."""

import time
import json
import os
from datetime import datetime
from typing import Any, Dict, List, Optional, Union, Literal
from collections import defaultdict

import psutil
from pydantic import BaseModel, Field, validator
import numpy as np

from flock.core.flock_agent import FlockAgent
from flock.core.flock_module import FlockModule, FlockModuleConfig


class MetricPoint(BaseModel):
    """Single metric measurement."""
    timestamp: datetime
    value: Union[int, float, str]
    tags: Dict[str, str] = {}

class MetricsModuleConfig(FlockModuleConfig):
    """Configuration for performance metrics collection."""

    # Collection settings
    collect_timing: bool = Field(
        default=True,
        description="Collect timing metrics"
    )
    collect_memory: bool = Field(
        default=True,
        description="Collect memory usage"
    )
    collect_token_usage: bool = Field(
        default=True,
        description="Collect token usage stats"
    )
    collect_cpu: bool = Field(
        default=True,
        description="Collect CPU usage"
    )

    # Storage settings
    storage_type: Literal["json", "prometheus", "memory"] = Field(
        default="json",
        description="Where to store metrics"
    )
    metrics_dir: str = Field(
        default="metrics/",
        description="Directory for metrics storage"
    )

    # Aggregation settings
    aggregation_interval: str = Field(
        default="1h",
        description="Interval for metric aggregation"
    )
    retention_days: int = Field(
        default=30,
        description="Days to keep metrics"
    )

    # Alerting settings
    alert_on_high_latency: bool = Field(
        default=True,
        description="Alert on high latency"
    )
    latency_threshold_ms: int = Field(
        default=1000,
        description="Threshold for latency alerts"
    )

    @validator("aggregation_interval")
    def validate_interval(cls, v):
        """Validate time interval format."""
        if not v[-1] in ["s", "m", "h", "d"]:
            raise ValueError("Interval must end with s, m, h, or d")
        return v

class MetricsModule(FlockModule):
    """Module for collecting and analyzing agent performance metrics."""

    name: str = "performance_metrics"
    config: MetricsModuleConfig = Field(
        default_factory=MetricsModuleConfig,
        description="Performance metrics configuration"
    )

    def __init__(self, name, config):
        super().__init__(name=name, config=config)
        self._metrics = defaultdict(list)
        self._start_time: Optional[float] = None
        self._start_memory: Optional[int] = None

        # Set up storage
        if self.config.storage_type == "json":
            os.makedirs(self.config.metrics_dir, exist_ok=True)

        # Set up prometheus if needed
        if self.config.storage_type == "prometheus":
            try:
                from prometheus_client import Counter, Gauge, Histogram

                self._prom_latency = Histogram(
                    'flock_agent_latency_seconds',
                    'Time taken for agent evaluation',
                    ['agent_name']
                )
                self._prom_memory = Gauge(
                    'flock_agent_memory_bytes',
                    'Memory usage by agent',
                    ['agent_name']
                )
                self._prom_tokens = Counter(
                    'flock_agent_tokens_total',
                    'Token usage by agent',
                    ['agent_name', 'type']
                )
                self._prom_errors = Counter(
                    'flock_agent_errors_total',
                    'Error count by agent',
                    ['agent_name', 'error_type']
                )
            except ImportError:
                self.config.storage_type = "json"

    """Fixes for metrics summary calculation."""

    def _load_metrics_from_files(self, metric_name: str = None) -> Dict[str, List[MetricPoint]]:
        """Load metrics from JSON files."""
        metrics = defaultdict(list)

        try:
            # Get all metric files
            files = [f for f in os.listdir(self.config.metrics_dir)
                     if f.endswith('.json') and not f.startswith('summary_')]

            # Filter by metric name if specified
            if metric_name:
                files = [f for f in files if f.startswith(f"{metric_name}_")]

            for filename in files:
                filepath = os.path.join(self.config.metrics_dir, filename)
                with open(filepath, 'r') as f:
                    for line in f:
                        try:
                            data = json.loads(line)
                            point = MetricPoint(
                                timestamp=datetime.fromisoformat(data['timestamp']),
                                value=data['value'],
                                tags=data['tags']
                            )
                            name = filename.split('_')[0]  # Get metric name from filename
                            metrics[name].append(point)
                        except json.JSONDecodeError:
                            continue

            return dict(metrics)
        except Exception as e:
            print(f"Error loading metrics from files: {e}")
            return {}

    def get_metrics(
        self,
        metric_name: Optional[str] = None,
        start_time: Optional[datetime] = None,
        end_time: Optional[datetime] = None
    ) -> Dict[str, List[MetricPoint]]:
        """Get recorded metrics with optional filtering."""
        # Get metrics from appropriate source
        if self.config.storage_type == "json":
            metrics = self._load_metrics_from_files(metric_name)
        else:
            metrics = self._metrics
            if metric_name:
                metrics = {metric_name: metrics[metric_name]}

        # Apply time filtering if needed
        if start_time or end_time:
            filtered_metrics = defaultdict(list)
            for name, points in metrics.items():
                filtered_points = [
                    p for p in points
                    if (not start_time or p.timestamp >= start_time) and
                    (not end_time or p.timestamp <= end_time)
                ]
                filtered_metrics[name] = filtered_points
            metrics = filtered_metrics

        return dict(metrics)

    def get_statistics(
        self,
        metric_name: str,
        percentiles: List[float] = [50, 90, 95, 99]
    ) -> Dict[str, float]:
        """Calculate statistics for a metric."""
        # Get all points for this metric
        metrics = self.get_metrics(metric_name=metric_name)
        points = metrics.get(metric_name, [])

        if not points:
            return {}

        values = [p.value for p in points if isinstance(p.value, (int, float))]
        if not values:
            return {}

        stats = {
            "min": min(values),
            "max": max(values),
            "mean": float(np.mean(values)),  # Convert to float for JSON serialization
            "std": float(np.std(values)),
            "count": len(values),
            "last_value": values[-1]
        }

        for p in percentiles:
            stats[f"p{p}"] = float(np.percentile(values, p))

        return stats

    async def terminate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> None:
        """Clean up and final metric recording."""
        if self.config.storage_type == "json":
            # Save aggregated metrics
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            summary_file = os.path.join(
                self.config.metrics_dir,
                f"summary_{agent.name}_{timestamp}.json"
            )

            # Calculate summary for all metrics
            summary = {
                "agent": agent.name,
                "timestamp": timestamp,
                "metrics": {}
            }

            # Get all unique metric names from files
            all_metrics = self._load_metrics_from_files()

            for metric_name in all_metrics.keys():
                stats = self.get_statistics(metric_name)
                if stats:  # Only include metrics that have data
                    summary["metrics"][metric_name] = stats

            with open(summary_file, "w") as f:
                json.dump(summary, f, indent=2)

    def _record_metric(
        self,
        name: str,
        value: Union[int, float, str],
        tags: Dict[str, str] = None
    ) -> None:
        """Record a single metric point."""
        point = MetricPoint(
            timestamp=datetime.now(),
            value=value,
            tags=tags or {}
        )

        # Store metric
        if self.config.storage_type == "memory":
            self._metrics[name].append(point)

        elif self.config.storage_type == "prometheus":
            if name == "latency":
                self._prom_latency.labels(**tags).observe(value)
            elif name == "memory":
                self._prom_memory.labels(**tags).set(value)
            elif name == "tokens":
                self._prom_tokens.labels(**tags).inc(value)

        elif self.config.storage_type == "json":
            self._save_metric_to_file(name, point)

    def _save_metric_to_file(self, name: str, point: MetricPoint) -> None:
        """Save metric to JSON file."""
        filename = f"{name}_{point.timestamp.strftime('%Y%m')}.json"
        filepath = os.path.join(self.config.metrics_dir, filename)

        data = {
            "timestamp": point.timestamp.isoformat(),
            "value": point.value,
            "tags": point.tags
        }

        # Append to file
        with open(filepath, "a") as f:
            f.write(json.dumps(data) + "\n")

    def _get_tokenizer(self, model: str):
        """Get the appropriate tokenizer for the model."""
        try:
            import tiktoken

            # Handle different model naming conventions
            if model.startswith('openai/'):
                model = model[7:]  # Strip 'openai/' prefix

            try:
                return tiktoken.encoding_for_model(model)
            except KeyError:
                # Fallback to cl100k_base for unknown models
                return tiktoken.get_encoding("cl100k_base")

        except ImportError:
            return None
def _calculate_token_usage(self, text: str, model: str = "gpt-4") -> int:
|
|
315
|
+
"""Calculate token count using tiktoken when available."""
|
|
316
|
+
tokenizer = self._get_tokenizer(model)
|
|
317
|
+
|
|
318
|
+
if tokenizer:
|
|
319
|
+
# Use tiktoken for accurate count
|
|
320
|
+
return len(tokenizer.encode(text))
|
|
321
|
+
else:
|
|
322
|
+
# Fallback to estimation if tiktoken not available
|
|
323
|
+
# Simple estimation - words / 0.75 for average tokens per word
|
|
324
|
+
token_estimate = int(len(text.split()) / 0.75)
|
|
325
|
+
|
|
326
|
+
# Log warning about estimation
|
|
327
|
+
print(f"Warning: Using estimated token count. Install tiktoken for accurate counting.")
|
|
328
|
+
|
|
329
|
+
    def _should_alert(self, metric: str, value: float) -> bool:
        """Check if metric should trigger alert."""
        if metric == "latency" and self.config.alert_on_high_latency:
            return value * 1000 > self.config.latency_threshold_ms
        return False

    async def initialize(self, agent: FlockAgent, inputs: Dict[str, Any]) -> None:
        """Initialize metrics collection."""
        self._start_time = time.time()

        if self.config.collect_memory:
            self._start_memory = psutil.Process().memory_info().rss
            self._record_metric(
                "memory",
                self._start_memory,
                {"agent": agent.name, "phase": "start"}
            )

    def _calculate_cost(
        self,
        text: str,
        model: str,
        is_completion: bool = False
    ) -> tuple[int, float]:
        """Calculate both token count and cost."""
        # Get token count
        from litellm import cost_per_token
        token_count = self._calculate_token_usage(text, model)

        # Calculate total cost
        if is_completion:
            total_cost = token_count * cost_per_token(model, completion_tokens=token_count)
        else:
            total_cost = token_count * cost_per_token(model, prompt_tokens=token_count)

        return token_count, total_cost

    async def pre_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any]) -> Dict[str, Any]:
        """Record pre-evaluation metrics."""
        if self.config.collect_token_usage:
            # Calculate input tokens and cost
            total_input_tokens = 0
            total_input_cost = 0.0

            for v in inputs.values():
                tokens, cost = self._calculate_cost(
                    str(v),
                    agent.model,
                    is_completion=False
                )
                total_input_tokens += tokens
                total_input_cost += cost[1]

            self._record_metric(
                "tokens",
                total_input_tokens,
                {"agent": agent.name, "type": "input"}
            )
            self._record_metric(
                "cost",
                total_input_cost,
                {"agent": agent.name, "type": "input"}
            )

        if self.config.collect_cpu:
            cpu_percent = psutil.Process().cpu_percent()
            self._record_metric(
                "cpu",
                cpu_percent,
                {"agent": agent.name, "phase": "pre_evaluate"}
            )

        return inputs

    async def post_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
        """Record post-evaluation metrics."""
        if self.config.collect_timing and self._start_time:
            latency = time.time() - self._start_time
            self._record_metric(
                "latency",
                latency,
                {"agent": agent.name}
            )

            # Check for alerts
            if self._should_alert("latency", latency):
                # In practice, you'd want to integrate with a proper alerting system
                print(f"ALERT: High latency detected: {latency*1000:.2f}ms")

        if self.config.collect_token_usage:
            # Calculate output tokens and cost
            total_output_tokens = 0
            total_output_cost = 0.0

            for v in result.values():
                tokens, cost = self._calculate_cost(
                    str(v),
                    agent.model,
                    is_completion=True
                )
                total_output_tokens += tokens
                total_output_cost += cost[1]

            self._record_metric(
                "tokens",
                total_output_tokens,
                {"agent": agent.name, "type": "output"}
            )
            self._record_metric(
                "cost",
                total_output_cost,
                {"agent": agent.name, "type": "output"}
            )

            # Record total cost for this operation
            self._record_metric(
                "total_cost",
                total_output_cost + total_output_cost,
                {"agent": agent.name}
            )

        if self.config.collect_memory and self._start_memory:
            current_memory = psutil.Process().memory_info().rss
            memory_diff = current_memory - self._start_memory
            self._record_metric(
                "memory",
                memory_diff,
                {"agent": agent.name, "phase": "end"}
            )

        return result

    async def on_error(self, agent: FlockAgent, error: Exception, inputs: Dict[str, Any]) -> None:
        """Record error metrics."""
        self._record_metric(
            "errors",
            1,
            {
                "agent": agent.name,
                "error_type": type(error).__name__
            }
        )
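The metrics hunk above likewise contains the whole class, so a small sketch of exercising it in isolation follows. This is a sketch under assumptions, not release code: it presumes the inherited FlockModuleConfig fields have defaults, and in real use the async hooks (initialize, pre_evaluate, post_evaluate, terminate, on_error) would be driven by the agent runtime rather than called by hand. Here the private _record_metric is called directly to show the in-memory path, and the "demo" tag value is made up.

from flock.modules.performance.metrics_module import (
    MetricsModule,
    MetricsModuleConfig,
)

# In-memory storage sidesteps the JSON files and the optional prometheus_client path.
metrics = MetricsModule(
    name="performance_metrics",
    config=MetricsModuleConfig(storage_type="memory", collect_token_usage=False),
)

# Normally the lifecycle hooks record these; call the recorder directly for the demo.
metrics._record_metric("latency", 0.42, {"agent": "demo"})
metrics._record_metric("latency", 0.58, {"agent": "demo"})

# get_statistics() returns min/max/mean/std/count/last_value plus p50, p90, p95, p99.
print(metrics.get_statistics("latency"))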
flock/themes/aardvark-blue.toml
CHANGED
@@ -28,7 +28,7 @@ table_show_lines = true
 table_box = "ROUNDED"
 panel_padding = [ 1, 2,]
 panel_title_align = "center"
-table_row_styles = [ "", "
+table_row_styles = [ "", "bold",]
 table_safe_box = true
 table_padding = [ 0, 1,]
 table_collapse_padding = false