flock-core 0.3.6__py3-none-any.whl → 0.3.10__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in the supported public registries. It is provided for informational purposes only.

Potentially problematic release: this version of flock-core might be problematic.

@@ -1,15 +1,15 @@
 """Performance and metrics tracking for Flock agents."""
 
-import time
 import json
 import os
-from datetime import datetime
-from typing import Any, Dict, List, Optional, Union, Literal
+import time
 from collections import defaultdict
+from datetime import datetime
+from typing import Any, Literal
 
+import numpy as np
 import psutil
 from pydantic import BaseModel, Field, validator
-import numpy as np
 
 from flock.core.flock_agent import FlockAgent
 from flock.core.flock_module import FlockModule, FlockModuleConfig
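The import changes in this hunk drop `Dict`, `List`, `Optional`, and `Union` from `typing` in favor of built-in generics and the `X | Y` union syntax; the former needs Python 3.9+ (PEP 585) and the latter generally Python 3.10+ (PEP 604) when the annotations are evaluated at runtime, as Pydantic models do. A minimal illustration of the equivalence (variable names are illustrative, not from the package):

# Old-style annotations as used in 0.3.6:
from typing import Dict, Optional, Union

value_old: Union[int, float, str] = 0
tags_old: Dict[str, str] = {}
started_old: Optional[float] = None

# Equivalent annotations as adopted in 0.3.10 (Python 3.10+):
value_new: int | float | str = 0
tags_new: dict[str, str] = {}
started_new: float | None = None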
@@ -17,146 +17,144 @@ from flock.core.flock_module import FlockModule, FlockModuleConfig
 
 class MetricPoint(BaseModel):
     """Single metric measurement."""
+
     timestamp: datetime
-    value: Union[int, float, str]
-    tags: Dict[str, str] = {}
+    value: int | float | str
+    tags: dict[str, str] = {}
+
 
 class MetricsModuleConfig(FlockModuleConfig):
     """Configuration for performance metrics collection."""
-
+
     # Collection settings
     collect_timing: bool = Field(
-        default=True,
-        description="Collect timing metrics"
+        default=True, description="Collect timing metrics"
     )
     collect_memory: bool = Field(
-        default=True,
-        description="Collect memory usage"
+        default=True, description="Collect memory usage"
     )
     collect_token_usage: bool = Field(
-        default=True,
-        description="Collect token usage stats"
+        default=True, description="Collect token usage stats"
     )
-    collect_cpu: bool = Field(
-        default=True,
-        description="Collect CPU usage"
-    )
-
+    collect_cpu: bool = Field(default=True, description="Collect CPU usage")
+
     # Storage settings
     storage_type: Literal["json", "prometheus", "memory"] = Field(
-        default="json",
-        description="Where to store metrics"
+        default="json", description="Where to store metrics"
    )
     metrics_dir: str = Field(
-        default="metrics/",
-        description="Directory for metrics storage"
+        default="metrics/", description="Directory for metrics storage"
    )
-
+
     # Aggregation settings
     aggregation_interval: str = Field(
-        default="1h",
-        description="Interval for metric aggregation"
-    )
-    retention_days: int = Field(
-        default=30,
-        description="Days to keep metrics"
+        default="1h", description="Interval for metric aggregation"
    )
-
+    retention_days: int = Field(default=30, description="Days to keep metrics")
+
     # Alerting settings
     alert_on_high_latency: bool = Field(
-        default=True,
-        description="Alert on high latency"
+        default=True, description="Alert on high latency"
    )
     latency_threshold_ms: int = Field(
-        default=1000,
-        description="Threshold for latency alerts"
+        default=1000, description="Threshold for latency alerts"
    )
-
+
     @validator("aggregation_interval")
     def validate_interval(cls, v):
         """Validate time interval format."""
-        if not v[-1] in ["s", "m", "h", "d"]:
+        if v[-1] not in ["s", "m", "h", "d"]:
             raise ValueError("Interval must end with s, m, h, or d")
         return v
 
+
 class MetricsModule(FlockModule):
     """Module for collecting and analyzing agent performance metrics."""
-
+
     name: str = "performance_metrics"
     config: MetricsModuleConfig = Field(
         default_factory=MetricsModuleConfig,
-        description="Performance metrics configuration"
+        description="Performance metrics configuration",
    )
-
+
     def __init__(self, name, config):
         super().__init__(name=name, config=config)
         self._metrics = defaultdict(list)
-        self._start_time: Optional[float] = None
-        self._start_memory: Optional[int] = None
-
+        self._start_time: float | None = None
+        self._start_memory: int | None = None
+
         # Set up storage
         if self.config.storage_type == "json":
             os.makedirs(self.config.metrics_dir, exist_ok=True)
-
+
         # Set up prometheus if needed
         if self.config.storage_type == "prometheus":
             try:
                 from prometheus_client import Counter, Gauge, Histogram
-
+
                 self._prom_latency = Histogram(
-                    'flock_agent_latency_seconds',
-                    'Time taken for agent evaluation',
-                    ['agent_name']
+                    "flock_agent_latency_seconds",
+                    "Time taken for agent evaluation",
+                    ["agent_name"],
                )
                 self._prom_memory = Gauge(
-                    'flock_agent_memory_bytes',
-                    'Memory usage by agent',
-                    ['agent_name']
+                    "flock_agent_memory_bytes",
+                    "Memory usage by agent",
+                    ["agent_name"],
                )
                 self._prom_tokens = Counter(
-                    'flock_agent_tokens_total',
-                    'Token usage by agent',
-                    ['agent_name', 'type']
+                    "flock_agent_tokens_total",
+                    "Token usage by agent",
+                    ["agent_name", "type"],
                )
                 self._prom_errors = Counter(
-                    'flock_agent_errors_total',
-                    'Error count by agent',
-                    ['agent_name', 'error_type']
+                    "flock_agent_errors_total",
+                    "Error count by agent",
+                    ["agent_name", "error_type"],
                )
             except ImportError:
                 self.config.storage_type = "json"
 
     """Fixes for metrics summary calculation."""
 
-    def _load_metrics_from_files(self, metric_name: str = None) -> Dict[str, List[MetricPoint]]:
+    def _load_metrics_from_files(
+        self, metric_name: str = None
+    ) -> dict[str, list[MetricPoint]]:
         """Load metrics from JSON files."""
         metrics = defaultdict(list)
-
+
         try:
             # Get all metric files
-            files = [f for f in os.listdir(self.config.metrics_dir)
-                     if f.endswith('.json') and not f.startswith('summary_')]
-
+            files = [
+                f
+                for f in os.listdir(self.config.metrics_dir)
+                if f.endswith(".json") and not f.startswith("summary_")
+            ]
+
             # Filter by metric name if specified
             if metric_name:
                 files = [f for f in files if f.startswith(f"{metric_name}_")]
-
+
             for filename in files:
                 filepath = os.path.join(self.config.metrics_dir, filename)
-                with open(filepath, 'r') as f:
+                with open(filepath) as f:
                     for line in f:
                         try:
                             data = json.loads(line)
                             point = MetricPoint(
-                                timestamp=datetime.fromisoformat(data['timestamp']),
-                                value=data['value'],
-                                tags=data['tags']
+                                timestamp=datetime.fromisoformat(
+                                    data["timestamp"]
+                                ),
+                                value=data["value"],
+                                tags=data["tags"],
                            )
-                            name = filename.split('_')[0]  # Get metric name from filename
+                            name = filename.split("_")[
+                                0
+                            ]  # Get metric name from filename
                             metrics[name].append(point)
                         except json.JSONDecodeError:
                             continue
-
+
             return dict(metrics)
         except Exception as e:
             print(f"Error loading metrics from files: {e}")
@@ -164,10 +162,10 @@ class MetricsModule(FlockModule):
 
     def get_metrics(
         self,
-        metric_name: Optional[str] = None,
-        start_time: Optional[datetime] = None,
-        end_time: Optional[datetime] = None
-    ) -> Dict[str, List[MetricPoint]]:
+        metric_name: str | None = None,
+        start_time: datetime | None = None,
+        end_time: datetime | None = None,
+    ) -> dict[str, list[MetricPoint]]:
         """Get recorded metrics with optional filtering."""
         # Get metrics from appropriate source
         if self.config.storage_type == "json":
@@ -176,97 +174,95 @@ class MetricsModule(FlockModule):
             metrics = self._metrics
         if metric_name:
             metrics = {metric_name: metrics[metric_name]}
-
+
         # Apply time filtering if needed
         if start_time or end_time:
             filtered_metrics = defaultdict(list)
             for name, points in metrics.items():
                 filtered_points = [
-                    p for p in points
-                    if (not start_time or p.timestamp >= start_time) and
-                    (not end_time or p.timestamp <= end_time)
+                    p
+                    for p in points
+                    if (not start_time or p.timestamp >= start_time)
+                    and (not end_time or p.timestamp <= end_time)
                ]
                 filtered_metrics[name] = filtered_points
             metrics = filtered_metrics
-
+
         return dict(metrics)
 
     def get_statistics(
-        self,
-        metric_name: str,
-        percentiles: List[float] = [50, 90, 95, 99]
-    ) -> Dict[str, float]:
+        self, metric_name: str, percentiles: list[float] = [50, 90, 95, 99]
+    ) -> dict[str, float]:
         """Calculate statistics for a metric."""
         # Get all points for this metric
         metrics = self.get_metrics(metric_name=metric_name)
         points = metrics.get(metric_name, [])
-
+
         if not points:
             return {}
-
+
         values = [p.value for p in points if isinstance(p.value, (int, float))]
         if not values:
             return {}
-
+
         stats = {
             "min": min(values),
             "max": max(values),
-            "mean": float(np.mean(values)),  # Convert to float for JSON serialization
+            "mean": float(
+                np.mean(values)
+            ),  # Convert to float for JSON serialization
             "std": float(np.std(values)),
             "count": len(values),
-            "last_value": values[-1]
+            "last_value": values[-1],
         }
-
+
         for p in percentiles:
             stats[f"p{p}"] = float(np.percentile(values, p))
-
+
         return stats
 
-    async def terminate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> None:
+    async def terminate(
+        self, agent: FlockAgent, inputs: dict[str, Any], result: dict[str, Any]
+    ) -> None:
         """Clean up and final metric recording."""
         if self.config.storage_type == "json":
             # Save aggregated metrics
             timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
             summary_file = os.path.join(
                 self.config.metrics_dir,
-                f"summary_{agent.name}_{timestamp}.json"
+                f"summary_{agent.name}_{timestamp}.json",
            )
-
+
             # Calculate summary for all metrics
             summary = {
                 "agent": agent.name,
                 "timestamp": timestamp,
-                "metrics": {}
+                "metrics": {},
            }
-
+
             # Get all unique metric names from files
             all_metrics = self._load_metrics_from_files()
-
+
             for metric_name in all_metrics.keys():
                 stats = self.get_statistics(metric_name)
                 if stats:  # Only include metrics that have data
                     summary["metrics"][metric_name] = stats
-
+
             with open(summary_file, "w") as f:
                 json.dump(summary, f, indent=2)
-
+
     def _record_metric(
-        self,
-        name: str,
-        value: Union[int, float, str],
-        tags: Dict[str, str] = None
+        self, name: str, value: int | float | str, tags: dict[str, str] = None
    ) -> None:
         """Record a single metric point."""
         point = MetricPoint(
-            timestamp=datetime.now(),
-            value=value,
-            tags=tags or {}
+            timestamp=datetime.now(), value=value, tags=tags or {}
        )
-
+
         # Store metric
         if self.config.storage_type == "memory":
             self._metrics[name].append(point)
-
+
         elif self.config.storage_type == "prometheus":
             if name == "latency":
                 self._prom_latency.labels(**tags).observe(value)
@@ -274,47 +270,47 @@ class MetricsModule(FlockModule):
                 self._prom_memory.labels(**tags).set(value)
             elif name == "tokens":
                 self._prom_tokens.labels(**tags).inc(value)
-
+
         elif self.config.storage_type == "json":
             self._save_metric_to_file(name, point)
-
+
     def _save_metric_to_file(self, name: str, point: MetricPoint) -> None:
         """Save metric to JSON file."""
         filename = f"{name}_{point.timestamp.strftime('%Y%m')}.json"
         filepath = os.path.join(self.config.metrics_dir, filename)
-
+
         data = {
             "timestamp": point.timestamp.isoformat(),
             "value": point.value,
-            "tags": point.tags
+            "tags": point.tags,
        }
-
+
         # Append to file
         with open(filepath, "a") as f:
             f.write(json.dumps(data) + "\n")
-
+
     def _get_tokenizer(self, model: str):
         """Get the appropriate tokenizer for the model."""
         try:
             import tiktoken
-
+
             # Handle different model naming conventions
-            if model.startswith('openai/'):
+            if model.startswith("openai/"):
                 model = model[7:]  # Strip 'openai/' prefix
-
+
             try:
                 return tiktoken.encoding_for_model(model)
             except KeyError:
                 # Fallback to cl100k_base for unknown models
                 return tiktoken.get_encoding("cl100k_base")
-
+
         except ImportError:
             return None
-
+
     def _calculate_token_usage(self, text: str, model: str = "gpt-4") -> int:
         """Calculate token count using tiktoken when available."""
         tokenizer = self._get_tokenizer(model)
-
+
         if tokenizer:
             # Use tiktoken for accurate count
             return len(tokenizer.encode(text))
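For reference, `_save_metric_to_file` above appends one JSON object per line to a monthly file named `{metric}_{YYYYMM}.json` inside `metrics_dir`, and `_load_metrics_from_files` parses those lines back into `MetricPoint` objects. A sketch of one such record (file name, timestamp, and tag values are illustrative only):

# One line appended to metrics/latency_202504.json (illustrative values):
record = {
    "timestamp": "2025-04-01T12:00:00",  # point.timestamp.isoformat()
    "value": 0.42,                       # e.g. latency in seconds
    "tags": {"agent": "my_agent"},       # tags passed to _record_metric
}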
@@ -322,156 +318,156 @@ class MetricsModule(FlockModule):
             # Fallback to estimation if tiktoken not available
             # Simple estimation - words / 0.75 for average tokens per word
             token_estimate = int(len(text.split()) / 0.75)
-
+
             # Log warning about estimation
-            print(f"Warning: Using estimated token count. Install tiktoken for accurate counting.")
-
+            print(
+                f"Warning: Using estimated token count. Install tiktoken for accurate counting."
+            )
+
     def _should_alert(self, metric: str, value: float) -> bool:
         """Check if metric should trigger alert."""
         if metric == "latency" and self.config.alert_on_high_latency:
             return value * 1000 > self.config.latency_threshold_ms
         return False
-
-
 
-
-    async def initialize(self, agent: FlockAgent, inputs: Dict[str, Any]) -> None:
+    async def initialize(
+        self, agent: FlockAgent, inputs: dict[str, Any]
+    ) -> None:
         """Initialize metrics collection."""
         self._start_time = time.time()
-
+
         if self.config.collect_memory:
             self._start_memory = psutil.Process().memory_info().rss
             self._record_metric(
                 "memory",
                 self._start_memory,
-                {"agent": agent.name, "phase": "start"}
+                {"agent": agent.name, "phase": "start"},
            )
 
-
     def _calculate_cost(
-        self,
-        text: str,
-        model: str,
-        is_completion: bool = False
+        self, text: str, model: str, is_completion: bool = False
    ) -> tuple[int, float]:
         """Calculate both token count and cost."""
         # Get token count
-        from litellm import cost_per_token
-        token_count = self._calculate_token_usage(text, model)
-
+        try:
+            from litellm import cost_per_token
 
-        # Calculate total cost
-        if is_completion:
-            total_cost = token_count * cost_per_token(model, completion_tokens=token_count)
-        else:
-            total_cost = token_count * cost_per_token(model, prompt_tokens=token_count)
-
-        return token_count, total_cost
-
-    async def pre_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any]) -> Dict[str, Any]:
+            token_count = self._calculate_token_usage(text, model)
+            # Calculate total cost
+            if is_completion:
+                total_cost = token_count * cost_per_token(
+                    model, completion_tokens=token_count
+                )
+            else:
+                total_cost = token_count * cost_per_token(
+                    model, prompt_tokens=token_count
+                )
+
+            return token_count, total_cost
+        except Exception:
+            token_count = 0
+            total_cost = 0.0
+            return token_count, total_cost
+
+    async def pre_evaluate(
+        self, agent: FlockAgent, inputs: dict[str, Any]
+    ) -> dict[str, Any]:
         """Record pre-evaluation metrics."""
         if self.config.collect_token_usage:
             # Calculate input tokens and cost
             total_input_tokens = 0
             total_input_cost = 0.0
-
+
             for v in inputs.values():
                 tokens, cost = self._calculate_cost(
-                    str(v),
-                    agent.model,
-                    is_completion=False
+                    str(v), agent.model, is_completion=False
                )
                 total_input_tokens += tokens
-                total_input_cost += cost[1]
-
+                if isinstance(cost, float):
+                    total_input_cost += cost
+                else:
+                    total_input_cost += cost[1]
+
             self._record_metric(
                 "tokens",
                 total_input_tokens,
-                {"agent": agent.name, "type": "input"}
+                {"agent": agent.name, "type": "input"},
            )
             self._record_metric(
-                "cost",
-                total_input_cost,
-                {"agent": agent.name, "type": "input"}
+                "cost", total_input_cost, {"agent": agent.name, "type": "input"}
            )
-
+
         if self.config.collect_cpu:
             cpu_percent = psutil.Process().cpu_percent()
             self._record_metric(
                 "cpu",
                 cpu_percent,
-                {"agent": agent.name, "phase": "pre_evaluate"}
+                {"agent": agent.name, "phase": "pre_evaluate"},
            )
-
+
         return inputs
-
-    async def post_evaluate(self, agent: FlockAgent, inputs: Dict[str, Any], result: Dict[str, Any]) -> Dict[str, Any]:
+
+    async def post_evaluate(
+        self, agent: FlockAgent, inputs: dict[str, Any], result: dict[str, Any]
+    ) -> dict[str, Any]:
         """Record post-evaluation metrics."""
         if self.config.collect_timing and self._start_time:
             latency = time.time() - self._start_time
-            self._record_metric(
-                "latency",
-                latency,
-                {"agent": agent.name}
-            )
-
+            self._record_metric("latency", latency, {"agent": agent.name})
+
             # Check for alerts
             if self._should_alert("latency", latency):
                 # In practice, you'd want to integrate with a proper alerting system
-                print(f"ALERT: High latency detected: {latency*1000:.2f}ms")
-
+                print(f"ALERT: High latency detected: {latency * 1000:.2f}ms")
+
         if self.config.collect_token_usage:
             # Calculate output tokens and cost
             total_output_tokens = 0
             total_output_cost = 0.0
-
+
             for v in result.values():
                 tokens, cost = self._calculate_cost(
-                    str(v),
-                    agent.model,
-                    is_completion=True
+                    str(v), agent.model, is_completion=True
                )
                 total_output_tokens += tokens
-                total_output_cost += cost[1]
-
+                if isinstance(cost, float):
+                    total_output_cost += cost
+                else:
+                    total_output_cost += cost[1]
+
             self._record_metric(
                 "tokens",
                 total_output_tokens,
-                {"agent": agent.name, "type": "output"}
+                {"agent": agent.name, "type": "output"},
            )
             self._record_metric(
                 "cost",
                 total_output_cost,
-                {"agent": agent.name, "type": "output"}
+                {"agent": agent.name, "type": "output"},
            )
-
+
             # Record total cost for this operation
             self._record_metric(
                 "total_cost",
                 total_output_cost + total_output_cost,
-                {"agent": agent.name}
+                {"agent": agent.name},
            )
-
+
         if self.config.collect_memory and self._start_memory:
             current_memory = psutil.Process().memory_info().rss
             memory_diff = current_memory - self._start_memory
             self._record_metric(
-                "memory",
-                memory_diff,
-                {"agent": agent.name, "phase": "end"}
+                "memory", memory_diff, {"agent": agent.name, "phase": "end"}
            )
-
+
         return result
-
 
-
-    async def on_error(self, agent: FlockAgent, error: Exception, inputs: Dict[str, Any]) -> None:
+    async def on_error(
+        self, agent: FlockAgent, error: Exception, inputs: dict[str, Any]
+    ) -> None:
         """Record error metrics."""
         self._record_metric(
             "errors",
             1,
-            {
-                "agent": agent.name,
-                "error_type": type(error).__name__
-            }
-        )
+            {"agent": agent.name, "error_type": type(error).__name__},
+        )
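Beyond formatting and type-hint modernization, the substantive change in this hunk is defensive cost accounting: the `litellm` import in `_calculate_cost` is now wrapped in a try/except that falls back to `(0, 0.0)`, and `pre_evaluate`/`post_evaluate` accept the cost either as a float or as an indexable value. A rough usage sketch based only on the code shown above; the import path and the way a module is attached to an agent are assumptions, not taken from this diff:

# Illustrative sketch, not from the package: exercises MetricsModuleConfig
# and the zero-cost fallback of _calculate_cost when litellm is unavailable.
from flock.modules.metrics import MetricsModule, MetricsModuleConfig  # path assumed

config = MetricsModuleConfig(
    collect_timing=True,
    collect_token_usage=True,
    storage_type="json",         # or "memory" / "prometheus"
    metrics_dir="metrics/",
    aggregation_interval="1h",   # validator requires a trailing s, m, h, or d
    latency_threshold_ms=1000,
)
metrics_module = MetricsModule(name="performance_metrics", config=config)

# Without litellm installed (or if it raises), the hardened method returns (0, 0.0).
tokens, cost = metrics_module._calculate_cost("hello world", model="gpt-4")

# Attaching the module to an agent is framework-specific; the hook name below
# (add_module) is an assumption shown only to illustrate intent.
# agent.add_module(metrics_module)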
@@ -0,0 +1 @@
+"""Routers for the Flock framework."""
@@ -0,0 +1 @@
+"""Agent-based router implementation for the Flock framework."""