kalibr 1.2.0__py3-none-any.whl → 1.2.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
kalibr/__init__.py CHANGED
@@ -56,7 +56,17 @@ from .collector import is_configured as is_collector_configured
  from .collector import (
  setup_collector,
  )
- from .context import get_parent_span_id, get_trace_id, new_trace_id, trace_context
+ from .context import (
+ get_parent_span_id,
+ get_trace_id,
+ new_trace_id,
+ trace_context,
+ # Goal context (v1.3.0)
+ goal,
+ set_goal,
+ get_goal,
+ clear_goal,
+ )
  from .cost_adapter import (
  AnthropicCostAdapter,
  BaseCostAdapter,
@@ -79,6 +89,8 @@ from .intelligence import (
  get_policy,
  report_outcome,
  get_recommendation,
+ register_path,
+ decide,
  )

  if os.getenv("KALIBR_AUTO_INSTRUMENT", "true").lower() == "true":
@@ -114,6 +126,11 @@ __all__ = [
  "get_trace_id",
  "get_parent_span_id",
  "new_trace_id",
+ # Goal Context (v1.3.0)
+ "goal",
+ "set_goal",
+ "get_goal",
+ "clear_goal",
  # Tracer
  "Tracer",
  "SpanContext",
@@ -144,4 +161,6 @@ __all__ = [
  "get_policy",
  "report_outcome",
  "get_recommendation",
+ "register_path",
+ "decide",
  ]
kalibr/context.py CHANGED
@@ -8,6 +8,7 @@ HTTP requests to SDK calls (OpenAI, Anthropic, Google).
  import random
  import string
  import uuid
+ from contextlib import contextmanager
  from contextvars import ContextVar
  from typing import Dict, Optional

@@ -130,3 +131,44 @@ def inject_kalibr_context_into_span(span: Span):
  span.set_attribute("kalibr.http_trace_id", ctx["trace_id"])
  if ctx.get("span_id"):
  span.set_attribute("kalibr.http_span_id", ctx["span_id"])
+
+
+ # ============================================================================
+ # Goal Context for Outcome Tracking (v1.3.0)
+ # ============================================================================
+
+ _goal_context: ContextVar[Optional[str]] = ContextVar("goal_context", default=None)
+
+
+ def set_goal(goal: str):
+ """Set the current goal for all subsequent Kalibr traces."""
+ _goal_context.set(goal)
+
+
+ def get_goal() -> Optional[str]:
+ """Get the current goal."""
+ return _goal_context.get()
+
+
+ def clear_goal():
+ """Clear the current goal."""
+ _goal_context.set(None)
+
+
+ @contextmanager
+ def goal(goal_name: str):
+ """Context manager to set goal for a block of code.
+
+ Usage:
+ with kalibr.goal("research_company"):
+ agent.run("Research Weights & Biases")
+ """
+ previous = get_goal()
+ set_goal(goal_name)
+ try:
+ yield
+ finally:
+ if previous:
+ set_goal(previous)
+ else:
+ clear_goal()
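
A minimal sketch of how the goal-context helpers added above behave, assuming kalibr 1.2.2 is installed; `run_research_agent` is a hypothetical stand-in for your own instrumented code. The kalibr_langchain handler changed later in this diff stamps every emitted event with whatever goal is active here.

```python
# Sketch: tagging traces with goals via the new context helpers.
# `run_research_agent` is a hypothetical placeholder for instrumented work.
from kalibr import goal, set_goal, get_goal, clear_goal

def run_research_agent(company: str) -> str:
    return f"report for {company}"  # placeholder for real, traced calls

# Context manager: the goal applies only inside the block, and any
# previously set goal is restored on exit (see the finally clause above).
set_goal("default_research")
with goal("research_company"):
    assert get_goal() == "research_company"
    run_research_agent("Weights & Biases")
assert get_goal() == "default_research"  # outer goal restored

# Manual control for long-lived workers.
set_goal("book_meeting")
run_research_agent("Acme")
clear_goal()
assert get_goal() is None
```
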
kalibr/intelligence.py CHANGED
@@ -4,7 +4,7 @@ This module enables the outcome-conditioned routing loop:
  1. Before executing: query get_policy() to get the best path for your goal
  2. After executing: call report_outcome() to teach Kalibr what worked

- Example:
+ Example - Policy-based routing:
  from kalibr import get_policy, report_outcome

  # Before executing - get best path
@@ -17,6 +17,17 @@ Example:
  goal="book_meeting",
  success=True
  )
+
+ Example - Path registration and intelligent routing:
+ from kalibr import register_path, decide
+
+ # Register paths for a goal
+ register_path(goal="book_meeting", model_id="gpt-4", tool_id="calendar_tool")
+ register_path(goal="book_meeting", model_id="claude-3-opus")
+
+ # Get intelligent routing decision
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"] # Selected based on outcomes
  """

  from __future__ import annotations
@@ -64,6 +75,7 @@ class KalibrIntelligence:
  method: str,
  path: str,
  json: dict | None = None,
+ params: dict | None = None,
  ) -> httpx.Response:
  """Make authenticated request to intelligence API."""
  headers = {
@@ -73,7 +85,7 @@ class KalibrIntelligence:
  }

  url = f"{self.base_url}{path}"
- response = self._client.request(method, url, json=json, headers=headers)
+ response = self._client.request(method, url, json=json, params=params, headers=headers)
  response.raise_for_status()
  return response

@@ -139,6 +151,8 @@ class KalibrIntelligence:
  score: float | None = None,
  failure_reason: str | None = None,
  metadata: dict | None = None,
+ tool_id: str | None = None,
+ execution_params: dict | None = None,
  ) -> dict[str, Any]:
  """Report execution outcome for a goal.

@@ -152,6 +166,8 @@ class KalibrIntelligence:
  score: Optional quality score (0-1) for more granular feedback
  failure_reason: Optional reason for failure (helps with debugging)
  metadata: Optional additional context as a dict
+ tool_id: Optional tool that was used (e.g., "serper", "browserless")
+ execution_params: Optional execution parameters (e.g., {"temperature": 0.3})

  Returns:
  dict with:
@@ -184,6 +200,8 @@ class KalibrIntelligence:
  "score": score,
  "failure_reason": failure_reason,
  "metadata": metadata,
+ "tool_id": tool_id,
+ "execution_params": execution_params,
  },
  )
  return response.json()
@@ -230,6 +248,252 @@ class KalibrIntelligence:
  )
  return response.json()

+ # =========================================================================
+ # ROUTING METHODS
+ # =========================================================================
+
+ def register_path(
+ self,
+ goal: str,
+ model_id: str,
+ tool_id: str | None = None,
+ params: dict | None = None,
+ risk_level: str = "low",
+ ) -> dict[str, Any]:
+ """Register a new routing path for a goal.
+
+ Creates a path that maps a goal to a specific model (and optionally tool)
+ configuration. This path can then be selected by the decide() method.
+
+ Args:
+ goal: The goal this path is for (e.g., "book_meeting", "resolve_ticket")
+ model_id: The model identifier to use (e.g., "gpt-4", "claude-3-opus")
+ tool_id: Optional tool identifier if this path uses a specific tool
+ params: Optional parameters dict for the path configuration
+ risk_level: Risk level for this path - "low", "medium", or "high"
+
+ Returns:
+ dict with the created path including:
+ - path_id: Unique identifier for the path
+ - goal: The goal
+ - model_id: The model
+ - tool_id: The tool (if specified)
+ - params: The parameters (if specified)
+ - risk_level: The risk level
+ - created_at: Creation timestamp
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ path = intelligence.register_path(
+ goal="book_meeting",
+ model_id="gpt-4",
+ tool_id="calendar_tool",
+ risk_level="low"
+ )
+ print(f"Created path: {path['path_id']}")
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/paths",
+ json={
+ "goal": goal,
+ "model_id": model_id,
+ "tool_id": tool_id,
+ "params": params,
+ "risk_level": risk_level,
+ },
+ )
+ return response.json()
+
+ def list_paths(
+ self,
+ goal: str | None = None,
+ include_disabled: bool = False,
+ ) -> dict[str, Any]:
+ """List registered routing paths.
+
+ Args:
+ goal: Optional goal to filter paths by
+ include_disabled: Whether to include disabled paths (default False)
+
+ Returns:
+ dict with:
+ - paths: List of path objects
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ result = intelligence.list_paths(goal="book_meeting")
+ for path in result["paths"]:
+ print(f"{path['path_id']}: {path['model_id']}")
+ """
+ params = {}
+ if goal is not None:
+ params["goal"] = goal
+ if include_disabled:
+ params["include_disabled"] = "true"
+
+ response = self._request(
+ "GET",
+ "/api/v1/routing/paths",
+ params=params if params else None,
+ )
+ return response.json()
+
+ def disable_path(self, path_id: str) -> dict[str, Any]:
+ """Disable a routing path.
+
+ Disables a path so it won't be selected by decide(). The path
+ data is retained for historical analysis.
+
+ Args:
+ path_id: The unique identifier of the path to disable
+
+ Returns:
+ dict with:
+ - status: "disabled" if successful
+ - path_id: The disabled path ID
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ result = intelligence.disable_path("path_abc123")
+ print(f"Status: {result['status']}")
+ """
+ response = self._request(
+ "DELETE",
+ f"/api/v1/routing/paths/{path_id}",
+ )
+ return response.json()
+
+ def decide(
+ self,
+ goal: str,
+ task_risk_level: str = "low",
+ ) -> dict[str, Any]:
+ """Get routing decision for a goal.
+
+ Uses outcome data and exploration/exploitation strategy to decide
+ which path to use for achieving the specified goal.
+
+ Args:
+ goal: The goal to route for (e.g., "book_meeting")
+ task_risk_level: Risk tolerance for this task - "low", "medium", or "high"
+
+ Returns:
+ dict with:
+ - model_id: The selected model
+ - tool_id: The selected tool (if any)
+ - params: Additional parameters (if any)
+ - reason: Human-readable explanation of the decision
+ - confidence: Confidence score (0-1)
+ - is_exploration: Whether this is an exploration choice
+ - path_id: The selected path ID
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ decision = intelligence.decide(goal="book_meeting")
+ model = decision["model_id"]
+ print(f"Using {model} ({decision['reason']})")
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/decide",
+ json={
+ "goal": goal,
+ "task_risk_level": task_risk_level,
+ },
+ )
+ return response.json()
+
+ def set_exploration_config(
+ self,
+ goal: str = "*",
+ exploration_rate: float = 0.1,
+ min_samples_before_exploit: int = 20,
+ rollback_threshold: float = 0.3,
+ staleness_days: int = 7,
+ exploration_on_high_risk: bool = False,
+ ) -> dict[str, Any]:
+ """Set exploration/exploitation configuration for routing.
+
+ Configures how the decide() method balances exploring new paths
+ vs exploiting known good paths.
+
+ Args:
+ goal: Goal to configure, or "*" for default config
+ exploration_rate: Probability of exploring (0-1, default 0.1)
+ min_samples_before_exploit: Minimum outcomes before exploiting (default 20)
+ rollback_threshold: Performance drop threshold to rollback (default 0.3)
+ staleness_days: Days before reexploring stale paths (default 7)
+ exploration_on_high_risk: Whether to explore on high-risk tasks (default False)
+
+ Returns:
+ dict with the saved configuration
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ config = intelligence.set_exploration_config(
+ goal="book_meeting",
+ exploration_rate=0.2,
+ min_samples_before_exploit=10
+ )
+ """
+ response = self._request(
+ "POST",
+ "/api/v1/routing/config",
+ json={
+ "goal": goal,
+ "exploration_rate": exploration_rate,
+ "min_samples_before_exploit": min_samples_before_exploit,
+ "rollback_threshold": rollback_threshold,
+ "staleness_days": staleness_days,
+ "exploration_on_high_risk": exploration_on_high_risk,
+ },
+ )
+ return response.json()
+
+ def get_exploration_config(self, goal: str | None = None) -> dict[str, Any]:
+ """Get exploration/exploitation configuration.
+
+ Args:
+ goal: Optional goal to get config for (returns default if not found)
+
+ Returns:
+ dict with configuration values:
+ - goal: The goal this config applies to
+ - exploration_rate: Exploration probability
+ - min_samples_before_exploit: Minimum samples before exploiting
+ - rollback_threshold: Rollback threshold
+ - staleness_days: Staleness threshold in days
+ - exploration_on_high_risk: Whether exploration is allowed on high-risk
+
+ Raises:
+ httpx.HTTPStatusError: If the API returns an error
+
+ Example:
+ config = intelligence.get_exploration_config(goal="book_meeting")
+ print(f"Exploration rate: {config['exploration_rate']}")
+ """
+ params = {}
+ if goal is not None:
+ params["goal"] = goal
+
+ response = self._request(
+ "GET",
+ "/api/v1/routing/config",
+ params=params if params else None,
+ )
+ return response.json()
+
  def close(self):
  """Close the HTTP client."""
  self._client.close()
@@ -291,7 +555,7 @@ def report_outcome(trace_id: str, goal: str, success: bool, tenant_id: str | Non
  goal: The goal this execution was trying to achieve
  success: Whether the goal was achieved
  tenant_id: Optional tenant ID override (default: uses KALIBR_TENANT_ID env var)
- **kwargs: Additional arguments (score, failure_reason, metadata)
+ **kwargs: Additional arguments (score, failure_reason, metadata, tool_id, execution_params)

  Returns:
  Response dict with status confirmation
@@ -315,3 +579,72 @@ def get_recommendation(task_type: str, **kwargs) -> dict[str, Any]:
  See KalibrIntelligence.get_recommendation for full documentation.
  """
  return _get_intelligence_client().get_recommendation(task_type, **kwargs)
+
+
+ def register_path(
+ goal: str,
+ model_id: str,
+ tool_id: str | None = None,
+ params: dict | None = None,
+ risk_level: str = "low",
+ tenant_id: str | None = None,
+ ) -> dict[str, Any]:
+ """Register a new routing path for a goal.
+
+ Convenience function that uses the default intelligence client.
+ See KalibrIntelligence.register_path for full documentation.
+
+ Args:
+ goal: The goal this path is for
+ model_id: The model identifier to use
+ tool_id: Optional tool identifier
+ params: Optional parameters dict
+ risk_level: Risk level - "low", "medium", or "high"
+ tenant_id: Optional tenant ID override
+
+ Returns:
+ dict with the created path
+
+ Example:
+ from kalibr import register_path
+
+ path = register_path(
+ goal="book_meeting",
+ model_id="gpt-4",
+ tool_id="calendar_tool"
+ )
+ """
+ client = _get_intelligence_client()
+ if tenant_id:
+ client = KalibrIntelligence(tenant_id=tenant_id)
+ return client.register_path(goal, model_id, tool_id, params, risk_level)
+
+
+ def decide(
+ goal: str,
+ task_risk_level: str = "low",
+ tenant_id: str | None = None,
+ ) -> dict[str, Any]:
+ """Get routing decision for a goal.
+
+ Convenience function that uses the default intelligence client.
+ See KalibrIntelligence.decide for full documentation.
+
+ Args:
+ goal: The goal to route for
+ task_risk_level: Risk tolerance - "low", "medium", or "high"
+ tenant_id: Optional tenant ID override
+
+ Returns:
+ dict with model_id, tool_id, params, reason, confidence, etc.
+
+ Example:
+ from kalibr import decide
+
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"]
+ """
+ client = _get_intelligence_client()
+ if tenant_id:
+ client = KalibrIntelligence(tenant_id=tenant_id)
+ return client.decide(goal, task_risk_level)
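
Pulling the additions above together, a minimal sketch of the outcome-conditioned routing loop using the module-level helpers re-exported from `kalibr`. `run_with_model` is a hypothetical stand-in for your own execution code, and `KALIBR_API_KEY` / `KALIBR_TENANT_ID` are assumed to be configured in the environment.

```python
# Sketch: register paths, ask for a routing decision, execute, report the outcome.
from kalibr import register_path, decide, report_outcome, new_trace_id

def run_with_model(model_id: str, tool_id, params) -> bool:
    # Hypothetical executor: call the selected model/tool and judge success.
    return True

# 1. Register candidate paths for the goal (one call per path).
register_path(goal="book_meeting", model_id="gpt-4", tool_id="calendar_tool")
register_path(goal="book_meeting", model_id="claude-3-opus", risk_level="medium")

# 2. Ask for a routing decision before executing.
decision = decide(goal="book_meeting", task_risk_level="low")
print(decision["model_id"], decision["reason"], decision["is_exploration"])

# 3. Execute with the selected path, then close the loop with the outcome.
trace_id = new_trace_id()  # in real use, the trace_id of the traced execution
success = run_with_model(decision["model_id"], decision.get("tool_id"), decision.get("params"))
report_outcome(
    trace_id=trace_id,
    goal="book_meeting",
    success=success,
    tool_id=decision.get("tool_id"),        # new in this release
    execution_params={"temperature": 0.3},  # new in this release
)
```

Exploration behaviour (exploration rate, minimum samples before exploiting, rollback threshold) is tuned per goal through `KalibrIntelligence.set_exploration_config`, which has no module-level wrapper in this diff.
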
kalibr-1.2.0.dist-info/METADATA → kalibr-1.2.2.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
- Metadata-Version: 2.4
+ Metadata-Version: 2.2
  Name: kalibr
- Version: 1.2.0
+ Version: 1.2.2
  Summary: Unified LLM Observability & Multi-Model AI Integration Framework - Deploy to GPT, Claude, Gemini, Copilot with full telemetry.
  Author-email: Kalibr Team <support@kalibr.systems>
  License: Apache-2.0
@@ -64,7 +64,6 @@ Requires-Dist: pytest>=7.4.0; extra == "dev"
  Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
  Requires-Dist: black>=23.0.0; extra == "dev"
  Requires-Dist: ruff>=0.1.0; extra == "dev"
- Dynamic: license-file

  # Kalibr Python SDK

@@ -188,6 +187,41 @@ policy = get_policy(
  )
  ```

+ ### Intelligent Routing with decide()
+
+ Register execution paths and let Kalibr decide the best strategy:
+
+ ```python
+ from kalibr import register_path, decide
+
+ # Register available paths
+ register_path(goal="book_meeting", model_id="gpt-4o", tool_id="calendar_api")
+ register_path(goal="book_meeting", model_id="claude-3-sonnet")
+
+ # Get intelligent routing decision
+ decision = decide(goal="book_meeting")
+ model = decision["model_id"] # Selected based on outcomes
+ tool = decision.get("tool_id") # If tool routing enabled
+ print(decision["exploration"]) # True if exploring new paths
+ ```
+
+ ### Goal Context
+
+ Tag traces with goals for outcome tracking:
+
+ ```python
+ from kalibr import goal, set_goal, get_goal, clear_goal
+
+ # Context manager (recommended)
+ with goal("book_meeting"):
+ response = openai.chat.completions.create(...)
+
+ # Or manual control
+ set_goal("book_meeting")
+ response = openai.chat.completions.create(...)
+ clear_goal()
+ ```
+
  ## TraceCapsule - Cross-Agent Tracing

  Propagate trace context across agent boundaries:
@@ -285,7 +319,7 @@ Configure via environment variables:
  | `KALIBR_API_KEY` | API key for authentication | *Required* |
  | `KALIBR_TENANT_ID` | Tenant identifier | `default` |
  | `KALIBR_COLLECTOR_URL` | Collector endpoint URL | `https://api.kalibr.systems/api/ingest` |
- | `KALIBR_INTELLIGENCE_URL` | Intelligence API URL | `https://kalibr-intelligence.fly.dev` |
+ | `KALIBR_INTELLIGENCE_URL` | Intelligence API URL | `https://dashboard.kalibr.systems/intelligence` |
  | `KALIBR_SERVICE_NAME` | Service name for spans | `kalibr-app` |
  | `KALIBR_ENVIRONMENT` | Environment (prod/staging/dev) | `prod` |
  | `KALIBR_WORKFLOW_ID` | Workflow identifier | `default` |
@@ -294,20 +328,20 @@ Configure via environment variables:
  ## CLI Commands

  ```bash
- # Serve your app with tracing
- kalibr serve myapp.py
+ # Show version
+ kalibr version

- # Run with managed runtime
- kalibr run myapp.py --port 8000
+ # Validate configuration
+ kalibr validate

- # Deploy to cloud platforms
- kalibr deploy myapp.py --runtime fly.io
+ # Check connection status
+ kalibr status

- # Fetch trace capsule by ID
- kalibr capsule <trace-id>
+ # Package for deployment
+ kalibr package

- # Show version
- kalibr version
+ # Update schemas
+ kalibr update_schemas
  ```

  ## Supported Providers
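
Since the README's environment-variable table and the `KALIBR_INTELLIGENCE_URL` default changed above, a minimal sketch of configuring the SDK from Python before import; the values are illustrative, and the self-hosting scenario is an assumption.

```python
# Sketch: setting the documented configuration variables before importing kalibr.
import os

os.environ.setdefault("KALIBR_API_KEY", "sk-kalibr-...")   # required; illustrative value
os.environ.setdefault("KALIBR_TENANT_ID", "acme")
os.environ.setdefault("KALIBR_SERVICE_NAME", "booking-service")
os.environ.setdefault("KALIBR_ENVIRONMENT", "staging")
# The intelligence API now defaults to the dashboard URL; override only if needed.
os.environ.setdefault("KALIBR_INTELLIGENCE_URL", "https://dashboard.kalibr.systems/intelligence")

import kalibr  # auto-instrumentation runs on import when KALIBR_AUTO_INSTRUMENT=true
```
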
kalibr-1.2.0.dist-info/RECORD → kalibr-1.2.2.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
- kalibr/__init__.py,sha256=16g-LPXiB_10TUcUeNzTy_EL5npqCFGYWJF-IhWpWDY,4889
+ kalibr/__init__.py,sha256=N0FRcMM5Rq845MPDjogsY1iRZu7K7NoUHAGqW8-JQDQ,5148
  kalibr/__main__.py,sha256=jO96I4pqinwHg7ONRvNVKbySBh5pSIhOAiNrgSQrNlY,110
  kalibr/capsule_middleware.py,sha256=pXG_wORgCqo3wHjtkn_zY4doLyiDmTwJtB7XiZNnbPk,3163
  kalibr/client.py,sha256=6D1paakE6zgWJStaow3ak9t0R8afodQhSSpUO3WTs_8,9732
  kalibr/collector.py,sha256=rtTKQLe6NkDSblBIfFooQ-ESFcP0Q1HUp4Bcqqg8JFo,5818
- kalibr/context.py,sha256=hBxWXZx0gcmeGqDMS1rstke_DmrujoRBIsfrG26WKUY,3755
+ kalibr/context.py,sha256=FgN9-WyMQMDgg2Vqwje4r2_jKRvnMeI8t4fIE1VRn_8,4777
  kalibr/cost_adapter.py,sha256=NerJ7ywaJjBn97gVFr7qKX7318e3Kmy2qqeNlGl9nPE,6439
  kalibr/decorators.py,sha256=m-XBXxWMDVrzaNsljACiGmeGhgiHj_MqSfj6OGK3L5I,4380
- kalibr/intelligence.py,sha256=oW_GFDHj5NEa-9L2y4jZcDsEQt81P77PpCuY--aIzLY,10889
+ kalibr/intelligence.py,sha256=JOckaykWrMloZV_MH1e9kvVxPRQKavIgLSdgqiJjxC4,22158
  kalibr/kalibr.py,sha256=cNXC3W_TX5SvGsy1lRopkwFqsHOpyd1kkVjEMOz1Yr4,6084
  kalibr/kalibr_app.py,sha256=ItZwEh0FZPx9_BE-zPQajC2yxI2y9IHYwJD0k9tbHvY,2773
  kalibr/models.py,sha256=HwD_-iysZMSnCzMQYO1Qcf0aeXySupY7yJeBwl_dLS0,1024
@@ -33,17 +33,17 @@ kalibr/instrumentation/openai_instr.py,sha256=UU0Pi1Gq1FqgetYWDacQhNFdjemuPrc0hR
  kalibr/instrumentation/registry.py,sha256=sfQnXhbPOI5LVon2kFhe8KcXQwWmuKW1XUe50B2AaBc,4749
  kalibr/middleware/__init__.py,sha256=qyDUn_irAX67MS-IkuDVxg4RmFnJHDf_BfIT3qfGoBI,115
  kalibr/middleware/auto_tracer.py,sha256=ZBSBM0O3a6rwVzfik1n5NUmQDah8_iaf86rU64aPYT4,13037
- kalibr-1.2.0.dist-info/licenses/LICENSE,sha256=5mwAnB38l3_PjmOQn6_L6cZnJvus143DUjMBPIH1yso,10768
  kalibr_crewai/__init__.py,sha256=b0HFTiE80eArtSMBOIEKu1JM6KU0tCjEylKCVVVF29Q,1796
- kalibr_crewai/callbacks.py,sha256=UBgGw0vdT0Jf9x8fNrHfsUR4unqX4nxNFta07OoSgaI,17162
- kalibr_crewai/instrumentor.py,sha256=AfnK5t7Ynb-7ytZF7XdOSPpr0o8hDf3sFkyzhc1ogY0,19465
+ kalibr_crewai/callbacks.py,sha256=_d1M4J-6XfKqrVIxnOgOQu57jpFKVv-VIsmPV0HNgZ4,20419
+ kalibr_crewai/instrumentor.py,sha256=x26v0RcriImkPiC8KB1Hmez1XOYLcDa9o-g35BMu5Ek,24420
  kalibr_langchain/__init__.py,sha256=O4XYVyhLp1v-Y1kGZw3zD-tUK9wp0UX8Jt6oN0QTHN4,1373
  kalibr_langchain/async_callback.py,sha256=_Mj_YrKbULNtfxixZ7iwiHyWEV9l178ZA5Oy5A5Pakk,27748
- kalibr_langchain/callback.py,sha256=VVPAvksS8TFMC21QlGj-1NRFsWnkLKPyzqhfA3kmT4c,34265
+ kalibr_langchain/callback.py,sha256=SNM1aHOXdG55grHmGyTwbXOeM6hjZTub2REiZD2H-d8,35216
  kalibr_openai_agents/__init__.py,sha256=wL59LzGstptKigfQDrKKt_7hcMO1JGVQtVAsE0lz-Zw,1367
  kalibr_openai_agents/processor.py,sha256=F550sdRf3rpguP1yOlgAUQWDLPBy4hSACV3-zOyCpOU,18257
- kalibr-1.2.0.dist-info/METADATA,sha256=45tJcZAcqg575gr2HSIMRArUhbz9juYec_Mi8LdiW9E,10339
- kalibr-1.2.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
- kalibr-1.2.0.dist-info/entry_points.txt,sha256=Kojlc6WRX8V1qS9lOMdDPZpTUVHCtzGtHqXusErgmLY,47
- kalibr-1.2.0.dist-info/top_level.txt,sha256=dIfBOWUnnHGFDwgz5zfIx5_0bU3wOUgAbYr4JcFHZmo,59
- kalibr-1.2.0.dist-info/RECORD,,
+ kalibr-1.2.2.dist-info/LICENSE,sha256=5mwAnB38l3_PjmOQn6_L6cZnJvus143DUjMBPIH1yso,10768
+ kalibr-1.2.2.dist-info/METADATA,sha256=XqhSOZekE5D5WQVT0pH2h1HCZWXiruGM9CqOelAZnf0,11201
+ kalibr-1.2.2.dist-info/WHEEL,sha256=beeZ86-EfXScwlR_HKu4SllMC9wUEj_8Z_4FJ3egI2w,91
+ kalibr-1.2.2.dist-info/entry_points.txt,sha256=Kojlc6WRX8V1qS9lOMdDPZpTUVHCtzGtHqXusErgmLY,47
+ kalibr-1.2.2.dist-info/top_level.txt,sha256=dIfBOWUnnHGFDwgz5zfIx5_0bU3wOUgAbYr4JcFHZmo,59
+ kalibr-1.2.2.dist-info/RECORD,,
kalibr-1.2.0.dist-info/WHEEL → kalibr-1.2.2.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (76.1.0)
  Root-Is-Purelib: true
  Tag: py3-none-any
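
The two kalibr_crewai sections that follow (callbacks.py and instrumentor.py) teach the CrewAI integration to pull the real provider/model from each agent's LLM configuration and to compute and aggregate cost. A minimal sketch of how that wiring is used, assuming `crewai` is installed; the Agent/Task/Crew arguments are illustrative, and the no-argument `KalibrCrewAIInstrumentor()` constructor is an assumption (its parameters are not shown in this diff).

```python
# Sketch: CrewAI auto-instrumentation plus the new agent-aware step callback.
from crewai import Agent, Crew, Task
from kalibr_crewai import KalibrAgentCallback
from kalibr_crewai.instrumentor import KalibrCrewAIInstrumentor

# Patches crew.kickoff / agent.execute_task / task.execute_sync; crew-level
# events now carry the summed child token counts and cost_usd.
KalibrCrewAIInstrumentor().instrument()

callback = KalibrAgentCallback(service="research-app", workflow_id="demo")
researcher = Agent(
    role="Researcher",
    goal="Find information",
    backstory="Analyst for the demo crew.",
    llm="openai/gpt-4o-mini",   # string form is split into provider + model
    step_callback=callback,
)
# New in this release: give the callback the agent so step events report the
# real provider/model and a non-zero cost instead of "crewai"/"agent"/0.0.
callback.set_agent(researcher)

task = Task(
    description="Research Weights & Biases",
    expected_output="A short summary",
    agent=researcher,
)
Crew(agents=[researcher], tasks=[task]).kickoff()
```
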
kalibr_crewai/callbacks.py CHANGED
@@ -60,6 +60,72 @@ def _get_provider_from_model(model: str) -> str:
  return "openai"


+ def _extract_model_from_agent(agent) -> tuple[str, str]:
+ """Extract model name and provider from agent's LLM config.
+
+ Args:
+ agent: CrewAI agent instance
+
+ Returns:
+ Tuple of (model_name, provider)
+ """
+ model_name = "unknown"
+ provider = "openai"
+
+ if not hasattr(agent, "llm"):
+ return model_name, provider
+
+ llm = agent.llm
+
+ # Case 1: LLM is a string like "openai/gpt-4o-mini" or "gpt-4"
+ if isinstance(llm, str):
+ if "/" in llm:
+ parts = llm.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ model_name = llm
+ provider = _get_provider_from_model(llm)
+ return model_name, provider
+
+ # Case 2: LLM has model or model_name attribute
+ if hasattr(llm, "model"):
+ model_name = str(llm.model)
+ elif hasattr(llm, "model_name"):
+ model_name = str(llm.model_name)
+
+ # Parse provider from model string if it contains "/"
+ if "/" in model_name:
+ parts = model_name.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ provider = _get_provider_from_model(model_name)
+
+ return model_name, provider
+
+
+ def _calculate_cost(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
+ """Calculate cost using CostAdapterFactory.
+
+ Args:
+ provider: Provider name (openai, anthropic, etc.)
+ model: Model name
+ input_tokens: Number of input tokens
+ output_tokens: Number of output tokens
+
+ Returns:
+ Cost in USD
+ """
+ if CostAdapterFactory is None:
+ return 0.0
+
+ try:
+ return CostAdapterFactory.compute_cost(provider, model, input_tokens, output_tokens)
+ except Exception:
+ return 0.0
+
+
  class EventBatcher:
  """Shared event batching for callbacks."""

@@ -198,6 +264,7 @@ class KalibrAgentCallback:
  service: Service name
  workflow_id: Workflow identifier
  metadata: Additional metadata for all events
+ agent: Optional agent reference for model extraction

  Usage:
  from kalibr_crewai import KalibrAgentCallback
@@ -210,6 +277,7 @@ class KalibrAgentCallback:
  goal="Find information",
  step_callback=callback,
  )
+ callback.set_agent(agent) # Set agent reference for model extraction
  """

  def __init__(
@@ -221,6 +289,7 @@ class KalibrAgentCallback:
  service: Optional[str] = None,
  workflow_id: Optional[str] = None,
  metadata: Optional[Dict[str, Any]] = None,
+ agent: Optional[Any] = None,
  ):
  self.api_key = api_key or os.getenv("KALIBR_API_KEY", "")
  self.endpoint = endpoint or os.getenv(
@@ -232,6 +301,7 @@ class KalibrAgentCallback:
  self.service = service or os.getenv("KALIBR_SERVICE", "crewai-app")
  self.workflow_id = workflow_id or os.getenv("KALIBR_WORKFLOW_ID", "default-workflow")
  self.default_metadata = metadata or {}
+ self._agent = agent

  # Get shared batcher
  self._batcher = EventBatcher.get_instance(
@@ -244,6 +314,14 @@ class KalibrAgentCallback:
  self._agent_span_id: Optional[str] = None
  self._step_count: int = 0

+ def set_agent(self, agent: Any) -> None:
+ """Set the agent reference for model extraction.
+
+ Args:
+ agent: CrewAI agent instance
+ """
+ self._agent = agent
+
  def __call__(self, step_output: Any) -> None:
  """Called after each agent step.

@@ -271,6 +349,12 @@ class KalibrAgentCallback:

  span_id = str(uuid.uuid4())

+ # Extract model from agent if available
+ model_name = "unknown"
+ provider = "openai"
+ if self._agent:
+ model_name, provider = _extract_model_from_agent(self._agent)
+
  # Extract step information
  step_type = "agent_step"
  operation = "agent_step"
@@ -307,8 +391,11 @@ class KalibrAgentCallback:
  output_text = str(step_output)

  # Count tokens
- input_tokens = _count_tokens(tool_input or "", "gpt-4")
- output_tokens = _count_tokens(output_text, "gpt-4")
+ input_tokens = _count_tokens(tool_input or "", model_name)
+ output_tokens = _count_tokens(output_text, model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)

  # Build event
  event = {
@@ -318,9 +405,9 @@ class KalibrAgentCallback:
  "parent_span_id": self._agent_span_id,
  "tenant_id": self.tenant_id,
  "workflow_id": self.workflow_id,
- "provider": "crewai",
- "model_id": "agent",
- "model_name": "crewai-agent",
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": operation,
  "endpoint": operation,
  "duration_ms": 0, # Step timing not available
@@ -328,8 +415,8 @@ class KalibrAgentCallback:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0, # Cost tracked at LLM level
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "timestamp": now.isoformat(),
  "ts_start": now.isoformat(),
@@ -376,6 +463,7 @@ class KalibrTaskCallback:
  service: Service name
  workflow_id: Workflow identifier
  metadata: Additional metadata for all events
+ agent: Optional agent reference for model extraction

  Usage:
  from kalibr_crewai import KalibrTaskCallback
@@ -388,6 +476,7 @@ class KalibrTaskCallback:
  agent=my_agent,
  callback=callback,
  )
+ callback.set_agent(my_agent) # Set agent reference for model extraction
  """

  def __init__(
@@ -399,6 +488,7 @@ class KalibrTaskCallback:
  service: Optional[str] = None,
  workflow_id: Optional[str] = None,
  metadata: Optional[Dict[str, Any]] = None,
+ agent: Optional[Any] = None,
  ):
  self.api_key = api_key or os.getenv("KALIBR_API_KEY", "")
  self.endpoint = endpoint or os.getenv(
@@ -410,6 +500,7 @@ class KalibrTaskCallback:
  self.service = service or os.getenv("KALIBR_SERVICE", "crewai-app")
  self.workflow_id = workflow_id or os.getenv("KALIBR_WORKFLOW_ID", "default-workflow")
  self.default_metadata = metadata or {}
+ self._agent = agent

  # Get shared batcher
  self._batcher = EventBatcher.get_instance(
@@ -421,6 +512,14 @@ class KalibrTaskCallback:
  self._trace_id: Optional[str] = None
  self._crew_span_id: Optional[str] = None

+ def set_agent(self, agent: Any) -> None:
+ """Set the agent reference for model extraction.
+
+ Args:
+ agent: CrewAI agent instance
+ """
+ self._agent = agent
+
  def __call__(self, task_output: Any) -> None:
  """Called when task completes.

@@ -467,9 +566,18 @@ class KalibrTaskCallback:
  if hasattr(task_output, "agent"):
  agent_role = str(task_output.agent)

+ # Extract model from agent if available
+ model_name = "unknown"
+ provider = "openai"
+ if self._agent:
+ model_name, provider = _extract_model_from_agent(self._agent)
+
  # Token counting
- input_tokens = _count_tokens(description, "gpt-4")
- output_tokens = _count_tokens(raw_output, "gpt-4")
+ input_tokens = _count_tokens(description, model_name)
+ output_tokens = _count_tokens(raw_output, model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)

  # Build operation name from description
  operation = "task_complete"
@@ -486,9 +594,9 @@ class KalibrTaskCallback:
  "parent_span_id": self._crew_span_id,
  "tenant_id": self.tenant_id,
  "workflow_id": self.workflow_id,
- "provider": "crewai",
- "model_id": "task",
- "model_name": agent_role,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": operation,
  "endpoint": "task_complete",
  "duration_ms": 0, # Task timing not available in callback
@@ -496,8 +604,8 @@ class KalibrTaskCallback:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0, # Cost tracked at LLM level
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": "success",
  "timestamp": now.isoformat(),
  "ts_start": now.isoformat(),
kalibr_crewai/instrumentor.py CHANGED
@@ -21,6 +21,72 @@ except ImportError:
  CostAdapterFactory = None


+ def _extract_model_from_agent(agent) -> tuple[str, str]:
+ """Extract model name and provider from agent's LLM config.
+
+ Args:
+ agent: CrewAI agent instance
+
+ Returns:
+ Tuple of (model_name, provider)
+ """
+ model_name = "unknown"
+ provider = "openai"
+
+ if not hasattr(agent, "llm"):
+ return model_name, provider
+
+ llm = agent.llm
+
+ # Case 1: LLM is a string like "openai/gpt-4o-mini" or "gpt-4"
+ if isinstance(llm, str):
+ if "/" in llm:
+ parts = llm.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ model_name = llm
+ provider = _get_provider_from_model(llm)
+ return model_name, provider
+
+ # Case 2: LLM has model or model_name attribute
+ if hasattr(llm, "model"):
+ model_name = str(llm.model)
+ elif hasattr(llm, "model_name"):
+ model_name = str(llm.model_name)
+
+ # Parse provider from model string if it contains "/"
+ if "/" in model_name:
+ parts = model_name.split("/", 1)
+ provider = parts[0]
+ model_name = parts[1]
+ else:
+ provider = _get_provider_from_model(model_name)
+
+ return model_name, provider
+
+
+ def _calculate_cost(provider: str, model: str, input_tokens: int, output_tokens: int) -> float:
+ """Calculate cost using CostAdapterFactory.
+
+ Args:
+ provider: Provider name (openai, anthropic, etc.)
+ model: Model name
+ input_tokens: Number of input tokens
+ output_tokens: Number of output tokens
+
+ Returns:
+ Cost in USD
+ """
+ if CostAdapterFactory is None:
+ return 0.0
+
+ try:
+ return CostAdapterFactory.compute_cost(provider, model, input_tokens, output_tokens)
+ except Exception:
+ return 0.0
+
+
  class KalibrCrewAIInstrumentor:
  """Auto-instrumentation for CrewAI.

@@ -84,6 +150,10 @@ class KalibrCrewAIInstrumentor:
  # Instrumentation state
  self._is_instrumented = False

+ # Accumulated metrics for crew-level aggregation
+ self._accumulated_tokens = {"input": 0, "output": 0}
+ self._accumulated_cost = 0.0
+
  def instrument(self) -> bool:
  """Instrument CrewAI classes.

@@ -170,11 +240,22 @@ class KalibrCrewAIInstrumentor:
  start_time = time.time()
  ts_start = datetime.now(timezone.utc)

+ # Reset accumulators before crew execution
+ instrumentor._accumulated_tokens = {"input": 0, "output": 0}
+ instrumentor._accumulated_cost = 0.0
+
  # Capture crew info
  crew_name = getattr(crew_self, "name", None) or "unnamed_crew"
- agent_count = len(getattr(crew_self, "agents", []))
+ agents = getattr(crew_self, "agents", [])
+ agent_count = len(agents)
  task_count = len(getattr(crew_self, "tasks", []))

+ # Extract model from first agent if available
+ model_name = "unknown"
+ provider = "crewai"
+ if agents:
+ model_name, provider = _extract_model_from_agent(agents[0])
+
  status = "success"
  error_type = None
  error_message = None
@@ -199,7 +280,13 @@ class KalibrCrewAIInstrumentor:
  if instrumentor.capture_output and result is not None:
  output_preview = str(result)[:500]

- # Create event
+ # Get accumulated metrics from child agent/task executions
+ input_tokens = instrumentor._accumulated_tokens["input"]
+ output_tokens = instrumentor._accumulated_tokens["output"]
+ total_tokens = input_tokens + output_tokens
+ cost_usd = instrumentor._accumulated_cost
+
+ # Create event with aggregated metrics
  event = {
  "schema_version": "1.0",
  "trace_id": trace_id,
@@ -207,18 +294,18 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "crew",
- "model_name": crew_name,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"crew:{crew_name}",
  "endpoint": "crew.kickoff",
  "duration_ms": duration_ms,
  "latency_ms": duration_ms,
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ "total_tokens": total_tokens,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -255,10 +342,21 @@ class KalibrCrewAIInstrumentor:
  start_time = time.time()
  ts_start = datetime.now(timezone.utc)

+ # Reset accumulators before crew execution
+ instrumentor._accumulated_tokens = {"input": 0, "output": 0}
+ instrumentor._accumulated_cost = 0.0
+
  crew_name = getattr(crew_self, "name", None) or "unnamed_crew"
- agent_count = len(getattr(crew_self, "agents", []))
+ agents = getattr(crew_self, "agents", [])
+ agent_count = len(agents)
  task_count = len(getattr(crew_self, "tasks", []))

+ # Extract model from first agent if available
+ model_name = "unknown"
+ provider = "crewai"
+ if agents:
+ model_name, provider = _extract_model_from_agent(agents[0])
+
  status = "success"
  error_type = None
  error_message = None
@@ -282,6 +380,13 @@ class KalibrCrewAIInstrumentor:
  if instrumentor.capture_output and result is not None:
  output_preview = str(result)[:500]

+ # Get accumulated metrics from child agent/task executions
+ input_tokens = instrumentor._accumulated_tokens["input"]
+ output_tokens = instrumentor._accumulated_tokens["output"]
+ total_tokens = input_tokens + output_tokens
+ cost_usd = instrumentor._accumulated_cost
+
+ # Create event with aggregated metrics
  event = {
  "schema_version": "1.0",
  "trace_id": trace_id,
@@ -289,18 +394,18 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "crew",
- "model_name": crew_name,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"crew:{crew_name}",
  "endpoint": "crew.kickoff_async",
  "duration_ms": duration_ms,
  "latency_ms": duration_ms,
- "input_tokens": 0,
- "output_tokens": 0,
- "total_tokens": 0,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "input_tokens": input_tokens,
+ "output_tokens": output_tokens,
+ "total_tokens": total_tokens,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -341,6 +446,9 @@ class KalibrCrewAIInstrumentor:
  role = getattr(agent_self, "role", "unknown")
  goal = getattr(agent_self, "goal", "")

+ # Extract model from agent's LLM config
+ model_name, provider = _extract_model_from_agent(agent_self)
+
  # Get task info
  task_description = ""
  if hasattr(task, "description"):
@@ -370,8 +478,16 @@ class KalibrCrewAIInstrumentor:
  output_preview = str(result)[:500]

  # Token estimation
- input_tokens = _count_tokens(task_description + goal, "gpt-4")
- output_tokens = _count_tokens(output_preview or "", "gpt-4")
+ input_tokens = _count_tokens(task_description + goal, model_name)
+ output_tokens = _count_tokens(output_preview or "", model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)
+
+ # Accumulate metrics for crew-level aggregation
+ instrumentor._accumulated_tokens["input"] += input_tokens
+ instrumentor._accumulated_tokens["output"] += output_tokens
+ instrumentor._accumulated_cost += cost_usd

  event = {
  "schema_version": "1.0",
@@ -380,9 +496,9 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "agent",
- "model_name": role,
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"agent:{role}",
  "endpoint": "agent.execute_task",
  "duration_ms": duration_ms,
@@ -390,8 +506,8 @@ class KalibrCrewAIInstrumentor:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
@@ -430,6 +546,13 @@ class KalibrCrewAIInstrumentor:
  description = getattr(task_self, "description", "")
  expected_output = getattr(task_self, "expected_output", "")

+ # Try to extract model from task's agent
+ model_name = "unknown"
+ provider = "openai"
+ agent = getattr(task_self, "agent", None)
+ if agent:
+ model_name, provider = _extract_model_from_agent(agent)
+
  status = "success"
  error_type = None
  error_message = None
@@ -456,8 +579,16 @@ class KalibrCrewAIInstrumentor:
  else:
  output_preview = str(result)[:500]

- input_tokens = _count_tokens(description, "gpt-4")
- output_tokens = _count_tokens(output_preview or "", "gpt-4")
+ input_tokens = _count_tokens(description, model_name)
+ output_tokens = _count_tokens(output_preview or "", model_name)
+
+ # Calculate cost using CostAdapterFactory
+ cost_usd = _calculate_cost(provider, model_name, input_tokens, output_tokens)
+
+ # Accumulate metrics for crew-level aggregation
+ instrumentor._accumulated_tokens["input"] += input_tokens
+ instrumentor._accumulated_tokens["output"] += output_tokens
+ instrumentor._accumulated_cost += cost_usd

  event = {
  "schema_version": "1.0",
@@ -466,9 +597,9 @@ class KalibrCrewAIInstrumentor:
  "parent_span_id": None,
  "tenant_id": instrumentor.tenant_id,
  "workflow_id": instrumentor.workflow_id,
- "provider": "crewai",
- "model_id": "task",
- "model_name": "crewai-task",
+ "provider": provider,
+ "model_id": model_name,
+ "model_name": model_name,
  "operation": f"task:{description[:30]}..." if len(description) > 30 else f"task:{description}",
  "endpoint": "task.execute_sync",
  "duration_ms": duration_ms,
@@ -476,8 +607,8 @@ class KalibrCrewAIInstrumentor:
  "input_tokens": input_tokens,
  "output_tokens": output_tokens,
  "total_tokens": input_tokens + output_tokens,
- "cost_usd": 0.0,
- "total_cost_usd": 0.0,
+ "cost_usd": cost_usd,
+ "total_cost_usd": cost_usd,
  "status": status,
  "error_type": error_type,
  "error_message": error_message,
kalibr_langchain/callback.py CHANGED
@@ -29,6 +29,8 @@ try:
  except ImportError:
  CostAdapterFactory = None

+ from kalibr.context import get_goal
+
  # Import tiktoken for token counting
  try:
  import tiktoken
@@ -288,6 +290,25 @@ class KalibrCallbackHandler(BaseCallbackHandler):
  # Compute cost
  cost_usd = self._compute_cost(provider, model, input_tokens, output_tokens)

+ # Extract tool_id from operation if this is a tool span
+ tool_id = ""
+ tool_input = ""
+ tool_output = ""
+
+ if span.get("span_type") == "tool":
+ operation = span.get("operation", "")
+ if operation.startswith("tool:"):
+ tool_id = operation[5:] # Extract "browserless" from "tool:browserless"
+
+ # Get tool input/output from span (truncate to 10KB)
+ if span.get("input"):
+ tool_input = str(span["input"])[:10000]
+ if metadata and metadata.get("output"):
+ tool_output = str(metadata["output"])[:10000]
+
+ # Get goal from context (thread-safe)
+ current_goal = get_goal() or ""
+
  # Build event
  event = {
  "schema_version": "1.0",
@@ -318,6 +339,11 @@ class KalibrCallbackHandler(BaseCallbackHandler):
  "service": self.service,
  "runtime_env": os.getenv("RUNTIME_ENV", "local"),
  "sandbox_id": os.getenv("SANDBOX_ID", "local"),
+ # New fields for tool/goal tracking
+ "tool_id": tool_id,
+ "tool_input": tool_input,
+ "tool_output": tool_output,
+ "goal": current_goal,
  "metadata": {
  **self.default_metadata,
  "span_type": span.get("span_type", "llm"),