agent-runtime-core 0.7.1-py3-none-any.whl → 0.9.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

agent_runtime_core/__init__.py

@@ -34,7 +34,7 @@ Example usage:
     return RunResult(final_output={"message": "Hello!"})
     """
 
-__version__ = "0.7.0"
+__version__ = "0.9.0"
 
 # Core interfaces
 from agent_runtime_core.interfaces import (
@@ -63,6 +63,7 @@ from agent_runtime_core.tool_calling_agent import ToolCallingAgent
 from agent_runtime_core.agentic_loop import (
     run_agentic_loop,
     AgenticLoopResult,
+    UsageStats,
 )
 
 # Configuration
@@ -187,17 +188,49 @@ from agent_runtime_core.tools import (
     schemas_to_openai_format,
 )
 
-# Multi-agent support (agent-as-tool pattern)
+# Multi-agent support (agent-as-tool pattern, system context)
 from agent_runtime_core.multi_agent import (
+    # System context for shared knowledge
+    SystemContext,
+    SharedKnowledge,
+    SharedMemoryConfig,
+    InjectMode,
+    # Agent-as-tool pattern
     AgentTool,
     AgentInvocationResult,
     InvocationMode,
     ContextMode,
     SubAgentContext,
     invoke_agent,
+    invoke_agent_with_fallback,
     create_agent_tool_handler,
     register_agent_tools,
     build_sub_agent_messages,
+    # Structured Handback Protocol
+    HandbackStatus,
+    HandbackResult,
+    Learning,
+    # Stuck/Loop Detection
+    StuckCondition,
+    StuckDetectionResult,
+    StuckDetector,
+    # Journey Mode
+    JourneyState,
+    JourneyEndReason,
+    JourneyEndResult,
+    JourneyManager,
+    JOURNEY_STATE_KEY,
+    # Fallback Routing
+    FallbackConfig,
+)
+
+# Privacy and user isolation
+from agent_runtime_core.privacy import (
+    PrivacyConfig,
+    UserContext,
+    MemoryScope,
+    DEFAULT_PRIVACY_CONFIG,
+    ANONYMOUS_USER,
 )
 
 # Cross-conversation memory
@@ -230,6 +263,7 @@ __all__ = [
     "ToolCallingAgent",
     "run_agentic_loop",
     "AgenticLoopResult",
+    "UsageStats",
     # Configuration
     "RuntimeConfig",
     "configure",
@@ -306,14 +340,42 @@ __all__ = [
     "ToolSchemaBuilder",
     "ToolParameter",
     "schemas_to_openai_format",
-    # Multi-agent support
+    # Multi-agent support - System context
+    "SystemContext",
+    "SharedKnowledge",
+    "SharedMemoryConfig",
+    "InjectMode",
+    # Multi-agent support - Agent-as-tool
     "AgentTool",
     "AgentInvocationResult",
     "InvocationMode",
     "ContextMode",
     "SubAgentContext",
     "invoke_agent",
+    "invoke_agent_with_fallback",
     "create_agent_tool_handler",
     "register_agent_tools",
     "build_sub_agent_messages",
+    # Multi-agent support - Structured Handback Protocol
+    "HandbackStatus",
+    "HandbackResult",
+    "Learning",
+    # Multi-agent support - Stuck/Loop Detection
+    "StuckCondition",
+    "StuckDetectionResult",
+    "StuckDetector",
+    # Multi-agent support - Journey Mode
+    "JourneyState",
+    "JourneyEndReason",
+    "JourneyEndResult",
+    "JourneyManager",
+    "JOURNEY_STATE_KEY",
+    # Multi-agent support - Fallback Routing
+    "FallbackConfig",
+    # Privacy and user isolation
+    "PrivacyConfig",
+    "UserContext",
+    "MemoryScope",
+    "DEFAULT_PRIVACY_CONFIG",
+    "ANONYMOUS_USER",
 ]
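
The names added to __all__ above become importable directly from the package root in 0.9.0. A minimal sketch (only the names are confirmed by this diff; their signatures and behavior are not shown here):

```python
# Sketch only: these identifiers are newly exported from the package root in
# 0.9.0 per the __all__ changes above; their APIs are not visible in this diff.
from agent_runtime_core import (
    UsageStats,      # accumulated token/cost stats from the agentic loop
    SystemContext,   # multi-agent shared system context
    FallbackConfig,  # fallback routing configuration
    PrivacyConfig,   # privacy / user-isolation settings
)
```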

agent_runtime_core/agentic_loop.py

@@ -12,7 +12,7 @@ This can be used by any agent implementation without requiring inheritance.
 
 import json
 import logging
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from typing import Any, Callable, Optional, Awaitable, Union
 
 from agent_runtime_core.interfaces import (
@@ -21,9 +21,107 @@ from agent_runtime_core.interfaces import (
     LLMClient,
     LLMResponse,
 )
+from agent_runtime_core.config import get_config
 
 logger = logging.getLogger(__name__)
 
+
+# =============================================================================
+# Cost Estimation Configuration
+# =============================================================================
+
+# Pricing per 1M tokens (input/output) - updated Jan 2026
+# These are approximate and should be updated as pricing changes
+MODEL_PRICING = {
+    # OpenAI
+    "gpt-4o": {"input": 2.50, "output": 10.00},
+    "gpt-4o-mini": {"input": 0.15, "output": 0.60},
+    "gpt-4-turbo": {"input": 10.00, "output": 30.00},
+    "gpt-4": {"input": 30.00, "output": 60.00},
+    "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
+    "o1": {"input": 15.00, "output": 60.00},
+    "o1-mini": {"input": 3.00, "output": 12.00},
+    "o1-preview": {"input": 15.00, "output": 60.00},
+    "o3-mini": {"input": 1.10, "output": 4.40},
+    # Anthropic
+    "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
+    "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
+    "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
+    "claude-3-sonnet-20240229": {"input": 3.00, "output": 15.00},
+    "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
+    # Google
+    "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
+    "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
+    "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
+    # Default fallback
+    "default": {"input": 3.00, "output": 15.00},
+}
+
+
+def _get_model_pricing(model: str) -> dict:
+    """Get pricing for a model, with fallback to default."""
+    # Try exact match first
+    if model in MODEL_PRICING:
+        return MODEL_PRICING[model]
+    # Try prefix match (e.g., "gpt-4o-2024-08-06" -> "gpt-4o")
+    for key in MODEL_PRICING:
+        if model.startswith(key):
+            return MODEL_PRICING[key]
+    return MODEL_PRICING["default"]
+
+
+def _estimate_cost(usage: dict, model: str) -> float:
+    """Estimate cost in USD from usage dict and model."""
+    pricing = _get_model_pricing(model)
+    prompt_tokens = usage.get("prompt_tokens", 0)
+    completion_tokens = usage.get("completion_tokens", 0)
+
+    input_cost = (prompt_tokens / 1_000_000) * pricing["input"]
+    output_cost = (completion_tokens / 1_000_000) * pricing["output"]
+
+    return input_cost + output_cost
+
+
+def _format_cost(cost: float) -> str:
+    """Format cost for display."""
+    if cost < 0.01:
+        return f"${cost:.4f}"
+    return f"${cost:.3f}"
+
+
+@dataclass
+class UsageStats:
+    """Accumulated usage statistics for the agentic loop."""
+
+    total_prompt_tokens: int = 0
+    total_completion_tokens: int = 0
+    total_cost: float = 0.0
+    llm_calls: int = 0
+    tool_calls: int = 0
+
+    def add_llm_call(self, usage: dict, model: str):
+        """Add usage from an LLM call."""
+        self.llm_calls += 1
+        self.total_prompt_tokens += usage.get("prompt_tokens", 0)
+        self.total_completion_tokens += usage.get("completion_tokens", 0)
+        self.total_cost += _estimate_cost(usage, model)
+
+    def add_tool_call(self):
+        """Record a tool call."""
+        self.tool_calls += 1
+
+    def to_dict(self) -> dict:
+        """Convert to dictionary."""
+        return {
+            "total_prompt_tokens": self.total_prompt_tokens,
+            "total_completion_tokens": self.total_completion_tokens,
+            "total_tokens": self.total_prompt_tokens + self.total_completion_tokens,
+            "total_cost_usd": self.total_cost,
+            "llm_calls": self.llm_calls,
+            "tool_calls": self.tool_calls,
+        }
+
+
 # Type alias for tool executor function
 ToolExecutor = Callable[[str, dict], Awaitable[Any]]
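
To make the new pricing table concrete, here is a small worked example using the helpers exactly as defined above (a sketch; _estimate_cost, _format_cost, _get_model_pricing, and the pricing table are private/internal and may change between releases):

```python
# Worked example of the cost helpers added in this hunk.
usage = {"prompt_tokens": 12_000, "completion_tokens": 800}

# "gpt-4o-2024-08-06" is not an exact key, so _get_model_pricing falls back to
# the "gpt-4o" prefix: $2.50 per 1M input tokens, $10.00 per 1M output tokens.
# (12_000 / 1_000_000) * 2.50 + (800 / 1_000_000) * 10.00 = 0.030 + 0.008 = 0.038
cost = _estimate_cost(usage, "gpt-4o-2024-08-06")
print(_format_cost(cost))  # -> "$0.038"

# UsageStats accumulates the same numbers across calls:
stats = UsageStats()
stats.add_llm_call(usage, "gpt-4o")
stats.add_tool_call()
print(stats.to_dict())
# -> roughly {'total_prompt_tokens': 12000, 'total_completion_tokens': 800,
#             'total_tokens': 12800, 'total_cost_usd': 0.038,
#             'llm_calls': 1, 'tool_calls': 1}
```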
 
@@ -31,19 +129,22 @@ ToolExecutor = Callable[[str, dict], Awaitable[Any]]
 @dataclass
 class AgenticLoopResult:
     """Result from running the agentic loop."""
-
+
     final_content: str
     """The final text response from the LLM."""
-
+
     messages: list[dict]
     """All messages including tool calls and results."""
-
+
     iterations: int
     """Number of iterations the loop ran."""
-
+
     usage: dict
     """Token usage from the final LLM call."""
 
+    usage_stats: Optional[UsageStats] = None
+    """Accumulated usage statistics across all LLM calls (if debug mode enabled)."""
+
 
 async def run_agentic_loop(
     llm: LLMClient,
@@ -55,17 +156,18 @@ async def run_agentic_loop(
     model: Optional[str] = None,
     max_iterations: int = 15,
     emit_events: bool = True,
+    ensure_final_response: bool = False,
     **llm_kwargs,
 ) -> AgenticLoopResult:
     """
     Run the standard agentic tool-calling loop.
-
+
     This handles the common pattern of:
     1. Call LLM with available tools
     2. If LLM returns tool calls, execute them
     3. Add tool results to messages and loop back to step 1
     4. If LLM returns a text response (no tool calls), return it
-
+
     Args:
         llm: The LLM client to use for generation
         messages: Initial messages (should include system prompt)
@@ -75,17 +177,20 @@ async def run_agentic_loop(
         model: Model to use (passed to LLM client)
         max_iterations: Maximum loop iterations to prevent infinite loops
         emit_events: Whether to emit TOOL_CALL and TOOL_RESULT events
+        ensure_final_response: If True, ensures a summary is generated when tools
+            were used but the final response is empty or very short. This is useful
+            for agents that should always provide a summary of what was accomplished.
         **llm_kwargs: Additional kwargs passed to llm.generate()
-
+
     Returns:
         AgenticLoopResult with final content, messages, and metadata
-
+
     Example:
         async def my_tool_executor(name: str, args: dict) -> Any:
             if name == "get_weather":
                 return {"temp": 72, "conditions": "sunny"}
             raise ValueError(f"Unknown tool: {name}")
-
+
         result = await run_agentic_loop(
             llm=my_llm_client,
             messages=[{"role": "system", "content": "You are helpful."}],
@@ -93,6 +198,7 @@ async def run_agentic_loop(
             execute_tool=my_tool_executor,
             ctx=ctx,
             model="gpt-4o",
+            ensure_final_response=True,  # Guarantees a summary
         )
     """
     iteration = 0
@@ -101,6 +207,11 @@
     consecutive_errors = 0
     max_consecutive_errors = 3  # Bail out if tool keeps failing
 
+    # Initialize usage tracking (enabled in debug mode)
+    debug_mode = get_config().debug
+    usage_stats = UsageStats() if debug_mode else None
+    effective_model = model or "unknown"
+
     while iteration < max_iterations:
         iteration += 1
         print(f"[agentic-loop] Iteration {iteration}/{max_iterations}, messages={len(messages)}", flush=True)
@@ -120,8 +231,25 @@
            model=model,
            **llm_kwargs,
        )
-
+
        last_response = response
+
+       # Track usage in debug mode
+       if debug_mode and usage_stats:
+           # Get model from response if available, otherwise use effective_model
+           resp_model = response.model or effective_model
+           usage_stats.add_llm_call(response.usage, resp_model)
+
+           # Print debug info
+           prompt_tokens = response.usage.get("prompt_tokens", 0)
+           completion_tokens = response.usage.get("completion_tokens", 0)
+           call_cost = _estimate_cost(response.usage, resp_model)
+
+           print(f"[agentic-loop] 💰 LLM Call #{usage_stats.llm_calls}:", flush=True)
+           print(f"[agentic-loop] Model: {resp_model}", flush=True)
+           print(f"[agentic-loop] Tokens: {prompt_tokens:,} in / {completion_tokens:,} out", flush=True)
+           print(f"[agentic-loop] Cost: {_format_cost(call_cost)}", flush=True)
+           print(f"[agentic-loop] Running total: {usage_stats.total_prompt_tokens:,} in / {usage_stats.total_completion_tokens:,} out = {_format_cost(usage_stats.total_cost)}", flush=True)
 
        # Check for tool calls
        if response.tool_calls:
@@ -158,6 +286,11 @@
                    logger.warning(f"Failed to parse tool args: {tool_args_str}")
                    tool_args = {}
 
+               # Track tool call in debug mode
+               if debug_mode and usage_stats:
+                   usage_stats.add_tool_call()
+                   print(f"[agentic-loop] 🔧 Tool #{usage_stats.tool_calls}: {tool_name}", flush=True)
+
                # Emit tool call event
                if emit_events:
                    await ctx.emit(EventType.TOOL_CALL, {
@@ -165,7 +298,7 @@
                        "name": tool_name,
                        "arguments": tool_args,
                    })
-
+
                # Execute the tool
                try:
                    result = await execute_tool(tool_name, tool_args)
@@ -209,8 +342,18 @@
                        "iterations": iteration,
                    })
 
-                   # Add error to messages for conversation history
-                   final_content = f"I encountered repeated errors while trying to complete this task. The last error was: {error_msg}"
+                   # Generate a summary if ensure_final_response is enabled
+                   if ensure_final_response:
+                       logger.info("Generating summary after error exit because ensure_final_response=True")
+                       print("[agentic-loop] Generating summary after error exit", flush=True)
+                       summary = await _generate_task_summary(llm, messages, model, **llm_kwargs)
+                       if summary:
+                           final_content = f"{summary}\n\n---\n\n⚠️ Note: The task ended early due to repeated errors. Last error: {error_msg}"
+                       else:
+                           final_content = f"I encountered repeated errors while trying to complete this task. The last error was: {error_msg}"
+                   else:
+                       final_content = f"I encountered repeated errors while trying to complete this task. The last error was: {error_msg}"
+
                    messages.append({
                        "role": "assistant",
                        "content": final_content,
@@ -222,11 +365,22 @@
                        "role": "assistant",
                    })
 
+                   # Print final summary in debug mode
+                   if debug_mode and usage_stats:
+                       print(f"[agentic-loop] ═══════════════════════════════════════════", flush=True)
+                       print(f"[agentic-loop] 📊 FINAL USAGE SUMMARY (error exit)", flush=True)
+                       print(f"[agentic-loop] LLM calls: {usage_stats.llm_calls}", flush=True)
+                       print(f"[agentic-loop] Tool calls: {usage_stats.tool_calls}", flush=True)
+                       print(f"[agentic-loop] Total tokens: {usage_stats.total_prompt_tokens:,} in / {usage_stats.total_completion_tokens:,} out", flush=True)
+                       print(f"[agentic-loop] Estimated cost: {_format_cost(usage_stats.total_cost)}", flush=True)
+                       print(f"[agentic-loop] ═══════════════════════════════════════════", flush=True)
+
                    return AgenticLoopResult(
                        final_content=final_content,
                        messages=messages,
                        iterations=iteration,
                        usage=last_response.usage if last_response else {},
+                       usage_stats=usage_stats,
                    )
 
            # Continue the loop to get next response
@@ -244,11 +398,117 @@
                })
 
            break
-
+
+    # Check if we need to ensure a final response (summary)
+    if ensure_final_response:
+        # Check if tools were used during this run
+        tools_were_used = any(
+            msg.get("role") == "assistant" and msg.get("tool_calls")
+            for msg in messages
+        )
+
+        # If tools were used but final response is empty or very short, generate a summary
+        if tools_were_used and (not final_content or len(final_content.strip()) < 50):
+            logger.info("Generating summary because tools were used but final response was empty/short")
+            print("[agentic-loop] Generating summary - tools were used but no final response", flush=True)
+
+            summary = await _generate_task_summary(llm, messages, model, **llm_kwargs)
+            if summary:
+                final_content = summary
+                # Emit the summary as an assistant message
+                if emit_events:
+                    await ctx.emit(EventType.ASSISTANT_MESSAGE, {
+                        "content": summary,
+                        "role": "assistant",
+                    })
+                # Add to messages
+                messages.append({"role": "assistant", "content": summary})
+
+    # Print final summary in debug mode
+    if debug_mode and usage_stats:
+        print(f"[agentic-loop] ═══════════════════════════════════════════", flush=True)
+        print(f"[agentic-loop] 📊 FINAL USAGE SUMMARY", flush=True)
+        print(f"[agentic-loop] Iterations: {iteration}", flush=True)
+        print(f"[agentic-loop] LLM calls: {usage_stats.llm_calls}", flush=True)
+        print(f"[agentic-loop] Tool calls: {usage_stats.tool_calls}", flush=True)
+        print(f"[agentic-loop] Total tokens: {usage_stats.total_prompt_tokens:,} in / {usage_stats.total_completion_tokens:,} out", flush=True)
+        print(f"[agentic-loop] Estimated cost: {_format_cost(usage_stats.total_cost)}", flush=True)
+        print(f"[agentic-loop] ═══════════════════════════════════════════", flush=True)
+
    return AgenticLoopResult(
        final_content=final_content,
        messages=messages,
        iterations=iteration,
        usage=last_response.usage if last_response else {},
+       usage_stats=usage_stats,
    )
 
+
+async def _generate_task_summary(
+    llm: LLMClient,
+    messages: list[dict],
+    model: Optional[str] = None,
+    **llm_kwargs,
+) -> str:
+    """
+    Generate a summary of what was accomplished based on the conversation history.
+
+    This is called when ensure_final_response=True and tools were used but
+    no meaningful final response was provided.
+
+    Args:
+        llm: The LLM client to use
+        messages: The conversation history including tool calls and results
+        model: Model to use
+        **llm_kwargs: Additional kwargs for the LLM
+
+    Returns:
+        A summary string of what was accomplished
+    """
+    # Build a summary of tool calls and their results
+    tool_summary_parts = []
+    for msg in messages:
+        if msg.get("role") == "assistant" and msg.get("tool_calls"):
+            for tc in msg.get("tool_calls", []):
+                if isinstance(tc, dict):
+                    name = tc.get("function", {}).get("name", "unknown")
+                else:
+                    name = getattr(tc, "name", "unknown")
+                tool_summary_parts.append(f"- Called: {name}")
+        elif msg.get("role") == "tool":
+            content = msg.get("content", "")
+            # Truncate long results
+            if len(content) > 200:
+                content = content[:200] + "..."
+            tool_summary_parts.append(f"  Result: {content}")
+
+    tool_summary = "\n".join(tool_summary_parts[-20:])  # Last 20 entries to avoid token limits
+
+    summary_prompt = f"""Based on the conversation above, provide a brief summary of what was accomplished.
+
+Here's a summary of the tools that were called:
+{tool_summary}
+
+Please provide a clear, concise summary (2-4 sentences) of:
+1. What actions were taken
+2. What was accomplished or changed
+3. Any important results or next steps
+
+Start your response directly with the summary - do not include phrases like "Here's a summary" or "Based on the conversation"."""
+
+    # Create a simplified message list for the summary request
+    summary_messages = [
+        {"role": "system", "content": "You are a helpful assistant that provides clear, concise summaries of completed tasks."},
+        {"role": "user", "content": summary_prompt},
+    ]
+
+    try:
+        response = await llm.generate(
+            summary_messages,
+            model=model,
+            **llm_kwargs,
+        )
+        return response.message.get("content", "")
+    except Exception as e:
+        logger.exception("Failed to generate task summary")
+        return f"Task completed. (Summary generation failed: {e})"

agent_runtime_core/config.py

@@ -88,6 +88,9 @@ class RuntimeConfig:
     vertex_deployed_index_id: Optional[str] = None
     vertex_index_id: Optional[str] = None
 
+    # Debug mode - enables verbose logging, cost tracking, etc.
+    debug: bool = False
+
     def get_openai_api_key(self) -> Optional[str]:
         """Get OpenAI API key from config or environment."""
         return self.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -204,6 +207,7 @@ def _apply_env_vars(config: RuntimeConfig) -> None:
     bool_fields = {
         "AGENT_RUNTIME_INCLUDE_CONVERSATION_HISTORY": "include_conversation_history",
         "AGENT_RUNTIME_AUTO_PERSIST_MESSAGES": "auto_persist_messages",
+        "AGENT_RUNTIME_DEBUG": "debug",
     }
 
     for env_var, attr in env_mapping.items():
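
The per-call cost tracking added in agentic_loop.py is gated on get_config().debug, so the new AGENT_RUNTIME_DEBUG environment variable is the switch that turns it on. A sketch, assuming the variable is set before the runtime config is loaded (the exact truthy strings accepted by _apply_env_vars are not shown in this diff):

```python
# Sketch: enabling the new debug mode (verbose logging and cost tracking).
# Assumes AGENT_RUNTIME_DEBUG is read when the runtime config is built; the
# accepted truthy values are not visible in this diff.
import os

os.environ["AGENT_RUNTIME_DEBUG"] = "true"

from agent_runtime_core.config import get_config  # import path confirmed by agentic_loop.py

if get_config().debug:
    print("Debug mode on: the agentic loop will print per-call tokens and estimated cost.")
```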