agent-runtime-core 0.8.0__tar.gz → 0.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84) hide show
  1. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/PKG-INFO +2 -1
  2. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/README.md +1 -0
  3. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/__init__.py +65 -3
  4. agent_runtime_core-0.9.0/agent_runtime_core/agentic_loop.py +514 -0
  5. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/config.py +4 -0
  6. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/contexts.py +72 -4
  7. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/llm/anthropic.py +83 -0
  8. agent_runtime_core-0.9.0/agent_runtime_core/multi_agent.py +1961 -0
  9. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/persistence/__init__.py +8 -0
  10. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/persistence/base.py +318 -1
  11. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/persistence/file.py +226 -2
  12. agent_runtime_core-0.9.0/agent_runtime_core/privacy.py +250 -0
  13. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/pyproject.toml +1 -1
  14. agent_runtime_core-0.9.0/tests/test_llm_anthropic.py +145 -0
  15. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_persistence.py +248 -0
  16. agent_runtime_core-0.9.0/tests/test_privacy.py +585 -0
  17. agent_runtime_core-0.8.0/agent_runtime_core/agentic_loop.py +0 -254
  18. agent_runtime_core-0.8.0/agent_runtime_core/multi_agent.py +0 -569
  19. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/.gitignore +0 -0
  20. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/LICENSE +0 -0
  21. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/config_schema.py +0 -0
  22. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/events/__init__.py +0 -0
  23. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/events/base.py +0 -0
  24. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/events/memory.py +0 -0
  25. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/events/redis.py +0 -0
  26. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/events/sqlite.py +0 -0
  27. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/__init__.py +0 -0
  28. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/base.py +0 -0
  29. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/ocr.py +0 -0
  30. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/processors.py +0 -0
  31. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/tools.py +0 -0
  32. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/files/vision.py +0 -0
  33. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/interfaces.py +0 -0
  34. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/json_runtime.py +0 -0
  35. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/llm/__init__.py +0 -0
  36. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/llm/litellm_client.py +0 -0
  37. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/llm/models_config.py +0 -0
  38. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/llm/openai.py +0 -0
  39. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/memory/__init__.py +0 -0
  40. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/memory/manager.py +0 -0
  41. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/memory/mixin.py +0 -0
  42. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/persistence/manager.py +0 -0
  43. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/queue/__init__.py +0 -0
  44. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/queue/base.py +0 -0
  45. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/queue/memory.py +0 -0
  46. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/queue/redis.py +0 -0
  47. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/queue/sqlite.py +0 -0
  48. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/rag/__init__.py +0 -0
  49. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/rag/chunking.py +0 -0
  50. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/rag/indexer.py +0 -0
  51. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/rag/retriever.py +0 -0
  52. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/registry.py +0 -0
  53. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/runner.py +0 -0
  54. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/state/__init__.py +0 -0
  55. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/state/base.py +0 -0
  56. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/state/memory.py +0 -0
  57. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/state/redis.py +0 -0
  58. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/state/sqlite.py +0 -0
  59. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/steps.py +0 -0
  60. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/testing.py +0 -0
  61. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/tool_calling_agent.py +0 -0
  62. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/tools.py +0 -0
  63. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/tracing/__init__.py +0 -0
  64. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/tracing/langfuse.py +0 -0
  65. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/tracing/noop.py +0 -0
  66. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/vectorstore/__init__.py +0 -0
  67. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/vectorstore/base.py +0 -0
  68. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/vectorstore/embeddings.py +0 -0
  69. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/vectorstore/sqlite_vec.py +0 -0
  70. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/agent_runtime_core/vectorstore/vertex.py +0 -0
  71. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/__init__.py +0 -0
  72. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_contexts.py +0 -0
  73. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_events.py +0 -0
  74. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_imports.py +0 -0
  75. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_memory.py +0 -0
  76. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_multi_agent.py +0 -0
  77. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_queue.py +0 -0
  78. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_rag.py +0 -0
  79. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_state.py +0 -0
  80. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_steps.py +0 -0
  81. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_testing.py +0 -0
  82. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_tools.py +0 -0
  83. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/tests/test_vectorstore.py +0 -0
  84. {agent_runtime_core-0.8.0 → agent_runtime_core-0.9.0}/uv.lock +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: agent-runtime-core
3
- Version: 0.8.0
3
+ Version: 0.9.0
4
4
  Summary: Framework-agnostic Python library for executing AI agents with consistent patterns
5
5
  Project-URL: Homepage, https://github.com/makemore/agent-runtime-core
6
6
  Project-URL: Repository, https://github.com/makemore/agent-runtime-core
@@ -92,6 +92,7 @@ A lightweight, framework-agnostic Python library for building AI agent systems.
92
92
 
93
93
  | Version | Date | Changes |
94
94
  |---------|------|---------|
95
+ | **0.9.0** | 2026-01-28 | **Multi-Agent & Debug** - Shared memory with privacy controls, journey mode, structured handback protocol, stuck/loop detection, fallback routing, cost/context tracking in debug mode |
95
96
  | **0.8.0** | 2026-01-28 | **File Ingestion** - Pluggable file processors (PDF, images, DOCX, XLSX, CSV), OCR providers (Tesseract, Google Vision, AWS Textract, Azure), AI vision (OpenAI, Anthropic, Gemini), file read/write tools |
96
97
  | **0.7.1** | 2026-01-24 | RAG module, vector stores (sqlite-vec, Vertex AI), memory system, multi-agent support, agentic loop, JSON runtime |
97
98
  | **0.6.0** | 2025-01-23 | Enhanced registry with factory functions and class registration |
@@ -10,6 +10,7 @@ A lightweight, framework-agnostic Python library for building AI agent systems.
10
10
 
11
11
  | Version | Date | Changes |
12
12
  |---------|------|---------|
13
+ | **0.9.0** | 2026-01-28 | **Multi-Agent & Debug** - Shared memory with privacy controls, journey mode, structured handback protocol, stuck/loop detection, fallback routing, cost/context tracking in debug mode |
13
14
  | **0.8.0** | 2026-01-28 | **File Ingestion** - Pluggable file processors (PDF, images, DOCX, XLSX, CSV), OCR providers (Tesseract, Google Vision, AWS Textract, Azure), AI vision (OpenAI, Anthropic, Gemini), file read/write tools |
14
15
  | **0.7.1** | 2026-01-24 | RAG module, vector stores (sqlite-vec, Vertex AI), memory system, multi-agent support, agentic loop, JSON runtime |
15
16
  | **0.6.0** | 2025-01-23 | Enhanced registry with factory functions and class registration |
@@ -34,7 +34,7 @@ Example usage:
34
34
  return RunResult(final_output={"message": "Hello!"})
35
35
  """
36
36
 
37
- __version__ = "0.8.0"
37
+ __version__ = "0.9.0"
38
38
 
39
39
  # Core interfaces
40
40
  from agent_runtime_core.interfaces import (
@@ -63,6 +63,7 @@ from agent_runtime_core.tool_calling_agent import ToolCallingAgent
63
63
  from agent_runtime_core.agentic_loop import (
64
64
  run_agentic_loop,
65
65
  AgenticLoopResult,
66
+ UsageStats,
66
67
  )
67
68
 
68
69
  # Configuration
@@ -187,17 +188,49 @@ from agent_runtime_core.tools import (
187
188
  schemas_to_openai_format,
188
189
  )
189
190
 
190
- # Multi-agent support (agent-as-tool pattern)
191
+ # Multi-agent support (agent-as-tool pattern, system context)
191
192
  from agent_runtime_core.multi_agent import (
193
+ # System context for shared knowledge
194
+ SystemContext,
195
+ SharedKnowledge,
196
+ SharedMemoryConfig,
197
+ InjectMode,
198
+ # Agent-as-tool pattern
192
199
  AgentTool,
193
200
  AgentInvocationResult,
194
201
  InvocationMode,
195
202
  ContextMode,
196
203
  SubAgentContext,
197
204
  invoke_agent,
205
+ invoke_agent_with_fallback,
198
206
  create_agent_tool_handler,
199
207
  register_agent_tools,
200
208
  build_sub_agent_messages,
209
+ # Structured Handback Protocol
210
+ HandbackStatus,
211
+ HandbackResult,
212
+ Learning,
213
+ # Stuck/Loop Detection
214
+ StuckCondition,
215
+ StuckDetectionResult,
216
+ StuckDetector,
217
+ # Journey Mode
218
+ JourneyState,
219
+ JourneyEndReason,
220
+ JourneyEndResult,
221
+ JourneyManager,
222
+ JOURNEY_STATE_KEY,
223
+ # Fallback Routing
224
+ FallbackConfig,
225
+ )
226
+
227
+ # Privacy and user isolation
228
+ from agent_runtime_core.privacy import (
229
+ PrivacyConfig,
230
+ UserContext,
231
+ MemoryScope,
232
+ DEFAULT_PRIVACY_CONFIG,
233
+ ANONYMOUS_USER,
201
234
  )
202
235
 
203
236
  # Cross-conversation memory
@@ -230,6 +263,7 @@ __all__ = [
230
263
  "ToolCallingAgent",
231
264
  "run_agentic_loop",
232
265
  "AgenticLoopResult",
266
+ "UsageStats",
233
267
  # Configuration
234
268
  "RuntimeConfig",
235
269
  "configure",
@@ -306,14 +340,42 @@ __all__ = [
306
340
  "ToolSchemaBuilder",
307
341
  "ToolParameter",
308
342
  "schemas_to_openai_format",
309
- # Multi-agent support
343
+ # Multi-agent support - System context
344
+ "SystemContext",
345
+ "SharedKnowledge",
346
+ "SharedMemoryConfig",
347
+ "InjectMode",
348
+ # Multi-agent support - Agent-as-tool
310
349
  "AgentTool",
311
350
  "AgentInvocationResult",
312
351
  "InvocationMode",
313
352
  "ContextMode",
314
353
  "SubAgentContext",
315
354
  "invoke_agent",
355
+ "invoke_agent_with_fallback",
316
356
  "create_agent_tool_handler",
317
357
  "register_agent_tools",
318
358
  "build_sub_agent_messages",
359
+ # Multi-agent support - Structured Handback Protocol
360
+ "HandbackStatus",
361
+ "HandbackResult",
362
+ "Learning",
363
+ # Multi-agent support - Stuck/Loop Detection
364
+ "StuckCondition",
365
+ "StuckDetectionResult",
366
+ "StuckDetector",
367
+ # Multi-agent support - Journey Mode
368
+ "JourneyState",
369
+ "JourneyEndReason",
370
+ "JourneyEndResult",
371
+ "JourneyManager",
372
+ "JOURNEY_STATE_KEY",
373
+ # Multi-agent support - Fallback Routing
374
+ "FallbackConfig",
375
+ # Privacy and user isolation
376
+ "PrivacyConfig",
377
+ "UserContext",
378
+ "MemoryScope",
379
+ "DEFAULT_PRIVACY_CONFIG",
380
+ "ANONYMOUS_USER",
319
381
  ]
@@ -0,0 +1,514 @@
1
+ """
2
+ Reusable agentic loop for tool-calling agents.
3
+
4
+ This module provides a flexible `run_agentic_loop` function that handles
5
+ the standard tool-calling pattern:
6
+ 1. Call LLM with tools
7
+ 2. If tool calls, execute them and loop back
8
+ 3. If no tool calls, return final response
9
+
10
+ This can be used by any agent implementation without requiring inheritance.
11
+ """
12
+
13
+ import json
14
+ import logging
15
+ from dataclasses import dataclass, field
16
+ from typing import Any, Callable, Optional, Awaitable, Union
17
+
18
+ from agent_runtime_core.interfaces import (
19
+ RunContext,
20
+ EventType,
21
+ LLMClient,
22
+ LLMResponse,
23
+ )
24
+ from agent_runtime_core.config import get_config
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # =============================================================================
30
+ # Cost Estimation Configuration
31
+ # =============================================================================
32
+
33
+ # Pricing per 1M tokens (input/output) - updated Jan 2026
34
+ # These are approximate and should be updated as pricing changes
35
+ MODEL_PRICING = {
36
+ # OpenAI
37
+ "gpt-4o": {"input": 2.50, "output": 10.00},
38
+ "gpt-4o-mini": {"input": 0.15, "output": 0.60},
39
+ "gpt-4-turbo": {"input": 10.00, "output": 30.00},
40
+ "gpt-4": {"input": 30.00, "output": 60.00},
41
+ "gpt-3.5-turbo": {"input": 0.50, "output": 1.50},
42
+ "o1": {"input": 15.00, "output": 60.00},
43
+ "o1-mini": {"input": 3.00, "output": 12.00},
44
+ "o1-preview": {"input": 15.00, "output": 60.00},
45
+ "o3-mini": {"input": 1.10, "output": 4.40},
46
+ # Anthropic
47
+ "claude-3-5-sonnet-20241022": {"input": 3.00, "output": 15.00},
48
+ "claude-3-5-haiku-20241022": {"input": 0.80, "output": 4.00},
49
+ "claude-3-opus-20240229": {"input": 15.00, "output": 75.00},
50
+ "claude-3-sonnet-20240229": {"input": 3.00, "output": 15.00},
51
+ "claude-3-haiku-20240307": {"input": 0.25, "output": 1.25},
52
+ # Google
53
+ "gemini-1.5-pro": {"input": 1.25, "output": 5.00},
54
+ "gemini-1.5-flash": {"input": 0.075, "output": 0.30},
55
+ "gemini-2.0-flash": {"input": 0.10, "output": 0.40},
56
+ # Default fallback
57
+ "default": {"input": 3.00, "output": 15.00},
58
+ }
59
+
60
+
61
+ def _get_model_pricing(model: str) -> dict:
62
+ """Get pricing for a model, with fallback to default."""
63
+ # Try exact match first
64
+ if model in MODEL_PRICING:
65
+ return MODEL_PRICING[model]
66
+ # Try prefix match (e.g., "gpt-4o-2024-08-06" -> "gpt-4o")
67
+ for key in MODEL_PRICING:
68
+ if model.startswith(key):
69
+ return MODEL_PRICING[key]
70
+ return MODEL_PRICING["default"]
71
+
72
+
73
def _estimate_cost(usage: dict, model: str) -> float:
    """Estimate the USD cost of one LLM call from its usage dict and model."""
    rates = _get_model_pricing(model)
    # Missing token counts are treated as zero.
    cost_in = (usage.get("prompt_tokens", 0) / 1_000_000) * rates["input"]
    cost_out = (usage.get("completion_tokens", 0) / 1_000_000) * rates["output"]
    return cost_in + cost_out
83
+
84
+
85
+ def _format_cost(cost: float) -> str:
86
+ """Format cost for display."""
87
+ if cost < 0.01:
88
+ return f"${cost:.4f}"
89
+ return f"${cost:.3f}"
90
+
91
+
92
@dataclass
class UsageStats:
    """Running totals of tokens, estimated cost, and call counts for one loop."""

    total_prompt_tokens: int = 0
    total_completion_tokens: int = 0
    total_cost: float = 0.0
    llm_calls: int = 0
    tool_calls: int = 0

    def add_llm_call(self, usage: dict, model: str):
        """Fold one LLM call's token usage and estimated cost into the totals."""
        self.llm_calls += 1
        prompt = usage.get("prompt_tokens", 0)
        completion = usage.get("completion_tokens", 0)
        self.total_prompt_tokens += prompt
        self.total_completion_tokens += completion
        self.total_cost += _estimate_cost(usage, model)

    def add_tool_call(self):
        """Count one tool invocation."""
        self.tool_calls += 1

    def to_dict(self) -> dict:
        """Serialize the stats, adding a derived combined-token total."""
        combined = self.total_prompt_tokens + self.total_completion_tokens
        return {
            "total_prompt_tokens": self.total_prompt_tokens,
            "total_completion_tokens": self.total_completion_tokens,
            "total_tokens": combined,
            "total_cost_usd": self.total_cost,
            "llm_calls": self.llm_calls,
            "tool_calls": self.tool_calls,
        }
123
+
124
+
125
# Type alias for a tool executor: an async callable taking
# (tool_name, arguments_dict) and returning the tool's result
# (any JSON-serializable value, or a str used verbatim).
ToolExecutor = Callable[[str, dict], Awaitable[Any]]
127
+
128
+
129
@dataclass
class AgenticLoopResult:
    """Result from running the agentic loop.

    Returned by run_agentic_loop once the LLM produces a response with no
    tool calls, the loop aborts after repeated tool errors, or the
    iteration cap is reached.
    """

    final_content: str
    """The final text response from the LLM."""

    messages: list[dict]
    """All messages including tool calls and results."""

    iterations: int
    """Number of iterations the loop ran."""

    usage: dict
    """Token usage from the final LLM call."""

    usage_stats: Optional[UsageStats] = None
    """Accumulated usage statistics across all LLM calls (if debug mode enabled)."""
147
+
148
+
149
def _print_usage_summary(usage_stats, label: str = "", iterations: Optional[int] = None) -> None:
    """Print the end-of-run usage/cost summary (debug mode only)."""
    bar = "═══════════════════════════════════════════"
    print(f"[agentic-loop] {bar}", flush=True)
    print(f"[agentic-loop] 📊 FINAL USAGE SUMMARY{label}", flush=True)
    if iterations is not None:
        print(f"[agentic-loop] Iterations: {iterations}", flush=True)
    print(f"[agentic-loop] LLM calls: {usage_stats.llm_calls}", flush=True)
    print(f"[agentic-loop] Tool calls: {usage_stats.tool_calls}", flush=True)
    print(f"[agentic-loop] Total tokens: {usage_stats.total_prompt_tokens:,} in / {usage_stats.total_completion_tokens:,} out", flush=True)
    print(f"[agentic-loop] Estimated cost: {_format_cost(usage_stats.total_cost)}", flush=True)
    print(f"[agentic-loop] {bar}", flush=True)


async def run_agentic_loop(
    llm: LLMClient,
    messages: list[dict],
    tools: Optional[list[dict]],
    execute_tool: ToolExecutor,
    ctx: RunContext,
    *,
    model: Optional[str] = None,
    max_iterations: int = 15,
    emit_events: bool = True,
    ensure_final_response: bool = False,
    **llm_kwargs,
) -> AgenticLoopResult:
    """
    Run the standard agentic tool-calling loop.

    This handles the common pattern of:
    1. Call LLM with available tools
    2. If LLM returns tool calls, execute them
    3. Add tool results to messages and loop back to step 1
    4. If LLM returns a text response (no tool calls), return it

    Note: `messages` is mutated in place — tool calls, tool results, and the
    final assistant message are appended to the caller's list.

    Args:
        llm: The LLM client to use for generation
        messages: Initial messages (should include system prompt)
        tools: List of tool schemas in OpenAI format, or None for no tools
        execute_tool: Async function that executes a tool: (name, args) -> result
        ctx: Run context for emitting events
        model: Model to use (passed to LLM client)
        max_iterations: Maximum loop iterations to prevent infinite loops
        emit_events: Whether to emit TOOL_CALL and TOOL_RESULT events
        ensure_final_response: If True, ensures a summary is generated when tools
            were used but the final response is empty or very short. This is useful
            for agents that should always provide a summary of what was accomplished.
        **llm_kwargs: Additional kwargs passed to llm.generate()

    Returns:
        AgenticLoopResult with final content, messages, and metadata

    Example:
        async def my_tool_executor(name: str, args: dict) -> Any:
            if name == "get_weather":
                return {"temp": 72, "conditions": "sunny"}
            raise ValueError(f"Unknown tool: {name}")

        result = await run_agentic_loop(
            llm=my_llm_client,
            messages=[{"role": "system", "content": "You are helpful."}],
            tools=[{"type": "function", "function": {...}}],
            execute_tool=my_tool_executor,
            ctx=ctx,
            model="gpt-4o",
            ensure_final_response=True,  # Guarantees a summary
        )
    """
    iteration = 0
    final_content = ""
    last_response: Optional[LLMResponse] = None
    consecutive_errors = 0
    max_consecutive_errors = 3  # Bail out if a tool keeps failing

    # Usage/cost tracking is only accumulated (and printed) in debug mode.
    debug_mode = get_config().debug
    usage_stats = UsageStats() if debug_mode else None
    effective_model = model or "unknown"

    while iteration < max_iterations:
        iteration += 1
        # Diagnostics go through the logger so production stdout stays clean.
        logger.debug(
            "Agentic loop iteration %d/%d, messages=%d",
            iteration, max_iterations, len(messages),
        )

        # Call the LLM (omit the tools kwarg entirely when no tools are given).
        if tools:
            response = await llm.generate(
                messages,
                model=model,
                tools=tools,
                **llm_kwargs,
            )
        else:
            response = await llm.generate(
                messages,
                model=model,
                **llm_kwargs,
            )

        last_response = response

        # Track usage in debug mode
        if debug_mode and usage_stats:
            # Prefer the model reported by the response over the requested one.
            resp_model = response.model or effective_model
            usage_stats.add_llm_call(response.usage, resp_model)

            prompt_tokens = response.usage.get("prompt_tokens", 0)
            completion_tokens = response.usage.get("completion_tokens", 0)
            call_cost = _estimate_cost(response.usage, resp_model)

            print(f"[agentic-loop] 💰 LLM Call #{usage_stats.llm_calls}:", flush=True)
            print(f"[agentic-loop] Model: {resp_model}", flush=True)
            print(f"[agentic-loop] Tokens: {prompt_tokens:,} in / {completion_tokens:,} out", flush=True)
            print(f"[agentic-loop] Cost: {_format_cost(call_cost)}", flush=True)
            print(f"[agentic-loop] Running total: {usage_stats.total_prompt_tokens:,} in / {usage_stats.total_completion_tokens:,} out = {_format_cost(usage_stats.total_cost)}", flush=True)

        if response.tool_calls:
            # Add assistant message with tool calls to conversation
            messages.append(response.message)

            # Execute each tool call
            for tool_call in response.tool_calls:
                # Log raw tool call to help diagnose empty-args issues.
                logger.debug("Raw tool_call type=%s", type(tool_call).__name__)
                if hasattr(tool_call, '_data'):
                    logger.debug("tool_call._data=%s", tool_call._data)

                # Handle both ToolCall objects (with .id, .name, .arguments) and dicts
                if hasattr(tool_call, 'id') and not isinstance(tool_call, dict):
                    # ToolCall object
                    tool_call_id = tool_call.id
                    tool_name = tool_call.name
                    tool_args = tool_call.arguments
                    logger.debug("Parsed tool call: name=%s, args=%s", tool_name, tool_args)
                else:
                    # Dict format (OpenAI wire shape: function.name / function.arguments)
                    tool_call_id = tool_call.get("id")
                    tool_name = tool_call.get("function", {}).get("name")
                    tool_args_str = tool_call.get("function", {}).get("arguments", "{}")
                    logger.debug("Dict tool_call: id=%s, name=%s, args_str=%s", tool_call_id, tool_name, tool_args_str)
                    # Parse arguments (handle both string and dict)
                    if isinstance(tool_args_str, dict):
                        tool_args = tool_args_str
                    else:
                        try:
                            tool_args = json.loads(tool_args_str)
                        except json.JSONDecodeError:
                            logger.warning(f"Failed to parse tool args: {tool_args_str}")
                            tool_args = {}

                # Track tool call in debug mode
                if debug_mode and usage_stats:
                    usage_stats.add_tool_call()
                    print(f"[agentic-loop] 🔧 Tool #{usage_stats.tool_calls}: {tool_name}", flush=True)

                # Emit tool call event
                if emit_events:
                    await ctx.emit(EventType.TOOL_CALL, {
                        "id": tool_call_id,
                        "name": tool_name,
                        "arguments": tool_args,
                    })

                # Execute the tool. A dict result containing an "error" key is
                # treated as a soft failure for loop-abort accounting.
                try:
                    result = await execute_tool(tool_name, tool_args)
                    if not isinstance(result, dict) or "error" not in result:
                        consecutive_errors = 0
                    else:
                        consecutive_errors += 1
                except Exception as e:
                    logger.exception(f"Error executing tool {tool_name}")
                    result = {"error": str(e)}
                    consecutive_errors += 1

                # Emit tool result event
                if emit_events:
                    await ctx.emit(EventType.TOOL_RESULT, {
                        "tool_call_id": tool_call_id,
                        "name": tool_name,
                        "result": result,
                    })

                # Add tool result to messages (string results pass through as-is)
                result_str = json.dumps(result) if not isinstance(result, str) else result
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call_id,
                    "content": result_str,
                })

                # Abort the loop on too many consecutive tool errors.
                if consecutive_errors >= max_consecutive_errors:
                    error_msg = result.get('error', 'Unknown error') if isinstance(result, dict) else str(result)
                    logger.warning(f"Aborting agentic loop after {consecutive_errors} consecutive tool errors: {error_msg}")

                    # Emit error event for run history
                    if emit_events:
                        await ctx.emit(EventType.ERROR, {
                            "error": f"Tool loop aborted after {consecutive_errors} consecutive errors",
                            "last_error": error_msg,
                            "tool_name": tool_name,
                            "iterations": iteration,
                        })

                    # Generate a summary if ensure_final_response is enabled
                    if ensure_final_response:
                        logger.info("Generating summary after error exit because ensure_final_response=True")
                        summary = await _generate_task_summary(llm, messages, model, **llm_kwargs)
                        if summary:
                            final_content = f"{summary}\n\n---\n\n⚠️ Note: The task ended early due to repeated errors. Last error: {error_msg}"
                        else:
                            final_content = f"I encountered repeated errors while trying to complete this task. The last error was: {error_msg}"
                    else:
                        final_content = f"I encountered repeated errors while trying to complete this task. The last error was: {error_msg}"

                    messages.append({
                        "role": "assistant",
                        "content": final_content,
                    })

                    if emit_events:
                        await ctx.emit(EventType.ASSISTANT_MESSAGE, {
                            "content": final_content,
                            "role": "assistant",
                        })

                    if debug_mode and usage_stats:
                        _print_usage_summary(usage_stats, label=" (error exit)")

                    return AgenticLoopResult(
                        final_content=final_content,
                        messages=messages,
                        iterations=iteration,
                        usage=last_response.usage if last_response else {},
                        usage_stats=usage_stats,
                    )

            # Continue the loop to get the next response
            continue

        # No tool calls - we have the final response
        final_content = response.message.get("content", "")
        messages.append(response.message)

        # Emit assistant message event for the final response
        if emit_events and final_content:
            await ctx.emit(EventType.ASSISTANT_MESSAGE, {
                "content": final_content,
                "role": "assistant",
            })

        break

    # Check if we need to ensure a final response (summary)
    if ensure_final_response:
        # Check if tools were used during this run
        tools_were_used = any(
            msg.get("role") == "assistant" and msg.get("tool_calls")
            for msg in messages
        )

        # If tools were used but final response is empty or very short, generate a summary
        if tools_were_used and (not final_content or len(final_content.strip()) < 50):
            logger.info("Generating summary because tools were used but final response was empty/short")

            summary = await _generate_task_summary(llm, messages, model, **llm_kwargs)
            if summary:
                final_content = summary
                # Emit the summary as an assistant message
                if emit_events:
                    await ctx.emit(EventType.ASSISTANT_MESSAGE, {
                        "content": summary,
                        "role": "assistant",
                    })
                # Add to messages
                messages.append({"role": "assistant", "content": summary})

    if debug_mode and usage_stats:
        _print_usage_summary(usage_stats, iterations=iteration)

    return AgenticLoopResult(
        final_content=final_content,
        messages=messages,
        iterations=iteration,
        usage=last_response.usage if last_response else {},
        usage_stats=usage_stats,
    )
445
+
446
+
447
+ async def _generate_task_summary(
448
+ llm: LLMClient,
449
+ messages: list[dict],
450
+ model: Optional[str] = None,
451
+ **llm_kwargs,
452
+ ) -> str:
453
+ """
454
+ Generate a summary of what was accomplished based on the conversation history.
455
+
456
+ This is called when ensure_final_response=True and tools were used but
457
+ no meaningful final response was provided.
458
+
459
+ Args:
460
+ llm: The LLM client to use
461
+ messages: The conversation history including tool calls and results
462
+ model: Model to use
463
+ **llm_kwargs: Additional kwargs for the LLM
464
+
465
+ Returns:
466
+ A summary string of what was accomplished
467
+ """
468
+ # Build a summary of tool calls and their results
469
+ tool_summary_parts = []
470
+ for msg in messages:
471
+ if msg.get("role") == "assistant" and msg.get("tool_calls"):
472
+ for tc in msg.get("tool_calls", []):
473
+ if isinstance(tc, dict):
474
+ name = tc.get("function", {}).get("name", "unknown")
475
+ else:
476
+ name = getattr(tc, "name", "unknown")
477
+ tool_summary_parts.append(f"- Called: {name}")
478
+ elif msg.get("role") == "tool":
479
+ content = msg.get("content", "")
480
+ # Truncate long results
481
+ if len(content) > 200:
482
+ content = content[:200] + "..."
483
+ tool_summary_parts.append(f" Result: {content}")
484
+
485
+ tool_summary = "\n".join(tool_summary_parts[-20:]) # Last 20 entries to avoid token limits
486
+
487
+ summary_prompt = f"""Based on the conversation above, provide a brief summary of what was accomplished.
488
+
489
+ Here's a summary of the tools that were called:
490
+ {tool_summary}
491
+
492
+ Please provide a clear, concise summary (2-4 sentences) of:
493
+ 1. What actions were taken
494
+ 2. What was accomplished or changed
495
+ 3. Any important results or next steps
496
+
497
+ Start your response directly with the summary - do not include phrases like "Here's a summary" or "Based on the conversation"."""
498
+
499
+ # Create a simplified message list for the summary request
500
+ summary_messages = [
501
+ {"role": "system", "content": "You are a helpful assistant that provides clear, concise summaries of completed tasks."},
502
+ {"role": "user", "content": summary_prompt},
503
+ ]
504
+
505
+ try:
506
+ response = await llm.generate(
507
+ summary_messages,
508
+ model=model,
509
+ **llm_kwargs,
510
+ )
511
+ return response.message.get("content", "")
512
+ except Exception as e:
513
+ logger.exception("Failed to generate task summary")
514
+ return f"Task completed. (Summary generation failed: {e})"
@@ -88,6 +88,9 @@ class RuntimeConfig:
88
88
  vertex_deployed_index_id: Optional[str] = None
89
89
  vertex_index_id: Optional[str] = None
90
90
 
91
+ # Debug mode - enables verbose logging, cost tracking, etc.
92
+ debug: bool = False
93
+
91
94
  def get_openai_api_key(self) -> Optional[str]:
92
95
  """Get OpenAI API key from config or environment."""
93
96
  return self.openai_api_key or os.environ.get("OPENAI_API_KEY")
@@ -204,6 +207,7 @@ def _apply_env_vars(config: RuntimeConfig) -> None:
204
207
  bool_fields = {
205
208
  "AGENT_RUNTIME_INCLUDE_CONVERSATION_HISTORY": "include_conversation_history",
206
209
  "AGENT_RUNTIME_AUTO_PERSIST_MESSAGES": "auto_persist_messages",
210
+ "AGENT_RUNTIME_DEBUG": "debug",
207
211
  }
208
212
 
209
213
  for env_var, attr in env_mapping.items():