connectonion-0.5.8-py3-none-any.whl
This diff shows the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between package versions as they appear in their public registries.
- connectonion/__init__.py +78 -0
- connectonion/address.py +320 -0
- connectonion/agent.py +450 -0
- connectonion/announce.py +84 -0
- connectonion/asgi.py +287 -0
- connectonion/auto_debug_exception.py +181 -0
- connectonion/cli/__init__.py +3 -0
- connectonion/cli/browser_agent/__init__.py +5 -0
- connectonion/cli/browser_agent/browser.py +243 -0
- connectonion/cli/browser_agent/prompt.md +107 -0
- connectonion/cli/commands/__init__.py +1 -0
- connectonion/cli/commands/auth_commands.py +527 -0
- connectonion/cli/commands/browser_commands.py +27 -0
- connectonion/cli/commands/create.py +511 -0
- connectonion/cli/commands/deploy_commands.py +220 -0
- connectonion/cli/commands/doctor_commands.py +173 -0
- connectonion/cli/commands/init.py +469 -0
- connectonion/cli/commands/project_cmd_lib.py +828 -0
- connectonion/cli/commands/reset_commands.py +149 -0
- connectonion/cli/commands/status_commands.py +168 -0
- connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +2010 -0
- connectonion/cli/docs/connectonion.md +1256 -0
- connectonion/cli/docs.md +123 -0
- connectonion/cli/main.py +148 -0
- connectonion/cli/templates/meta-agent/README.md +287 -0
- connectonion/cli/templates/meta-agent/agent.py +196 -0
- connectonion/cli/templates/meta-agent/prompts/answer_prompt.md +9 -0
- connectonion/cli/templates/meta-agent/prompts/docs_retrieve_prompt.md +15 -0
- connectonion/cli/templates/meta-agent/prompts/metagent.md +71 -0
- connectonion/cli/templates/meta-agent/prompts/think_prompt.md +18 -0
- connectonion/cli/templates/minimal/README.md +56 -0
- connectonion/cli/templates/minimal/agent.py +40 -0
- connectonion/cli/templates/playwright/README.md +118 -0
- connectonion/cli/templates/playwright/agent.py +336 -0
- connectonion/cli/templates/playwright/prompt.md +102 -0
- connectonion/cli/templates/playwright/requirements.txt +3 -0
- connectonion/cli/templates/web-research/agent.py +122 -0
- connectonion/connect.py +128 -0
- connectonion/console.py +539 -0
- connectonion/debug_agent/__init__.py +13 -0
- connectonion/debug_agent/agent.py +45 -0
- connectonion/debug_agent/prompts/debug_assistant.md +72 -0
- connectonion/debug_agent/runtime_inspector.py +406 -0
- connectonion/debug_explainer/__init__.py +10 -0
- connectonion/debug_explainer/explain_agent.py +114 -0
- connectonion/debug_explainer/explain_context.py +263 -0
- connectonion/debug_explainer/explainer_prompt.md +29 -0
- connectonion/debug_explainer/root_cause_analysis_prompt.md +43 -0
- connectonion/debugger_ui.py +1039 -0
- connectonion/decorators.py +208 -0
- connectonion/events.py +248 -0
- connectonion/execution_analyzer/__init__.py +9 -0
- connectonion/execution_analyzer/execution_analysis.py +93 -0
- connectonion/execution_analyzer/execution_analysis_prompt.md +47 -0
- connectonion/host.py +579 -0
- connectonion/interactive_debugger.py +342 -0
- connectonion/llm.py +801 -0
- connectonion/llm_do.py +307 -0
- connectonion/logger.py +300 -0
- connectonion/prompt_files/__init__.py +1 -0
- connectonion/prompt_files/analyze_contact.md +62 -0
- connectonion/prompt_files/eval_expected.md +12 -0
- connectonion/prompt_files/react_evaluate.md +11 -0
- connectonion/prompt_files/react_plan.md +16 -0
- connectonion/prompt_files/reflect.md +22 -0
- connectonion/prompts.py +144 -0
- connectonion/relay.py +200 -0
- connectonion/static/docs.html +688 -0
- connectonion/tool_executor.py +279 -0
- connectonion/tool_factory.py +186 -0
- connectonion/tool_registry.py +105 -0
- connectonion/trust.py +166 -0
- connectonion/trust_agents.py +71 -0
- connectonion/trust_functions.py +88 -0
- connectonion/tui/__init__.py +57 -0
- connectonion/tui/divider.py +39 -0
- connectonion/tui/dropdown.py +251 -0
- connectonion/tui/footer.py +31 -0
- connectonion/tui/fuzzy.py +56 -0
- connectonion/tui/input.py +278 -0
- connectonion/tui/keys.py +35 -0
- connectonion/tui/pick.py +130 -0
- connectonion/tui/providers.py +155 -0
- connectonion/tui/status_bar.py +163 -0
- connectonion/usage.py +161 -0
- connectonion/useful_events_handlers/__init__.py +16 -0
- connectonion/useful_events_handlers/reflect.py +116 -0
- connectonion/useful_plugins/__init__.py +20 -0
- connectonion/useful_plugins/calendar_plugin.py +163 -0
- connectonion/useful_plugins/eval.py +139 -0
- connectonion/useful_plugins/gmail_plugin.py +162 -0
- connectonion/useful_plugins/image_result_formatter.py +127 -0
- connectonion/useful_plugins/re_act.py +78 -0
- connectonion/useful_plugins/shell_approval.py +159 -0
- connectonion/useful_tools/__init__.py +44 -0
- connectonion/useful_tools/diff_writer.py +192 -0
- connectonion/useful_tools/get_emails.py +183 -0
- connectonion/useful_tools/gmail.py +1596 -0
- connectonion/useful_tools/google_calendar.py +613 -0
- connectonion/useful_tools/memory.py +380 -0
- connectonion/useful_tools/microsoft_calendar.py +604 -0
- connectonion/useful_tools/outlook.py +488 -0
- connectonion/useful_tools/send_email.py +205 -0
- connectonion/useful_tools/shell.py +97 -0
- connectonion/useful_tools/slash_command.py +201 -0
- connectonion/useful_tools/terminal.py +285 -0
- connectonion/useful_tools/todo_list.py +241 -0
- connectonion/useful_tools/web_fetch.py +216 -0
- connectonion/xray.py +467 -0
- connectonion-0.5.8.dist-info/METADATA +741 -0
- connectonion-0.5.8.dist-info/RECORD +113 -0
- connectonion-0.5.8.dist-info/WHEEL +4 -0
- connectonion-0.5.8.dist-info/entry_points.txt +3 -0
connectonion/decorators.py
ADDED

@@ -0,0 +1,208 @@
"""
Purpose: Provide @replay decorator for re-executing tools with modified parameters during debugging
LLM-Note:
Dependencies: imports from [functools, builtins, typing] | imported by [agent.py, __init__.py] | tested by [tests/test_decorators.py]
Data flow: @replay wraps function → stores func, args, kwargs in ReplayFunction during execution → user calls replay(param=new_value) in debugger → re-executes function with merged kwargs → prints result
State/Effects: modifies builtins namespace by injecting global 'replay' object | stores ReplayFunction state in _func, _args, _kwargs, _original_func | clears context after execution | no persistence
Integration: exposes @replay decorator, replay global callable, xray_replay() combined decorator, _is_replay_enabled() helper | marked functions have __replay_enabled__ attribute | ReplayDecorator acts as both decorator and callable
Performance: lightweight wrapper with functools.wraps | no performance overhead (just attribute marking) | context cleared immediately after execution
Errors: replay() with no active context prints helpful error message | re-execution errors are re-raised after printing
"""

import functools
import builtins
from typing import Any, Callable


# =============================================================================
# Replay Function and Decorator
# =============================================================================

class ReplayFunction:
    """
    Container for replay functionality.

    Holds the current function and its arguments to enable re-execution
    with modified parameters during debugging.
    """

    def __init__(self):
        """Initialize with no active function."""
        self._func = None
        self._args = None
        self._kwargs = None
        self._original_func = None

    def _setup(self, func: Callable, args: tuple, kwargs: dict) -> None:
        """
        Set up replay context (internal use).

        Args:
            func: The function to replay
            args: Original positional arguments
            kwargs: Original keyword arguments
        """
        self._func = func
        self._args = args
        self._kwargs = kwargs
        self._original_func = func

    def _clear(self) -> None:
        """Clear replay context after execution (internal use)."""
        self._func = None
        self._args = None
        self._kwargs = None
        self._original_func = None

    def __call__(self, **new_kwargs) -> Any:
        """
        Replay the function with modified parameters.

        Args:
            **new_kwargs: Keyword arguments to override

        Returns:
            Result of re-executing the function

        Example:
            # In debugger at breakpoint:
            >>> replay(threshold=0.8)  # Re-run with new threshold
            🔄 Replaying my_function()
               Modified parameters: {'threshold': 0.8}
            ✅ Result: 0.95
        """
        if self._func is None:
            print("❌ No function to replay. Make sure you're in a breakpoint "
                  "inside a @replay decorated function.")
            return None

        # Merge original kwargs with new ones (new ones override)
        merged_kwargs = self._kwargs.copy() if self._kwargs else {}
        merged_kwargs.update(new_kwargs)

        print(f"🔄 Replaying {self._original_func.__name__}()")
        if new_kwargs:
            print(f"   Modified parameters: {new_kwargs}")

        try:
            result = self._func(*self._args, **merged_kwargs)
            print(f"✅ Result: {result}")
            return result
        except Exception as e:
            print(f"❌ Error during replay: {e}")
            raise

    def __repr__(self):
        """Show current replay state."""
        if self._original_func:
            return f"<replay function for {self._original_func.__name__}>"
        return "<replay function (not active)>"


class ReplayDecorator:
    """
    Hybrid object that acts as both a decorator and replay function.

    Dual-purpose design:
    1. When decorating a function, enables replay functionality
    2. When called with kwargs, replays the current function
    """

    def __init__(self, replay_func: ReplayFunction):
        """
        Initialize with a replay function container.

        Args:
            replay_func: ReplayFunction instance to manage replay state
        """
        self._replay_func = replay_func
        # Make this available globally as 'replay' for easy access
        builtins.replay = self

    def __call__(self, *args, **kwargs) -> Any:
        """
        Act as decorator or replay function based on arguments.

        If called with a single callable argument and no kwargs, acts as decorator.
        Otherwise, forwards the call to replay the current function.

        Args:
            *args: Positional arguments
            **kwargs: Keyword arguments

        Returns:
            Decorated function or replay result
        """
        # Check if being used as decorator
        if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
            func = args[0]

            @functools.wraps(func)
            def wrapper(*inner_args, **inner_kwargs):
                # Set up replay context with current execution
                self._replay_func._setup(func, inner_args, inner_kwargs)

                try:
                    # Execute the original function
                    return func(*inner_args, **inner_kwargs)
                finally:
                    # Clean up replay context
                    self._replay_func._clear()

            # Mark function as replay-enabled
            wrapper.__replay_enabled__ = True
            return wrapper

        # Otherwise, act as the replay function
        else:
            return self._replay_func(*args, **kwargs)

    def __repr__(self):
        """Delegate representation to replay function."""
        return repr(self._replay_func)


# Create the global replay instance
replay_function = ReplayFunction()
replay = ReplayDecorator(replay_function)


# =============================================================================
# Combined Decorator
# =============================================================================

def xray_replay(func: Callable) -> Callable:
    """
    Convenience decorator that combines @xray and @replay.

    Equivalent to:
        @xray
        @replay
        def my_tool(...):
            ...

    Args:
        func: Function to decorate

    Returns:
        Function with both xray and replay capabilities
    """
    from .xray import xray
    return xray(replay(func))


# =============================================================================
# Helper Functions
# =============================================================================

def _is_replay_enabled(func: Callable) -> bool:
    """
    Check if a function has the @replay decorator.

    Args:
        func: Function to check

    Returns:
        True if function is decorated with @replay
    """
    return getattr(func, '__replay_enabled__', False)
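Putting decorators.py together, a minimal usage sketch (the tool name `score` and its body are hypothetical; `replay` only has an active context while the decorated function is still executing, e.g. while paused at a debugger breakpoint inside it):

from connectonion.decorators import replay

@replay
def score(text: str, threshold: float = 0.5) -> float:
    # Hypothetical tool body; pause here in a debugger to use replay().
    return 0.95 if len(text) > 10 else 0.2

# While paused inside score(), the global `replay` injected into builtins
# re-runs it with overridden keyword arguments:
#   >>> replay(threshold=0.8)
#   🔄 Replaying score()
#      Modified parameters: {'threshold': 0.8}
#   ✅ Result: 0.95
score("some longer input text")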
connectonion/events.py
ADDED

@@ -0,0 +1,248 @@
"""
Purpose: Event system for hooking into agent lifecycle
LLM-Note:
Dependencies: None (standalone module) | imported by [agent.py, __init__.py] | tested by [tests/test_events.py]
Data flow: Wrapper functions tag event handlers with _event_type attribute → Agent organizes handlers by type → Agent invokes handlers at specific lifecycle points passing agent instance
State/Effects: Event handlers receive agent instance and can modify agent.current_session (messages, trace, etc.)
Integration: exposes after_user_input(), before_llm(), after_llm(), before_each_tool(), before_tools(), after_each_tool(), after_tools(), on_error(), on_complete()
Performance: Minimal overhead - just function attribute checking and iteration over handler lists
Errors: Event handler exceptions propagate and stop agent execution (fail fast)
"""

from typing import Callable, List, Union, TYPE_CHECKING

if TYPE_CHECKING:
    from .agent import Agent

# Event handler type: function that takes Agent and returns None
EventHandler = Callable[['Agent'], None]


def after_user_input(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as after_user_input event handlers.

    Fires once per turn, after user input is added to session.
    Use for: adding context, timestamps, initializing turn state.

    Supports both decorator and wrapper syntax:
        # As decorator
        @after_user_input
        def add_timestamp(agent):
            ...

        # As wrapper (single or multiple)
        on_events=[after_user_input(handler1, handler2)]
    """
    for fn in funcs:
        fn._event_type = 'after_user_input'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def before_llm(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as before_llm event handlers.

    Fires before each LLM call (multiple times per turn).
    Use for: modifying messages for specific LLM calls.

    Supports both decorator and wrapper syntax:
        @before_llm
        def inject_context(agent):
            ...

        on_events=[before_llm(handler1, handler2)]
    """
    for fn in funcs:
        fn._event_type = 'before_llm'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def after_llm(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as after_llm event handlers.

    Fires after each LLM response (multiple times per turn).
    Use for: logging LLM calls, analyzing responses.

    Supports both decorator and wrapper syntax:
        @after_llm
        def log_llm(agent):
            trace = agent.current_session['trace'][-1]
            if trace['type'] == 'llm_call':
                print(f"LLM took {trace['duration_ms']:.0f}ms")

        on_events=[after_llm(log_llm)]
    """
    for fn in funcs:
        fn._event_type = 'after_llm'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def before_each_tool(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as before_each_tool event handlers.

    Fires before EACH individual tool execution.
    Use for: validating arguments, approval prompts, logging.

    Access pending tool via agent.current_session['pending_tool']:
        - name: Tool name (e.g., "bash")
        - arguments: Tool arguments dict
        - id: Tool call ID

    Raise an exception to cancel the tool execution.

    Supports both decorator and wrapper syntax:
        @before_each_tool
        def approve_dangerous(agent):
            ...

        # Multiple handlers
        on_events=[before_each_tool(check_shell, check_email)]
    """
    for fn in funcs:
        fn._event_type = 'before_each_tool'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def before_tools(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as before_tools event handlers.

    Fires ONCE before ALL tools in a batch execute.

    What is a "tools batch"?
    When the LLM responds, it can request multiple tools at once. For example:
        LLM Response: tool_calls = [search("python"), read_file("docs.md"), calculate(2+2)]

    This group of tools from ONE LLM response is called a "tools batch".
    - before_tools fires ONCE before the batch starts
    - after_tools fires ONCE after ALL tools in the batch complete

    Use for: batch validation, user approval before execution, setup.

    Supports both decorator and wrapper syntax:
        @before_tools
        def log_batch_start(agent):
            ...

        on_events=[before_tools(handler)]
    """
    for fn in funcs:
        fn._event_type = 'before_tools'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def after_each_tool(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as after_each_tool event handlers.

    Fires after EACH individual tool execution (success, error, or not_found).
    Use for: logging individual tool performance, debugging.

    ⚠️ WARNING: Do NOT add messages to agent.current_session['messages'] here!
    When the LLM returns multiple tool_calls, this fires after EACH tool, which would
    interleave messages between tool results. This breaks Anthropic Claude's API,
    which requires all tool_results to immediately follow the tool_use message.

    If you need to add messages after tools complete, use `after_tools` instead.

    Supports both decorator and wrapper syntax:
        @after_each_tool
        def log_tool(agent):
            trace = agent.current_session['trace'][-1]
            if trace['type'] == 'tool_execution':
                print(f"Tool: {trace['tool_name']} in {trace['timing']:.0f}ms")

        on_events=[after_each_tool(handler1, handler2)]
    """
    for fn in funcs:
        fn._event_type = 'after_each_tool'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def after_tools(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as after_tools event handlers.

    Fires ONCE after ALL tools in a batch complete.

    What is a "tools batch"?
    When the LLM responds, it can request multiple tools at once. For example:
        LLM Response: tool_calls = [search("python"), read_file("docs.md"), calculate(2+2)]

    This group of tools from ONE LLM response is called a "tools batch".
    - before_tools fires ONCE before the batch starts
    - after_tools fires ONCE after ALL tools in the batch complete

    This is the SAFE place to add messages to agent.current_session['messages']
    after tool execution, because all tool_results have been added and message
    ordering is correct for all LLM providers (including Anthropic Claude).

    Message ordering when this event fires:
        - assistant (with tool_calls)
        - tool result 1
        - tool result 2
        - tool result N
        - [YOUR MESSAGE HERE - safe to add]

    Use for: reflection/reasoning injection, ReAct pattern, batch cleanup.

    Supports both decorator and wrapper syntax:
        @after_tools
        def add_reflection(agent):
            trace = agent.current_session['trace']
            recent = [t for t in trace if t['type'] == 'tool_execution'][-3:]
            agent.current_session['messages'].append({
                'role': 'assistant',
                'content': f"Completed {len(recent)} tools"
            })

        on_events=[after_tools(add_reflection)]
    """
    for fn in funcs:
        fn._event_type = 'after_tools'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def on_error(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as on_error event handlers.

    Fires when tool execution fails.
    Use for: custom error handling, retries, fallback values.

    Supports both decorator and wrapper syntax:
        @on_error
        def handle_error(agent):
            trace = agent.current_session['trace'][-1]
            if trace.get('status') == 'error':
                print(f"Tool failed: {trace['error']}")

        on_events=[on_error(handler1, handler2)]
    """
    for fn in funcs:
        fn._event_type = 'on_error'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)


def on_complete(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
    """
    Mark function(s) as on_complete event handlers.

    Fires once per input() call, after final response is generated.
    Use for: metrics, logging, cleanup, final summary.

    Supports both decorator and wrapper syntax:
        @on_complete
        def log_done(agent):
            trace = agent.current_session['trace']
            tools_used = [t['tool_name'] for t in trace if t['type'] == 'tool_execution']
            print(f"Task done. Tools: {tools_used}")

        on_events=[on_complete(handler1, handler2)]
    """
    for fn in funcs:
        fn._event_type = 'on_complete'  # type: ignore
    return funcs[0] if len(funcs) == 1 else list(funcs)
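A sketch of how these markers are meant to be consumed, based on the docstrings above (the `on_events` parameter and the `current_session` keys are taken from those docstrings; the handler bodies themselves are hypothetical):

from connectonion.events import after_llm, before_each_tool

@after_llm
def log_llm(agent):
    # The agent appends a trace entry for each LLM call.
    trace = agent.current_session['trace'][-1]
    if trace['type'] == 'llm_call':
        print(f"LLM took {trace['duration_ms']:.0f}ms")

def block_dangerous_shell(agent):
    # Raising here cancels the pending tool execution (per the docstring).
    pending = agent.current_session['pending_tool']
    if pending['name'] == 'bash' and 'rm -rf' in str(pending['arguments']):
        raise PermissionError("Blocked dangerous shell command")

# Decorator and wrapper syntax both just tag the function with _event_type;
# the Agent is expected to receive the tagged handlers via its on_events parameter.
handlers = [log_llm, before_each_tool(block_dangerous_shell)]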
connectonion/execution_analyzer/__init__.py
ADDED

@@ -0,0 +1,9 @@
"""Execution analyzer - Post-execution analysis and improvement suggestions.

Analyzes completed agent runs and provides suggestions for improving
system prompts and agent behavior.
"""

from .execution_analysis import analyze_execution, ExecutionAnalysis

__all__ = ["analyze_execution", "ExecutionAnalysis"]
connectonion/execution_analyzer/execution_analysis.py
ADDED

@@ -0,0 +1,93 @@
"""
Purpose: Post-execution analysis of agent runs with AI-powered improvement suggestions
LLM-Note:
Dependencies: imports from [pathlib, pydantic, typing, llm_do.py] | imported by [agent.py via execution_analyzer/__init__.py] | tested by [tests/test_execution_analyzer.py]
Data flow: receives from Agent.input() after completion → analyze_execution(user_prompt, agent_instance, final_result, execution_trace, max_iterations_reached) → builds execution summary with tool calls → loads prompt from execution_analysis_prompt.md → calls llm_do() with ExecutionAnalysis Pydantic schema → returns structured analysis with task_completed, problems_identified, system_prompt_suggestions, overall_quality, key_insights
State/Effects: reads execution_analysis_prompt.md file | makes single LLM API call via llm_do() | no writes or global state | stateless
Integration: exposes analyze_execution(), ExecutionAnalysis Pydantic model | used by Agent after .input() completes to provide feedback | uses same model as agent instance for analysis | ExecutionAnalysis schema validated by Pydantic
Performance: single LLM call per execution (only when enabled) | execution_trace serialization can be large for long runs | no caching
Errors: llm_do() errors bubble up (API failures, timeout) | Pydantic ValidationError if LLM output doesn't match schema | handles empty tools list gracefully

Post-execution analysis for completed agent runs.

Analyzes the entire execution trace and provides suggestions for improvement.
"""

from pathlib import Path
from pydantic import BaseModel
from typing import List
from ..llm_do import llm_do


class ExecutionAnalysis(BaseModel):
    """Structured output for post-execution analysis."""
    task_completed: bool
    completion_explanation: str
    problems_identified: List[str]
    system_prompt_suggestions: List[str]
    overall_quality: str  # "excellent" | "good" | "fair" | "poor"
    key_insights: List[str]


def analyze_execution(
    user_prompt: str,
    agent_instance,
    final_result: str,
    execution_trace: List,
    max_iterations_reached: bool
) -> ExecutionAnalysis:
    """Analyze completed execution and suggest improvements.

    Args:
        user_prompt: The original user request
        agent_instance: The Agent that executed
        final_result: Final response from agent
        execution_trace: Complete trace of execution
        max_iterations_reached: Whether agent hit iteration limit

    Returns:
        Structured analysis with improvement suggestions
    """
    # Build execution summary
    tools_called = [
        entry for entry in execution_trace
        if entry.get('type') == 'tool_execution'
    ]

    tools_summary = []
    for entry in tools_called:
        status = "✓" if entry.get('status') == 'success' else "✗"
        tools_summary.append(
            f"{status} {entry.get('tool_name')}({entry.get('args')}) → {entry.get('result')}"
        )

    # Create analysis input
    data = f"""**User Request:**
{user_prompt}

**Agent System Prompt:**
{agent_instance.system_prompt}

**Available Tools:**
{', '.join([t.name for t in agent_instance.tools]) if agent_instance.tools else 'None'}

**Execution Summary:**
- Max iterations reached: {max_iterations_reached}
- Tools called ({len(tools_called)}):
{chr(10).join(f"  {i+1}. {s}" for i, s in enumerate(tools_summary))}

**Final Result:**
{final_result}

**Complete Trace:**
{execution_trace}"""

    prompt_file = Path(__file__).parent / "execution_analysis_prompt.md"

    # Use same model as agent
    return llm_do(
        data,
        output=ExecutionAnalysis,
        system_prompt=prompt_file,
        model=agent_instance.llm.model
    )
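A hedged sketch of invoking the analyzer after a run, assuming `agent` is a finished connectonion Agent exposing `system_prompt`, `tools`, `llm.model`, and a `current_session['trace']` list as the code above expects, and `result` is the string the agent returned (both names are hypothetical here):

from connectonion.execution_analyzer import analyze_execution

analysis = analyze_execution(
    user_prompt="Summarize docs.md",
    agent_instance=agent,                 # hypothetical finished Agent
    final_result=result,                  # the agent's final response string
    execution_trace=agent.current_session['trace'],
    max_iterations_reached=False,
)
print(analysis.overall_quality)           # "excellent" | "good" | "fair" | "poor"
for suggestion in analysis.system_prompt_suggestions:
    print("-", suggestion)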
connectonion/execution_analyzer/execution_analysis_prompt.md
ADDED

@@ -0,0 +1,47 @@
# Post-Execution Analysis Expert

You are an expert at analyzing AI agent execution and suggesting improvements.

## Your Task

Analyze a completed agent execution and provide actionable insights.

You will be given:
- User's original request
- Agent's system prompt
- Complete execution trace (all tools called, results, errors)
- Final result or max iteration limit
- Available tools

## Analysis Framework

Provide structured analysis:

1. **task_completed**: Did the agent successfully complete the user's task? (true/false)

2. **completion_explanation**: Why was it completed or not? Be specific about what happened.

3. **problems_identified**: What went wrong or could improve? List specific issues:
   - Wrong tool choices
   - Inefficient sequences
   - Errors encountered
   - Missing capabilities
   - Unclear goals or confusion

4. **system_prompt_suggestions**: Concrete changes to improve the system prompt:
   - Quote problematic parts and suggest replacements
   - Suggest new directives to add
   - Identify contradictions or ambiguities
   - Be specific with actual suggested text

5. **overall_quality**: Rate execution: "excellent" | "good" | "fair" | "poor"

6. **key_insights**: 2-3 most important lessons for improving this agent

## Guidelines

- Be specific and actionable (not generic)
- Quote actual moments from execution
- Suggest concrete system prompt modifications
- Focus on root causes
- Prioritize high-impact improvements