connectonion-0.5.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. connectonion/__init__.py +78 -0
  2. connectonion/address.py +320 -0
  3. connectonion/agent.py +450 -0
  4. connectonion/announce.py +84 -0
  5. connectonion/asgi.py +287 -0
  6. connectonion/auto_debug_exception.py +181 -0
  7. connectonion/cli/__init__.py +3 -0
  8. connectonion/cli/browser_agent/__init__.py +5 -0
  9. connectonion/cli/browser_agent/browser.py +243 -0
  10. connectonion/cli/browser_agent/prompt.md +107 -0
  11. connectonion/cli/commands/__init__.py +1 -0
  12. connectonion/cli/commands/auth_commands.py +527 -0
  13. connectonion/cli/commands/browser_commands.py +27 -0
  14. connectonion/cli/commands/create.py +511 -0
  15. connectonion/cli/commands/deploy_commands.py +220 -0
  16. connectonion/cli/commands/doctor_commands.py +173 -0
  17. connectonion/cli/commands/init.py +469 -0
  18. connectonion/cli/commands/project_cmd_lib.py +828 -0
  19. connectonion/cli/commands/reset_commands.py +149 -0
  20. connectonion/cli/commands/status_commands.py +168 -0
  21. connectonion/cli/docs/co-vibecoding-principles-docs-contexts-all-in-one.md +2010 -0
  22. connectonion/cli/docs/connectonion.md +1256 -0
  23. connectonion/cli/docs.md +123 -0
  24. connectonion/cli/main.py +148 -0
  25. connectonion/cli/templates/meta-agent/README.md +287 -0
  26. connectonion/cli/templates/meta-agent/agent.py +196 -0
  27. connectonion/cli/templates/meta-agent/prompts/answer_prompt.md +9 -0
  28. connectonion/cli/templates/meta-agent/prompts/docs_retrieve_prompt.md +15 -0
  29. connectonion/cli/templates/meta-agent/prompts/metagent.md +71 -0
  30. connectonion/cli/templates/meta-agent/prompts/think_prompt.md +18 -0
  31. connectonion/cli/templates/minimal/README.md +56 -0
  32. connectonion/cli/templates/minimal/agent.py +40 -0
  33. connectonion/cli/templates/playwright/README.md +118 -0
  34. connectonion/cli/templates/playwright/agent.py +336 -0
  35. connectonion/cli/templates/playwright/prompt.md +102 -0
  36. connectonion/cli/templates/playwright/requirements.txt +3 -0
  37. connectonion/cli/templates/web-research/agent.py +122 -0
  38. connectonion/connect.py +128 -0
  39. connectonion/console.py +539 -0
  40. connectonion/debug_agent/__init__.py +13 -0
  41. connectonion/debug_agent/agent.py +45 -0
  42. connectonion/debug_agent/prompts/debug_assistant.md +72 -0
  43. connectonion/debug_agent/runtime_inspector.py +406 -0
  44. connectonion/debug_explainer/__init__.py +10 -0
  45. connectonion/debug_explainer/explain_agent.py +114 -0
  46. connectonion/debug_explainer/explain_context.py +263 -0
  47. connectonion/debug_explainer/explainer_prompt.md +29 -0
  48. connectonion/debug_explainer/root_cause_analysis_prompt.md +43 -0
  49. connectonion/debugger_ui.py +1039 -0
  50. connectonion/decorators.py +208 -0
  51. connectonion/events.py +248 -0
  52. connectonion/execution_analyzer/__init__.py +9 -0
  53. connectonion/execution_analyzer/execution_analysis.py +93 -0
  54. connectonion/execution_analyzer/execution_analysis_prompt.md +47 -0
  55. connectonion/host.py +579 -0
  56. connectonion/interactive_debugger.py +342 -0
  57. connectonion/llm.py +801 -0
  58. connectonion/llm_do.py +307 -0
  59. connectonion/logger.py +300 -0
  60. connectonion/prompt_files/__init__.py +1 -0
  61. connectonion/prompt_files/analyze_contact.md +62 -0
  62. connectonion/prompt_files/eval_expected.md +12 -0
  63. connectonion/prompt_files/react_evaluate.md +11 -0
  64. connectonion/prompt_files/react_plan.md +16 -0
  65. connectonion/prompt_files/reflect.md +22 -0
  66. connectonion/prompts.py +144 -0
  67. connectonion/relay.py +200 -0
  68. connectonion/static/docs.html +688 -0
  69. connectonion/tool_executor.py +279 -0
  70. connectonion/tool_factory.py +186 -0
  71. connectonion/tool_registry.py +105 -0
  72. connectonion/trust.py +166 -0
  73. connectonion/trust_agents.py +71 -0
  74. connectonion/trust_functions.py +88 -0
  75. connectonion/tui/__init__.py +57 -0
  76. connectonion/tui/divider.py +39 -0
  77. connectonion/tui/dropdown.py +251 -0
  78. connectonion/tui/footer.py +31 -0
  79. connectonion/tui/fuzzy.py +56 -0
  80. connectonion/tui/input.py +278 -0
  81. connectonion/tui/keys.py +35 -0
  82. connectonion/tui/pick.py +130 -0
  83. connectonion/tui/providers.py +155 -0
  84. connectonion/tui/status_bar.py +163 -0
  85. connectonion/usage.py +161 -0
  86. connectonion/useful_events_handlers/__init__.py +16 -0
  87. connectonion/useful_events_handlers/reflect.py +116 -0
  88. connectonion/useful_plugins/__init__.py +20 -0
  89. connectonion/useful_plugins/calendar_plugin.py +163 -0
  90. connectonion/useful_plugins/eval.py +139 -0
  91. connectonion/useful_plugins/gmail_plugin.py +162 -0
  92. connectonion/useful_plugins/image_result_formatter.py +127 -0
  93. connectonion/useful_plugins/re_act.py +78 -0
  94. connectonion/useful_plugins/shell_approval.py +159 -0
  95. connectonion/useful_tools/__init__.py +44 -0
  96. connectonion/useful_tools/diff_writer.py +192 -0
  97. connectonion/useful_tools/get_emails.py +183 -0
  98. connectonion/useful_tools/gmail.py +1596 -0
  99. connectonion/useful_tools/google_calendar.py +613 -0
  100. connectonion/useful_tools/memory.py +380 -0
  101. connectonion/useful_tools/microsoft_calendar.py +604 -0
  102. connectonion/useful_tools/outlook.py +488 -0
  103. connectonion/useful_tools/send_email.py +205 -0
  104. connectonion/useful_tools/shell.py +97 -0
  105. connectonion/useful_tools/slash_command.py +201 -0
  106. connectonion/useful_tools/terminal.py +285 -0
  107. connectonion/useful_tools/todo_list.py +241 -0
  108. connectonion/useful_tools/web_fetch.py +216 -0
  109. connectonion/xray.py +467 -0
  110. connectonion-0.5.8.dist-info/METADATA +741 -0
  111. connectonion-0.5.8.dist-info/RECORD +113 -0
  112. connectonion-0.5.8.dist-info/WHEEL +4 -0
  113. connectonion-0.5.8.dist-info/entry_points.txt +3 -0
connectonion/decorators.py ADDED
@@ -0,0 +1,208 @@
+ """
+ Purpose: Provide @replay decorator for re-executing tools with modified parameters during debugging
+ LLM-Note:
+   Dependencies: imports from [functools, builtins, typing] | imported by [agent.py, __init__.py] | tested by [tests/test_decorators.py]
+   Data flow: @replay wraps function → stores func, args, kwargs in ReplayFunction during execution → user calls replay(param=new_value) in debugger → re-executes function with merged kwargs → prints result
+   State/Effects: modifies builtins namespace by injecting global 'replay' object | stores ReplayFunction state in _func, _args, _kwargs, _original_func | clears context after execution | no persistence
+   Integration: exposes @replay decorator, replay global callable, xray_replay() combined decorator, _is_replay_enabled() helper | marked functions have __replay_enabled__ attribute | ReplayDecorator acts as both decorator and callable
+   Performance: lightweight wrapper with functools.wraps | no performance overhead (just attribute marking) | context cleared immediately after execution
+   Errors: replay() with no active context prints helpful error message | re-execution errors are re-raised after printing
+ """
+
+ import functools
+ import builtins
+ from typing import Any, Callable
+
+
+ # =============================================================================
+ # Replay Function and Decorator
+ # =============================================================================
+
+ class ReplayFunction:
+     """
+     Container for replay functionality.
+
+     Holds the current function and its arguments to enable re-execution
+     with modified parameters during debugging.
+     """
+
+     def __init__(self):
+         """Initialize with no active function."""
+         self._func = None
+         self._args = None
+         self._kwargs = None
+         self._original_func = None
+
+     def _setup(self, func: Callable, args: tuple, kwargs: dict) -> None:
+         """
+         Set up replay context (internal use).
+
+         Args:
+             func: The function to replay
+             args: Original positional arguments
+             kwargs: Original keyword arguments
+         """
+         self._func = func
+         self._args = args
+         self._kwargs = kwargs
+         self._original_func = func
+
+     def _clear(self) -> None:
+         """Clear replay context after execution (internal use)."""
+         self._func = None
+         self._args = None
+         self._kwargs = None
+         self._original_func = None
+
+     def __call__(self, **new_kwargs) -> Any:
+         """
+         Replay the function with modified parameters.
+
+         Args:
+             **new_kwargs: Keyword arguments to override
+
+         Returns:
+             Result of re-executing the function
+
+         Example:
+             # In debugger at breakpoint:
+             >>> replay(threshold=0.8) # Re-run with new threshold
+             🔄 Replaying my_function()
+              Modified parameters: {'threshold': 0.8}
+             ✅ Result: 0.95
+         """
+         if self._func is None:
+             print("❌ No function to replay. Make sure you're in a breakpoint "
+                   "inside a @replay decorated function.")
+             return None
+
+         # Merge original kwargs with new ones (new ones override)
+         merged_kwargs = self._kwargs.copy() if self._kwargs else {}
+         merged_kwargs.update(new_kwargs)
+
+         print(f"🔄 Replaying {self._original_func.__name__}()")
+         if new_kwargs:
+             print(f" Modified parameters: {new_kwargs}")
+
+         try:
+             result = self._func(*self._args, **merged_kwargs)
+             print(f"✅ Result: {result}")
+             return result
+         except Exception as e:
+             print(f"❌ Error during replay: {e}")
+             raise
+
+     def __repr__(self):
+         """Show current replay state."""
+         if self._original_func:
+             return f"<replay function for {self._original_func.__name__}>"
+         return "<replay function (not active)>"
+
+
+ class ReplayDecorator:
+     """
+     Hybrid object that acts as both a decorator and replay function.
+
+     Dual-purpose design:
+     1. When decorating a function, enables replay functionality
+     2. When called with kwargs, replays the current function
+     """
+
+     def __init__(self, replay_func: ReplayFunction):
+         """
+         Initialize with a replay function container.
+
+         Args:
+             replay_func: ReplayFunction instance to manage replay state
+         """
+         self._replay_func = replay_func
+         # Make this available globally as 'replay' for easy access
+         builtins.replay = self
+
+     def __call__(self, *args, **kwargs) -> Any:
+         """
+         Act as decorator or replay function based on arguments.
+
+         If called with a single callable argument and no kwargs, acts as decorator.
+         Otherwise, forwards the call to replay the current function.
+
+         Args:
+             *args: Positional arguments
+             **kwargs: Keyword arguments
+
+         Returns:
+             Decorated function or replay result
+         """
+         # Check if being used as decorator
+         if len(args) == 1 and len(kwargs) == 0 and callable(args[0]):
+             func = args[0]
+
+             @functools.wraps(func)
+             def wrapper(*inner_args, **inner_kwargs):
+                 # Set up replay context with current execution
+                 self._replay_func._setup(func, inner_args, inner_kwargs)
+
+                 try:
+                     # Execute the original function
+                     return func(*inner_args, **inner_kwargs)
+                 finally:
+                     # Clean up replay context
+                     self._replay_func._clear()
+
+             # Mark function as replay-enabled
+             wrapper.__replay_enabled__ = True
+             return wrapper
+
+         # Otherwise, act as the replay function
+         else:
+             return self._replay_func(*args, **kwargs)
+
+     def __repr__(self):
+         """Delegate representation to replay function."""
+         return repr(self._replay_func)
+
+
+ # Create the global replay instance
+ replay_function = ReplayFunction()
+ replay = ReplayDecorator(replay_function)
+
+
+ # =============================================================================
+ # Combined Decorator
+ # =============================================================================
+
+ def xray_replay(func: Callable) -> Callable:
+     """
+     Convenience decorator that combines @xray and @replay.
+
+     Equivalent to:
+         @xray
+         @replay
+         def my_tool(...):
+             ...
+
+     Args:
+         func: Function to decorate
+
+     Returns:
+         Function with both xray and replay capabilities
+     """
+     from .xray import xray
+     return xray(replay(func))
+
+
+ # =============================================================================
+ # Helper Functions
+ # =============================================================================
+
+ def _is_replay_enabled(func: Callable) -> bool:
+     """
+     Check if a function has the @replay decorator.
+
+     Args:
+         func: Function to check
+
+     Returns:
+         True if function is decorated with @replay
+     """
+     return getattr(func, '__replay_enabled__', False)
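
Because `ReplayDecorator.__init__` injects itself into `builtins`, `replay` is reachable from any debugger prompt with no import. A minimal usage sketch of the decorator above; the tool, its parameters, and the `breakpoint()` placement are illustrative, not taken from the package:

    from connectonion.decorators import replay, _is_replay_enabled

    @replay
    def score(text: str, threshold: float = 0.5) -> bool:
        value = len(text) / 100  # stand-in scoring logic (hypothetical)
        # breakpoint()           # at a pdb prompt here, the global is live:
        #                        #   (Pdb) replay(threshold=0.9)
        #                        # re-runs score() with threshold overridden
        return value >= threshold

    print(score("hello world"))       # wrapper sets up, then clears, the replay context
    print(_is_replay_enabled(score))  # True — wrapper carries __replay_enabled__

Note that the context is cleared in a `finally` block, so `replay(...)` only works while the decorated function is still on the stack (i.e., paused at a breakpoint), exactly as the docstrings state.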
connectonion/events.py ADDED
@@ -0,0 +1,248 @@
+ """
+ Purpose: Event system for hooking into agent lifecycle
+ LLM-Note:
+   Dependencies: None (standalone module) | imported by [agent.py, __init__.py] | tested by [tests/test_events.py]
+   Data flow: Wrapper functions tag event handlers with _event_type attribute → Agent organizes handlers by type → Agent invokes handlers at specific lifecycle points passing agent instance
+   State/Effects: Event handlers receive agent instance and can modify agent.current_session (messages, trace, etc.)
+   Integration: exposes after_user_input(), before_llm(), after_llm(), before_each_tool(), before_tools(), after_each_tool(), after_tools(), on_error(), on_complete()
+   Performance: Minimal overhead - just function attribute checking and iteration over handler lists
+   Errors: Event handler exceptions propagate and stop agent execution (fail fast)
+ """
+
+ from typing import Callable, List, Union, TYPE_CHECKING
+
+ if TYPE_CHECKING:
+     from .agent import Agent
+
+ # Event handler type: function that takes Agent and returns None
+ EventHandler = Callable[['Agent'], None]
+
+
+ def after_user_input(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as after_user_input event handlers.
+
+     Fires once per turn, after user input is added to session.
+     Use for: adding context, timestamps, initializing turn state.
+
+     Supports both decorator and wrapper syntax:
+         # As decorator
+         @after_user_input
+         def add_timestamp(agent):
+             ...
+
+         # As wrapper (single or multiple)
+         on_events=[after_user_input(handler1, handler2)]
+     """
+     for fn in funcs:
+         fn._event_type = 'after_user_input' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def before_llm(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as before_llm event handlers.
+
+     Fires before each LLM call (multiple times per turn).
+     Use for: modifying messages for specific LLM calls.
+
+     Supports both decorator and wrapper syntax:
+         @before_llm
+         def inject_context(agent):
+             ...
+
+         on_events=[before_llm(handler1, handler2)]
+     """
+     for fn in funcs:
+         fn._event_type = 'before_llm' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def after_llm(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as after_llm event handlers.
+
+     Fires after each LLM response (multiple times per turn).
+     Use for: logging LLM calls, analyzing responses.
+
+     Supports both decorator and wrapper syntax:
+         @after_llm
+         def log_llm(agent):
+             trace = agent.current_session['trace'][-1]
+             if trace['type'] == 'llm_call':
+                 print(f"LLM took {trace['duration_ms']:.0f}ms")
+
+         on_events=[after_llm(log_llm)]
+     """
+     for fn in funcs:
+         fn._event_type = 'after_llm' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def before_each_tool(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as before_each_tool event handlers.
+
+     Fires before EACH individual tool execution.
+     Use for: validating arguments, approval prompts, logging.
+
+     Access pending tool via agent.current_session['pending_tool']:
+         - name: Tool name (e.g., "bash")
+         - arguments: Tool arguments dict
+         - id: Tool call ID
+
+     Raise an exception to cancel the tool execution.
+
+     Supports both decorator and wrapper syntax:
+         @before_each_tool
+         def approve_dangerous(agent):
+             ...
+
+         # Multiple handlers
+         on_events=[before_each_tool(check_shell, check_email)]
+     """
+     for fn in funcs:
+         fn._event_type = 'before_each_tool' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def before_tools(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as before_tools event handlers.
+
+     Fires ONCE before ALL tools in a batch execute.
+
+     What is a "tools batch"?
+     When the LLM responds, it can request multiple tools at once. For example:
+         LLM Response: tool_calls = [search("python"), read_file("docs.md"), calculate(2+2)]
+
+     This group of tools from ONE LLM response is called a "tools batch".
+     - before_tools fires ONCE before the batch starts
+     - after_tools fires ONCE after ALL tools in the batch complete
+
+     Use for: batch validation, user approval before execution, setup.
+
+     Supports both decorator and wrapper syntax:
+         @before_tools
+         def log_batch_start(agent):
+             ...
+
+         on_events=[before_tools(handler)]
+     """
+     for fn in funcs:
+         fn._event_type = 'before_tools' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def after_each_tool(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as after_each_tool event handlers.
+
+     Fires after EACH individual tool execution (success, error, or not_found).
+     Use for: logging individual tool performance, debugging.
+
+     ⚠️ WARNING: Do NOT add messages to agent.current_session['messages'] here!
+     When LLM returns multiple tool_calls, this fires after EACH tool, which would
+     interleave messages between tool results. This breaks Anthropic Claude's API
+     which requires all tool_results to immediately follow the tool_use message.
+
+     If you need to add messages after tools complete, use `after_tools` instead.
+
+     Supports both decorator and wrapper syntax:
+         @after_each_tool
+         def log_tool(agent):
+             trace = agent.current_session['trace'][-1]
+             if trace['type'] == 'tool_execution':
+                 print(f"Tool: {trace['tool_name']} in {trace['timing']:.0f}ms")
+
+         on_events=[after_each_tool(handler1, handler2)]
+     """
+     for fn in funcs:
+         fn._event_type = 'after_each_tool' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def after_tools(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as after_tools event handlers.
+
+     Fires ONCE after ALL tools in a batch complete.
+
+     What is a "tools batch"?
+     When the LLM responds, it can request multiple tools at once. For example:
+         LLM Response: tool_calls = [search("python"), read_file("docs.md"), calculate(2+2)]
+
+     This group of tools from ONE LLM response is called a "tools batch".
+     - before_tools fires ONCE before the batch starts
+     - after_tools fires ONCE after ALL tools in the batch complete
+
+     This is the SAFE place to add messages to agent.current_session['messages']
+     after tool execution, because all tool_results have been added and message
+     ordering is correct for all LLM providers (including Anthropic Claude).
+
+     Message ordering when this event fires:
+         - assistant (with tool_calls)
+         - tool result 1
+         - tool result 2
+         - tool result N
+         - [YOUR MESSAGE HERE - safe to add]
+
+     Use for: reflection/reasoning injection, ReAct pattern, batch cleanup.
+
+     Supports both decorator and wrapper syntax:
+         @after_tools
+         def add_reflection(agent):
+             trace = agent.current_session['trace']
+             recent = [t for t in trace if t['type'] == 'tool_execution'][-3:]
+             agent.current_session['messages'].append({
+                 'role': 'assistant',
+                 'content': f"Completed {len(recent)} tools"
+             })
+
+         on_events=[after_tools(add_reflection)]
+     """
+     for fn in funcs:
+         fn._event_type = 'after_tools' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def on_error(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as on_error event handlers.
+
+     Fires when tool execution fails.
+     Use for: custom error handling, retries, fallback values.
+
+     Supports both decorator and wrapper syntax:
+         @on_error
+         def handle_error(agent):
+             trace = agent.current_session['trace'][-1]
+             if trace.get('status') == 'error':
+                 print(f"Tool failed: {trace['error']}")
+
+         on_events=[on_error(handler1, handler2)]
+     """
+     for fn in funcs:
+         fn._event_type = 'on_error' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
+
+
+ def on_complete(*funcs: EventHandler) -> Union[EventHandler, List[EventHandler]]:
+     """
+     Mark function(s) as on_complete event handlers.
+
+     Fires once per input() call, after final response is generated.
+     Use for: metrics, logging, cleanup, final summary.
+
+     Supports both decorator and wrapper syntax:
+         @on_complete
+         def log_done(agent):
+             trace = agent.current_session['trace']
+             tools_used = [t['tool_name'] for t in trace if t['type'] == 'tool_execution']
+             print(f"Task done. Tools: {tools_used}")
+
+         on_events=[on_complete(handler1, handler2)]
+     """
+     for fn in funcs:
+         fn._event_type = 'on_complete' # type: ignore
+     return funcs[0] if len(funcs) == 1 else list(funcs)
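
The wrappers above all share one contract: they stamp `_event_type` on each handler and hand the function(s) back unchanged, so handlers remain plain callables. A minimal sketch of that contract; the `Agent(..., on_events=[...])` wiring at the end follows the docstrings above, but the constructor signature is assumed here, not verified:

    from connectonion.events import after_llm, before_each_tool

    def log_llm(agent):
        trace = agent.current_session['trace'][-1]
        if trace['type'] == 'llm_call':
            print(f"LLM took {trace['duration_ms']:.0f}ms")

    def block_dangerous_shell(agent):
        pending = agent.current_session['pending_tool']
        if pending['name'] == 'shell' and 'rm ' in str(pending['arguments']):
            raise PermissionError("refusing to run rm")  # raising cancels the tool

    tagged = after_llm(log_llm)  # single handler: returned as-is, just tagged
    assert tagged is log_llm and log_llm._event_type == 'after_llm'

    guard = before_each_tool(block_dangerous_shell)
    assert guard._event_type == 'before_each_tool'

    # Hypothetical wiring, per the docstrings:
    # agent = Agent("helper", tools=[...], on_events=[tagged, guard])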
connectonion/execution_analyzer/__init__.py ADDED
@@ -0,0 +1,9 @@
+ """Execution analyzer - Post-execution analysis and improvement suggestions.
+
+ Analyzes completed agent runs and provides suggestions for improving
+ system prompts and agent behavior.
+ """
+
+ from .execution_analysis import analyze_execution, ExecutionAnalysis
+
+ __all__ = ["analyze_execution", "ExecutionAnalysis"]
connectonion/execution_analyzer/execution_analysis.py ADDED
@@ -0,0 +1,93 @@
+ """
+ Purpose: Post-execution analysis of agent runs with AI-powered improvement suggestions
+ LLM-Note:
+   Dependencies: imports from [pathlib, pydantic, typing, llm_do.py] | imported by [agent.py via execution_analyzer/__init__.py] | tested by [tests/test_execution_analyzer.py]
+   Data flow: receives from Agent.input() after completion → analyze_execution(user_prompt, agent_instance, final_result, execution_trace, max_iterations_reached) → builds execution summary with tool calls → loads prompt from execution_analysis_prompt.md → calls llm_do() with ExecutionAnalysis Pydantic schema → returns structured analysis with task_completed, problems_identified, system_prompt_suggestions, overall_quality, key_insights
+   State/Effects: reads execution_analysis_prompt.md file | makes single LLM API call via llm_do() | no writes or global state | stateless
+   Integration: exposes analyze_execution(), ExecutionAnalysis Pydantic model | used by Agent after .input() completes to provide feedback | uses same model as agent instance for analysis | ExecutionAnalysis schema validated by Pydantic
+   Performance: single LLM call per execution (only when enabled) | execution_trace serialization can be large for long runs | no caching
+   Errors: llm_do() errors bubble up (API failures, timeout) | Pydantic ValidationError if LLM output doesn't match schema | handles empty tools list gracefully
+
+ Post-execution analysis for completed agent runs.
+
+ Analyzes the entire execution trace and provides suggestions for improvement.
+ """
+
+ from pathlib import Path
+ from pydantic import BaseModel
+ from typing import List
+ from ..llm_do import llm_do
+
+
+ class ExecutionAnalysis(BaseModel):
+     """Structured output for post-execution analysis."""
+     task_completed: bool
+     completion_explanation: str
+     problems_identified: List[str]
+     system_prompt_suggestions: List[str]
+     overall_quality: str # "excellent" | "good" | "fair" | "poor"
+     key_insights: List[str]
+
+
+ def analyze_execution(
+     user_prompt: str,
+     agent_instance,
+     final_result: str,
+     execution_trace: List,
+     max_iterations_reached: bool
+ ) -> ExecutionAnalysis:
+     """Analyze completed execution and suggest improvements.
+
+     Args:
+         user_prompt: The original user request
+         agent_instance: The Agent that executed
+         final_result: Final response from agent
+         execution_trace: Complete trace of execution
+         max_iterations_reached: Whether agent hit iteration limit
+
+     Returns:
+         Structured analysis with improvement suggestions
+     """
+     # Build execution summary
+     tools_called = [
+         entry for entry in execution_trace
+         if entry.get('type') == 'tool_execution'
+     ]
+
+     tools_summary = []
+     for entry in tools_called:
+         status = "✓" if entry.get('status') == 'success' else "✗"
+         tools_summary.append(
+             f"{status} {entry.get('tool_name')}({entry.get('args')}) → {entry.get('result')}"
+         )
+
+     # Create analysis input
+     data = f"""**User Request:**
+ {user_prompt}
+
+ **Agent System Prompt:**
+ {agent_instance.system_prompt}
+
+ **Available Tools:**
+ {', '.join([t.name for t in agent_instance.tools]) if agent_instance.tools else 'None'}
+
+ **Execution Summary:**
+ - Max iterations reached: {max_iterations_reached}
+ - Tools called ({len(tools_called)}):
+ {chr(10).join(f" {i+1}. {s}" for i, s in enumerate(tools_summary))}
+
+ **Final Result:**
+ {final_result}
+
+ **Complete Trace:**
+ {execution_trace}"""
+
+     prompt_file = Path(__file__).parent / "execution_analysis_prompt.md"
+
+     # Use same model as agent
+     return llm_do(
+         data,
+         output=ExecutionAnalysis,
+         system_prompt=prompt_file,
+         model=agent_instance.llm.model
+     )
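
In the package, `analyze_execution()` is invoked by the Agent after `.input()` completes; it can also be called directly against a recorded trace. A hedged sketch with stubbed stand-ins for the agent (the stub classes and model name are illustrative, and the call spends one real LLM request via `llm_do()`):

    from connectonion.execution_analyzer import analyze_execution

    class _LLMStub:
        model = "gpt-4o-mini"  # assumed model name; only forwarded to llm_do()

    class _AgentStub:
        system_prompt = "You are a careful research assistant."
        tools = []             # empty tool list is handled ('None' in the summary)
        llm = _LLMStub()

    trace = [{"type": "tool_execution", "tool_name": "search",
              "args": {"q": "python"}, "result": "3 hits", "status": "success"}]

    analysis = analyze_execution(
        user_prompt="Find Python tutorials",
        agent_instance=_AgentStub(),
        final_result="Here are three tutorials...",
        execution_trace=trace,
        max_iterations_reached=False,
    )
    print(analysis.overall_quality)          # e.g. "good"
    print(analysis.system_prompt_suggestions)

The returned `ExecutionAnalysis` is Pydantic-validated, so a malformed LLM response raises `ValidationError` rather than yielding partial data.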
connectonion/execution_analyzer/execution_analysis_prompt.md ADDED
@@ -0,0 +1,47 @@
+ # Post-Execution Analysis Expert
+
+ You are an expert at analyzing AI agent execution and suggesting improvements.
+
+ ## Your Task
+
+ Analyze a completed agent execution and provide actionable insights.
+
+ You will be given:
+ - User's original request
+ - Agent's system prompt
+ - Complete execution trace (all tools called, results, errors)
+ - Final result or max iteration limit
+ - Available tools
+
+ ## Analysis Framework
+
+ Provide structured analysis:
+
+ 1. **task_completed**: Did the agent successfully complete the user's task? (true/false)
+
+ 2. **completion_explanation**: Why was it completed or not? Be specific about what happened.
+
+ 3. **problems_identified**: What went wrong or could improve? List specific issues:
+    - Wrong tool choices
+    - Inefficient sequences
+    - Errors encountered
+    - Missing capabilities
+    - Unclear goals or confusion
+
+ 4. **system_prompt_suggestions**: Concrete changes to improve the system prompt:
+    - Quote problematic parts and suggest replacements
+    - Suggest new directives to add
+    - Identify contradictions or ambiguities
+    - Be specific with actual suggested text
+
+ 5. **overall_quality**: Rate execution: "excellent" | "good" | "fair" | "poor"
+
+ 6. **key_insights**: 2-3 most important lessons for improving this agent
+
+ ## Guidelines
+
+ - Be specific and actionable (not generic)
+ - Quote actual moments from execution
+ - Suggest concrete system prompt modifications
+ - Focus on root causes
+ - Prioritize high-impact improvements