connectonion 0.6.2__py3-none-any.whl → 0.6.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- connectonion/__init__.py +46 -9
- connectonion/cli/__init__.py +11 -1
- connectonion/cli/browser_agent/__init__.py +11 -1
- connectonion/cli/browser_agent/browser.py +13 -3
- connectonion/cli/browser_agent/element_finder.py +8 -0
- connectonion/cli/browser_agent/highlight_screenshot.py +9 -1
- connectonion/cli/browser_agent/scroll.py +8 -0
- connectonion/cli/co_ai/__init__.py +6 -0
- connectonion/cli/co_ai/agent.py +87 -0
- connectonion/cli/co_ai/agents/__init__.py +5 -0
- connectonion/cli/co_ai/agents/registry.py +57 -0
- connectonion/cli/co_ai/commands/__init__.py +45 -0
- connectonion/cli/co_ai/commands/compact.py +173 -0
- connectonion/cli/co_ai/commands/cost.py +77 -0
- connectonion/cli/co_ai/commands/export.py +60 -0
- connectonion/cli/co_ai/commands/help.py +80 -0
- connectonion/cli/co_ai/commands/init.py +101 -0
- connectonion/cli/co_ai/commands/sessions.py +55 -0
- connectonion/cli/co_ai/commands/tasks.py +63 -0
- connectonion/cli/co_ai/commands/undo.py +103 -0
- connectonion/cli/co_ai/context.py +127 -0
- connectonion/cli/co_ai/main.py +52 -0
- connectonion/cli/co_ai/plugins/__init__.py +6 -0
- connectonion/cli/co_ai/plugins/reminder.py +76 -0
- connectonion/cli/co_ai/plugins/shell_approval.py +105 -0
- connectonion/cli/co_ai/prompts/agents/explore.md +79 -0
- connectonion/cli/co_ai/prompts/agents/plan.md +60 -0
- connectonion/cli/co_ai/prompts/assembler.py +303 -0
- connectonion/cli/{docs/co-vibecoding-principles-docs-contexts-all-in-one.md → co_ai/prompts/connectonion/README.md} +26 -0
- connectonion/cli/co_ai/prompts/connectonion/api.md +457 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/README.md +805 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/auth.md +46 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/browser.md +235 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/copy.md +184 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/create.md +335 -0
- connectonion/cli/co_ai/prompts/connectonion/cli/init.md +431 -0
- connectonion/cli/co_ai/prompts/connectonion/co-directory-structure.md +214 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/agent.md +1078 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/events.md +816 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/llm_do.md +256 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/max_iterations.md +362 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/models.md +641 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/plugins.md +100 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/prompts.md +122 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/tools.md +512 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/transcribe.md +156 -0
- connectonion/cli/co_ai/prompts/connectonion/concepts/trust.md +291 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/README.md +18 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/auto_debug.md +1026 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/console.md +129 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/eval-format.md +178 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/eval.md +230 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/exceptions.md +307 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/log.md +117 -0
- connectonion/cli/co_ai/prompts/connectonion/debug/xray.md +215 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/001-choosing-input-method.md +202 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/002-choosing-llm-function-name.md +202 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/003-choosing-trust-keyword.md +141 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/004-cli-create-flow.md +117 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/005-designing-agent-network-protocol.md +503 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/006-agent-address-format.md +305 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/007-authentication-backend-design.md +240 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/008-naming-is-hard.md +228 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/009-why-connect-function.md +167 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/010-cli-ux-progressive-disclosure.md +176 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/011-global-config-identity-management.md +357 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/012-tool-execution-separation.md +259 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/013-debug-and-logging-design.md +253 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/014-hook-system-design.md +510 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/015-interactive-auto-debug-design.md +837 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/016-why-no-zero-knowledge-proofs.md +358 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/017-session-logging-and-eval-format.md +120 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/018-event-api-naming.md +274 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/019-agent-lifecycle-design.md +655 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/020-trust-system-and-network-architecture.md +503 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/021-task-storage-jsonl-design.md +496 -0
- connectonion/cli/co_ai/prompts/connectonion/design-decisions/022-raw-asgi-implementation.md +273 -0
- connectonion/cli/co_ai/prompts/connectonion/examples/agent_reasoning.md +62 -0
- connectonion/cli/co_ai/prompts/connectonion/examples/atomic_tools.md +24 -0
- connectonion/cli/co_ai/prompts/connectonion/examples/load_guide.md +18 -0
- connectonion/cli/co_ai/prompts/connectonion/examples.md +0 -0
- connectonion/cli/co_ai/prompts/connectonion/hook-system-options.md +364 -0
- connectonion/cli/co_ai/prompts/connectonion/index.md +162 -0
- connectonion/cli/co_ai/prompts/connectonion/integrations/README.md +12 -0
- connectonion/cli/co_ai/prompts/connectonion/integrations/auth.md +450 -0
- connectonion/cli/co_ai/prompts/connectonion/integrations/google.md +431 -0
- connectonion/cli/co_ai/prompts/connectonion/integrations/microsoft.md +370 -0
- connectonion/cli/co_ai/prompts/connectonion/network/README.md +14 -0
- connectonion/cli/co_ai/prompts/connectonion/network/connect.md +543 -0
- connectonion/cli/co_ai/prompts/connectonion/network/connection.md +538 -0
- connectonion/cli/co_ai/prompts/connectonion/network/deploy.md +123 -0
- connectonion/cli/co_ai/prompts/connectonion/network/host.md +1049 -0
- connectonion/cli/co_ai/prompts/connectonion/network/protocol/agent-relay-protocol.md +495 -0
- connectonion/cli/co_ai/prompts/connectonion/network/protocol/announce-message.md +115 -0
- connectonion/cli/co_ai/prompts/connectonion/principles.md +124 -0
- connectonion/cli/co_ai/prompts/connectonion/quickstart.md +261 -0
- connectonion/cli/co_ai/prompts/connectonion/roadmap.md +81 -0
- connectonion/cli/co_ai/prompts/connectonion/templates/README.md +77 -0
- connectonion/cli/co_ai/prompts/connectonion/templates/meta-agent.md +152 -0
- connectonion/cli/co_ai/prompts/connectonion/templates/minimal.md +105 -0
- connectonion/cli/co_ai/prompts/connectonion/templates/playwright.md +130 -0
- connectonion/cli/co_ai/prompts/connectonion/templates/web-research.md +144 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/README.md +95 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/chat.md +181 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/divider.md +63 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/dropdown.md +83 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/footer.md +44 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/fuzzy.md +68 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/input.md +84 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/keys.md +77 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/pick.md +71 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/providers.md +89 -0
- connectonion/cli/co_ai/prompts/connectonion/tui/status_bar.md +67 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/README.md +156 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/calendar_plugin.md +68 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/eval.md +89 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/gmail_plugin.md +68 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/image_result_formatter.md +74 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/re_act.md +86 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_plugins/shell_approval.md +69 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/README.md +81 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/diff_writer.md +138 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/get_emails.md +499 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/gmail.md +135 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/google_calendar.md +106 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/memory.md +486 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/microsoft_calendar.md +106 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/outlook.md +120 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/send_email.md +403 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/shell.md +95 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/slash_command.md +96 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/terminal.md +97 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/todo_list.md +252 -0
- connectonion/cli/co_ai/prompts/connectonion/useful_tools/web_fetch.md +130 -0
- connectonion/cli/co_ai/prompts/connectonion/vibe-coding-guide.md +97 -0
- connectonion/cli/co_ai/prompts/connectonion/windows-support.md +258 -0
- connectonion/cli/co_ai/prompts/main.md +247 -0
- connectonion/cli/co_ai/prompts/reminders/plan_mode.md +34 -0
- connectonion/cli/co_ai/prompts/summarization.md +55 -0
- connectonion/cli/co_ai/prompts/tools/ask_user.md +61 -0
- connectonion/cli/co_ai/prompts/tools/background.md +57 -0
- connectonion/cli/co_ai/prompts/tools/edit.md +90 -0
- connectonion/cli/co_ai/prompts/tools/glob.md +52 -0
- connectonion/cli/co_ai/prompts/tools/grep.md +55 -0
- connectonion/cli/co_ai/prompts/tools/plan_mode.md +80 -0
- connectonion/cli/co_ai/prompts/tools/read.md +40 -0
- connectonion/cli/co_ai/prompts/tools/shell.md +67 -0
- connectonion/cli/co_ai/prompts/tools/task.md +51 -0
- connectonion/cli/co_ai/prompts/tools/todo.md +139 -0
- connectonion/cli/co_ai/prompts/tools/write.md +47 -0
- connectonion/cli/co_ai/prompts/workflow.md +89 -0
- connectonion/cli/co_ai/reminders.py +159 -0
- connectonion/cli/co_ai/sessions.py +110 -0
- connectonion/cli/co_ai/skills/__init__.py +37 -0
- connectonion/cli/co_ai/skills/builtin/commit/SKILL.md +63 -0
- connectonion/cli/co_ai/skills/builtin/review-pr/SKILL.md +76 -0
- connectonion/cli/co_ai/skills/loader.py +166 -0
- connectonion/cli/co_ai/skills/tool.py +46 -0
- connectonion/cli/co_ai/tools/__init__.py +92 -0
- connectonion/cli/co_ai/tools/ask_user.py +35 -0
- connectonion/cli/co_ai/tools/background.py +201 -0
- connectonion/cli/co_ai/tools/diff_writer.py +291 -0
- connectonion/cli/co_ai/tools/edit.py +89 -0
- connectonion/cli/co_ai/tools/glob.py +84 -0
- connectonion/cli/co_ai/tools/grep.py +158 -0
- connectonion/cli/co_ai/tools/load_guide.py +23 -0
- connectonion/cli/co_ai/tools/multi_edit.py +116 -0
- connectonion/cli/co_ai/tools/plan_mode.py +172 -0
- connectonion/cli/co_ai/tools/read.py +67 -0
- connectonion/cli/co_ai/tools/task.py +59 -0
- connectonion/cli/co_ai/tools/todo_list.py +159 -0
- connectonion/cli/co_ai/tools/write.py +126 -0
- connectonion/cli/commands/__init__.py +11 -1
- connectonion/cli/commands/ai_commands.py +34 -0
- connectonion/cli/commands/copy_commands.py +55 -6
- connectonion/cli/commands/create.py +20 -17
- connectonion/cli/commands/init.py +19 -22
- connectonion/cli/commands/project_cmd_lib.py +15 -0
- connectonion/cli/main.py +11 -0
- connectonion/console.py +15 -1
- connectonion/core/__init__.py +10 -1
- connectonion/core/agent.py +37 -16
- connectonion/core/exceptions.py +74 -0
- connectonion/core/llm.py +54 -6
- connectonion/core/tool_executor.py +32 -31
- connectonion/core/tool_factory.py +47 -10
- connectonion/debug/__init__.py +10 -1
- connectonion/debug/debug_explainer/__init__.py +10 -1
- connectonion/debug/execution_analyzer/__init__.py +10 -1
- connectonion/debug/execution_analyzer/execution_analysis.py +5 -2
- connectonion/debug/runtime_inspector/__init__.py +10 -1
- connectonion/docs/.package-ignore +6 -0
- connectonion/docs/README.md +2036 -0
- connectonion/docs/api.md +457 -0
- connectonion/docs/archive/001-ai-agent-is-just-prompt-plus-function.md +249 -0
- connectonion/docs/archive/README.md +53 -0
- connectonion/docs/archive/archive/consolidation-plan.md +72 -0
- connectonion/docs/archive/archive/core-principles-extracted.md +239 -0
- connectonion/docs/archive/archive/master-principles.md +222 -0
- connectonion/docs/archive/archive/principles.md +293 -0
- connectonion/docs/archive/archive/simplicity-principles.md +221 -0
- connectonion/docs/archive/attack-defense-insights.md +410 -0
- connectonion/docs/archive/business-model.md +305 -0
- connectonion/docs/archive/core-principles-unified.md +190 -0
- connectonion/docs/archive/discussion-journey.md +178 -0
- connectonion/docs/archive/economic-analysis.md +323 -0
- connectonion/docs/archive/features/01-share-and-find.md +256 -0
- connectonion/docs/archive/features/02-agent-authentication.md +93 -0
- connectonion/docs/archive/features/03-test-before-trust.md +71 -0
- connectonion/docs/archive/features/06-reliability-and-offline.md +197 -0
- connectonion/docs/archive/features/README.md +46 -0
- connectonion/docs/archive/features-roadmap.md +247 -0
- connectonion/docs/archive/mcp-comparison-insights.md +215 -0
- connectonion/docs/archive/migration-strategy.md +571 -0
- connectonion/docs/archive/mini-whitepaper.md +293 -0
- connectonion/docs/archive/network-protocol.md +394 -0
- connectonion/docs/archive/semantic-revolution.md +367 -0
- connectonion/docs/archive/technical-architecture.md +453 -0
- connectonion/docs/archive/the-semantic-insight.md +207 -0
- connectonion/docs/archive/threat-model.md +164 -0
- connectonion/docs/cli/README.md +805 -0
- connectonion/docs/cli/auth.md +46 -0
- connectonion/docs/cli/browser.md +235 -0
- connectonion/docs/cli/copy.md +232 -0
- connectonion/docs/cli/create.md +335 -0
- connectonion/docs/cli/init.md +431 -0
- connectonion/docs/co-directory-structure.md +214 -0
- connectonion/docs/concepts/agent.md +1078 -0
- connectonion/docs/concepts/events.md +699 -0
- connectonion/docs/concepts/llm_do.md +256 -0
- connectonion/docs/concepts/max_iterations.md +362 -0
- connectonion/docs/concepts/models.md +641 -0
- connectonion/docs/concepts/plugins.md +100 -0
- connectonion/docs/concepts/prompts.md +122 -0
- connectonion/docs/concepts/session.md +428 -0
- connectonion/docs/concepts/tools.md +512 -0
- connectonion/docs/concepts/transcribe.md +156 -0
- connectonion/docs/concepts/trust.md +291 -0
- connectonion/docs/connectonion.md +1256 -0
- connectonion/docs/debug/README.md +18 -0
- connectonion/docs/debug/auto_debug.md +1026 -0
- connectonion/docs/debug/console.md +129 -0
- connectonion/docs/debug/eval-format.md +178 -0
- connectonion/docs/debug/eval.md +230 -0
- connectonion/docs/debug/exceptions.md +307 -0
- connectonion/docs/debug/log.md +117 -0
- connectonion/docs/debug/xray.md +215 -0
- connectonion/docs/design-decisions/001-choosing-input-method.md +202 -0
- connectonion/docs/design-decisions/002-choosing-llm-function-name.md +202 -0
- connectonion/docs/design-decisions/003-choosing-trust-keyword.md +141 -0
- connectonion/docs/design-decisions/004-cli-create-flow.md +117 -0
- connectonion/docs/design-decisions/005-designing-agent-network-protocol.md +503 -0
- connectonion/docs/design-decisions/006-agent-address-format.md +305 -0
- connectonion/docs/design-decisions/007-authentication-backend-design.md +240 -0
- connectonion/docs/design-decisions/008-naming-is-hard.md +228 -0
- connectonion/docs/design-decisions/009-why-connect-function.md +167 -0
- connectonion/docs/design-decisions/010-cli-ux-progressive-disclosure.md +176 -0
- connectonion/docs/design-decisions/011-global-config-identity-management.md +357 -0
- connectonion/docs/design-decisions/012-tool-execution-separation.md +259 -0
- connectonion/docs/design-decisions/013-debug-and-logging-design.md +253 -0
- connectonion/docs/design-decisions/014-hook-system-design.md +510 -0
- connectonion/docs/design-decisions/015-interactive-auto-debug-design.md +837 -0
- connectonion/docs/design-decisions/016-why-no-zero-knowledge-proofs.md +358 -0
- connectonion/docs/design-decisions/017-session-logging-and-eval-format.md +120 -0
- connectonion/docs/design-decisions/018-event-api-naming.md +274 -0
- connectonion/docs/design-decisions/019-agent-lifecycle-design.md +655 -0
- connectonion/docs/design-decisions/020-trust-system-and-network-architecture.md +503 -0
- connectonion/docs/design-decisions/021-task-storage-jsonl-design.md +496 -0
- connectonion/docs/design-decisions/022-raw-asgi-implementation.md +273 -0
- connectonion/docs/examples.md +0 -0
- connectonion/docs/hook-system-options.md +364 -0
- connectonion/docs/integrations/README.md +12 -0
- connectonion/docs/integrations/auth.md +450 -0
- connectonion/docs/integrations/google.md +431 -0
- connectonion/docs/integrations/microsoft.md +370 -0
- connectonion/docs/network/README.md +14 -0
- connectonion/docs/network/connect.md +629 -0
- connectonion/docs/network/deploy.md +124 -0
- connectonion/docs/network/host.md +1087 -0
- connectonion/docs/network/io.md +538 -0
- connectonion/docs/network/protocol/agent-relay-protocol.md +495 -0
- connectonion/docs/network/protocol/announce-message.md +115 -0
- connectonion/docs/principles.md +124 -0
- connectonion/docs/quickstart.md +261 -0
- connectonion/docs/roadmap.md +81 -0
- connectonion/docs/templates/README.md +77 -0
- connectonion/docs/templates/meta-agent.md +152 -0
- connectonion/docs/templates/minimal.md +105 -0
- connectonion/docs/templates/playwright.md +130 -0
- connectonion/docs/templates/web-research.md +144 -0
- connectonion/docs/tui/README.md +95 -0
- connectonion/docs/tui/chat.md +181 -0
- connectonion/docs/tui/divider.md +63 -0
- connectonion/docs/tui/dropdown.md +83 -0
- connectonion/docs/tui/footer.md +44 -0
- connectonion/docs/tui/fuzzy.md +68 -0
- connectonion/docs/tui/input.md +84 -0
- connectonion/docs/tui/keys.md +77 -0
- connectonion/docs/tui/pick.md +71 -0
- connectonion/docs/tui/providers.md +89 -0
- connectonion/docs/tui/status_bar.md +67 -0
- connectonion/docs/useful_plugins/README.md +160 -0
- connectonion/docs/useful_plugins/calendar_plugin.md +68 -0
- connectonion/docs/useful_plugins/eval.md +89 -0
- connectonion/docs/useful_plugins/gmail_plugin.md +68 -0
- connectonion/docs/useful_plugins/image_result_formatter.md +74 -0
- connectonion/docs/useful_plugins/re_act.md +86 -0
- connectonion/docs/useful_plugins/shell_approval.md +69 -0
- connectonion/docs/useful_plugins/system_reminder.md +210 -0
- connectonion/docs/useful_prompts/README.md +127 -0
- connectonion/docs/useful_prompts/coding_agent.md +214 -0
- connectonion/docs/useful_tools/README.md +81 -0
- connectonion/docs/useful_tools/ask_user.md +103 -0
- connectonion/docs/useful_tools/diff_writer.md +158 -0
- connectonion/docs/useful_tools/get_emails.md +519 -0
- connectonion/docs/useful_tools/gmail.md +155 -0
- connectonion/docs/useful_tools/google_calendar.md +126 -0
- connectonion/docs/useful_tools/memory.md +506 -0
- connectonion/docs/useful_tools/microsoft_calendar.md +126 -0
- connectonion/docs/useful_tools/outlook.md +140 -0
- connectonion/docs/useful_tools/send_email.md +423 -0
- connectonion/docs/useful_tools/shell.md +115 -0
- connectonion/docs/useful_tools/slash_command.md +116 -0
- connectonion/docs/useful_tools/terminal.md +115 -0
- connectonion/docs/useful_tools/todo_list.md +272 -0
- connectonion/docs/useful_tools/web_fetch.md +150 -0
- connectonion/docs/vibe-coding-guide.md +97 -0
- connectonion/docs/windows-support.md +258 -0
- connectonion/logger.py +3 -3
- connectonion/network/__init__.py +19 -6
- connectonion/network/asgi/__init__.py +81 -0
- connectonion/network/asgi/http.py +205 -0
- connectonion/network/asgi/websocket.py +217 -0
- connectonion/network/connect.py +232 -185
- connectonion/network/host/__init__.py +59 -0
- connectonion/network/host/auth.py +191 -0
- connectonion/network/host/routes.py +135 -0
- connectonion/network/host/server.py +289 -0
- connectonion/network/host/session.py +78 -0
- connectonion/network/io/__init__.py +21 -0
- connectonion/network/{connection.py → io/base.py} +17 -42
- connectonion/network/io/websocket.py +55 -0
- connectonion/network/relay.py +37 -16
- connectonion/network/trust/__init__.py +30 -0
- connectonion/network/trust/factory.py +138 -0
- connectonion/network/{trust_agents.py → trust/prompts.py} +3 -3
- connectonion/network/{trust_functions.py → trust/tools.py} +2 -2
- connectonion/prompt_files/__init__.py +11 -1
- connectonion/prompt_files/react_acknowledge.md +26 -0
- connectonion/prompts.py +10 -1
- connectonion/tui/chat.py +10 -1
- connectonion/tui/divider.py +10 -1
- connectonion/tui/dropdown.py +10 -1
- connectonion/tui/footer.py +8 -0
- connectonion/tui/fuzzy.py +11 -1
- connectonion/tui/input.py +118 -70
- connectonion/tui/keys.py +133 -6
- connectonion/tui/providers.py +11 -1
- connectonion/tui/status_bar.py +10 -1
- connectonion/useful_events_handlers/__init__.py +8 -0
- connectonion/useful_events_handlers/reflect.py +19 -4
- connectonion/useful_plugins/__init__.py +2 -1
- connectonion/useful_plugins/eval.py +2 -2
- connectonion/useful_plugins/gmail_plugin.py +3 -3
- connectonion/useful_plugins/image_result_formatter.py +3 -3
- connectonion/useful_plugins/re_act.py +114 -28
- connectonion/useful_plugins/shell_approval.py +2 -2
- connectonion/useful_plugins/system_reminder.py +103 -0
- connectonion/useful_plugins/ui_stream.py +18 -133
- connectonion/useful_prompts/README.md +61 -0
- connectonion/useful_prompts/__init__.py +45 -0
- connectonion/useful_prompts/coding_agent/README.md +106 -0
- connectonion/useful_prompts/coding_agent/assembler.py +123 -0
- connectonion/useful_prompts/coding_agent/prompts/main.md +227 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/ask_user.md +61 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/background.md +57 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/edit.md +90 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/glob.md +52 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/grep.md +55 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/plan_mode.md +80 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/read.md +40 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/shell.md +67 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/task.md +51 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/todo.md +139 -0
- connectonion/useful_prompts/coding_agent/prompts/tools/write.md +48 -0
- connectonion/useful_prompts/system-reminders/security-warning.md +14 -0
- connectonion/useful_prompts/system-reminders/test-reminder.md +11 -0
- connectonion/useful_tools/__init__.py +31 -4
- connectonion/useful_tools/ask_user.py +35 -0
- connectonion/useful_tools/bash.py +69 -0
- connectonion/useful_tools/diff_writer.py +186 -94
- connectonion/useful_tools/edit.py +102 -0
- connectonion/useful_tools/glob_files.py +97 -0
- connectonion/useful_tools/grep_files.py +171 -0
- connectonion/useful_tools/multi_edit.py +116 -0
- connectonion/useful_tools/read_file.py +73 -0
- connectonion/useful_tools/shell.py +50 -45
- connectonion/useful_tools/write_file.py +129 -0
- {connectonion-0.6.2.dist-info → connectonion-0.6.3.dist-info}/METADATA +10 -3
- connectonion-0.6.3.dist-info/RECORD +469 -0
- connectonion/network/asgi.py +0 -407
- connectonion/network/host.py +0 -616
- connectonion/network/trust.py +0 -166
- connectonion-0.6.2.dist-info/RECORD +0 -129
- /connectonion/cli/{docs → co_ai/prompts/connectonion}/connectonion.md +0 -0
- {connectonion-0.6.2.dist-info → connectonion-0.6.3.dist-info}/WHEEL +0 -0
- {connectonion-0.6.2.dist-info → connectonion-0.6.3.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,358 @@
|
|
|
1
|
+
# Design Decision: Why ConnectOnion Doesn't Use Zero-Knowledge Proofs
|
|
2
|
+
|
|
3
|
+
*Date: 2025-11-18*
|
|
4
|
+
*Status: Decided - Not Using (For Now)*
|
|
5
|
+
*Decision: Use simpler trust mechanisms instead of Zero-Knowledge Proofs*
|
|
6
|
+
|
|
7
|
+
## The Question
|
|
8
|
+
|
|
9
|
+
As we designed the multi-agent trust system, we explored Zero-Knowledge Proofs (ZKPs): Could agents cryptographically prove their capabilities, reputation, or correct execution without revealing their secrets?
|
|
10
|
+
|
|
11
|
+
The idea was compelling. But after deep analysis, we decided against it.
|
|
12
|
+
|
|
13
|
+
## What Are Zero-Knowledge Proofs?
|
|
14
|
+
|
|
15
|
+
ZKPs let you prove a statement is true without revealing why it's true.
|
|
16
|
+
|
|
17
|
+
**Traditional proof:**
|
|
18
|
+
```
|
|
19
|
+
"I have 1000 successful task completions"
|
|
20
|
+
→ Shows all 1000 task records
|
|
21
|
+
→ Verifier sees everything
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**Zero-knowledge proof:**
|
|
25
|
+
```
|
|
26
|
+
"I have 1000 successful task completions"
|
|
27
|
+
→ Shows cryptographic proof (200 bytes)
|
|
28
|
+
→ Verifier confirms it's true
|
|
29
|
+
→ Verifier learns nothing about the actual tasks
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**The magic:** Cryptographic assurance that the statement is true, without information leakage.
|
|
33
|
+
|
|
34
|
+
## Why We Considered ZKPs
|
|
35
|
+
|
|
36
|
+
### Use Case 1: Privacy-Preserving Reputation
|
|
37
|
+
```python
|
|
38
|
+
# Agent B proves reputation without revealing clients
|
|
39
|
+
agent.prove_reputation(min_tasks=1000, min_success_rate=0.95)
|
|
40
|
+
# Verifier confirms: "Yes, they have good reputation"
|
|
41
|
+
# Verifier doesn't learn: which clients, what tasks, when
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
### Use Case 2: Verifiable Computation
|
|
45
|
+
```python
|
|
46
|
+
# Agent B proves correct execution without revealing inputs
|
|
47
|
+
result = agent.process_data(private_dataset)
|
|
48
|
+
proof = agent.generate_execution_proof()
|
|
49
|
+
# Verifier confirms: "Yes, computation was correct"
|
|
50
|
+
# Verifier doesn't learn: what the dataset was
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
### Use Case 3: Capability Proving
|
|
54
|
+
```python
|
|
55
|
+
# Agent proves it has specific tools without revealing implementation
|
|
56
|
+
agent.prove_capability("secure_data_processing")
|
|
57
|
+
# Verifier confirms: "Yes, they can do it"
|
|
58
|
+
# Verifier doesn't learn: how it's implemented
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
This aligns with our philosophy: **"Behavior over identity"** - prove trustworthiness through cryptographic evidence of past behavior.
|
|
62
|
+
|
|
63
|
+
## Why We Decided Against ZKPs
|
|
64
|
+
|
|
65
|
+
### Problem 1: AI Agents Are Non-Deterministic
|
|
66
|
+
|
|
67
|
+
ZKPs work brilliantly for deterministic computations:
|
|
68
|
+
|
|
69
|
+
```python
|
|
70
|
+
# Deterministic - ZKP works perfectly
|
|
71
|
+
Input: [1, 2, 3, 4, 5]
|
|
72
|
+
Function: sum()
|
|
73
|
+
Output: 15
|
|
74
|
+
Every execution: identical
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
But AI agents are fundamentally different:
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
# Non-deterministic - ZKP struggles
|
|
81
|
+
Input: "Analyze this data"
|
|
82
|
+
Agent execution:
|
|
83
|
+
Run 1: LLM → Tool A → Tool B → Result
|
|
84
|
+
Run 2: LLM → Tool C → Tool A → Tool B → Result
|
|
85
|
+
Run 3: LLM → Tool A → Result
|
|
86
|
+
Every execution: different path
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**The question:** What does it mean to "prove correct execution" when there's no single correct path?
|
|
90
|
+
|
|
91
|
+
### Problem 2: Cannot Prevent Model Switching
|
|
92
|
+
|
|
93
|
+
The most common "cheat" in agent systems: advertising GPT-4, delivering GPT-3.5.
|
|
94
|
+
|
|
95
|
+
**What ZKP can prove:**
|
|
96
|
+
- "I processed the input and produced this output"
|
|
97
|
+
- "I called N tools during execution"
|
|
98
|
+
- "I completed the task"
|
|
99
|
+
|
|
100
|
+
**What ZKP cannot prove:**
|
|
101
|
+
- Which model was actually used (GPT-4 vs GPT-3.5)
|
|
102
|
+
- What system prompt was used
|
|
103
|
+
- What parameters were configured
|
|
104
|
+
- Whether quality degraded between demo and production
|
|
105
|
+
|
|
106
|
+
The LLM API call is a black box. The proof can only cover what happens before and after the API call, not what happens inside it.
|
|
107
|
+
|
|
108
|
+
### Problem 3: Cannot Verify Prompts
|
|
109
|
+
|
|
110
|
+
**Promised configuration:**
|
|
111
|
+
```python
|
|
112
|
+
system_prompt = """
|
|
113
|
+
You are an expert data analyst.
|
|
114
|
+
Use advanced statistical methods.
|
|
115
|
+
Provide detailed explanations.
|
|
116
|
+
Always cite sources.
|
|
117
|
+
"""
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
**Actual configuration:**
|
|
121
|
+
```python
|
|
122
|
+
system_prompt = "Analyze quickly."
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
A ZKP could prove "I used some prompt," but not "I used this specific prompt." The agent's internal state is opaque.
|
|
126
|
+
|
|
127
|
+
### Problem 4: Violates "Keep Simple Things Simple"
|
|
128
|
+
|
|
129
|
+
**Complexity cost:**
|
|
130
|
+
- ZKP libraries: Complex dependencies (bellman, arkworks, circom)
|
|
131
|
+
- Circuit design: Requires cryptography expertise
|
|
132
|
+
- Proof generation: 100ms - 10s per proof
|
|
133
|
+
- Developer experience: Steep learning curve
|
|
134
|
+
- Debugging: "Why did my proof fail?" is much harder than "Why did my function fail?"
|
|
135
|
+
|
|
136
|
+
**For what benefit?**
|
|
137
|
+
- Privacy: Most agent interactions don't require cryptographic privacy
|
|
138
|
+
- Trust: Behavior-based reputation is simpler and more intuitive
|
|
139
|
+
- Verification: Random sampling catches cheaters at 1/100th the complexity
|
|
140
|
+
|
|
141
|
+
This is classic over-engineering.
|
|
142
|
+
|
|
143
|
+
## What We Use Instead
|
|
144
|
+
|
|
145
|
+
### Solution 1: Behavior-Based Reputation (Primary)
|
|
146
|
+
|
|
147
|
+
```python
|
|
148
|
+
class AgentReputation:
|
|
149
|
+
def __init__(self):
|
|
150
|
+
self.completed_tasks = 0
|
|
151
|
+
self.quality_scores = []
|
|
152
|
+
self.user_ratings = []
|
|
153
|
+
|
|
154
|
+
def record_task(self, success: bool, quality: float):
|
|
155
|
+
self.completed_tasks += 1
|
|
156
|
+
self.quality_scores.append(quality)
|
|
157
|
+
|
|
158
|
+
def get_reputation(self):
|
|
159
|
+
return {
|
|
160
|
+
"total_tasks": self.completed_tasks,
|
|
161
|
+
"avg_quality": mean(self.quality_scores),
|
|
162
|
+
"success_rate": sum(self.quality_scores) / len(self.quality_scores)
|
|
163
|
+
}
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
**Why this works:**
|
|
167
|
+
- Simple to implement and understand
|
|
168
|
+
- Directly measures what users care about (quality)
|
|
169
|
+
- Can't be gamed without actually delivering quality
|
|
170
|
+
- Aligns with "behavior over identity"
|
|
171
|
+
|
|
172
|
+
### Solution 2: Random Sampling + Economic Incentives
|
|
173
|
+
|
|
174
|
+
```python
|
|
175
|
+
def verify_quality(task_result, agent):
|
|
176
|
+
# 10% random verification
|
|
177
|
+
if random.random() < 0.1:
|
|
178
|
+
expected = rerun_with_reference_implementation(task)
|
|
179
|
+
actual = task_result
|
|
180
|
+
similarity = compare(expected, actual)
|
|
181
|
+
|
|
182
|
+
if similarity < 0.8:
|
|
183
|
+
# Penalize severely
|
|
184
|
+
agent.reputation -= 100
|
|
185
|
+
agent.deposit -= 1000
|
|
186
|
+
agent.suspend(days=30)
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
**Economic calculation:**
|
|
190
|
+
```
|
|
191
|
+
Cheat profit: Save $0.01/task × 1000 tasks = $10
|
|
192
|
+
Cheat risk: 10% detection × $1000 penalty = $100 expected loss
|
|
193
|
+
Rational choice: Don't cheat
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
### Solution 3: Trust Through Transparency (Not Cryptography)
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
class AgentCommitment:
|
|
200
|
+
"""Agent publicly commits to configuration"""
|
|
201
|
+
|
|
202
|
+
def __init__(self):
|
|
203
|
+
self.model = "gpt-4"
|
|
204
|
+
self.min_quality_threshold = 0.9
|
|
205
|
+
self.system_prompt_hash = hash(system_prompt)
|
|
206
|
+
|
|
207
|
+
def sign_commitment(self):
|
|
208
|
+
# Sign with private key
|
|
209
|
+
return sign(
|
|
210
|
+
model=self.model,
|
|
211
|
+
quality=self.min_quality_threshold,
|
|
212
|
+
prompt_hash=self.system_prompt_hash,
|
|
213
|
+
private_key=self.private_key
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
def verify_commitment(self, task_id):
|
|
217
|
+
# Selected tasks: reveal actual configuration
|
|
218
|
+
# Platform compares to signed commitment
|
|
219
|
+
pass
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
**This provides:**
|
|
223
|
+
- Accountability without cryptographic complexity
|
|
224
|
+
- Selective disclosure (only when challenged)
|
|
225
|
+
- Simple signature verification (not ZKP)
|
|
226
|
+
|
|
227
|
+
### Solution 4: Trust Levels (Already Implemented)
|
|
228
|
+
|
|
229
|
+
We already have a working trust system:
|
|
230
|
+
|
|
231
|
+
```python
|
|
232
|
+
# Development - trust everything
|
|
233
|
+
agent = Agent("worker", trust="open")
|
|
234
|
+
|
|
235
|
+
# Production - verify behavior
|
|
236
|
+
agent = Agent("worker", trust="careful")
|
|
237
|
+
|
|
238
|
+
# High-stakes - maximum verification
|
|
239
|
+
agent = Agent("worker", trust="strict")
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
This is pragmatic, understandable, and sufficient for 99% of use cases.
|
|
243
|
+
|
|
244
|
+
## When We Might Reconsider ZKPs
|
|
245
|
+
|
|
246
|
+
We'll revisit ZKPs if we face these specific scenarios:
|
|
247
|
+
|
|
248
|
+
### Scenario 1: Enterprise Privacy Requirements
|
|
249
|
+
```
|
|
250
|
+
Client: "We need cryptographic proof that agents processed our data correctly,
|
|
251
|
+
but we cannot reveal the data even to verifiers."
|
|
252
|
+
Current solution: Trust + audit logs
|
|
253
|
+
ZKP solution: Cryptographic proof without data disclosure
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
### Scenario 2: Competitive Agent Marketplace
|
|
257
|
+
```
|
|
258
|
+
Agent providers: "We want to prove our capabilities without revealing our
|
|
259
|
+
implementation details to competitors."
|
|
260
|
+
Current solution: Behavioral reputation
|
|
261
|
+
ZKP solution: Capability proofs without code disclosure
|
|
262
|
+
```
|
|
263
|
+
|
|
264
|
+
### Scenario 3: Regulatory Compliance
|
|
265
|
+
```
|
|
266
|
+
Regulator: "You must prove AI agents executed correctly,
|
|
267
|
+
with mathematical certainty, while preserving user privacy."
|
|
268
|
+
Current solution: Audit logs + sampling
|
|
269
|
+
ZKP solution: Cryptographic execution proofs
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
### Scenario 4: Verifiable Deterministic Components
|
|
273
|
+
|
|
274
|
+
If we add deterministic data processing that genuinely benefits from ZKP:
|
|
275
|
+
|
|
276
|
+
```python
|
|
277
|
+
# Example: Privacy-preserving data aggregation
|
|
278
|
+
def aggregate_sensitive_data(datasets):
|
|
279
|
+
# Prove: "Total count = 10,000"
|
|
280
|
+
# Without revealing individual dataset sizes
|
|
281
|
+
pass
|
|
282
|
+
```
|
|
283
|
+
|
|
284
|
+
## The Pragmatic Path
|
|
285
|
+
|
|
286
|
+
**Phase 1: Now (Behavior + Economics)**
|
|
287
|
+
- Reputation system based on actual task quality
|
|
288
|
+
- Random verification with economic penalties
|
|
289
|
+
- Public commitments with signature verification
|
|
290
|
+
- Trust levels (open/careful/strict)
|
|
291
|
+
|
|
292
|
+
**Phase 2: 6-12 Months (If Demand Emerges)**
|
|
293
|
+
- Add optional ZKP for specific deterministic components
|
|
294
|
+
- Keep it opt-in, not required
|
|
295
|
+
- Use existing libraries (don't build crypto from scratch)
|
|
296
|
+
|
|
297
|
+
**Phase 3: 12+ Months (If Critical Need)**
|
|
298
|
+
- Full ZKP support for privacy-preserving verification
|
|
299
|
+
- Only if: Real customer demand + regulatory requirement
|
|
300
|
+
|
|
301
|
+
## Lessons Learned
|
|
302
|
+
|
|
303
|
+
### 1. Fancy Technology ≠ Better Solution
|
|
304
|
+
ZKPs are mathematically beautiful. They're also overkill for our problem.
|
|
305
|
+
|
|
306
|
+
### 2. Match Tool to Problem
|
|
307
|
+
- ZKP is perfect for: Deterministic computation + privacy requirement
|
|
308
|
+
- ZKP is wrong for: Non-deterministic AI agents + no privacy requirement
|
|
309
|
+
|
|
310
|
+
### 3. Economics > Cryptography (For Trust)
|
|
311
|
+
Making cheating unprofitable is simpler and more effective than making cheating cryptographically impossible.
|
|
312
|
+
|
|
313
|
+
### 4. Behavior > Proof
|
|
314
|
+
Users don't care about cryptographic proofs. They care about: "Does this agent deliver good results?" Behavioral reputation answers that directly.
|
|
315
|
+
|
|
316
|
+
### 5. Simplicity Wins
|
|
317
|
+
A simple system people actually use beats a sophisticated system nobody understands.
|
|
318
|
+
|
|
319
|
+
## The ConnectOnion Philosophy
|
|
320
|
+
|
|
321
|
+
This decision reflects our core principles:
|
|
322
|
+
|
|
323
|
+
**"Keep simple things simple"**
|
|
324
|
+
- Trust through behavior: Simple
|
|
325
|
+
- Trust through ZKP circuits: Complex
|
|
326
|
+
|
|
327
|
+
**"Make complicated things possible"**
|
|
328
|
+
- We can add ZKP later if truly needed
|
|
329
|
+
- Architecture doesn't prevent it
|
|
330
|
+
|
|
331
|
+
**"Behavior over identity"**
|
|
332
|
+
- Behavioral reputation directly implements this
|
|
333
|
+
- ZKP would add indirection without benefit
|
|
334
|
+
|
|
335
|
+
## Conclusion
|
|
336
|
+
|
|
337
|
+
Zero-Knowledge Proofs are powerful cryptographic tools. For privacy-preserving verification of deterministic computation, they're unmatched.
|
|
338
|
+
|
|
339
|
+
But ConnectOnion agents are:
|
|
340
|
+
- Non-deterministic (LLM-based)
|
|
341
|
+
- Primarily concerned with quality (not privacy)
|
|
342
|
+
- Used by developers who prefer simplicity
|
|
343
|
+
|
|
344
|
+
For these requirements, behavioral reputation + economic incentives + random verification is the right solution.
|
|
345
|
+
|
|
346
|
+
We're not saying "never." We're saying "not now, and not without clear need."
|
|
347
|
+
|
|
348
|
+
**When cryptography becomes necessary, we'll add it. Until then, we'll keep it simple.**
|
|
349
|
+
|
|
350
|
+
---
|
|
351
|
+
|
|
352
|
+
*"The best solution is the simplest one that solves the actual problem, not the most sophisticated one that solves a theoretical problem."*
|
|
353
|
+
|
|
354
|
+
## Further Reading
|
|
355
|
+
|
|
356
|
+
- [Trust System Design](./003-choosing-trust-keyword.md)
|
|
357
|
+
- [Agent Network Protocol](./004-designing-agent-network-protocol.md)
|
|
358
|
+
- [Behavioral Trust in Action](../trust.md)
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Session Logging and Eval Format
|
|
2
|
+
|
|
3
|
+
## Problem
|
|
4
|
+
|
|
5
|
+
When developing AI agents:
|
|
6
|
+
1. Agent runs are slow (LLM calls take time)
|
|
7
|
+
2. We run the same prompt many times during development
|
|
8
|
+
3. We need to compare: "after changing this prompt, what's different?"
|
|
9
|
+
4. We need regression testing: "does the new prompt still work for old cases?"
|
|
10
|
+
|
|
11
|
+
## Requirements
|
|
12
|
+
|
|
13
|
+
1. **Log** - Save full session (inputs, tool calls, results, messages)
|
|
14
|
+
2. **Replay** - Load saved messages to restore context
|
|
15
|
+
3. **Eval** - Mark expected behavior, LLM judges if it matches
|
|
16
|
+
4. **Edit** - Developers can view/edit in VS Code
|
|
17
|
+
|
|
18
|
+
## Format Decision: YAML with JSON String for Messages
|
|
19
|
+
|
|
20
|
+
```yaml
|
|
21
|
+
name: gmail_agent
|
|
22
|
+
timestamp: 2024-11-27 11:39:58
|
|
23
|
+
|
|
24
|
+
turns:
|
|
25
|
+
- input: "check my emails"
|
|
26
|
+
model: "gemini-2.5-pro"
|
|
27
|
+
duration_ms: 11200
|
|
28
|
+
tokens: 1234
|
|
29
|
+
cost: 0.01
|
|
30
|
+
tools_called: [get_emails]
|
|
31
|
+
result: "You have 3 emails"
|
|
32
|
+
messages: '[{"role":"system","content":"..."},{"role":"user","content":"check my emails"},{"role":"assistant","content":"You have 3 emails"}]'
|
|
33
|
+
eval:
|
|
34
|
+
expect_tools: [get_emails]
|
|
35
|
+
expect_result: "shows email list"
|
|
36
|
+
|
|
37
|
+
- input: "reply to first saying thanks"
|
|
38
|
+
model: "gemini-2.5-pro"
|
|
39
|
+
duration_ms: 8500
|
|
40
|
+
tokens: 2345
|
|
41
|
+
cost: 0.02
|
|
42
|
+
tools_called: [send_email]
|
|
43
|
+
result: "Reply sent"
|
|
44
|
+
messages: '[{"role":"system","content":"..."},{"role":"user","content":"reply to first saying thanks"},{"role":"assistant","content":"Reply sent"}]'
|
|
45
|
+
eval:
|
|
46
|
+
expect_tools: [send_email]
|
|
47
|
+
expect_result: "reply sent"
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
## Why This Format
|
|
51
|
+
|
|
52
|
+
**YAML** for readable fields (input, tools, result, cost)
|
|
53
|
+
|
|
54
|
+
**JSON string** for messages:
|
|
55
|
+
- Won't break YAML (prompts with colons, quotes, etc.)
|
|
56
|
+
- One line = simple to parse
|
|
57
|
+
- Already JSON in code, easy to `json.loads()`
|
|
58
|
+
|
|
59
|
+
**One turn = one user input**:
|
|
60
|
+
- System message + user input + tool calls + final response
|
|
61
|
+
- No second user input within a turn
|
|
62
|
+
- Second user input = next turn
|
|
63
|
+
|
|
64
|
+
## Field Reference
|
|
65
|
+
|
|
66
|
+
| Field | Type | Description |
|
|
67
|
+
|-------|------|-------------|
|
|
68
|
+
| `input` | string | User input for this turn |
|
|
69
|
+
| `model` | string | LLM model used |
|
|
70
|
+
| `duration_ms` | int | How long the turn took |
|
|
71
|
+
| `tokens` | int | Total tokens used |
|
|
72
|
+
| `cost` | float | Cost in USD |
|
|
73
|
+
| `tools_called` | list | Tools that were called |
|
|
74
|
+
| `result` | string | Agent's final response |
|
|
75
|
+
| `messages` | JSON string | Message context window |
|
|
76
|
+
| `eval` | object | Optional expectations |
|
|
77
|
+
| `eval.expect_tools` | list | Expected tools |
|
|
78
|
+
| `eval.expect_result` | string | Expected result description |
|
|
79
|
+
|
|
80
|
+
## File Structure
|
|
81
|
+
|
|
82
|
+
```
|
|
83
|
+
.co/
|
|
84
|
+
evals/
|
|
85
|
+
gmail_agent_2024-11-27_11-39-58.yaml # Auto-saved
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## Workflow
|
|
89
|
+
|
|
90
|
+
1. Run agent → auto-saves to `.co/evals/`
|
|
91
|
+
2. Add `eval:` to turns you want to test
|
|
92
|
+
3. Change prompt → run evals
|
|
93
|
+
4. LLM judges: did tools match? Is result similar?
|
|
94
|
+
|
|
95
|
+
## Implementation
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
# Save
|
|
99
|
+
turn = {
|
|
100
|
+
'input': user_input,
|
|
101
|
+
'model': self.llm.model,
|
|
102
|
+
'duration_ms': duration,
|
|
103
|
+
'tokens': usage.input_tokens + usage.output_tokens,
|
|
104
|
+
'cost': usage.cost,
|
|
105
|
+
'tools_called': [tc.name for tc in tool_calls],
|
|
106
|
+
'result': response,
|
|
107
|
+
'messages': json.dumps(messages),
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
# Load
|
|
111
|
+
session = yaml.safe_load(file)
|
|
112
|
+
messages = json.loads(turn['messages'])
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
## Alternatives Rejected
|
|
116
|
+
|
|
117
|
+
- **JSONL** - Crash-unsafe, hard to read
|
|
118
|
+
- **Pure YAML** - Prompts break format (colons, quotes)
|
|
119
|
+
- **Multi-line JSON** - Unnecessary complexity
|
|
120
|
+
- **Separate message files** - Too complex
|