@myrialabs/clopen 0.0.1
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/.env.example +6 -0
- package/.github/workflows/release.yml +60 -0
- package/.github/workflows/test.yml +40 -0
- package/CONTRIBUTING.md +499 -0
- package/LICENSE +21 -0
- package/README.md +209 -0
- package/backend/index.ts +156 -0
- package/backend/lib/chat/helpers.ts +42 -0
- package/backend/lib/chat/index.ts +2 -0
- package/backend/lib/chat/stream-manager.ts +1126 -0
- package/backend/lib/database/README.md +77 -0
- package/backend/lib/database/index.ts +119 -0
- package/backend/lib/database/migrations/001_create_projects_table.ts +31 -0
- package/backend/lib/database/migrations/002_create_chat_sessions_table.ts +33 -0
- package/backend/lib/database/migrations/003_create_messages_table.ts +32 -0
- package/backend/lib/database/migrations/004_create_prompt_templates_table.ts +34 -0
- package/backend/lib/database/migrations/005_create_settings_table.ts +24 -0
- package/backend/lib/database/migrations/006_add_user_to_messages.ts +58 -0
- package/backend/lib/database/migrations/007_create_stream_states_table.ts +41 -0
- package/backend/lib/database/migrations/008_create_message_snapshots_table.ts +62 -0
- package/backend/lib/database/migrations/009_add_delta_snapshot_fields.ts +41 -0
- package/backend/lib/database/migrations/010_add_soft_delete_and_branch_support.ts +70 -0
- package/backend/lib/database/migrations/011_git_like_commit_graph.ts +156 -0
- package/backend/lib/database/migrations/012_add_file_change_statistics.ts +41 -0
- package/backend/lib/database/migrations/013_checkpoint_tree_state.ts +118 -0
- package/backend/lib/database/migrations/014_add_engine_to_sessions.ts +18 -0
- package/backend/lib/database/migrations/015_add_model_to_sessions.ts +18 -0
- package/backend/lib/database/migrations/016_create_user_projects_table.ts +34 -0
- package/backend/lib/database/migrations/017_add_current_session_to_user_projects.ts +32 -0
- package/backend/lib/database/migrations/018_create_claude_accounts_table.ts +24 -0
- package/backend/lib/database/migrations/019_add_claude_account_to_sessions.ts +18 -0
- package/backend/lib/database/migrations/020_add_snapshot_tree_hash.ts +32 -0
- package/backend/lib/database/migrations/021_drop_prompt_templates_table.ts +33 -0
- package/backend/lib/database/migrations/index.ts +154 -0
- package/backend/lib/database/queries/checkpoint-queries.ts +87 -0
- package/backend/lib/database/queries/engine-queries.ts +75 -0
- package/backend/lib/database/queries/index.ts +9 -0
- package/backend/lib/database/queries/message-queries.ts +472 -0
- package/backend/lib/database/queries/project-queries.ts +117 -0
- package/backend/lib/database/queries/session-queries.ts +271 -0
- package/backend/lib/database/queries/settings-queries.ts +34 -0
- package/backend/lib/database/queries/snapshot-queries.ts +326 -0
- package/backend/lib/database/queries/utils-queries.ts +59 -0
- package/backend/lib/database/seeders/index.ts +13 -0
- package/backend/lib/database/seeders/settings_seeder.ts +84 -0
- package/backend/lib/database/utils/connection.ts +174 -0
- package/backend/lib/database/utils/index.ts +4 -0
- package/backend/lib/database/utils/migration-runner.ts +118 -0
- package/backend/lib/database/utils/seeder-runner.ts +121 -0
- package/backend/lib/engine/adapters/claude/environment.ts +164 -0
- package/backend/lib/engine/adapters/claude/error-handler.ts +60 -0
- package/backend/lib/engine/adapters/claude/index.ts +1 -0
- package/backend/lib/engine/adapters/claude/path-utils.ts +38 -0
- package/backend/lib/engine/adapters/claude/stream.ts +177 -0
- package/backend/lib/engine/adapters/opencode/index.ts +2 -0
- package/backend/lib/engine/adapters/opencode/message-converter.ts +862 -0
- package/backend/lib/engine/adapters/opencode/server.ts +104 -0
- package/backend/lib/engine/adapters/opencode/stream.ts +755 -0
- package/backend/lib/engine/index.ts +196 -0
- package/backend/lib/engine/types.ts +58 -0
- package/backend/lib/files/file-operations.ts +478 -0
- package/backend/lib/files/file-reading.ts +308 -0
- package/backend/lib/files/file-watcher.ts +383 -0
- package/backend/lib/files/path-browsing.ts +382 -0
- package/backend/lib/git/git-executor.ts +88 -0
- package/backend/lib/git/git-parser.ts +411 -0
- package/backend/lib/git/git-service.ts +505 -0
- package/backend/lib/mcp/README.md +1144 -0
- package/backend/lib/mcp/config.ts +316 -0
- package/backend/lib/mcp/index.ts +35 -0
- package/backend/lib/mcp/project-context.ts +236 -0
- package/backend/lib/mcp/servers/browser-automation/actions.ts +156 -0
- package/backend/lib/mcp/servers/browser-automation/browser.ts +419 -0
- package/backend/lib/mcp/servers/browser-automation/index.ts +791 -0
- package/backend/lib/mcp/servers/browser-automation/inspection.ts +501 -0
- package/backend/lib/mcp/servers/helper.ts +143 -0
- package/backend/lib/mcp/servers/index.ts +45 -0
- package/backend/lib/mcp/servers/weather/get-temperature.ts +56 -0
- package/backend/lib/mcp/servers/weather/index.ts +31 -0
- package/backend/lib/mcp/stdio-server.ts +103 -0
- package/backend/lib/mcp/types.ts +65 -0
- package/backend/lib/preview/browser/browser-audio-capture.ts +86 -0
- package/backend/lib/preview/browser/browser-console-manager.ts +263 -0
- package/backend/lib/preview/browser/browser-dialog-handler.ts +222 -0
- package/backend/lib/preview/browser/browser-interaction-handler.ts +421 -0
- package/backend/lib/preview/browser/browser-mcp-control.ts +415 -0
- package/backend/lib/preview/browser/browser-native-ui-handler.ts +512 -0
- package/backend/lib/preview/browser/browser-navigation-tracker.ts +104 -0
- package/backend/lib/preview/browser/browser-pool.ts +357 -0
- package/backend/lib/preview/browser/browser-preview-service.ts +882 -0
- package/backend/lib/preview/browser/browser-tab-manager.ts +935 -0
- package/backend/lib/preview/browser/browser-video-capture.ts +695 -0
- package/backend/lib/preview/browser/scripts/audio-stream.ts +292 -0
- package/backend/lib/preview/browser/scripts/cursor-tracking.ts +85 -0
- package/backend/lib/preview/browser/scripts/video-stream.ts +438 -0
- package/backend/lib/preview/browser/types.ts +359 -0
- package/backend/lib/preview/index.ts +24 -0
- package/backend/lib/project/index.ts +2 -0
- package/backend/lib/project/status-manager.ts +182 -0
- package/backend/lib/shared/index.ts +2 -0
- package/backend/lib/shared/port-utils.ts +25 -0
- package/backend/lib/shared/process-manager.ts +281 -0
- package/backend/lib/snapshot/blob-store.ts +227 -0
- package/backend/lib/snapshot/gitignore.ts +307 -0
- package/backend/lib/snapshot/helpers.ts +397 -0
- package/backend/lib/snapshot/snapshot-service.ts +483 -0
- package/backend/lib/terminal/helpers.ts +14 -0
- package/backend/lib/terminal/index.ts +8 -0
- package/backend/lib/terminal/pty-manager.ts +4 -0
- package/backend/lib/terminal/pty-session-manager.ts +387 -0
- package/backend/lib/terminal/shell-utils.ts +313 -0
- package/backend/lib/terminal/stream-manager.ts +293 -0
- package/backend/lib/tunnel/global-tunnel-manager.ts +243 -0
- package/backend/lib/tunnel/project-tunnel-manager.ts +311 -0
- package/backend/lib/user/helpers.ts +87 -0
- package/backend/lib/utils/ws.ts +944 -0
- package/backend/lib/vite-dev.ts +353 -0
- package/backend/middleware/cors.ts +15 -0
- package/backend/middleware/error-handler.ts +49 -0
- package/backend/middleware/logger.ts +9 -0
- package/backend/types/api.ts +24 -0
- package/backend/ws/README.md +1505 -0
- package/backend/ws/chat/background.ts +198 -0
- package/backend/ws/chat/index.ts +21 -0
- package/backend/ws/chat/stream.ts +707 -0
- package/backend/ws/engine/claude/accounts.ts +401 -0
- package/backend/ws/engine/claude/index.ts +13 -0
- package/backend/ws/engine/claude/status.ts +43 -0
- package/backend/ws/engine/index.ts +14 -0
- package/backend/ws/engine/opencode/index.ts +11 -0
- package/backend/ws/engine/opencode/status.ts +30 -0
- package/backend/ws/engine/utils.ts +36 -0
- package/backend/ws/files/index.ts +30 -0
- package/backend/ws/files/read.ts +189 -0
- package/backend/ws/files/search.ts +453 -0
- package/backend/ws/files/watch.ts +124 -0
- package/backend/ws/files/write.ts +143 -0
- package/backend/ws/git/branch.ts +106 -0
- package/backend/ws/git/commit.ts +39 -0
- package/backend/ws/git/conflict.ts +68 -0
- package/backend/ws/git/diff.ts +69 -0
- package/backend/ws/git/index.ts +24 -0
- package/backend/ws/git/log.ts +41 -0
- package/backend/ws/git/remote.ts +214 -0
- package/backend/ws/git/staging.ts +84 -0
- package/backend/ws/git/status.ts +90 -0
- package/backend/ws/index.ts +69 -0
- package/backend/ws/mcp/index.ts +61 -0
- package/backend/ws/messages/crud.ts +74 -0
- package/backend/ws/messages/index.ts +14 -0
- package/backend/ws/preview/browser/cleanup.ts +129 -0
- package/backend/ws/preview/browser/console.ts +114 -0
- package/backend/ws/preview/browser/interact.ts +513 -0
- package/backend/ws/preview/browser/mcp.ts +129 -0
- package/backend/ws/preview/browser/native-ui.ts +235 -0
- package/backend/ws/preview/browser/stats.ts +55 -0
- package/backend/ws/preview/browser/tab-info.ts +126 -0
- package/backend/ws/preview/browser/tab.ts +166 -0
- package/backend/ws/preview/browser/webcodecs.ts +293 -0
- package/backend/ws/preview/index.ts +146 -0
- package/backend/ws/projects/crud.ts +113 -0
- package/backend/ws/projects/index.ts +25 -0
- package/backend/ws/projects/presence.ts +46 -0
- package/backend/ws/projects/status.ts +116 -0
- package/backend/ws/sessions/crud.ts +327 -0
- package/backend/ws/sessions/index.ts +33 -0
- package/backend/ws/settings/crud.ts +112 -0
- package/backend/ws/settings/index.ts +14 -0
- package/backend/ws/snapshot/index.ts +17 -0
- package/backend/ws/snapshot/restore.ts +173 -0
- package/backend/ws/snapshot/timeline.ts +141 -0
- package/backend/ws/system/index.ts +14 -0
- package/backend/ws/system/operations.ts +49 -0
- package/backend/ws/terminal/index.ts +40 -0
- package/backend/ws/terminal/persistence.ts +153 -0
- package/backend/ws/terminal/session.ts +382 -0
- package/backend/ws/terminal/stream.ts +79 -0
- package/backend/ws/tunnel/index.ts +14 -0
- package/backend/ws/tunnel/operations.ts +91 -0
- package/backend/ws/types.ts +20 -0
- package/backend/ws/user/crud.ts +156 -0
- package/backend/ws/user/index.ts +14 -0
- package/bin/clopen.ts +307 -0
- package/bun.lock +1352 -0
- package/frontend/App.svelte +34 -0
- package/frontend/app.css +313 -0
- package/frontend/lib/app-environment.ts +10 -0
- package/frontend/lib/components/chat/ChatInterface.svelte +407 -0
- package/frontend/lib/components/chat/formatters/ErrorMessage.svelte +57 -0
- package/frontend/lib/components/chat/formatters/MessageFormatter.svelte +224 -0
- package/frontend/lib/components/chat/formatters/TextMessage.svelte +395 -0
- package/frontend/lib/components/chat/formatters/Tools.svelte +70 -0
- package/frontend/lib/components/chat/formatters/index.ts +3 -0
- package/frontend/lib/components/chat/input/ChatInput.svelte +421 -0
- package/frontend/lib/components/chat/input/components/ChatInputActions.svelte +78 -0
- package/frontend/lib/components/chat/input/components/DragDropOverlay.svelte +30 -0
- package/frontend/lib/components/chat/input/components/EditModeIndicator.svelte +33 -0
- package/frontend/lib/components/chat/input/components/EngineModelPicker.svelte +619 -0
- package/frontend/lib/components/chat/input/components/FileAttachmentPreview.svelte +48 -0
- package/frontend/lib/components/chat/input/components/LoadingIndicator.svelte +31 -0
- package/frontend/lib/components/chat/input/composables/use-animations.svelte.ts +201 -0
- package/frontend/lib/components/chat/input/composables/use-chat-actions.svelte.ts +148 -0
- package/frontend/lib/components/chat/input/composables/use-file-handling.svelte.ts +216 -0
- package/frontend/lib/components/chat/input/composables/use-input-state.svelte.ts +357 -0
- package/frontend/lib/components/chat/input/composables/use-textarea-resize.svelte.ts +57 -0
- package/frontend/lib/components/chat/message/ChatMessage.svelte +478 -0
- package/frontend/lib/components/chat/message/ChatMessages.svelte +541 -0
- package/frontend/lib/components/chat/message/DateSeparator.svelte +86 -0
- package/frontend/lib/components/chat/message/MessageBubble.svelte +86 -0
- package/frontend/lib/components/chat/message/MessageHeader.svelte +157 -0
- package/frontend/lib/components/chat/modal/DebugModal.svelte +59 -0
- package/frontend/lib/components/chat/modal/TokenUsageModal.svelte +124 -0
- package/frontend/lib/components/chat/shared/index.ts +2 -0
- package/frontend/lib/components/chat/shared/utils.ts +116 -0
- package/frontend/lib/components/chat/tools/BashOutputTool.svelte +35 -0
- package/frontend/lib/components/chat/tools/BashTool.svelte +46 -0
- package/frontend/lib/components/chat/tools/CustomMcpTool.svelte +139 -0
- package/frontend/lib/components/chat/tools/EditTool.svelte +48 -0
- package/frontend/lib/components/chat/tools/ExitPlanModeTool.svelte +32 -0
- package/frontend/lib/components/chat/tools/GlobTool.svelte +51 -0
- package/frontend/lib/components/chat/tools/GrepTool.svelte +90 -0
- package/frontend/lib/components/chat/tools/KillShellTool.svelte +26 -0
- package/frontend/lib/components/chat/tools/ListMcpResourcesTool.svelte +31 -0
- package/frontend/lib/components/chat/tools/NotebookEditTool.svelte +38 -0
- package/frontend/lib/components/chat/tools/ReadMcpResourceTool.svelte +34 -0
- package/frontend/lib/components/chat/tools/ReadTool.svelte +41 -0
- package/frontend/lib/components/chat/tools/TaskTool.svelte +64 -0
- package/frontend/lib/components/chat/tools/TodoWriteTool.svelte +75 -0
- package/frontend/lib/components/chat/tools/WebFetchTool.svelte +35 -0
- package/frontend/lib/components/chat/tools/WebSearchTool.svelte +84 -0
- package/frontend/lib/components/chat/tools/WriteTool.svelte +33 -0
- package/frontend/lib/components/chat/tools/components/CodeBlock.svelte +79 -0
- package/frontend/lib/components/chat/tools/components/DiffBlock.svelte +408 -0
- package/frontend/lib/components/chat/tools/components/FileHeader.svelte +45 -0
- package/frontend/lib/components/chat/tools/components/InfoLine.svelte +19 -0
- package/frontend/lib/components/chat/tools/components/StatsBadges.svelte +27 -0
- package/frontend/lib/components/chat/tools/components/TerminalCommand.svelte +54 -0
- package/frontend/lib/components/chat/tools/components/index.ts +7 -0
- package/frontend/lib/components/chat/tools/index.ts +26 -0
- package/frontend/lib/components/chat/widgets/FloatingTodoList.svelte +249 -0
- package/frontend/lib/components/chat/widgets/TokenUsage.svelte +78 -0
- package/frontend/lib/components/checkpoint/TimelineModal.svelte +391 -0
- package/frontend/lib/components/checkpoint/timeline/TimelineEdge.svelte +26 -0
- package/frontend/lib/components/checkpoint/timeline/TimelineGraph.svelte +87 -0
- package/frontend/lib/components/checkpoint/timeline/TimelineNode.svelte +108 -0
- package/frontend/lib/components/checkpoint/timeline/TimelineVersionGroup.svelte +59 -0
- package/frontend/lib/components/checkpoint/timeline/animation.ts +168 -0
- package/frontend/lib/components/checkpoint/timeline/config.ts +44 -0
- package/frontend/lib/components/checkpoint/timeline/graph-builder.ts +304 -0
- package/frontend/lib/components/checkpoint/timeline/types.ts +65 -0
- package/frontend/lib/components/checkpoint/timeline/utils.ts +53 -0
- package/frontend/lib/components/common/Alert.svelte +139 -0
- package/frontend/lib/components/common/AvatarBubble.svelte +56 -0
- package/frontend/lib/components/common/Button.svelte +71 -0
- package/frontend/lib/components/common/Card.svelte +102 -0
- package/frontend/lib/components/common/Checkbox.svelte +48 -0
- package/frontend/lib/components/common/Dialog.svelte +249 -0
- package/frontend/lib/components/common/FolderBrowser.svelte +843 -0
- package/frontend/lib/components/common/Icon.svelte +58 -0
- package/frontend/lib/components/common/Input.svelte +72 -0
- package/frontend/lib/components/common/Lightbox.svelte +233 -0
- package/frontend/lib/components/common/LoadingScreen.svelte +52 -0
- package/frontend/lib/components/common/LoadingSpinner.svelte +48 -0
- package/frontend/lib/components/common/Modal.svelte +177 -0
- package/frontend/lib/components/common/ModalProvider.svelte +28 -0
- package/frontend/lib/components/common/ModelSelector.svelte +110 -0
- package/frontend/lib/components/common/MonacoEditor.svelte +569 -0
- package/frontend/lib/components/common/NotificationToast.svelte +113 -0
- package/frontend/lib/components/common/PageTemplate.svelte +76 -0
- package/frontend/lib/components/common/ProjectUserAvatars.svelte +79 -0
- package/frontend/lib/components/common/Select.svelte +98 -0
- package/frontend/lib/components/common/Textarea.svelte +80 -0
- package/frontend/lib/components/common/ThemeToggle.svelte +44 -0
- package/frontend/lib/components/common/lucide-icons.ts +1642 -0
- package/frontend/lib/components/common/material-icons.ts +1082 -0
- package/frontend/lib/components/common/xterm/XTerm.svelte +796 -0
- package/frontend/lib/components/common/xterm/index.ts +16 -0
- package/frontend/lib/components/common/xterm/terminal-config.ts +68 -0
- package/frontend/lib/components/common/xterm/types.ts +31 -0
- package/frontend/lib/components/common/xterm/xterm-service.ts +353 -0
- package/frontend/lib/components/files/FileNode.svelte +384 -0
- package/frontend/lib/components/files/FileTree.svelte +681 -0
- package/frontend/lib/components/files/FileViewer.svelte +728 -0
- package/frontend/lib/components/files/SearchResults.svelte +303 -0
- package/frontend/lib/components/git/BranchManager.svelte +458 -0
- package/frontend/lib/components/git/ChangesSection.svelte +107 -0
- package/frontend/lib/components/git/CommitForm.svelte +76 -0
- package/frontend/lib/components/git/ConflictResolver.svelte +158 -0
- package/frontend/lib/components/git/DiffViewer.svelte +364 -0
- package/frontend/lib/components/git/FileChangeItem.svelte +97 -0
- package/frontend/lib/components/git/GitButton.svelte +33 -0
- package/frontend/lib/components/git/GitLog.svelte +361 -0
- package/frontend/lib/components/git/GitModal.svelte +80 -0
- package/frontend/lib/components/history/HistoryModal.svelte +563 -0
- package/frontend/lib/components/history/HistoryView.svelte +615 -0
- package/frontend/lib/components/index.ts +34 -0
- package/frontend/lib/components/preview/browser/BrowserPreview.svelte +549 -0
- package/frontend/lib/components/preview/browser/components/Canvas.svelte +1058 -0
- package/frontend/lib/components/preview/browser/components/ConsolePanel.svelte +757 -0
- package/frontend/lib/components/preview/browser/components/Container.svelte +450 -0
- package/frontend/lib/components/preview/browser/components/ContextMenu.svelte +236 -0
- package/frontend/lib/components/preview/browser/components/SelectDropdown.svelte +224 -0
- package/frontend/lib/components/preview/browser/components/Toolbar.svelte +339 -0
- package/frontend/lib/components/preview/browser/components/VirtualCursor.svelte +36 -0
- package/frontend/lib/components/preview/browser/core/cleanup.svelte.ts +155 -0
- package/frontend/lib/components/preview/browser/core/coordinator.svelte.ts +837 -0
- package/frontend/lib/components/preview/browser/core/interactions.svelte.ts +113 -0
- package/frontend/lib/components/preview/browser/core/mcp-handlers.svelte.ts +296 -0
- package/frontend/lib/components/preview/browser/core/native-ui-handlers.svelte.ts +391 -0
- package/frontend/lib/components/preview/browser/core/stream-handler.svelte.ts +231 -0
- package/frontend/lib/components/preview/browser/core/tab-manager.svelte.ts +210 -0
- package/frontend/lib/components/preview/browser/core/tab-operations.svelte.ts +239 -0
- package/frontend/lib/components/preview/index.ts +2 -0
- package/frontend/lib/components/settings/SettingsModal.svelte +235 -0
- package/frontend/lib/components/settings/SettingsView.svelte +36 -0
- package/frontend/lib/components/settings/appearance/AppearanceSettings.svelte +51 -0
- package/frontend/lib/components/settings/appearance/LayoutPresetSettings.svelte +160 -0
- package/frontend/lib/components/settings/appearance/LayoutPreview.svelte +76 -0
- package/frontend/lib/components/settings/engines/AIEnginesSettings.svelte +917 -0
- package/frontend/lib/components/settings/general/AdvancedSettings.svelte +187 -0
- package/frontend/lib/components/settings/general/DataManagementSettings.svelte +203 -0
- package/frontend/lib/components/settings/general/GeneralSettings.svelte +10 -0
- package/frontend/lib/components/settings/model/ModelSettings.svelte +357 -0
- package/frontend/lib/components/settings/notifications/NotificationSettings.svelte +205 -0
- package/frontend/lib/components/settings/user/UserSettings.svelte +197 -0
- package/frontend/lib/components/terminal/Terminal.svelte +368 -0
- package/frontend/lib/components/terminal/TerminalTabs.svelte +87 -0
- package/frontend/lib/components/terminal/TerminalView.svelte +55 -0
- package/frontend/lib/components/tunnel/TunnelActive.svelte +142 -0
- package/frontend/lib/components/tunnel/TunnelButton.svelte +54 -0
- package/frontend/lib/components/tunnel/TunnelInactive.svelte +284 -0
- package/frontend/lib/components/tunnel/TunnelModal.svelte +47 -0
- package/frontend/lib/components/tunnel/TunnelQRCode.svelte +49 -0
- package/frontend/lib/components/workspace/DesktopNavigator.svelte +382 -0
- package/frontend/lib/components/workspace/MobileNavigator.svelte +403 -0
- package/frontend/lib/components/workspace/PanelContainer.svelte +100 -0
- package/frontend/lib/components/workspace/PanelHeader.svelte +505 -0
- package/frontend/lib/components/workspace/ViewMenu.svelte +162 -0
- package/frontend/lib/components/workspace/WorkspaceLayout.svelte +169 -0
- package/frontend/lib/components/workspace/layout/DesktopLayout.svelte +15 -0
- package/frontend/lib/components/workspace/layout/MobileLayout.svelte +17 -0
- package/frontend/lib/components/workspace/layout/split-pane/Container.svelte +42 -0
- package/frontend/lib/components/workspace/layout/split-pane/Handle.svelte +85 -0
- package/frontend/lib/components/workspace/layout/split-pane/Layout.svelte +37 -0
- package/frontend/lib/components/workspace/panels/ChatPanel.svelte +274 -0
- package/frontend/lib/components/workspace/panels/FilesPanel.svelte +1261 -0
- package/frontend/lib/components/workspace/panels/GitPanel.svelte +1560 -0
- package/frontend/lib/components/workspace/panels/PreviewPanel.svelte +150 -0
- package/frontend/lib/components/workspace/panels/TerminalPanel.svelte +73 -0
- package/frontend/lib/constants/preview.ts +45 -0
- package/frontend/lib/services/chat/chat.service.ts +704 -0
- package/frontend/lib/services/chat/index.ts +7 -0
- package/frontend/lib/services/notification/global-stream-monitor.ts +86 -0
- package/frontend/lib/services/notification/index.ts +8 -0
- package/frontend/lib/services/notification/push.service.ts +144 -0
- package/frontend/lib/services/notification/sound.service.ts +127 -0
- package/frontend/lib/services/preview/browser/browser-console.service.ts +61 -0
- package/frontend/lib/services/preview/browser/browser-webcodecs.service.ts +1499 -0
- package/frontend/lib/services/preview/browser/mcp-integration.svelte.ts +67 -0
- package/frontend/lib/services/preview/index.ts +23 -0
- package/frontend/lib/services/project/index.ts +8 -0
- package/frontend/lib/services/project/status.service.ts +159 -0
- package/frontend/lib/services/snapshot/snapshot.service.ts +47 -0
- package/frontend/lib/services/terminal/background/index.ts +130 -0
- package/frontend/lib/services/terminal/background/session-restore.ts +274 -0
- package/frontend/lib/services/terminal/background/stream-manager.ts +286 -0
- package/frontend/lib/services/terminal/index.ts +14 -0
- package/frontend/lib/services/terminal/persistence.service.ts +260 -0
- package/frontend/lib/services/terminal/project.service.ts +953 -0
- package/frontend/lib/services/terminal/session.service.ts +364 -0
- package/frontend/lib/services/terminal/terminal.service.ts +369 -0
- package/frontend/lib/stores/core/app.svelte.ts +117 -0
- package/frontend/lib/stores/core/files.svelte.ts +73 -0
- package/frontend/lib/stores/core/presence.svelte.ts +48 -0
- package/frontend/lib/stores/core/projects.svelte.ts +317 -0
- package/frontend/lib/stores/core/sessions.svelte.ts +383 -0
- package/frontend/lib/stores/features/claude-accounts.svelte.ts +58 -0
- package/frontend/lib/stores/features/models.svelte.ts +89 -0
- package/frontend/lib/stores/features/settings.svelte.ts +87 -0
- package/frontend/lib/stores/features/terminal.svelte.ts +701 -0
- package/frontend/lib/stores/features/tunnel.svelte.ts +161 -0
- package/frontend/lib/stores/features/user.svelte.ts +96 -0
- package/frontend/lib/stores/ui/chat-input.svelte.ts +57 -0
- package/frontend/lib/stores/ui/chat-model.svelte.ts +61 -0
- package/frontend/lib/stores/ui/dialog.svelte.ts +59 -0
- package/frontend/lib/stores/ui/edit-mode.svelte.ts +214 -0
- package/frontend/lib/stores/ui/notification.svelte.ts +166 -0
- package/frontend/lib/stores/ui/settings-modal.svelte.ts +88 -0
- package/frontend/lib/stores/ui/theme.svelte.ts +179 -0
- package/frontend/lib/stores/ui/workspace.svelte.ts +754 -0
- package/frontend/lib/types/native-ui.ts +73 -0
- package/frontend/lib/utils/chat/date-separator.ts +39 -0
- package/frontend/lib/utils/chat/message-grouper.ts +219 -0
- package/frontend/lib/utils/chat/message-processor.ts +135 -0
- package/frontend/lib/utils/chat/tool-handler.ts +161 -0
- package/frontend/lib/utils/chat/virtual-scroll.svelte.ts +142 -0
- package/frontend/lib/utils/click-outside.ts +20 -0
- package/frontend/lib/utils/context-manager.ts +257 -0
- package/frontend/lib/utils/file-icon-mappings.ts +769 -0
- package/frontend/lib/utils/folder-icon-mappings.ts +1030 -0
- package/frontend/lib/utils/git-status.ts +68 -0
- package/frontend/lib/utils/platform.ts +113 -0
- package/frontend/lib/utils/port-check.ts +65 -0
- package/frontend/lib/utils/terminalFormatter.ts +207 -0
- package/frontend/lib/utils/theme.ts +6 -0
- package/frontend/lib/utils/tree-visualizer.ts +320 -0
- package/frontend/lib/utils/ws.ts +44 -0
- package/frontend/main.ts +13 -0
- package/index.html +70 -0
- package/package.json +111 -0
- package/scripts/generate-icons.ts +87 -0
- package/scripts/pre-publish-check.sh +142 -0
- package/scripts/setup-hooks.sh +134 -0
- package/scripts/validate-branch-name.sh +47 -0
- package/scripts/validate-commit-msg.sh +42 -0
- package/shared/constants/engines.ts +134 -0
- package/shared/types/database/connection.ts +16 -0
- package/shared/types/database/index.ts +6 -0
- package/shared/types/database/schema.ts +141 -0
- package/shared/types/engine/index.ts +45 -0
- package/shared/types/filesystem/index.ts +22 -0
- package/shared/types/git.ts +171 -0
- package/shared/types/messaging/index.ts +239 -0
- package/shared/types/messaging/tool.ts +526 -0
- package/shared/types/network/api.ts +18 -0
- package/shared/types/network/index.ts +5 -0
- package/shared/types/stores/app.ts +23 -0
- package/shared/types/stores/dialog.ts +21 -0
- package/shared/types/stores/index.ts +3 -0
- package/shared/types/stores/settings.ts +15 -0
- package/shared/types/terminal/index.ts +44 -0
- package/shared/types/ui/components.ts +61 -0
- package/shared/types/ui/icons.ts +23 -0
- package/shared/types/ui/index.ts +22 -0
- package/shared/types/ui/notifications.ts +14 -0
- package/shared/types/ui/theme.ts +12 -0
- package/shared/types/websocket/index.ts +43 -0
- package/shared/types/window.d.ts +13 -0
- package/shared/utils/anonymous-user.ts +168 -0
- package/shared/utils/async.ts +10 -0
- package/shared/utils/diff-calculator.ts +184 -0
- package/shared/utils/file-type-detection.ts +166 -0
- package/shared/utils/logger.ts +158 -0
- package/shared/utils/message-formatter.ts +79 -0
- package/shared/utils/path.ts +47 -0
- package/shared/utils/ws-client.ts +768 -0
- package/shared/utils/ws-server.ts +660 -0
- package/static/audio/notification.ogg +0 -0
- package/static/favicon.svg +8 -0
- package/static/fonts/dm-sans/dm-sans-italic-latin-ext.woff2 +0 -0
- package/static/fonts/dm-sans/dm-sans-italic-latin.woff2 +0 -0
- package/static/fonts/dm-sans/dm-sans-normal-latin-ext.woff2 +0 -0
- package/static/fonts/dm-sans/dm-sans-normal-latin.woff2 +0 -0
- package/static/fonts/dm-sans.css +96 -0
- package/svelte.config.js +20 -0
- package/vite.config.ts +33 -0
|
@@ -0,0 +1,791 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Browser Automation - Custom MCP Server
|
|
3
|
+
*
|
|
4
|
+
* Provides comprehensive browser automation tools for AI-driven testing and exploration.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { z } from "zod";
|
|
8
|
+
import { defineServer } from "../helper";
|
|
9
|
+
|
|
10
|
+
// Import handlers
|
|
11
|
+
import { listTabsHandler, switchTabHandler, openNewTabHandler, closeTabHandler, navigateHandler, setViewportHandler } from "./browser";
|
|
12
|
+
import { actionsHandler } from "./actions";
|
|
13
|
+
import { getConsoleLogsHandler, clearConsoleLogsHandler, executeConsoleHandler, takeScreenshotHandler, analyzeDomHandler } from "./inspection";
|
|
14
|
+
|
|
15
|
+
export default defineServer({
|
|
16
|
+
name: "browser-automation",
|
|
17
|
+
version: "1.0.0",
|
|
18
|
+
tools: {
|
|
19
|
+
// ============================================================================
|
|
20
|
+
// Browser Tab Management
|
|
21
|
+
// ============================================================================
|
|
22
|
+
"list_tabs": {
|
|
23
|
+
description: `List all open browser tabs with metadata.
|
|
24
|
+
|
|
25
|
+
WHEN TO USE:
|
|
26
|
+
- Start of session to see available tabs
|
|
27
|
+
- Before switching tabs
|
|
28
|
+
- Verify tab state after open/close operations
|
|
29
|
+
|
|
30
|
+
OUTPUT: Formatted text with tab index, ID, title, URL, and active indicator (*). Example:
|
|
31
|
+
[1] tab-abc123 * Google Search - https://google.com
|
|
32
|
+
[2] tab-def456 Calculator - https://calculator.net
|
|
33
|
+
|
|
34
|
+
Use exact tab ID from output with switch_tab or close_tab - not the index number.`,
|
|
35
|
+
handler: listTabsHandler
|
|
36
|
+
},
|
|
37
|
+
|
|
38
|
+
"switch_tab": {
|
|
39
|
+
description: `Switch browser focus to different tab.
|
|
40
|
+
|
|
41
|
+
WHEN TO USE:
|
|
42
|
+
- Multi-tab workflows
|
|
43
|
+
- Comparing content across tabs
|
|
44
|
+
- Managing multiple sessions
|
|
45
|
+
|
|
46
|
+
OUTPUT: Confirmation with switched tab's ID, title, and URL.
|
|
47
|
+
|
|
48
|
+
Tab ID must be obtained from list_tabs. After switching, previous tab's context is lost - take new screenshot or analyze_dom if needed.`,
|
|
49
|
+
schema: {
|
|
50
|
+
tabId: z.string().min(1).describe("Tab ID obtained from list_tabs output (e.g., 'tab-abc123')")
|
|
51
|
+
},
|
|
52
|
+
handler: switchTabHandler
|
|
53
|
+
},
|
|
54
|
+
|
|
55
|
+
"open_new_tab": {
|
|
56
|
+
description: `Create new browser tab with optional URL and viewport configuration.
|
|
57
|
+
|
|
58
|
+
WHEN TO USE:
|
|
59
|
+
- Starting fresh session
|
|
60
|
+
- Opening multiple pages for comparison
|
|
61
|
+
- Separating workflows
|
|
62
|
+
- Testing risky actions in isolation
|
|
63
|
+
- Testing responsive designs across devices
|
|
64
|
+
|
|
65
|
+
OUTPUT: Tab ID, title, and URL of newly created tab. New tab automatically becomes active.
|
|
66
|
+
|
|
67
|
+
Automatically creates session. New tab is immediately active - no need to switch_tab. If URL provided, waits for page load.
|
|
68
|
+
|
|
69
|
+
VIEWPORT MODES:
|
|
70
|
+
- desktop: 1920x1080 (default rotation: landscape) - For testing full desktop layouts
|
|
71
|
+
- laptop: 1280x800 (default device size; default rotation: landscape) - Most common desktop viewport
|
|
72
|
+
- tablet: 820x1050 (default rotation: portrait) - iPad-like tablet viewport
|
|
73
|
+
- mobile: 393x740 (default rotation: portrait) - Modern smartphone viewport
|
|
74
|
+
|
|
75
|
+
ROTATION:
|
|
76
|
+
- portrait: Standard vertical orientation (default for tablet/mobile)
|
|
77
|
+
- landscape: Horizontal orientation (default for desktop/laptop)
|
|
78
|
+
- Omit rotation to use device-appropriate default`,
|
|
79
|
+
schema: {
|
|
80
|
+
url: z.string().url().optional().describe("Initial URL with protocol (http:// or https://). Omit for blank tab."),
|
|
81
|
+
deviceSize: z.enum(['desktop', 'laptop', 'tablet', 'mobile']).optional().default('laptop').describe("Viewport device size (default: laptop). Choose based on testing needs."),
|
|
82
|
+
rotation: z.enum(['portrait', 'landscape']).optional().describe("Screen orientation. If omitted, uses device-appropriate default: landscape for desktop/laptop, portrait for tablet/mobile.")
|
|
83
|
+
},
|
|
84
|
+
handler: openNewTabHandler
|
|
85
|
+
},
|
|
86
|
+
|
|
87
|
+
"close_tab": {
|
|
88
|
+
description: `Close tab by ID and cleanup session.
|
|
89
|
+
|
|
90
|
+
WHEN TO USE:
|
|
91
|
+
- Cleanup after completing task
|
|
92
|
+
- Managing memory/resources
|
|
93
|
+
- Closing tabs no longer needed
|
|
94
|
+
|
|
95
|
+
OUTPUT: Confirmation message. If closed tab was active, automatically switches to another tab and returns new active tab info.
|
|
96
|
+
|
|
97
|
+
Cannot close last remaining tab (will error). If tab was active, focus moves to another tab - check list_tabs after.`,
|
|
98
|
+
schema: {
|
|
99
|
+
tabId: z.string().min(1).describe("Tab ID obtained from list_tabs output (e.g., 'tab-abc123')")
|
|
100
|
+
},
|
|
101
|
+
handler: closeTabHandler
|
|
102
|
+
},
|
|
103
|
+
|
|
104
|
+
"set_viewport": {
|
|
105
|
+
description: `Change viewport settings (device size and rotation) for active tab.
|
|
106
|
+
|
|
107
|
+
WHEN TO USE:
|
|
108
|
+
- Testing responsive designs across devices
|
|
109
|
+
- Switching between mobile and desktop views
|
|
110
|
+
- Testing different screen orientations
|
|
111
|
+
- Simulating different device types
|
|
112
|
+
|
|
113
|
+
OUTPUT: Confirmation with tab ID and new viewport settings.
|
|
114
|
+
|
|
115
|
+
VIEWPORT MODES:
|
|
116
|
+
- desktop: 1920x1080 - For testing full desktop layouts
|
|
117
|
+
- laptop: 1280x800 - Most common desktop viewport
|
|
118
|
+
- tablet: 820x1050 - iPad-like tablet viewport
|
|
119
|
+
- mobile: 393x740 - Modern smartphone viewport
|
|
120
|
+
|
|
121
|
+
ROTATION:
|
|
122
|
+
- portrait: Standard vertical orientation
|
|
123
|
+
- landscape: Horizontal orientation (width and height swapped)
|
|
124
|
+
|
|
125
|
+
The viewport change is applied immediately to the active tab. If testing multiple viewports, consider taking screenshots before and after to compare layouts.`,
|
|
126
|
+
schema: {
|
|
127
|
+
deviceSize: z.enum(['desktop', 'laptop', 'tablet', 'mobile']).optional().describe("Viewport device size. Omit to keep current device size."),
|
|
128
|
+
rotation: z.enum(['portrait', 'landscape']).optional().describe("Screen orientation. Omit to keep current rotation.")
|
|
129
|
+
},
|
|
130
|
+
handler: setViewportHandler
|
|
131
|
+
},
|
|
132
|
+
|
|
133
|
+
// ============================================================================
|
|
134
|
+
// Navigation
|
|
135
|
+
// ============================================================================
|
|
136
|
+
"navigate": {
|
|
137
|
+
description: `Navigate to URL and wait for page load.
|
|
138
|
+
|
|
139
|
+
PRIMARY USE: Following links when you have href URL (more efficient than clicking coordinates).
|
|
140
|
+
|
|
141
|
+
WHEN TO USE:
|
|
142
|
+
- Following links from analyze_dom.navigation.links (use href directly)
|
|
143
|
+
- Moving to known URLs
|
|
144
|
+
- Multi-page information extraction workflow
|
|
145
|
+
- Refreshing with new parameters
|
|
146
|
+
|
|
147
|
+
WHEN TO AVOID:
|
|
148
|
+
- Don't have URL (must use take_screenshot + click instead)
|
|
149
|
+
- Testing click interactions (use actions for actual click testing)
|
|
150
|
+
|
|
151
|
+
EFFICIENT WORKFLOW:
|
|
152
|
+
|
|
153
|
+
Information extraction across pages:
|
|
154
|
+
→ navigate(page1)
|
|
155
|
+
→ analyze_dom(['navigation', 'content'])
|
|
156
|
+
→ Find link: {text: "Pricing", href: "https://example.com/pricing"}
|
|
157
|
+
→ navigate(href) ← USE THIS, not click coordinates!
|
|
158
|
+
→ analyze_dom(['content'])
|
|
159
|
+
→ Extract information
|
|
160
|
+
|
|
161
|
+
Why navigate > click for links:
|
|
162
|
+
- Faster: direct URL navigation
|
|
163
|
+
- More reliable: no coordinate dependency
|
|
164
|
+
- Cleaner: no need for screenshot
|
|
165
|
+
- Better for information tasks
|
|
166
|
+
|
|
167
|
+
Example - Multi-page extraction:
|
|
168
|
+
Task: "Find pricing at loom.com"
|
|
169
|
+
→ navigate("https://www.loom.com")
|
|
170
|
+
→ analyze_dom(['navigation'])
|
|
171
|
+
→ Found: {text: "Pricing", href: "https://www.loom.com/pricing"}
|
|
172
|
+
→ navigate("https://www.loom.com/pricing") ← Efficient!
|
|
173
|
+
→ analyze_dom(['content'])
|
|
174
|
+
→ Extract pricing info
|
|
175
|
+
|
|
176
|
+
OUTPUT: Final URL after all redirects complete.
|
|
177
|
+
|
|
178
|
+
After navigate, page is completely new:
|
|
179
|
+
- Previous analyze_dom data is invalid - re-run on new page
|
|
180
|
+
- Pattern for information: navigate → analyze_dom
|
|
181
|
+
- Pattern for interaction: navigate → take_screenshot → actions
|
|
182
|
+
|
|
183
|
+
Session state (cookies, localStorage) preserved. Timeout 30 seconds. Handles redirects automatically.`,
|
|
184
|
+
schema: {
|
|
185
|
+
url: z.string().url().describe("Target URL with protocol (http:// or https://). Redirects handled automatically.")
|
|
186
|
+
},
|
|
187
|
+
handler: navigateHandler
|
|
188
|
+
},
|
|
189
|
+
|
|
190
|
+
// ============================================================================
|
|
191
|
+
// Browser Actions
|
|
192
|
+
// ============================================================================
|
|
193
|
+
"actions": {
|
|
194
|
+
description: `Execute browser interactions in sequence. Each action completes before next begins.
|
|
195
|
+
|
|
196
|
+
MANDATORY PLANNING PROTOCOL:
|
|
197
|
+
|
|
198
|
+
BEFORE executing ANY action sequence, you MUST follow this protocol:
|
|
199
|
+
|
|
200
|
+
1. STATE THE GOAL
|
|
201
|
+
Write down the exact outcome you want to achieve.
|
|
202
|
+
Example: "Enter formula (8 × 5) + (12 ÷ 3) - 7 into calculator"
|
|
203
|
+
Example: "Fill login form with username and password, then submit"
|
|
204
|
+
Example: "Navigate through multi-step checkout process"
|
|
205
|
+
|
|
206
|
+
2. PLAN THE SEQUENCE
|
|
207
|
+
List each action and its expected result/state after execution.
|
|
208
|
+
IMPORTANT: Write this planning EVEN for TYPE actions - always document your plan.
|
|
209
|
+
|
|
210
|
+
Example (Calculator):
|
|
211
|
+
Action 1: Click "(" → Display shows "("
|
|
212
|
+
Action 2: Click "8" → Display shows "(8"
|
|
213
|
+
Action 3: Click "×" → Display shows "(8×"
|
|
214
|
+
Action 4: Click "5" → Display shows "(8×5"
|
|
215
|
+
Action 5: Click ")" → Display shows "(8×5)"
|
|
216
|
+
Action 6: Click "+" → Display shows "(8×5)+"
|
|
217
|
+
... continue for each step
|
|
218
|
+
|
|
219
|
+
Example (Form):
|
|
220
|
+
Action 1: Click username field → Field focused
|
|
221
|
+
Action 2: Type "user@example.com" → Username entered
|
|
222
|
+
Action 3: Click password field → Password field focused
|
|
223
|
+
Action 4: Type "password123" → Password entered (hidden)
|
|
224
|
+
Action 5: Click submit button → Form submitted
|
|
225
|
+
|
|
226
|
+
Example (Calculator with TYPE):
|
|
227
|
+
Action 1: Click input field → Field focused
|
|
228
|
+
Action 2: Type "(15*8)/(4+2)" → Formula entered
|
|
229
|
+
Action 3: Click "=" → Result calculated
|
|
230
|
+
|
|
231
|
+
3. VERIFY THE LOGIC
|
|
232
|
+
Trace through the sequence mentally:
|
|
233
|
+
- Does each action move toward the goal?
|
|
234
|
+
- What is the state after each action?
|
|
235
|
+
- Are there dependencies between actions?
|
|
236
|
+
- Could any action produce unintended results?
|
|
237
|
+
- Is this the most efficient approach?
|
|
238
|
+
|
|
239
|
+
4. EFFICIENT ALTERNATIVES:
|
|
240
|
+
Before using click sequences, verify:
|
|
241
|
+
- Can I TYPE instead of clicking individual buttons?
|
|
242
|
+
- Can I NAVIGATE instead of clicking coordinates?
|
|
243
|
+
- Is there a simpler single-action approach?
|
|
244
|
+
|
|
245
|
+
PREFER:
|
|
246
|
+
- TYPE over click sequence (for formulas, text input)
|
|
247
|
+
- NAVIGATE over click coordinates (for links)
|
|
248
|
+
- Single action over multiple actions (when possible)
|
|
249
|
+
|
|
250
|
+
Example - Calculator:
|
|
251
|
+
INCORRECT: Click "(" → "1" → "5" → "×" → "8" → ")" → "/" → "(" → "4" → "+" → "2" → ")" → "=" (13 actions)
|
|
252
|
+
CORRECT: If input field exists, type "(15*8)/(4+2)" then click "=" (2 actions)
|
|
253
|
+
|
|
254
|
+
5. EXECUTE ONLY AFTER VERIFICATION
|
|
255
|
+
Build the action array only after steps 1-4 are complete and verified.
|
|
256
|
+
|
|
257
|
+
6. VERIFY RESULT AFTER EXECUTION
|
|
258
|
+
After complex sequences complete:
|
|
259
|
+
- Check if result matches your planned input/goal
|
|
260
|
+
- If mismatch detected (e.g., planned sin(45) but got sin(42)):
|
|
261
|
+
* Acknowledge: "Planned [X] but result shows [Y]"
|
|
262
|
+
* Explain likely cause (wrong coordinate, button misidentification, etc.)
|
|
263
|
+
* Decide: Continue with corrected understanding OR re-execute if critical
|
|
264
|
+
- For calculator/form tasks, verify the final input/output matches expectation
|
|
265
|
+
|
|
266
|
+
This protocol applies to ALL sequential actions: calculators, forms, navigation, games, automation workflows, etc.
|
|
267
|
+
|
|
268
|
+
COMMON MISTAKES TO AVOID:
|
|
269
|
+
|
|
270
|
+
Mistake: Not closing parentheses/brackets before next operation
|
|
271
|
+
Example: Click "√" → "64" → "+" results in √(64+...) instead of √64 +
|
|
272
|
+
Fix: Plan to close grouping before continuing: "√" → "64" → ")" → "+"
|
|
273
|
+
|
|
274
|
+
Mistake: Wrong order in multi-step forms
|
|
275
|
+
Example: Type in all fields → click submit → data goes to wrong inputs
|
|
276
|
+
Fix: Click field → type → click next field → type (interleave click and type)
|
|
277
|
+
|
|
278
|
+
Mistake: Not verifying state between dependent actions
|
|
279
|
+
Example: Click modal button → immediately click behind it → clicks wrong element
|
|
280
|
+
Fix: Add wait after modal opens, verify state, then continue
|
|
281
|
+
|
|
282
|
+
PURPOSE: Perform user-like actions (clicking, typing, scrolling) for INTERACTION TASKS.
|
|
283
|
+
|
|
284
|
+
TASK CLASSIFICATION:
|
|
285
|
+
|
|
286
|
+
INTERACTION TASKS (use actions):
|
|
287
|
+
- UI testing, automation testing
|
|
288
|
+
- Form filling that requires interaction
|
|
289
|
+
- Button clicking, element interaction
|
|
290
|
+
→ Workflow: take_screenshot → PLAN → VERIFY → actions
|
|
291
|
+
|
|
292
|
+
INFORMATION TASKS (DON'T use actions for navigation):
|
|
293
|
+
- Reading content, extracting information
|
|
294
|
+
- Following links to other pages
|
|
295
|
+
→ Workflow: analyze_dom → navigate (use href, NOT click)
|
|
296
|
+
|
|
297
|
+
WHEN TO USE:
|
|
298
|
+
- After take_screenshot when you have coordinates
|
|
299
|
+
- Clicking buttons, typing in forms, scrolling
|
|
300
|
+
- Testing interactions, automating workflows
|
|
301
|
+
- Solving obstacles (captcha, modals)
|
|
302
|
+
|
|
303
|
+
WHEN TO AVOID:
|
|
304
|
+
- Information extraction (use analyze_dom instead)
|
|
305
|
+
- Following links to navigate (use navigate with href from analyze_dom, not click coordinates)
|
|
306
|
+
- Reading text content (analyze_dom.content is faster)
|
|
307
|
+
|
|
308
|
+
COORDINATE SOURCE:
|
|
309
|
+
|
|
310
|
+
Coordinates come from AI vision analysis of screenshots, NOT from analyze_dom.
|
|
311
|
+
|
|
312
|
+
Process for getting coordinates:
|
|
313
|
+
1. Call take_screenshot to capture current page
|
|
314
|
+
2. Use AI vision to identify element positions in screenshot
|
|
315
|
+
3. Determine pixel coordinates (x, y) from visual analysis
|
|
316
|
+
4. Apply PLANNING PROTOCOL before building actions
|
|
317
|
+
5. Use verified coordinates in actions
|
|
318
|
+
|
|
319
|
+
Example - Click "Login" button:
|
|
320
|
+
Task: Click button labeled "Login"
|
|
321
|
+
→ take_screenshot (capture page)
|
|
322
|
+
→ AI vision: "Login button at x=1200, y=45"
|
|
323
|
+
→ PLAN: Click button → form appears
|
|
324
|
+
→ VERIFY: Single click achieves goal
|
|
325
|
+
→ Execute: {type: "click", x: 1200, y: 45}
|
|
326
|
+
|
|
327
|
+
Example - Sequential actions with planning:
|
|
328
|
+
Task: Calculate (15 × 8) / (4 + 2) - 3²
|
|
329
|
+
|
|
330
|
+
APPROACH A - If input field exists (PREFERRED):
|
|
331
|
+
→ take_screenshot
|
|
332
|
+
→ PLAN: Type formula directly, then calculate
|
|
333
|
+
→ VERIFY: Input field accepts text → yes
|
|
334
|
+
→ Execute: [
|
|
335
|
+
{click input field x:640, y:311},
|
|
336
|
+
{type "(15*8)/(4+2)-3^2"},
|
|
337
|
+
{click equals x:913, y:479}
|
|
338
|
+
]
|
|
339
|
+
|
|
340
|
+
APPROACH B - If must use buttons:
|
|
341
|
+
→ take_screenshot
|
|
342
|
+
→ PLAN: Each button click in exact order
|
|
343
|
+
"(": x:672,y:320 → display "("
|
|
344
|
+
"1": x:672,y:449 → display "(1"
|
|
345
|
+
"5": x:730,y:405 → display "(15"
|
|
346
|
+
... (continue for all 15+ buttons)
|
|
347
|
+
→ VERIFY: Sequence produces correct formula
|
|
348
|
+
→ Execute: [full button sequence with waits]
|
|
349
|
+
|
|
350
|
+
Example - Form filling with planning:
|
|
351
|
+
Task: Fill and submit login form
|
|
352
|
+
→ take_screenshot
|
|
353
|
+
→ PLAN:
|
|
354
|
+
1. Click username field → focused
|
|
355
|
+
2. Type email → entered
|
|
356
|
+
3. Click password field → focused
|
|
357
|
+
4. Type password → entered (hidden)
|
|
358
|
+
5. Click submit → form submitted
|
|
359
|
+
→ VERIFY: Each field gets correct data in correct order
|
|
360
|
+
→ Execute: [
|
|
361
|
+
{click x:640, y:300},
|
|
362
|
+
{type "user@example.com"},
|
|
363
|
+
{click x:640, y:380},
|
|
364
|
+
{type "password123"},
|
|
365
|
+
{click x:640, y:450}
|
|
366
|
+
]
|
|
367
|
+
|
|
368
|
+
ACTION TYPES:
|
|
369
|
+
|
|
370
|
+
click {x, y, click?: "left"/"right"/"middle"}
|
|
371
|
+
Click at coordinates from screenshot AI vision analysis
|
|
372
|
+
|
|
373
|
+
type {text?: "string", key?: "Enter", clearFirst?: true}
|
|
374
|
+
text: String to type (click input field first)
|
|
375
|
+
key: Special key (Enter, Tab, Escape, Backspace, ArrowUp, ArrowDown, etc)
|
|
376
|
+
clearFirst: true (default) removes existing text
|
|
377
|
+
|
|
378
|
+
move {x, y, steps?: 1}
|
|
379
|
+
Move mouse to coordinates from screenshot (hover effects, tooltips)
|
|
380
|
+
|
|
381
|
+
scroll {deltaX?: 0, deltaY?: 500, smooth?: false}
|
|
382
|
+
deltaY positive=down, negative=up
|
|
383
|
+
|
|
384
|
+
wait {delay: milliseconds}
|
|
385
|
+
Pause between actions (use between dependent actions)
|
|
386
|
+
|
|
387
|
+
extract_data {selector: "CSS selector"}
|
|
388
|
+
Extract text/value from DOM (use after actions complete)
|
|
389
|
+
|
|
390
|
+
OUTPUT:
|
|
391
|
+
- Success: "Successfully performed N action(s) in sequence"
|
|
392
|
+
- With extraction: Returns extracted text/value
|
|
393
|
+
- Error: Descriptive message if action fails
|
|
394
|
+
|
|
395
|
+
For complex sequences (>10 actions):
|
|
396
|
+
- Consider breaking into smaller batches (5-8 actions per batch)
|
|
397
|
+
- Take screenshot between batches to verify partial results
|
|
398
|
+
- Adjust if needed before continuing
|
|
399
|
+
|
|
400
|
+
Obstacle handling (captcha, modal, popup):
|
|
401
|
+
- NEVER stop or ask user - solve automatically
|
|
402
|
+
- Workflow: take_screenshot → identify → solve/dismiss → verify → continue
|
|
403
|
+
- If fails after attempts: report failure, proceed if possible`,
|
|
404
|
+
schema: {
|
|
405
|
+
actions: z.array(
|
|
406
|
+
z.discriminatedUnion("type", [
|
|
407
|
+
z.object({
|
|
408
|
+
type: z.literal("click"),
|
|
409
|
+
x: z.number().describe("X coordinate from screenshot AI vision"),
|
|
410
|
+
y: z.number().describe("Y coordinate from screenshot AI vision"),
|
|
411
|
+
click: z.enum(["left", "right", "middle"]).optional().describe("Mouse button (default: left)")
|
|
412
|
+
}),
|
|
413
|
+
z.object({
|
|
414
|
+
type: z.literal("type"),
|
|
415
|
+
text: z.string().optional().describe("Text to type into focused element"),
|
|
416
|
+
key: z.string().optional().describe("Special key (Enter, Tab, Escape, ArrowDown, Backspace, etc)"),
|
|
417
|
+
clearFirst: z.boolean().optional().describe("Clear field before typing (default: true)")
|
|
418
|
+
}),
|
|
419
|
+
z.object({
|
|
420
|
+
type: z.literal("move"),
|
|
421
|
+
x: z.number().describe("X coordinate from screenshot AI vision"),
|
|
422
|
+
y: z.number().describe("Y coordinate from screenshot AI vision"),
|
|
423
|
+
steps: z.number().optional().describe("Steps for smooth movement (default: 1)")
|
|
424
|
+
}),
|
|
425
|
+
z.object({
|
|
426
|
+
type: z.literal("scroll"),
|
|
427
|
+
deltaX: z.number().optional().describe("Horizontal pixels (positive=right, negative=left)"),
|
|
428
|
+
deltaY: z.number().optional().describe("Vertical pixels (positive=down, negative=up)"),
|
|
429
|
+
smooth: z.boolean().optional().describe("Smooth animation (default: false)")
|
|
430
|
+
}),
|
|
431
|
+
z.object({
|
|
432
|
+
type: z.literal("wait"),
|
|
433
|
+
delay: z.number().describe("Wait duration in milliseconds before next action")
|
|
434
|
+
}),
|
|
435
|
+
z.object({
|
|
436
|
+
type: z.literal("extract_data"),
|
|
437
|
+
selector: z.string().describe("CSS selector (e.g., '#username') or element ID to extract data from")
|
|
438
|
+
})
|
|
439
|
+
])
|
|
440
|
+
).min(1).describe("Actions to execute in sequence. Apply PLANNING PROTOCOL before building this array.")
|
|
441
|
+
},
|
|
442
|
+
handler: actionsHandler
|
|
443
|
+
},
|
|
444
|
+
|
|
445
|
+
// ============================================================================
|
|
446
|
+
// Page Inspection
|
|
447
|
+
// ============================================================================
|
|
448
|
+
"analyze_dom": {
|
|
449
|
+
description: `Extract page information - text content, links, structure, and page metadata.
|
|
450
|
+
|
|
451
|
+
PRIMARY USE CASES:
|
|
452
|
+
|
|
453
|
+
1. WEBSITE EXPLORATION/CLONING
|
|
454
|
+
Task keywords: "explore website", "clone website", "analyze site", "jelajahi website", "get all pages"
|
|
455
|
+
→ Use ONLY analyze_dom + navigate (NO screenshot needed)
|
|
456
|
+
→ Workflow: analyze_dom → navigate(href) → analyze_dom → repeat
|
|
457
|
+
|
|
458
|
+
Example: "Clone example-company.com"
|
|
459
|
+
CORRECT: analyze_dom → get links → navigate to each page → analyze_dom each page
|
|
460
|
+
INCORRECT: analyze_dom → take_screenshot (expensive, unnecessary)
|
|
461
|
+
|
|
462
|
+
2. INFORMATION EXTRACTION
|
|
463
|
+
Task keywords: "find", "read", "get", "extract", "search content"
|
|
464
|
+
→ Use analyze_dom for content, navigate for links
|
|
465
|
+
→ NO screenshot needed unless visual-only content
|
|
466
|
+
|
|
467
|
+
3. CONTENT RESEARCH
|
|
468
|
+
Task keywords: "what does page say", "get article", "find documentation"
|
|
469
|
+
→ analyze_dom is PRIMARY method
|
|
470
|
+
→ Much faster and cheaper than screenshot
|
|
471
|
+
|
|
472
|
+
TASK CLASSIFICATION:
|
|
473
|
+
|
|
474
|
+
INFORMATION EXTRACTION TASKS (reading, finding, extracting, exploring):
|
|
475
|
+
→ PRIMARY: analyze_dom (fast, efficient, no screenshot needed)
|
|
476
|
+
→ SECONDARY: navigate (if need to follow links - use href, not click)
|
|
477
|
+
→ AVOID: take_screenshot (expensive, unnecessary for text content)
|
|
478
|
+
|
|
479
|
+
Examples:
|
|
480
|
+
- "Explore website and all pages" → analyze_dom + navigate only
|
|
481
|
+
- "Clone website structure" → analyze_dom + navigate only
|
|
482
|
+
- "Find pricing information" → analyze_dom.content
|
|
483
|
+
- "Get all navigation links" → analyze_dom.navigation.links
|
|
484
|
+
- "Read article summary" → analyze_dom.structure + content
|
|
485
|
+
- "Find documentation URL" → analyze_dom.navigation.links → navigate(href)
|
|
486
|
+
|
|
487
|
+
INTERACTION TASKS (clicking, typing, testing):
|
|
488
|
+
→ PRIMARY: take_screenshot (for coordinates)
|
|
489
|
+
→ SECONDARY: actions (execute interactions)
|
|
490
|
+
→ SUPPORT: analyze_dom (context only)
|
|
491
|
+
|
|
492
|
+
Examples:
|
|
493
|
+
- "Click login button" → take_screenshot → actions
|
|
494
|
+
- "Fill form" → analyze_dom (optional context) → take_screenshot → actions
|
|
495
|
+
|
|
496
|
+
WHEN TO USE:
|
|
497
|
+
- Read page content (text, headings, paragraphs)
|
|
498
|
+
- Get all links with URLs (for navigation)
|
|
499
|
+
- Understand page structure and hierarchy
|
|
500
|
+
- Extract form metadata
|
|
501
|
+
- Check page properties (iframes, captcha)
|
|
502
|
+
|
|
503
|
+
WHEN TO SKIP:
|
|
504
|
+
- Need pixel coordinates for clicking (use take_screenshot instead)
|
|
505
|
+
- Already have page information and page is unchanged
|
|
506
|
+
|
|
507
|
+
EFFICIENT INFORMATION EXTRACTION WORKFLOW:
|
|
508
|
+
|
|
509
|
+
Step 1: navigate to target page
|
|
510
|
+
Step 2: analyze_dom (get content, links, structure)
|
|
511
|
+
Step 3: If need more pages:
|
|
512
|
+
- Find link in navigation.links (has href)
|
|
513
|
+
- Use navigate(href) - NOT click coordinates
|
|
514
|
+
- Repeat analyze_dom
|
|
515
|
+
Step 4: Extract and return information - DONE
|
|
516
|
+
|
|
517
|
+
NO screenshot needed for information tasks!
|
|
518
|
+
|
|
519
|
+
Example - "Explore/clone website example-company.com":
|
|
520
|
+
→ open_new_tab("https://example-company.com")
|
|
521
|
+
→ analyze_dom(['navigation', 'structure', 'content']) - get ALL page info
|
|
522
|
+
→ Find links: About, Products, Blog, Contact (in navigation.links)
|
|
523
|
+
→ navigate("https://example-company.com/about")
|
|
524
|
+
→ analyze_dom(['structure', 'content']) - get About page info
|
|
525
|
+
→ navigate("https://example-company.com/products")
|
|
526
|
+
→ analyze_dom(['structure', 'content']) - get Products page info
|
|
527
|
+
→ Continue for all pages
|
|
528
|
+
→ DONE (NO screenshot needed at all!)
|
|
529
|
+
|
|
530
|
+
Example - "Find pricing information at loom.com":
|
|
531
|
+
→ open_new_tab("https://www.loom.com")
|
|
532
|
+
→ analyze_dom(['navigation', 'content'])
|
|
533
|
+
→ Find pricing link: {text: "Pricing", href: "https://www.loom.com/pricing"}
|
|
534
|
+
→ navigate("https://www.loom.com/pricing")
|
|
535
|
+
→ analyze_dom(['content', 'structure'])
|
|
536
|
+
→ Extract pricing from content.paragraphs
|
|
537
|
+
→ DONE (no screenshot used!)
|
|
538
|
+
|
|
539
|
+
OUTPUT STRUCTURE:
|
|
540
|
+
|
|
541
|
+
navigation: {links: [{text, href}]}
|
|
542
|
+
- ALL links on page (not just nav menu) - comprehensive extraction
|
|
543
|
+
- Deduplicated by href
|
|
544
|
+
- Use href with navigate tool (efficient)
|
|
545
|
+
- Avoid clicking coordinates for navigation
|
|
546
|
+
|
|
547
|
+
structure: {headings: [{level, text, id}], sections: [{heading, summary}]}
|
|
548
|
+
- Page organization and hierarchy
|
|
549
|
+
|
|
550
|
+
content: {paragraphs: [strings]}
|
|
551
|
+
- Text content from various elements (p, div, li, td, span)
|
|
552
|
+
- Deduplicated, up to 100 text items
|
|
553
|
+
- Primary source for information extraction
|
|
554
|
+
|
|
555
|
+
forms: [{formId, action, fields: [{label, type, name, placeholder, required, currentValue}]}]
|
|
556
|
+
- Form structure metadata
|
|
557
|
+
|
|
558
|
+
summary: {url, title, hasIframes, hasCaptcha, scrollableHeight, viewportHeight}
|
|
559
|
+
- Page metadata
|
|
560
|
+
- hasIframes/hasCaptcha: may need take_screenshot
|
|
561
|
+
|
|
562
|
+
IMPORTANT:
|
|
563
|
+
|
|
564
|
+
analyze_dom is PRIMARY method for information extraction tasks.
|
|
565
|
+
Use navigate + analyze_dom workflow - more efficient than screenshot + click.
|
|
566
|
+
Result becomes stale after navigation - re-run on new pages.`,
|
|
567
|
+
schema: {
|
|
568
|
+
include: z.array(z.enum(['navigation', 'structure', 'content', 'forms', 'summary'])).optional().describe("Sections to return. Omit for all. Use ['navigation', 'content'] for most reading tasks.")
|
|
569
|
+
},
|
|
570
|
+
handler: analyzeDomHandler
|
|
571
|
+
},
|
|
572
|
+
|
|
573
|
+
"take_screenshot": {
|
|
574
|
+
description: `Capture viewport screenshot for visual analysis and coordinate determination.
|
|
575
|
+
|
|
576
|
+
TASK CLASSIFICATION:
|
|
577
|
+
|
|
578
|
+
Ask yourself: Is this an INTERACTION task or INFORMATION task?
|
|
579
|
+
|
|
580
|
+
INTERACTION TASKS (clicking, typing, testing):
|
|
581
|
+
→ USE: take_screenshot (get coordinates via AI vision)
|
|
582
|
+
→ Keywords: "click", "fill form", "type", "test UI", "automate interaction", "submit"
|
|
583
|
+
→ THEN: Apply PLANNING PROTOCOL before actions (see actions tool)
|
|
584
|
+
|
|
585
|
+
INFORMATION TASKS (reading, finding, exploring):
|
|
586
|
+
→ DO NOT USE: take_screenshot (expensive, unnecessary)
|
|
587
|
+
→ Keywords: "explore", "clone", "jelajahi", "find", "read", "get", "extract", "analyze"
|
|
588
|
+
→ USE INSTEAD: analyze_dom (10x faster, much cheaper)
|
|
589
|
+
|
|
590
|
+
WHEN TO USE:
|
|
591
|
+
- Need pixel coordinates for clicking/typing (analyze_dom has no coordinates)
|
|
592
|
+
- Visual-only content (calculator displays, canvas, images, charts)
|
|
593
|
+
- Iframe content (hasIframes=true - analyze_dom can't see inside)
|
|
594
|
+
- Obstacles (captcha, popups, modals)
|
|
595
|
+
- UI/visual verification (styling, layout, colors)
|
|
596
|
+
- Before/after visual comparison
|
|
597
|
+
|
|
598
|
+
WHEN TO SKIP:
|
|
599
|
+
- Website exploration/cloning (use analyze_dom + navigate only)
|
|
600
|
+
- Reading page content (analyze_dom.content is faster)
|
|
601
|
+
- Getting links for navigation (analyze_dom.navigation.links)
|
|
602
|
+
- Scrolling to "see more content" (analyze_dom already gets all DOM content)
|
|
603
|
+
- Information extraction tasks (use analyze_dom instead)
|
|
604
|
+
- Navigation (use navigate with href, not click coordinates)
|
|
605
|
+
- Already have recent screenshot and page unchanged
|
|
606
|
+
|
|
607
|
+
Cost consideration:
|
|
608
|
+
- Screenshot is EXPENSIVE (image encoding, AI vision processing)
|
|
609
|
+
- analyze_dom is CHEAP (text extraction, no image processing)
|
|
610
|
+
- Rule: If task is reading/extracting information → use analyze_dom, NOT screenshot
|
|
611
|
+
|
|
612
|
+
COMMON MISTAKES:
|
|
613
|
+
|
|
614
|
+
Mistake: Using screenshot for website exploration
|
|
615
|
+
Example: "Explore website" → take_screenshot → scroll → screenshot → scroll → screenshot
|
|
616
|
+
Fix: Use analyze_dom + navigate only (no screenshot needed)
|
|
617
|
+
|
|
618
|
+
Mistake: Screenshot to "see" text content
|
|
619
|
+
Example: Want to read page → take_screenshot
|
|
620
|
+
Fix: analyze_dom.content gives you all text directly
|
|
621
|
+
|
|
622
|
+
Mistake: Screenshot + scroll to see full page
|
|
623
|
+
Example: take_screenshot → scroll → screenshot → scroll
|
|
624
|
+
Fix: analyze_dom gets entire DOM at once, no scrolling needed
|
|
625
|
+
|
|
626
|
+
OUTPUT:
|
|
627
|
+
PNG image (base64 encoded) showing visible viewport.
|
|
628
|
+
|
|
629
|
+
WORKFLOW AFTER SCREENSHOT:
|
|
630
|
+
|
|
631
|
+
After taking screenshot for interaction tasks:
|
|
632
|
+
1. AI vision: Analyze screenshot to identify element positions
|
|
633
|
+
2. Determine pixel coordinates (x, y) for target elements
|
|
634
|
+
3. Apply PLANNING PROTOCOL (see actions tool) before executing:
|
|
635
|
+
- STATE THE GOAL
|
|
636
|
+
- PLAN THE SEQUENCE (what happens after each action)
|
|
637
|
+
- VERIFY THE LOGIC
|
|
638
|
+
- EFFICIENT ALTERNATIVES
|
|
639
|
+
- VERIFY RESULT after execution
|
|
640
|
+
4. Execute actions only after verification
|
|
641
|
+
|
|
642
|
+
Example (INCORRECT) - Website exploration:
|
|
643
|
+
|
|
644
|
+
Task: "Explore example-company.com and all pages"
|
|
645
|
+
INCORRECT approach (expensive, slow, unnecessary):
|
|
646
|
+
→ open_new_tab
|
|
647
|
+
→ take_screenshot (not needed)
|
|
648
|
+
→ scroll → take_screenshot (repeat 5 times)
|
|
649
|
+
→ navigate to /about
|
|
650
|
+
→ take_screenshot (not needed)
|
|
651
|
+
→ scroll → take_screenshot
|
|
652
|
+
Total: 10+ screenshots, very expensive!
|
|
653
|
+
|
|
654
|
+
CORRECT approach (fast, efficient):
|
|
655
|
+
→ open_new_tab
|
|
656
|
+
→ analyze_dom (get all content + links)
|
|
657
|
+
→ navigate to /about
|
|
658
|
+
→ analyze_dom (get all content)
|
|
659
|
+
→ Continue for other pages
|
|
660
|
+
Total: 0 screenshots, 10x faster!
|
|
661
|
+
|
|
662
|
+
Example (CORRECT) - Button interaction:
|
|
663
|
+
|
|
664
|
+
Task: "Click the login button"
|
|
665
|
+
→ take_screenshot (need coordinates for interaction)
|
|
666
|
+
→ AI vision: "Login button at x=1200, y=45"
|
|
667
|
+
→ PLAN: Click button → login form appears
|
|
668
|
+
→ VERIFY: Single action achieves goal
|
|
669
|
+
→ actions([{click, x:1200, y:45}])
|
|
670
|
+
|
|
671
|
+
Example (CORRECT) - Sequential calculator actions:
|
|
672
|
+
|
|
673
|
+
Task: Enter calculator formula (15 × 8) / (4 + 2)
|
|
674
|
+
→ take_screenshot (need coordinates for interaction)
|
|
675
|
+
→ AI vision: Identify all needed buttons and their positions
|
|
676
|
+
→ CHECK: Is there an input field? If yes, TYPE is more efficient!
|
|
677
|
+
→ If TYPE available:
|
|
678
|
+
PLAN: Click input → type formula → calculate
|
|
679
|
+
actions([{click x:640,y:311}, {type "(15*8)/(4+2)"}, {click x:913,y:479}])
|
|
680
|
+
→ If must use buttons:
|
|
681
|
+
PLAN each click: "(" at x:672,y:320 → "1" at x:672,y:449 → ...
|
|
682
|
+
VERIFY: Sequence produces correct formula in correct order
|
|
683
|
+
actions([full sequence with verification])
|
|
684
|
+
|
|
685
|
+
Example (CORRECT) - Form filling:
|
|
686
|
+
|
|
687
|
+
Task: "Fill login form"
|
|
688
|
+
→ take_screenshot (need coordinates for interaction)
|
|
689
|
+
→ AI vision: username x:640,y:300; password x:640,y:380; submit x:640,y:450
|
|
690
|
+
→ PLAN:
|
|
691
|
+
1. Click username → focus
|
|
692
|
+
2. Type email → entered
|
|
693
|
+
3. Click password → focus
|
|
694
|
+
4. Type password → entered
|
|
695
|
+
5. Click submit → submitted
|
|
696
|
+
→ VERIFY: Correct order, correct data, correct targets
|
|
697
|
+
→ VERIFY RESULT: After execution, confirm form was filled correctly
|
|
698
|
+
→ actions([sequence])
|
|
699
|
+
|
|
700
|
+
OBSTACLE HANDLING:
|
|
701
|
+
|
|
702
|
+
When encountering obstacles (captcha, modals, popups, challenges):
|
|
703
|
+
- DO NOT stop or ask user
|
|
704
|
+
- Attempt to solve/dismiss automatically using AI vision
|
|
705
|
+
- Continue original task after resolving
|
|
706
|
+
|
|
707
|
+
Captcha workflow:
|
|
708
|
+
1. take_screenshot → identify captcha type
|
|
709
|
+
2. PLAN solution based on type:
|
|
710
|
+
- Checkbox: click checkbox, wait, verify
|
|
711
|
+
- Image selection: identify matching images, click sequence, verify
|
|
712
|
+
- Text/math: solve, input, submit
|
|
713
|
+
3. Execute solution
|
|
714
|
+
4. take_screenshot (verify passed)
|
|
715
|
+
5. Continue task
|
|
716
|
+
|
|
717
|
+
Modal/popup workflow:
|
|
718
|
+
1. take_screenshot → identify modal/popup
|
|
719
|
+
2. Find close button or dismiss action
|
|
720
|
+
3. actions([click dismiss])
|
|
721
|
+
4. Continue task
|
|
722
|
+
|
|
723
|
+
IMPORTANT:
|
|
724
|
+
- Coordinates from AI vision are pixel positions in screenshot
|
|
725
|
+
- Can see inside iframes (unlike analyze_dom)
|
|
726
|
+
- Always apply PLANNING PROTOCOL before executing sequential actions
|
|
727
|
+
- When obstacles appear: solve automatically, never stop
|
|
728
|
+
- After actions: take new screenshot to verify or find new elements`,
|
|
729
|
+
handler: takeScreenshotHandler
|
|
730
|
+
},
|
|
731
|
+
|
|
732
|
+
// Tool entry for retrieving browser console logs (see the description text below).
// Accepts one optional argument, `limit`, and delegates the call to
// getConsoleLogsHandler, which is defined outside this entry.
"get_console_logs": {
|
|
733
|
+
description: `Retrieve browser console logs for debugging.
|
|
734
|
+
|
|
735
|
+
WHEN TO USE:
|
|
736
|
+
- Debug JavaScript errors after actions fail
|
|
737
|
+
- Monitor API calls or script execution
|
|
738
|
+
- Detect client-side errors not visible in UI
|
|
739
|
+
- Investigate unexpected behavior
|
|
740
|
+
|
|
741
|
+
OUTPUT: Formatted text, most recent first:
|
|
742
|
+
[2024-01-27 10:30:45] ERROR: Uncaught TypeError: Cannot read property 'value' of null
|
|
743
|
+
[2024-01-27 10:30:44] WARN: Deprecated API usage
|
|
744
|
+
|
|
745
|
+
Logs captured from browser console only - not backend logs. Default 20 entries, max 100. Logs persist until cleared or tab closed.`,
|
|
746
|
+
schema: {
|
|
747
|
+
// `limit` is a count of entries, so it must be a whole number: `.int()` rejects
// fractional values (e.g. 2.5) at validation time instead of passing them on to the handler.
limit: z.number().int().min(1).max(100).optional().default(20).describe("Maximum number of log entries to return (min: 1, max: 100, default: 20)")
|
|
748
|
+
},
|
|
749
|
+
handler: getConsoleLogsHandler
|
|
750
|
+
},
|
|
751
|
+
|
|
752
|
+
// Tool entry for clearing stored console logs (see the description text below).
// Declares no `schema`, i.e. no arguments are defined for this tool here;
// the call is delegated to clearConsoleLogsHandler, defined outside this entry.
"clear_console_logs": {
|
|
753
|
+
description: `Clear console logs from backend storage.
|
|
754
|
+
|
|
755
|
+
WHEN TO USE:
|
|
756
|
+
- Before important actions to isolate new logs
|
|
757
|
+
- Clean slate for debugging specific operations
|
|
758
|
+
|
|
759
|
+
OUTPUT: Success confirmation.
|
|
760
|
+
|
|
761
|
+
Only clears backend storage, not browser console. Use before actions for clean debugging context.`,
|
|
762
|
+
handler: clearConsoleLogsHandler
|
|
763
|
+
},
|
|
764
|
+
|
|
765
|
+
// Tool entry for running a JavaScript snippet in the browser console
// (see the description text below). Takes a single required string argument,
// `command`, and delegates the call to executeConsoleHandler, defined outside this entry.
"execute_console": {
|
|
766
|
+
description: `Execute JavaScript in browser console.
|
|
767
|
+
|
|
768
|
+
WHEN TO USE:
|
|
769
|
+
- Extract dynamic data not in DOM (computed styles, variables, runtime state)
|
|
770
|
+
- Access browser APIs (localStorage, sessionStorage)
|
|
771
|
+
- Debug page behavior
|
|
772
|
+
- Verify JavaScript context
|
|
773
|
+
|
|
774
|
+
COMMON COMMANDS:
|
|
775
|
+
- document.title
|
|
776
|
+
- window.location.href
|
|
777
|
+
- localStorage.getItem("key")
|
|
778
|
+
- document.querySelector("#id").value
|
|
779
|
+
- Array.from(document.querySelectorAll(".item")).map(el => el.textContent)
|
|
780
|
+
|
|
781
|
+
OUTPUT: Execution result as JSON string, or error message.
|
|
782
|
+
|
|
783
|
+
Has full access to page context. Return value must be JSON-serializable. Use for extraction/debugging, not complex automation.`,
|
|
784
|
+
schema: {
|
|
785
|
+
// `min(1)` rejects only the empty string.
// NOTE(review): a whitespace-only command still passes validation — confirm the handler tolerates it.
command: z.string().min(1).describe("JavaScript expression or statement to execute (e.g., 'document.title', 'window.location.href', 'localStorage.getItem(\"key\")')")
|
|
786
|
+
},
|
|
787
|
+
handler: executeConsoleHandler
|
|
788
|
+
},
|
|
789
|
+
|
|
790
|
+
}
|
|
791
|
+
});
|