ferrum-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.env.example +90 -0
  3. data/CHANGELOG.md +229 -0
  4. data/CONTRIBUTING.md +469 -0
  5. data/LICENSE +21 -0
  6. data/README.md +334 -0
  7. data/SECURITY.md +286 -0
  8. data/bin/ferrum-mcp +66 -0
  9. data/bin/lint +10 -0
  10. data/bin/serve +3 -0
  11. data/bin/test +4 -0
  12. data/docs/API_REFERENCE.md +1410 -0
  13. data/docs/CONFIGURATION.md +254 -0
  14. data/docs/DEPLOYMENT.md +846 -0
  15. data/docs/DOCKER.md +836 -0
  16. data/docs/DOCKER_BOTBROWSER.md +455 -0
  17. data/docs/GETTING_STARTED.md +249 -0
  18. data/docs/TROUBLESHOOTING.md +677 -0
  19. data/lib/ferrum_mcp/browser_manager.rb +101 -0
  20. data/lib/ferrum_mcp/cli/command_handler.rb +99 -0
  21. data/lib/ferrum_mcp/cli/server_runner.rb +166 -0
  22. data/lib/ferrum_mcp/configuration.rb +229 -0
  23. data/lib/ferrum_mcp/resource_manager.rb +223 -0
  24. data/lib/ferrum_mcp/server.rb +254 -0
  25. data/lib/ferrum_mcp/session.rb +227 -0
  26. data/lib/ferrum_mcp/session_manager.rb +183 -0
  27. data/lib/ferrum_mcp/tools/accept_cookies_tool.rb +458 -0
  28. data/lib/ferrum_mcp/tools/base_tool.rb +114 -0
  29. data/lib/ferrum_mcp/tools/clear_cookies_tool.rb +66 -0
  30. data/lib/ferrum_mcp/tools/click_tool.rb +218 -0
  31. data/lib/ferrum_mcp/tools/close_session_tool.rb +49 -0
  32. data/lib/ferrum_mcp/tools/create_session_tool.rb +146 -0
  33. data/lib/ferrum_mcp/tools/drag_and_drop_tool.rb +171 -0
  34. data/lib/ferrum_mcp/tools/evaluate_js_tool.rb +46 -0
  35. data/lib/ferrum_mcp/tools/execute_script_tool.rb +48 -0
  36. data/lib/ferrum_mcp/tools/fill_form_tool.rb +78 -0
  37. data/lib/ferrum_mcp/tools/find_by_text_tool.rb +153 -0
  38. data/lib/ferrum_mcp/tools/get_attribute_tool.rb +56 -0
  39. data/lib/ferrum_mcp/tools/get_cookies_tool.rb +70 -0
  40. data/lib/ferrum_mcp/tools/get_html_tool.rb +52 -0
  41. data/lib/ferrum_mcp/tools/get_session_info_tool.rb +40 -0
  42. data/lib/ferrum_mcp/tools/get_text_tool.rb +67 -0
  43. data/lib/ferrum_mcp/tools/get_title_tool.rb +42 -0
  44. data/lib/ferrum_mcp/tools/get_url_tool.rb +39 -0
  45. data/lib/ferrum_mcp/tools/go_back_tool.rb +49 -0
  46. data/lib/ferrum_mcp/tools/go_forward_tool.rb +49 -0
  47. data/lib/ferrum_mcp/tools/hover_tool.rb +76 -0
  48. data/lib/ferrum_mcp/tools/list_sessions_tool.rb +33 -0
  49. data/lib/ferrum_mcp/tools/navigate_tool.rb +59 -0
  50. data/lib/ferrum_mcp/tools/press_key_tool.rb +91 -0
  51. data/lib/ferrum_mcp/tools/query_shadow_dom_tool.rb +225 -0
  52. data/lib/ferrum_mcp/tools/refresh_tool.rb +49 -0
  53. data/lib/ferrum_mcp/tools/screenshot_tool.rb +121 -0
  54. data/lib/ferrum_mcp/tools/session_tool.rb +37 -0
  55. data/lib/ferrum_mcp/tools/set_cookie_tool.rb +77 -0
  56. data/lib/ferrum_mcp/tools/solve_captcha_tool.rb +528 -0
  57. data/lib/ferrum_mcp/transport/http_server.rb +93 -0
  58. data/lib/ferrum_mcp/transport/rate_limiter.rb +79 -0
  59. data/lib/ferrum_mcp/transport/stdio_server.rb +63 -0
  60. data/lib/ferrum_mcp/version.rb +5 -0
  61. data/lib/ferrum_mcp/whisper_service.rb +222 -0
  62. data/lib/ferrum_mcp.rb +35 -0
  63. metadata +248 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9053a5693d5260b569d476ac94b970edcb7557637f1191961eb3037f3b4834de
4
+ data.tar.gz: ca905498d67b1b8ee5834e532fb0478968c411dc2751d7cb4c0423a079215bda
5
+ SHA512:
6
+ metadata.gz: cc2b19c3c127d7d1efe72d35eea10743cb224e18c66ee1e0fcd4fa56f84a878ded6b9da11a10a57c3fd513e78b36484a8c2a1e2fbe70865c131aa67eb6251c66
7
+ data.tar.gz: f60da3eb3ac99efc0b65d6a1ef4b5fa58306c816cfcc3b6784fd20ca50deb06f55573f9fd2b11ced4e6ffede2037a7815bc722826d45d25398e5b978d0b5e3f4
data/.env.example ADDED
@@ -0,0 +1,90 @@
1
+ # =============================================================================
2
+ # FERRUM MCP SERVER CONFIGURATION
3
+ # =============================================================================
4
+
5
+ # Server Configuration
6
+ MCP_SERVER_HOST=0.0.0.0
7
+ MCP_SERVER_PORT=3000
8
+
9
+ # Browser Options
10
+ BROWSER_HEADLESS=false
11
+ BROWSER_TIMEOUT=60
12
+
13
+ # Session Management
14
+ MAX_CONCURRENT_SESSIONS=10
15
+
16
+ # Rate Limiting (HTTP Transport only)
17
+ RATE_LIMIT_ENABLED=true
18
+ RATE_LIMIT_MAX_REQUESTS=100
19
+ RATE_LIMIT_WINDOW=60
20
+
21
+ # Logging
22
+ LOG_LEVEL=info
23
+
24
+ # =============================================================================
25
+ # MULTI-BROWSER CONFIGURATION
26
+ # Format: BROWSER_<ID>=type:path:name:description
27
+ # Supported types: chrome, chromium, edge, brave, botbrowser
28
+ # Leave path empty to use system default
29
+ # =============================================================================
30
+
31
+ # Example: System Chrome (auto-detected)
32
+ # BROWSER_CHROME=chrome::Google Chrome:Standard Chrome browser
33
+
34
+ # Example: Custom Chrome path
35
+ # BROWSER_CHROME=chrome:/usr/bin/google-chrome:Google Chrome:Standard Chrome browser
36
+
37
+ # Example: BotBrowser
38
+ # BROWSER_BOTBROWSER=botbrowser:/opt/botbrowser/chrome:BotBrowser:Anti-detection browser
39
+
40
+ # Example: Chromium
41
+ # BROWSER_CHROMIUM=chromium:/usr/bin/chromium:Chromium:Open source Chromium
42
+
43
+ # =============================================================================
44
+ # LEGACY BROWSER CONFIGURATION (deprecated but still supported)
45
+ # =============================================================================
46
+ # BOTBROWSER_PATH=/path/to/botbrowser/chrome
47
+ # BROWSER_PATH=/path/to/chrome
48
+
49
+ # =============================================================================
50
+ # USER PROFILES CONFIGURATION
51
+ # Format: USER_PROFILE_<ID>=path:name:description
52
+ # User profiles are standard Chrome user data directories
53
+ # =============================================================================
54
+
55
+ # Example: Development profile with extensions
56
+ # USER_PROFILE_DEV=/home/user/.config/chrome-dev:Development:Dev profile with extensions
57
+
58
+ # Example: Testing profile (clean state)
59
+ # USER_PROFILE_TEST=/home/user/.config/chrome-test:Testing:Clean testing profile
60
+
61
+ # Example: Production profile
62
+ # USER_PROFILE_PROD=/home/user/.config/chrome-prod:Production:Production-like environment
63
+
64
+ # =============================================================================
65
+ # BOTBROWSER PROFILES CONFIGURATION
66
+ # Format: BOT_PROFILE_<ID>=path:name:description
67
+ # Bot profiles contain encrypted anti-detection fingerprints (.enc files)
68
+ # =============================================================================
69
+
70
+ # Example: US-based Chrome profile
71
+ # BOT_PROFILE_US=/profiles/us_chrome.enc:US Chrome:US-based Chrome fingerprint
72
+
73
+ # Example: EU-based Firefox profile
74
+ # BOT_PROFILE_EU_FF=/profiles/eu_firefox.enc:EU Firefox:EU-based Firefox fingerprint
75
+
76
+ # Example: Mobile Android profile
77
+ # BOT_PROFILE_ANDROID=/profiles/android_chrome.enc:Android Chrome:Android mobile fingerprint
78
+
79
+ # Legacy BotBrowser profile (deprecated but still supported)
80
+ # BOTBROWSER_PROFILE=/path/to/profile.enc
81
+
82
+ # =============================================================================
83
+ # RESOURCE DISCOVERY
84
+ # The server will automatically expose configured browsers and profiles via
85
+ # MCP Resources for AI agents to discover and use:
86
+ # - ferrum://browsers - List all available browsers
87
+ # - ferrum://user-profiles - List all user profiles
88
+ # - ferrum://bot-profiles - List all BotBrowser profiles
89
+ # - ferrum://capabilities - Server capabilities and features
90
+ # =============================================================================
data/CHANGELOG.md ADDED
@@ -0,0 +1,229 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ### Added
11
+ - Comprehensive documentation structure in `docs/` directory
12
+ - API reference with all 27+ tools documented
13
+ - Configuration guide for multi-browser and multi-profile setups
14
+ - Getting started guide with detailed setup instructions
15
+ - CHANGELOG.md for version tracking
16
+ - SECURITY.md with responsible disclosure policy
17
+ - CONTRIBUTING.md with contribution guidelines
18
+ - GitHub issue and PR templates
19
+ - Gemspec for RubyGems packaging
20
+ - CLI command structure with `ServerRunner` and `CommandHandler` classes
21
+ - Comprehensive help text with usage examples
22
+ - `wait_for_selector` tool for explicit element waiting
23
+ - `wait_for_text` tool for text-based waiting
24
+
25
+ ### Changed
26
+ - README.md restructured as table of contents
27
+ - Documentation reorganized into dedicated `docs/` folder
28
+ - **CLI architecture refactored** with clear separation of concerns
29
+ - Created `ServerRunner` class for server lifecycle management
30
+ - Created `CommandHandler` class for command dispatching
31
+ - Simplified `bin/ferrum-mcp` to minimal entry point (reduced from ~131 to ~67 lines)
32
+ - Removed `server.rb` to eliminate duplication
33
+ - Updated command format: `ferrum-mcp [COMMAND] [OPTIONS]` (e.g., `ferrum-mcp help`, `ferrum-mcp version`, `ferrum-mcp start`)
34
+ - Test infrastructure improved with `SessionManager` integration
35
+ - All tool tests now use `SessionManager#with_session` pattern
36
+ - Consistent session management across test suite
37
+ - Better test isolation and cleanup
38
+ - Updated `server_options_spec.rb` to match new CLI structure
39
+
40
+ ### Fixed
41
+ - BaseTool `find_element` now uses Ferrum's native wait instead of manual polling with sleep
42
+ - Navigation tools properly wait for network idle after page transitions
43
+ - XSS protection in HoverTool using proper JavaScript escaping with `inspect`
44
+ - XPath injection protection in FindByTextTool with proper quote escaping
45
+ - Stale element retry logic in ClickTool and FillFormTool
46
+ - EvaluateJSTool now properly returns JavaScript evaluation results
47
+ - BrowserManager crash detection and graceful error handling
48
+ - PressKey tool no longer duplicates characters when pressing special keys
49
+ - ClickTool supports force clicking hidden elements with `force: true` parameter
50
+ - DragAndDropTool supports both target elements and coordinates
51
+ - GetTextTool supports XPath selectors with `xpath:` prefix
52
+ - QueryShadowDOMTool supports interacting with Shadow DOM elements (click, get_text, get_html, get_attribute)
53
+
54
+ ### Security
55
+ - Documented security model and trust assumptions
56
+ - Added session limit recommendations
57
+ - Implemented XSS and XPath injection protections in multiple tools
58
+
59
+ ## [0.1.0] - 2024-11-22
60
+
61
+ Initial release of FerrumMCP - Browser automation server implementing the Model Context Protocol.
62
+
63
+ ### Added
64
+
65
+ #### Core Features
66
+ - **Session-based architecture** with automatic cleanup (30-minute idle timeout)
67
+ - **Multi-browser support** via structured environment variables
68
+ - **Multi-profile support** for Chrome user profiles and BotBrowser fingerprints
69
+ - **MCP Resource discovery** for AI agents to introspect available configurations
70
+ - **Dual transport support** (HTTP via Puma and STDIO for Claude Desktop)
71
+ - **Thread-safe session management** with mutex-protected operations
72
+
73
+ #### Session Management Tools (4 tools)
74
+ - `create_session` - Create browser sessions with custom configuration
75
+ - `list_sessions` - List all active browser sessions
76
+ - `get_session_info` - Get detailed information about a session
77
+ - `close_session` - Manually close a browser session
78
+
79
+ #### Navigation Tools (4 tools)
80
+ - `navigate` - Navigate to URLs with network idle waiting
81
+ - `go_back` - Navigate back in browser history
82
+ - `go_forward` - Navigate forward in browser history
83
+ - `refresh` - Reload current page
84
+
85
+ #### Interaction Tools (7 tools)
86
+ - `click` - Click elements with retry logic for stale elements
87
+ - `fill_form` - Fill form fields with typing delays
88
+ - `press_key` - Simulate keyboard input (Enter, Tab, Escape, etc.)
89
+ - `hover` - Hover over elements with JavaScript fallback
90
+ - `drag_and_drop` - Drag elements with smooth animations
91
+ - `accept_cookies` - **Smart cookie banner detection and acceptance**
92
+ - 8 detection strategies (ID, class, text, ARIA, buttons, shadows, iframes, common selectors)
93
+ - Multi-language support (English, French, German, Spanish, Italian, Portuguese, Dutch, Swedish, Norwegian, Danish, Finnish)
94
+ - Customizable selectors and texts
95
+ - `solve_captcha` - **AI-powered CAPTCHA solving**
96
+ - Audio CAPTCHA support with Whisper speech recognition
97
+ - Automatic iframe detection and switching
98
+ - Model selection (tiny, base, small, medium)
99
+ - Automatic model download and caching
100
+
101
+ #### Extraction Tools (6 tools)
102
+ - `get_text` - Extract text from elements
103
+ - `get_html` - Get HTML content (full page or element)
104
+ - `screenshot` - Capture screenshots with base64 encoding and auto-resize
105
+ - `get_title` - Get current page title
106
+ - `get_url` - Get current page URL
107
+ - `find_by_text` - XPath-based text search with visibility filtering
108
+
109
+ #### Advanced Tools (7 tools)
110
+ - `execute_script` - Execute JavaScript without return value
111
+ - `evaluate_js` - Evaluate JavaScript with return value
112
+ - `get_cookies` - Get all or domain-filtered cookies
113
+ - `set_cookie` - Set cookies with all attributes (domain, path, secure, httpOnly, sameSite)
114
+ - `clear_cookies` - Clear all or domain-filtered cookies
115
+ - `get_attribute` - Get element attributes
116
+ - `query_shadow_dom` - Interact with Shadow DOM elements
117
+
118
+ #### MCP Resources (7 resources)
119
+ - `ferrum://browsers` - List all configured browsers
120
+ - `ferrum://browsers/{id}` - Browser details with usage examples
121
+ - `ferrum://user-profiles` - List Chrome user profiles
122
+ - `ferrum://user-profiles/{id}` - User profile details
123
+ - `ferrum://bot-profiles` - List BotBrowser profiles with anti-detection features
124
+ - `ferrum://bot-profiles/{id}` - Bot profile details
125
+ - `ferrum://capabilities` - Server capabilities and feature flags
126
+
127
+ #### BotBrowser Integration
128
+ - **Optional BotBrowser support** for anti-detection browser automation
129
+ - Profile encryption support (`.enc` files)
130
+ - Graceful fallback when BotBrowser not available
131
+ - Configuration via `BROWSER_BOTBROWSER` and `BOT_PROFILE_*` environment variables
132
+ - See [docs/BOTBROWSER_INTEGRATION.md](docs/BOTBROWSER_INTEGRATION.md) for details
133
+
134
+ #### Configuration System
135
+ - **Multi-browser configuration**: `BROWSER_<ID>=type:path:name:description`
136
+ - **User profile configuration**: `USER_PROFILE_<ID>=path:name:description`
137
+ - **Bot profile configuration**: `BOT_PROFILE_<ID>=path:name:description`
138
+ - Legacy configuration support (`BROWSER_PATH`, `BOTBROWSER_PATH`, `BOTBROWSER_PROFILE`)
139
+ - Environment variable validation at startup
140
+ - Configuration discovery through MCP Resources
141
+
142
+ #### Infrastructure
143
+ - **Docker support** with multi-platform builds (amd64, arm64)
144
+ - **GitHub Actions CI/CD**:
145
+ - RuboCop linting with GitHub annotations
146
+ - Zeitwerk eager loading validation
147
+ - RSpec tests on Ruby 3.2 and 3.3
148
+ - Coverage reporting (79% line, 55% branch)
149
+ - Automatic Docker image publishing
150
+ - **Comprehensive test suite** with 79% line coverage
151
+ - Unit tests for all tools
152
+ - Integration tests for multi-browser scenarios
153
+ - Test fixtures with WEBrick server
154
+ - SimpleCov coverage reporting
155
+ - **File-only logging** to `logs/ferrum_mcp.log` (STDIO transport compatible)
156
+ - **Zeitwerk autoloading** with custom inflections (MCP, HTML, URL, JS)
157
+
158
+ #### Documentation
159
+ - Comprehensive README with quick start guides
160
+ - CLAUDE.md for AI assistant development guidance
161
+ - BOTBROWSER_INTEGRATION.md for anti-detection setup
162
+ - .env.example with all configuration options
163
+ - Inline code documentation for complex logic
164
+
165
+ ### Technical Details
166
+
167
+ #### Dependencies
168
+ - Ruby 3.2+ required
169
+ - Ferrum 0.17.1 (Chrome DevTools Protocol)
170
+ - MCP 0.4.0 (Model Context Protocol)
171
+ - Puma 7.1 (HTTP server)
172
+ - Zeitwerk 2.7 (Autoloading)
173
+ - Optional: whisper-cli (CAPTCHA solving)
174
+ - Optional: BotBrowser (Anti-detection)
175
+
176
+ #### Browser Compatibility
177
+ - Google Chrome / Chromium
178
+ - Microsoft Edge
179
+ - Brave Browser
180
+ - BotBrowser (commercial, optional)
181
+
182
+ #### Supported Platforms
183
+ - macOS (development)
184
+ - Linux (Docker, CI)
185
+ - Windows (untested, should work)
186
+
187
+ ### Known Limitations
188
+
189
+ - **No authentication**: HTTP endpoint is open, intended for trusted environments
190
+ - **No rate limiting**: Session creation and tool execution not rate-limited
191
+ - **Branch coverage**: 55% (line coverage: 79%)
192
+
193
+ ### Breaking Changes
194
+
195
+ This is the initial release, so nothing breaks relative to an earlier version; however, note the following constraints for future versions:
196
+
197
+ - **Session-based architecture is required**: All browser operations require an explicit `session_id`
198
+ - Pre-session architecture is fully deprecated
199
+ - `start_browser` and `stop_browser` methods raise `NotImplementedError`
200
+
201
+ ### Security Notes
202
+
203
+ - **Trusted environment assumption**: No authentication on HTTP endpoint
204
+ - **Docker runs as root**: Use `--security-opt seccomp=unconfined` if needed
205
+ - **Arbitrary JavaScript execution**: `execute_script` and `evaluate_js` allow arbitrary code
206
+ - **File system access**: Screenshots and downloads have filesystem access
207
+ - **No sandboxing** beyond Chrome's built-in sandbox
208
+ - See [SECURITY.md](SECURITY.md) for security policy and responsible disclosure
209
+
210
+ ### Credits
211
+
212
+ - Built with [Ferrum](https://github.com/rubycdp/ferrum) - Ruby Chrome DevTools Protocol
213
+ - Implements [Model Context Protocol](https://github.com/anthropics/mcp) by Anthropic
214
+ - Whisper integration via [whisper.cpp](https://github.com/ggerganov/whisper.cpp)
215
+ - BotBrowser by [BotBrowser.com](https://botbrowser.com) (optional)
216
+
217
+ ### Contributors
218
+
219
+ - [@Eth3rnit3](https://github.com/Eth3rnit3) - Creator and maintainer
220
+
221
+ ---
222
+
223
+ ## Release Links
224
+
225
+ - [0.1.0] - Initial release (2024-11-22)
226
+ - [Unreleased] - Current development
227
+
228
+ [Unreleased]: https://github.com/Eth3rnit3/FerrumMCP/compare/v0.1.0...HEAD
229
+ [0.1.0]: https://github.com/Eth3rnit3/FerrumMCP/releases/tag/v0.1.0