ferrum-mcp 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63) hide show
  1. checksums.yaml +7 -0
  2. data/.env.example +90 -0
  3. data/CHANGELOG.md +229 -0
  4. data/CONTRIBUTING.md +469 -0
  5. data/LICENSE +21 -0
  6. data/README.md +334 -0
  7. data/SECURITY.md +286 -0
  8. data/bin/ferrum-mcp +66 -0
  9. data/bin/lint +10 -0
  10. data/bin/serve +3 -0
  11. data/bin/test +4 -0
  12. data/docs/API_REFERENCE.md +1410 -0
  13. data/docs/CONFIGURATION.md +254 -0
  14. data/docs/DEPLOYMENT.md +846 -0
  15. data/docs/DOCKER.md +836 -0
  16. data/docs/DOCKER_BOTBROWSER.md +455 -0
  17. data/docs/GETTING_STARTED.md +249 -0
  18. data/docs/TROUBLESHOOTING.md +677 -0
  19. data/lib/ferrum_mcp/browser_manager.rb +101 -0
  20. data/lib/ferrum_mcp/cli/command_handler.rb +99 -0
  21. data/lib/ferrum_mcp/cli/server_runner.rb +166 -0
  22. data/lib/ferrum_mcp/configuration.rb +229 -0
  23. data/lib/ferrum_mcp/resource_manager.rb +223 -0
  24. data/lib/ferrum_mcp/server.rb +254 -0
  25. data/lib/ferrum_mcp/session.rb +227 -0
  26. data/lib/ferrum_mcp/session_manager.rb +183 -0
  27. data/lib/ferrum_mcp/tools/accept_cookies_tool.rb +458 -0
  28. data/lib/ferrum_mcp/tools/base_tool.rb +114 -0
  29. data/lib/ferrum_mcp/tools/clear_cookies_tool.rb +66 -0
  30. data/lib/ferrum_mcp/tools/click_tool.rb +218 -0
  31. data/lib/ferrum_mcp/tools/close_session_tool.rb +49 -0
  32. data/lib/ferrum_mcp/tools/create_session_tool.rb +146 -0
  33. data/lib/ferrum_mcp/tools/drag_and_drop_tool.rb +171 -0
  34. data/lib/ferrum_mcp/tools/evaluate_js_tool.rb +46 -0
  35. data/lib/ferrum_mcp/tools/execute_script_tool.rb +48 -0
  36. data/lib/ferrum_mcp/tools/fill_form_tool.rb +78 -0
  37. data/lib/ferrum_mcp/tools/find_by_text_tool.rb +153 -0
  38. data/lib/ferrum_mcp/tools/get_attribute_tool.rb +56 -0
  39. data/lib/ferrum_mcp/tools/get_cookies_tool.rb +70 -0
  40. data/lib/ferrum_mcp/tools/get_html_tool.rb +52 -0
  41. data/lib/ferrum_mcp/tools/get_session_info_tool.rb +40 -0
  42. data/lib/ferrum_mcp/tools/get_text_tool.rb +67 -0
  43. data/lib/ferrum_mcp/tools/get_title_tool.rb +42 -0
  44. data/lib/ferrum_mcp/tools/get_url_tool.rb +39 -0
  45. data/lib/ferrum_mcp/tools/go_back_tool.rb +49 -0
  46. data/lib/ferrum_mcp/tools/go_forward_tool.rb +49 -0
  47. data/lib/ferrum_mcp/tools/hover_tool.rb +76 -0
  48. data/lib/ferrum_mcp/tools/list_sessions_tool.rb +33 -0
  49. data/lib/ferrum_mcp/tools/navigate_tool.rb +59 -0
  50. data/lib/ferrum_mcp/tools/press_key_tool.rb +91 -0
  51. data/lib/ferrum_mcp/tools/query_shadow_dom_tool.rb +225 -0
  52. data/lib/ferrum_mcp/tools/refresh_tool.rb +49 -0
  53. data/lib/ferrum_mcp/tools/screenshot_tool.rb +121 -0
  54. data/lib/ferrum_mcp/tools/session_tool.rb +37 -0
  55. data/lib/ferrum_mcp/tools/set_cookie_tool.rb +77 -0
  56. data/lib/ferrum_mcp/tools/solve_captcha_tool.rb +528 -0
  57. data/lib/ferrum_mcp/transport/http_server.rb +93 -0
  58. data/lib/ferrum_mcp/transport/rate_limiter.rb +79 -0
  59. data/lib/ferrum_mcp/transport/stdio_server.rb +63 -0
  60. data/lib/ferrum_mcp/version.rb +5 -0
  61. data/lib/ferrum_mcp/whisper_service.rb +222 -0
  62. data/lib/ferrum_mcp.rb +35 -0
  63. metadata +248 -0
@@ -0,0 +1,225 @@
1
+ # frozen_string_literal: true
2
+
3
module FerrumMCP
  module Tools
    # Tool to interact with Shadow DOM elements.
    #
    # The host element is located in the page with +host_selector+, its
    # +shadowRoot+ is searched with +shadow_selector+, and the match is either
    # clicked or has its text, inner HTML, or one attribute read back. All DOM
    # work is done by JavaScript sent to the browser.
    class QueryShadowDOMTool < BaseTool
      # Actions understood by #execute; also published as the schema enum.
      ACTIONS = %w[click get_text get_html get_attribute].freeze

      def self.tool_name
        'query_shadow_dom'
      end

      def self.description
        'Query and interact with elements inside Shadow DOM'
      end

      def self.input_schema
        {
          type: 'object',
          properties: {
            host_selector: {
              type: 'string',
              description: 'CSS selector of the Shadow DOM host element'
            },
            shadow_selector: {
              type: 'string',
              description: 'CSS selector to find element(s) within the Shadow DOM'
            },
            action: {
              type: 'string',
              description: 'Action to perform: click, get_text, get_html, or get_attribute',
              enum: ACTIONS
            },
            attribute: {
              type: 'string',
              description: 'Attribute name (required when action is get_attribute)'
            },
            multiple: {
              type: 'boolean',
              description: 'Return all matching elements (default: false)',
              default: false
            },
            session_id: {
              type: 'string',
              description: 'Session ID to use for this operation'
            }
          },
          required: %w[host_selector shadow_selector action session_id]
        }
      end

      # Runs the requested action against an element inside a shadow root.
      #
      # @param params [Hash] tool arguments; each key is looked up first as a
      #   String and then as a Symbol
      # @return the value of +success_response+ wrapping the action result, or
      #   of +error_response+ when any StandardError is raised along the way
      def execute(params)
        # Same liveness guard the other browser tools run before touching the page.
        ensure_browser_active

        host_selector = params['host_selector'] || params[:host_selector]
        shadow_selector = params['shadow_selector'] || params[:shadow_selector]
        action = params['action'] || params[:action]
        attribute = params['attribute'] || params[:attribute]
        multiple = params['multiple'] || params[:multiple] || false

        logger.info "Querying Shadow DOM: #{host_selector} -> #{shadow_selector}, action: #{action}"

        # Fail early with a readable message instead of letting a nil selector
        # be interpolated into the script as the bare identifier `nil`.
        require_selector!('host_selector', host_selector)
        require_selector!('shadow_selector', shadow_selector)

        result = case action
                 when 'click'
                   click_in_shadow_dom(host_selector, shadow_selector)
                 when 'get_text'
                   get_text_from_shadow_dom(host_selector, shadow_selector, multiple)
                 when 'get_html'
                   get_html_from_shadow_dom(host_selector, shadow_selector, multiple)
                 when 'get_attribute'
                   raise ToolError, 'attribute parameter required for get_attribute action' unless attribute

                   get_attribute_from_shadow_dom(host_selector, shadow_selector, attribute, multiple)
                 else
                   raise ToolError, "Unknown action: #{action}"
                 end

        success_response(result)
      rescue StandardError => e
        logger.error "Shadow DOM query failed: #{e.message}"
        error_response("Failed to query Shadow DOM: #{e.message}")
      end

      private

      # Scrolls the first match into view and clicks it.
      #
      # @return [Hash] +{ message: String }+
      def click_in_shadow_dom(host_selector, shadow_selector)
        body = single_element_body(<<~JS.strip)
          element.scrollIntoView({ behavior: 'instant', block: 'center' });
          element.click();
          return true;
        JS

        browser.execute(shadow_script(host_selector, shadow_selector, body))
        { message: "Clicked element in Shadow DOM: #{shadow_selector}" }
      end

      # Reads +textContent+ of the first match, or of every match when
      # +multiple+ is truthy.
      #
      # @return [Hash] +{ text: }+ or +{ texts:, count: }+
      def get_text_from_shadow_dom(host_selector, shadow_selector, multiple)
        result = read_from_shadow_dom(host_selector, shadow_selector, multiple, 'element.textContent')
        multiple ? { texts: result, count: result.length } : { text: result }
      end

      # Reads +innerHTML+ of the first match, or of every match when
      # +multiple+ is truthy.
      #
      # @return [Hash] +{ html: }+ or +{ html:, count: }+
      def get_html_from_shadow_dom(host_selector, shadow_selector, multiple)
        result = read_from_shadow_dom(host_selector, shadow_selector, multiple, 'element.innerHTML')
        multiple ? { html: result, count: result.length } : { html: result }
      end

      # Reads the named attribute of the first match, or of every match when
      # +multiple+ is truthy.
      #
      # @return [Hash] +{ value: }+ or +{ values:, count: }+
      def get_attribute_from_shadow_dom(host_selector, shadow_selector, attribute, multiple)
        expression = "element.getAttribute(#{attribute.inspect})"
        result = read_from_shadow_dom(host_selector, shadow_selector, multiple, expression)
        multiple ? { values: result, count: result.length } : { value: result }
      end

      # Evaluates +expression+ (JavaScript written in terms of +element+) for
      # the first match, or maps it over all matches when +multiple+ is truthy.
      def read_from_shadow_dom(host_selector, shadow_selector, multiple, expression)
        body = if multiple
                 all_elements_body(expression)
               else
                 single_element_body("return #{expression};")
               end

        browser.evaluate(shadow_script(host_selector, shadow_selector, body))
      end

      # Wraps +body+ in a self-invoking function that first resolves the shadow
      # host. Inside +body+ the names +root+ (the shadowRoot) and +selector+
      # (the inner selector string) are available.
      #
      # Selectors are embedded through String#inspect, which yields a quoted,
      # backslash-escaped literal.
      # NOTE(review): inspect follows Ruby escaping rules, not JavaScript's;
      # a JSON encoder would be the stricter choice if JSON is loaded here.
      def shadow_script(host_selector, shadow_selector, body)
        <<~JS.strip
          (function() {
            var host = document.querySelector(#{host_selector.inspect});
            if (!host || !host.shadowRoot) {
              throw new Error('Shadow DOM host not found or has no shadowRoot');
            }
            var root = host.shadowRoot;
            var selector = #{shadow_selector.inspect};
            #{body}
          })()
        JS
      end

      # JavaScript that binds the first match to +element+, throws when there
      # is none, and then runs +statements+.
      def single_element_body(statements)
        <<~JS.strip
          var element = root.querySelector(selector);
          if (!element) {
            throw new Error('Element not found in Shadow DOM');
          }
          #{statements}
        JS
      end

      # JavaScript that returns an array holding +expression+ evaluated for
      # every match (an empty array when nothing matches).
      def all_elements_body(expression)
        'return Array.from(root.querySelectorAll(selector)).map(' \
          "function(element) { return #{expression}; });"
      end

      # @raise [ToolError] when +value+ is nil or blank
      def require_selector!(name, value)
        return unless value.nil? || value.to_s.strip.empty?

        raise ToolError, "#{name} parameter is required"
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
module FerrumMCP
  module Tools
    # Tool to refresh the current page
    class RefreshTool < BaseTool
      # Seconds to wait for network activity to settle after the reload.
      NETWORK_IDLE_TIMEOUT = 30

      def self.tool_name
        'refresh'
      end

      def self.description
        'Refresh the current page'
      end

      def self.input_schema
        {
          type: 'object',
          properties: {
            session_id: {
              type: 'string',
              description: 'Session ID to use for this operation'
            }
          },
          required: ['session_id']
        }
      end

      # Reloads the current page and reports where the browser ended up.
      #
      # @param _params [Hash] unused by this method
      # @return the value of +success_response+ carrying the page +url+ and
      #   +title+, or of +error_response+ when the reload raises
      def execute(_params)
        ensure_browser_active
        logger.info 'Refreshing page'
        browser.refresh

        # Wait for network to be idle to ensure page is reloaded
        idle = browser.network.wait_for_idle(timeout: NETWORK_IDLE_TIMEOUT)

        # NOTE(review): newer Ferrum releases appear to signal an idle timeout
        # by returning false from wait_for_idle rather than raising
        # Ferrum::TimeoutError - confirm against the pinned Ferrum version.
        # The comparison is deliberately against false, so a nil return is
        # not treated as a timeout.
        if idle == false
          logger.warn "Network still busy #{NETWORK_IDLE_TIMEOUT}s after refresh; page may not be fully loaded"
        end

        success_response(
          url: browser.url,
          title: browser.title
        )
      rescue Ferrum::TimeoutError => e
        logger.error "Refresh timeout: #{e.message}"
        error_response("Refresh timed out: #{e.message}")
      rescue StandardError => e
        logger.error "Refresh failed: #{e.message}"
        error_response("Failed to refresh: #{e.message}")
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'vips'
4
+
5
module FerrumMCP
  module Tools
    # Tool to take screenshots
    #
    # Captures the page (or one element) through the browser, shrinks the
    # image when a side exceeds MAX_DIMENSION, and returns it base64-encoded
    # through +image_response+.
    class ScreenshotTool < BaseTool
      # Claude API has a maximum dimension of 8000 pixels per side
      MAX_DIMENSION = 8000

      def self.tool_name
        'screenshot'
      end

      def self.description
        'Take a screenshot of the page or a specific element'
      end

      def self.input_schema
        {
          type: 'object',
          properties: {
            selector: {
              type: 'string',
              description: 'Optional: CSS selector to screenshot specific element'
            },
            full_page: {
              type: 'boolean',
              description: 'Capture full scrollable page (default: false)',
              default: false
            },
            format: {
              type: 'string',
              enum: %w[png jpeg],
              description: 'Image format (default: png)',
              default: 'png'
            },
            session_id: {
              type: 'string',
              description: 'Session ID to use for this operation'
            }
          },
          required: ['session_id']
        }
      end

      # Takes the screenshot described by +params+.
      #
      # @param params [Hash] tool arguments (+selector+, +full_page+, +format+)
      # @return the value of +image_response+ for the encoded image, or of
      #   +error_response+ when any StandardError is raised
      def execute(params)
        ensure_browser_active
        selector = param(params, :selector)
        full_page = param(params, :full_page) || false
        format = param(params, :format) || 'png'

        logger.info 'Taking screenshot'

        # If selector provided, verify element exists and is visible
        if selector
          element = find_element(selector)
          element.scroll_into_view if element.respond_to?(:scroll_into_view)

          # Small delay to ensure element is fully rendered
          sleep 0.1
        end

        # Request binary encoding from Ferrum (by default it returns base64)
        options = { format: format, full: full_page, encoding: :binary }

        # Add selector to options if provided
        options[:selector] = selector if selector

        screenshot_data = browser.screenshot(**options)

        # Resize if dimensions exceed Claude API limits
        screenshot_data = resize_if_needed(screenshot_data, format)

        # Now encode the binary data to base64 for MCP
        base64_data = Base64.strict_encode64(screenshot_data)
        mime_type = format == 'png' ? 'image/png' : 'image/jpeg'

        # Use image_response for MCP image injection
        image_response(base64_data, mime_type)
      rescue StandardError => e
        logger.error "Screenshot failed: #{e.message}"
        error_response("Failed to take screenshot: #{e.message}")
      end

      private

      # Resize image if any dimension exceeds MAX_DIMENSION
      #
      # Any failure while decoding or resizing is logged and the untouched
      # input is returned, so a screenshot is never lost to a resize problem.
      #
      # @param image_data [String] Binary image data
      # @param format [String] Image format ('png' or 'jpeg')
      # @return [String] Resized binary image data (or original if no resize needed)
      def resize_if_needed(image_data, format)
        image = Vips::Image.new_from_buffer(image_data, '')
        width = image.width
        height = image.height

        # Check if resize is needed
        if width <= MAX_DIMENSION && height <= MAX_DIMENSION
          logger.debug "Screenshot dimensions (#{width}x#{height}) within limits, no resize needed"
          return image_data
        end

        new_width, new_height = scaled_dimensions(width, height)

        logger.info "Resizing screenshot from #{width}x#{height} to #{new_width}x#{new_height}"

        # NOTE(review): size: :force is understood to make vips produce exactly
        # the requested width and height - confirm against the ruby-vips docs.
        resized = image.thumbnail_image(new_width, height: new_height, size: :force)

        # Return resized binary data in the correct format
        resized.write_to_buffer(".#{format}")
      rescue StandardError => e
        logger.warn "Failed to resize screenshot: #{e.message}, returning original"
        image_data
      end

      # Scales +width+ x +height+ by one common factor so the longer side fits
      # within MAX_DIMENSION.
      #
      # Each side is clamped to at least 1 pixel: with an extreme aspect ratio
      # (e.g. 16000x1) plain truncation would give a zero-pixel side, the
      # resize would fail, and the oversized original would be returned.
      #
      # @param width [Integer] current width in pixels
      # @param height [Integer] current height in pixels
      # @return [Array(Integer, Integer)] target width and height
      def scaled_dimensions(width, height)
        scale = [MAX_DIMENSION.to_f / width, MAX_DIMENSION.to_f / height].min
        [width, height].map { |side| (side * scale).to_i.clamp(1, MAX_DIMENSION) }
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module FerrumMCP
  module Tools
    # Base class for session management tools
    #
    # Holds the session manager and its logger, and declares the three class
    # level hooks (.tool_name, .description, .input_schema) that concrete
    # tools are expected to override.
    class SessionTool
      class << self
        # @raise [NotImplementedError] always, until a subclass overrides it
        def tool_name
          raise NotImplementedError, 'Subclasses must implement .tool_name'
        end

        # @raise [NotImplementedError] always, until a subclass overrides it
        def description
          raise NotImplementedError, 'Subclasses must implement .description'
        end

        # @raise [NotImplementedError] always, until a subclass overrides it
        def input_schema
          raise NotImplementedError, 'Subclasses must implement .input_schema'
        end
      end

      attr_reader :session_manager, :logger

      # @param session_manager [#logger] manager this tool operates on; its
      #   logger is reused as the tool's logger
      def initialize(session_manager)
        @session_manager = session_manager
        @logger = session_manager.logger
      end

      # Builds the success envelope.
      #
      # @param data [Object] payload to return (defaults to an empty Hash)
      # @return [Hash] +{ success: true, data: data }+
      def success_response(data = {})
        { success: true, data: data }
      end

      # Builds the failure envelope.
      #
      # @param message [String] description of what went wrong
      # @return [Hash] +{ success: false, error: message }+
      def error_response(message)
        { success: false, error: message }
      end

      protected :success_response, :error_response
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
module FerrumMCP
  module Tools
    # Tool to set a cookie
    class SetCookieTool < BaseTool
      class << self
        def tool_name
          'set_cookie'
        end

        def description
          'Set a cookie in the browser'
        end

        # JSON-schema style description of the accepted arguments.
        def input_schema
          text = ->(description) { { type: 'string', description: description } }
          flag = ->(description) { { type: 'boolean', description: description, default: false } }

          {
            type: 'object',
            properties: {
              name: text.call('Cookie name'),
              value: text.call('Cookie value'),
              domain: text.call('Cookie domain'),
              path: text.call('Cookie path (default: /)').merge(default: '/'),
              secure: flag.call('Secure flag (default: false)'),
              httponly: flag.call('HttpOnly flag (default: false)'),
              session_id: text.call('Session ID to use for this operation')
            },
            required: %w[name value domain session_id]
          }
        end
      end

      # Builds a cookie from +params+ and hands it to the browser's cookie jar.
      #
      # @param params [Hash] tool arguments; each key is looked up first as a
      #   String and then as a Symbol
      # @return the value of +success_response+ with a confirmation message,
      #   or of +error_response+ when any StandardError is raised
      def execute(params)
        ensure_browser_active

        read = ->(key) { params[key.to_s] || params[key] }

        cookie = {
          name: read.call(:name),
          value: read.call(:value),
          domain: read.call(:domain),
          path: read.call(:path) || '/',
          secure: read.call(:secure) || false,
          # The tool argument is spelled "httponly"; the cookie key is httpOnly.
          httpOnly: read.call(:httponly) || false
        }
        cookie_name = cookie[:name]

        logger.info "Setting cookie: #{cookie_name}"
        browser.cookies.set(**cookie)

        success_response(message: "Cookie set: #{cookie_name}")
      rescue StandardError => e
        logger.error "Set cookie failed: #{e.message}"
        error_response("Failed to set cookie: #{e.message}")
      end
    end
  end
end