ferrum-mcp 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.env.example +90 -0
- data/CHANGELOG.md +229 -0
- data/CONTRIBUTING.md +469 -0
- data/LICENSE +21 -0
- data/README.md +334 -0
- data/SECURITY.md +286 -0
- data/bin/ferrum-mcp +66 -0
- data/bin/lint +10 -0
- data/bin/serve +3 -0
- data/bin/test +4 -0
- data/docs/API_REFERENCE.md +1410 -0
- data/docs/CONFIGURATION.md +254 -0
- data/docs/DEPLOYMENT.md +846 -0
- data/docs/DOCKER.md +836 -0
- data/docs/DOCKER_BOTBROWSER.md +455 -0
- data/docs/GETTING_STARTED.md +249 -0
- data/docs/TROUBLESHOOTING.md +677 -0
- data/lib/ferrum_mcp/browser_manager.rb +101 -0
- data/lib/ferrum_mcp/cli/command_handler.rb +99 -0
- data/lib/ferrum_mcp/cli/server_runner.rb +166 -0
- data/lib/ferrum_mcp/configuration.rb +229 -0
- data/lib/ferrum_mcp/resource_manager.rb +223 -0
- data/lib/ferrum_mcp/server.rb +254 -0
- data/lib/ferrum_mcp/session.rb +227 -0
- data/lib/ferrum_mcp/session_manager.rb +183 -0
- data/lib/ferrum_mcp/tools/accept_cookies_tool.rb +458 -0
- data/lib/ferrum_mcp/tools/base_tool.rb +114 -0
- data/lib/ferrum_mcp/tools/clear_cookies_tool.rb +66 -0
- data/lib/ferrum_mcp/tools/click_tool.rb +218 -0
- data/lib/ferrum_mcp/tools/close_session_tool.rb +49 -0
- data/lib/ferrum_mcp/tools/create_session_tool.rb +146 -0
- data/lib/ferrum_mcp/tools/drag_and_drop_tool.rb +171 -0
- data/lib/ferrum_mcp/tools/evaluate_js_tool.rb +46 -0
- data/lib/ferrum_mcp/tools/execute_script_tool.rb +48 -0
- data/lib/ferrum_mcp/tools/fill_form_tool.rb +78 -0
- data/lib/ferrum_mcp/tools/find_by_text_tool.rb +153 -0
- data/lib/ferrum_mcp/tools/get_attribute_tool.rb +56 -0
- data/lib/ferrum_mcp/tools/get_cookies_tool.rb +70 -0
- data/lib/ferrum_mcp/tools/get_html_tool.rb +52 -0
- data/lib/ferrum_mcp/tools/get_session_info_tool.rb +40 -0
- data/lib/ferrum_mcp/tools/get_text_tool.rb +67 -0
- data/lib/ferrum_mcp/tools/get_title_tool.rb +42 -0
- data/lib/ferrum_mcp/tools/get_url_tool.rb +39 -0
- data/lib/ferrum_mcp/tools/go_back_tool.rb +49 -0
- data/lib/ferrum_mcp/tools/go_forward_tool.rb +49 -0
- data/lib/ferrum_mcp/tools/hover_tool.rb +76 -0
- data/lib/ferrum_mcp/tools/list_sessions_tool.rb +33 -0
- data/lib/ferrum_mcp/tools/navigate_tool.rb +59 -0
- data/lib/ferrum_mcp/tools/press_key_tool.rb +91 -0
- data/lib/ferrum_mcp/tools/query_shadow_dom_tool.rb +225 -0
- data/lib/ferrum_mcp/tools/refresh_tool.rb +49 -0
- data/lib/ferrum_mcp/tools/screenshot_tool.rb +121 -0
- data/lib/ferrum_mcp/tools/session_tool.rb +37 -0
- data/lib/ferrum_mcp/tools/set_cookie_tool.rb +77 -0
- data/lib/ferrum_mcp/tools/solve_captcha_tool.rb +528 -0
- data/lib/ferrum_mcp/transport/http_server.rb +93 -0
- data/lib/ferrum_mcp/transport/rate_limiter.rb +79 -0
- data/lib/ferrum_mcp/transport/stdio_server.rb +63 -0
- data/lib/ferrum_mcp/version.rb +5 -0
- data/lib/ferrum_mcp/whisper_service.rb +222 -0
- data/lib/ferrum_mcp.rb +35 -0
- metadata +248 -0
|
@@ -0,0 +1,183 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FerrumMCP
|
|
4
|
+
# Manages multiple browser sessions with thread-safety and automatic cleanup
|
|
5
|
+
class SessionManager
|
|
6
|
+
attr_reader :config, :logger
|
|
7
|
+
|
|
8
|
+
# Initial value of @session_timeout, in seconds (30 minutes).
DEFAULT_SESSION_TIMEOUT = 1_800
# Seconds the cleanup thread sleeps between two passes (5 minutes).
DEFAULT_CLEANUP_INTERVAL = 300
|
|
12
|
+
|
|
13
|
+
# Sets up an empty session registry guarded by a mutex and starts the cleanup thread.
# @param config [Object] configuration object; its #logger is stored as the manager's logger
def initialize(config)
  @mutex = Mutex.new
  @sessions = {}
  @cleanup_thread = nil
  @session_timeout = DEFAULT_SESSION_TIMEOUT
  @config = config
  @logger = config.logger

  start_cleanup_thread
end
|
|
23
|
+
|
|
24
|
+
# Builds a Session and registers it under its ID, unless the configured cap is reached.
# @param options [Hash] Browser options passed to Session.new for this session
# @return [String] ID of the newly registered session
# @raise [SessionError] when the registry already holds @config.max_sessions sessions
def create_session(options = {})
  @mutex.synchronize do
    limit = @config.max_sessions
    active = @sessions.size

    # Refuse before a Session object is built
    if active >= limit
      logger.warn "Session limit reached: #{active}/#{limit}"
      raise SessionError, "Maximum concurrent sessions limit reached (#{limit}). " \
                          'Please close some sessions before creating new ones.'
    end

    Session.new(config: @config, options: options).then do |session|
      @sessions[session.id] = session
      logger.info "Created session #{session.id} (#{session.browser_type}) - " \
                  "Active sessions: #{@sessions.size}/#{limit}"
      session.id
    end
  end
end
|
|
44
|
+
|
|
45
|
+
# Looks up a registered session under the registry lock.
# @param session_id [String] Session ID (required)
# @return [Session, nil] the session, or nil (after a warning is logged) when the ID is unknown
# @raise [ArgumentError] when session_id is nil or empty
def get_session(session_id)
  raise ArgumentError, 'session_id is required' if session_id.nil? || session_id.empty?

  @mutex.synchronize do
    @sessions[session_id] || begin
      logger.warn "Session not found: #{session_id}"
      nil
    end
  end
end
|
|
61
|
+
|
|
62
|
+
# Close a specific session and remove it from the registry.
#
# The entry is removed *before* the session is stopped, so a +stop+ that raises
# can no longer leave a dead session registered (and counted against the
# session limit). An error raised by +stop+ still propagates to the caller.
# @param session_id [String] Session ID
# @return [Boolean] true when a session was registered under the ID, false otherwise
def close_session(session_id)
  @mutex.synchronize do
    session = @sessions.delete(session_id)
    return false unless session

    logger.info "Closing session #{session_id}"
    session.stop
    true
  end
end
|
|
77
|
+
|
|
78
|
+
# Collects the #info of every registered session while holding the registry lock.
# @return [Array<Hash>] Session information, one entry per session
def list_sessions
  @mutex.synchronize do
    @sessions.each_value.map { |session| session.info }
  end
end
|
|
85
|
+
|
|
86
|
+
# Number of sessions currently held in the registry.
# @return [Integer]
def session_count
  @mutex.synchronize do
    @sessions.size
  end
end
|
|
91
|
+
|
|
92
|
+
# Stop every registered session and empty the registry.
#
# Sessions are stopped independently: an error raised while stopping one
# session is logged and the remaining sessions are still stopped. The registry
# is always cleared afterwards, so no entry survives a partial failure.
# @return [Hash] the (now empty) session registry
def close_all_sessions
  @mutex.synchronize do
    logger.info "Closing all #{@sessions.size} sessions"
    @sessions.each do |id, session|
      session.stop
    rescue StandardError => e
      logger.error "Failed to stop session #{id}: #{e.message}"
    end
    @sessions.clear
  end
end
|
|
100
|
+
|
|
101
|
+
# Runs the given block through the session's #with_browser, starting the session first if needed.
# @param session_id [String] Session ID (required)
# @yield forwarded unchanged to session.with_browser
# @return the value returned by session.with_browser
# @raise [ArgumentError] when session_id is nil or empty
# @raise [SessionError] when no session is registered under session_id
def with_session(session_id, &)
  raise ArgumentError, 'session_id is required' if session_id.nil? || session_id.empty?

  session = get_session(session_id) || raise(SessionError, "Session not found: #{session_id}")

  # Start the session only when it does not report itself as active
  session.start unless session.active?
  session.with_browser(&)
end
|
|
116
|
+
|
|
117
|
+
# Replaces the idle timeout that cleanup_idle_sessions passes to Session#idle?.
# @param timeout [Integer] Timeout in seconds
def session_timeout=(timeout)
  notice = "Session timeout set to #{timeout} seconds"

  @mutex.synchronize do
    @session_timeout = timeout
    logger.info notice
  end
end
|
|
125
|
+
|
|
126
|
+
# Stop the cleanup thread and close all sessions.
#
# Shutdown is best-effort: if stopping the cleanup thread raises, the error is
# logged and the sessions are still closed, so a thread problem cannot leave
# sessions running.
# @return the value returned by close_all_sessions
def shutdown
  logger.info 'Shutting down SessionManager'

  begin
    stop_cleanup_thread
  rescue StandardError => e
    logger.error "Failed to stop cleanup thread: #{e.message}"
  end

  close_all_sessions
end
|
|
132
|
+
|
|
133
|
+
private
|
|
134
|
+
|
|
135
|
+
# Launches the background thread that periodically calls cleanup_idle_sessions.
# Does nothing when a cleanup thread is already alive.
def start_cleanup_thread
  return if @cleanup_thread&.alive?

  # One pass per interval; an error in a pass is logged and the loop continues.
  worker = lambda do
    loop do
      begin
        sleep DEFAULT_CLEANUP_INTERVAL
        cleanup_idle_sessions
      rescue StandardError => e
        logger.error "Cleanup thread error: #{e.message}"
      end
    end
  end

  @cleanup_thread = Thread.new(&worker)
  @cleanup_thread.priority = -1 # Lower priority
  logger.debug 'Started session cleanup thread'
end
|
|
151
|
+
|
|
152
|
+
# Kills the cleanup thread, if one was started, and forgets it.
def stop_cleanup_thread
  return unless (worker = @cleanup_thread)

  worker.kill
  worker.join(5) # Wait max 5 seconds
  @cleanup_thread = nil
  logger.debug 'Stopped session cleanup thread'
end
|
|
161
|
+
|
|
162
|
+
# Closes every session that reports itself idle for longer than @session_timeout.
# IDs are collected under the lock; the sessions are closed afterwards through
# close_session, which takes the lock itself.
def cleanup_idle_sessions
  expired_ids = @mutex.synchronize do
    @sessions.select { |_id, session| session.idle?(@session_timeout) }.keys
  end

  expired_ids.each do |id|
    logger.info "Cleaning up idle session #{id}"
    close_session(id)
  end

  logger.debug "Cleaned up #{expired_ids.size} idle sessions" if expired_ids.any?
end
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Raised for session-related failures (for example by SessionManager#create_session
# when the session limit is reached).
class SessionError < StandardError
end
|
|
183
|
+
end
|
|
@@ -0,0 +1,458 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module FerrumMCP
|
|
4
|
+
module Tools
|
|
5
|
+
# Tool to automatically accept cookie consent banners
|
|
6
|
+
# Uses multiple strategies to find and click accept buttons
|
|
7
|
+
# rubocop:disable Metrics/ClassLength
|
|
8
|
+
class AcceptCookiesTool < BaseTool
|
|
9
|
+
# @return [String] name of this tool
def self.tool_name = 'accept_cookies'
|
|
12
|
+
|
|
13
|
+
# @return [String] one-sentence description of what the tool does
def self.description =
  'Automatically detect and accept cookie consent banners using multiple detection strategies'
|
|
16
|
+
|
|
17
|
+
# Describes the tool arguments: an optional numeric +wait+ and a required +session_id+.
# @return [Hash] schema hash with :type, :properties and :required keys
def self.input_schema
  wait_property = {
    type: 'number',
    description: 'Seconds to wait for cookie banner to appear (default: 3)',
    default: 3
  }
  session_property = { type: 'string', description: 'Session ID to use for this operation' }

  {
    type: 'object',
    properties: { wait: wait_property, session_id: session_property },
    required: ['session_id']
  }
end
|
|
34
|
+
|
|
35
|
+
# Lower-case captions treated as "accept" controls, grouped by language.
# Patterns are tried in the flattened order of this list.
ACCEPT_PATTERNS = [
  # English
  ['accept all cookies', 'accept all', 'accept cookies', 'accept and continue', 'allow all',
   'allow cookies', 'agree and continue', 'i accept', 'agree', 'consent', 'i agree', 'got it',
   'continue'],
  # French
  ['accepter et continuer', 'tout accepter', 'accepter tout', 'accepter les cookies', 'accepter',
   "j'accepte", "d'accord", 'autoriser tout', 'autoriser', 'consentir'],
  # German
  ['alle akzeptieren', 'akzeptieren', 'zustimmen', 'einverstanden'],
  # Spanish
  ['aceptar todas', 'aceptar todo', 'aceptar', 'de acuerdo', 'acepto'],
  # Italian
  ['accetta tutto', 'accetta', 'accetto', 'acconsento'],
  # Portuguese
  ['aceitar tudo', 'aceitar', 'aceito', 'concordo']
].flatten.freeze
|
|
54
|
+
|
|
55
|
+
# Lower-case caption fragments that mark a control as something other than
# "accept" (refuse / customise / settings). A candidate whose text contains any
# of these is skipped.
#
# Matching is by substring, so the last group lists captions that *contain* an
# accept pattern but mean the opposite: "disagree" contains "agree", and
# "continue without accepting" / "continuer sans accepter" contain "continue" /
# "accepter".
REJECT_PATTERNS = [
  'reject', 'refuse', 'decline', 'deny', 'refuser', 'ablehnen',
  'rechazar', 'rifiuta', 'recusar', 'customize', 'personaliser',
  'settings', 'options', 'manage', 'gérer',
  # Refusals that embed an accept pattern
  'disagree', 'without accepting', 'without agreeing', 'sans accepter'
].freeze
|
|
61
|
+
|
|
62
|
+
# Waits for a banner to appear, then runs the detection strategies in order
# until one of them reports a successful click.
# @param params [Hash] tool arguments; +:wait+ is read through #param and defaults to 3
# @return the value of success_response (message, strategy, selector) for the first
#   strategy that succeeds, otherwise the value of error_response; any StandardError
#   raised along the way is logged and also turned into an error_response
def execute(params)
  ensure_browser_active
  wait_time = param(params, :wait) || 3

  logger.info 'Attempting to accept cookies using multiple strategies...'

  # Give the cookie banner time to appear
  sleep wait_time

  # Ordered from most to least specific:
  # known frameworks, iframes, visible text, generic CSS selectors.
  strategies = %i[
    try_common_frameworks
    try_iframe_detection
    try_text_based_detection
    try_css_selectors
  ].map { |name| method(name) }

  strategies.each.with_index(1) do |strategy, position|
    logger.debug "Trying strategy #{position}/#{strategies.length}: #{strategy.name}"

    outcome = strategy.call
    next unless outcome[:found]

    return success_response(
      message: 'Cookie consent accepted successfully',
      strategy: strategy.name.to_s.gsub('try_', ''),
      selector: outcome[:selector]
    )
  end

  error_response('No cookie consent banner found or unable to accept')
rescue StandardError => e
  logger.error "Accept cookies failed: #{e.message}"
  error_response("Failed to accept cookies: #{e.message}")
end
|
|
101
|
+
|
|
102
|
+
private
|
|
103
|
+
|
|
104
|
+
# Strategy: try the accept-button selectors of known cookie consent frameworks.
#
# Only controls that grant or dismiss consent are listed. Didomi's
# ".didomi-continue-without-agreeing" link is deliberately NOT included: it
# refuses consent, and clicking it would be reported as a successful accept.
# @return [Hash] result of try_selectors: { found: true, selector: } or { found: false }
def try_common_frameworks
  logger.debug 'Trying common frameworks detection...'

  selectors = [
    # OneTrust
    '#onetrust-accept-btn-handler',
    '.onetrust-close-btn-handler',
    '#accept-recommended-btn-handler',

    # Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    '#CybotCookiebotDialogBodyButtonAccept',
    '.CybotCookiebotDialogBodyButton',

    # Cookie Notice
    '#cookie-notice-accept',
    '.cookie-notice-accept-button',

    # Osano
    '.osano-cm-accept-all',
    '.osano-cm-dialog__close',

    # Quantcast
    '.qc-cmp2-summary-buttons > button[mode="primary"]',
    'button[aria-label="AGREE"]',

    # TrustArc
    '#truste-consent-button',
    '.truste-button1',

    # Termly
    '#consent-accept-all',
    '.consent-accept-all-button',

    # Didomi
    '#didomi-notice-agree-button',

    # Sourcepoint
    'button.sp_choice_type_11', # Accept all
    'button[title="Accept all"]',
    'button[aria-label="Accept all"]',
    '.message-button.btn-primary',
    'button.message-component.sp_choice_type_11'
  ]

  try_selectors(selectors)
end
|
|
153
|
+
|
|
154
|
+
# Strategy: find an accept control by its visible text.
#
# For every accept pattern, <button> elements are tried first and <a> elements
# second. Links are tried whenever no button could be clicked for the pattern,
# including when no button matched at all, so link-only banners are handled.
# A candidate must be visible, must contain the pattern in its lower-cased
# text, and must not contain any reject pattern.
# @return [Hash] { found: true, selector: "xpath:..." } for the first clicked
#   element, otherwise { found: false }
def try_text_based_detection
  logger.debug 'Trying text-based detection...'

  ACCEPT_PATTERNS.each do |pattern|
    needle = pattern.downcase

    %w[button a].each do |tag|
      candidate = find_elements_by_text(pattern, tag: tag).find do |el|
        next false unless element_visible?(el)

        text = el.text.downcase.strip
        text.include?(needle) && REJECT_PATTERNS.none? { |reject| text.include?(reject) }
      end
      next unless candidate && click_element(candidate)

      return { found: true, selector: "xpath:#{build_xpath_for_text(pattern, tag)}" }
    end
  end

  { found: false }
end
|
|
205
|
+
|
|
206
|
+
# Strategy: generic CSS selectors (class fragments, common IDs/classes, data attributes).
# @return [Hash] result of try_selectors for the combined list
def try_css_selectors
  logger.debug 'Trying CSS selectors detection...'

  class_fragments = [
    'button[class*="accept"]', 'button[class*="consent"]', 'button[class*="agree"]',
    'a[class*="accept"]', 'a[class*="consent"]'
  ]
  common_ids = ['#accept-cookies', '#acceptCookies', '#cookie-accept', '#cookieAccept', '#cookies-accept']
  common_classes = [
    '.accept-cookies', '.accept-all', '.cookie-accept',
    '.cookies-accept', '.consent-accept', '.btn-accept'
  ]
  data_attributes = [
    '[data-action="accept"]', '[data-cookie="accept"]',
    '[data-consent="accept"]', '[data-cookie-consent="accept"]'
  ]

  try_selectors(class_fragments + common_ids + common_classes + data_attributes)
end
|
|
242
|
+
|
|
243
|
+
# Strategy: match accept patterns against aria-label attributes of buttons and
# links (case-insensitive substring match via the CSS "i" flag).
# Note: this method is not part of the strategy list assembled in #execute.
# @return [Hash] first successful try_selectors result, otherwise { found: false }
def try_aria_labels
  logger.debug 'Trying ARIA labels detection...'

  ACCEPT_PATTERNS.each do |pattern|
    outcome = try_selectors(%w[button a].map { |tag| "#{tag}[aria-label*=\"#{pattern}\" i]" })
    return outcome if outcome[:found]
  end

  { found: false }
end
|
|
259
|
+
|
|
260
|
+
# Strategy: look for a cookie banner inside iframes.
#
# Every frame after the first one returned by browser.frames is probed with the
# iframe variants of the framework, text and CSS strategies, in that order.
# A frame that raises while being probed is logged and skipped.
# @return [Hash] { found: true, selector: "iframe[i] > ..." } (i is the zero-based
#   position among the probed frames) or { found: false }
def try_iframe_detection
  logger.debug 'Trying iframe detection...'

  iframes = browser.css('iframe')
  return { found: false } if iframes.empty?

  logger.debug "Found #{iframes.length} iframe(s), checking for cookie banners..."

  frames = browser.frames
  return { found: false } if frames.empty?

  probes = %i[try_iframe_frameworks try_iframe_text_detection try_iframe_css_selectors]

  # The first frame is the main document; it is covered by the other strategies.
  frames.drop(1).each_with_index do |frame, index|
    next unless frame

    logger.debug "Checking iframe #{index + 1}: #{frame.url}"

    probes.each do |probe|
      outcome = send(probe, frame)
      return { found: true, selector: "iframe[#{index}] > #{outcome[:selector]}" } if outcome[:found]
    end
  rescue StandardError => e
    logger.debug "Cannot access iframe #{index + 1}: #{e.message}"
  end

  { found: false }
end
|
|
295
|
+
|
|
296
|
+
# Tries a short list of framework accept-button selectors inside one frame.
# The list is a subset of the selectors used by try_common_frameworks. Unlike
# try_selectors, no visibility or reject-text check is applied here.
# @param frame [Object] frame object; must respond to at_css
# @return [Hash] { found: true, selector: } for the first clicked element, else { found: false }
def try_iframe_frameworks(frame)
  selectors = [
    # OneTrust
    '#onetrust-accept-btn-handler', '.onetrust-close-btn-handler',
    # Cookiebot
    '#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll',
    # Sourcepoint (most important for Guardian)
    'button.sp_choice_type_11', 'button[title="Accept all"]', 'button[aria-label="Accept all"]',
    # Didomi
    '#didomi-notice-agree-button'
  ]

  selectors.each do |selector|
    node = frame.at_css(selector)
    next unless node && click_element(node)

    logger.info "Successfully clicked in iframe: #{selector}"
    return { found: true, selector: selector }
  rescue StandardError => e
    logger.debug "Iframe selector '#{selector}' failed: #{e.message}"
  end

  { found: false }
end
|
|
330
|
+
|
|
331
|
+
# Looks inside one frame for a <button> whose lower-cased text contains one of a
# few fixed accept phrases, and clicks the first match.
# @param frame [Object] frame object; must respond to css
# @return [Hash] { found: true, selector: "button:contains('...')" } or { found: false }
def try_iframe_text_detection(frame)
  # A button whose text cannot be read is treated as having an empty caption.
  caption_of = lambda do |node|
    node.text.downcase.strip
  rescue StandardError
    ''
  end

  ['accept all', 'accept and continue', 'accepter et continuer'].each do |pattern|
    match = frame.css('button').find { |node| caption_of.call(node).include?(pattern) }
    next unless match && click_element(match)

    return { found: true, selector: "button:contains('#{pattern}')" }
  rescue StandardError => e
    logger.debug "Iframe text pattern '#{pattern}' failed: #{e.message}"
  end

  { found: false }
end
|
|
353
|
+
|
|
354
|
+
# Tries three generic accept selectors inside one frame and clicks the first element found.
# @param frame [Object] frame object; must respond to at_css
# @return [Hash] { found: true, selector: } or { found: false }
def try_iframe_css_selectors(frame)
  ['button[class*="accept"]', 'button[class*="consent"]', '.accept-cookies'].each do |selector|
    node = frame.at_css(selector)
    return { found: true, selector: selector } if node && click_element(node)
  rescue StandardError => e
    logger.debug "Iframe CSS selector '#{selector}' failed: #{e.message}"
  end

  { found: false }
end
|
|
373
|
+
|
|
374
|
+
# Helper: walk a list of CSS selectors and click the first usable match.
# A match is usable when it exists, is visible, and its lower-cased text contains
# no reject pattern. A selector that raises is logged and skipped.
# @param selectors [Array<String>] CSS selectors, tried in order
# @return [Hash] { found: true, selector: } for the first clicked element, else { found: false }
def try_selectors(selectors)
  selectors.each do |selector|
    candidate = browser.at_css(selector)
    next unless candidate && element_visible?(candidate)

    # An element whose text cannot be read is treated as having no caption
    caption = begin
      candidate.text.downcase.strip
    rescue StandardError
      ''
    end
    next if REJECT_PATTERNS.any? { |reject| caption.include?(reject) }
    next unless click_element(candidate)

    logger.info "Successfully clicked: #{selector}"
    return { found: true, selector: selector }
  rescue StandardError => e
    logger.debug "Selector '#{selector}' failed: #{e.message}"
  end

  { found: false }
end
|
|
401
|
+
|
|
402
|
+
# Helper: find elements whose normalised, lower-cased text contains +text+.
#
# The XPath comes from build_xpath_for_text, so the expression used for the
# search is always the one reported back as the selector.
# @param text [String] text to look for
# @param tag [String] element name to restrict the search to ('*' matches any element)
# @return [Array] whatever browser.xpath returns, or [] when building or running the query raises
def find_elements_by_text(text, tag: '*')
  browser.xpath(build_xpath_for_text(text, tag))
rescue StandardError => e
  logger.debug "XPath search for '#{text}' failed: #{e.message}"
  []
end
|
|
413
|
+
|
|
414
|
+
# Helper: XPath matching +tag+ elements whose whitespace-normalised text, with
# A-Z folded to lower case, contains +text+.
# @param text [String] text to search for (quoted through escape_xpath_string)
# @param tag [String] element name, or '*'
# @return [String] the XPath expression
def build_xpath_for_text(text, tag)
  needle = escape_xpath_string(text)
  # XPath 1.0 has no lower-case(); translate() folds the ASCII letters only
  lowered_text = "translate(normalize-space(.), 'ABCDEFGHIJKLMNOPQRSTUVWXYZ', 'abcdefghijklmnopqrstuvwxyz')"

  "//#{tag}[contains(#{lowered_text}, #{needle})]"
end
|
|
420
|
+
|
|
421
|
+
# Helper: turn +text+ into a lower-cased XPath string literal.
#
# Text without an apostrophe becomes a plain single-quoted literal. XPath 1.0
# has no escape character, so text containing apostrophes is assembled with
# concat(), using a double-quoted "'" between the single-quoted pieces.
# @param text [String] raw text
# @return [String] XPath expression evaluating to the lower-cased text
def escape_xpath_string(text)
  lowered = text.downcase
  return "'#{lowered}'" unless lowered.include?("'")

  # A limit of -1 keeps trailing empty fields. Without it, text ending in an
  # apostrophe would lose that apostrophe and produce concat() with fewer than
  # two arguments, which is invalid XPath.
  pieces = lowered.split("'", -1).map { |part| "'#{part}'" }
  "concat(#{pieces.join(", \"'\", ")})"
end
|
|
429
|
+
|
|
430
|
+
# Helper: click an element natively, falling back to a JavaScript click.
# @param element [Object, nil] element to click; nil/false yields false immediately
# @return [Boolean] true when either the native or the JavaScript click ran without
#   raising, false when both failed or no element was given
def click_element(element)
  return false unless element

  # First attempt: native click (scrolling first when the element supports it)
  element.scroll_into_view if element.respond_to?(:scroll_into_view)
  element.click
  sleep 0.5 # Wait for any animations
  true
rescue StandardError => e
  logger.debug "Native click failed: #{e.message}, trying JavaScript..."

  # Second attempt: scroll and click from inside the page
  js_click = <<~JAVASCRIPT
    arguments[0].scrollIntoView({ behavior: 'instant', block: 'center' });
    arguments[0].click();
  JAVASCRIPT

  begin
    browser.execute(js_click, element)
    sleep 0.5
    true
  rescue StandardError => js_error
    logger.debug "JavaScript click also failed: #{js_error.message}"
    false
  end
end
|
|
455
|
+
end
|
|
456
|
+
# rubocop:enable Metrics/ClassLength
|
|
457
|
+
end
|
|
458
|
+
end
|