crucible 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +102 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +366 -0
  7. data/Rakefile +23 -0
  8. data/TESTING.md +319 -0
  9. data/config.sample.yml +48 -0
  10. data/crucible.gemspec +48 -0
  11. data/exe/crucible +122 -0
  12. data/lib/crucible/configuration.rb +212 -0
  13. data/lib/crucible/server.rb +123 -0
  14. data/lib/crucible/session_manager.rb +209 -0
  15. data/lib/crucible/stealth/evasions/chrome_app.js +75 -0
  16. data/lib/crucible/stealth/evasions/chrome_csi.js +33 -0
  17. data/lib/crucible/stealth/evasions/chrome_load_times.js +44 -0
  18. data/lib/crucible/stealth/evasions/chrome_runtime.js +190 -0
  19. data/lib/crucible/stealth/evasions/iframe_content_window.js +101 -0
  20. data/lib/crucible/stealth/evasions/media_codecs.js +65 -0
  21. data/lib/crucible/stealth/evasions/navigator_hardware_concurrency.js +18 -0
  22. data/lib/crucible/stealth/evasions/navigator_languages.js +18 -0
  23. data/lib/crucible/stealth/evasions/navigator_permissions.js +53 -0
  24. data/lib/crucible/stealth/evasions/navigator_plugins.js +261 -0
  25. data/lib/crucible/stealth/evasions/navigator_vendor.js +18 -0
  26. data/lib/crucible/stealth/evasions/navigator_webdriver.js +16 -0
  27. data/lib/crucible/stealth/evasions/webgl_vendor.js +43 -0
  28. data/lib/crucible/stealth/evasions/window_outerdimensions.js +18 -0
  29. data/lib/crucible/stealth/utils.js +266 -0
  30. data/lib/crucible/stealth.rb +213 -0
  31. data/lib/crucible/tools/cookies.rb +206 -0
  32. data/lib/crucible/tools/downloads.rb +273 -0
  33. data/lib/crucible/tools/extraction.rb +335 -0
  34. data/lib/crucible/tools/helpers.rb +46 -0
  35. data/lib/crucible/tools/interaction.rb +355 -0
  36. data/lib/crucible/tools/navigation.rb +181 -0
  37. data/lib/crucible/tools/sessions.rb +85 -0
  38. data/lib/crucible/tools/stealth.rb +167 -0
  39. data/lib/crucible/tools.rb +42 -0
  40. data/lib/crucible/version.rb +5 -0
  41. data/lib/crucible.rb +60 -0
  42. metadata +201 -0
data/TESTING.md ADDED
@@ -0,0 +1,319 @@
1
+ # Testing Guide
2
+
3
+ This document covers the testing setup and practices for Crucible.
4
+
5
+ ## Running the Server
6
+
7
+ ```bash
8
+ # Run directly (no bundle exec needed)
9
+ ./exe/crucible
10
+
11
+ # With options
12
+ ./exe/crucible --no-headless --width 1920 --height 1080
13
+
14
+ # Show all options
15
+ ./exe/crucible --help
16
+ ```
17
+
18
+ ## Running Tests
19
+
20
+ ```bash
21
+ # Run all tests
22
+ bundle exec rspec
23
+
24
+ # Run with documentation format
25
+ bundle exec rspec --format doc
26
+
27
+ # Run specific test file
28
+ bundle exec rspec spec/tools/navigation_spec.rb
29
+
30
+ # Run specific test by line number
31
+ bundle exec rspec spec/tools/navigation_spec.rb:44
32
+
33
+ # Run tests matching a pattern
34
+ bundle exec rspec --example "navigate tool"
35
+ ```
36
+
37
+ ## Test Structure
38
+
39
+ ```
40
+ spec/
41
+ ├── spec_helper.rb # Test configuration and helpers
42
+ ├── crucible_spec.rb # Core module tests
43
+ ├── configuration_spec.rb # Configuration validation tests
44
+ ├── session_manager_spec.rb # Session lifecycle tests
45
+ ├── tools/
46
+ │ ├── navigation_spec.rb # navigate, wait_for, back, forward, refresh
47
+ │ ├── interaction_spec.rb # click, type, fill_form, select_option, scroll, hover
48
+ │ ├── extraction_spec.rb # screenshot, get_content, pdf, evaluate, get_url, get_title
49
+ │ ├── cookies_spec.rb # get_cookies, set_cookies, clear_cookies
50
+ │ ├── sessions_spec.rb # list_sessions, close_session
51
+ │ └── downloads_spec.rb # set_download_path, wait_for_download, list_downloads, clear_downloads
52
+ └── e2e/
53
+ └── stealth_e2e_spec.rb # End-to-end stealth mode tests
54
+ ```
55
+
56
+ ## Test Helper
57
+
58
+ The `ToolTestHelper` module provides a convenient way to call MCP tools in tests:
59
+
60
+ ```ruby
61
+ module ToolTestHelper
62
+ def call_tool(tool, args = {})
63
+ tool.call(args, nil)
64
+ end
65
+ end
66
+ ```
67
+
68
+ MCP tools expect two arguments: `(args, context)`. The helper passes `nil` for context since tests don't need server context.
69
+
70
+ ## Mocking Strategy
71
+
72
+ Tests use RSpec's `instance_double` to mock Ferrum objects:
73
+
74
+ ```ruby
75
+ let(:session_manager) { instance_double(Crucible::SessionManager) }
76
+ let(:page) { instance_double("Ferrum::Page") }
77
+ let(:element) { instance_double("Ferrum::Node") }
78
+
79
+ before do
80
+ allow(session_manager).to receive(:page).and_return(page)
81
+ end
82
+ ```
83
+
84
+ ### Why instance_double?
85
+
86
+ - Verifies mocked methods exist on the real class
87
+ - Catches API mismatches early (e.g., wrong method signatures)
88
+ - Provides clear error messages when expectations fail
89
+
90
+ ### Important: Ferrum is loaded for real
91
+
92
+ The spec_helper loads the real Ferrum gem:
93
+
94
+ ```ruby
95
+ require "ferrum"
96
+ ```
97
+
98
+ This ensures `instance_double` can verify method signatures against the actual Ferrum API.
99
+
100
+ ## Testing MCP Tool Schemas
101
+
102
+ MCP tools have input schemas that define their parameters. Test schema properties using the `.properties` and `.required` methods:
103
+
104
+ ```ruby
105
+ it "has correct schema" do
106
+ schema = tool.input_schema_value
107
+
108
+ # Properties returns a hash with symbol keys
109
+ expect(schema.properties).to have_key(:url)
110
+ expect(schema.properties).to have_key(:session)
111
+
112
+ # Required returns an array of symbols
113
+ expect(schema.required).to include(:url)
114
+ end
115
+ ```
116
+
117
+ **Note**: Schema property keys and required names are symbols, not strings:
118
+
119
+ - `schema.properties` → `{ url: {...}, session: {...} }`
120
+ - `schema.required` → `[:url]`
121
+
122
+ ## Testing MCP Tool Responses
123
+
124
+ MCP tools return `MCP::Tool::Response` objects:
125
+
126
+ ```ruby
127
+ # Successful response
128
+ result = call_tool(tool, url: "https://example.com")
129
+ expect(result.content.first[:text]).to include("Navigated to")
130
+ expect(result.error?).to be(false)
131
+
132
+ # Error response
133
+ result = call_tool(tool, url: "invalid")
134
+ expect(result.error?).to be(true)
135
+ expect(result.content.first[:text]).to include("failed")
136
+ ```
137
+
138
+ ### Response Structure
139
+
140
+ ```ruby
141
+ result.content # Array of content blocks
142
+ result.error? # Boolean indicating error state
143
+ result.to_h # Hash representation for MCP protocol
144
+ ```
145
+
146
+ ### Content Types
147
+
148
+ ```ruby
149
+ # Text content
150
+ { type: "text", text: "Success message" }
151
+
152
+ # Image content (screenshots)
153
+ { type: "image", data: "base64...", mimeType: "image/png" }
154
+
155
+ # Resource content (PDFs)
156
+ { type: "resource", resource: { uri: "...", mimeType: "application/pdf", blob: "..." } }
157
+ ```
158
+
159
+ ## Testing Error Handling
160
+
161
+ Tools should handle errors gracefully and return error responses:
162
+
163
+ ```ruby
164
+ it "returns error on failure" do
165
+ allow(page).to receive(:go_to).and_raise(Ferrum::Error.new("Connection refused"))
166
+
167
+ result = call_tool(tool, url: "https://example.com")
168
+
169
+ expect(result.error?).to be(true)
170
+ expect(result.content.first[:text]).to include("Navigation failed")
171
+ end
172
+
173
+ it "returns error when element not found" do
174
+ allow(page).to receive(:at_css).and_return(nil)
175
+
176
+ result = call_tool(tool, selector: "#missing")
177
+
178
+ expect(result.error?).to be(true)
179
+ expect(result.content.first[:text]).to include("Element not found")
180
+ end
181
+ ```
182
+
183
+ ## Code Coverage
184
+
185
+ SimpleCov is configured to track coverage:
186
+
187
+ ```ruby
188
+ require "simplecov"
189
+ SimpleCov.start do
190
+ add_filter "/spec/"
191
+ enable_coverage :branch
192
+ minimum_coverage 50
193
+ end
194
+ ```
195
+
196
+ View the coverage report at `coverage/index.html` after running tests.
197
+
198
+ Current coverage:
199
+
200
+ - Line Coverage: ~87%
201
+ - Branch Coverage: ~75%
202
+
203
+ ## Common Patterns
204
+
205
+ ### Testing session parameter
206
+
207
+ Most tools accept an optional `session` parameter:
208
+
209
+ ```ruby
210
+ it "uses specified session" do
211
+ allow(page).to receive(:go_to)
212
+
213
+ call_tool(tool, session: "my-session", url: "https://example.com")
214
+
215
+ expect(session_manager).to have_received(:page).with("my-session")
216
+ end
217
+ ```
218
+
219
+ ### Testing optional parameters
220
+
221
+ ```ruby
222
+ it "uses default format" do
223
+ allow(page).to receive(:screenshot).with(hash_including(format: :png)).and_return("base64data")
224
+
225
+ call_tool(tool)
226
+
227
+ expect(page).to have_received(:screenshot).with(hash_including(format: :png))
228
+ end
229
+
230
+ it "respects custom format" do
231
+ allow(page).to receive(:screenshot).with(hash_including(format: :jpeg)).and_return("base64data")
232
+
233
+ call_tool(tool, format: "jpeg")
234
+
235
+ expect(page).to have_received(:screenshot).with(hash_including(format: :jpeg))
236
+ end
237
+ ```
238
+
239
+ ## Ferrum API Reference
240
+
241
+ Key Ferrum methods used and their signatures:
242
+
243
+ ```ruby
244
+ # Navigation
245
+ page.go_to(url)
246
+ page.back
247
+ page.forward
248
+ page.refresh
249
+
250
+ # Element finding
251
+ page.at_css(selector) # Returns single element or nil
252
+
253
+ # Element interaction
254
+ element.click(mode: :left) # :left, :right, or :double
255
+ element.hover
256
+ element.focus
257
+ element.type("text")
258
+ element.type("text", :Enter) # Type with key
259
+ element.scroll_into_view
260
+
261
+ # Content extraction
262
+ page.body # Full HTML
263
+ page.current_url
264
+ page.current_title
265
+ element.text
266
+ element.property("outerHTML")
267
+
268
+ # JavaScript
269
+ page.evaluate("expression")
270
+ page.execute("script")
271
+
272
+ # Screenshots/PDF
273
+ page.screenshot(format: :png, full: false, quality: 100, path: "/tmp/screenshot.png")
274
+ page.pdf(landscape: false, format: :A4, scale: 1.0, path: "/tmp/page.pdf")
275
+
276
+ # Cookies
277
+ page.cookies.all # Hash of all cookies
278
+ page.cookies[name] # Get specific cookie
279
+ page.cookies.set(name:, value:, ...)
280
+ page.cookies.remove(name:, url:)
281
+ page.cookies.clear
282
+
283
+ # Downloads
284
+ browser.downloads.set_behavior(save_path: "/tmp/downloads")
285
+ browser.downloads.wait(timeout)
286
+ browser.downloads.files # List of downloaded file paths
287
+ ```
288
+
289
+ ## Debugging Tests
290
+
291
+ ```bash
292
+ # Run with full backtrace
293
+ bundle exec rspec --backtrace
294
+
295
+ # Run single test in isolation
296
+ bundle exec rspec spec/tools/navigation_spec.rb:44 --format doc
297
+
298
+ # Add binding.irb inside an example (Ruby code, in the spec file) to pause execution
299
+ it "debugs something" do
300
+ result = call_tool(tool, url: "https://example.com")
301
+ binding.irb # Pause here
302
+ expect(result).to be_valid
303
+ end
304
+ ```
305
+
306
+ ## CI/CD Considerations
307
+
308
+ The test suite:
309
+
310
+ - Runs in ~2 seconds
311
+ - Requires no network access (all Ferrum calls mocked)
312
+ - Requires no Chrome/Chromium installation for unit tests
313
+ - Uses random test ordering (`config.order = :random`)
314
+
315
+ For integration tests that actually drive a browser, you would need:
316
+
317
+ - Chrome/Chromium installed
318
+ - Xvfb or headless mode on CI
319
+ - Longer timeouts for browser operations
data/config.sample.yml ADDED
@@ -0,0 +1,48 @@
1
+ # Crucible Sample Configuration
2
+ # Copy to ~/.config/crucible/config.yml or use with --config flag
3
+ #
4
+ # All settings are optional - defaults are shown below
5
+
6
+ browser:
7
+ headless: true # Run browser without visible window (true/false)
8
+ window_size: [1280, 720] # Viewport dimensions [width, height] in pixels
9
+ timeout: 30 # Default timeout for operations in seconds
10
+ # chrome_path: /usr/bin/chromium # Custom path to Chrome/Chromium executable
11
+
12
+ stealth:
13
+ enabled: true # Enable stealth mode to evade bot detection (true/false)
14
+ profile: moderate # Stealth profile: minimal, moderate, or maximum
15
+ # minimal - Basic evasions (webdriver flag, window dimensions)
16
+ # moderate - Common evasions for most sites (default)
17
+ # maximum - All evasions for strictest detection
18
+ locale: "en-US,en" # Browser locale for Accept-Language header
19
+
20
+ server:
21
+ log_level: warn # Logging verbosity: debug, info, warn, or error
22
+ # logfile: /path/to/crucible.log # Optional log file path
23
+
24
+ # Operating modes - predefined configurations for different use cases
25
+ # Switch modes at runtime with set_stealth_profile or via default mode
26
+ modes:
27
+ default: ai_agent # Mode to use on startup (optional)
28
+
29
+ ai_agent: # Optimized for AI agent browser control
30
+ stealth_profile: maximum # Use maximum stealth for web automation
31
+ screenshot_format: png # Screenshot format: png, jpeg, or base64
32
+ wait_timeout: 30000 # Default wait timeout in milliseconds
33
+
34
+ scraping: # Optimized for web scraping tasks
35
+ stealth_profile: maximum # Maximum stealth to avoid detection
36
+ # rate_limit: 1000 # (future) Delay between requests in ms
37
+ # retry_attempts: 3 # (future) Number of retry attempts
38
+ # respect_robots_txt: true # (future) Honor robots.txt rules
39
+
40
+ testing: # Optimized for automated testing
41
+ stealth_profile: minimal # Minimal stealth for faster execution
42
+ # capture_network: true # (future) Capture network requests
43
+ # performance_metrics: true # (future) Collect performance data
44
+ # screenshot_on_failure: true # (future) Auto-screenshot on errors
45
+
46
+ manual: # For interactive/debugging sessions
47
+ stealth_profile: moderate # Balanced stealth settings
48
+ # expose_cdp: true # (future) Expose Chrome DevTools Protocol
data/crucible.gemspec ADDED
@@ -0,0 +1,48 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/crucible/version'
4
+
5
+ Gem::Specification.new do |spec| # Gem metadata, packaged file list, and dependencies for crucible
6
+ spec.name = 'crucible'
7
+ spec.version = Crucible::VERSION
8
+ spec.authors = ['Josh Frye']
9
+ spec.email = ['me@joshfrye.dev']
10
+
11
+ spec.summary = 'MCP server for browser automation using Ferrum/Chrome'
12
+ spec.description = <<~DESC
13
+ An MCP (Model Context Protocol) server that provides browser automation tools
14
+ for AI agents using Ferrum and headless Chrome. Features 25 tools covering
15
+ navigation, screenshots, form interaction, JavaScript evaluation, cookies,
16
+ file downloads, and multi-session management.
17
+ DESC
18
+ spec.homepage = 'https://github.com/joshfng/crucible'
19
+ spec.license = 'MIT'
20
+ spec.required_ruby_version = '>= 3.2.0'
21
+
22
+ spec.metadata['homepage_uri'] = spec.homepage
23
+ spec.metadata['source_code_uri'] = spec.homepage
24
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/main/CHANGELOG.md"
25
+ spec.metadata['rubygems_mfa_required'] = 'true'
26
+
27
+ spec.files = Dir.chdir(__dir__) do # Package git-tracked files, minus this gemspec and the test/CI/VCS paths rejected below
28
+ `git ls-files -z`.split("\x0").reject do |f|
29
+ (File.expand_path(f) == __FILE__) ||
30
+ f.start_with?(*%w[bin/ test/ spec/ features/ .git .github])
31
+ end
32
+ end
33
+ spec.bindir = 'exe'
34
+ spec.executables = spec.files.grep(%r{\Aexe/}) { |f| File.basename(f) } # every packaged file under exe/ is listed as an executable
35
+ spec.require_paths = ['lib']
36
+
37
+ # Runtime dependencies (no version constraints are declared here)
38
+ spec.add_dependency 'ferrum'
39
+ spec.add_dependency 'mcp'
40
+
41
+ # Development dependencies (likewise unconstrained)
42
+ spec.add_development_dependency 'rake'
43
+ spec.add_development_dependency 'rspec'
44
+ spec.add_development_dependency 'rubocop'
45
+ spec.add_development_dependency 'rubocop-rake'
46
+ spec.add_development_dependency 'rubocop-rspec'
47
+ spec.add_development_dependency 'simplecov'
48
+ end
data/exe/crucible ADDED
@@ -0,0 +1,122 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Add lib to load path for development
5
+ $LOAD_PATH.unshift File.expand_path('../lib', __dir__)
6
+
7
+ require 'optparse'
8
+ require 'crucible'
9
+
10
+ options = {}
11
+ config_file = nil
12
+
13
+ parser = OptionParser.new do |opts| # Declares the CLI; each handler stores its value in options (or config_file) for use after parsing
14
+ opts.banner = 'Usage: crucible [options]'
15
+ opts.separator ''
16
+ opts.separator 'MCP server for browser automation using Ferrum/Chrome'
17
+ opts.separator ''
18
+ opts.separator 'Options:'
19
+
20
+ opts.on('-c', '--config FILE', 'Path to YAML configuration file') do |v|
21
+ config_file = v
22
+ end
23
+
24
+ opts.on('--[no-]headless', 'Run browser in headless mode (default: true)') do |v|
25
+ options[:headless] = v
26
+ end
27
+
28
+ opts.on('-w', '--width WIDTH', Integer, 'Viewport width in pixels (default: 1280)') do |v|
29
+ options[:viewport_width] = v
30
+ end
31
+
32
+ opts.on('-h', '--height HEIGHT', Integer, 'Viewport height in pixels (default: 720)') do |v| # NOTE: -h means --height here; help is registered only as --help below
33
+ options[:viewport_height] = v
34
+ end
35
+
36
+ opts.on('--chrome PATH', 'Path to Chrome/Chromium executable') do |v|
37
+ options[:chrome_path] = v
38
+ end
39
+
40
+ opts.on('-t', '--timeout SECONDS', Integer, 'Default timeout in seconds (default: 30)') do |v|
41
+ options[:timeout] = v
42
+ end
43
+
44
+ opts.on('--error-level LEVEL', %w[debug info warn error],
45
+ 'Logging level: debug, info, warn, error (default: warn)') do |v|
46
+ options[:error_level] = v.to_sym # NOTE(review): config.sample.yml calls this setting log_level; confirm Configuration defines error_level=, otherwise the override loop skips it silently
47
+ end
48
+
49
+ opts.on('--screenshot-format FORMAT', %w[png jpeg base64],
50
+ 'Default screenshot format: png, jpeg, base64 (default: png)') do |v|
51
+ options[:screenshot_format] = v.to_sym
52
+ end
53
+
54
+ opts.on('--content-format FORMAT', %w[html text],
55
+ 'Default content format: html, text (default: html)') do |v|
56
+ options[:content_format] = v.to_sym
57
+ end
58
+
59
+ opts.separator ''
60
+ opts.separator 'Stealth Options:'
61
+
62
+ opts.on('--[no-]stealth', 'Enable/disable stealth mode (default: enabled)') do |v|
63
+ options[:stealth_enabled] = v
64
+ end
65
+
66
+ opts.on('--stealth-profile PROFILE', %w[minimal moderate maximum],
67
+ 'Stealth profile: minimal, moderate, maximum (default: moderate)') do |v|
68
+ options[:stealth_profile] = v.to_sym
69
+ end
70
+
71
+ opts.on('--stealth-locale LOCALE', 'Browser locale for stealth mode (default: en-US,en)') do |v|
72
+ options[:stealth_locale] = v
73
+ end
74
+
75
+ opts.separator ''
76
+
77
+ opts.on('-v', '--version', 'Show version') do
78
+ puts "crucible #{Crucible::VERSION}"
79
+ exit
80
+ end
81
+
82
+ opts.on('--help', 'Show this help message') do
83
+ puts opts
84
+ exit
85
+ end
86
+ end
87
+
88
+ begin # Parse ARGV in place; on any option error print the message plus a usage hint and exit with status 1
89
+ parser.parse!
90
+ rescue OptionParser::ParseError => e # base class: also covers InvalidArgument (e.g. --width abc), AmbiguousOption and NeedlessArgument
91
+ warn "Error: #{e.message}"
92
+ warn "Run 'crucible --help' for usage information"
93
+ exit 1
94
+ end
95
+
96
+ # Handle extra arguments
97
+ warn "Warning: Ignoring extra arguments: #{ARGV.join(' ')}" unless ARGV.empty?
98
+
99
+ begin
100
+ # Load configuration from the --config file when given, otherwise from built-in defaults
101
+ config = if config_file
102
+ Crucible::Configuration.from_file(config_file)
103
+ else
104
+ Crucible::Configuration.from_defaults
105
+ end
106
+
107
+ # Apply command-line overrides; a key with no matching setter on config is skipped silently
108
+ options.each do |key, value|
109
+ config.public_send(:"#{key}=", value) if config.respond_to?(:"#{key}=")
110
+ end
111
+
112
+ # Apply default mode if configured. NOTE(review): this runs after the CLI overrides above, so a mode's settings may replace flags such as --stealth-profile; confirm the intended precedence
113
+ config.apply_mode(config.modes[:default]) if config.modes && config.modes[:default]
114
+
115
+ Crucible::Server.new(config).run
116
+ rescue Crucible::Error => e
117
+ warn "Error: #{e.message}"
118
+ exit 1
119
+ rescue Interrupt
120
+ # Clean exit on Ctrl+C (status 0, no backtrace)
121
+ exit 0
122
+ end