crucible 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +102 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +366 -0
  7. data/Rakefile +23 -0
  8. data/TESTING.md +319 -0
  9. data/config.sample.yml +48 -0
  10. data/crucible.gemspec +48 -0
  11. data/exe/crucible +122 -0
  12. data/lib/crucible/configuration.rb +212 -0
  13. data/lib/crucible/server.rb +123 -0
  14. data/lib/crucible/session_manager.rb +209 -0
  15. data/lib/crucible/stealth/evasions/chrome_app.js +75 -0
  16. data/lib/crucible/stealth/evasions/chrome_csi.js +33 -0
  17. data/lib/crucible/stealth/evasions/chrome_load_times.js +44 -0
  18. data/lib/crucible/stealth/evasions/chrome_runtime.js +190 -0
  19. data/lib/crucible/stealth/evasions/iframe_content_window.js +101 -0
  20. data/lib/crucible/stealth/evasions/media_codecs.js +65 -0
  21. data/lib/crucible/stealth/evasions/navigator_hardware_concurrency.js +18 -0
  22. data/lib/crucible/stealth/evasions/navigator_languages.js +18 -0
  23. data/lib/crucible/stealth/evasions/navigator_permissions.js +53 -0
  24. data/lib/crucible/stealth/evasions/navigator_plugins.js +261 -0
  25. data/lib/crucible/stealth/evasions/navigator_vendor.js +18 -0
  26. data/lib/crucible/stealth/evasions/navigator_webdriver.js +16 -0
  27. data/lib/crucible/stealth/evasions/webgl_vendor.js +43 -0
  28. data/lib/crucible/stealth/evasions/window_outerdimensions.js +18 -0
  29. data/lib/crucible/stealth/utils.js +266 -0
  30. data/lib/crucible/stealth.rb +213 -0
  31. data/lib/crucible/tools/cookies.rb +206 -0
  32. data/lib/crucible/tools/downloads.rb +273 -0
  33. data/lib/crucible/tools/extraction.rb +335 -0
  34. data/lib/crucible/tools/helpers.rb +46 -0
  35. data/lib/crucible/tools/interaction.rb +355 -0
  36. data/lib/crucible/tools/navigation.rb +181 -0
  37. data/lib/crucible/tools/sessions.rb +85 -0
  38. data/lib/crucible/tools/stealth.rb +167 -0
  39. data/lib/crucible/tools.rb +42 -0
  40. data/lib/crucible/version.rb +5 -0
  41. data/lib/crucible.rb +60 -0
  42. metadata +201 -0
@@ -0,0 +1,335 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mcp'
4
+ require 'json'
5
+ require 'base64'
6
+
7
module Crucible
  module Tools
    # Extraction tools: screenshot, get_content, pdf, evaluate, get_url, get_title
    #
    # Each tool is created with MCP::Tool.define and answers with an
    # MCP::Tool::Response. Browser failures (Ferrum::Error and subclasses) are
    # turned into error responses instead of propagating.
    #
    # NOTE(review): the blocks handed to MCP::Tool.define may be executed with a
    # different +self+ than this module, so they only rely on captured locals
    # and constants - never on the private helpers below. Confirm against the
    # mcp gem before calling a helper from inside a tool block.
    module Extraction
      class << self
        # Builds every extraction tool.
        #
        # @param sessions [#page] object whose +page(name)+ returns the browser
        #   page for a session name
        # @param config [#screenshot_format, #content_format] supplies the
        #   fallback formats used when a call does not pass +format+
        # @return [Array] the tools, in the order: screenshot, get_content, pdf,
        #   evaluate, get_url, get_title
        def tools(sessions, config)
          [
            screenshot_tool(sessions, config),
            get_content_tool(sessions, config),
            pdf_tool(sessions, config),
            evaluate_tool(sessions),
            get_url_tool(sessions),
            get_title_tool(sessions)
          ]
        end

        private

        # JSON-schema fragment for the +session+ argument shared by all tools.
        # Only called while a tool is being defined, so a fresh hash is handed
        # to every schema.
        #
        # @return [Hash]
        def session_property
          {
            type: 'string',
            description: 'Session name',
            default: 'default'
          }
        end

        # Tool that captures the viewport, the full scrollable page, or a single
        # element. With +path+ the image is written to disk and the expanded
        # path is reported as text; without it the image is returned as base64
        # image content.
        #
        # @param sessions [#page]
        # @param config [#screenshot_format] format used when none is passed
        def screenshot_tool(sessions, config)
          MCP::Tool.define(
            name: 'screenshot',
            description: 'Take a screenshot of the page or a specific element',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property,
                selector: {
                  type: 'string',
                  description: 'CSS selector for element screenshot (optional, captures full viewport if not specified)'
                },
                full_page: {
                  type: 'boolean',
                  description: 'Capture full scrollable page',
                  default: false
                },
                format: {
                  type: 'string',
                  description: 'Image format',
                  enum: %w[png jpeg],
                  default: 'png'
                },
                quality: {
                  type: 'integer',
                  description: 'JPEG quality (1-100)',
                  minimum: 1,
                  maximum: 100,
                  default: 80
                },
                path: {
                  type: 'string',
                  description: 'File path to save screenshot (if omitted, returns base64 data)'
                }
              },
              required: []
            }
          ) do |session: 'default', selector: nil, full_page: false, format: nil, quality: 80, path: nil, **|
            # An explicit format wins; otherwise fall back to the configured one.
            format = (format || config.screenshot_format.to_s).to_sym

            page = sessions.page(session)

            screenshot_opts = {
              format: format,
              quality: quality
            }

            # Either save to file or return base64
            if path
              screenshot_opts[:path] = File.expand_path(path)
            else
              screenshot_opts[:encoding] = :base64
            end

            if selector
              # Look the element up first so a missing element yields a clear
              # message rather than whatever the capture call would raise.
              element = page.at_css(selector)
              raise ElementNotFoundError, "Element not found: #{selector}" unless element

              data = page.screenshot(**screenshot_opts, selector: selector)
            elsif full_page
              data = page.screenshot(**screenshot_opts, full: true)
            else
              data = page.screenshot(**screenshot_opts)
            end

            if path
              MCP::Tool::Response.new([{
                                        type: 'text',
                                        text: "Screenshot saved to: #{screenshot_opts[:path]}"
                                      }])
            else
              mime_type = format == :jpeg ? 'image/jpeg' : 'image/png'
              MCP::Tool::Response.new([{
                                        type: 'image',
                                        data: data,
                                        mimeType: mime_type
                                      }])
            end
          rescue ElementNotFoundError => e
            MCP::Tool::Response.new([{ type: 'text', text: e.message }], error: true)
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "Screenshot failed: #{e.message}" }], error: true)
          end
        end

        # Tool that returns the HTML or text of the whole page or of the first
        # element matching a CSS selector.
        #
        # @param sessions [#page]
        # @param config [#content_format] format used when none is passed
        def get_content_tool(sessions, config)
          MCP::Tool.define(
            name: 'get_content',
            description: 'Get the content of the page or a specific element',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property,
                selector: {
                  type: 'string',
                  description: 'CSS selector for specific element (optional, gets full page if not specified)'
                },
                format: {
                  type: 'string',
                  description: 'Content format to return',
                  enum: %w[html text],
                  default: 'html'
                }
              },
              required: []
            }
          ) do |session: 'default', selector: nil, format: nil, **|
            format ||= config.content_format.to_s

            page = sessions.page(session)

            content = if selector
                        element = page.at_css(selector)
                        raise ElementNotFoundError, "Element not found: #{selector}" unless element

                        if format == 'text'
                          element.text
                        else
                          element.property('outerHTML')
                        end
                      elsif format == 'text'
                        # Whole-page text is the text of <body>; empty when there is none.
                        body = page.at_css('body')
                        body ? body.text : ''
                      else
                        page.body
                      end

            MCP::Tool::Response.new([{ type: 'text', text: content || '' }])
          rescue ElementNotFoundError => e
            MCP::Tool::Response.new([{ type: 'text', text: e.message }], error: true)
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "Get content failed: #{e.message}" }], error: true)
          end
        end

        # Tool that renders the current page as a PDF. With +path+ the file is
        # written to disk and the expanded path is reported as text; without it
        # the PDF is returned base64-encoded as an embedded resource.
        #
        # @param sessions [#page]
        # @param _config unused
        def pdf_tool(sessions, _config)
          MCP::Tool.define(
            name: 'pdf',
            description: 'Generate a PDF of the current page',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property,
                landscape: {
                  type: 'boolean',
                  description: 'Use landscape orientation',
                  default: false
                },
                format: {
                  type: 'string',
                  description: 'Paper format',
                  enum: %w[A4 Letter Legal Tabloid],
                  default: 'A4'
                },
                scale: {
                  type: 'number',
                  description: 'Scale factor (0.1 to 2.0)',
                  minimum: 0.1,
                  maximum: 2.0,
                  default: 1.0
                },
                print_background: {
                  type: 'boolean',
                  description: 'Print background graphics',
                  default: true
                },
                path: {
                  type: 'string',
                  description: 'File path to save PDF (if omitted, returns base64 data)'
                }
              },
              required: []
            }
          ) do |session: 'default', landscape: false, format: 'A4', scale: 1.0, print_background: true, path: nil, **|
            page = sessions.page(session)

            # Ferrum looks paper sizes up by symbol: ISO sizes are upper-case
            # (:A4) while the named sizes are lower-case (:letter, :legal,
            # :tabloid). The schema advertises "Letter"/"Legal"/"Tabloid", so
            # normalise the case here instead of passing :Letter through.
            paper = (format || 'A4').to_s
            paper = paper.match?(/\AA\d+\z/i) ? paper.upcase : paper.downcase

            pdf_opts = {
              landscape: landscape,
              format: paper.to_sym,
              scale: scale,
              print_background: print_background
            }

            if path
              expanded_path = File.expand_path(path)
              pdf_opts[:path] = expanded_path
              page.pdf(**pdf_opts)

              MCP::Tool::Response.new([{
                                        type: 'text',
                                        text: "PDF saved to: #{expanded_path}"
                                      }])
            else
              pdf_opts[:encoding] = :base64
              pdf_data = page.pdf(**pdf_opts)

              # NOTE(review): the base64 payload is sent under +text+ (and again
              # inside the data: URI). Binary resources are presumably expected
              # under +blob+ by MCP clients - confirm before changing the shape.
              MCP::Tool::Response.new([{
                                        type: 'resource',
                                        resource: {
                                          uri: "data:application/pdf;base64,#{pdf_data}",
                                          mimeType: 'application/pdf',
                                          text: pdf_data
                                        }
                                      }])
            end
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "PDF generation failed: #{e.message}" }], error: true)
          end
        end

        # Tool that evaluates a JavaScript expression in the page and returns
        # the result as text: +nil+ becomes "null", strings are returned as-is,
        # anything else is JSON-encoded.
        #
        # @param sessions [#page]
        def evaluate_tool(sessions)
          MCP::Tool.define(
            name: 'evaluate',
            description: 'Execute JavaScript in the page context and return the result',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property,
                expression: {
                  type: 'string',
                  description: 'JavaScript expression to evaluate'
                }
              },
              required: ['expression']
            }
          ) do |expression:, session: 'default', **|
            page = sessions.page(session)
            result = page.evaluate(expression)

            # Convert result to JSON for consistent output
            result_text = case result
                          when nil then 'null'
                          when String then result
                          else JSON.generate(result)
                          end

            MCP::Tool::Response.new([{ type: 'text', text: result_text }])
          rescue Ferrum::JavaScriptError => e
            # Listed before Ferrum::Error so script errors get their own message.
            MCP::Tool::Response.new([{ type: 'text', text: "JavaScript error: #{e.message}" }], error: true)
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "Evaluate failed: #{e.message}" }], error: true)
          end
        end

        # Tool that reports the page's current URL as text.
        #
        # @param sessions [#page]
        def get_url_tool(sessions)
          MCP::Tool.define(
            name: 'get_url',
            description: 'Get the current URL of the page',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property
              },
              required: []
            }
          ) do |session: 'default', **|
            page = sessions.page(session)

            MCP::Tool::Response.new([{ type: 'text', text: page.current_url }])
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "Get URL failed: #{e.message}" }], error: true)
          end
        end

        # Tool that reports the page's current title as text (empty string when
        # the page reports no title).
        #
        # @param sessions [#page]
        def get_title_tool(sessions)
          MCP::Tool.define(
            name: 'get_title',
            description: 'Get the title of the current page',
            input_schema: {
              type: 'object',
              properties: {
                session: session_property
              },
              required: []
            }
          ) do |session: 'default', **|
            page = sessions.page(session)

            MCP::Tool::Response.new([{ type: 'text', text: page.current_title || '' }])
          rescue Ferrum::Error => e
            MCP::Tool::Response.new([{ type: 'text', text: "Get title failed: #{e.message}" }], error: true)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
module Crucible
  module Tools
    # Small utilities mixed into the tool implementations.
    module Helpers
      # Reads an argument that may be stored under a symbol or a string key.
      # The symbol key is tried first, then its string form, then +default+.
      #
      # @param args [Hash] the arguments hash from MCP
      # @param key [Symbol] the key to look for
      # @param default [Object] value returned when neither key is present
      # @return [Object] the stored value (even when it is nil) or +default+
      def extract_arg(args, key, default = nil)
        args.fetch(key) do |_missing|
          string_key = key.to_s
          args.fetch(string_key, default)
        end
      end

      # Modifier key used for the "select all" shortcut on this platform.
      #
      # @return [Symbol] :meta when RUBY_PLATFORM mentions darwin, :control otherwise
      def select_all_modifier
        return :meta if RUBY_PLATFORM.include?('darwin')

        :control
      end

      # Empties an input by focusing it, pressing select-all, then backspace.
      #
      # @param element [Ferrum::Node] the input element
      def clear_field(element)
        element.focus

        select_all = [select_all_modifier, 'a']
        delete_selection = [:backspace]
        element.type(select_all, delete_selection)
      end

      # Flattens a cookie object into a plain hash, dropping nil attributes.
      #
      # @param cookie [Ferrum::Cookie] the cookie object
      # @return [Hash] cookie attributes keyed by symbol
      def cookie_to_hash(cookie)
        attributes = {
          name: cookie.name,
          value: cookie.value,
          domain: cookie.domain,
          path: cookie.path,
          secure: cookie.secure?,
          httpOnly: cookie.httponly?,
          sameSite: cookie.samesite,
          expires: cookie.expires
        }

        attributes.compact
      end
    end
  end
end