crucible 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +102 -0
  4. data/Gemfile +10 -0
  5. data/LICENSE +21 -0
  6. data/README.md +366 -0
  7. data/Rakefile +23 -0
  8. data/TESTING.md +319 -0
  9. data/config.sample.yml +48 -0
  10. data/crucible.gemspec +48 -0
  11. data/exe/crucible +122 -0
  12. data/lib/crucible/configuration.rb +212 -0
  13. data/lib/crucible/server.rb +123 -0
  14. data/lib/crucible/session_manager.rb +209 -0
  15. data/lib/crucible/stealth/evasions/chrome_app.js +75 -0
  16. data/lib/crucible/stealth/evasions/chrome_csi.js +33 -0
  17. data/lib/crucible/stealth/evasions/chrome_load_times.js +44 -0
  18. data/lib/crucible/stealth/evasions/chrome_runtime.js +190 -0
  19. data/lib/crucible/stealth/evasions/iframe_content_window.js +101 -0
  20. data/lib/crucible/stealth/evasions/media_codecs.js +65 -0
  21. data/lib/crucible/stealth/evasions/navigator_hardware_concurrency.js +18 -0
  22. data/lib/crucible/stealth/evasions/navigator_languages.js +18 -0
  23. data/lib/crucible/stealth/evasions/navigator_permissions.js +53 -0
  24. data/lib/crucible/stealth/evasions/navigator_plugins.js +261 -0
  25. data/lib/crucible/stealth/evasions/navigator_vendor.js +18 -0
  26. data/lib/crucible/stealth/evasions/navigator_webdriver.js +16 -0
  27. data/lib/crucible/stealth/evasions/webgl_vendor.js +43 -0
  28. data/lib/crucible/stealth/evasions/window_outerdimensions.js +18 -0
  29. data/lib/crucible/stealth/utils.js +266 -0
  30. data/lib/crucible/stealth.rb +213 -0
  31. data/lib/crucible/tools/cookies.rb +206 -0
  32. data/lib/crucible/tools/downloads.rb +273 -0
  33. data/lib/crucible/tools/extraction.rb +335 -0
  34. data/lib/crucible/tools/helpers.rb +46 -0
  35. data/lib/crucible/tools/interaction.rb +355 -0
  36. data/lib/crucible/tools/navigation.rb +181 -0
  37. data/lib/crucible/tools/sessions.rb +85 -0
  38. data/lib/crucible/tools/stealth.rb +167 -0
  39. data/lib/crucible/tools.rb +42 -0
  40. data/lib/crucible/version.rb +5 -0
  41. data/lib/crucible.rb +60 -0
  42. metadata +201 -0
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Evasion: navigator.vendor
3
+ * Override navigator.vendor to return the correct value.
4
+ */
5
+ (function(opts) {
6
+ 'use strict';
7
+
8
+ const utils = window._stealthUtils;
9
+ if (!utils) return;
10
+
11
+ const vendor = opts.vendor || 'Google Inc.';
12
+
13
+ utils.replaceGetterWithProxy(
14
+ Object.getPrototypeOf(navigator),
15
+ 'vendor',
16
+ utils.makeHandler().getterValue(vendor)
17
+ );
18
+ })({ vendor: null }); // Will be replaced by Ruby
@@ -0,0 +1,16 @@
1
+ /**
2
+ * Evasion: navigator.webdriver
3
+ * Deletes the navigator.webdriver property that reveals automation.
4
+ */
5
+ (function() {
6
+ 'use strict';
7
+
8
+ if (navigator.webdriver === false) {
9
+ // Post Chrome 89.0.4339.0 and already good
10
+ } else if (navigator.webdriver === undefined) {
11
+ // Pre Chrome 89.0.4339.0 and already good
12
+ } else {
13
+ // Needs patching
14
+ delete Object.getPrototypeOf(navigator).webdriver;
15
+ }
16
+ })();
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Evasion: webgl.vendor
3
+ * Fix WebGL Vendor/Renderer being set to Google in headless mode.
4
+ * Default values match a typical Apple Retina MBP.
5
+ */
6
+ (function(opts) {
7
+ 'use strict';
8
+
9
+ const utils = window._stealthUtils;
10
+ if (!utils) return;
11
+
12
+ const vendor = opts.vendor || 'Intel Inc.';
13
+ const renderer = opts.renderer || 'Intel Iris OpenGL Engine';
14
+
15
+ const getParameterProxyHandler = {
16
+ apply: function(target, ctx, args) {
17
+ const param = (args || [])[0];
18
+ const result = utils.cache.Reflect.apply(target, ctx, args);
19
+ // UNMASKED_VENDOR_WEBGL
20
+ if (param === 37445) {
21
+ return vendor;
22
+ }
23
+ // UNMASKED_RENDERER_WEBGL
24
+ if (param === 37446) {
25
+ return renderer;
26
+ }
27
+ return result;
28
+ }
29
+ };
30
+
31
+ // There's more than one WebGL rendering context
32
+ const addProxy = (obj, propName) => {
33
+ utils.replaceWithProxy(obj, propName, getParameterProxyHandler);
34
+ };
35
+
36
+ // Patch both WebGL contexts
37
+ if (typeof WebGLRenderingContext !== 'undefined') {
38
+ addProxy(WebGLRenderingContext.prototype, 'getParameter');
39
+ }
40
+ if (typeof WebGL2RenderingContext !== 'undefined') {
41
+ addProxy(WebGL2RenderingContext.prototype, 'getParameter');
42
+ }
43
+ })({ vendor: null, renderer: null }); // Will be replaced by Ruby
@@ -0,0 +1,18 @@
1
+ /**
2
+ * Evasion: window.outerdimensions
3
+ * Fix missing window.outerWidth/window.outerHeight in headless mode.
4
+ */
5
+ (function() {
6
+ 'use strict';
7
+
8
+ try {
9
+ if (window.outerWidth && window.outerHeight) {
10
+ return; // Nothing to do here
11
+ }
12
+ const windowFrame = 85; // Approximate window frame size (OS/WM dependent)
13
+ window.outerWidth = window.innerWidth;
14
+ window.outerHeight = window.innerHeight + windowFrame;
15
+ } catch (err) {
16
+ // Silently fail
17
+ }
18
+ })();
@@ -0,0 +1,266 @@
1
+ /**
2
+ * Stealth utilities for modifying native browser APIs without leaving traces.
3
+ * Ported from puppeteer-extra-plugin-stealth.
4
+ */
5
+ (function() {
6
+ 'use strict';
7
+
8
+ if (window._stealthUtils) return;
9
+
10
+ const utils = {};
11
+
12
+ utils.init = () => {
13
+ utils.preloadCache();
14
+ };
15
+
16
+ /**
17
+ * Preload a cache of function copies and data.
18
+ */
19
+ utils.preloadCache = () => {
20
+ if (utils.cache) return;
21
+ utils.cache = {
22
+ Reflect: {
23
+ get: Reflect.get.bind(Reflect),
24
+ apply: Reflect.apply.bind(Reflect)
25
+ },
26
+ nativeToStringStr: Function.toString + ''
27
+ };
28
+ };
29
+
30
+ /**
31
+ * Generate a native toString result.
32
+ */
33
+ utils.makeNativeString = (name = '') => {
34
+ return utils.cache.nativeToStringStr.replace('toString', name || '');
35
+ };
36
+
37
+ /**
38
+ * Wrap a JS Proxy Handler and strip its presence from error stacks.
39
+ */
40
+ utils.stripProxyFromErrors = (handler = {}) => {
41
+ const newHandler = {
42
+ setPrototypeOf: function(target, proto) {
43
+ if (proto === null)
44
+ throw new TypeError('Cannot convert object to primitive value');
45
+ if (Object.getPrototypeOf(target) === Object.getPrototypeOf(proto)) {
46
+ throw new TypeError('Cyclic __proto__ value');
47
+ }
48
+ return Reflect.setPrototypeOf(target, proto);
49
+ }
50
+ };
51
+
52
+ const traps = Object.getOwnPropertyNames(handler);
53
+ traps.forEach(trap => {
54
+ newHandler[trap] = function() {
55
+ try {
56
+ return handler[trap].apply(this, arguments || []);
57
+ } catch (err) {
58
+ if (!err || !err.stack || !err.stack.includes('at ')) {
59
+ throw err;
60
+ }
61
+
62
+ const stripWithBlacklist = (stack, stripFirstLine = true) => {
63
+ const blacklist = [
64
+ `at Reflect.${trap} `,
65
+ `at Object.${trap} `,
66
+ `at Object.newHandler.<computed> [as ${trap}] `
67
+ ];
68
+ return err.stack
69
+ .split('\n')
70
+ .filter((line, index) => !(index === 1 && stripFirstLine))
71
+ .filter(line => !blacklist.some(bl => line.trim().startsWith(bl)))
72
+ .join('\n');
73
+ };
74
+
75
+ const stripWithAnchor = (stack, anchor) => {
76
+ const stackArr = stack.split('\n');
77
+ anchor = anchor || `at Object.newHandler.<computed> [as ${trap}] `;
78
+ const anchorIndex = stackArr.findIndex(line =>
79
+ line.trim().startsWith(anchor)
80
+ );
81
+ if (anchorIndex === -1) return false;
82
+ stackArr.splice(1, anchorIndex);
83
+ return stackArr.join('\n');
84
+ };
85
+
86
+ err.stack = err.stack.replace(
87
+ 'at Object.toString (',
88
+ 'at Function.toString ('
89
+ );
90
+ if ((err.stack || '').includes('at Function.toString (')) {
91
+ err.stack = stripWithBlacklist(err.stack, false);
92
+ throw err;
93
+ }
94
+
95
+ err.stack = stripWithAnchor(err.stack) || stripWithBlacklist(err.stack);
96
+ throw err;
97
+ }
98
+ };
99
+ });
100
+ return newHandler;
101
+ };
102
+
103
+ /**
104
+ * Replace the property of an object.
105
+ */
106
+ utils.replaceProperty = (obj, propName, descriptorOverrides = {}) => {
107
+ return Object.defineProperty(obj, propName, {
108
+ ...(Object.getOwnPropertyDescriptor(obj, propName) || {}),
109
+ ...descriptorOverrides
110
+ });
111
+ };
112
+
113
/**
 * Patch Function.prototype.toString so that `obj` stringifies like a
 * native function.
 *
 * Installs a Proxy around Function.prototype.toString whose `apply` trap
 * special-cases `obj` and otherwise defers to the wrapped function.
 *
 * @param {object} obj - The object (usually a proxy) whose toString output is faked.
 * @param {string} [str=''] - Exact string to return for `obj`; when empty, a
 *   native-looking string is generated from `obj.name`.
 */
utils.patchToString = (obj, str = '') => {
  const handler = {
    apply: function(target, ctx) {
      // `Function.prototype.toString.toString()` must itself look native.
      if (ctx === Function.prototype.toString) {
        return utils.makeNativeString('toString');
      }
      // toString was invoked on the object we are disguising.
      if (ctx === obj) {
        return str || utils.makeNativeString(obj.name);
      }
      // Same guard as utils.redirectToString: with a null/undefined receiver,
      // let the wrapped function raise its own TypeError instead of failing
      // on the `ctx.toString` property access below.
      if (typeof ctx === 'undefined' || ctx === null) {
        return target.call(ctx);
      }
      // If the receiver's toString does not share our Function.prototype.toString
      // prototype chain, hand the call to the receiver's own toString.
      const hasSameProto = Object.getPrototypeOf(
        Function.prototype.toString
      ).isPrototypeOf(ctx.toString);
      if (!hasSameProto) {
        return ctx.toString();
      }
      return target.call(ctx);
    }
  };

  const toStringProxy = new Proxy(
    Function.prototype.toString,
    utils.stripProxyFromErrors(handler)
  );
  utils.replaceProperty(Function.prototype, 'toString', {
    value: toStringProxy
  });
};
143
+
144
+ /**
145
+ * Redirect toString from proxyObj to originalObj.
146
+ */
147
+ utils.redirectToString = (proxyObj, originalObj) => {
148
+ const handler = {
149
+ apply: function(target, ctx) {
150
+ if (ctx === Function.prototype.toString) {
151
+ return utils.makeNativeString('toString');
152
+ }
153
+ if (ctx === proxyObj) {
154
+ const fallback = () =>
155
+ originalObj && originalObj.name
156
+ ? utils.makeNativeString(originalObj.name)
157
+ : utils.makeNativeString(proxyObj.name);
158
+ return originalObj + '' || fallback();
159
+ }
160
+ if (typeof ctx === 'undefined' || ctx === null) {
161
+ return target.call(ctx);
162
+ }
163
+ const hasSameProto = Object.getPrototypeOf(
164
+ Function.prototype.toString
165
+ ).isPrototypeOf(ctx.toString);
166
+ if (!hasSameProto) {
167
+ return ctx.toString();
168
+ }
169
+ return target.call(ctx);
170
+ }
171
+ };
172
+
173
+ const toStringProxy = new Proxy(
174
+ Function.prototype.toString,
175
+ utils.stripProxyFromErrors(handler)
176
+ );
177
+ utils.replaceProperty(Function.prototype, 'toString', {
178
+ value: toStringProxy
179
+ });
180
+ };
181
+
182
+ /**
183
+ * Replace a property with a JS Proxy.
184
+ */
185
+ utils.replaceWithProxy = (obj, propName, handler) => {
186
+ const originalObj = obj[propName];
187
+ const proxyObj = new Proxy(obj[propName], utils.stripProxyFromErrors(handler));
188
+ utils.replaceProperty(obj, propName, { value: proxyObj });
189
+ utils.redirectToString(proxyObj, originalObj);
190
+ return true;
191
+ };
192
+
193
+ /**
194
+ * Replace a getter with a JS Proxy.
195
+ */
196
+ utils.replaceGetterWithProxy = (obj, propName, handler) => {
197
+ const fn = Object.getOwnPropertyDescriptor(obj, propName).get;
198
+ const fnStr = fn.toString();
199
+ const proxyObj = new Proxy(fn, utils.stripProxyFromErrors(handler));
200
+ utils.replaceProperty(obj, propName, { get: proxyObj });
201
+ utils.patchToString(proxyObj, fnStr);
202
+ return true;
203
+ };
204
+
205
+ /**
206
+ * Mock a non-existing property with a JS Proxy.
207
+ */
208
+ utils.mockWithProxy = (obj, propName, pseudoTarget, handler) => {
209
+ const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));
210
+ utils.replaceProperty(obj, propName, { value: proxyObj });
211
+ utils.patchToString(proxyObj);
212
+ return true;
213
+ };
214
+
215
+ /**
216
+ * Create a new JS Proxy with stealth tweaks.
217
+ */
218
+ utils.createProxy = (pseudoTarget, handler) => {
219
+ const proxyObj = new Proxy(pseudoTarget, utils.stripProxyFromErrors(handler));
220
+ utils.patchToString(proxyObj);
221
+ return proxyObj;
222
+ };
223
+
224
+ /**
225
+ * Make all nested functions of an object native.
226
+ */
227
+ utils.patchToStringNested = (obj = {}) => {
228
+ return utils.execRecursively(obj, ['function'], utils.patchToString);
229
+ };
230
+
231
+ /**
232
+ * Traverse nested properties recursively.
233
+ */
234
+ utils.execRecursively = (obj = {}, typeFilter = [], fn) => {
235
+ function recurse(obj) {
236
+ for (const key in obj) {
237
+ if (obj[key] === undefined) continue;
238
+ if (obj[key] && typeof obj[key] === 'object') {
239
+ recurse(obj[key]);
240
+ } else {
241
+ if (obj[key] && typeFilter.includes(typeof obj[key])) {
242
+ fn.call(this, obj[key]);
243
+ }
244
+ }
245
+ }
246
+ }
247
+ recurse(obj);
248
+ return obj;
249
+ };
250
+
251
+ /**
252
+ * Handler templates for re-usability.
253
+ */
254
+ utils.makeHandler = () => ({
255
+ getterValue: value => ({
256
+ apply(target, ctx, args) {
257
+ utils.cache.Reflect.apply(...arguments);
258
+ return value;
259
+ }
260
+ })
261
+ });
262
+
263
+ // Initialize and expose
264
+ utils.init();
265
+ window._stealthUtils = utils;
266
+ })();
@@ -0,0 +1,213 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Crucible
4
+ # Stealth mode for browser automation
5
+ #
6
+ # Applies various evasion techniques to make headless Chrome
7
+ # appear as a regular browser to bot detection systems.
8
+ #
9
+ # @example
10
+ # stealth = Stealth.new(profile: :maximum)
11
+ # stealth.apply(browser)
12
+ #
13
+ class Stealth
14
+ EVASIONS_PATH = File.expand_path('stealth/evasions', __dir__)
15
+ UTILS_PATH = File.expand_path('stealth/utils.js', __dir__)
16
+
17
+ # Stealth profiles define which evasions are enabled
18
+ PROFILES = {
19
+ # Minimal - only essential evasions
20
+ minimal: %i[
21
+ navigator_webdriver
22
+ window_outerdimensions
23
+ ],
24
+
25
+ # Moderate - common evasions for most use cases
26
+ moderate: %i[
27
+ navigator_webdriver
28
+ chrome_app
29
+ chrome_csi
30
+ chrome_load_times
31
+ chrome_runtime
32
+ navigator_vendor
33
+ navigator_languages
34
+ window_outerdimensions
35
+ ],
36
+
37
+ # Maximum - all evasions for strictest detection
38
+ maximum: %i[
39
+ navigator_webdriver
40
+ chrome_app
41
+ chrome_csi
42
+ chrome_load_times
43
+ chrome_runtime
44
+ navigator_vendor
45
+ navigator_languages
46
+ navigator_plugins
47
+ navigator_permissions
48
+ navigator_hardware_concurrency
49
+ webgl_vendor
50
+ media_codecs
51
+ iframe_content_window
52
+ window_outerdimensions
53
+ ]
54
+ }.freeze
55
+
56
+ # Evasion files with their configurable options
57
+ EVASION_OPTIONS = {
58
+ navigator_vendor: { vendor: 'Google Inc.' },
59
+ navigator_languages: { languages: %w[en-US en] },
60
+ navigator_hardware_concurrency: { hardwareConcurrency: 4 },
61
+ webgl_vendor: { vendor: 'Intel Inc.', renderer: 'Intel Iris OpenGL Engine' }
62
+ }.freeze
63
+
64
+ attr_reader :profile, :enabled, :options
65
+
66
+ # @param profile [Symbol] stealth profile (:minimal, :moderate, :maximum)
67
+ # @param enabled [Boolean] whether stealth is enabled
68
+ # @param options [Hash] additional options (locale, custom evasion opts)
69
+ def initialize(profile: :moderate, enabled: true, **options)
70
+ @profile = validate_profile(profile)
71
+ @enabled = enabled
72
+ @options = {
73
+ locale: 'en-US,en',
74
+ inject_utils: true
75
+ }.merge(options)
76
+ end
77
+
78
+ # Apply stealth evasions to a browser
79
+ # @param browser [Ferrum::Browser] the browser instance
80
+ def apply(browser)
81
+ return unless enabled
82
+
83
+ inject_utils(browser) if options[:inject_utils]
84
+ inject_evasions(browser)
85
+ apply_user_agent_override(browser)
86
+ end
87
+
88
+ # Apply stealth evasions to a page (for new pages in existing session)
89
+ # @param page [Ferrum::Page] the page instance
90
+ def apply_to_page(page)
91
+ return unless enabled
92
+
93
+ inject_utils_to_page(page) if options[:inject_utils]
94
+ inject_evasions_to_page(page)
95
+ end
96
+
97
+ # Get list of enabled evasions for current profile
98
+ # @return [Array<Symbol>]
99
+ def enabled_evasions
100
+ PROFILES.fetch(profile, PROFILES[:moderate])
101
+ end
102
+
103
+ # Creates extensions array suitable for Ferrum browser options
104
+ # @return [Array<String>]
105
+ def extensions
106
+ return [] unless enabled
107
+
108
+ scripts = []
109
+ scripts << utils_script if options[:inject_utils]
110
+ scripts.concat(evasion_scripts)
111
+ scripts
112
+ end
113
+
114
+ private
115
+
116
+ def validate_profile(profile)
117
+ profile = profile.to_sym
118
+ unless PROFILES.key?(profile)
119
+ raise Error, "Invalid stealth profile: #{profile}. Must be one of: #{PROFILES.keys.join(', ')}"
120
+ end
121
+
122
+ profile
123
+ end
124
+
125
+ def inject_utils(browser)
126
+ # Use evaluate_on_new_document to inject before page loads
127
+ browser.evaluate_on_new_document(utils_script)
128
+ rescue Ferrum::Error
129
+ # Browser may not be ready, that's OK
130
+ end
131
+
132
+ def inject_utils_to_page(page)
133
+ # For existing pages, use command directly
134
+ page.command('Page.addScriptToEvaluateOnNewDocument', source: utils_script)
135
+ rescue Ferrum::Error
136
+ # Page may not be ready, that's OK
137
+ end
138
+
139
+ def inject_evasions(browser)
140
+ evasion_scripts.each do |script|
141
+ browser.evaluate_on_new_document(script)
142
+ rescue Ferrum::Error
143
+ # Continue with other evasions
144
+ end
145
+ end
146
+
147
+ def inject_evasions_to_page(page)
148
+ evasion_scripts.each do |script|
149
+ page.command('Page.addScriptToEvaluateOnNewDocument', source: script)
150
+ rescue Ferrum::Error
151
+ # Continue with other evasions
152
+ end
153
+ end
154
+
155
# Override the browser's user agent so it no longer advertises headless Chrome.
#
# Reads `navigator.userAgent` from the browser's current page, rewrites it and
# sends the result through the `Network.setUserAgentOverride` command together
# with `options[:locale]` as the accept-language value.
#
# @param browser [Ferrum::Browser] the browser instance
# @return [void]
def apply_user_agent_override(browser)
  page = browser.page
  return unless page

  # Get current UA and strip "Headless"
  ua = page.evaluate('navigator.userAgent')
  # Nothing sensible to override if the page did not yield a UA string.
  return unless ua.is_a?(String) && !ua.empty?

  ua = ua.gsub('HeadlessChrome/', 'Chrome/')

  # Mask Linux as Windows (common detection vector).
  # Only the FIRST parenthesised group is the platform token; `sub` (not `gsub`)
  # keeps later groups such as "(KHTML, like Gecko)" intact.
  # NOTE(review): only the UA string is masked here; no `platform` value is sent
  # with the override — confirm whether navigator.platform needs to match.
  ua = ua.sub(/\(([^)]+)\)/, '(Windows NT 10.0; Win64; x64)') if ua.include?('Linux') && !ua.include?('Android')

  page.command(
    'Network.setUserAgentOverride',
    userAgent: ua,
    acceptLanguage: options[:locale]
  )
rescue Ferrum::Error => e
  # Log but don't fail - UA override is optional
  warn "[Stealth] Failed to apply user agent override: #{e.message}"
end
176
+
177
+ def utils_script
178
+ @utils_script ||= File.read(UTILS_PATH)
179
+ end
180
+
181
+ def evasion_scripts
182
+ enabled_evasions.map do |evasion|
183
+ script = load_evasion_script(evasion)
184
+ substitute_options(evasion, script)
185
+ end.compact
186
+ end
187
+
188
+ def load_evasion_script(evasion)
189
+ path = File.join(EVASIONS_PATH, "#{evasion}.js")
190
+ return nil unless File.exist?(path)
191
+
192
+ File.read(path)
193
+ end
194
+
195
# Replace the placeholder options object at the end of an evasion script
# with the effective options for that evasion, serialised as JSON.
#
# The defaults from EVASION_OPTIONS are merged with any per-evasion hash the
# user supplied under the same key in `options`. Scripts for evasions that have
# no entry in EVASION_OPTIONS are returned unchanged.
#
# @param evasion [Symbol] evasion name (key into EVASION_OPTIONS / options)
# @param script [String, nil] script source, or nil when the file was missing
# @return [String, nil] the script with options substituted, or nil for a nil script
def substitute_options(evasion, script)
  return script unless script

  evasion_opts = EVASION_OPTIONS[evasion]
  return script unless evasion_opts

  # Merge with user-provided options. An explicit nil/false value for the
  # evasion key means "no overrides" rather than a TypeError from Hash#merge.
  merged_opts = evasion_opts.merge(options[evasion] || {})

  # Substitute options in the script
  # Scripts end with `})({ key: null }); // Will be replaced by Ruby`
  # Block form is used so the JSON is inserted literally (no backslash
  # back-reference processing of the replacement string).
  # rubocop:disable Style/RegexpLiteral
  script.gsub(/\}\)\(\{[^}]+\}\);?\s*(\/\/.*)?$/) do
    # rubocop:enable Style/RegexpLiteral
    "})(#{merged_opts.to_json});"
  end
end
212
+ end
213
+ end