agent_ferrum 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/LICENSE.txt +21 -0
- data/README.md +345 -0
- data/lib/agent_ferrum/browser/target_resolution.rb +64 -0
- data/lib/agent_ferrum/browser.rb +159 -0
- data/lib/agent_ferrum/configuration.rb +45 -0
- data/lib/agent_ferrum/content/accessibility_tree.rb +82 -0
- data/lib/agent_ferrum/content/markdown_converter.rb +32 -0
- data/lib/agent_ferrum/content/snapshot.rb +44 -0
- data/lib/agent_ferrum/content/visibility_filter.rb +44 -0
- data/lib/agent_ferrum/downloads.rb +52 -0
- data/lib/agent_ferrum/errors.rb +20 -0
- data/lib/agent_ferrum/node.rb +61 -0
- data/lib/agent_ferrum/stealth/manager.rb +34 -0
- data/lib/agent_ferrum/stealth/profiles.rb +20 -0
- data/lib/agent_ferrum/stealth/scripts/chrome_runtime.js +34 -0
- data/lib/agent_ferrum/stealth/scripts/iframe_content_window.js +22 -0
- data/lib/agent_ferrum/stealth/scripts/navigator_plugins.js +64 -0
- data/lib/agent_ferrum/stealth/scripts/navigator_vendor.js +4 -0
- data/lib/agent_ferrum/stealth/scripts/user_agent.js +12 -0
- data/lib/agent_ferrum/stealth/scripts/utils.js +24 -0
- data/lib/agent_ferrum/stealth/scripts/webdriver.js +16 -0
- data/lib/agent_ferrum/stealth/scripts/webgl_vendor.js +27 -0
- data/lib/agent_ferrum/version.rb +5 -0
- data/lib/agent_ferrum/waiter.rb +54 -0
- data/lib/agent_ferrum.rb +25 -0
- metadata +127 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: e0f2cc1c04153bbcfec18fe9d0e5c68299bf3bac2eed2cff4b7ab4cdc8cd03a5
|
|
4
|
+
data.tar.gz: e1150457b6023ebb1176a671dbab3e2058e5ec183adab75aee59f4c067acd422
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 72d310191f52a7b7aa0a179058c771611b1387c8e87def6d6d2051b32256f8fc48c0f739e14c657f64fe1bd2fc6c8955768f7765a4a5841667e59aa9a55439de
|
|
7
|
+
data.tar.gz: 49e604f1dad52a548b1180e77186a54bf6b5a8306ca478cb2c5679157d8928df91595ff6087c1d4e8320e62b786120d4c060388ff9a9dd1f892bd9253066778a
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [0.1.0] - 2026-02-09
|
|
9
|
+
|
|
10
|
+
### Added
|
|
11
|
+
|
|
12
|
+
- **Hybrid snapshots** combining accessibility tree (interactive elements with `@eN` refs) and markdown content into a single compact output
|
|
13
|
+
- **Ref-based actions** -- `click`, `fill`, `select`, `hover` via `@e1`, `@e2`... refs from the snapshot
|
|
14
|
+
- **Visibility filtering** -- JS-based filtering removes hidden elements (display:none, visibility:hidden, aria-hidden, etc.), Nokogiri post-processing strips scripts, styles, and noise attributes
|
|
15
|
+
- **Markdown conversion** -- HTML to clean markdown via ReverseMarkdown with whitespace compaction
|
|
16
|
+
- **Stealth mode** -- Three profiles (`:minimal`, `:moderate`, `:maximum`) ported from puppeteer-extra-plugin-stealth
|
|
17
|
+
- `:minimal` -- removes `navigator.webdriver` flag
|
|
18
|
+
- `:moderate` -- adds vendor/platform spoofing, Chrome runtime, user-agent cleanup
|
|
19
|
+
- `:maximum` -- adds navigator plugins, WebGL vendor masking, iframe fixes
|
|
20
|
+
- **Download management** -- set download path via CDP, wait for completion with timeout and optional filename filter
|
|
21
|
+
- **Smart waiting** -- poll for CSS/XPath/text/block conditions with configurable timeout and interval
|
|
22
|
+
- **Auto-retry** -- node actions retry automatically on `Ferrum::NodeMovingError` and `Ferrum::CoordinatesNotFoundError` (up to 3 attempts)
|
|
23
|
+
- **AI-friendly errors** -- `RefNotFoundError` and `ElementNotFoundError` include actionable messages guiding the agent
|
|
24
|
+
- **Configuration DSL** -- `AgentFerrum.configure` block or per-instance keyword arguments
|
|
25
|
+
- **Custom user-agent** via CDP `Network.setUserAgentOverride`
|
|
26
|
+
- **Timezone override** via CDP `Emulation.setTimezoneOverride`
|
|
27
|
+
- **Locale support** via Chrome browser options
|
|
28
|
+
|
|
29
|
+
[0.1.0]: https://github.com/Alqemist-labs/agent_ferrum/releases/tag/v0.1.0
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
The MIT License (MIT)
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Florian
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,345 @@
|
|
|
1
|
+
# AgentFerrum
|
|
2
|
+
|
|
3
|
+
[Gem Version](https://badge.fury.io/rb/agent_ferrum) [Ruby](https://www.ruby-lang.org) [License: MIT](https://opensource.org/licenses/MIT)
|
|
4
|
+
|
|
5
|
+
**Browser automation for AI agents, in Ruby.** Powered by [Ferrum](https://github.com/rubycdp/ferrum).
|
|
6
|
+
|
|
7
|
+
AgentFerrum wraps Chrome headless (via CDP) with an AI-optimized extraction layer. Instead of dumping raw HTML into your agent's context, it produces a compact snapshot: a markdown rendering of the visible page content + an accessibility tree of interactive elements with clickable refs. Typical reduction: **50-80% fewer tokens** compared to raw HTML (the heavier the page, the bigger the savings).
|
|
8
|
+
|
|
9
|
+
> Inspired by [agent-browser](https://github.com/vercel-labs/agent-browser) (Vercel) for the accessibility tree + refs concept, [Crucible](https://github.com/joshfng/crucible) for stealth profiles and download management, and [FerrumMCP](https://github.com/Eth3rnit3/FerrumMCP) for Ruby/Ferrum patterns.
|
|
10
|
+
|
|
11
|
+
## Why AgentFerrum?
|
|
12
|
+
|
|
13
|
+
Most browser automation tools return the full DOM or raw HTML. An LLM agent processing a typical web page receives **thousands of tokens** of noise (scripts, styles, hidden elements, data attributes). AgentFerrum solves this with a hybrid snapshot:
|
|
14
|
+
|
|
15
|
+
```
|
|
16
|
+
# Shopping Cart # What your agent sees
|
|
17
|
+
URL: https://shop.example.com/cart
|
|
18
|
+
|
|
19
|
+
## Interactive Elements # Clickable refs
|
|
20
|
+
@e1: [link] "Home" href="/"
|
|
21
|
+
@e2: [link] "Products" href="/products"
|
|
22
|
+
@e3: [textbox] "Search" value=""
|
|
23
|
+
@e4: [button] "Remove"
|
|
24
|
+
@e5: [button] "Checkout"
|
|
25
|
+
|
|
26
|
+
## Page Content # Clean markdown
|
|
27
|
+
# Your Cart
|
|
28
|
+
|
|
29
|
+
| Product | Qty | Price |
|
|
30
|
+
|---------|-----|-------|
|
|
31
|
+
| Widget | 2 | $20 |
|
|
32
|
+
|
|
33
|
+
Total: **$20.00**
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
Your agent reads a compact snapshot instead of the full DOM. It clicks `@e5` to checkout. Done.
|
|
37
|
+
|
|
38
|
+
## Benchmark
|
|
39
|
+
|
|
40
|
+
Real-world token reduction measured on live pages (February 2026):
|
|
41
|
+
|
|
42
|
+
| Site | Raw HTML | Snapshot | Reduction |
|
|
43
|
+
|------|----------|----------|-----------|
|
|
44
|
+
| **Hacker News** | ~8,600 tokens | ~4,500 tokens | **47%** |
|
|
45
|
+
| **Wikipedia** (Ruby article) | ~140,000 tokens | ~31,000 tokens | **78%** |
|
|
46
|
+
| **GitHub** (repo page) | ~265,000 tokens | ~22,000 tokens | **92%** |
|
|
47
|
+
|
|
48
|
+
The heavier the page (scripts, styles, data attributes, hidden elements), the bigger the savings. Simple content-focused pages like HN see ~50% reduction. Rich web apps like GitHub or StackOverflow see 90%+.
|
|
49
|
+
|
|
50
|
+
## Features
|
|
51
|
+
|
|
52
|
+
- **Hybrid snapshots** -- Accessibility tree (interactive elements with refs) + markdown (visible content), combined into a single compact output
|
|
53
|
+
- **Ref-based actions** -- Click, fill, select, hover via `@e1`, `@e2`... refs from the snapshot. No CSS selectors needed
|
|
54
|
+
- **Visibility filtering** -- JS-based filtering removes hidden elements, then Nokogiri strips scripts, styles, and noise attributes
|
|
55
|
+
- **Markdown conversion** -- HTML to clean markdown via [ReverseMarkdown](https://github.com/xijo/reverse_markdown), with whitespace compaction
|
|
56
|
+
- **Stealth mode** -- Three profiles (`:minimal`, `:moderate`, `:maximum`) ported from puppeteer-extra-plugin-stealth
|
|
57
|
+
- **Download management** -- Set download path, wait for completion with timeout
|
|
58
|
+
- **Smart waiting** -- Poll for CSS/XPath/text/block conditions with configurable timeout and interval
|
|
59
|
+
- **Auto-retry** -- Node actions retry automatically on transient errors (element moving, coordinates not found)
|
|
60
|
+
- **AI-friendly errors** -- Error messages tell the agent what to do next ("Call browser.snapshot to refresh refs")
|
|
61
|
+
|
|
62
|
+
## Installation
|
|
63
|
+
|
|
64
|
+
Add to your Gemfile:
|
|
65
|
+
|
|
66
|
+
```ruby
|
|
67
|
+
gem "agent_ferrum"
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Then:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
bundle install
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Requirements:** Ruby 3.4+, Chrome/Chromium installed.
|
|
77
|
+
|
|
78
|
+
## Quick Start
|
|
79
|
+
|
|
80
|
+
### 1. Navigate and snapshot
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
require "agent_ferrum"
|
|
84
|
+
|
|
85
|
+
browser = AgentFerrum::Browser.new
|
|
86
|
+
browser.navigate("https://example.com")
|
|
87
|
+
|
|
88
|
+
snap = browser.snapshot
|
|
89
|
+
puts snap.to_s
|
|
90
|
+
# => Compact snapshot with interactive elements + markdown content
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
### 2. Interact via refs
|
|
94
|
+
|
|
95
|
+
```ruby
|
|
96
|
+
snap = browser.snapshot
|
|
97
|
+
|
|
98
|
+
# Click a button by ref
|
|
99
|
+
browser.click("@e3")
|
|
100
|
+
|
|
101
|
+
# Fill a text field by ref
|
|
102
|
+
browser.fill("@e2", "search query")
|
|
103
|
+
|
|
104
|
+
# Or use CSS/XPath selectors
|
|
105
|
+
browser.click("button.submit")
|
|
106
|
+
browser.click("//a[@href='/about']")
|
|
107
|
+
browser.fill({ css: "input[name='email']" }, "user@example.com")
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
### 3. Wait for content
|
|
111
|
+
|
|
112
|
+
```ruby
|
|
113
|
+
# Wait for an element
|
|
114
|
+
browser.wait_for(css: ".results", timeout: 10)
|
|
115
|
+
|
|
116
|
+
# Wait for text to appear
|
|
117
|
+
browser.wait_for(text: "Search complete")
|
|
118
|
+
|
|
119
|
+
# Wait for a custom condition
|
|
120
|
+
browser.wait_for { |b| b.evaluate("document.readyState") == "complete" }
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
### 4. Extract content
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
# Full snapshot (accessibility tree + markdown)
|
|
127
|
+
snap = browser.snapshot
|
|
128
|
+
puts snap.to_s # Combined output for AI
|
|
129
|
+
puts snap.markdown # Just the markdown content
|
|
130
|
+
puts snap.accessibility_tree # Just the interactive elements
|
|
131
|
+
puts snap.estimated_tokens # Approximate token count
|
|
132
|
+
|
|
133
|
+
# Quick markdown only
|
|
134
|
+
puts browser.page_markdown
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### 5. Clean up
|
|
138
|
+
|
|
139
|
+
```ruby
|
|
140
|
+
browser.quit
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## Configuration
|
|
144
|
+
|
|
145
|
+
```ruby
|
|
146
|
+
AgentFerrum.configure do |c|
|
|
147
|
+
c.headless = true # Run headless (default: true)
|
|
148
|
+
c.timeout = 30 # Default timeout in seconds
|
|
149
|
+
c.poll_interval = 0.1 # Polling interval for wait_for
|
|
150
|
+
c.viewport = [1920, 1080] # Browser viewport size
|
|
151
|
+
c.stealth = :off # Stealth profile: :off, :minimal, :moderate, :maximum
|
|
152
|
+
c.download_path = nil # Directory for downloads
|
|
153
|
+
c.browser_path = nil # Custom Chrome/Chromium path
|
|
154
|
+
c.user_agent = nil # Custom user agent
|
|
155
|
+
c.locale = nil # Browser locale (e.g., "fr-FR")
|
|
156
|
+
end
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Or pass options directly:
|
|
160
|
+
|
|
161
|
+
```ruby
|
|
162
|
+
browser = AgentFerrum::Browser.new(headless: false, timeout: 60, stealth: :maximum)
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
## Stealth Mode
|
|
166
|
+
|
|
167
|
+
Three profiles of increasing evasion, ported from [puppeteer-extra-plugin-stealth](https://github.com/berstend/puppeteer-extra/tree/master/packages/puppeteer-extra-plugin-stealth):
|
|
168
|
+
|
|
169
|
+
| Profile | Scripts | What it does |
|
|
170
|
+
| ----------- | ------- | -------------------------------------------------------------- |
|
|
171
|
+
| `:minimal` | 1 | Removes `navigator.webdriver` flag |
|
|
172
|
+
| `:moderate` | 4 | + Vendor/platform spoofing, Chrome runtime, user-agent cleanup |
|
|
173
|
+
| `:maximum` | 7 | + Navigator plugins, WebGL vendor masking, iframe fixes |
|
|
174
|
+
|
|
175
|
+
```ruby
|
|
176
|
+
# Enable at initialization
|
|
177
|
+
browser = AgentFerrum::Browser.new(stealth: :maximum)
|
|
178
|
+
|
|
179
|
+
# Or switch dynamically
|
|
180
|
+
browser.stealth(:moderate)
|
|
181
|
+
browser.navigate("https://bot-detection-site.com")
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Downloads
|
|
185
|
+
|
|
186
|
+
```ruby
|
|
187
|
+
browser.download_path = "/tmp/downloads"
|
|
188
|
+
browser.click("@e5") # Click a download link
|
|
189
|
+
|
|
190
|
+
filepath = browser.wait_for_download(timeout: 30)
|
|
191
|
+
puts filepath # => "/tmp/downloads/report.pdf"
|
|
192
|
+
|
|
193
|
+
# Or wait for a specific filename
|
|
194
|
+
filepath = browser.wait_for_download(filename: "report.pdf", timeout: 60)
|
|
195
|
+
```
|
|
196
|
+
|
|
197
|
+
## Snapshot Format
|
|
198
|
+
|
|
199
|
+
The snapshot output is designed for AI consumption. Here's the structure:
|
|
200
|
+
|
|
201
|
+
```
|
|
202
|
+
# Page Title
|
|
203
|
+
URL: https://example.com/page
|
|
204
|
+
|
|
205
|
+
## Interactive Elements
|
|
206
|
+
@e1: [button] "Submit"
|
|
207
|
+
@e2: [textbox] "Email" value=""
|
|
208
|
+
@e3: [link] "Home" href="/"
|
|
209
|
+
@e4: [checkbox] "Remember me" checked=true
|
|
210
|
+
@e5: [combobox] "Country"
|
|
211
|
+
@e6: [link] "Sign up" href="/register"
|
|
212
|
+
|
|
213
|
+
## Page Content
|
|
214
|
+
# Welcome
|
|
215
|
+
|
|
216
|
+
Please fill in your details below.
|
|
217
|
+
|
|
218
|
+
| Field | Required |
|
|
219
|
+
|-------|----------|
|
|
220
|
+
| Email | Yes |
|
|
221
|
+
| Name | No |
|
|
222
|
+
|
|
223
|
+
[Terms of Service](/tos)
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
**Supported interactive roles:** button, link, textbox, checkbox, radio, combobox, menuitem, tab, slider, spinbutton, searchbox, switch, option, listbox, menu, menubar.
|
|
227
|
+
|
|
228
|
+
**Element properties** included when present: `value`, `disabled`, `required`, `checked`, `selected`, `readonly`.
|
|
229
|
+
|
|
230
|
+
## API Reference
|
|
231
|
+
|
|
232
|
+
### Navigation
|
|
233
|
+
|
|
234
|
+
| Method | Description |
|
|
235
|
+
| --------------- | ---------------- |
|
|
236
|
+
| `navigate(url)` | Navigate to URL |
|
|
237
|
+
| `back` | Go back |
|
|
238
|
+
| `forward` | Go forward |
|
|
239
|
+
| `refresh` | Reload page |
|
|
240
|
+
| `current_url` | Current page URL |
|
|
241
|
+
| `title` | Page title |
|
|
242
|
+
|
|
243
|
+
### Content Extraction
|
|
244
|
+
|
|
245
|
+
| Method | Returns | Description |
|
|
246
|
+
| -------------------- | ------------------- | ---------------------------------------------------- |
|
|
247
|
+
| `snapshot` | `Snapshot` | Full hybrid snapshot (accessibility tree + markdown) |
|
|
248
|
+
| `page_markdown` | `String` | Markdown of visible content only |
|
|
249
|
+
| `accessibility_tree` | `AccessibilityTree` | Interactive elements with refs |
|
|
250
|
+
|
|
251
|
+
### Actions
|
|
252
|
+
|
|
253
|
+
| Method | Description |
|
|
254
|
+
| ----------------------- | ---------------------------------------- |
|
|
255
|
+
| `click(target)` | Click element (ref, CSS, XPath, or Hash) |
|
|
256
|
+
| `fill(target, value)` | Fill text field |
|
|
257
|
+
| `select(target, value)` | Select dropdown option |
|
|
258
|
+
| `hover(target)` | Hover over element |
|
|
259
|
+
| `type_text(text)` | Type text via keyboard |
|
|
260
|
+
|
|
261
|
+
**Target resolution:** `"@e1"` (ref) > `{css: ".btn"}` / `{xpath: "//a"}` (Hash) > `"//*[@id='x']"` (XPath string starting with `/`) > `"button.submit"` (CSS string).
|
|
262
|
+
|
|
263
|
+
### Waiting
|
|
264
|
+
|
|
265
|
+
| Method | Description |
|
|
266
|
+
| --------------------- | ------------------------------- |
|
|
267
|
+
| `wait_for(css:)` | Wait for CSS selector |
|
|
268
|
+
| `wait_for(xpath:)` | Wait for XPath |
|
|
269
|
+
| `wait_for(text:)` | Wait for text content |
|
|
270
|
+
| `wait_for { block }` | Wait for block to return truthy |
|
|
271
|
+
| `wait_for_navigation` | Wait for page idle |
|
|
272
|
+
|
|
273
|
+
### Utilities
|
|
274
|
+
|
|
275
|
+
| Method | Description |
|
|
276
|
+
| -------------------------- | --------------------------------- |
|
|
277
|
+
| `evaluate(js)` | Execute JavaScript, return result |
|
|
278
|
+
| `screenshot(path:, selector:, full:)` | Take screenshot |
|
|
279
|
+
| `quit` | Close browser |
|
|
280
|
+
|
|
281
|
+
## AI Agent Integration Example
|
|
282
|
+
|
|
283
|
+
Here's how an AI agent loop might use AgentFerrum:
|
|
284
|
+
|
|
285
|
+
```ruby
|
|
286
|
+
browser = AgentFerrum::Browser.new(stealth: :moderate)
|
|
287
|
+
|
|
288
|
+
# Agent navigates
|
|
289
|
+
browser.navigate("https://shop.example.com")
|
|
290
|
+
|
|
291
|
+
# Agent gets compact page representation
|
|
292
|
+
snap = browser.snapshot
|
|
293
|
+
# => Send snap.to_s to LLM (compact snapshot)
|
|
294
|
+
|
|
295
|
+
# LLM decides to search for a product
|
|
296
|
+
browser.fill("@e3", "wireless headphones")
|
|
297
|
+
browser.click("@e4") # Search button
|
|
298
|
+
|
|
299
|
+
# Agent refreshes snapshot after action
|
|
300
|
+
browser.wait_for(css: ".results")
|
|
301
|
+
snap = browser.snapshot
|
|
302
|
+
# => Updated snapshot with search results
|
|
303
|
+
|
|
304
|
+
# LLM picks a product
|
|
305
|
+
browser.click("@e7") # Product link
|
|
306
|
+
|
|
307
|
+
# Continue the loop...
|
|
308
|
+
browser.quit
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
## Development
|
|
312
|
+
|
|
313
|
+
```bash
|
|
314
|
+
git clone https://github.com/Alqemist-labs/agent_ferrum
|
|
315
|
+
cd agent_ferrum
|
|
316
|
+
bundle install
|
|
317
|
+
|
|
318
|
+
# Run unit tests
|
|
319
|
+
bundle exec rake test
|
|
320
|
+
|
|
321
|
+
# Run integration tests (requires Chrome)
|
|
322
|
+
bundle exec rake integration
|
|
323
|
+
```
|
|
324
|
+
|
|
325
|
+
## Contributing
|
|
326
|
+
|
|
327
|
+
Bug reports and pull requests are welcome on GitHub.
|
|
328
|
+
|
|
329
|
+
1. Fork it
|
|
330
|
+
2. Create your feature branch (`git checkout -b feature/my-feature`)
|
|
331
|
+
3. Commit your changes
|
|
332
|
+
4. Push to the branch
|
|
333
|
+
5. Create a Pull Request
|
|
334
|
+
|
|
335
|
+
## License
|
|
336
|
+
|
|
337
|
+
The gem is available as open source under the terms of the [MIT License](LICENSE.txt).
|
|
338
|
+
|
|
339
|
+
## See Also
|
|
340
|
+
|
|
341
|
+
- [Ferrum](https://github.com/rubycdp/ferrum) -- The Chrome headless Ruby library this gem is built on
|
|
342
|
+
- [agent-browser](https://github.com/vercel-labs/agent-browser) -- Vercel's CLI for AI browser automation (accessibility tree + refs concept)
|
|
343
|
+
- [Crucible](https://github.com/joshfng/crucible) -- Ruby MCP server for browser automation with stealth mode
|
|
344
|
+
- [FerrumMCP](https://github.com/Eth3rnit3/FerrumMCP) -- MCP server for Ferrum with AI agent integration
|
|
345
|
+
- [RubyLLM::Tribunal](https://github.com/Alqemist-labs/ruby_llm-tribunal) -- LLM evaluation framework for Ruby
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
# frozen_string_literal: true

module AgentFerrum
  class Browser
    # Private helpers that turn a caller-supplied target into a node object.
    #
    # The including class must provide two instance variables:
    # * +@ferrum+  -- object answering +at_css+, +at_xpath+ and +page+
    # * +@ref_map+ -- Hash mapping ref strings ("@e1") to node-info hashes
    #   that carry a +:backend_node_id+ entry
    module TargetResolution
      private

      # Dispatches on the shape of +target+, in this order:
      #
      # 1. "@eN" ref string            -> resolve_ref
      # 2. Hash with a css/xpath entry -> resolve_hash_target
      # 3. XPath-looking string        -> find_by_xpath
      # 4. anything else               -> find_by_css
      #
      # A string counts as XPath when it starts with "/", optionally preceded
      # by "(" (grouped expressions such as "(//a)[1]") or by "." / ".."
      # (relative paths such as ".//a"). No valid CSS selector can start with
      # any of these, so existing CSS targets are unaffected.
      def resolve_target(target)
        case target
        when /\A@e\d+\z/
          resolve_ref(target)
        when Hash
          resolve_hash_target(target)
        when %r{\A\(*\.{0,2}/}
          find_by_xpath(target)
        else
          find_by_css(target)
        end
      end

      # Resolves a Hash target. Symbol and String keys are both accepted so
      # that hashes built from parsed JSON work as well as Ruby literals.
      # When both selectors are present, css wins.
      #
      # Raises ArgumentError when neither a css nor an xpath entry is set.
      def resolve_hash_target(target)
        css = target[:css] || target["css"]
        xpath = target[:xpath] || target["xpath"]

        if css
          find_by_css(css)
        elsif xpath
          find_by_xpath(xpath)
        else
          raise ArgumentError, "Hash target must have :css or :xpath key"
        end
      end

      # Builds a Ferrum::Node for a ref recorded in @ref_map.
      #
      # Raises RefNotFoundError when the ref is not in @ref_map. Errors
      # raised by the CDP commands themselves are not handled here.
      def resolve_ref(ref)
        node_info = @ref_map[ref]
        raise RefNotFoundError, ref unless node_info

        backend_node_id = node_info[:backend_node_id]

        # backendNodeId -> remote object id
        result = @ferrum.page.command("DOM.resolveNode", backendNodeId: backend_node_id)
        object_id = result.dig("object", "objectId")

        # Node description handed to Ferrum::Node below.
        desc = @ferrum.page.command("DOM.describeNode", backendNodeId: backend_node_id)
        description = desc["node"]

        # remote object id -> nodeId
        push_result = @ferrum.page.command("DOM.requestNode", objectId: object_id)
        node_id = push_result["nodeId"]

        frame = @ferrum.page.main_frame
        target_id = @ferrum.page.target_id
        Ferrum::Node.new(frame, target_id, node_id, description)
      end

      # Returns the first node matching the CSS selector.
      # Raises ElementNotFoundError when the lookup returns nil/false.
      def find_by_css(selector)
        node = @ferrum.at_css(selector)
        raise ElementNotFoundError, selector unless node

        node
      end

      # Returns the first node matching the XPath expression.
      # Raises ElementNotFoundError when the lookup returns nil/false.
      def find_by_xpath(xpath)
        node = @ferrum.at_xpath(xpath)
        raise ElementNotFoundError, xpath unless node

        node
      end
    end
  end
end
|
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "ferrum"
|
|
4
|
+
require_relative "browser/target_resolution"
|
|
5
|
+
|
|
6
|
+
module AgentFerrum
  # Facade over a Ferrum::Browser that adds snapshot extraction, ref-based
  # actions, waiting, downloads and stealth on top of plain navigation.
  class Browser
    include TargetResolution

    # ferrum -- the underlying Ferrum::Browser instance
    # config -- this instance's private copy of the global configuration
    attr_reader :ferrum, :config

    # Launches the browser and applies the configured session settings.
    #
    # opts are per-instance overrides for AgentFerrum.configuration; each key
    # is assigned through the matching "<key>=" writer on the copied config.
    # Keys without such a writer are ignored.
    #
    # If anything raises after the browser has been launched (stealth,
    # user-agent, timezone or download setup), the browser is quit before the
    # error is re-raised: the caller never receives the instance and so would
    # have no way to call #quit itself.
    def initialize(**opts)
      @config = AgentFerrum.configuration.dup
      opts.each { |k, v| @config.public_send(:"#{k}=", v) if @config.respond_to?(:"#{k}=") }

      @ref_map = {}
      @stealth_manager = Stealth::Manager.new
      @ferrum = Ferrum::Browser.new(**@config.ferrum_options)
      @waiter = Waiter.new(self, default_timeout: @config.timeout, default_interval: @config.poll_interval)
      @downloads = Downloads.new(self)

      apply_stealth if @config.stealth != :off
      apply_user_agent if @config.user_agent
      apply_timezone if @config.timezone
      @downloads.download_path = @config.download_path if @config.download_path
    rescue StandardError => e
      begin
        # @ferrum is nil when the launch itself failed.
        @ferrum&.quit
      rescue StandardError
        nil # best-effort cleanup; the setup error below is the one to report
      end
      raise e
    end

    # --- Navigation ---

    # Loads +url+ via Ferrum's +goto+.
    def navigate(url)
      @ferrum.goto(url)
    end

    # Goes one step back in the browsing history.
    def back
      @ferrum.back
    end

    # Goes one step forward in the browsing history.
    def forward
      @ferrum.forward
    end

    # Reloads the current page.
    def refresh
      @ferrum.refresh
    end

    # --- Content extraction ---

    # Builds a Content::Snapshot of the current page and replaces the stored
    # ref map with the snapshot's refs, so later "@eN" targets resolve
    # against this snapshot.
    def snapshot
      snap = Content::Snapshot.new(self)
      @ref_map = snap.refs
      snap
    end

    # Returns the result of running Content::MarkdownConverter over the HTML
    # produced by Content::VisibilityFilter. Does not touch the ref map.
    def page_markdown
      filtered_html = Content::VisibilityFilter.new(self).filtered_html
      Content::MarkdownConverter.new(filtered_html).convert
    end

    # Builds a Content::AccessibilityTree and replaces the stored ref map
    # with the tree's refs.
    def accessibility_tree
      tree = Content::AccessibilityTree.new(self)
      @ref_map = tree.refs
      tree
    end

    # --- Actions ---
    #
    # +target+ is resolved by TargetResolution#resolve_target: an "@eN" ref,
    # a Hash with a css/xpath entry, an XPath string or a CSS string. The
    # resolved node is wrapped in AgentFerrum::Node before acting on it.

    # Clicks the element identified by +target+.
    def click(target)
      node = resolve_target(target)
      Node.new(node).click
    end

    # Fills the element identified by +target+ with +value+.
    def fill(target, value)
      node = resolve_target(target)
      Node.new(node).fill(value)
    end

    # Selects +value+ in the element identified by +target+.
    def select(target, value)
      node = resolve_target(target)
      Node.new(node).select(value)
    end

    # Hovers over the element identified by +target+.
    def hover(target)
      node = resolve_target(target)
      Node.new(node).hover
    end

    # Types +text+ through the page keyboard; no target element is resolved.
    def type_text(text)
      @ferrum.page.keyboard.type(text)
    end

    # --- Wait ---

    # Forwards every argument and the optional block to the Waiter. A nil
    # timeout/interval is passed through as nil; the waiter was constructed
    # with the configured timeout and poll interval as its defaults.
    def wait_for(css: nil, xpath: nil, text: nil, timeout: nil, interval: nil, &)
      @waiter.call(css:, xpath:, text:, timeout:, interval:, &)
    end

    # Waits for the page to become idle, using the configured timeout when
    # none is given.
    def wait_for_navigation(timeout: nil)
      timeout ||= @config.timeout
      @ferrum.page.wait_for_idle(timeout: timeout)
    end

    # --- Downloads ---

    # Sets the download directory on the Downloads helper.
    def download_path=(path)
      @downloads.download_path = path
    end

    # Delegates to Downloads#wait. The default timeout here is a fixed
    # 30 seconds and is independent of config.timeout.
    def wait_for_download(timeout: 30, filename: nil)
      @downloads.wait(timeout:, filename:)
    end

    # --- Stealth ---

    # Stores +profile+ in the config and applies it to the current page.
    def stealth(profile)
      @config.stealth = profile
      apply_stealth
    end

    # --- Utils ---

    # URL of the current page.
    def current_url
      @ferrum.current_url
    end

    # Title of the current page.
    def title
      @ferrum.page.title
    end

    # Evaluates a JavaScript expression and returns Ferrum's result.
    def evaluate(expression)
      @ferrum.evaluate(expression)
    end

    # Takes a screenshot. +path+ and +selector+ are only forwarded when set;
    # +full+ is always forwarded.
    def screenshot(path: nil, selector: nil, full: false)
      opts = {}
      opts[:path] = path if path
      opts[:selector] = selector if selector
      opts[:full] = full
      @ferrum.screenshot(**opts)
    end

    # Shuts the underlying browser down.
    def quit
      @ferrum.quit
    end

    private

    # Hands the current page and configured profile to the stealth manager.
    def apply_stealth
      @stealth_manager.apply(@ferrum.page, @config.stealth)
    end

    # Overrides the user agent through CDP.
    def apply_user_agent
      @ferrum.page.command("Network.setUserAgentOverride", userAgent: @config.user_agent)
    end

    # Overrides the timezone through CDP.
    def apply_timezone
      @ferrum.page.command("Emulation.setTimezoneOverride", timezoneId: @config.timezone)
    end
  end
end
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module AgentFerrum
  # Settings for a browser session. Instances are plain value holders; the
  # only derived data is #ferrum_options.
  class Configuration
    # headless        -- run the browser headless (default true)
    # timeout         -- default timeout in seconds (default 30)
    # process_timeout -- forwarded to Ferrum only when set (default nil)
    # poll_interval   -- polling interval in seconds (default 0.1)
    # viewport        -- [width, height] (default [1920, 1080])
    # stealth         -- stealth profile symbol (default :off)
    # download_path   -- download directory (default nil)
    # browser_path    -- custom browser executable, forwarded only when set
    # chrome_args     -- extra command-line flags (default [])
    # user_agent      -- custom user agent (default nil)
    # locale          -- sent as the "lang" browser option when set
    # timezone        -- timezone id (default nil)
    attr_accessor :headless, :timeout, :process_timeout, :poll_interval,
                  :viewport, :stealth, :download_path,
                  :browser_path, :chrome_args, :user_agent,
                  :locale, :timezone

    def initialize
      @headless = true
      @timeout = 30
      @process_timeout = nil
      @poll_interval = 0.1
      @viewport = [1920, 1080]
      @stealth = :off
      @download_path = nil
      @browser_path = nil
      @chrome_args = []
      @user_agent = nil
      @locale = nil
      @timezone = nil
    end

    # Gives a dup its own viewport and chrome_args arrays so that mutating
    # the copy cannot leak back into the original.
    def initialize_dup(original)
      super
      @viewport = original.viewport.dup
      @chrome_args = original.chrome_args.dup
    end

    # Builds the keyword options for Ferrum::Browser.new.
    #
    # Each entry of chrome_args becomes one :browser_options pair. A leading
    # "--" is optional. "flag" maps to {"flag" => nil}; "flag=value" is split
    # at the first "=" into {"flag" => "value"}, so the flag is keyed by its
    # bare name rather than by the literal "flag=value" text. Entries are
    # converted with to_s, so symbols are accepted.
    #
    # Returns a fresh Hash on every call.
    def ferrum_options
      opts = {
        headless: @headless,
        timeout: @timeout,
        window_size: @viewport,
        browser_options: {}
      }
      opts[:process_timeout] = @process_timeout if @process_timeout
      opts[:browser_path] = @browser_path if @browser_path
      opts[:browser_options]["lang"] = @locale if @locale

      @chrome_args.each do |arg|
        name, separator, value = arg.to_s.delete_prefix("--").partition("=")
        # No "=" at all means a bare switch, which is represented by nil.
        opts[:browser_options][name] = separator.empty? ? nil : value
      end

      opts
    end
  end
end
|