omniai-tools 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +3 -12
- data/lib/omniai/tools/browser/base_driver.rb +78 -0
- data/lib/omniai/tools/browser/base_tool.rb +31 -4
- data/lib/omniai/tools/browser/button_click_tool.rb +1 -14
- data/lib/omniai/tools/browser/element_click_tool.rb +30 -0
- data/lib/omniai/tools/browser/elements/element_grouper.rb +73 -0
- data/lib/omniai/tools/browser/elements/nearby_element_detector.rb +108 -0
- data/lib/omniai/tools/browser/formatters/action_formatter.rb +37 -0
- data/lib/omniai/tools/browser/formatters/data_entry_formatter.rb +135 -0
- data/lib/omniai/tools/browser/formatters/element_formatter.rb +52 -0
- data/lib/omniai/tools/browser/formatters/input_formatter.rb +59 -0
- data/lib/omniai/tools/browser/inspect_tool.rb +46 -13
- data/lib/omniai/tools/browser/inspect_utils.rb +51 -0
- data/lib/omniai/tools/browser/link_click_tool.rb +2 -14
- data/lib/omniai/tools/browser/page_inspect/button_summarizer.rb +140 -0
- data/lib/omniai/tools/browser/page_inspect/form_summarizer.rb +98 -0
- data/lib/omniai/tools/browser/page_inspect/html_summarizer.rb +37 -0
- data/lib/omniai/tools/browser/page_inspect/link_summarizer.rb +103 -0
- data/lib/omniai/tools/browser/page_inspect_tool.rb +30 -0
- data/lib/omniai/tools/browser/page_screenshot_tool.rb +22 -0
- data/lib/omniai/tools/browser/selector_generator/base_selectors.rb +28 -0
- data/lib/omniai/tools/browser/selector_generator/contextual_selectors.rb +140 -0
- data/lib/omniai/tools/browser/selector_generator.rb +73 -0
- data/lib/omniai/tools/browser/selector_inspect_tool.rb +44 -0
- data/lib/omniai/tools/browser/text_field_area_set_tool.rb +2 -31
- data/lib/omniai/tools/browser/visit_tool.rb +1 -1
- data/lib/omniai/tools/browser/watir_driver.rb +224 -0
- data/lib/omniai/tools/browser_tool.rb +265 -0
- data/lib/omniai/tools/version.rb +1 -1
- metadata +23 -2
@@ -0,0 +1,265 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
# A tool for controlling a browser.
|
6
|
+
class BrowserTool < OmniAI::Tool
|
7
|
+
module Action
|
8
|
+
VISIT = "visit"
|
9
|
+
PAGE_INSPECT = "page_inspect"
|
10
|
+
UI_INSPECT = "ui_inspect"
|
11
|
+
SELECTOR_INSPECT = "selector_inspect"
|
12
|
+
BUTTON_CLICK = "button_click"
|
13
|
+
LINK_CLICK = "link_click"
|
14
|
+
ELEMENT_CLICK = "element_click"
|
15
|
+
TEXT_FIELD_SET = "text_field_set"
|
16
|
+
SCREENSHOT = "screenshot"
|
17
|
+
end
|
18
|
+
|
19
|
+
ACTIONS = [
|
20
|
+
Action::VISIT,
|
21
|
+
Action::PAGE_INSPECT,
|
22
|
+
Action::UI_INSPECT,
|
23
|
+
Action::SELECTOR_INSPECT,
|
24
|
+
Action::BUTTON_CLICK,
|
25
|
+
Action::LINK_CLICK,
|
26
|
+
Action::ELEMENT_CLICK,
|
27
|
+
Action::TEXT_FIELD_SET,
|
28
|
+
Action::SCREENSHOT,
|
29
|
+
].freeze
|
30
|
+
|
31
|
+
description <<~TEXT
|
32
|
+
Control a web browser to automate interactions with websites.
|
33
|
+
|
34
|
+
1. `#{Action::VISIT}` - Navigate to a website
|
35
|
+
Required: "action": "visit", "url": "[website URL]"
|
36
|
+
|
37
|
+
2. `#{Action::PAGE_INSPECT}` - Get page HTML or summary
|
38
|
+
Required: "action": "page_inspect"
|
39
|
+
Optional: "full_html": true/false (get full HTML vs summary, default: summary)
|
40
|
+
|
41
|
+
3. `#{Action::UI_INSPECT}` - Find elements by text content
|
42
|
+
Required: "action": "ui_inspect", "text_content": "[text to search for]"
|
43
|
+
Optional:
|
44
|
+
- "selector": "[CSS selector]" (search within specific container)
|
45
|
+
- "context_size": [number] (parent elements to show, default: 2)
|
46
|
+
|
47
|
+
4. `#{Action::SELECTOR_INSPECT}` - Find elements by CSS selector
|
48
|
+
Required: "action": "selector_inspect", "selector": "[CSS selector]"
|
49
|
+
Optional: "context_size": [number] (parent elements to show, default: 2)
|
50
|
+
|
51
|
+
5. `#{Action::BUTTON_CLICK}` - Click a button
|
52
|
+
Required: "action": "button_click", "selector": "[button text or CSS selector]"
|
53
|
+
|
54
|
+
6. `#{Action::LINK_CLICK}` - Click a link
|
55
|
+
Required: "action": "link_click", "selector": "[link text or CSS selector]"
|
56
|
+
|
57
|
+
7. `#{Action::ELEMENT_CLICK}` - Click any clickable element (div, span, etc.)
|
58
|
+
Required: "action": "element_click", "selector": "[CSS selector, text content, ID, or XPath]"
|
59
|
+
|
60
|
+
8. `#{Action::TEXT_FIELD_SET}` - Enter text in input fields/text areas
|
61
|
+
Required: "action": "text_field_set", "selector": "[field CSS selector]", "value": "[text to enter]"
|
62
|
+
|
63
|
+
9. `#{Action::SCREENSHOT}` - Take a screenshot of the page or specific element
|
64
|
+
Required: "action": "screenshot"
|
65
|
+
|
66
|
+
Examples:
|
67
|
+
Visit a website
|
68
|
+
{"action": "#{Action::VISIT}", "url": "https://example.com"}
|
69
|
+
Get page summary
|
70
|
+
{"action": "#{Action::PAGE_INSPECT}"}
|
71
|
+
Get full page HTML
|
72
|
+
{"action": "#{Action::PAGE_INSPECT}", "full_html": true}
|
73
|
+
Find elements containing text
|
74
|
+
{"action": "#{Action::UI_INSPECT}", "text_content": "Submit"}
|
75
|
+
Find elements with context
|
76
|
+
{"action": "#{Action::UI_INSPECT}", "text_content": "Login", "context_size": 3}
|
77
|
+
Find elements by CSS selector
|
78
|
+
{"action": "#{Action::SELECTOR_INSPECT}", "selector": ".button"}
|
79
|
+
Find selector with context
|
80
|
+
{"action": "#{Action::SELECTOR_INSPECT}", "selector": "h1", "context_size": 2}
|
81
|
+
Click a button with specific text
|
82
|
+
{"action": "#{Action::BUTTON_CLICK}", "selector": "Submit"}
|
83
|
+
Click a link with specific text
|
84
|
+
{"action": "#{Action::LINK_CLICK}", "selector": "Learn More"}
|
85
|
+
Click element by CSS selector
|
86
|
+
{"action": "#{Action::ELEMENT_CLICK}", "selector": ".wv-select__menu__option"}
|
87
|
+
Click element by role attribute
|
88
|
+
{"action": "#{Action::ELEMENT_CLICK}", "selector": "[role='listitem']"}
|
89
|
+
Click element by text content
|
90
|
+
{"action": "#{Action::ELEMENT_CLICK}", "selector": "Medical Evaluation Management"}
|
91
|
+
Set text in an input field
|
92
|
+
{"action": "#{Action::TEXT_FIELD_SET}", "selector": "#search", "value": "search query"}
|
93
|
+
Take a full page screenshot
|
94
|
+
{"action": "#{Action::SCREENSHOT}"}
|
95
|
+
TEXT
|
96
|
+
|
97
|
+
parameter :action, :string, enum: ACTIONS, description: <<~TEXT
|
98
|
+
The browser action to perform. Options:
|
99
|
+
* `#{Action::VISIT}`: Navigate to a website
|
100
|
+
* `#{Action::PAGE_INSPECT}`: Get full HTML or summarized page information
|
101
|
+
* `#{Action::UI_INSPECT}`: Find elements containing specific text
|
102
|
+
* `#{Action::SELECTOR_INSPECT}`: Find elements matching CSS selectors
|
103
|
+
* `#{Action::BUTTON_CLICK}`: Click a button element
|
104
|
+
* `#{Action::LINK_CLICK}`: Click a link element
|
105
|
+
* `#{Action::ELEMENT_CLICK}`: Click any clickable element
|
106
|
+
* `#{Action::TEXT_FIELD_SET}`: Enter text in input fields or text areas
|
107
|
+
* `#{Action::SCREENSHOT}`: Take a screenshot of the page or specific element
|
108
|
+
TEXT
|
109
|
+
|
110
|
+
parameter :url, :string, description: <<~TEXT
|
111
|
+
The URL to visit. Required for the following actions:
|
112
|
+
* `#{Action::VISIT}`
|
113
|
+
TEXT
|
114
|
+
|
115
|
+
parameter :selector, :string, description: <<~TEXT
|
116
|
+
CSS selector, ID, or text content of the element. Required for the following actions:
|
117
|
+
* `#{Action::SELECTOR_INSPECT}`
|
118
|
+
* `#{Action::BUTTON_CLICK}`
|
119
|
+
* `#{Action::LINK_CLICK}`
|
120
|
+
* `#{Action::ELEMENT_CLICK}`
|
121
|
+
* `#{Action::TEXT_FIELD_SET}`
|
122
|
+
|
123
|
+
Optional for the following actions:
|
124
|
+
* `#{Action::UI_INSPECT}` (search within specific container)
|
125
|
+
TEXT
|
126
|
+
|
127
|
+
parameter :value, :string, description: <<~TEXT
|
128
|
+
The value to set in the text field. Required for the following actions:
|
129
|
+
* `#{Action::TEXT_FIELD_SET}`
|
130
|
+
TEXT
|
131
|
+
|
132
|
+
parameter :context_size, :integer, description: <<~TEXT
|
133
|
+
Number of parent elements to include in inspect results (default: 2). Optional for the following actions:
|
134
|
+
* `#{Action::UI_INSPECT}`
|
135
|
+
* `#{Action::SELECTOR_INSPECT}`
|
136
|
+
TEXT
|
137
|
+
|
138
|
+
parameter :full_html, :boolean, description: <<~TEXT
|
139
|
+
Return the full HTML of the page instead of a summary. Optional for the following actions:
|
140
|
+
* `#{Action::PAGE_INSPECT}`
|
141
|
+
TEXT
|
142
|
+
|
143
|
+
parameter :text_content, :string, description: <<~TEXT
|
144
|
+
Search for elements containing this text. Required for the following actions:
|
145
|
+
* `#{Action::UI_INSPECT}`
|
146
|
+
TEXT
|
147
|
+
|
148
|
+
required %i[action]
|
149
|
+
|
150
|
+
# @param logger [Logger]
|
151
|
+
# @param driver [OmniAI::Tools::Browser::BaseDriver]
|
152
|
+
def initialize(logger:, driver:)
|
153
|
+
super()
|
154
|
+
@logger = logger
|
155
|
+
@driver = driver
|
156
|
+
end
|
157
|
+
|
158
|
+
def cleanup!
|
159
|
+
@driver.close
|
160
|
+
end
|
161
|
+
|
162
|
+
# @param action [String]
|
163
|
+
# @param url [String, nil]
|
164
|
+
# @param selector [String, nil]
|
165
|
+
# @param value [String, nil]
|
166
|
+
# @param context_size [Integer]
|
167
|
+
# @param full_html [Boolean]
|
168
|
+
# @param text_content [String, nil]
|
169
|
+
#
|
170
|
+
# @return [String]
|
171
|
+
def execute(action:, url: nil, selector: nil, value: nil, context_size: 2, full_html: false, text_content: nil)
|
172
|
+
case action.to_s.downcase
|
173
|
+
when Action::VISIT
|
174
|
+
require_param!(:url, url)
|
175
|
+
visit_tool.execute(url:)
|
176
|
+
when Action::PAGE_INSPECT
|
177
|
+
if full_html
|
178
|
+
page_inspect_tool.execute
|
179
|
+
else
|
180
|
+
page_inspect_tool.execute(summarize: true)
|
181
|
+
end
|
182
|
+
when Action::UI_INSPECT
|
183
|
+
require_param!(:text_content, text_content)
|
184
|
+
inspect_tool.execute(text_content:, selector:, context_size:)
|
185
|
+
when Action::SELECTOR_INSPECT
|
186
|
+
require_param!(:selector, selector)
|
187
|
+
selector_inspect_tool.execute(selector:, context_size:)
|
188
|
+
when Action::BUTTON_CLICK
|
189
|
+
require_param!(:selector, selector)
|
190
|
+
button_click_tool.execute(selector:)
|
191
|
+
when Action::LINK_CLICK
|
192
|
+
require_param!(:selector, selector)
|
193
|
+
link_click_tool.execute(selector:)
|
194
|
+
when Action::ELEMENT_CLICK
|
195
|
+
require_param!(:selector, selector)
|
196
|
+
element_click_tool.execute(selector:)
|
197
|
+
when Action::TEXT_FIELD_SET
|
198
|
+
require_param!(:selector, selector)
|
199
|
+
require_param!(:value, value)
|
200
|
+
text_field_area_set_tool.execute(selector:, text: value)
|
201
|
+
when Action::SCREENSHOT
|
202
|
+
page_screenshot_tool.execute
|
203
|
+
else
|
204
|
+
{ error: "Unsupported action: #{action}. Supported actions are: #{ACTIONS.join(', ')}" }
|
205
|
+
end
|
206
|
+
end
|
207
|
+
|
208
|
+
private
|
209
|
+
|
210
|
+
# @param name [Symbol]
|
211
|
+
# @param value [Object]
|
212
|
+
#
|
213
|
+
# @raise [ArgumentError]
|
214
|
+
# @return [void]
|
215
|
+
def require_param!(name, value)
|
216
|
+
raise ArgumentError, "#{name} parameter is required for this action" if value.nil?
|
217
|
+
end
|
218
|
+
|
219
|
+
# @return [Browser::VisitTool]
|
220
|
+
def visit_tool
|
221
|
+
Browser::VisitTool.new(driver: @driver, logger: @logger)
|
222
|
+
end
|
223
|
+
|
224
|
+
# @return [Browser::PageInspectTool]
|
225
|
+
def page_inspect_tool
|
226
|
+
Browser::PageInspectTool.new(driver: @driver, logger: @logger)
|
227
|
+
end
|
228
|
+
|
229
|
+
# @return [Browser::InspectTool]
|
230
|
+
def inspect_tool
|
231
|
+
Browser::InspectTool.new(driver: @driver, logger: @logger)
|
232
|
+
end
|
233
|
+
|
234
|
+
# @return [Browser::SelectorInspectTool]
|
235
|
+
def selector_inspect_tool
|
236
|
+
Browser::SelectorInspectTool.new(driver: @driver, logger: @logger)
|
237
|
+
end
|
238
|
+
|
239
|
+
# @return [Browser::ButtonClickTool]
|
240
|
+
def button_click_tool
|
241
|
+
Browser::ButtonClickTool.new(driver: @driver, logger: @logger)
|
242
|
+
end
|
243
|
+
|
244
|
+
# @return [Browser::LinkClickTool]
|
245
|
+
def link_click_tool
|
246
|
+
Browser::LinkClickTool.new(driver: @driver, logger: @logger)
|
247
|
+
end
|
248
|
+
|
249
|
+
# @return [Browser::ElementClickTool]
|
250
|
+
def element_click_tool
|
251
|
+
Browser::ElementClickTool.new(driver: @driver, logger: @logger)
|
252
|
+
end
|
253
|
+
|
254
|
+
# @return [Browser::TextFieldAreaSetTool]
|
255
|
+
def text_field_area_set_tool
|
256
|
+
Browser::TextFieldAreaSetTool.new(driver: @driver, logger: @logger)
|
257
|
+
end
|
258
|
+
|
259
|
+
# @return [Browser::PageScreenshotTool]
|
260
|
+
def page_screenshot_tool
|
261
|
+
Browser::PageScreenshotTool.new(driver: @driver, logger: @logger)
|
262
|
+
end
|
263
|
+
end
|
264
|
+
end
|
265
|
+
end
|
data/lib/omniai/tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: omniai-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.0
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Sylvestre
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date: 2025-05-
|
10
|
+
date: 2025-05-29 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: omniai
|
@@ -49,12 +49,33 @@ files:
|
|
49
49
|
- bin/console
|
50
50
|
- bin/setup
|
51
51
|
- lib/omniai/tools.rb
|
52
|
+
- lib/omniai/tools/browser/base_driver.rb
|
52
53
|
- lib/omniai/tools/browser/base_tool.rb
|
53
54
|
- lib/omniai/tools/browser/button_click_tool.rb
|
55
|
+
- lib/omniai/tools/browser/element_click_tool.rb
|
56
|
+
- lib/omniai/tools/browser/elements/element_grouper.rb
|
57
|
+
- lib/omniai/tools/browser/elements/nearby_element_detector.rb
|
58
|
+
- lib/omniai/tools/browser/formatters/action_formatter.rb
|
59
|
+
- lib/omniai/tools/browser/formatters/data_entry_formatter.rb
|
60
|
+
- lib/omniai/tools/browser/formatters/element_formatter.rb
|
61
|
+
- lib/omniai/tools/browser/formatters/input_formatter.rb
|
54
62
|
- lib/omniai/tools/browser/inspect_tool.rb
|
63
|
+
- lib/omniai/tools/browser/inspect_utils.rb
|
55
64
|
- lib/omniai/tools/browser/link_click_tool.rb
|
65
|
+
- lib/omniai/tools/browser/page_inspect/button_summarizer.rb
|
66
|
+
- lib/omniai/tools/browser/page_inspect/form_summarizer.rb
|
67
|
+
- lib/omniai/tools/browser/page_inspect/html_summarizer.rb
|
68
|
+
- lib/omniai/tools/browser/page_inspect/link_summarizer.rb
|
69
|
+
- lib/omniai/tools/browser/page_inspect_tool.rb
|
70
|
+
- lib/omniai/tools/browser/page_screenshot_tool.rb
|
71
|
+
- lib/omniai/tools/browser/selector_generator.rb
|
72
|
+
- lib/omniai/tools/browser/selector_generator/base_selectors.rb
|
73
|
+
- lib/omniai/tools/browser/selector_generator/contextual_selectors.rb
|
74
|
+
- lib/omniai/tools/browser/selector_inspect_tool.rb
|
56
75
|
- lib/omniai/tools/browser/text_field_area_set_tool.rb
|
57
76
|
- lib/omniai/tools/browser/visit_tool.rb
|
77
|
+
- lib/omniai/tools/browser/watir_driver.rb
|
78
|
+
- lib/omniai/tools/browser_tool.rb
|
58
79
|
- lib/omniai/tools/database/base_tool.rb
|
59
80
|
- lib/omniai/tools/database/sqlite_tool.rb
|
60
81
|
- lib/omniai/tools/disk/base_tool.rb
|