omniai-tools 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -12
  3. data/lib/omniai/tools/browser/base_driver.rb +78 -0
  4. data/lib/omniai/tools/browser/base_tool.rb +31 -4
  5. data/lib/omniai/tools/browser/button_click_tool.rb +1 -14
  6. data/lib/omniai/tools/browser/element_click_tool.rb +30 -0
  7. data/lib/omniai/tools/browser/elements/element_grouper.rb +73 -0
  8. data/lib/omniai/tools/browser/elements/nearby_element_detector.rb +108 -0
  9. data/lib/omniai/tools/browser/formatters/action_formatter.rb +37 -0
  10. data/lib/omniai/tools/browser/formatters/data_entry_formatter.rb +135 -0
  11. data/lib/omniai/tools/browser/formatters/element_formatter.rb +52 -0
  12. data/lib/omniai/tools/browser/formatters/input_formatter.rb +59 -0
  13. data/lib/omniai/tools/browser/inspect_tool.rb +46 -13
  14. data/lib/omniai/tools/browser/inspect_utils.rb +51 -0
  15. data/lib/omniai/tools/browser/link_click_tool.rb +2 -14
  16. data/lib/omniai/tools/browser/page_inspect/button_summarizer.rb +140 -0
  17. data/lib/omniai/tools/browser/page_inspect/form_summarizer.rb +98 -0
  18. data/lib/omniai/tools/browser/page_inspect/html_summarizer.rb +37 -0
  19. data/lib/omniai/tools/browser/page_inspect/link_summarizer.rb +103 -0
  20. data/lib/omniai/tools/browser/page_inspect_tool.rb +30 -0
  21. data/lib/omniai/tools/browser/page_screenshot_tool.rb +22 -0
  22. data/lib/omniai/tools/browser/selector_generator/base_selectors.rb +28 -0
  23. data/lib/omniai/tools/browser/selector_generator/contextual_selectors.rb +140 -0
  24. data/lib/omniai/tools/browser/selector_generator.rb +73 -0
  25. data/lib/omniai/tools/browser/selector_inspect_tool.rb +44 -0
  26. data/lib/omniai/tools/browser/text_field_area_set_tool.rb +2 -31
  27. data/lib/omniai/tools/browser/visit_tool.rb +1 -1
  28. data/lib/omniai/tools/browser/watir_driver.rb +224 -0
  29. data/lib/omniai/tools/browser_tool.rb +265 -0
  30. data/lib/omniai/tools/version.rb +1 -1
  31. metadata +23 -2
@@ -0,0 +1,265 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ # A tool for controlling a browser.
6
+ class BrowserTool < OmniAI::Tool
7
+ module Action
8
+ VISIT = "visit"
9
+ PAGE_INSPECT = "page_inspect"
10
+ UI_INSPECT = "ui_inspect"
11
+ SELECTOR_INSPECT = "selector_inspect"
12
+ BUTTON_CLICK = "button_click"
13
+ LINK_CLICK = "link_click"
14
+ ELEMENT_CLICK = "element_click"
15
+ TEXT_FIELD_SET = "text_field_set"
16
+ SCREENSHOT = "screenshot"
17
+ end
18
+
19
+ ACTIONS = [
20
+ Action::VISIT,
21
+ Action::PAGE_INSPECT,
22
+ Action::UI_INSPECT,
23
+ Action::SELECTOR_INSPECT,
24
+ Action::BUTTON_CLICK,
25
+ Action::LINK_CLICK,
26
+ Action::ELEMENT_CLICK,
27
+ Action::TEXT_FIELD_SET,
28
+ Action::SCREENSHOT,
29
+ ].freeze
30
+
31
+ description <<~TEXT
32
+ Control a web browser to automate interactions with websites.
33
+
34
+ 1. `#{Action::VISIT}` - Navigate to a website
35
+ Required: "action": "visit", "url": "[website URL]"
36
+
37
+ 2. `#{Action::PAGE_INSPECT}` - Get page HTML or summary
38
+ Required: "action": "page_inspect"
39
+ Optional: "full_html": true/false (get full HTML vs summary, default: summary)
40
+
41
+ 3. `#{Action::UI_INSPECT}` - Find elements by text content
42
+ Required: "action": "ui_inspect", "text_content": "[text to search for]"
43
+ Optional:
44
+ - "selector": "[CSS selector]" (search within specific container)
45
+ - "context_size": [number] (parent elements to show, default: 2)
46
+
47
+ 4. `#{Action::SELECTOR_INSPECT}` - Find elements by CSS selector
48
+ Required: "action": "selector_inspect", "selector": "[CSS selector]"
49
+ Optional: "context_size": [number] (parent elements to show, default: 2)
50
+
51
+ 5. `#{Action::BUTTON_CLICK}` - Click a button
52
+ Required: "action": "button_click", "selector": "[button text or CSS selector]"
53
+
54
+ 6. `#{Action::LINK_CLICK}` - Click a link
55
+ Required: "action": "link_click", "selector": "[link text or CSS selector]"
56
+
57
+ 7. `#{Action::ELEMENT_CLICK}` - Click any clickable element (div, span, etc.)
58
+ Required: "action": "element_click", "selector": "[CSS selector, text content, ID, or XPath]"
59
+
60
+ 8. `#{Action::TEXT_FIELD_SET}` - Enter text in input fields/text areas
61
+ Required: "action": "text_field_set", "selector": "[field CSS selector]", "value": "[text to enter]"
62
+
63
+ 9. `#{Action::SCREENSHOT}` - Take a screenshot of the page or specific element
64
+ Required: "action": "screenshot"
65
+
66
+ Examples:
67
+ Visit a website
68
+ {"action": "#{Action::VISIT}", "url": "https://example.com"}
69
+ Get page summary
70
+ {"action": "#{Action::PAGE_INSPECT}"}
71
+ Get full page HTML
72
+ {"action": "#{Action::PAGE_INSPECT}", "full_html": true}
73
+ Find elements containing text
74
+ {"action": "#{Action::UI_INSPECT}", "text_content": "Submit"}
75
+ Find elements with context
76
+ {"action": "#{Action::UI_INSPECT}", "text_content": "Login", "context_size": 3}
77
+ Find elements by CSS selector
78
+ {"action": "#{Action::SELECTOR_INSPECT}", "selector": ".button"}
79
+ Find selector with context
80
+ {"action": "#{Action::SELECTOR_INSPECT}", "selector": "h1", "context_size": 2}
81
+ Click a button with specific text
82
+ {"action": "#{Action::BUTTON_CLICK}", "selector": "Submit"}
83
+ Click a link with specific text
84
+ {"action": "#{Action::LINK_CLICK}", "selector": "Learn More"}
85
+ Click element by CSS selector
86
+ {"action": "#{Action::ELEMENT_CLICK}", "selector": ".wv-select__menu__option"}
87
+ Click element by role attribute
88
+ {"action": "#{Action::ELEMENT_CLICK}", "selector": "[role='listitem']"}
89
+ Click element by text content
90
+ {"action": "#{Action::ELEMENT_CLICK}", "selector": "Medical Evaluation Management"}
91
+ Set text in an input field
92
+ {"action": "#{Action::TEXT_FIELD_SET}", "selector": "#search", "value": "search query"}
93
+ Take a full page screenshot
94
+ {"action": "#{Action::SCREENSHOT}"}
95
+ TEXT
96
+
97
+ parameter :action, :string, enum: ACTIONS, description: <<~TEXT
98
+ The browser action to perform. Options:
99
+ * `#{Action::VISIT}`: Navigate to a website
100
+ * `#{Action::PAGE_INSPECT}`: Get full HTML or summarized page information
101
+ * `#{Action::UI_INSPECT}`: Find elements containing specific text
102
+ * `#{Action::SELECTOR_INSPECT}`: Find elements matching CSS selectors
103
+ * `#{Action::BUTTON_CLICK}`: Click a button element
104
+ * `#{Action::LINK_CLICK}`: Click a link element
105
+ * `#{Action::ELEMENT_CLICK}`: Click any clickable element
106
+ * `#{Action::TEXT_FIELD_SET}`: Enter text in input fields or text areas
107
+ * `#{Action::SCREENSHOT}`: Take a screenshot of the page or specific element
108
+ TEXT
109
+
110
+ parameter :url, :string, description: <<~TEXT
111
+ The URL to visit. Required for the following actions:
112
+ * `#{Action::VISIT}`
113
+ TEXT
114
+
115
+ parameter :selector, :string, description: <<~TEXT
116
+ CSS selector, ID, or text content of the element. Required for the following actions:
117
+ * `#{Action::SELECTOR_INSPECT}`
118
+ * `#{Action::BUTTON_CLICK}`
119
+ * `#{Action::LINK_CLICK}`
120
+ * `#{Action::ELEMENT_CLICK}`
121
+ * `#{Action::TEXT_FIELD_SET}`
122
+
123
+ Optional for the following actions:
124
+ * `#{Action::UI_INSPECT}` (search within specific container)
125
+ TEXT
126
+
127
+ parameter :value, :string, description: <<~TEXT
128
+ The value to set in the text field. Required for the following actions:
129
+ * `#{Action::TEXT_FIELD_SET}`
130
+ TEXT
131
+
132
+ parameter :context_size, :integer, description: <<~TEXT
133
+ Number of parent elements to include in inspect results (default: 2). Optional for the following actions:
134
+ * `#{Action::UI_INSPECT}`
135
+ * `#{Action::SELECTOR_INSPECT}`
136
+ TEXT
137
+
138
+ parameter :full_html, :boolean, description: <<~TEXT
139
+ Return the full HTML of the page instead of a summary. Optional for the following actions:
140
+ * `#{Action::PAGE_INSPECT}`
141
+ TEXT
142
+
143
+ parameter :text_content, :string, description: <<~TEXT
144
+ Search for elements containing this text. Required for the following actions:
145
+ * `#{Action::UI_INSPECT}`
146
+ TEXT
147
+
148
+ required %i[action]
149
+
150
+ # @param logger [Logger]
151
+ # @param driver [OmniAI::Tools::Browser::BaseDriver]
152
+ def initialize(logger:, driver:)
153
+ super()
154
+ @logger = logger
155
+ @driver = driver
156
+ end
157
+
158
+ def cleanup!
159
+ @driver.close
160
+ end
161
+
162
+ # @param action [String]
163
+ # @param url [String, nil]
164
+ # @param selector [String, nil]
165
+ # @param value [String, nil]
166
+ # @param context_size [Integer]
167
+ # @param full_html [Boolean]
168
+ # @param text_content [String, nil]
169
+ #
170
+ # @return [String]
171
+ def execute(action:, url: nil, selector: nil, value: nil, context_size: 2, full_html: false, text_content: nil)
172
+ case action.to_s.downcase
173
+ when Action::VISIT
174
+ require_param!(:url, url)
175
+ visit_tool.execute(url:)
176
+ when Action::PAGE_INSPECT
177
+ if full_html
178
+ page_inspect_tool.execute
179
+ else
180
+ page_inspect_tool.execute(summarize: true)
181
+ end
182
+ when Action::UI_INSPECT
183
+ require_param!(:text_content, text_content)
184
+ inspect_tool.execute(text_content:, selector:, context_size:)
185
+ when Action::SELECTOR_INSPECT
186
+ require_param!(:selector, selector)
187
+ selector_inspect_tool.execute(selector:, context_size:)
188
+ when Action::BUTTON_CLICK
189
+ require_param!(:selector, selector)
190
+ button_click_tool.execute(selector:)
191
+ when Action::LINK_CLICK
192
+ require_param!(:selector, selector)
193
+ link_click_tool.execute(selector:)
194
+ when Action::ELEMENT_CLICK
195
+ require_param!(:selector, selector)
196
+ element_click_tool.execute(selector:)
197
+ when Action::TEXT_FIELD_SET
198
+ require_param!(:selector, selector)
199
+ require_param!(:value, value)
200
+ text_field_area_set_tool.execute(selector:, text: value)
201
+ when Action::SCREENSHOT
202
+ page_screenshot_tool.execute
203
+ else
204
+ { error: "Unsupported action: #{action}. Supported actions are: #{ACTIONS.join(', ')}" }
205
+ end
206
+ end
207
+
208
+ private
209
+
210
+ # @param name [Symbol]
211
+ # @param value [Object]
212
+ #
213
+ # @raise [ArgumentError]
214
+ # @return [void]
215
+ def require_param!(name, value)
216
+ raise ArgumentError, "#{name} parameter is required for this action" if value.nil?
217
+ end
218
+
219
+ # @return [Browser::VisitTool]
220
+ def visit_tool
221
+ Browser::VisitTool.new(driver: @driver, logger: @logger)
222
+ end
223
+
224
+ # @return [Browser::PageInspectTool]
225
+ def page_inspect_tool
226
+ Browser::PageInspectTool.new(driver: @driver, logger: @logger)
227
+ end
228
+
229
+ # @return [Browser::InspectTool]
230
+ def inspect_tool
231
+ Browser::InspectTool.new(driver: @driver, logger: @logger)
232
+ end
233
+
234
+ # @return [Browser::SelectorInspectTool]
235
+ def selector_inspect_tool
236
+ Browser::SelectorInspectTool.new(driver: @driver, logger: @logger)
237
+ end
238
+
239
+ # @return [Browser::ButtonClickTool]
240
+ def button_click_tool
241
+ Browser::ButtonClickTool.new(driver: @driver, logger: @logger)
242
+ end
243
+
244
+ # @return [Browser::LinkClickTool]
245
+ def link_click_tool
246
+ Browser::LinkClickTool.new(driver: @driver, logger: @logger)
247
+ end
248
+
249
+ # @return [Browser::ElementClickTool]
250
+ def element_click_tool
251
+ Browser::ElementClickTool.new(driver: @driver, logger: @logger)
252
+ end
253
+
254
+ # @return [Browser::TextFieldAreaSetTool]
255
+ def text_field_area_set_tool
256
+ Browser::TextFieldAreaSetTool.new(driver: @driver, logger: @logger)
257
+ end
258
+
259
+ # @return [Browser::PageScreenshotTool]
260
+ def page_screenshot_tool
261
+ Browser::PageScreenshotTool.new(driver: @driver, logger: @logger)
262
+ end
263
+ end
264
+ end
265
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Tools
5
- VERSION = "0.5.0"
5
+ VERSION = "0.5.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.0
4
+ version: 0.5.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-05-15 00:00:00.000000000 Z
10
+ date: 2025-05-29 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: omniai
@@ -49,12 +49,33 @@ files:
49
49
  - bin/console
50
50
  - bin/setup
51
51
  - lib/omniai/tools.rb
52
+ - lib/omniai/tools/browser/base_driver.rb
52
53
  - lib/omniai/tools/browser/base_tool.rb
53
54
  - lib/omniai/tools/browser/button_click_tool.rb
55
+ - lib/omniai/tools/browser/element_click_tool.rb
56
+ - lib/omniai/tools/browser/elements/element_grouper.rb
57
+ - lib/omniai/tools/browser/elements/nearby_element_detector.rb
58
+ - lib/omniai/tools/browser/formatters/action_formatter.rb
59
+ - lib/omniai/tools/browser/formatters/data_entry_formatter.rb
60
+ - lib/omniai/tools/browser/formatters/element_formatter.rb
61
+ - lib/omniai/tools/browser/formatters/input_formatter.rb
54
62
  - lib/omniai/tools/browser/inspect_tool.rb
63
+ - lib/omniai/tools/browser/inspect_utils.rb
55
64
  - lib/omniai/tools/browser/link_click_tool.rb
65
+ - lib/omniai/tools/browser/page_inspect/button_summarizer.rb
66
+ - lib/omniai/tools/browser/page_inspect/form_summarizer.rb
67
+ - lib/omniai/tools/browser/page_inspect/html_summarizer.rb
68
+ - lib/omniai/tools/browser/page_inspect/link_summarizer.rb
69
+ - lib/omniai/tools/browser/page_inspect_tool.rb
70
+ - lib/omniai/tools/browser/page_screenshot_tool.rb
71
+ - lib/omniai/tools/browser/selector_generator.rb
72
+ - lib/omniai/tools/browser/selector_generator/base_selectors.rb
73
+ - lib/omniai/tools/browser/selector_generator/contextual_selectors.rb
74
+ - lib/omniai/tools/browser/selector_inspect_tool.rb
56
75
  - lib/omniai/tools/browser/text_field_area_set_tool.rb
57
76
  - lib/omniai/tools/browser/visit_tool.rb
77
+ - lib/omniai/tools/browser/watir_driver.rb
78
+ - lib/omniai/tools/browser_tool.rb
58
79
  - lib/omniai/tools/database/base_tool.rb
59
80
  - lib/omniai/tools/database/sqlite_tool.rb
60
81
  - lib/omniai/tools/disk/base_tool.rb