omniai-tools 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +72 -16
  4. data/lib/omniai/tools/browser/base_driver.rb +78 -0
  5. data/lib/omniai/tools/browser/base_tool.rb +31 -4
  6. data/lib/omniai/tools/browser/button_click_tool.rb +1 -14
  7. data/lib/omniai/tools/browser/element_click_tool.rb +30 -0
  8. data/lib/omniai/tools/browser/elements/element_grouper.rb +73 -0
  9. data/lib/omniai/tools/browser/elements/nearby_element_detector.rb +108 -0
  10. data/lib/omniai/tools/browser/formatters/action_formatter.rb +37 -0
  11. data/lib/omniai/tools/browser/formatters/data_entry_formatter.rb +135 -0
  12. data/lib/omniai/tools/browser/formatters/element_formatter.rb +52 -0
  13. data/lib/omniai/tools/browser/formatters/input_formatter.rb +59 -0
  14. data/lib/omniai/tools/browser/inspect_tool.rb +46 -13
  15. data/lib/omniai/tools/browser/inspect_utils.rb +51 -0
  16. data/lib/omniai/tools/browser/link_click_tool.rb +2 -14
  17. data/lib/omniai/tools/browser/page_inspect/button_summarizer.rb +140 -0
  18. data/lib/omniai/tools/browser/page_inspect/form_summarizer.rb +98 -0
  19. data/lib/omniai/tools/browser/page_inspect/html_summarizer.rb +37 -0
  20. data/lib/omniai/tools/browser/page_inspect/link_summarizer.rb +103 -0
  21. data/lib/omniai/tools/browser/page_inspect_tool.rb +30 -0
  22. data/lib/omniai/tools/browser/page_screenshot_tool.rb +22 -0
  23. data/lib/omniai/tools/browser/selector_generator/base_selectors.rb +28 -0
  24. data/lib/omniai/tools/browser/selector_generator/contextual_selectors.rb +140 -0
  25. data/lib/omniai/tools/browser/selector_generator.rb +73 -0
  26. data/lib/omniai/tools/browser/selector_inspect_tool.rb +44 -0
  27. data/lib/omniai/tools/browser/text_field_area_set_tool.rb +2 -31
  28. data/lib/omniai/tools/browser/visit_tool.rb +1 -1
  29. data/lib/omniai/tools/browser/watir_driver.rb +222 -0
  30. data/lib/omniai/tools/browser_tool.rb +262 -0
  31. data/lib/omniai/tools/computer/base_driver.rb +179 -0
  32. data/lib/omniai/tools/computer/mac_driver.rb +103 -0
  33. data/lib/omniai/tools/computer_tool.rb +189 -0
  34. data/lib/omniai/tools/database/base_driver.rb +17 -0
  35. data/lib/omniai/tools/database/postgres_driver.rb +30 -0
  36. data/lib/omniai/tools/database/sqlite_driver.rb +29 -0
  37. data/lib/omniai/tools/database_tool.rb +100 -0
  38. data/lib/omniai/tools/version.rb +1 -1
  39. metadata +31 -5
  40. data/lib/omniai/tools/database/base_tool.rb +0 -37
  41. data/lib/omniai/tools/database/sqlite_tool.rb +0 -110
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+
5
+ module OmniAI
6
+ module Tools
7
+ module Browser
8
# Browser automation tool that captures the current page and returns the image
# as a PNG data URI.
class PageScreenshotTool < BaseTool
  description "A browser automation tool for taking screenshots of the current page."

  # Logs the invocation, then asks the driver for a screenshot and turns the
  # yielded file's contents into a strict (no line feeds) Base64 data URI.
  #
  # @return [Object] whatever `@driver.screenshot` returns for the data-URI string produced by the block
  def execute
    @logger.info("#{self.class.name}##{__method__}")

    @driver.screenshot do |file|
      encoded = Base64.strict_encode64(file.read)
      "data:image/png;base64,#{encoded}"
    end
  end
end
20
+ end
21
+ end
22
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Browser
6
+ module SelectorGenerator
7
# Basic attribute-based selector builders.
#
# Every public method returns an Array holding zero or one CSS selector, so the
# results can be concatenated without nil checks. The object these methods are
# mixed into must provide `valid_attribute?(value)`.
module BaseSelectors
  # @param element [#[]] node-like object exposing attributes through `[]`
  # @param tag [String] tag name the selector starts with
  # @return [Array<String>] `tag[placeholder="..."]`, or empty when the attribute is not valid
  def placeholder_selector(element, tag)
    quoted_attribute_selector(element, tag, "placeholder")
  end

  # @param element [#[]] node-like object exposing attributes through `[]`
  # @param tag [String] tag name the selector starts with
  # @return [Array<String>] `tag[aria-label="..."]`, or empty when the attribute is not valid
  def aria_label_selector(element, tag)
    quoted_attribute_selector(element, tag, "aria-label")
  end

  # @param element [#[]] node-like object exposing attributes through `[]`
  # @param tag [String] tag name the selector starts with
  # @return [Array<String>] `tag[name="..."]`, or empty when the attribute is not valid
  def name_selector(element, tag)
    quoted_attribute_selector(element, tag, "name")
  end

  # @param element [#[]] node-like object exposing attributes through `[]`
  # @param tag [String] tag name the selector starts with
  # @return [Array<String>] `tag[maxlength="..."]`, or empty when the attribute is not valid
  def maxlength_selector(element, tag)
    quoted_attribute_selector(element, tag, "maxlength")
  end

  private

  # Shared implementation: reads the attribute once, validates it and wraps it
  # in a double-quoted attribute selector.
  def quoted_attribute_selector(element, tag, attribute)
    value = element[attribute]
    return [] unless valid_attribute?(value)

    ["#{tag}[#{attribute}=\"#{escape_selector_value(value)}\"]"]
  end

  # Backslash-escapes `"` and `\` so the value cannot terminate the quoted CSS
  # string early and produce an invalid selector.
  def escape_selector_value(value)
    value.gsub(/["\\]/) { |char| "\\#{char}" }
  end
end
25
+ end
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Browser
6
+ module SelectorGenerator
7
# Context-aware selector generation for elements that cannot be targeted by
# their own attributes alone.
#
# Elements are expected to behave like Nokogiri nodes: they respond to `name`,
# `parent`, `[]` (attribute lookup), `css`, `element_children` and `document`.
module ContextualSelectors
  # Class names too generic to anchor a selector on.
  GENERIC_CLASS_NAMES = %w[row col container wrapper inner outer main].freeze

  # Ids of the auto-generated ":r1a:" form; anchored on the whole string.
  GENERATED_ID_PATTERN = /\A:r[0-9a-z]+:\z/i

  # @param element [Object] node to describe
  # @return [Array<String>] parent-class, label-based and position-based selectors, in that order
  def generate_contextual_selectors(element)
    parent_class_selectors(element) +
      label_based_selectors(element) +
      position_based_selectors(element)
  end

  # Generate selectors based on parent container classes.
  #
  # @return [Array<String>] empty when no ancestor below <body> has a significant class
  def parent_class_selectors(element)
    significant_parent = find_significant_parent(element)
    return [] unless significant_parent

    parent_class = most_specific_class(significant_parent)
    return [] unless parent_class

    build_parent_selector(element, parent_class)
  end

  # Build a descendant selector scoped by the parent class, narrowed by the
  # element's placeholder (preferred) or type when present.
  #
  # @return [Array<String>] exactly one selector
  def build_parent_selector(element, parent_class)
    base = ".#{parent_class} #{element.name}"
    return ["#{base}#{attribute_filter('placeholder', element['placeholder'])}"] if element["placeholder"]
    return ["#{base}#{attribute_filter('type', element['type'])}"] if element["type"]

    [base]
  end

  # Walk up the ancestors, stopping at <body>, and return the first one that
  # carries a significant class.
  #
  # @return [Object, nil]
  def find_significant_parent(element)
    parent = element.parent
    while parent && parent.name != "body"
      return parent if element_has_significant_class?(parent)

      parent = parent.parent
    end
    nil
  end

  # @return [Boolean] whether any of the element's classes is significant
  def element_has_significant_class?(element)
    classes = element["class"]&.split || []
    classes.any? { |c| significant_class?(c) }
  end

  # A class counts as significant when it has at least four characters, is not
  # in the generic list and contains two letters (optionally split by - or _).
  def significant_class?(class_name)
    return false if class_name.length < 4
    return false if generic_class?(class_name)

    class_name.match?(/[a-z]+[-_]?[a-z]+/i)
  end

  # @return [Boolean] whether the class is one of the generic layout names (case-insensitive)
  def generic_class?(class_name)
    GENERIC_CLASS_NAMES.include?(class_name.downcase)
  end

  # @return [String, nil] the longest significant class of the element
  def most_specific_class(element)
    classes = element["class"]&.split || []
    classes.select { |c| significant_class?(c) }.max_by(&:length)
  end

  # Generate a `tag#id` selector when the element has a stable id and a
  # `<label for=...>` pointing at it exists in the document.
  #
  # @return [Array<String>]
  def label_based_selectors(element)
    return [] unless stable_id?(element)

    find_label_for_element(element) ? ["#{element.name}##{element['id']}"] : []
  end

  # @return [Boolean, nil] truthy when the id is present, non-empty and not auto-generated
  def stable_id?(element)
    id = element["id"]
    id && !id.empty? && !id.match?(GENERATED_ID_PATTERN)
  end

  # Look up the label whose `for` attribute equals the element's id; the id is
  # escaped so quotes in it cannot break the lookup selector.
  def find_label_for_element(element)
    element.document.at_css("label#{attribute_filter('for', element['id'])}")
  end

  # Generate a position-based selector when several similar elements share the
  # same parent subtree.
  #
  # The index must be the element's position among the same-tag *direct
  # children* of its parent, because that is what `:nth-of-type` counts; using
  # the position within the filtered "similar" list would select the wrong
  # element (or none) as soon as a different-typed sibling sits in between.
  #
  # @return [Array<String>]
  def position_based_selectors(element)
    siblings = find_similar_siblings(element)
    return [] unless siblings.size > 1

    index = nth_of_type_index(element)
    return [] unless index

    build_position_selector(element, index, parent_context_prefix(element))
  end

  # Build an nth-of-type selector, narrowed by type (preferred) or placeholder.
  #
  # @param index [Integer] 1-based position among same-tag siblings
  # @param parent_context [String] optional ".class " prefix
  # @return [Array<String>] exactly one selector
  def build_position_selector(element, index, parent_context = "")
    nth = ":nth-of-type(#{index})"
    prefix = "#{parent_context}#{element.name}"
    return ["#{prefix}#{attribute_filter('type', element['type'])}#{nth}"] if element["type"]
    return ["#{prefix}#{attribute_filter('placeholder', element['placeholder'])}#{nth}"] if element["placeholder"]

    ["#{prefix}#{nth}"]
  end

  # Find same-tag elements under the element's parent that share its key
  # attributes (the element itself is part of the result).
  def find_similar_siblings(element)
    container = element.parent
    return [] unless container

    container.css(element.name).select { |candidate| same_key_attributes?(element, candidate) }
  end

  # Two elements match when their tag is equal and, for inputs, their `type` too.
  def same_key_attributes?(elem1, elem2)
    return false unless elem1.name == elem2.name

    elem1.name != "input" || elem1["type"] == elem2["type"]
  end

  # @return [String] ".class " for the nearest significant ancestor, or "" when there is none
  def parent_context_prefix(element)
    parent = find_significant_parent(element)
    return "" unless parent

    parent_class = most_specific_class(parent)
    parent_class ? ".#{parent_class} " : ""
  end

  private

  # 1-based position of the element among its parent's element children with
  # the same tag name, or nil when it cannot be located.
  def nth_of_type_index(element)
    same_tag = element.parent.element_children.select { |node| node.name == element.name }
    position = same_tag.index(element)
    position && position + 1
  end

  # Render `[name="value"]`, backslash-escaping `"` and `\` inside the value so
  # the quoted CSS string stays well-formed.
  def attribute_filter(name, value)
    escaped = value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
    "[#{name}=\"#{escaped}\"]"
  end
end
137
+ end
138
+ end
139
+ end
140
+ end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Browser
6
# Generates stable CSS selectors for HTML elements.
#
# The attribute and contextual builders come from BaseSelectors and
# ContextualSelectors, which are extended onto this module; the methods defined
# here are exposed as module functions.
module SelectorGenerator
  extend BaseSelectors
  extend ContextualSelectors

  module_function

  # @param element [Object] node-like object; anything lacking `name`/`parent` yields []
  # @return [Array<String>] de-duplicated selectors, type-specific ones first
  def generate_stable_selectors(element)
    return [] unless valid_element?(element)

    (generate_by_type(element) + generate_contextual_selectors(element)).compact.uniq
  end

  # Dispatch on tag name; only input, textarea and select have dedicated builders.
  #
  # @return [Array<String>]
  def generate_by_type(element)
    case element.name
    when "input" then generate_input_selectors(element)
    when "textarea" then generate_textarea_selectors(element)
    when "select" then generate_select_selectors(element)
    else []
    end
  end

  # @return [Boolean] whether the object exposes the node interface used here
  def valid_element?(element)
    element.respond_to?(:name) && element.respond_to?(:parent)
  end

  # @return [Array<String>] placeholder, aria-label, type and maxlength/name selectors, in that order
  def generate_input_selectors(element)
    placeholder_selector(element, "input") +
      aria_label_selector(element, "input") +
      type_selectors(element) +
      attribute_selectors(element, "input")
  end

  # @return [Array<String>] placeholder and name selectors for a textarea
  def generate_textarea_selectors(element)
    placeholder_selector(element, "textarea") + name_selector(element, "textarea")
  end

  # @return [Array<String>] name and aria-label selectors for a select
  def generate_select_selectors(element)
    name_selector(element, "select") + aria_label_selector(element, "select")
  end

  # @return [Array<String>] `input[type="..."]` plus the amount-class variant when applicable
  def type_selectors(element)
    return [] unless valid_attribute?(element["type"])

    base = "input[type=\"#{element['type']}\"]"
    [base, amount_class_selector(base, element)].compact
  end

  # Append `.wv-input--amount` only when the element really carries that class.
  # The class attribute is compared token by token: a substring check would
  # also accept e.g. `wv-input--amount-large` and emit a selector that cannot
  # match the element.
  #
  # @return [String, nil]
  def amount_class_selector(base, element)
    class_tokens = element["class"]&.split || []
    class_tokens.include?("wv-input--amount") ? "#{base}.wv-input--amount" : nil
  end

  # @return [Array<String>] maxlength and name selectors for the given tag
  def attribute_selectors(element, tag)
    maxlength_selector(element, tag) + name_selector(element, tag)
  end

  # @param attribute [String, nil]
  # @return [Boolean, nil] truthy when the attribute is present and not blank
  def valid_attribute?(attribute)
    attribute && !attribute.strip.empty?
  end
end
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
+ module OmniAI
6
+ module Tools
7
+ module Browser
8
# Browser automation tool that finds elements by CSS selector and reports each
# match, optionally preceded by its parent context.
class SelectorInspectTool < BaseTool
  include InspectUtils

  description "A browser automation tool for finding and inspecting elements by CSS selector."

  parameter :selector, :string, description: "CSS selector to target specific elements"
  parameter :context_size, :integer, description: "Number of parent elements to include for context"

  # @param selector [String] CSS selector evaluated against the cleaned page HTML
  # @param context_size [Integer] forwarded to the parent-context formatter; context is skipped unless positive
  #
  # @return [String] a report of the matches, or a message saying nothing matched
  def execute(selector:, context_size: 2)
    @logger.info("#{self.class.name}##{__method__}")

    matches = cleaned_document(html: @driver.html).css(selector)

    if matches.empty?
      "No elements found matching selector: #{selector}"
    else
      format_elements(matches, selector, context_size)
    end
  end

  private

  # Assemble the report: a header line followed by one section per element.
  def format_elements(elements, selector, context_size)
    report = +"Found #{elements.size} elements matching '#{selector}':\n\n"

    elements.each_with_index do |node, position|
      report << "--- Element #{position + 1} ---\n"
      report << Formatters::ElementFormatter.get_parent_context(node, context_size) if context_size.positive?
      report << "Element: #{node.to_html}\n\n"
    end

    report
  end
end
42
+ end
43
+ end
44
+ end
@@ -20,38 +20,9 @@ module OmniAI
20
20
  # @param selector [String] The ID / name of the text field / text area to interact with.
21
21
  # @param text [String] The text to set.
22
22
  def execute(selector:, text:)
23
- @logger.info("#{self.class.name}##{__method__} selector=#{selector.inspect} text=#{text.inspect}")
23
+ @logger.info("#{self.class.name}##{__method__} selector=#{selector.inspect}")
24
24
 
25
- element = find(id: selector) || find(name: selector)
26
-
27
- return { error: "unknown selector=#{selector}" } if element.nil?
28
-
29
- element.set(text)
30
- end
31
-
32
- protected
33
-
34
- # @param selector [Hash]
35
- #
36
- # @return [Watir::TextArea, Watir::TextField, nil]
37
- def find(selector)
38
- find_text_area(selector) || find_text_field(selector)
39
- end
40
-
41
- # @param selector [Hash]
42
- #
43
- # @return [Watir::TextArea, nil]
44
- def find_text_area(selector)
45
- element = @browser.textarea(selector)
46
- element if element.exists?
47
- end
48
-
49
- # @param selector [Hash]
50
- #
51
- # @return [Watir::TextField, nil]
52
- def find_text_field(selector)
53
- element = @browser.text_field(selector)
54
- element if element.exists?
25
+ @driver.fill_in(selector:, text:)
55
26
  end
56
27
  end
57
28
  end
@@ -20,7 +20,7 @@ module OmniAI
20
20
  def execute(url:)
21
21
  @logger.info("#{self.class.name}##{__method__} url=#{url.inspect}")
22
22
 
23
- @browser.goto(url)
23
+ @driver.goto(url:)
24
24
  end
25
25
  end
26
26
  end
@@ -0,0 +1,222 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Browser
6
# Browser driver backed by a Watir browser instance.
#
# The action methods (`goto`, `fill_in`, `*_click`) report their outcome as a
# Hash with a `:status` of `:ok` or `:error` (plus `:message` on error).
#
# @example
#   driver = OmniAI::Tools::Browser::WatirDriver.new
#   driver.goto(url: "https://example.com")
#   driver.button_click(selector: "submit-button")
class WatirDriver < BaseDriver
  # @param logger [Logger] forwarded to the base driver
  # @param browser [Watir::Browser] browser to drive; a Chrome instance by default
  def initialize(logger: Logger.new(IO::NULL), browser: Watir::Browser.new(:chrome))
    super(logger:)
    @browser = browser
  end

  # Closes the underlying browser.
  def close
    @browser.close
  end

  # @return [String]
  def url
    @browser.url
  end

  # @return [String]
  def title
    @browser.title
  end

  # @return [String]
  def html
    @browser.html
  end

  # @param url [String]
  #
  # @return [Hash]
  def goto(url:)
    @browser.goto(url)

    { status: :ok }
  end

  # Saves a screenshot to a temporary PNG and yields it opened for binary
  # reading. The handle is closed when the block returns and the temporary
  # file is removed afterwards, so the file must be consumed inside the block.
  #
  # @yield [file]
  # @yieldparam file [File]
  #
  # @return [Object] the block's return value
  def screenshot
    tempfile = Tempfile.new(["screenshot", ".png"])
    @browser.screenshot.save(tempfile.path)

    # Block form so the read handle is closed instead of leaking.
    File.open(tempfile.path, "rb") { |file| yield file }
  ensure
    tempfile&.close
    tempfile&.unlink
  end

  # @param selector [String]
  # @param text [String]
  #
  # @return [Hash]
  def fill_in(selector:, text:)
    element = find_field(selector)

    return unknown_selector(selector) if element.nil?

    element.set(text)

    { status: :ok }
  end

  # @param selector [String]
  #
  # @return [Hash]
  def button_click(selector:)
    element = find_button(selector)

    # The status must be the :error symbol; a bare `error` is an undefined
    # name and would raise instead of reporting the missing button.
    return unknown_selector(selector) if element.nil?

    element.click

    { status: :ok }
  end

  # @param selector [String]
  #
  # @return [Hash]
  def link_click(selector:)
    element = find_link(selector)

    return unknown_selector(selector) if element.nil?

    element.click

    { status: :ok }
  end

  # @param selector [String]
  #
  # @return [Hash]
  def element_click(selector:)
    element = find_element(selector)

    return unknown_selector(selector) if element.nil?

    element.click

    { status: :ok }
  rescue Watir::Wait::TimeoutError => e
    # NOTE(review): this used to name a bare `TimeoutError`; the Watir wait
    # timeout is the only timeout class this file references elsewhere —
    # confirm no other timeout class was intended.
    { status: :error, message: e.message }
  end

  protected

  # Error payload shared by the actions when no element matches.
  #
  # @param selector [String]
  #
  # @return [Hash]
  def unknown_selector(selector)
    { status: :error, message: "unknown selector=#{selector.inspect}" }
  end

  # Polls the block until it yields a visible element or the wait times out.
  # TIMEOUT is resolved from outside this class (presumably BaseDriver — verify).
  #
  # @yieldreturn [Watir::Element, nil]
  #
  # @return [Watir::Element, nil] nil when the wait times out
  def wait_for_element
    Watir::Wait.until(timeout: TIMEOUT) do
      element = yield
      element if element&.visible?
    end
  rescue Watir::Wait::TimeoutError
    nil
  end

  # Tries the selector as id, name, placeholder, class and CSS, in that order.
  #
  # @param selector [String]
  #
  # @return [Watir::TextField, Watir::TextArea, nil]
  def find_field(selector)
    wait_for_element do
      find_text_area_or_field_by(id: selector) ||
        find_text_area_or_field_by(name: selector) ||
        find_text_area_or_field_by(placeholder: selector) ||
        find_text_area_or_field_by(class: selector) ||
        find_text_area_or_field_by(css: selector)
    end
  end

  # @param selector [Hash] Watir locator, e.g. `{ id: "email" }`
  #
  # @return [Watir::TextField, Watir::TextArea, nil]
  def find_text_area_or_field_by(selector)
    find_text_field_by(selector) || find_text_area_by(selector)
  end

  # Tries the selector as button text, then as id.
  #
  # @param selector [String]
  #
  # @return [Watir::Button, nil]
  def find_button(selector)
    wait_for_element do
      find_button_by(text: selector) || find_button_by(id: selector)
    end
  end

  # Tries the selector as link text, href and id, in that order.
  #
  # @param selector [String]
  #
  # @return [Watir::Anchor, nil]
  def find_link(selector)
    wait_for_element do
      find_link_by(text: selector) || find_link_by(href: selector) || find_link_by(id: selector)
    end
  end

  # Tries the selector as CSS, text, id and XPath, in that order.
  #
  # @param selector [String]
  #
  # @return [Watir::Element, nil]
  def find_element(selector)
    wait_for_element do
      find_element_by(css: selector) ||
        find_element_by(text: selector) ||
        find_element_by(id: selector) ||
        find_element_by(xpath: selector)
    end
  end

  # @param selector [Hash]
  #
  # @return [Watir::TextArea, nil]
  def find_text_area_by(selector)
    existing(@browser.textarea(selector))
  end

  # @param selector [Hash]
  #
  # @return [Watir::TextField, nil]
  def find_text_field_by(selector)
    existing(@browser.text_field(selector))
  end

  # @param selector [Hash]
  #
  # @return [Watir::Element, nil]
  def find_element_by(selector)
    existing(@browser.element(selector))
  end

  # @param selector [Hash]
  #
  # @return [Watir::Anchor, nil]
  def find_link_by(selector)
    existing(@browser.link(selector))
  end

  # @param selector [Hash]
  #
  # @return [Watir::Button, nil]
  def find_button_by(selector)
    existing(@browser.button(selector))
  end

  # Returns the located element only when it responds to `exists?` and
  # reports that it exists; otherwise nil.
  #
  # @param element [Object]
  #
  # @return [Object, nil]
  def existing(element)
    return unless element.respond_to?(:exists?)

    element if element.exists?
  end
end
220
+ end
221
+ end
222
+ end