omniai-tools 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +3 -12
  3. data/lib/omniai/tools/browser/base_driver.rb +78 -0
  4. data/lib/omniai/tools/browser/base_tool.rb +31 -4
  5. data/lib/omniai/tools/browser/button_click_tool.rb +1 -14
  6. data/lib/omniai/tools/browser/element_click_tool.rb +30 -0
  7. data/lib/omniai/tools/browser/elements/element_grouper.rb +73 -0
  8. data/lib/omniai/tools/browser/elements/nearby_element_detector.rb +108 -0
  9. data/lib/omniai/tools/browser/formatters/action_formatter.rb +37 -0
  10. data/lib/omniai/tools/browser/formatters/data_entry_formatter.rb +135 -0
  11. data/lib/omniai/tools/browser/formatters/element_formatter.rb +52 -0
  12. data/lib/omniai/tools/browser/formatters/input_formatter.rb +59 -0
  13. data/lib/omniai/tools/browser/inspect_tool.rb +46 -13
  14. data/lib/omniai/tools/browser/inspect_utils.rb +51 -0
  15. data/lib/omniai/tools/browser/link_click_tool.rb +2 -14
  16. data/lib/omniai/tools/browser/page_inspect/button_summarizer.rb +140 -0
  17. data/lib/omniai/tools/browser/page_inspect/form_summarizer.rb +98 -0
  18. data/lib/omniai/tools/browser/page_inspect/html_summarizer.rb +37 -0
  19. data/lib/omniai/tools/browser/page_inspect/link_summarizer.rb +103 -0
  20. data/lib/omniai/tools/browser/page_inspect_tool.rb +30 -0
  21. data/lib/omniai/tools/browser/page_screenshot_tool.rb +22 -0
  22. data/lib/omniai/tools/browser/selector_generator/base_selectors.rb +28 -0
  23. data/lib/omniai/tools/browser/selector_generator/contextual_selectors.rb +140 -0
  24. data/lib/omniai/tools/browser/selector_generator.rb +73 -0
  25. data/lib/omniai/tools/browser/selector_inspect_tool.rb +44 -0
  26. data/lib/omniai/tools/browser/text_field_area_set_tool.rb +2 -31
  27. data/lib/omniai/tools/browser/visit_tool.rb +1 -1
  28. data/lib/omniai/tools/browser/watir_driver.rb +224 -0
  29. data/lib/omniai/tools/browser_tool.rb +265 -0
  30. data/lib/omniai/tools/version.rb +1 -1
  31. metadata +23 -2
@@ -5,25 +5,58 @@ require "nokogiri"
5
5
  module OmniAI
6
6
  module Tools
7
7
  module Browser
8
- # @example
9
- # browser = Watir::Browser.new(:chrome)
10
- # tool = OmniAI::Tools::Browser::InspectTool.new(browser:)
11
- # tool.execute
8
# A browser automation tool for finding UI elements by their text content.
#
# The page HTML is read from the driver, stripped of link/style/script nodes
# (see InspectUtils#cleaned_document) and then searched for elements whose
# text, value, placeholder or type contains the requested text.
class InspectTool < BaseTool
  include InspectUtils

  description "A browser automation tool for finding UI elements by their text content."

  parameter :text_content, :string, description: "Search for elements containing this text"
  parameter :selector, :string, description: "Optional CSS selector to further filter results"
  parameter :context_size, :integer, description: "Number of parent elements to include for context"

  # @param text_content [String] text to look for (matched case-insensitively)
  # @param selector [String, nil] optional CSS selector used to narrow the matches
  # @param context_size [Integer] number of parent elements to include for context
  # @return [Object] the formatter output, or a "No elements found" message string
  def execute(text_content:, selector: nil, context_size: 2)
    @logger.info("#{self.class.name}##{__method__}")

    # Fetch the HTML once so the logged markup is exactly the markup that gets parsed.
    html = @driver.html

    # The full page dump is diagnostic output, so it is logged at debug level.
    # NOTE(review): assumes @logger responds to #debug like ::Logger - confirm in BaseTool.
    @logger.debug("#{self.class.name}##{__method__} html=#{html}")

    doc = cleaned_document(html: html)
    find_elements_by_text(doc, text_content, context_size, selector)
  end

  private

  # Finds the matching elements and hands them to the element formatter.
  # Parent context is suppressed (size 0) when a selector narrows the results.
  def find_elements_by_text(doc, text, context_size, additional_selector = nil)
    elements = get_elements_matching_text(doc, text, additional_selector)

    return "No elements found containing text: #{text}" if elements.empty?

    adjusted_context_size = selector_given?(additional_selector) ? 0 : context_size

    Formatters::ElementFormatter.format_matching_elements(elements, text, adjusted_context_size)
  end

  # Collects elements matching the text directly or through a <label for>, adds
  # nearby interactive elements when no selector was given, then applies the selector.
  def get_elements_matching_text(doc, text, additional_selector)
    text_downcase = text.downcase

    elements = find_elements_with_matching_text(doc, text_downcase)
    elements = add_elements_from_matching_labels(doc, text_downcase, elements)

    unless selector_given?(additional_selector)
      elements = Elements::NearbyElementDetector.add_nearby_interactive_elements(elements)
    end

    apply_additional_selector(doc, elements, additional_selector)
  end

  # Keeps only the elements that also match the CSS selector (when one was given).
  def apply_additional_selector(doc, elements, additional_selector)
    return elements.uniq unless selector_given?(additional_selector)

    css_matches = doc.css(additional_selector)
    elements.select { |el| css_matches.include?(el) }.uniq
  end

  # A nil or empty selector both mean "no selector"; every helper uses this one
  # check so they cannot disagree about an empty string.
  def selector_given?(selector)
    !selector.nil? && !selector.empty?
  end
end
29
62
  end
@@ -0,0 +1,51 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      # Utility methods for browser inspection tools that handle HTML document cleaning
      # and various element searching functionalities.
      module InspectUtils
        XPATH_UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        XPATH_LOWERCASE = "abcdefghijklmnopqrstuvwxyz"

        # Parses the HTML string and strips link/style/script nodes from it.
        #
        # @param html [String]
        # @return [Object] the parsed document returned by Nokogiri::HTML
        def cleaned_document(html:)
          clean_document(Nokogiri::HTML(html))
        end

        # Removes every link, style and script node from the document in place.
        #
        # @param doc [Object] a document responding to #css
        # @return [Object] the same document
        def clean_document(doc)
          doc.css("link, style, script").each(&:remove)
          doc
        end

        # Builds an XPath 1.0 "contains" predicate that compares case-insensitively
        # for ASCII letters (translate only maps A-Z to a-z).
        #
        # NOTE(review): XPath 1.0 converts a node-set argument to the string value of its
        # first node, so only the first node selected by +attribute+ is compared - confirm
        # whether that is intended for elements with several text nodes.
        #
        # @param attribute [String] XPath expression to inspect, e.g. "text()" or "@value"
        # @param value [String] already-downcased text to search for
        # @return [String] the XPath predicate
        def ci_contains(attribute, value)
          "contains(translate(#{attribute}, '#{XPATH_UPPERCASE}', " \
            "'#{XPATH_LOWERCASE}'), #{xpath_literal(value)})"
        end

        # @param doc [Object] a document responding to #xpath
        # @param text_downcase [String] downcased search text
        # @return [Object] elements whose text, value, placeholder or type contains the text
        def find_elements_with_matching_text(doc, text_downcase)
          xpath_conditions = [
            ci_contains("text()", text_downcase),
            ci_contains("@value", text_downcase),
            ci_contains("@placeholder", text_downcase),
            ci_contains("@type", text_downcase),
          ].join(" or ")

          doc.xpath("//*[#{xpath_conditions}]")
        end

        # Appends the elements referenced by the "for" attribute of every label whose
        # text contains the search text.
        #
        # @param doc [Object] a document responding to #xpath
        # @param text_downcase [String] downcased search text
        # @param elements [Object] collection supporting + (a new collection is returned)
        # @return [Object] the extended collection
        def add_elements_from_matching_labels(doc, text_downcase, elements)
          label_condition = ci_contains(".//text()", text_downcase)
          matching_labels = doc.xpath("//label[#{label_condition}]")

          matching_labels.each do |label|
            for_attr = label["for"]
            next unless for_attr && !for_attr.empty?

            # Look the id up with a quoted XPath literal so ids containing quotes
            # cannot break the query.
            associated_input = doc.xpath("//*[@id=#{xpath_literal(for_attr)}]")
            elements += associated_input if associated_input.any?
          end

          elements
        end

        private

        # Quotes a value as an XPath 1.0 string literal. XPath 1.0 has no escape
        # sequences, so a value containing both quote characters is assembled with concat().
        def xpath_literal(value)
          text = value.to_s
          return "'#{text}'" unless text.include?("'")
          return "\"#{text}\"" unless text.include?('"')

          pieces = text.split("'", -1).map { |piece| "'#{piece}'" }
          "concat(#{pieces.join(%q(, "'", ))})"
        end
      end
    end
  end
end
@@ -14,23 +14,11 @@ module OmniAI
14
14
 
15
15
  parameter :selector, :string, description: "The ID or text of the link to interact with."
16
16
 
17
- # @param to [String] The ID or text of the link to interact with.
17
+ # @param selector [String] The ID or text of the link to interact with.
18
18
  def execute(selector:)
19
19
  @logger.info("#{self.class.name}##{__method__} selector=#{selector.inspect}")
20
20
 
21
- element = find(text: selector) || find(value: selector) || find(id: selector)
22
-
23
- return { error: "unknown selector=#{selector}" } if element.nil?
24
-
25
- element.click
26
- end
27
-
28
- protected
29
-
30
- # @return [Watir::Anchor, nil]
31
- def find(selector)
32
- element = @browser.a(selector)
33
- element if element.exists?
21
+ @driver.link_click(selector:)
34
22
  end
35
23
  end
36
24
  end
@@ -0,0 +1,140 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      module PageInspect
        # Module to handle button elements summarization for AI agents.
        #
        # Every method takes parsed nodes that respond to #[] (attribute lookup) and
        # #text; the document only needs to respond to #css.
        module ButtonSummarizer
          BUTTON_SELECTOR = 'button, input[type="button"], input[type="submit"], [role="button"], [tabindex="0"]'
          PRIMARY_KEYWORDS = %w[save submit continue next finish send create update
                                delete cancel close done confirm proceed add edit choose select].freeze
          PRIMARY_CLASSES = %w[primary submit btn-primary button--primary save continue].freeze
          LINK_BUTTON_CLASSES = %w[button--link btn-link link-button button-link].freeze
          CRITICAL_KEYWORDS = %w[save submit send create].freeze
          WORKFLOW_PATTERNS = [
            /add.*(item|customer|discount|product|contact)/,
            /edit.*(column|field|profile)/,
            /choose.*(different|customer)/,
            /(create|new).*(item|customer|product)/,
          ].freeze
          # Matches inline "display: none" with or without whitespace after the colon.
          HIDDEN_STYLE = /display:\s*none/i
          GROUP_LIMIT = 5

          module_function

          # @param doc [Object] parsed document responding to #css
          # @return [String] formatted summary, or "" when no primary button is found
          def summarize_primary_actions(doc)
            buttons = find_primary_buttons(doc)
            return "" if buttons.empty?

            format_primary_actions(buttons)
          end

          # @return [Array] button-like nodes that are primary actions and not skipped
          def find_primary_buttons(doc)
            doc.css(BUTTON_SELECTOR).select { |btn| primary_action?(btn) && !skip_button?(btn) }
          end

          # A button is skipped when it carries a disabled attribute, is hidden via an
          # inline display:none style, or is marked aria-hidden.
          def skip_button?(button)
            return true if button["disabled"]
            return true if button["style"].to_s.match?(HIDDEN_STYLE)

            button["aria-hidden"] == "true"
          end

          def primary_action?(button)
            button["type"] == "submit" ||
              primary_button_text?(button) ||
              primary_button_class?(button) ||
              workflow_action_button?(button)
          end

          # True when the button text contains any primary keyword (substring match).
          # "add" is itself a keyword, so no separate "add <item>" check is needed here.
          def primary_button_text?(button)
            text = get_button_text(button).downcase
            PRIMARY_KEYWORDS.any? { |keyword| text.include?(keyword) }
          end

          # True when the class attribute contains a generic primary or link-button pattern.
          def primary_button_class?(button)
            classes = button["class"] || ""
            (PRIMARY_CLASSES + LINK_BUTTON_CLASSES).any? { |pattern| classes.include?(pattern) }
          end

          # True when the button text matches a common workflow pattern.
          def workflow_action_button?(button)
            text = get_button_text(button).downcase
            WORKFLOW_PATTERNS.any? { |pattern| text.match?(pattern) }
          end

          # First non-blank of: visible text, value attribute, aria-label; "Button" otherwise.
          # Fallback attributes are stripped too, so a whitespace-only value is not reported.
          def get_button_text(button)
            candidates = [button.text, button["value"], button["aria-label"]]
            candidates.map { |candidate| candidate.to_s.strip }.find { |candidate| !candidate.empty? } || "Button"
          end

          # @param buttons [Array] primary buttons
          # @return [String] header, a critical group and a regular group, then a blank line
          def format_primary_actions(buttons)
            critical, regular = buttons.partition { |btn| critical_action?(btn) }

            [
              "⚡ Primary Actions:\n",
              format_button_group(critical, "🔥 Critical"),
              format_button_group(regular, "📝 Actions"),
              "\n",
            ].join
          end

          def critical_action?(button)
            text = get_button_text(button).downcase
            CRITICAL_KEYWORDS.any? { |keyword| text.include?(keyword) }
          end

          # Lists at most GROUP_LIMIT buttons and notes how many were left out.
          def format_button_group(buttons, title)
            return "" if buttons.empty?

            lines = ["#{title}:\n"]
            lines.concat(buttons.first(GROUP_LIMIT).map { |btn| format_action_button(btn) })
            overflow = buttons.size - GROUP_LIMIT
            lines << " ... and #{overflow} more\n" if overflow.positive?
            lines << "\n"
            lines.join
          end

          def format_action_button(button)
            " • #{get_button_text(button)} (#{get_button_selector(button)})\n"
          end

          # Prefers the element id, then "text:<label>", else a placeholder marker.
          def get_button_selector(button)
            id = button["id"]
            return id if id && !id.empty?
            return "text:#{get_button_text(button)}" if distinctive_text?(button)

            "css-selector-needed"
          end

          def distinctive_text?(button)
            text = get_button_text(button)
            text.length > 2 && text != "Button"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      module PageInspect
        # Module to handle form elements summarization for AI agents.
        #
        # Fields are parsed nodes responding to #[] (attribute lookup), #name and
        # #document; documents only need to respond to #css.
        module FormSummarizer
          FIELD_SELECTOR = "input, textarea, select"
          NON_ENTRY_TYPES = %w[hidden button submit reset].freeze
          TEXT_TYPES = %w[text email tel url textarea].freeze
          GROUP_LIMIT = 8

          module_function

          # @param doc [Object] parsed document responding to #css
          # @return [String] grouped field listing, or "" when there is nothing to fill in
          def summarize_data_entry_opportunities(doc)
            fields = find_data_entry_fields(doc)
            return "" if fields.empty?

            format_for_agents(fields)
          end

          # @return [String] a one-line count of all form fields in the document
          def summarize_form_structure(doc)
            fields = doc.css(FIELD_SELECTOR)
            return "📝 No form fields available.\n\n" if fields.empty?

            "📝 Form Fields Available: #{fields.size} fields found\n\n"
          end

          def find_data_entry_fields(doc)
            doc.css(FIELD_SELECTOR).reject { |field| skip_field?(field) }
          end

          # Hidden, disabled and button-like inputs are not data-entry fields.
          def skip_field?(field)
            return true if field["disabled"]

            NON_ENTRY_TYPES.include?(field["type"])
          end

          # Groups the fields into date, text and other sections (date fields first).
          def format_for_agents(fields)
            date_fields = fields.select { |field| date_field?(field) }
            text_fields = fields.select { |field| text_field?(field) }
            other_fields = fields - date_fields - text_fields

            [
              "📝 Data Entry Fields:\n",
              format_field_group("📅 Date Fields", date_fields),
              format_field_group("📝 Text Fields", text_fields),
              format_field_group("🔧 Other Fields", other_fields),
              "\n",
            ].join
          end

          # Lists at most GROUP_LIMIT fields and notes how many were left out.
          def format_field_group(title, fields)
            return "" if fields.empty?

            lines = ["#{title}:\n"]
            lines.concat(fields.first(GROUP_LIMIT).map { |field| format_agent_field(field) })
            overflow = fields.size - GROUP_LIMIT
            lines << " ... and #{overflow} more\n" if overflow.positive?
            lines << "\n"
            lines.join
          end

          def format_agent_field(field)
            label = get_field_label(field)
            id = field["id"] || "no-id"
            value = field["value"] ? " = '#{field['value']}'" : ""
            placeholder = field["placeholder"] ? " [#{field['placeholder']}]" : ""

            " • #{label} (#{id})#{value}#{placeholder}\n"
          end

          # Best available human-readable name: the associated <label>, then the
          # placeholder, then the capitalized input type / tag name. A field that has an
          # id but no label falls through to the later options instead of yielding nil.
          def get_field_label(field)
            label = get_associated_label(field)
            return label if label

            placeholder = field["placeholder"]
            return placeholder if placeholder && !placeholder.empty?

            field_type = field.name == "input" ? (field["type"] || "text") : field.name
            field_type.capitalize
          end

          # Text of the first <label> whose "for" attribute equals the field id, or nil.
          # The comparison is done in Ruby rather than by interpolating the id into a
          # selector, so ids with quotes or other special characters are safe.
          def get_associated_label(field)
            id = field["id"]
            return nil if id.nil? || id.empty?

            label = field.document.css("label").find { |candidate| candidate["for"] == id }
            text = label&.text.to_s.strip
            text.empty? ? nil : text
          end

          def date_field?(field)
            field["type"] == "date" ||
              field["placeholder"]&.match?(/date|yyyy|mm|dd/i) ||
              get_associated_label(field)&.match?(/date|due/i)
          end

          # Textareas, inputs with a textual type, and inputs without a type attribute
          # (HTML treats a missing type as "text") count as text fields.
          def text_field?(field)
            return true if field.name == "textarea"

            type = field["type"]
            return field.name == "input" if type.nil? || type.empty?

            TEXT_TYPES.include?(type)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      module PageInspect
        # Module to handle HTML formatting and summary generation for AI agents
        module HtmlSummarizer
          module_function

          # Builds a plain-text overview of the page: title, data-entry fields, primary
          # actions and key navigation. When no data-entry fields are found, the form
          # structure line is appended as a fallback.
          #
          # @param doc [Object] parsed document responding to #at_css (and whatever the
          #   individual summarizers require)
          # @return [String] the summary with surrounding whitespace stripped
          def summarize_interactive_elements(doc)
            # A missing, empty or whitespace-only <title> all fall back to the default.
            title = doc.at_css("title")&.text.to_s.strip
            title = "Untitled Page" if title.empty?

            # Primary focus: what can agents fill out?
            data_entry = FormSummarizer.summarize_data_entry_opportunities(doc)

            sections = [
              "#{title}\n\n",
              data_entry,
              # Secondary: what actions can agents take?
              ButtonSummarizer.summarize_primary_actions(doc),
              # Tertiary: key navigation (empty when nothing relevant is found)
              LinkSummarizer.summarize_key_navigation(doc),
            ]

            # Fallback: if no data entry was found, show the form structure instead.
            sections << FormSummarizer.summarize_form_structure(doc) if data_entry.empty?

            sections.join.strip
          end
        end
      end
    end
  end
end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      module PageInspect
        # Module to handle link elements summarization for AI agents.
        #
        # Links are parsed nodes responding to #[] (attribute lookup), #text and
        # #ancestors; the document only needs to respond to #css.
        module LinkSummarizer
          NAV_INDICATORS = %w[nav navigation menu main-nav primary-nav app-menu breadcrumb].freeze
          WORKFLOW_KEYWORDS = %w[dashboard home create new add edit settings
                                 invoice estimate customer payment back continue].freeze
          # Matches inline "display: none" with or without whitespace after the colon.
          HIDDEN_STYLE = /display:\s*none/i
          # URL schemes are case-insensitive, so "JavaScript:" must be caught as well.
          SCRIPT_HREF = /\Ajavascript:/i
          GROUP_LIMIT = 6

          module_function

          # @param doc [Object] parsed document responding to #css
          # @return [String] formatted navigation summary, or "" when no link qualifies
          def summarize_key_navigation(doc)
            nav_links = find_navigation_links(doc)
            return "" if nav_links.empty?

            format_navigation(nav_links)
          end

          def find_navigation_links(doc)
            doc.css("a[href]").select { |link| !skip_link?(link) && navigation_link?(link) }
          end

          # Skips links without a real destination ("", "#", javascript:) and links
          # hidden through an inline display:none style.
          def skip_link?(link)
            href = link["href"].to_s.strip
            return true if href.empty? || href == "#"
            return true if href.match?(SCRIPT_HREF)

            link["style"].to_s.match?(HIDDEN_STYLE)
          end

          def navigation_link?(link)
            main_navigation?(link) || workflow_link?(link)
          end

          # True when any ancestor's class attribute contains a navigation indicator
          # (substring match on the joined class names).
          def main_navigation?(link)
            ancestor_classes = link.ancestors.filter_map { |el| el["class"] }.join(" ")
            NAV_INDICATORS.any? { |indicator| ancestor_classes.include?(indicator) }
          end

          def workflow_link?(link)
            text = get_link_text(link).downcase
            WORKFLOW_KEYWORDS.any? { |keyword| text.include?(keyword) }
          end

          # Visible text, else the title attribute, else aria-label, else "Link".
          def get_link_text(link)
            text = link.text.strip
            text = link["title"] if text.empty? && link["title"]
            text = link["aria-label"] if text.empty? && link["aria-label"]

            text.empty? ? "Link" : text
          end

          # @param links [Array] navigation links
          # @return [String] header, main-menu group, quick-actions group, then a blank line
          def format_navigation(links)
            main_nav, others = links.partition { |link| main_navigation?(link) }
            actions = others.select { |link| workflow_link?(link) }

            [
              "🧭 Key Navigation:\n",
              format_link_group(main_nav, "📍 Main Menu"),
              format_link_group(actions, "🔗 Quick Actions"),
              "\n",
            ].join
          end

          # Lists at most GROUP_LIMIT links and notes how many were left out.
          def format_link_group(links, title)
            return "" if links.empty?

            lines = ["#{title}:\n"]
            lines.concat(links.first(GROUP_LIMIT).map { |link| format_nav_link(link) })
            overflow = links.size - GROUP_LIMIT
            lines << " ... and #{overflow} more\n" if overflow.positive?
            lines << "\n"
            lines.join
          end

          def format_nav_link(link)
            " • #{get_link_text(link)}#{extract_destination(link['href'])}\n"
          end

          # Returns " → <last path segment>" for hrefs containing a slash, "" otherwise.
          def extract_destination(href)
            return "" if href.nil? || href.empty?
            return "" unless href.include?("/")

            path = href.split("/").last
            return "" if path.nil? || path.empty?

            " → #{path}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+
5
module OmniAI
  module Tools
    module Browser
      # A browser automation tool for viewing the full HTML of the page.
      #
      # The driver's HTML is first passed through InspectUtils#cleaned_document,
      # which removes link, style and script nodes.
      class PageInspectTool < BaseTool
        include InspectUtils

        description "A browser automation tool for viewing the full HTML of the current page."

        parameter :summarize, :boolean, description: "If true, returns a summary instead of full HTML"

        # @param summarize [Boolean] when truthy, return the interactive-element summary
        # @return [String] the cleaned page HTML, or the summary text
        def execute(summarize: false)
          @logger.info("#{self.class.name}##{__method__}")

          document = cleaned_document(html: @driver.html)
          return document.to_html unless summarize

          PageInspect::HtmlSummarizer.summarize_interactive_elements(document)
        end
      end
    end
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+
5
module OmniAI
  module Tools
    module Browser
      # A browser automation tool for taking screenshots of the current page.
      class PageScreenshotTool < BaseTool
        description "A browser automation tool for taking screenshots of the current page."

        # Asks the driver for a screenshot and turns the yielded file into a PNG data URI.
        #
        # @return [Object] whatever the driver's #screenshot returns for the block
        #   (presumably the data URI string built here - confirm against the driver)
        def execute
          @logger.info("#{self.class.name}##{__method__}")

          @driver.screenshot do |file|
            encoded_image = Base64.strict_encode64(file.read)
            "data:image/png;base64,#{encoded_image}"
          end
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
module OmniAI
  module Tools
    module Browser
      module SelectorGenerator
        # Basic selector generation methods.
        #
        # Each public method returns a one-element array holding a CSS attribute
        # selector (tag[attribute="value"]), or an empty array when the host's
        # valid_attribute? rejects the attribute value. The host object must
        # provide valid_attribute?.
        module BaseSelectors
          # @param element [Object] node responding to #[] (attribute lookup)
          # @param tag [String] tag name used as the selector prefix
          # @return [Array<String>]
          def placeholder_selector(element, tag)
            attribute_selector(element, tag, "placeholder")
          end

          # @param element [Object] node responding to #[] (attribute lookup)
          # @param tag [String] tag name used as the selector prefix
          # @return [Array<String>]
          def aria_label_selector(element, tag)
            attribute_selector(element, tag, "aria-label")
          end

          # @param element [Object] node responding to #[] (attribute lookup)
          # @param tag [String] tag name used as the selector prefix
          # @return [Array<String>]
          def name_selector(element, tag)
            attribute_selector(element, tag, "name")
          end

          # @param element [Object] node responding to #[] (attribute lookup)
          # @param tag [String] tag name used as the selector prefix
          # @return [Array<String>]
          def maxlength_selector(element, tag)
            attribute_selector(element, tag, "maxlength")
          end

          private

          # Shared implementation for the attribute-based selectors above.
          def attribute_selector(element, tag, attribute)
            value = element[attribute]
            return [] unless valid_attribute?(value)

            ["#{tag}[#{attribute}=\"#{escape_css_string(value)}\"]"]
          end

          # Backslash-escapes double quotes and backslashes so the value cannot
          # terminate the quoted CSS string early.
          def escape_css_string(value)
            value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
          end
        end
      end
    end
  end
end