omniai-tools 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +72 -16
- data/lib/omniai/tools/browser/base_driver.rb +78 -0
- data/lib/omniai/tools/browser/base_tool.rb +31 -4
- data/lib/omniai/tools/browser/button_click_tool.rb +1 -14
- data/lib/omniai/tools/browser/element_click_tool.rb +30 -0
- data/lib/omniai/tools/browser/elements/element_grouper.rb +73 -0
- data/lib/omniai/tools/browser/elements/nearby_element_detector.rb +108 -0
- data/lib/omniai/tools/browser/formatters/action_formatter.rb +37 -0
- data/lib/omniai/tools/browser/formatters/data_entry_formatter.rb +135 -0
- data/lib/omniai/tools/browser/formatters/element_formatter.rb +52 -0
- data/lib/omniai/tools/browser/formatters/input_formatter.rb +59 -0
- data/lib/omniai/tools/browser/inspect_tool.rb +46 -13
- data/lib/omniai/tools/browser/inspect_utils.rb +51 -0
- data/lib/omniai/tools/browser/link_click_tool.rb +2 -14
- data/lib/omniai/tools/browser/page_inspect/button_summarizer.rb +140 -0
- data/lib/omniai/tools/browser/page_inspect/form_summarizer.rb +98 -0
- data/lib/omniai/tools/browser/page_inspect/html_summarizer.rb +37 -0
- data/lib/omniai/tools/browser/page_inspect/link_summarizer.rb +103 -0
- data/lib/omniai/tools/browser/page_inspect_tool.rb +30 -0
- data/lib/omniai/tools/browser/page_screenshot_tool.rb +22 -0
- data/lib/omniai/tools/browser/selector_generator/base_selectors.rb +28 -0
- data/lib/omniai/tools/browser/selector_generator/contextual_selectors.rb +140 -0
- data/lib/omniai/tools/browser/selector_generator.rb +73 -0
- data/lib/omniai/tools/browser/selector_inspect_tool.rb +44 -0
- data/lib/omniai/tools/browser/text_field_area_set_tool.rb +2 -31
- data/lib/omniai/tools/browser/visit_tool.rb +1 -1
- data/lib/omniai/tools/browser/watir_driver.rb +222 -0
- data/lib/omniai/tools/browser_tool.rb +262 -0
- data/lib/omniai/tools/computer/base_driver.rb +179 -0
- data/lib/omniai/tools/computer/mac_driver.rb +103 -0
- data/lib/omniai/tools/computer_tool.rb +189 -0
- data/lib/omniai/tools/database/base_driver.rb +17 -0
- data/lib/omniai/tools/database/postgres_driver.rb +30 -0
- data/lib/omniai/tools/database/sqlite_driver.rb +29 -0
- data/lib/omniai/tools/database_tool.rb +100 -0
- data/lib/omniai/tools/version.rb +1 -1
- metadata +31 -5
- data/lib/omniai/tools/database/base_tool.rb +0 -37
- data/lib/omniai/tools/database/sqlite_tool.rb +0 -110
@@ -0,0 +1,262 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
# A tool for controlling a browser.
#
# A single `action` parameter selects the operation; each action is delegated to a dedicated
# `Browser::*Tool` built with the driver and logger given to the constructor.
class BrowserTool < OmniAI::Tool
  # Names of the actions accepted by {#execute}.
  module Action
    VISIT = "visit"
    PAGE_INSPECT = "page_inspect"
    UI_INSPECT = "ui_inspect"
    SELECTOR_INSPECT = "selector_inspect"
    BUTTON_CLICK = "button_click"
    LINK_CLICK = "link_click"
    ELEMENT_CLICK = "element_click"
    TEXT_FIELD_SET = "text_field_set"
    # Referenced by the description and by #execute; it must be defined or loading the class raises NameError.
    SCREENSHOT = "screenshot"
  end

  # Every supported action; used as the enum for the `action` parameter and in the unsupported-action error.
  ACTIONS = [
    Action::VISIT,
    Action::PAGE_INSPECT,
    Action::UI_INSPECT,
    Action::SELECTOR_INSPECT,
    Action::BUTTON_CLICK,
    Action::LINK_CLICK,
    Action::ELEMENT_CLICK,
    Action::TEXT_FIELD_SET,
    Action::SCREENSHOT,
  ].freeze

  description <<~TEXT
    Control a web browser to automate interactions with websites.

    1. `#{Action::VISIT}` - Navigate to a website
      Required: "action": "visit", "url": "[website URL]"

    2. `#{Action::PAGE_INSPECT}` - Get page HTML or summary
      Required: "action": "page_inspect"
      Optional: "full_html": true/false (get full HTML vs summary, default: summary)

    3. `#{Action::UI_INSPECT}` - Find elements by text content
      Required: "action": "ui_inspect", "text_content": "[text to search for]"
      Optional:
      - "selector": "[CSS selector]" (search within specific container)
      - "context_size": [number] (parent elements to show, default: 2)

    4. `#{Action::SELECTOR_INSPECT}` - Find elements by CSS selector
      Required: "action": "selector_inspect", "selector": "[CSS selector]"
      Optional: "context_size": [number] (parent elements to show, default: 2)

    5. `#{Action::BUTTON_CLICK}` - Click a button
      Required: "action": "button_click", "selector": "[button text or CSS selector]"

    6. `#{Action::LINK_CLICK}` - Click a link
      Required: "action": "link_click", "selector": "[link text or CSS selector]"

    7. `#{Action::ELEMENT_CLICK}` - Click any clickable element (div, span, etc.)
      Required: "action": "element_click", "selector": "[CSS selector, text content, ID, or XPath]"

    8. `#{Action::TEXT_FIELD_SET}` - Enter text in input fields/text areas
      Required: "action": "text_field_set", "selector": "[field CSS selector]", "value": "[text to enter]"

    9. `#{Action::SCREENSHOT}` - Take a screenshot of the page or specific element
      Required: "action": "screenshot"

    Examples:
      Visit a website
        {"action": "#{Action::VISIT}", "url": "https://example.com"}
      Get page summary
        {"action": "#{Action::PAGE_INSPECT}"}
      Get full page HTML
        {"action": "#{Action::PAGE_INSPECT}", "full_html": true}
      Find elements containing text
        {"action": "#{Action::UI_INSPECT}", "text_content": "Submit"}
      Find elements with context
        {"action": "#{Action::UI_INSPECT}", "text_content": "Login", "context_size": 3}
      Find elements by CSS selector
        {"action": "#{Action::SELECTOR_INSPECT}", "selector": ".button"}
      Find selector with context
        {"action": "#{Action::SELECTOR_INSPECT}", "selector": "h1", "context_size": 2}
      Click a button with specific text
        {"action": "#{Action::BUTTON_CLICK}", "selector": "Submit"}
      Click a link with specific text
        {"action": "#{Action::LINK_CLICK}", "selector": "Learn More"}
      Click element by CSS selector
        {"action": "#{Action::ELEMENT_CLICK}", "selector": ".wv-select__menu__option"}
      Click element by role attribute
        {"action": "#{Action::ELEMENT_CLICK}", "selector": "[role='listitem']"}
      Click element by text content
        {"action": "#{Action::ELEMENT_CLICK}", "selector": "Medical Evaluation Management"}
      Set text in an input field
        {"action": "#{Action::TEXT_FIELD_SET}", "selector": "#search", "value": "search query"}
      Take a full page screenshot
        {"action": "#{Action::SCREENSHOT}"}
  TEXT

  parameter :action, :string, enum: ACTIONS, description: <<~TEXT
    The browser action to perform. Options:
    * `#{Action::VISIT}`: Navigate to a website
    * `#{Action::PAGE_INSPECT}`: Get full HTML or summarized page information
    * `#{Action::UI_INSPECT}`: Find elements containing specific text
    * `#{Action::SELECTOR_INSPECT}`: Find elements matching CSS selectors
    * `#{Action::BUTTON_CLICK}`: Click a button element
    * `#{Action::LINK_CLICK}`: Click a link element
    * `#{Action::ELEMENT_CLICK}`: Click any clickable element
    * `#{Action::TEXT_FIELD_SET}`: Enter text in input fields or text areas
    * `#{Action::SCREENSHOT}`: Take a screenshot of the page
  TEXT

  parameter :url, :string, description: <<~TEXT
    The URL to visit. Required for the following actions:
    * `#{Action::VISIT}`
  TEXT

  parameter :selector, :string, description: <<~TEXT
    CSS selector, ID, or text content of the element. Required for the following actions:
    * `#{Action::SELECTOR_INSPECT}`
    * `#{Action::BUTTON_CLICK}`
    * `#{Action::LINK_CLICK}`
    * `#{Action::ELEMENT_CLICK}`
    * `#{Action::TEXT_FIELD_SET}`

    Optional for the following actions:
    * `#{Action::UI_INSPECT}` (search within specific container)
  TEXT

  parameter :value, :string, description: <<~TEXT
    The value to set in the text field. Required for the following actions:
    * `#{Action::TEXT_FIELD_SET}`
  TEXT

  parameter :context_size, :integer, description: <<~TEXT
    Number of parent elements to include in inspect results (default: 2). Optional for the following actions:
    * `#{Action::UI_INSPECT}`
    * `#{Action::SELECTOR_INSPECT}`
  TEXT

  parameter :full_html, :boolean, description: <<~TEXT
    Return the full HTML of the page instead of a summary. Optional for the following actions:
    * `#{Action::PAGE_INSPECT}`
  TEXT

  parameter :text_content, :string, description: <<~TEXT
    Search for elements containing this text. Required for the following actions:
    * `#{Action::UI_INSPECT}`
  TEXT

  required %i[action]

  # @param logger [Logger]
  # @param driver [OmniAI::Tools::Browser::BaseDriver]
  def initialize(logger:, driver:)
    super()
    @logger = logger
    @driver = driver
  end

  # Closes the underlying driver.
  def cleanup!
    @driver.close
  end

  # Dispatches to the browser tool matching `action` (compared case-insensitively).
  #
  # @param action [String] one of {ACTIONS}
  # @param url [String, nil]
  # @param selector [String, nil]
  # @param value [String, nil]
  # @param context_size [Integer]
  # @param full_html [Boolean]
  # @param text_content [String, nil]
  #
  # @raise [ArgumentError] when a parameter required by the action is nil
  # @return [Object] the delegated tool's result, or a Hash with an `:error` key for an unsupported action
  def execute(action:, url: nil, selector: nil, value: nil, context_size: 2, full_html: false, text_content: nil)
    case action.to_s.downcase
    when Action::VISIT
      require_param!(:url, url)
      visit_tool.execute(url:)
    when Action::PAGE_INSPECT
      if full_html
        page_inspect_tool.execute
      else
        page_inspect_tool.execute(summarize: true)
      end
    when Action::UI_INSPECT
      require_param!(:text_content, text_content)
      inspect_tool.execute(text_content:, selector:, context_size:)
    when Action::SELECTOR_INSPECT
      require_param!(:selector, selector)
      selector_inspect_tool.execute(selector:, context_size:)
    when Action::BUTTON_CLICK
      require_param!(:selector, selector)
      button_click_tool.execute(selector:)
    when Action::LINK_CLICK
      require_param!(:selector, selector)
      link_click_tool.execute(selector:)
    when Action::ELEMENT_CLICK
      require_param!(:selector, selector)
      element_click_tool.execute(selector:)
    when Action::TEXT_FIELD_SET
      require_param!(:selector, selector)
      require_param!(:value, value)
      text_field_area_set_tool.execute(selector:, text: value)
    when Action::SCREENSHOT
      page_screenshot_tool.execute
    else
      { error: "Unsupported action: #{action}. Supported actions are: #{ACTIONS.join(', ')}" }
    end
  end

  private

  # @param name [Symbol]
  # @param value [Object]
  #
  # @raise [ArgumentError] when `value` is nil
  # @return [void]
  def require_param!(name, value)
    raise ArgumentError, "#{name} parameter is required for this action" if value.nil?
  end

  # @return [Browser::VisitTool]
  def visit_tool
    Browser::VisitTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::PageInspectTool]
  def page_inspect_tool
    Browser::PageInspectTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::InspectTool]
  def inspect_tool
    Browser::InspectTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::SelectorInspectTool]
  def selector_inspect_tool
    Browser::SelectorInspectTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::ButtonClickTool]
  def button_click_tool
    Browser::ButtonClickTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::LinkClickTool]
  def link_click_tool
    Browser::LinkClickTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::ElementClickTool]
  def element_click_tool
    Browser::ElementClickTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::TextFieldAreaSetTool]
  def text_field_area_set_tool
    Browser::TextFieldAreaSetTool.new(driver: @driver, logger: @logger)
  end

  # @return [Browser::PageScreenshotTool]
  def page_screenshot_tool
    Browser::PageScreenshotTool.new(driver: @driver, logger: @logger)
  end
end
|
261
|
+
end
|
262
|
+
end
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sqlite3"
|
4
|
+
|
5
|
+
module OmniAI
|
6
|
+
module Tools
|
7
|
+
module Computer
|
8
|
+
# A tool for interacting with a computer. Be careful with using as it can perform actions on your computer!
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# class SomeDriver < BaseDriver
|
12
|
+
# # @param text [String]
|
13
|
+
# def key(text:)
|
14
|
+
# # TODO
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # @param text [String]
|
18
|
+
# # @param duration [Integer]
|
19
|
+
# def hold_key(text:, duration:)
|
20
|
+
# # TODO
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# # @return [Hash<{ x: Integer, y: Integer }>]
|
24
|
+
# def mouse_position
|
25
|
+
# # TODO
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
29
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
30
|
+
# def mouse_move(coordinate:)
|
31
|
+
# # TODO
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
35
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
36
|
+
# def mouse_click(coordinate:, button:)
|
37
|
+
# # TODO
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
41
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
42
|
+
# def mouse_down(coordinate:, button:)
|
43
|
+
# # TODO
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
47
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
48
|
+
# def mouse_up(coordinate:, button:)
|
49
|
+
# # TODO
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
# # @param text [String]
|
53
|
+
# def type(text:)
|
54
|
+
# # TODO
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# # @param amount [Integer]
|
58
|
+
# # @param direction [String] e.g. "up", "down", "left", "right"
|
59
|
+
# def scroll(amount:, direction:)
|
60
|
+
# # TODO
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# # @yield [file]
|
64
|
+
# # @yieldparam file [File]
|
65
|
+
# def screenshot
|
66
|
+
# # TODO
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
class BaseDriver
  DEFAULT_MOUSE_BUTTON = "left"
  DEFAULT_DISPLAY_SCALE = 2

  # @!attr_accessor :display_width
  # @return [Integer] the width of the display in pixels
  attr_accessor :display_width

  # @!attr_accessor :display_height
  # @return [Integer] the height of the display in pixels
  attr_accessor :display_height

  # @!attr_accessor :display_number
  # @return [Integer] the display number
  attr_accessor :display_number

  # @param display_width [Integer] the width of the display in pixels
  # @param display_height [Integer] the height of the display in pixels
  # @param display_number [Integer] the display number
  def initialize(display_width:, display_height:, display_number:)
    @display_width = display_width
    @display_height = display_height

    @display_number = display_number
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @raise [NotImplementedError] unless overridden
  def key(text:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @param duration [Integer]
  # @raise [NotImplementedError] unless overridden
  def hold_key(text:, duration:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @return [Hash<{ x: Integer, y: Integer }>]
  # @raise [NotImplementedError] unless overridden
  def mouse_position
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @raise [NotImplementedError] unless overridden
  def mouse_move(coordinate:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_click(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_down(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [NotImplementedError] unless overridden
  def mouse_up(coordinate:, button:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Drags from the current mouse position to `coordinate`: presses the button at the current position,
  # moves to the target, then releases the button there.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>] the drop target
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_drag(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    mouse_down(coordinate: mouse_position, button:)
    # mouse_move only accepts `coordinate:`; passing `button:` raised ArgumentError (unknown keyword).
    mouse_move(coordinate:)
    mouse_up(coordinate:, button:)
  end

  # Performs two consecutive clicks via #mouse_click.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_double_click(coordinate:, button:)
    2.times { mouse_click(coordinate:, button:) }
  end

  # Performs three consecutive clicks via #mouse_click.
  #
  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  def mouse_triple_click(coordinate:, button:)
    3.times { mouse_click(coordinate:, button:) }
  end

  # Abstract: subclasses must override.
  #
  # @param text [String]
  # @raise [NotImplementedError] unless overridden
  def type(text:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @param amount [Integer]
  # @param direction [String] e.g. "up", "down", "left", "right"
  # @raise [NotImplementedError] unless overridden
  def scroll(amount:, direction:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Abstract: subclasses must override.
  #
  # @yield [file]
  # @yieldparam file [File]
  # @raise [NotImplementedError] unless overridden
  def screenshot
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Blocks the calling thread by passing `duration` to Kernel.sleep.
  #
  # @param duration [Integer]
  def wait(duration:)
    Kernel.sleep(duration)
  end
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
module Computer
|
6
|
+
# A driver for interacting with a Mac. Be careful with using as it can perform actions on your computer!
#
# Keyboard, mouse and display operations are delegated to the objects passed to the constructor.
class MacDriver < BaseDriver
  # @param keyboard [Object] receives `keys(text)` and `type(text)`
  # @param mouse [Object] receives `position`, `move` and the `<button>_click` / `_down` / `_up` calls
  # @param display [Object] provides `wide`, `high`, `id` and `screenshot`
  def initialize(keyboard: MacOS.keyboard, mouse: MacOS.mouse, display: MacOS.display)
    @keyboard = keyboard
    @mouse = mouse
    @display = display

    super(display_width: display.wide, display_height: display.high, display_number: display.id)
  end

  # @param text [String]
  def key(text:)
    @keyboard.keys(text)
  end

  # Not supported by this driver.
  #
  # @param text [String]
  # @param duration [Integer]
  # @raise [NotImplementedError] always
  def hold_key(text:, duration:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # @return [Hash<{ x: Integer, y: Integer }>]
  def mouse_position
    position = @mouse.position
    x = position.x
    y = position.y

    {
      x:,
      y:,
    }
  end

  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  def mouse_move(coordinate:)
    x = coordinate[:x]
    y = coordinate[:y]

    @mouse.move(x:, y:)
  end

  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [ArgumentError] when `button` is not a supported button
  def mouse_click(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    x = coordinate[:x]
    y = coordinate[:y]

    case button
    when "left" then @mouse.left_click(x:, y:)
    when "middle" then @mouse.middle_click(x:, y:)
    when "right" then @mouse.right_click(x:, y:)
    else unsupported_button!(button)
    end
  end

  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [ArgumentError] when `button` is not a supported button
  def mouse_down(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    x = coordinate[:x]
    y = coordinate[:y]

    case button
    when "left" then @mouse.left_down(x:, y:)
    when "middle" then @mouse.middle_down(x:, y:)
    when "right" then @mouse.right_down(x:, y:)
    else unsupported_button!(button)
    end
  end

  # @param coordinate [Hash<{ x: Integer, y: Integer }>]
  # @param button [String] e.g. "left", "middle", "right"
  # @raise [ArgumentError] when `button` is not a supported button
  def mouse_up(coordinate:, button: DEFAULT_MOUSE_BUTTON)
    x = coordinate[:x]
    y = coordinate[:y]

    case button
    when "left" then @mouse.left_up(x:, y:)
    when "middle" then @mouse.middle_up(x:, y:)
    when "right" then @mouse.right_up(x:, y:)
    else unsupported_button!(button)
    end
  end

  # @param text [String]
  def type(text:)
    @keyboard.type(text)
  end

  # Not supported by this driver.
  #
  # @param amount [Integer]
  # @param direction [String] e.g. "up", "down", "left", "right"
  # @raise [NotImplementedError] always
  def scroll(amount:, direction:)
    raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
  end

  # Forwards the given block to the display's `screenshot`.
  #
  # @yield [file]
  # @yieldparam file [File]
  def screenshot(&)
    @display.screenshot(&)
  end

  private

  # Fails loudly instead of silently doing nothing, so a caller never assumes a click or press happened.
  #
  # @param button [Object] the rejected button value
  # @raise [ArgumentError] always
  def unsupported_button!(button)
    raise ArgumentError, "unsupported mouse button: #{button.inspect} (expected \"left\", \"middle\" or \"right\")"
  end
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|