omniai-tools 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +69 -4
- data/lib/omniai/tools/browser/watir_driver.rb +0 -2
- data/lib/omniai/tools/browser_tool.rb +0 -3
- data/lib/omniai/tools/computer/base_driver.rb +179 -0
- data/lib/omniai/tools/computer/mac_driver.rb +103 -0
- data/lib/omniai/tools/computer_tool.rb +189 -0
- data/lib/omniai/tools/database/base_driver.rb +17 -0
- data/lib/omniai/tools/database/postgres_driver.rb +30 -0
- data/lib/omniai/tools/database/sqlite_driver.rb +29 -0
- data/lib/omniai/tools/database_tool.rb +100 -0
- data/lib/omniai/tools/version.rb +1 -1
- metadata +10 -5
- data/lib/omniai/tools/database/base_tool.rb +0 -37
- data/lib/omniai/tools/database/sqlite_tool.rb +0 -110
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 50e3528ac19ae28cd4360f56a696e9d5cfb1ff81a3d3b89469a65c5fcaefbd83
|
4
|
+
data.tar.gz: da593f7ac889edfae81b6a7f68559008dfcde4db86f698f439b219a9fe649ad3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 98eca70c71d4f1eb509e6234de4aefbe8750116f8ed6882e21a10565fbd636d1982128f11de3c244dc32d0ba40e1e7a77b86b653935a8dc4ee1a78515453644b
|
7
|
+
data.tar.gz: 4191ddbcd454fffd8eff72f61d7470c502d8405d08782981bea5f4e3908b8bc6c767b57563cd24e7b40f9a203bbe3e20c340f1fc966e4f1dffada7a044871686
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
|
11
11
|
## Browser
|
12
12
|
|
13
|
-
|
13
|
+
Browser tools allow you to interact with any website (e.g. visit a page, click on a button, fill in some text, etc):
|
14
14
|
|
15
15
|
```ruby
|
16
16
|
require "omniai/openai"
|
@@ -53,6 +53,72 @@ Here are the top 5 posts on Hacker News right now:
|
|
53
53
|
...
|
54
54
|
```
|
55
55
|
|
56
|
+
## Computer
|
57
|
+
|
58
|
+
A computer tool grants the ability to manage a computer via an LLM:
|
59
|
+
|
60
|
+
```ruby
|
61
|
+
require "omniai/openai"
|
62
|
+
require "omniai/tools"
|
63
|
+
|
64
|
+
require "macos"
|
65
|
+
|
66
|
+
client = OmniAI::OpenAI::Client.new
|
67
|
+
logger = Logger.new($stdout)
|
68
|
+
logger.formatter = proc { |_, _, _, message| "[computer] #{message}\n" }
|
69
|
+
|
70
|
+
driver = OmniAI::Tools::Computer::MacDriver.new
|
71
|
+
tools = [OmniAI::Tools::ComputerTool.new(driver:, logger:)]
|
72
|
+
|
73
|
+
puts "Type 'exit' or 'quit' to leave."
|
74
|
+
|
75
|
+
loop do
|
76
|
+
print "# "
|
77
|
+
text = gets.strip
|
78
|
+
break if %w[exit quit].include?(text)
|
79
|
+
|
80
|
+
driver.screenshot do |file|
|
81
|
+
client.chat(stream: $stdout, tools:) do |prompt|
|
82
|
+
prompt.system <<~TEXT
|
83
|
+
Assist the user with tasks related to the use of their computer.
|
84
|
+
|
85
|
+
1. The display is #{driver.display_width}px (w) × #{driver.display_height}px (h).
|
86
|
+
2. Attached find a screenshot of the display that may be inspected to determine the state of the computer.
|
87
|
+
3. The computer is using MacOS with all the expected applications (e.g. Finder, Safari, etc).
|
88
|
+
4. Any coordinates used for clicking must be scaled for the bounds of the display.
|
89
|
+
5. Whenever possible prefer to navigate using keyboard shortcuts rather than mouse clicks.
|
90
|
+
TEXT
|
91
|
+
|
92
|
+
prompt.user do |message|
|
93
|
+
message.text(text)
|
94
|
+
message.file(file.path, "image/png")
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
```
|
100
|
+
|
101
|
+
```
|
102
|
+
Type 'exit' or 'quit' to leave.
|
103
|
+
|
104
|
+
# What do you see on my screen?
|
105
|
+
|
106
|
+
Here's what I see on your screen:
|
107
|
+
- You are using a Mac with a display resolution of 2560×1440 pixels.
|
108
|
+
- The Terminal app is open at the very top, with a command prompt in a directory related to "omnial-tools" and "computer".
|
109
|
+
- Below the Terminal, Visual Studio Code (VS Code) is open showing a project directory named "omnial-tools", specifically in a folder like /examples/computer.
|
110
|
+
|
111
|
+
# Please open Safari
|
112
|
+
|
113
|
+
[computer] action="mouse_click" coordinate={x: 484, y: 1398} mouse_button="left"
|
114
|
+
Safari is being opened now. Let me know if you need to visit a specific website or perform any other actions in Safari!
|
115
|
+
|
116
|
+
# What is the current position of my mouse?
|
117
|
+
|
118
|
+
[computer] action="mouse_position"
|
119
|
+
Your mouse is currently positioned at approximately (484, 1398) on your screen.
|
120
|
+
```
|
121
|
+
|
56
122
|
## Database
|
57
123
|
|
58
124
|
Database tools are focused on running SQL statements:
|
@@ -64,13 +130,12 @@ require "omniai/tools"
|
|
64
130
|
require "sqlite3"
|
65
131
|
|
66
132
|
db = SQLite3::Database.new(":memory:")
|
133
|
+
driver = OmniAI::Tools::Database::SqliteDriver.new(db:)
|
67
134
|
|
68
135
|
client = OmniAI::OpenAI::Client.new
|
69
136
|
logger = Logger.new($stdout)
|
70
137
|
|
71
|
-
tools = [
|
72
|
-
OmniAI::Tools::Database::SqliteTool.new(logger:, db:),
|
73
|
-
]
|
138
|
+
tools = [OmniAI::Tools::DatabaseTool.new(logger:, driver:)]
|
74
139
|
|
75
140
|
puts "Type 'exit' or 'quit' to leave."
|
76
141
|
|
@@ -13,7 +13,6 @@ module OmniAI
|
|
13
13
|
LINK_CLICK = "link_click"
|
14
14
|
ELEMENT_CLICK = "element_click"
|
15
15
|
TEXT_FIELD_SET = "text_field_set"
|
16
|
-
SCREENSHOT = "screenshot"
|
17
16
|
end
|
18
17
|
|
19
18
|
ACTIONS = [
|
@@ -25,7 +24,6 @@ module OmniAI
|
|
25
24
|
Action::LINK_CLICK,
|
26
25
|
Action::ELEMENT_CLICK,
|
27
26
|
Action::TEXT_FIELD_SET,
|
28
|
-
Action::SCREENSHOT,
|
29
27
|
].freeze
|
30
28
|
|
31
29
|
description <<~TEXT
|
@@ -104,7 +102,6 @@ module OmniAI
|
|
104
102
|
* `#{Action::LINK_CLICK}`: Click a link element
|
105
103
|
* `#{Action::ELEMENT_CLICK}`: Click any clickable element
|
106
104
|
* `#{Action::TEXT_FIELD_SET}`: Enter text in input fields or text areas
|
107
|
-
* `#{Action::SCREENSHOT}`: Take a screenshot of the page or specific element
|
108
105
|
TEXT
|
109
106
|
|
110
107
|
parameter :url, :string, description: <<~TEXT
|
@@ -0,0 +1,179 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "sqlite3"
|
4
|
+
|
5
|
+
module OmniAI
|
6
|
+
module Tools
|
7
|
+
module Computer
|
8
|
+
# A base driver for interacting with a computer. Be careful when using it, as it can perform actions on your computer!
|
9
|
+
#
|
10
|
+
# @example
|
11
|
+
# class SomeDriver < BaseDriver
|
12
|
+
# # @param text [String]
|
13
|
+
# def key(text:)
|
14
|
+
# # TODO
|
15
|
+
# end
|
16
|
+
#
|
17
|
+
# # @param text [String]
|
18
|
+
# # @param duration [Integer]
|
19
|
+
# def hold_key(text:, duration:)
|
20
|
+
# # TODO
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# # @return [Hash<{ x: Integer, y: Integer }>]
|
24
|
+
# def mouse_position
|
25
|
+
# # TODO
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
29
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
30
|
+
# def mouse_move(coordinate:)
|
31
|
+
# # TODO
|
32
|
+
# end
|
33
|
+
#
|
34
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
35
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
36
|
+
# def mouse_click(coordinate:, button:)
|
37
|
+
# # TODO
|
38
|
+
# end
|
39
|
+
#
|
40
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
41
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
42
|
+
# def mouse_down(coordinate:, button:)
|
43
|
+
# # TODO
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# # @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
47
|
+
# # @param button [String] e.g. "left", "middle", "right"
|
48
|
+
# def mouse_up(coordinate:, button:)
|
49
|
+
# # TODO
|
50
|
+
# end
|
51
|
+
#
|
52
|
+
# # @param text [String]
|
53
|
+
# def type(text:)
|
54
|
+
# # TODO
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# # @param amount [Integer]
|
58
|
+
# # @param direction [String] e.g. "up", "down", "left", "right"
|
59
|
+
# def scroll(amount:, direction:)
|
60
|
+
# # TODO
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# # @yield [file]
|
64
|
+
# # @yieldparam file [File]
|
65
|
+
# def screenshot
|
66
|
+
# # TODO
|
67
|
+
# end
|
68
|
+
# end
|
69
|
+
class BaseDriver
|
70
|
+
DEFAULT_MOUSE_BUTTON = "left"
|
71
|
+
DEFAULT_DISPLAY_SCALE = 2
|
72
|
+
|
73
|
+
# @!attr_accessor :display_width
|
74
|
+
# @return [Integer] the width of the display in pixels
|
75
|
+
attr_accessor :display_width
|
76
|
+
|
77
|
+
# @!attr_accessor :display_height
|
78
|
+
# @return [Integer] the height of the display in pixels
|
79
|
+
attr_accessor :display_height
|
80
|
+
|
81
|
+
# @!attr_accessor :display_number
|
82
|
+
# @return [Integer] the display number
|
83
|
+
attr_accessor :display_number
|
84
|
+
|
85
|
+
# @param display_width [Integer] the width of the display in pixels
|
86
|
+
# @param display_height [Integer] the height of the display in pixels
|
87
|
+
# @param display_number [Integer] the display number
|
88
|
+
def initialize(display_width:, display_height:, display_number:)
|
89
|
+
@display_width = display_width
|
90
|
+
@display_height = display_height
|
91
|
+
|
92
|
+
@display_number = display_number
|
93
|
+
end
|
94
|
+
|
95
|
+
# @param text [String]
|
96
|
+
def key(text:)
|
97
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
98
|
+
end
|
99
|
+
|
100
|
+
# @param text [String]
|
101
|
+
# @param duration [Integer]
|
102
|
+
def hold_key(text:, duration:)
|
103
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
104
|
+
end
|
105
|
+
|
106
|
+
# @return [Hash<{ x: Integer, y: Integer }>]
|
107
|
+
def mouse_position
|
108
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
109
|
+
end
|
110
|
+
|
111
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
112
|
+
# @param button [String] e.g. "left", "middle", "right"
|
113
|
+
def mouse_move(coordinate:)
|
114
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
115
|
+
end
|
116
|
+
|
117
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
118
|
+
# @param button [String] e.g. "left", "middle", "right"
|
119
|
+
def mouse_click(coordinate:, button:)
|
120
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
121
|
+
end
|
122
|
+
|
123
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
124
|
+
# @param button [String] e.g. "left", "middle", "right"
|
125
|
+
def mouse_down(coordinate:, button:)
|
126
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
127
|
+
end
|
128
|
+
|
129
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
130
|
+
# @param button [String] e.g. "left", "middle", "right"
|
131
|
+
def mouse_up(coordinate:, button:)
|
132
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
133
|
+
end
|
134
|
+
|
135
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
136
|
+
# @param button [String] e.g. "left", "middle", "right"
|
137
|
+
def mouse_drag(coordinate:, button: DEFAULT_MOUSE_BUTTON)
|
138
|
+
mouse_down(coordinate: mouse_position, button:)
|
139
|
+
mouse_move(coordinate:, button:)
|
140
|
+
mouse_up(coordinate:, button:)
|
141
|
+
end
|
142
|
+
|
143
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
144
|
+
# @param button [String] e.g. "left", "middle", "right"
|
145
|
+
def mouse_double_click(coordinate:, button:)
|
146
|
+
2.times { mouse_click(coordinate:, button:) }
|
147
|
+
end
|
148
|
+
|
149
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
150
|
+
# @param button [String] e.g. "left", "middle", "right"
|
151
|
+
def mouse_triple_click(coordinate:, button:)
|
152
|
+
3.times { mouse_click(coordinate:, button:) }
|
153
|
+
end
|
154
|
+
|
155
|
+
# @param text [String]
|
156
|
+
def type(text:)
|
157
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
158
|
+
end
|
159
|
+
|
160
|
+
# @param amount [Integer]
|
161
|
+
# @param direction [String] e.g. "up", "down", "left", "right"
|
162
|
+
def scroll(amount:, direction:)
|
163
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
164
|
+
end
|
165
|
+
|
166
|
+
# @yield [file]
|
167
|
+
# @yieldparam file [File]
|
168
|
+
def screenshot
|
169
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
170
|
+
end
|
171
|
+
|
172
|
+
# @param duration [Integer]
|
173
|
+
def wait(duration:)
|
174
|
+
Kernel.sleep(duration)
|
175
|
+
end
|
176
|
+
end
|
177
|
+
end
|
178
|
+
end
|
179
|
+
end
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
module Computer
|
6
|
+
# A driver for interacting with a Mac. Be careful with using as it can perform actions on your computer!
|
7
|
+
class MacDriver < BaseDriver
|
8
|
+
def initialize(keyboard: MacOS.keyboard, mouse: MacOS.mouse, display: MacOS.display)
|
9
|
+
@keyboard = keyboard
|
10
|
+
@mouse = mouse
|
11
|
+
@display = display
|
12
|
+
|
13
|
+
super(display_width: display.wide, display_height: display.high, display_number: display.id)
|
14
|
+
end
|
15
|
+
|
16
|
+
# @param text [String]
|
17
|
+
def key(text:)
|
18
|
+
@keyboard.keys(text)
|
19
|
+
end
|
20
|
+
|
21
|
+
# @param text [String]
|
22
|
+
# @param duration [Integer]
|
23
|
+
def hold_key(text:, duration:)
|
24
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
25
|
+
end
|
26
|
+
|
27
|
+
# @return [Hash<{ x: Integer, y: Integer }>]
|
28
|
+
def mouse_position
|
29
|
+
position = @mouse.position
|
30
|
+
x = position.x
|
31
|
+
y = position.y
|
32
|
+
|
33
|
+
{
|
34
|
+
x:,
|
35
|
+
y:,
|
36
|
+
}
|
37
|
+
end
|
38
|
+
|
39
|
+
def mouse_move(coordinate:)
|
40
|
+
x = coordinate[:x]
|
41
|
+
y = coordinate[:y]
|
42
|
+
|
43
|
+
@mouse.move(x:, y:)
|
44
|
+
end
|
45
|
+
|
46
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
47
|
+
# @param button [String] e.g. "left", "middle", "right"
|
48
|
+
def mouse_click(coordinate:, button:)
|
49
|
+
x = coordinate[:x]
|
50
|
+
y = coordinate[:y]
|
51
|
+
|
52
|
+
case button
|
53
|
+
when "left" then @mouse.left_click(x:, y:)
|
54
|
+
when "middle" then @mouse.middle_click(x:, y:)
|
55
|
+
when "right" then @mouse.right_click(x:, y:)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
60
|
+
def mouse_down(coordinate:, button: DEFAULT_MOUSE_BUTTON)
|
61
|
+
x = coordinate[:x]
|
62
|
+
y = coordinate[:y]
|
63
|
+
|
64
|
+
case button
|
65
|
+
when "left" then @mouse.left_down(x:, y:)
|
66
|
+
when "middle" then @mouse.middle_down(x:, y:)
|
67
|
+
when "right" then @mouse.right_down(x:, y:)
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>]
|
72
|
+
# @param button [String] e.g. "left", "middle", "right"
|
73
|
+
def mouse_up(coordinate:, button: DEFAULT_MOUSE_BUTTON)
|
74
|
+
x = coordinate[:x]
|
75
|
+
y = coordinate[:y]
|
76
|
+
|
77
|
+
case button
|
78
|
+
when "left" then @mouse.left_up(x:, y:)
|
79
|
+
when "middle" then @mouse.middle_up(x:, y:)
|
80
|
+
when "right" then @mouse.right_up(x:, y:)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
# @param text [String]
|
85
|
+
def type(text:)
|
86
|
+
@keyboard.type(text)
|
87
|
+
end
|
88
|
+
|
89
|
+
# @param amount [Integer]
|
90
|
+
# @param direction [String] e.g. "up", "down", "left", "right"
|
91
|
+
def scroll(amount:, direction:)
|
92
|
+
raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
|
93
|
+
end
|
94
|
+
|
95
|
+
# @yield [file]
|
96
|
+
# @yieldparam file [File]
|
97
|
+
def screenshot(&)
|
98
|
+
@display.screenshot(&)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,189 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
# A tool for interacting with a computer. Be careful with using as it can perform actions on your computer!
|
6
|
+
#
|
7
|
+
# @example
|
8
|
+
# computer = OmniAI::Tools::Computer::MacTool.new
|
9
|
+
# computer.display # { "width": 2560, "height": 1440, "scale": 1 }
|
10
|
+
# computer.screenshot
|
11
|
+
class ComputerTool < OmniAI::Tool
|
12
|
+
description "A tool for interacting with a computer."
|
13
|
+
|
14
|
+
module Action
|
15
|
+
KEY = "key" # press a key
|
16
|
+
HOLD_KEY = "hold_key" # hold a key
|
17
|
+
MOUSE_POSITION = "mouse_position" # get the current (x, y) pixel coordinate of the cursor on the screen
|
18
|
+
MOUSE_MOVE = "mouse_move" # move the cursor to a specific (x, y) pixel coordinate on the screen
|
19
|
+
MOUSE_CLICK = "mouse_click" # click at a specific x / y coordinate
|
20
|
+
MOUSE_DOWN = "mouse_down" # press the mouse button down
|
21
|
+
MOUSE_DRAG = "mouse_drag" # drag the mouse to a specific x / y coordinate
|
22
|
+
MOUSE_UP = "mouse_up" # release the mouse button
|
23
|
+
MOUSE_DOUBLE_CLICK = "mouse_double_click" # double click at a specific x / y coordinate
|
24
|
+
MOUSE_TRIPLE_CLICK = "mouse_triple_click" # triple click at a specific x / y coordinate
|
25
|
+
TYPE = "type" # type a string
|
26
|
+
SCROLL = "scroll"
|
27
|
+
WAIT = "wait"
|
28
|
+
end
|
29
|
+
|
30
|
+
module MouseButton
|
31
|
+
LEFT = "left"
|
32
|
+
MIDDLE = "middle"
|
33
|
+
RIGHT = "right"
|
34
|
+
end
|
35
|
+
|
36
|
+
module ScrollDirection
|
37
|
+
UP = "up"
|
38
|
+
DOWN = "down"
|
39
|
+
LEFT = "left"
|
40
|
+
RIGHT = "right"
|
41
|
+
end
|
42
|
+
|
43
|
+
ACTIONS = [
|
44
|
+
Action::KEY,
|
45
|
+
Action::HOLD_KEY,
|
46
|
+
Action::MOUSE_POSITION,
|
47
|
+
Action::MOUSE_MOVE,
|
48
|
+
Action::MOUSE_CLICK,
|
49
|
+
Action::MOUSE_DOWN,
|
50
|
+
Action::MOUSE_DRAG,
|
51
|
+
Action::MOUSE_UP,
|
52
|
+
Action::TYPE,
|
53
|
+
Action::SCROLL,
|
54
|
+
Action::WAIT,
|
55
|
+
].freeze
|
56
|
+
|
57
|
+
MOUSE_BUTTON_OPTIONS = [
|
58
|
+
MouseButton::LEFT,
|
59
|
+
MouseButton::MIDDLE,
|
60
|
+
MouseButton::RIGHT,
|
61
|
+
].freeze
|
62
|
+
|
63
|
+
SCROLL_DIRECTION_OPTIONS = [
|
64
|
+
ScrollDirection::UP,
|
65
|
+
ScrollDirection::DOWN,
|
66
|
+
ScrollDirection::LEFT,
|
67
|
+
ScrollDirection::RIGHT,
|
68
|
+
].freeze
|
69
|
+
|
70
|
+
parameter :action, :string, enum: ACTIONS, description: <<~TEXT
|
71
|
+
Options:
|
72
|
+
* `#{Action::KEY}`: Press a single key / combination of keys on the keyboard:
|
73
|
+
- supports xdotool's `key` syntax (e.g. "alt+Tab", "Return", "ctrl+s", etc)
|
74
|
+
* `#{Action::HOLD_KEY}`: Hold down a key or multiple keys for a specified duration (in seconds):
|
75
|
+
- supports xdotool's `key` syntax (e.g. "alt+Tab", "Return", "ctrl+s", etc)
|
76
|
+
* `#{Action::MOUSE_POSITION}`: Get the current (x,y) pixel coordinate of the cursor on the screen.
|
77
|
+
* `#{Action::MOUSE_MOVE}`: Move the cursor to a specified (x,y) pixel coordinate on the screen.
|
78
|
+
* `#{Action::MOUSE_CLICK}`: Click the mouse button at the specified (x,y) pixel coordinate on the screen.
|
79
|
+
* `#{Action::MOUSE_DOUBLE_CLICK}`: Double click at the specified (x,y) pixel coordinate on the screen.
|
80
|
+
* `#{Action::MOUSE_TRIPLE_CLICK}`: Triple click at the specified (x,y) pixel coordinate on the screen.
|
81
|
+
* `#{Action::MOUSE_DOWN}`: Press the mouse button at the specified (x,y) pixel coordinate on the screen.
|
82
|
+
* `#{Action::MOUSE_DRAG}`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
|
83
|
+
* `#{Action::MOUSE_UP}`: Release the mouse button at the specified (x,y) pixel coordinate on the screen.
|
84
|
+
* `#{Action::TYPE}`: Type a string of text on the keyboard.
|
85
|
+
* `#{Action::SCROLL}`: Scroll the screen in a specified direction by a specified amount of clicks of the scroll wheel.
|
86
|
+
* `#{Action::WAIT}`: Wait for a specified duration (in seconds).
|
87
|
+
TEXT
|
88
|
+
|
89
|
+
parameter :coordinate, :object, properties: {
|
90
|
+
x: OmniAI::Schema.integer(description: "The x position in pixels"),
|
91
|
+
y: OmniAI::Schema.integer(description: "The y position in pixels"),
|
92
|
+
}, required: %i[x y], description: <<~TEXT
|
93
|
+
An (x,y) coordinate. Required for the following actions:
|
94
|
+
* `#{Action::MOUSE_MOVE}`
|
95
|
+
* `#{Action::MOUSE_CLICK}`
|
96
|
+
* `#{Action::MOUSE_DOWN}`
|
97
|
+
* `#{Action::MOUSE_DRAG}`
|
98
|
+
* `#{Action::MOUSE_UP}`
|
99
|
+
* `#{Action::MOUSE_DOUBLE_CLICK}`
|
100
|
+
* `#{Action::MOUSE_TRIPLE_CLICK}`
|
101
|
+
TEXT
|
102
|
+
|
103
|
+
parameter :text, :string, description: <<~TEXT
|
104
|
+
The text to type. Required for the following actions:
|
105
|
+
* `#{Action::KEY}`
|
106
|
+
* `#{Action::HOLD_KEY}`
|
107
|
+
* `#{Action::TYPE}`
|
108
|
+
TEXT
|
109
|
+
|
110
|
+
parameter :duration, :integer, description: <<~TEXT
|
111
|
+
A duration in seconds. Required for the following actions:
|
112
|
+
* `#{Action::HOLD_KEY}`
|
113
|
+
* `#{Action::WAIT}`
|
114
|
+
TEXT
|
115
|
+
|
116
|
+
parameter :mouse_button, :string, enum: MOUSE_BUTTON_OPTIONS, description: <<~TEXT
|
117
|
+
The mouse button to use. Required for the following actions:
|
118
|
+
* `#{Action::MOUSE_CLICK}`
|
119
|
+
* `#{Action::MOUSE_DOWN}`
|
120
|
+
* `#{Action::MOUSE_DRAG}`
|
121
|
+
* `#{Action::MOUSE_UP}`
|
122
|
+
* `#{Action::MOUSE_DOUBLE_CLICK}`
|
123
|
+
* `#{Action::MOUSE_TRIPLE_CLICK}`
|
124
|
+
TEXT
|
125
|
+
|
126
|
+
parameter :scroll_direction, :string, enum: SCROLL_DIRECTION_OPTIONS, description: <<~TEXT
|
127
|
+
The direction to scroll. Required for the following actions:
|
128
|
+
* `#{Action::SCROLL}`
|
129
|
+
TEXT
|
130
|
+
|
131
|
+
parameter :scroll_amount, :integer, description: <<~TEXT
|
132
|
+
The amount of clicks to scroll. Required for the following actions:
|
133
|
+
* `#{Action::SCROLL}`
|
134
|
+
TEXT
|
135
|
+
|
136
|
+
required %i[action]
|
137
|
+
|
138
|
+
# @param driver [Computer::BaseDriver]
|
139
|
+
def initialize(driver:, logger: Logger.new(IO::NULL))
|
140
|
+
@driver = driver
|
141
|
+
@logger = logger
|
142
|
+
super()
|
143
|
+
end
|
144
|
+
|
145
|
+
# @param action [String]
|
146
|
+
# @param coordinate [Hash<{ x: Integer, y: Integer }>] the (x,y) coordinate
|
147
|
+
# @param text [String]
|
148
|
+
# @param duration [Integer] the duration in seconds
|
149
|
+
# @param mouse_button [String] e.g. "left", "middle", "right"
|
150
|
+
# @param scroll_direction [String] e.g. "up", "down", "left", "right"
|
151
|
+
# @param scroll_amount [Integer] the amount of clicks to scroll
|
152
|
+
def execute(
|
153
|
+
action:,
|
154
|
+
coordinate: nil,
|
155
|
+
text: nil,
|
156
|
+
duration: nil,
|
157
|
+
mouse_button: nil,
|
158
|
+
scroll_direction: nil,
|
159
|
+
scroll_amount: nil
|
160
|
+
)
|
161
|
+
@logger.info({
|
162
|
+
action:,
|
163
|
+
coordinate:,
|
164
|
+
text:,
|
165
|
+
duration:,
|
166
|
+
mouse_button:,
|
167
|
+
scroll_direction:,
|
168
|
+
scroll_amount:,
|
169
|
+
}.compact.map { |key, value| "#{key}=#{value.inspect}" }.join(" "))
|
170
|
+
|
171
|
+
case action
|
172
|
+
when Action::KEY then @driver.key(text:)
|
173
|
+
when Action::HOLD_KEY then @driver.hold_key(text:, duration:)
|
174
|
+
when Action::MOUSE_POSITION then @driver.mouse_position
|
175
|
+
when Action::MOUSE_MOVE then @driver.mouse_move(coordinate:)
|
176
|
+
when Action::MOUSE_CLICK then @driver.mouse_click(coordinate:, button: mouse_button)
|
177
|
+
when Action::MOUSE_DOUBLE_CLICK then @driver.mouse_double_click(coordinate:, button: mouse_button)
|
178
|
+
when Action::MOUSE_TRIPLE_CLICK then @driver.mouse_triple_click(coordinate:, button: mouse_button)
|
179
|
+
when Action::MOUSE_DOWN then @driver.mouse_down(coordinate:, button: mouse_button)
|
180
|
+
when Action::MOUSE_UP then @driver.mouse_up(coordinate:, button: mouse_button)
|
181
|
+
when Action::MOUSE_DRAG then @driver.mouse_drag(coordinate:, button: mouse_button)
|
182
|
+
when Action::TYPE then @driver.type(text:)
|
183
|
+
when Action::SCROLL then @driver.scroll(amount: scroll_amount, direction: scroll_direction)
|
184
|
+
when Action::WAIT then @driver.wait(duration:)
|
185
|
+
end
|
186
|
+
end
|
187
|
+
end
|
188
|
+
end
|
189
|
+
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
module Database
|
6
|
+
# Base class for database drivers (e.g. sqlite, postgres, mysql, etc).
|
7
|
+
class BaseDriver
|
8
|
+
# @param statement [String] e.g. "SELECT * FROM people"
|
9
|
+
#
|
10
|
+
# @return [Hash] e.g. { status: :ok, result: [["id", "name"], [1, "John"], [2, "Paul"], ...] }
|
11
|
+
def perform(statement:)
|
12
|
+
raise NotImplementedError, "#{self.class}##{__method__} undefined"
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
module Database
|
6
|
+
# @example
|
7
|
+
# connection = PG.connect(dbname: "testdb")
|
8
|
+
# driver = OmniAI::Tools::Database::PostgresDriver.new(connection:)
|
9
|
+
# driver.perform(statement: "SELECT * FROM people")
|
10
|
+
class PostgresDriver < BaseDriver
|
11
|
+
# @param connection [PG::Connection]
|
12
|
+
def initialize(connection:)
|
13
|
+
super()
|
14
|
+
@connection = connection
|
15
|
+
end
|
16
|
+
|
17
|
+
# @param statement [String]
|
18
|
+
#
|
19
|
+
# @return [Hash]
|
20
|
+
def perform(statement:)
|
21
|
+
@connection.exec(statement) do |result|
|
22
|
+
{ status: :ok, result: [result.fields] + result.values }
|
23
|
+
end
|
24
|
+
rescue ::PG::Error => e
|
25
|
+
{ status: :error, message: e.message }
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
module Database
|
6
|
+
# @example
|
7
|
+
# driver = OmniAI::Tools::Database::SqliteDriver.new(db: SQLite3::Database.new(":memory:"))
|
8
|
+
# driver.perform(statement: "SELECT * FROM people")
|
9
|
+
class SqliteDriver < BaseDriver
|
10
|
+
# @param db [SQLite3::Database]
|
11
|
+
def initialize(db:)
|
12
|
+
super()
|
13
|
+
@db = db
|
14
|
+
end
|
15
|
+
|
16
|
+
# @param statement [String]
|
17
|
+
#
|
18
|
+
# @return [Hash]
|
19
|
+
def perform(statement:)
|
20
|
+
result = @db.execute2(statement)
|
21
|
+
|
22
|
+
{ status: :ok, result: }
|
23
|
+
rescue ::SQLite3::Exception => e
|
24
|
+
{ status: :error, message: e.message }
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -0,0 +1,100 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OmniAI
|
4
|
+
module Tools
|
5
|
+
# @example
|
6
|
+
# db = SQLite3::Database.new("./db.sqlite")
|
7
|
+
# driver = OmniAI::Tools::Database::SqliteDriver.new(db:)
|
8
|
+
# tool = OmniAI::Tools::DatabaseTool.new(driver:)
|
9
|
+
# tool.execute(statements: ["SELECT * FROM people"])
|
10
|
+
class DatabaseTool < OmniAI::Tool
|
11
|
+
description <<~TEXT
|
12
|
+
Executes SQL commands (INSERT / UPDATE / SELECT / etc) on a database.
|
13
|
+
|
14
|
+
Example:
|
15
|
+
|
16
|
+
STATEMENTS:
|
17
|
+
|
18
|
+
[
|
19
|
+
'CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)',
|
20
|
+
'INSERT INTO people (name) VALUES ('John')',
|
21
|
+
'INSERT INTO people (name) VALUES ('Paul')',
|
22
|
+
'SELECT * FROM people',
|
23
|
+
'DROP TABLE people'
|
24
|
+
]
|
25
|
+
|
26
|
+
RESULT:
|
27
|
+
|
28
|
+
[
|
29
|
+
{
|
30
|
+
"status": "OK",
|
31
|
+
"statement": "CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)",
|
32
|
+
"result": "..."
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"status": "OK",
|
36
|
+
"statement": "INSERT INTO people (name) VALUES ('John')"
|
37
|
+
"result": "..."
|
38
|
+
},
|
39
|
+
{
|
40
|
+
"status": "OK",
|
41
|
+
"statement": "INSERT INTO people (name) VALUES ('Paul')",
|
42
|
+
"result": "..."
|
43
|
+
},
|
44
|
+
{
|
45
|
+
"status": "OK",
|
46
|
+
"statement": "SELECT * FROM people",
|
47
|
+
"result": "..."
|
48
|
+
},
|
49
|
+
{
|
50
|
+
"status": "OK",
|
51
|
+
"statement": "DROP TABLE people",
|
52
|
+
"result": "..."
|
53
|
+
}
|
54
|
+
]
|
55
|
+
TEXT
|
56
|
+
|
57
|
+
parameter(
|
58
|
+
:statements,
|
59
|
+
:array,
|
60
|
+
description: "A list of SQL statements to run sequentially.",
|
61
|
+
items: OmniAI::Schema.string(description: 'A SQL statement to run (e.g. "SELECT * FROM ...").')
|
62
|
+
)
|
63
|
+
|
64
|
+
required %i[statements]
|
65
|
+
|
66
|
+
# @param driver [OmniAI::Tools::Database::BaseDriver]
|
67
|
+
# @param logger [Logger] An optional logger for debugging executed commands.
|
68
|
+
def initialize(driver:, logger: Logger.new(IO::NULL))
|
69
|
+
super()
|
70
|
+
@driver = driver
|
71
|
+
@logger = logger
|
72
|
+
end
|
73
|
+
|
74
|
+
# @example
|
75
|
+
# tool = OmniAI::Tools::DatabaseTool.new(driver:)
|
76
|
+
# tool.execute(statements: ["SELECT * FROM people"])
|
77
|
+
#
|
78
|
+
# @param statements [Array<String>]
|
79
|
+
#
|
80
|
+
# @return [Array<Hash>]
|
81
|
+
def execute(statements:)
|
82
|
+
[].tap do |executions|
|
83
|
+
statements.map do |statement|
|
84
|
+
execution = perform(statement:).merge(statement:)
|
85
|
+
executions << execution
|
86
|
+
break unless execution[:status].eql?(:ok)
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
90
|
+
|
91
|
+
def perform(statement:)
|
92
|
+
@logger&.info("#perform statement=#{statement.inspect}")
|
93
|
+
|
94
|
+
@driver.perform(statement:).tap do |result|
|
95
|
+
@logger&.info(JSON.generate(result))
|
96
|
+
end
|
97
|
+
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
data/lib/omniai/tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: omniai-tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Sylvestre
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies:
|
12
12
|
- !ruby/object:Gem::Dependency
|
13
13
|
name: omniai
|
@@ -76,8 +76,13 @@ files:
|
|
76
76
|
- lib/omniai/tools/browser/visit_tool.rb
|
77
77
|
- lib/omniai/tools/browser/watir_driver.rb
|
78
78
|
- lib/omniai/tools/browser_tool.rb
|
79
|
-
- lib/omniai/tools/
|
80
|
-
- lib/omniai/tools/
|
79
|
+
- lib/omniai/tools/computer/base_driver.rb
|
80
|
+
- lib/omniai/tools/computer/mac_driver.rb
|
81
|
+
- lib/omniai/tools/computer_tool.rb
|
82
|
+
- lib/omniai/tools/database/base_driver.rb
|
83
|
+
- lib/omniai/tools/database/postgres_driver.rb
|
84
|
+
- lib/omniai/tools/database/sqlite_driver.rb
|
85
|
+
- lib/omniai/tools/database_tool.rb
|
81
86
|
- lib/omniai/tools/disk/base_tool.rb
|
82
87
|
- lib/omniai/tools/disk/directory_create_tool.rb
|
83
88
|
- lib/omniai/tools/disk/directory_delete_tool.rb
|
@@ -113,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
113
118
|
- !ruby/object:Gem::Version
|
114
119
|
version: '0'
|
115
120
|
requirements: []
|
116
|
-
rubygems_version: 3.6.
|
121
|
+
rubygems_version: 3.6.9
|
117
122
|
specification_version: 4
|
118
123
|
summary: A set of tools built for usage with OmniAI.
|
119
124
|
test_files: []
|
@@ -1,37 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "sqlite3"
|
4
|
-
|
5
|
-
module OmniAI
|
6
|
-
module Tools
|
7
|
-
module Database
|
8
|
-
# @example
|
9
|
-
# tool = OmniAI::Tools::Database::SqliteTool.new
|
10
|
-
# tool.execute(path: "./foo/bar")
|
11
|
-
class BaseTool < OmniAI::Tool
|
12
|
-
# @param logger [IO] An optional logger for debugging executed commands.
|
13
|
-
def initialize(logger: Logger.new(IO::NULL))
|
14
|
-
super()
|
15
|
-
@logger = logger
|
16
|
-
end
|
17
|
-
|
18
|
-
# @example
|
19
|
-
# tool = OmniAI::Tools::Database::BaseTool.new
|
20
|
-
# tool.execute(statements: ["SELECT * FROM people"])
|
21
|
-
#
|
22
|
-
# @param statements [Array<String>]
|
23
|
-
#
|
24
|
-
# @return [Array<Hash>]
|
25
|
-
def execute(statements:)
|
26
|
-
[].tap do |executions|
|
27
|
-
statements.map do |statement|
|
28
|
-
execution = perform(statement:)
|
29
|
-
executions << execution
|
30
|
-
break unless execution[:status].eql?(:ok)
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
end
|
37
|
-
end
|
@@ -1,110 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require "sqlite3"
|
4
|
-
|
5
|
-
module OmniAI
|
6
|
-
module Tools
|
7
|
-
module Database
|
8
|
-
# @example
|
9
|
-
# tool = OmniAI::Tools::Database::SqliteTool.new
|
10
|
-
# tool.execute(path: "./foo/bar")
|
11
|
-
class SqliteTool < BaseTool
|
12
|
-
description <<~TEXT
|
13
|
-
Executes SQL commands (INSERT / UPDATE / SELECT / etc) on a database.
|
14
|
-
|
15
|
-
Example:
|
16
|
-
|
17
|
-
STATEMENTS:
|
18
|
-
|
19
|
-
[
|
20
|
-
'CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)',
|
21
|
-
'INSERT INTO people (name) VALUES ('John')',
|
22
|
-
'INSERT INTO people (name) VALUES ('Paul')',
|
23
|
-
'SELECT * FROM people',
|
24
|
-
'DROP TABLE people'
|
25
|
-
]
|
26
|
-
|
27
|
-
RESULT:
|
28
|
-
|
29
|
-
[
|
30
|
-
{
|
31
|
-
"status": "OK",
|
32
|
-
"statement": "CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)",
|
33
|
-
"result": "..."
|
34
|
-
},
|
35
|
-
{
|
36
|
-
"status": "OK",
|
37
|
-
"statement": "INSERT INTO people (name) VALUES ('John')"
|
38
|
-
"result": "..."
|
39
|
-
},
|
40
|
-
{
|
41
|
-
"status": "OK",
|
42
|
-
"statement": "INSERT INTO people (name) VALUES ('Paul')",
|
43
|
-
"result": "..."
|
44
|
-
},
|
45
|
-
{
|
46
|
-
"status": "OK",
|
47
|
-
"statement": "SELECT * FROM people",
|
48
|
-
"result": "..."
|
49
|
-
},
|
50
|
-
{
|
51
|
-
"status": "OK",
|
52
|
-
"statement": "DROP TABLE people",
|
53
|
-
"result": "..."
|
54
|
-
}
|
55
|
-
]
|
56
|
-
TEXT
|
57
|
-
|
58
|
-
parameter(
|
59
|
-
:statements,
|
60
|
-
:array,
|
61
|
-
description: "A list of SQL statements to run sequentially.",
|
62
|
-
items: OmniAI::Schema.string(description: 'A SQL statement to run (e.g. "SELECT * FROM ...").')
|
63
|
-
)
|
64
|
-
|
65
|
-
required %i[statements]
|
66
|
-
|
67
|
-
# @param logger [IO] An optional logger for debugging executed commands.
|
68
|
-
# @param db [SQLite3::Database] A sqlite database.
|
69
|
-
def initialize(db:, logger: Logger.new(IO::NULL))
|
70
|
-
super(logger:)
|
71
|
-
@db = db
|
72
|
-
end
|
73
|
-
|
74
|
-
# @example
|
75
|
-
# tool = OmniAI::Tools::Database::BaseTool.new
|
76
|
-
# tool.execute(statements: ["SELECT * FROM people"])
|
77
|
-
#
|
78
|
-
# @param statements [Array<String>]
|
79
|
-
#
|
80
|
-
# @return [Array<Hash>]
|
81
|
-
def execute(statements:)
|
82
|
-
@logger.info("#{self.class.name}#{__method__} statements=#{statements.inspect}")
|
83
|
-
|
84
|
-
[].tap do |executions|
|
85
|
-
statements.map do |statement|
|
86
|
-
execution = perform(statement:)
|
87
|
-
executions << execution
|
88
|
-
break unless execution[:status].eql?(:ok)
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
protected
|
94
|
-
|
95
|
-
# @param statement [String]
|
96
|
-
#
|
97
|
-
# @return [Hash]
|
98
|
-
def perform(statement:)
|
99
|
-
result = @db.execute2(statement)
|
100
|
-
|
101
|
-
{ status: :ok, statement:, result: }
|
102
|
-
rescue ::SQLite3::Exception => e
|
103
|
-
@logger.warn("ERROR: #{e.message}")
|
104
|
-
|
105
|
-
{ status: :error, statement:, result: e.message }
|
106
|
-
end
|
107
|
-
end
|
108
|
-
end
|
109
|
-
end
|
110
|
-
end
|