omniai-tools 0.5.1 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cc814f27a50bb0ecceaff25eb4153aed6ade80ca24f7abcb74198e0a31afe627
4
- data.tar.gz: c68820e0b080dc4812cd15871c8da74963d27b20960dd9e9e254b214bb313dfa
3
+ metadata.gz: 92931cd23e47e41ef1be07da377a47cf0e9af2456f58974f8a384597f9c2d6c6
4
+ data.tar.gz: 9adf44825839528ca308915cf6f58f5d5ef86b052f5dd7f4e5abfbef8fd98a6b
5
5
  SHA512:
6
- metadata.gz: 6ad4b2eb6e1cee4bbfd6b34f1957bb663de22785f4b2071de5323d38d5614c9438f813bbab9b98b9a232335a43243e82191103e59aca054400323f6137f5886e
7
- data.tar.gz: faf87bbacaf66022c34beb835674deda75b359e937bc4a734918b8f2cc28985438eba994b40ac0553c8fb1ab7bfa433532cd4cbe0d6dffa3fa5298fa904565dd
6
+ metadata.gz: b02dcd4125e9198e30c4ade51a6b942d9bc945ee03be946c0fb93328ffc3a7b3a29c2a4a10a3a5dfa98f003402d9e73de900e0015aed3e07969243cf16edc742
7
+ data.tar.gz: 4c4fa96c08aa45a8d0b76401602050e241a18a4eae4e5af6c59771a522fdb791c4bcc7771bf4bbd02e12f1c63e6d4945278030c8991bd56b35823a238b841abe
data/Gemfile CHANGED
@@ -6,11 +6,13 @@ gemspec
6
6
 
7
7
  gem "factory_bot"
8
8
  gem "irb"
9
+ gem "macos"
9
10
  gem "nokogiri"
10
11
  gem "omniai-anthropic"
11
12
  gem "omniai-google"
12
13
  gem "omniai-mistral"
13
14
  gem "omniai-openai"
15
+ gem "pg"
14
16
  gem "rake"
15
17
  gem "redcarpet"
16
18
  gem "rspec"
data/README.md CHANGED
@@ -10,7 +10,7 @@
10
10
 
11
11
  ## Browser
12
12
 
13
- Database tools are focused on running SQL statements:
13
+ Browser tools allow you to interact with any website (e.g. visit a page, click on a button, fill in some text, etc):
14
14
 
15
15
  ```ruby
16
16
  require "omniai/openai"
@@ -53,6 +53,72 @@ Here are the top 5 posts on Hacker News right now:
53
53
  ...
54
54
  ```
55
55
 
56
+ ## Computer
57
+
58
+ A computer tool grants the ability to manage a computer via an LLM:
59
+
60
+ ```ruby
61
+ require "omniai/openai"
62
+ require "omniai/tools"
63
+
64
+ require "macos"
65
+
66
+ client = OmniAI::OpenAI::Client.new
67
+ logger = Logger.new($stdout)
68
+ logger.formatter = proc { |_, _, _, message| "[computer] #{message}\n" }
69
+
70
+ driver = OmniAI::Tools::Computer::MacDriver.new
71
+ tools = [OmniAI::Tools::ComputerTool.new(driver:, logger:)]
72
+
73
+ puts "Type 'exit' or 'quit' to leave."
74
+
75
+ loop do
76
+ print "# "
77
+ text = gets.strip
78
+ break if %w[exit quit].include?(text)
79
+
80
+ driver.screenshot do |file|
81
+ client.chat(stream: $stdout, tools:) do |prompt|
82
+ prompt.system <<~TEXT
83
+ Assist the user with tasks related to the use of their computer.
84
+
85
+ 1. The display is #{driver.display_width}px (w) × #{driver.display_height}px (h).
86
+ 2. Attached find a screenshot of the display that may be inspected to determine the state of the computer.
87
+ 3. The computer is using MacOS with all the expected applications (e.g. Finder, Safari, etc).
88
+ 4. Any coordinates used for clicking must be scaled for the bounds of the display.
89
+ 5. Whenever possible prefer to navigate using keyboard shortcuts rather than mouse clicks.
90
+ TEXT
91
+
92
+ prompt.user do |message|
93
+ message.text(text)
94
+ message.file(file.path, "image/png")
95
+ end
96
+ end
97
+ end
98
+ end
99
+ ```
100
+
101
+ ```
102
+ Type 'exit' or 'quit' to leave.
103
+
104
+ # What do you see on my screen?
105
+
106
+ Here's what I see on your screen:
107
+ - You are using a Mac with a display resolution of 2560×1440 pixels.
108
+ - The Terminal app is open at the very top, with a command prompt in a directory related to "omnial-tools" and "computer".
109
+ - Below the Terminal, Visual Studio Code (VS Code) is open showing a project directory named "omnial-tools", specifically in a folder like /examples/computer.
110
+
111
+ # Please open Safari
112
+
113
+ [computer] action="mouse_click" coordinate={x: 484, y: 1398} mouse_button="left"
114
+ Safari is being opened now. Let me know if you need to visit a specific website or perform any other actions in Safari!
115
+
116
+ # What is the current position of my mouse?
117
+
118
+ [computer] action="mouse_position"
119
+ Your mouse is currently positioned at approximately (484, 1398) on your screen.
120
+ ```
121
+
56
122
  ## Database
57
123
 
58
124
  Database tools are focused on running SQL statements:
@@ -64,13 +130,12 @@ require "omniai/tools"
64
130
  require "sqlite3"
65
131
 
66
132
  db = SQLite3::Database.new(":memory:")
133
+ driver = OmniAI::Tools::Database::SqliteDriver.new(db:)
67
134
 
68
135
  client = OmniAI::OpenAI::Client.new
69
136
  logger = Logger.new($stdout)
70
137
 
71
- tools = [
72
- OmniAI::Tools::Database::SqliteTool.new(logger:, db:),
73
- ]
138
+ tools = [OmniAI::Tools::DatabaseTool.new(logger:, driver:)]
74
139
 
75
140
  puts "Type 'exit' or 'quit' to leave."
76
141
 
@@ -1,7 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "watir"
4
-
5
3
  module OmniAI
6
4
  module Tools
7
5
  module Browser
@@ -104,7 +104,7 @@ module OmniAI
104
104
  * `#{Action::LINK_CLICK}`: Click a link element
105
105
  * `#{Action::ELEMENT_CLICK}`: Click any clickable element
106
106
  * `#{Action::TEXT_FIELD_SET}`: Enter text in input fields or text areas
107
- * `#{Action::SCREENSHOT}`: Take a screenshot of the page or specific element
107
+ * `#{Action::SCREENSHOT}`: Take a screenshot of the current page
108
108
  TEXT
109
109
 
110
110
  parameter :url, :string, description: <<~TEXT
@@ -0,0 +1,179 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "sqlite3"
4
+
5
+ module OmniAI
6
+ module Tools
7
+ module Computer
8
+ # A tool for interacting with a computer. Be careful with using as it can perform actions on your computer!
9
+ #
10
+ # @example
11
+ # class SomeDriver < BaseDriver
12
+ # # @param text [String]
13
+ # def key(text:)
14
+ # # TODO
15
+ # end
16
+ #
17
+ # # @param text [String]
18
+ # # @param duration [Integer]
19
+ # def hold_key(text:, duration:)
20
+ # # TODO
21
+ # end
22
+ #
23
+ # # @return [Hash<{ x: Integer, y: Integer }>]
24
+ # def mouse_position
25
+ # # TODO
26
+ # end
27
+ #
28
+ # # Move the cursor to a specific (x, y) pixel coordinate on the screen.
29
+ # # @param coordinate [Hash<{ x: Integer, y: Integer }>]
30
+ # def mouse_move(coordinate:)
31
+ # # TODO
32
+ # end
33
+ #
34
+ # # @param coordinate [Hash<{ x: Integer, y: Integer }>]
35
+ # # @param button [String] e.g. "left", "middle", "right"
36
+ # def mouse_click(coordinate:, button:)
37
+ # # TODO
38
+ # end
39
+ #
40
+ # # @param coordinate [Hash<{ x: Integer, y: Integer }>]
41
+ # # @param button [String] e.g. "left", "middle", "right"
42
+ # def mouse_down(coordinate:, button:)
43
+ # # TODO
44
+ # end
45
+ #
46
+ # # @param coordinate [Hash<{ x: Integer, y: Integer }>]
47
+ # # @param button [String] e.g. "left", "middle", "right"
48
+ # def mouse_up(coordinate:, button:)
49
+ # # TODO
50
+ # end
51
+ #
52
+ # # @param text [String]
53
+ # def type(text:)
54
+ # # TODO
55
+ # end
56
+ #
57
+ # # @param amount [Integer]
58
+ # # @param direction [String] e.g. "up", "down", "left", "right"
59
+ # def scroll(amount:, direction:)
60
+ # # TODO
61
+ # end
62
+ #
63
+ # # @yield [file]
64
+ # # @yieldparam file [File]
65
+ # def screenshot
66
+ # # TODO
67
+ # end
68
+ # end
69
+ class BaseDriver
70
+ DEFAULT_MOUSE_BUTTON = "left"
71
+ DEFAULT_DISPLAY_SCALE = 2
72
+
73
+ # @!attr_accessor :display_width
74
+ # @return [Integer] the width of the display in pixels
75
+ attr_accessor :display_width
76
+
77
+ # @!attr_accessor :display_height
78
+ # @return [Integer] the height of the display in pixels
79
+ attr_accessor :display_height
80
+
81
+ # @!attr_accessor :display_number
82
+ # @return [Integer] the display number
83
+ attr_accessor :display_number
84
+
85
+ # @param display_width [Integer] the width of the display in pixels
86
+ # @param display_height [Integer] the height of the display in pixels
87
+ # @param display_number [Integer] the display number
88
+ def initialize(display_width:, display_height:, display_number:)
89
+ @display_width = display_width
90
+ @display_height = display_height
91
+
92
+ @display_number = display_number
93
+ end
94
+
95
+ # @param text [String]
96
+ def key(text:)
97
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
98
+ end
99
+
100
+ # @param text [String]
101
+ # @param duration [Integer]
102
+ def hold_key(text:, duration:)
103
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
104
+ end
105
+
106
+ # @return [Hash<{ x: Integer, y: Integer }>]
107
+ def mouse_position
108
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
109
+ end
110
+
111
+ # Move the cursor to a specific (x, y) pixel coordinate on the screen.
112
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
113
+ def mouse_move(coordinate:)
114
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
115
+ end
116
+
117
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
118
+ # @param button [String] e.g. "left", "middle", "right"
119
+ def mouse_click(coordinate:, button:)
120
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
121
+ end
122
+
123
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
124
+ # @param button [String] e.g. "left", "middle", "right"
125
+ def mouse_down(coordinate:, button:)
126
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
127
+ end
128
+
129
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
130
+ # @param button [String] e.g. "left", "middle", "right"
131
+ def mouse_up(coordinate:, button:)
132
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
133
+ end
134
+
135
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
136
+ # @param button [String] e.g. "left", "middle", "right"
137
+ def mouse_drag(coordinate:, button: DEFAULT_MOUSE_BUTTON)
138
+ mouse_down(coordinate: mouse_position, button:)
139
+ mouse_move(coordinate:, button:)
140
+ mouse_up(coordinate:, button:)
141
+ end
142
+
143
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
144
+ # @param button [String] e.g. "left", "middle", "right"
145
+ def mouse_double_click(coordinate:, button:)
146
+ 2.times { mouse_click(coordinate:, button:) }
147
+ end
148
+
149
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
150
+ # @param button [String] e.g. "left", "middle", "right"
151
+ def mouse_triple_click(coordinate:, button:)
152
+ 3.times { mouse_click(coordinate:, button:) }
153
+ end
154
+
155
+ # @param text [String]
156
+ def type(text:)
157
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
158
+ end
159
+
160
+ # @param amount [Integer]
161
+ # @param direction [String] e.g. "up", "down", "left", "right"
162
+ def scroll(amount:, direction:)
163
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
164
+ end
165
+
166
+ # @yield [file]
167
+ # @yieldparam file [File]
168
+ def screenshot
169
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
170
+ end
171
+
172
+ # @param duration [Integer]
173
+ def wait(duration:)
174
+ Kernel.sleep(duration)
175
+ end
176
+ end
177
+ end
178
+ end
179
+ end
@@ -0,0 +1,103 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Computer
6
+ # A driver for interacting with a Mac. Be careful with using as it can perform actions on your computer!
7
+ class MacDriver < BaseDriver
8
+ def initialize(keyboard: MacOS.keyboard, mouse: MacOS.mouse, display: MacOS.display)
9
+ @keyboard = keyboard
10
+ @mouse = mouse
11
+ @display = display
12
+
13
+ super(display_width: display.wide, display_height: display.high, display_number: display.id)
14
+ end
15
+
16
+ # @param text [String]
17
+ def key(text:)
18
+ @keyboard.keys(text)
19
+ end
20
+
21
+ # @param text [String]
22
+ # @param duration [Integer]
23
+ def hold_key(text:, duration:)
24
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
25
+ end
26
+
27
+ # @return [Hash<{ x: Integer, y: Integer }>]
28
+ def mouse_position
29
+ position = @mouse.position
30
+ x = position.x
31
+ y = position.y
32
+
33
+ {
34
+ x:,
35
+ y:,
36
+ }
37
+ end
38
+
39
+ def mouse_move(coordinate:)
40
+ x = coordinate[:x]
41
+ y = coordinate[:y]
42
+
43
+ @mouse.move(x:, y:)
44
+ end
45
+
46
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
47
+ # @param button [String] e.g. "left", "middle", "right"
48
+ def mouse_click(coordinate:, button:)
49
+ x = coordinate[:x]
50
+ y = coordinate[:y]
51
+
52
+ case button
53
+ when "left" then @mouse.left_click(x:, y:)
54
+ when "middle" then @mouse.middle_click(x:, y:)
55
+ when "right" then @mouse.right_click(x:, y:)
56
+ end
57
+ end
58
+
59
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
60
+ def mouse_down(coordinate:, button: DEFAULT_MOUSE_BUTTON)
61
+ x = coordinate[:x]
62
+ y = coordinate[:y]
63
+
64
+ case button
65
+ when "left" then @mouse.left_down(x:, y:)
66
+ when "middle" then @mouse.middle_down(x:, y:)
67
+ when "right" then @mouse.right_down(x:, y:)
68
+ end
69
+ end
70
+
71
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>]
72
+ # @param button [String] e.g. "left", "middle", "right"
73
+ def mouse_up(coordinate:, button: DEFAULT_MOUSE_BUTTON)
74
+ x = coordinate[:x]
75
+ y = coordinate[:y]
76
+
77
+ case button
78
+ when "left" then @mouse.left_up(x:, y:)
79
+ when "middle" then @mouse.middle_up(x:, y:)
80
+ when "right" then @mouse.right_up(x:, y:)
81
+ end
82
+ end
83
+
84
+ # @param text [String]
85
+ def type(text:)
86
+ @keyboard.type(text)
87
+ end
88
+
89
+ # @param amount [Integer]
90
+ # @param direction [String] e.g. "up", "down", "left", "right"
91
+ def scroll(amount:, direction:)
92
+ raise NotImplementedError, "#{self.class.name}##{__method__} undefined"
93
+ end
94
+
95
+ # @yield [file]
96
+ # @yieldparam file [File]
97
+ def screenshot(&)
98
+ @display.screenshot(&)
99
+ end
100
+ end
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,189 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ # A tool for interacting with a computer. Be careful with using as it can perform actions on your computer!
6
+ #
7
+ # @example
8
+ # driver = OmniAI::Tools::Computer::MacDriver.new
9
+ # computer = OmniAI::Tools::ComputerTool.new(driver:)
10
+ # computer.execute(action: "mouse_position") # e.g. { x: 484, y: 1398 }
11
+ class ComputerTool < OmniAI::Tool
12
+ description "A tool for interacting with a computer."
13
+
14
+ module Action
15
+ KEY = "key" # press a key
16
+ HOLD_KEY = "hold_key" # hold a key
17
+ MOUSE_POSITION = "mouse_position" # get the current (x, y) pixel coordinate of the cursor on the screen
18
+ MOUSE_MOVE = "mouse_move" # move the cursor to a specific (x, y) pixel coordinate on the screen
19
+ MOUSE_CLICK = "mouse_click" # click at a specific x / y coordinate
20
+ MOUSE_DOWN = "mouse_down" # press the mouse button down
21
+ MOUSE_DRAG = "mouse_drag" # drag the mouse to a specific x / y coordinate
22
+ MOUSE_UP = "mouse_up" # release the mouse button
23
+ MOUSE_DOUBLE_CLICK = "mouse_double_click" # double click at a specific x / y coordinate
24
+ MOUSE_TRIPLE_CLICK = "mouse_triple_click" # triple click at a specific x / y coordinate
25
+ TYPE = "type" # type a string
26
+ SCROLL = "scroll"
27
+ WAIT = "wait"
28
+ end
29
+
30
+ module MouseButton
31
+ LEFT = "left"
32
+ MIDDLE = "middle"
33
+ RIGHT = "right"
34
+ end
35
+
36
+ module ScrollDirection
37
+ UP = "up"
38
+ DOWN = "down"
39
+ LEFT = "left"
40
+ RIGHT = "right"
41
+ end
42
+
43
+ ACTIONS = [
44
+ Action::KEY,
45
+ Action::HOLD_KEY,
46
+ Action::MOUSE_POSITION,
47
+ Action::MOUSE_MOVE,
48
+ Action::MOUSE_CLICK,
49
+ Action::MOUSE_DOWN,
50
+ Action::MOUSE_DRAG,
51
+ Action::MOUSE_UP,
52
+ Action::TYPE,
53
+ Action::SCROLL,
54
+ Action::WAIT,
55
+ ].freeze
56
+
57
+ MOUSE_BUTTON_OPTIONS = [
58
+ MouseButton::LEFT,
59
+ MouseButton::MIDDLE,
60
+ MouseButton::RIGHT,
61
+ ].freeze
62
+
63
+ SCROLL_DIRECTION_OPTIONS = [
64
+ ScrollDirection::UP,
65
+ ScrollDirection::DOWN,
66
+ ScrollDirection::LEFT,
67
+ ScrollDirection::RIGHT,
68
+ ].freeze
69
+
70
+ parameter :action, :string, enum: ACTIONS, description: <<~TEXT
71
+ Options:
72
+ * `#{Action::KEY}`: Press a single key / combination of keys on the keyboard:
73
+ - supports xdotool's `key` syntax (e.g. "alt+Tab", "Return", "ctrl+s", etc)
74
+ * `#{Action::HOLD_KEY}`: Hold down a key or multiple keys for a specified duration (in seconds):
75
+ - supports xdotool's `key` syntax (e.g. "alt+Tab", "Return", "ctrl+s", etc)
76
+ * `#{Action::MOUSE_POSITION}`: Get the current (x,y) pixel coordinate of the cursor on the screen.
77
+ * `#{Action::MOUSE_MOVE}`: Move the cursor to a specified (x,y) pixel coordinate on the screen.
78
+ * `#{Action::MOUSE_CLICK}`: Click the mouse button at the specified (x,y) pixel coordinate on the screen.
79
+ * `#{Action::MOUSE_DOUBLE_CLICK}`: Double click at the specified (x,y) pixel coordinate on the screen.
80
+ * `#{Action::MOUSE_TRIPLE_CLICK}`: Triple click at the specified (x,y) pixel coordinate on the screen.
81
+ * `#{Action::MOUSE_DOWN}`: Press the mouse button at the specified (x,y) pixel coordinate on the screen.
82
+ * `#{Action::MOUSE_DRAG}`: Click and drag the cursor to a specified (x, y) pixel coordinate on the screen.
83
+ * `#{Action::MOUSE_UP}`: Release the mouse button at the specified (x,y) pixel coordinate on the screen.
84
+ * `#{Action::TYPE}`: Type a string of text on the keyboard.
85
+ * `#{Action::SCROLL}`: Scroll the screen in a specified direction by a specified amount of clicks of the scroll wheel.
86
+ * `#{Action::WAIT}`: Wait for a specified duration (in seconds).
87
+ TEXT
88
+
89
+ parameter :coordinate, :object, properties: {
90
+ x: OmniAI::Schema.integer(description: "The x position in pixels"),
91
+ y: OmniAI::Schema.integer(description: "The y position in pixels"),
92
+ }, required: %i[x y], description: <<~TEXT
93
+ An (x,y) coordinate. Required for the following actions:
94
+ * `#{Action::MOUSE_MOVE}`
95
+ * `#{Action::MOUSE_CLICK}`
96
+ * `#{Action::MOUSE_DOWN}`
97
+ * `#{Action::MOUSE_DRAG}`
98
+ * `#{Action::MOUSE_UP}`
99
+ * `#{Action::MOUSE_DOUBLE_CLICK}`
100
+ * `#{Action::MOUSE_TRIPLE_CLICK}`
101
+ TEXT
102
+
103
+ parameter :text, :string, description: <<~TEXT
104
+ The text to type. Required for the following actions:
105
+ * `#{Action::KEY}`
106
+ * `#{Action::HOLD_KEY}`
107
+ * `#{Action::TYPE}`
108
+ TEXT
109
+
110
+ parameter :duration, :integer, description: <<~TEXT
111
+ A duration in seconds. Required for the following actions:
112
+ * `#{Action::HOLD_KEY}`
113
+ * `#{Action::WAIT}`
114
+ TEXT
115
+
116
+ parameter :mouse_button, :string, enum: MOUSE_BUTTON_OPTIONS, description: <<~TEXT
117
+ The mouse button to use. Required for the following actions:
118
+ * `#{Action::MOUSE_CLICK}`
119
+ * `#{Action::MOUSE_DOWN}`
120
+ * `#{Action::MOUSE_DRAG}`
121
+ * `#{Action::MOUSE_UP}`
122
+ * `#{Action::MOUSE_DOUBLE_CLICK}`
123
+ * `#{Action::MOUSE_TRIPLE_CLICK}`
124
+ TEXT
125
+
126
+ parameter :scroll_direction, :string, enum: SCROLL_DIRECTION_OPTIONS, description: <<~TEXT
127
+ The direction to scroll. Required for the following actions:
128
+ * `#{Action::SCROLL}`
129
+ TEXT
130
+
131
+ parameter :scroll_amount, :integer, description: <<~TEXT
132
+ The amount of clicks to scroll. Required for the following actions:
133
+ * `#{Action::SCROLL}`
134
+ TEXT
135
+
136
+ required %i[action]
137
+
138
+ # @param driver [Computer::BaseDriver]
139
+ def initialize(driver:, logger: Logger.new(IO::NULL))
140
+ @driver = driver
141
+ @logger = logger
142
+ super()
143
+ end
144
+
145
+ # @param action [String]
146
+ # @param coordinate [Hash<{ x: Integer, y: Integer }>] the (x,y) coordinate
147
+ # @param text [String]
148
+ # @param duration [Integer] the duration in seconds
149
+ # @param mouse_button [String] e.g. "left", "middle", "right"
150
+ # @param scroll_direction [String] e.g. "up", "down", "left", "right"
151
+ # @param scroll_amount [Integer] the amount of clicks to scroll
152
+ def execute(
153
+ action:,
154
+ coordinate: nil,
155
+ text: nil,
156
+ duration: nil,
157
+ mouse_button: nil,
158
+ scroll_direction: nil,
159
+ scroll_amount: nil
160
+ )
161
+ @logger.info({
162
+ action:,
163
+ coordinate:,
164
+ text:,
165
+ duration:,
166
+ mouse_button:,
167
+ scroll_direction:,
168
+ scroll_amount:,
169
+ }.compact.map { |key, value| "#{key}=#{value.inspect}" }.join(" "))
170
+
171
+ case action
172
+ when Action::KEY then @driver.key(text:)
173
+ when Action::HOLD_KEY then @driver.hold_key(text:, duration:)
174
+ when Action::MOUSE_POSITION then @driver.mouse_position
175
+ when Action::MOUSE_MOVE then @driver.mouse_move(coordinate:)
176
+ when Action::MOUSE_CLICK then @driver.mouse_click(coordinate:, button: mouse_button)
177
+ when Action::MOUSE_DOUBLE_CLICK then @driver.mouse_double_click(coordinate:, button: mouse_button)
178
+ when Action::MOUSE_TRIPLE_CLICK then @driver.mouse_triple_click(coordinate:, button: mouse_button)
179
+ when Action::MOUSE_DOWN then @driver.mouse_down(coordinate:, button: mouse_button)
180
+ when Action::MOUSE_UP then @driver.mouse_up(coordinate:, button: mouse_button)
181
+ when Action::MOUSE_DRAG then @driver.mouse_drag(coordinate:, button: mouse_button)
182
+ when Action::TYPE then @driver.type(text:)
183
+ when Action::SCROLL then @driver.scroll(amount: scroll_amount, direction: scroll_direction)
184
+ when Action::WAIT then @driver.wait(duration:)
185
+ end
186
+ end
187
+ end
188
+ end
189
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Database
6
+ # Base class for database drivers (e.g. sqlite, postgres, mysql, etc).
7
+ class BaseDriver
8
+ # @param statement [String] e.g. "SELECT * FROM people"
9
+ #
10
+ # @return [Hash] e.g. { status: :ok, result: [["id", "name"], [1, "John"], [2, "Paul"], ...] }
11
+ def perform(statement:)
12
+ raise NotImplementedError, "#{self.class}##{__method__} undefined"
13
+ end
14
+ end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Database
6
+ # @example
7
+ # connection = PG.connect(dbname: "testdb")
8
+ # driver = OmniAI::Tools::Database::PostgresDriver.new(connection:)
9
+ # driver.perform(statement: "SELECT * FROM people")
10
+ class PostgresDriver < BaseDriver
11
+ # @param connection [PG::Connection]
12
+ def initialize(connection:)
13
+ super()
14
+ @connection = connection
15
+ end
16
+
17
+ # @param statement [String]
18
+ #
19
+ # @return [Hash]
20
+ def perform(statement:)
21
+ @connection.exec(statement) do |result|
22
+ { status: :ok, result: [result.fields] + result.values }
23
+ end
24
+ rescue ::PG::Error => e
25
+ { status: :error, message: e.message }
26
+ end
27
+ end
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ module Database
6
+ # @example
7
+ # driver = OmniAI::Tools::Database::SqliteDriver.new(db: SQLite3::Database.new(":memory:"))
8
+ # driver.perform(statement: "SELECT * FROM people")
9
+ class SqliteDriver < BaseDriver
10
+ # @param db [SQLite3::Database]
11
+ def initialize(db:)
12
+ super()
13
+ @db = db
14
+ end
15
+
16
+ # @param statement [String]
17
+ #
18
+ # @return [Hash]
19
+ def perform(statement:)
20
+ result = @db.execute2(statement)
21
+
22
+ { status: :ok, result: }
23
+ rescue ::SQLite3::Exception => e
24
+ { status: :error, message: e.message }
25
+ end
26
+ end
27
+ end
28
+ end
29
+ end
@@ -0,0 +1,100 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ module Tools
5
+ # @example
6
+ # db = SQLite3::Database.new("./db.sqlite")
7
+ # driver = OmniAI::Tools::Database::SqliteDriver.new(db:)
8
+ # tool = OmniAI::Tools::DatabaseTool.new(driver:)
9
+ # tool.execute(statements: ["SELECT * FROM people"])
10
+ class DatabaseTool < OmniAI::Tool
11
+ description <<~TEXT
12
+ Executes SQL commands (INSERT / UPDATE / SELECT / etc) on a database.
13
+
14
+ Example:
15
+
16
+ STATEMENTS:
17
+
18
+ [
19
+ 'CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)',
20
+ 'INSERT INTO people (name) VALUES ('John')',
21
+ 'INSERT INTO people (name) VALUES ('Paul')',
22
+ 'SELECT * FROM people',
23
+ 'DROP TABLE people'
24
+ ]
25
+
26
+ RESULT:
27
+
28
+ [
29
+ {
30
+ "status": "OK",
31
+ "statement": "CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)",
32
+ "result": "..."
33
+ },
34
+ {
35
+ "status": "OK",
36
+ "statement": "INSERT INTO people (name) VALUES ('John')"
37
+ "result": "..."
38
+ },
39
+ {
40
+ "status": "OK",
41
+ "statement": "INSERT INTO people (name) VALUES ('Paul')",
42
+ "result": "..."
43
+ },
44
+ {
45
+ "status": "OK",
46
+ "statement": "SELECT * FROM people",
47
+ "result": "..."
48
+ },
49
+ {
50
+ "status": "OK",
51
+ "statement": "DROP TABLE people",
52
+ "result": "..."
53
+ }
54
+ ]
55
+ TEXT
56
+
57
+ parameter(
58
+ :statements,
59
+ :array,
60
+ description: "A list of SQL statements to run sequentially.",
61
+ items: OmniAI::Schema.string(description: 'A SQL statement to run (e.g. "SELECT * FROM ...").')
62
+ )
63
+
64
+ required %i[statements]
65
+
66
+ # @param driver [OmniAI::Tools::Database::BaseDriver]
67
+ # @param logger [Logger] An optional logger for debugging executed commands.
68
+ def initialize(driver:, logger: Logger.new(IO::NULL))
69
+ super()
70
+ @driver = driver
71
+ @logger = logger
72
+ end
73
+
74
+ # @example
75
+ # tool = OmniAI::Tools::DatabaseTool.new(driver:)
76
+ # tool.execute(statements: ["SELECT * FROM people"])
77
+ #
78
+ # @param statements [Array<String>]
79
+ #
80
+ # @return [Array<Hash>]
81
+ def execute(statements:)
82
+ [].tap do |executions|
83
+ statements.map do |statement|
84
+ execution = perform(statement:).merge(statement:)
85
+ executions << execution
86
+ break unless execution[:status].eql?(:ok)
87
+ end
88
+ end
89
+ end
90
+
91
+ def perform(statement:)
92
+ @logger&.info("#perform statement=#{statement.inspect}")
93
+
94
+ @driver.perform(statement:).tap do |result|
95
+ @logger&.info(JSON.generate(result))
96
+ end
97
+ end
98
+ end
99
+ end
100
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Tools
5
- VERSION = "0.5.1"
5
+ VERSION = "0.6.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-05-29 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: omniai
@@ -76,8 +76,13 @@ files:
76
76
  - lib/omniai/tools/browser/visit_tool.rb
77
77
  - lib/omniai/tools/browser/watir_driver.rb
78
78
  - lib/omniai/tools/browser_tool.rb
79
- - lib/omniai/tools/database/base_tool.rb
80
- - lib/omniai/tools/database/sqlite_tool.rb
79
+ - lib/omniai/tools/computer/base_driver.rb
80
+ - lib/omniai/tools/computer/mac_driver.rb
81
+ - lib/omniai/tools/computer_tool.rb
82
+ - lib/omniai/tools/database/base_driver.rb
83
+ - lib/omniai/tools/database/postgres_driver.rb
84
+ - lib/omniai/tools/database/sqlite_driver.rb
85
+ - lib/omniai/tools/database_tool.rb
81
86
  - lib/omniai/tools/disk/base_tool.rb
82
87
  - lib/omniai/tools/disk/directory_create_tool.rb
83
88
  - lib/omniai/tools/disk/directory_delete_tool.rb
@@ -113,7 +118,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
113
118
  - !ruby/object:Gem::Version
114
119
  version: '0'
115
120
  requirements: []
116
- rubygems_version: 3.6.3
121
+ rubygems_version: 3.6.9
117
122
  specification_version: 4
118
123
  summary: A set of tools built for usage with OmniAI.
119
124
  test_files: []
@@ -1,37 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sqlite3"
4
-
5
- module OmniAI
6
- module Tools
7
- module Database
8
- # @example
9
- # tool = OmniAI::Tools::Database::SqliteTool.new
10
- # tool.execute(path: "./foo/bar")
11
- class BaseTool < OmniAI::Tool
12
- # @param logger [IO] An optional logger for debugging executed commands.
13
- def initialize(logger: Logger.new(IO::NULL))
14
- super()
15
- @logger = logger
16
- end
17
-
18
- # @example
19
- # tool = OmniAI::Tools::Database::BaseTool.new
20
- # tool.execute(statements: ["SELECT * FROM people"])
21
- #
22
- # @param statements [Array<String>]
23
- #
24
- # @return [Array<Hash>]
25
- def execute(statements:)
26
- [].tap do |executions|
27
- statements.map do |statement|
28
- execution = perform(statement:)
29
- executions << execution
30
- break unless execution[:status].eql?(:ok)
31
- end
32
- end
33
- end
34
- end
35
- end
36
- end
37
- end
@@ -1,110 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require "sqlite3"
4
-
5
- module OmniAI
6
- module Tools
7
- module Database
8
- # @example
9
- # tool = OmniAI::Tools::Database::SqliteTool.new
10
- # tool.execute(path: "./foo/bar")
11
- class SqliteTool < BaseTool
12
- description <<~TEXT
13
- Executes SQL commands (INSERT / UPDATE / SELECT / etc) on a database.
14
-
15
- Example:
16
-
17
- STATEMENTS:
18
-
19
- [
20
- 'CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)',
21
- 'INSERT INTO people (name) VALUES ('John')',
22
- 'INSERT INTO people (name) VALUES ('Paul')',
23
- 'SELECT * FROM people',
24
- 'DROP TABLE people'
25
- ]
26
-
27
- RESULT:
28
-
29
- [
30
- {
31
- "status": "OK",
32
- "statement": "CREATE TABLE people (id INTEGER PRIMARY KEY, name TEXT NOT NULL)",
33
- "result": "..."
34
- },
35
- {
36
- "status": "OK",
37
- "statement": "INSERT INTO people (name) VALUES ('John')"
38
- "result": "..."
39
- },
40
- {
41
- "status": "OK",
42
- "statement": "INSERT INTO people (name) VALUES ('Paul')",
43
- "result": "..."
44
- },
45
- {
46
- "status": "OK",
47
- "statement": "SELECT * FROM people",
48
- "result": "..."
49
- },
50
- {
51
- "status": "OK",
52
- "statement": "DROP TABLE people",
53
- "result": "..."
54
- }
55
- ]
56
- TEXT
57
-
58
- parameter(
59
- :statements,
60
- :array,
61
- description: "A list of SQL statements to run sequentially.",
62
- items: OmniAI::Schema.string(description: 'A SQL statement to run (e.g. "SELECT * FROM ...").')
63
- )
64
-
65
- required %i[statements]
66
-
67
- # @param logger [IO] An optional logger for debugging executed commands.
68
- # @param db [SQLite3::Database] A sqlite database.
69
- def initialize(db:, logger: Logger.new(IO::NULL))
70
- super(logger:)
71
- @db = db
72
- end
73
-
74
- # @example
75
- # tool = OmniAI::Tools::Database::BaseTool.new
76
- # tool.execute(statements: ["SELECT * FROM people"])
77
- #
78
- # @param statements [Array<String>]
79
- #
80
- # @return [Array<Hash>]
81
- def execute(statements:)
82
- @logger.info("#{self.class.name}#{__method__} statements=#{statements.inspect}")
83
-
84
- [].tap do |executions|
85
- statements.map do |statement|
86
- execution = perform(statement:)
87
- executions << execution
88
- break unless execution[:status].eql?(:ok)
89
- end
90
- end
91
- end
92
-
93
- protected
94
-
95
- # @param statement [String]
96
- #
97
- # @return [Hash]
98
- def perform(statement:)
99
- result = @db.execute2(statement)
100
-
101
- { status: :ok, statement:, result: }
102
- rescue ::SQLite3::Exception => e
103
- @logger.warn("ERROR: #{e.message}")
104
-
105
- { status: :error, statement:, result: e.message }
106
- end
107
- end
108
- end
109
- end
110
- end