omniai-anthropic 1.9.2 → 1.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55c1ba530498bc6cfe194b42c12402ad79d3d9f9c13fa3b7922fecbfa1a8d1c8
4
- data.tar.gz: edc2e433bbf5107177ddd6d90df9f3d856e209316a10ecfa4deeaec85c72378c
3
+ metadata.gz: 3b3723aa11fbf3b8def1f87dee64a9bc9a171e7d2e8e3292354c5476c24d4233
4
+ data.tar.gz: 667d9b3ce3e9fb6cd4061372358b555def837325f3107404bf0a83fbd10a9af4
5
5
  SHA512:
6
- metadata.gz: d9e23e858ef9276bfb2ab2d63836170d6e9c21972a4d8fca7be09620d9585b325615168e06d771cf6609320ea6c9471a6145d674f1c997fe074bb02d9cdfcc7c
7
- data.tar.gz: 4da1388834893087d8405e3aeede1130c096b84acba445b6e08f10209d6df51157f63e32bb571e6a611b9db591e9ea5d676136d1fb0f287ed6362802a9e49fc6
6
+ metadata.gz: e448d21a9d4d615b2679015bd8ada729b23a407a1707d956baff61cb1fcc629e26cfaf7752b18f6dc52abed4b1a22cea884d95b2c423391be8e5ca5329b4ea5d
7
+ data.tar.gz: c68070843dc6c36ffb8639873eee20d61382c44b035f41ca147ac458fecb58709a52f06d3bad476fb91d835315eeed47c8f7dfb0af10566ed5e65566f033690a
data/README.md CHANGED
@@ -104,3 +104,19 @@ JSON.parse(completion.text) # { "name": "Ringo" }
104
104
  ```
105
105
 
106
106
  [Anthropic API Reference `control-output-format`](https://docs.anthropic.com/en/docs/control-output-format)
107
+
108
+ ### Computers
109
+
110
+ ```bash
111
+ sudo apt-get install imagemagick # screenshots (provides `convert`)
112
+ sudo apt-get install scrot # screenshots
113
+ sudo apt-get install xdotool # mouse / keyboard
114
+ ```
115
+
116
+ ```ruby
117
+ computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
118
+
119
+ completion = client.chat(tools: [computer]) do |prompt|
120
+ prompt.user('Please signup for reddit')
121
+ end
122
+ ```
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
require 'open3'
require 'tempfile'
4
+
5
module OmniAI
  module Anthropic
    # A reference implementation of an OmniAI computer tool using xdotool for mouse / keyboard:
    # https://docs.anthropic.com/en/docs/build-with-claude/computer-use#computer-tool
    #
    # Usage:
    #
    #   computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
    #   computer.call({ 'action' => 'mouse_move', 'coordinate' => [100, 200] })
    class Computer
      TYPE = 'computer_20241022'

      SCREENSHOT_DELAY = 2.0 # seconds (not referenced by any method in this class)
      TYPING_DELAY = 20 # milliseconds, passed to `xdotool type --delay`

      # Action names dispatched by {Computer#perform}.
      module Action
        KEY = 'key'
        TYPE = 'type'
        CURSOR_POSITION = 'cursor_position'
        MOUSE_MOVE = 'mouse_move'
        LEFT_CLICK = 'left_click'
        RIGHT_CLICK = 'right_click'
        MIDDLE_CLICK = 'middle_click'
        LEFT_CLICK_DRAG = 'left_click_drag'
        RIGHT_CLICK_DRAG = 'right_click_drag'
        MIDDLE_CLICK_DRAG = 'middle_click_drag'
        DOUBLE_CLICK = 'double_click'
        SCREENSHOT = 'screenshot'
      end

      # Button numbers handed to the xdotool `click` / `mousedown` / `mouseup` commands.
      module Button
        LEFT = 1
        MIDDLE = 2
        RIGHT = 3
      end

      # @param name [String] optional
      # @param display_width_px [Integer]
      # @param display_height_px [Integer]
      # @param display_number [Integer] optional
      def initialize(display_width_px:, display_height_px:, display_number: 1, name: 'computer')
        @name = name
        @display_width_px = display_width_px
        @display_height_px = display_height_px
        @display_number = display_number
      end

      # Builds the tool definition. The display values are only reported here; no other method in
      # this class reads them.
      #
      # @example
      #   tool.serialize # =>
      #   # {
      #   #   "type": "computer_20241022",
      #   #   "name": "computer",
      #   #   "display_width_px": 1024,
      #   #   "display_height_px": 768,
      #   #   "display_number": 1,
      #   # }
      #
      # @return [Hash]
      def serialize(*)
        {
          type: TYPE,
          name: @name,
          display_width_px: @display_width_px,
          display_height_px: @display_height_px,
          display_number: @display_number,
        }
      end

      # Unpacks a string-keyed arguments hash and forwards it to {#perform}.
      #
      # @example
      #   computer.call({ "action" => 'type', "text" => 'Hello' })
      #
      # @param args [Hash] string keys: "action", "text", "coordinate"
      # @return [String, Hash, nil] see {#perform}
      def call(args = {})
        perform(
          action: args['action'],
          text: args['text'],
          coordinate: args['coordinate']
        )
      end

      # Dispatches a single action. Click actions take no coordinate: they are issued as a bare
      # `xdotool click`, without a preceding move.
      #
      # @param action [String] one of the {Action} constants
      # @param coordinate [Array] [x, y] optional
      # @param text [String] optional
      #
      # @return [String, Hash, nil] the command summary from {#shell}, the image hash from {#screenshot},
      #   or nil when the action matches none of the {Action} constants
      def perform(action:, text: nil, coordinate: nil) # rubocop:disable Metrics/CyclomaticComplexity
        case action
        when Action::KEY then key(text: text)
        when Action::TYPE then type(text: text)
        when Action::CURSOR_POSITION then mouse_location
        when Action::LEFT_CLICK then click(button: Button::LEFT)
        when Action::MIDDLE_CLICK then click(button: Button::MIDDLE)
        when Action::RIGHT_CLICK then click(button: Button::RIGHT)
        when Action::LEFT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::LEFT)
        when Action::MIDDLE_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::MIDDLE)
        when Action::RIGHT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::RIGHT)
        when Action::MOUSE_MOVE then mouse_move(coordinate:)
        when Action::DOUBLE_CLICK then double_click(button: Button::LEFT)
        when Action::SCREENSHOT then screenshot
        end
      end

      # Runs a command through Open3.capture3 and summarizes the outcome as one line of text.
      #
      # @param cmd [String]
      #
      # @return [String] "stdout=... stderr=... status=..."
      def shell(cmd, ...)
        stdout, stderr, status = Open3.capture3(cmd, ...)

        "stdout=#{stdout.inspect} stderr=#{stderr.inspect} status=#{status}"
      end

      # Runs xdotool with the given arguments.
      #
      # Numeric arguments (button numbers, coordinates, delays) are converted to strings first:
      # process spawning only accepts String arguments and raises TypeError for an Integer.
      # Anything else, nil included, is passed through untouched.
      #
      # @param args [Array<String, Numeric>] xdotool command-line arguments
      # @param options [Hash] options forwarded to {#shell}
      #
      # @return [String]
      def xdotool(*args, **options)
        argv = args.map { |arg| arg.is_a?(Numeric) ? arg.to_s : arg }
        shell('xdotool', *argv, **options)
      end

      # @param button [Integer]
      #
      # @return [String]
      def click(button:)
        xdotool('click', button)
      end

      # @param button [Integer]
      #
      # @return [String]
      def double_click(button:)
        xdotool('click', button, '--repeat', 2)
      end

      # @param coordinate [Array] [x, y]
      #
      # @return [String]
      def mouse_move(coordinate:)
        x, y = coordinate
        xdotool('mousemove', '--sync', x, y)
      end

      # Drags: button down, move to the coordinate, button up, as one chained xdotool invocation.
      #
      # @param coordinate [Array] [x, y]
      # @param button [Integer]
      #
      # @return [String]
      def mouse_down_move_up(coordinate:, button:)
        x, y = coordinate
        xdotool('mousedown', button, 'mousemove', '--sync', x, y, 'mouseup', button)
      end

      # @return [String]
      def mouse_location
        xdotool('getmouselocation')
      end

      # @param text [String]
      # @param delay [Integer] milliseconds
      #
      # @return [String]
      def type(text:, delay: TYPING_DELAY)
        # '--' ends option parsing so text starting with '-' is not read as an xdotool flag.
        xdotool('type', '--delay', delay, '--', text)
      end

      # @param text [String]
      #
      # @return [String]
      def key(text:)
        xdotool('key', '--', text)
      end

      # Captures a PNG with gnome-screenshot into a temporary file and returns it base64 encoded.
      # The exit status of gnome-screenshot is not checked, so a failed capture yields empty data.
      #
      # @return [Hash] { type: 'base64', media_type: 'image/png', data: String }
      def screenshot
        # The block form closes and removes the file even when the body raises.
        Tempfile.create(['screenshot', '.png']) do |file|
          Kernel.system('gnome-screenshot', '-w', '-f', file.path)
          # Read by path in binary mode; pack('m') produces the same output as Base64.encode64
          # without needing the base64 library.
          data = [File.binread(file.path)].pack('m')

          { type: 'base64', media_type: 'image/png', data: data }
        end
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Anthropic
5
- VERSION = '1.9.2'
5
+ VERSION = '1.9.3'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 1.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
@@ -75,6 +75,7 @@ files:
75
75
  - lib/omniai/anthropic/chat/tool_call_serializer.rb
76
76
  - lib/omniai/anthropic/chat/tool_serializer.rb
77
77
  - lib/omniai/anthropic/client.rb
78
+ - lib/omniai/anthropic/computer.rb
78
79
  - lib/omniai/anthropic/config.rb
79
80
  - lib/omniai/anthropic/version.rb
80
81
  homepage: https://github.com/ksylvest/omniai-anthropic