omniai-anthropic 1.9.2 → 1.9.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55c1ba530498bc6cfe194b42c12402ad79d3d9f9c13fa3b7922fecbfa1a8d1c8
4
- data.tar.gz: edc2e433bbf5107177ddd6d90df9f3d856e209316a10ecfa4deeaec85c72378c
3
+ metadata.gz: 3b3723aa11fbf3b8def1f87dee64a9bc9a171e7d2e8e3292354c5476c24d4233
4
+ data.tar.gz: 667d9b3ce3e9fb6cd4061372358b555def837325f3107404bf0a83fbd10a9af4
5
5
  SHA512:
6
- metadata.gz: d9e23e858ef9276bfb2ab2d63836170d6e9c21972a4d8fca7be09620d9585b325615168e06d771cf6609320ea6c9471a6145d674f1c997fe074bb02d9cdfcc7c
7
- data.tar.gz: 4da1388834893087d8405e3aeede1130c096b84acba445b6e08f10209d6df51157f63e32bb571e6a611b9db591e9ea5d676136d1fb0f287ed6362802a9e49fc6
6
+ metadata.gz: e448d21a9d4d615b2679015bd8ada729b23a407a1707d956baff61cb1fcc629e26cfaf7752b18f6dc52abed4b1a22cea884d95b2c423391be8e5ca5329b4ea5d
7
+ data.tar.gz: c68070843dc6c36ffb8639873eee20d61382c44b035f41ca147ac458fecb58709a52f06d3bad476fb91d835315eeed47c8f7dfb0af10566ed5e65566f033690a
data/README.md CHANGED
@@ -104,3 +104,19 @@ JSON.parse(completion.text) # { "name": "Ringo" }
104
104
  ```
105
105
 
106
106
  [Anthropic API Reference `control-output-format`](https://docs.anthropic.com/en/docs/control-output-format)
107
+
108
+ ### Computers
109
+
110
+ ```bash
111
+ sudo apt-get install imagemagick # screenshots (provides the `convert` command)
112
+ sudo apt-get install scrot # screenshots
113
+ sudo apt-get install xdotool # mouse / keyboard
114
+ ```
115
+
116
+ ```ruby
117
+ computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
118
+
119
+ completion = client.chat(tools: [computer]) do |prompt|
120
+ prompt.user('Please signup for reddit')
121
+ end
122
+ ```
@@ -0,0 +1,188 @@
1
# frozen_string_literal: true

require 'open3'
require 'tempfile'

module OmniAI
  module Anthropic
    # A reference implementation of an OmniAI computer tool using xdotool for mouse / keyboard:
    # https://docs.anthropic.com/en/docs/build-with-claude/computer-use#computer-tool
    #
    # Usage:
    #
    #   computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
    #   computer.call({ 'action' => 'type', 'text' => 'Hello' })
    class Computer
      TYPE = 'computer_20241022'

      SCREENSHOT_DELAY = 2.0 # seconds
      TYPING_DELAY = 20 # milliseconds

      # Action names accepted by {#perform}.
      module Action
        KEY = 'key'
        TYPE = 'type'
        CURSOR_POSITION = 'cursor_position'
        MOUSE_MOVE = 'mouse_move'
        LEFT_CLICK = 'left_click'
        RIGHT_CLICK = 'right_click'
        MIDDLE_CLICK = 'middle_click'
        LEFT_CLICK_DRAG = 'left_click_drag'
        RIGHT_CLICK_DRAG = 'right_click_drag'
        MIDDLE_CLICK_DRAG = 'middle_click_drag'
        DOUBLE_CLICK = 'double_click'
        SCREENSHOT = 'screenshot'
      end

      # Mouse button numbers handed to xdotool.
      module Button
        LEFT = 1
        MIDDLE = 2
        RIGHT = 3
      end

      # @param display_width_px [Integer]
      # @param display_height_px [Integer]
      # @param display_number [Integer] optional
      # @param name [String] optional
      def initialize(display_width_px:, display_height_px:, display_number: 1, name: 'computer')
        @name = name
        @display_width_px = display_width_px
        @display_height_px = display_height_px
        @display_number = display_number
      end

      # @example
      #   tool.serialize # =>
      #   # {
      #   #   "type": "computer_20241022",
      #   #   "name": "computer",
      #   #   "display_width_px": 1024,
      #   #   "display_height_px": 768,
      #   #   "display_number": 1,
      #   # }
      #
      # @return [Hash]
      def serialize(*)
        {
          type: TYPE,
          name: @name,
          display_width_px: @display_width_px,
          display_height_px: @display_height_px,
          display_number: @display_number,
        }
      end

      # Unpacks a string-keyed arguments hash and dispatches it to {#perform}.
      #
      # @example
      #   computer.call({ "action" => 'type', "text" => 'Hello' })
      #
      # @param args [Hash] with the keys "action", "text" (optional) and "coordinate" (optional)
      # @return [String, Hash, nil] see {#perform}
      def call(args = {})
        perform(
          action: args['action'],
          text: args['text'],
          coordinate: args['coordinate']
        )
      end

      # Runs a single action. The click actions act at the current pointer position (no coordinate is used).
      #
      # @param action [String] one of the Action constants
      # @param coordinate [Array] [x, y] optional
      # @param text [String] optional
      #
      # @return [String, Hash, nil] the shell summary for xdotool actions, the image hash for a screenshot, or nil
      #   when the action is not recognized
      def perform(action:, text: nil, coordinate: nil) # rubocop:disable Metrics/CyclomaticComplexity
        case action
        when Action::KEY then key(text: text)
        when Action::TYPE then type(text: text)
        when Action::CURSOR_POSITION then mouse_location
        when Action::LEFT_CLICK then click(button: Button::LEFT)
        when Action::MIDDLE_CLICK then click(button: Button::MIDDLE)
        when Action::RIGHT_CLICK then click(button: Button::RIGHT)
        when Action::LEFT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::LEFT)
        when Action::MIDDLE_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::MIDDLE)
        when Action::RIGHT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::RIGHT)
        when Action::MOUSE_MOVE then mouse_move(coordinate:)
        when Action::DOUBLE_CLICK then double_click(button: Button::LEFT)
        when Action::SCREENSHOT then screenshot
        end
      end

      # Runs a command through Open3.capture3 and summarizes the outcome. All arguments are forwarded untouched.
      #
      # @param cmd [String]
      #
      # @return [String] formatted as "stdout=... stderr=... status=..."
      def shell(cmd, ...)
        stdout, stderr, status = Open3.capture3(cmd, ...)

        "stdout=#{stdout.inspect} stderr=#{stderr.inspect} status=#{status}"
      end

      # Runs xdotool with the given arguments.
      #
      # Each argument is converted with `to_s` because Open3.capture3 only accepts strings, while the helpers below
      # pass integers (button numbers, coordinates and delays).
      #
      # @param args [Array<#to_s>] xdotool command-line arguments
      # @param opts [Hash] options forwarded to {#shell}
      #
      # @return [String]
      def xdotool(*args, **opts)
        shell('xdotool', *args.map(&:to_s), **opts)
      end

      # @param button [Integer]
      #
      # @return [String]
      def click(button:)
        xdotool('click', button)
      end

      # @param button [Integer]
      #
      # @return [String]
      def double_click(button:)
        xdotool('click', button, '--repeat', 2)
      end

      # @param coordinate [Array] [x, y]
      #
      # @return [String]
      def mouse_move(coordinate:)
        x, y = coordinate
        xdotool('mousemove', '--sync', x, y)
      end

      # Presses the button at the current pointer position, moves to the coordinate, then releases the button.
      #
      # @param coordinate [Array] [x, y]
      # @param button [Integer]
      #
      # @return [String]
      def mouse_down_move_up(coordinate:, button:)
        x, y = coordinate
        xdotool('mousedown', button, 'mousemove', '--sync', x, y, 'mouseup', button)
      end

      # @return [String]
      def mouse_location
        xdotool('getmouselocation')
      end

      # @param text [String]
      # @param delay [Integer] milliseconds
      #
      # @return [String]
      def type(text:, delay: TYPING_DELAY)
        xdotool('type', '--delay', delay, '--', text)
      end

      # @param text [String]
      #
      # @return [String]
      def key(text:)
        xdotool('key', '--', text)
      end

      # Captures a PNG with gnome-screenshot into a temporary file and returns it base64 encoded.
      #
      # The exit status of gnome-screenshot is not checked; if nothing was written the data is an empty string.
      #
      # @return [Hash] with the keys :type, :media_type and :data
      def screenshot
        # The block form closes and removes the file even when an error is raised part way through.
        Tempfile.create(['screenshot', '.png']) do |file|
          Kernel.system('gnome-screenshot', '-w', '-f', file.path)

          # Read by path (not through the handle opened before the tool ran) so a replaced file is still picked up.
          # pack('m0') is base64 without line breaks, which keeps the payload on a single line and needs no extra
          # library to be loaded.
          data = [File.binread(file.path)].pack('m0')

          { type: 'base64', media_type: 'image/png', data: data }
        end
      end
    end
  end
end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Anthropic
5
- VERSION = '1.9.2'
5
+ VERSION = '1.9.3'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 1.9.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
@@ -75,6 +75,7 @@ files:
75
75
  - lib/omniai/anthropic/chat/tool_call_serializer.rb
76
76
  - lib/omniai/anthropic/chat/tool_serializer.rb
77
77
  - lib/omniai/anthropic/client.rb
78
+ - lib/omniai/anthropic/computer.rb
78
79
  - lib/omniai/anthropic/config.rb
79
80
  - lib/omniai/anthropic/version.rb
80
81
  homepage: https://github.com/ksylvest/omniai-anthropic