omniai-anthropic 1.9.2 → 1.9.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55c1ba530498bc6cfe194b42c12402ad79d3d9f9c13fa3b7922fecbfa1a8d1c8
4
- data.tar.gz: edc2e433bbf5107177ddd6d90df9f3d856e209316a10ecfa4deeaec85c72378c
3
+ metadata.gz: fe02992338864e110012ee2d463d0922deb06d0e536b246546eae3b3cfd9d5aa
4
+ data.tar.gz: 3ad82e6af98e100ac3186120a5622c87184f759dc669bea4f26a99489b116411
5
5
  SHA512:
6
- metadata.gz: d9e23e858ef9276bfb2ab2d63836170d6e9c21972a4d8fca7be09620d9585b325615168e06d771cf6609320ea6c9471a6145d674f1c997fe074bb02d9cdfcc7c
7
- data.tar.gz: 4da1388834893087d8405e3aeede1130c096b84acba445b6e08f10209d6df51157f63e32bb571e6a611b9db591e9ea5d676136d1fb0f287ed6362802a9e49fc6
6
+ metadata.gz: 180cea8404f6740edbd1c219e21089a40604e5cfdef051e0dfe1f6ba0b624f55bab73d69c8629e22a17e16b6e0008f32b6498694cedb525defd0bc70c6810ccf
7
+ data.tar.gz: c955b172f69deb6afd9d03d215e1ac1aa2d60407d8d8b214bbe381085c55af5659f64a82dab20aa95be4a3254c49abdabf87e2a90ca004d3a9598c32dec37ecc
data/README.md CHANGED
@@ -104,3 +104,19 @@ JSON.parse(completion.text) # { "name": "Ringo" }
104
104
  ```
105
105
 
106
106
  [Anthropic API Reference `control-output-format`](https://docs.anthropic.com/en/docs/control-output-format)
107
+
108
+ ### Computers
109
+
110
+ ```bash
111
+ sudo apt-get install imagemagick # screenshots (provides `convert`)
112
+ sudo apt-get install scrot # screenshots
113
+ sudo apt-get install xdotool # mouse / keyboard
114
+ ```
115
+
116
+ ```ruby
117
+ computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
118
+
119
+ completion = client.chat(tools: [computer]) do |prompt|
120
+ prompt.user('Please signup for reddit')
121
+ end
122
+ ```
@@ -25,12 +25,14 @@ module OmniAI
25
25
  # @param api_key [String] optional - defaults to `OmniAI::Anthropic.config.api_key`
26
26
  # @param host [String] optional - defaults to `OmniAI::Anthropic.config.host`
27
27
  # @param version [String] optional - defaults to `OmniAI::Anthropic.config.version`
28
+ # @param beta [String] optional - defaults to `OmniAI::Anthropic.config.beta`
28
29
  # @param logger [Logger] optional - defaults to `OmniAI::Anthropic.config.logger`
29
30
  # @param timeout [Integer] optional - defaults to `OmniAI::Anthropic.config.timeout`
30
31
  def initialize(
31
32
  api_key: OmniAI::Anthropic.config.api_key,
32
33
  host: OmniAI::Anthropic.config.host,
33
34
  version: OmniAI::Anthropic.config.version,
35
+ beta: OmniAI::Anthropic.config.beta,
34
36
  logger: OmniAI::Anthropic.config.logger,
35
37
  timeout: OmniAI::Anthropic.config.timeout
36
38
  )
@@ -40,6 +42,7 @@ module OmniAI
40
42
 
41
43
  @host = host
42
44
  @version = version
45
+ @beta = beta
43
46
  end
44
47
 
45
48
  # @return [HTTP::Client]
@@ -47,6 +50,7 @@ module OmniAI
47
50
  @connection ||= super.headers({
48
51
  'x-api-key': @api_key,
49
52
  'anthropic-version': @version,
53
+ 'anthropic-beta': @beta,
50
54
  }.compact)
51
55
  end
52
56
 
@@ -0,0 +1,188 @@
1
# frozen_string_literal: true

require 'base64'
require 'open3'
require 'tempfile'

module OmniAI
  module Anthropic
    # A reference implementation of an OmniAI computer tool using xdotool for mouse / keyboard:
    # https://docs.anthropic.com/en/docs/build-with-claude/computer-use#computer-tool
    #
    # Usage:
    #
    #   computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
    #   computer.call({ "action" => "mouse_move", "coordinate" => [100, 200] })
    class Computer
      # Tool type identifier sent to the API by `#serialize`.
      TYPE = 'computer_20241022'

      SCREENSHOT_DELAY = 2.0 # seconds (not referenced inside this class)
      TYPING_DELAY = 20 # milliseconds

      # Action names accepted by `#perform`.
      module Action
        KEY = 'key'
        TYPE = 'type'
        CURSOR_POSITION = 'cursor_position'
        MOUSE_MOVE = 'mouse_move'
        LEFT_CLICK = 'left_click'
        RIGHT_CLICK = 'right_click'
        MIDDLE_CLICK = 'middle_click'
        LEFT_CLICK_DRAG = 'left_click_drag'
        RIGHT_CLICK_DRAG = 'right_click_drag'
        MIDDLE_CLICK_DRAG = 'middle_click_drag'
        DOUBLE_CLICK = 'double_click'
        SCREENSHOT = 'screenshot'
      end

      # Mouse button numbers passed to xdotool.
      module Button
        LEFT = 1
        MIDDLE = 2
        RIGHT = 3
      end

      # @param display_width_px [Integer]
      # @param display_height_px [Integer]
      # @param display_number [Integer] optional
      # @param name [String] optional
      def initialize(display_width_px:, display_height_px:, display_number: 1, name: 'computer')
        @name = name
        @display_width_px = display_width_px
        @display_height_px = display_height_px
        @display_number = display_number
      end

      # @example
      #   tool.serialize # =>
      #   # {
      #   #   "type": "computer_20241022",
      #   #   "name": "computer",
      #   #   "display_width_px": 1024,
      #   #   "display_height_px": 768,
      #   #   "display_number": 1,
      #   # }
      #
      # @return [Hash]
      def serialize(*)
        {
          type: TYPE,
          name: @name,
          display_width_px: @display_width_px,
          display_height_px: @display_height_px,
          display_number: @display_number,
        }
      end

      # Entry point for a tool call: unpacks the string-keyed arguments and dispatches to `#perform`.
      #
      # @example
      #   computer.call({ "action" => 'type', "text" => 'Hello' })
      #
      # @param args [Hash] string-keyed; reads "action", "text" and "coordinate"
      # @return [String, Hash, nil] see `#perform`
      def call(args = {})
        perform(
          action: args['action'],
          text: args['text'],
          coordinate: args['coordinate']
        )
      end

      # Dispatches a single action to the matching helper.
      #
      # @param action [String] one of the `Action` constants
      # @param coordinate [Array] [x, y] optional
      # @param text [String] optional
      #
      # @return [String, Hash, nil] the command summary from `#shell`, the image hash from `#screenshot`,
      #   or nil when the action is not recognized
      def perform(action:, text: nil, coordinate: nil) # rubocop:disable Metrics/CyclomaticComplexity
        case action
        when Action::KEY then key(text: text)
        when Action::TYPE then type(text: text)
        when Action::CURSOR_POSITION then mouse_location
        when Action::LEFT_CLICK then click(button: Button::LEFT)
        when Action::MIDDLE_CLICK then click(button: Button::MIDDLE)
        when Action::RIGHT_CLICK then click(button: Button::RIGHT)
        when Action::LEFT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::LEFT)
        when Action::MIDDLE_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::MIDDLE)
        when Action::RIGHT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::RIGHT)
        when Action::MOUSE_MOVE then mouse_move(coordinate:)
        when Action::DOUBLE_CLICK then double_click(button: Button::LEFT)
        when Action::SCREENSHOT then screenshot
        end
      end

      # Runs a command and summarizes its output.
      #
      # Numeric arguments (button numbers, coordinates, delays) are converted to strings because
      # process spawning only accepts String arguments and raises TypeError for an Integer.
      #
      # @param cmd [String]
      # @param args [Array] additional command-line arguments
      # @param options [Hash] keyword options forwarded to `Open3.capture3`
      #
      # @return [String]
      def shell(cmd, *args, **options)
        argv = args.map { |arg| arg.is_a?(Numeric) ? arg.to_s : arg }
        stdout, stderr, status = Open3.capture3(cmd, *argv, **options)

        "stdout=#{stdout.inspect} stderr=#{stderr.inspect} status=#{status}"
      end

      # Runs `xdotool` with the given arguments via `#shell`.
      #
      # @return [String]
      def xdotool(...)
        shell('xdotool', ...)
      end

      # @param button [Integer]
      #
      # @return [String]
      def click(button:)
        xdotool('click', button)
      end

      # @param button [Integer]
      #
      # @return [String]
      def double_click(button:)
        xdotool('click', button, '--repeat', 2)
      end

      # @param coordinate [Array] [x, y]
      #
      # @return [String]
      def mouse_move(coordinate:)
        x, y = coordinate
        xdotool('mousemove', '--sync', x, y)
      end

      # Presses the button, moves the pointer to the coordinate, then releases the button.
      #
      # @param coordinate [Array] [x, y]
      # @param button [Integer]
      #
      # @return [String]
      def mouse_down_move_up(coordinate:, button:)
        x, y = coordinate
        xdotool('mousedown', button, 'mousemove', '--sync', x, y, 'mouseup', button)
      end

      # @return [String]
      def mouse_location
        xdotool('getmouselocation')
      end

      # @param text [String]
      # @param delay [Integer] milliseconds
      #
      # @return [String]
      def type(text:, delay: TYPING_DELAY)
        # `--` stops option parsing so text starting with a dash is not read as a flag.
        xdotool('type', '--delay', delay, '--', text)
      end

      # @param text [String]
      #
      # @return [String]
      def key(text:)
        xdotool('key', '--', text)
      end

      # Captures a screenshot with `gnome-screenshot` into a temporary PNG and returns it base64 encoded.
      # The exit status of `gnome-screenshot` is not checked, so a failed capture yields empty data.
      #
      # @return [Hash]
      def screenshot
        # The block form closes and removes the temporary file even when an error is raised.
        Tempfile.create(['screenshot', '.png']) do |file|
          Kernel.system('gnome-screenshot', '-w', '-f', file.path)
          # Read by path so the content is found even if the tool replaced the file rather than
          # writing in place.
          data = Base64.encode64(File.binread(file.path))

          { type: 'base64', media_type: 'image/png', data: data }
        end
      end
    end
  end
end
@@ -11,20 +11,27 @@ module OmniAI
11
11
  # @return [String, nil] passed as `anthropic-version` if specified
12
12
  attr_accessor :version
13
13
 
14
+ # @!attribute [rw] beta
15
+ # @return [String, nil] passed as `anthropic-beta` if specified
16
+ attr_accessor :beta
17
+
14
18
  # @param api_key [String, nil] optional - defaults to `ENV['ANTHROPIC_API_KEY']`
15
19
  # @param host [String, nil] optional - defaults to `ENV['ANTHROPIC_HOST']` w/ fallback to `DEFAULT_HOST`
16
20
  # @param version [String, nil] optional - defaults to `ENV['ANTHROPIC_VERSION']` w/ fallback to `DEFAULT_VERSION`
21
+ # @param beta [String, nil] optional - defaults to `ENV['ANTHROPIC_BETA']`
17
22
  # @param logger [Logger, nil] optional - defaults to
18
23
  # @param timeout [Integer, Hash, nil] optional
19
24
  def initialize(
20
25
  api_key: ENV.fetch('ANTHROPIC_API_KEY', nil),
21
26
  host: ENV.fetch('ANTHROPIC_HOST', DEFAULT_HOST),
22
27
  version: ENV.fetch('ANTHROPIC_VERSION', DEFAULT_VERSION),
28
+ beta: ENV.fetch('ANTHROPIC_BETA', nil),
23
29
  logger: nil,
24
30
  timeout: nil
25
31
  )
26
32
  super(api_key:, host:, logger:, timeout:)
27
33
  @version = version
34
+ @beta = beta
28
35
  @chat_options[:max_tokens] = 4096
29
36
  end
30
37
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Anthropic
5
- VERSION = '1.9.2'
5
+ VERSION = '1.9.4'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 1.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
@@ -75,6 +75,7 @@ files:
75
75
  - lib/omniai/anthropic/chat/tool_call_serializer.rb
76
76
  - lib/omniai/anthropic/chat/tool_serializer.rb
77
77
  - lib/omniai/anthropic/client.rb
78
+ - lib/omniai/anthropic/computer.rb
78
79
  - lib/omniai/anthropic/config.rb
79
80
  - lib/omniai/anthropic/version.rb
80
81
  homepage: https://github.com/ksylvest/omniai-anthropic