omniai-anthropic 1.9.2 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 55c1ba530498bc6cfe194b42c12402ad79d3d9f9c13fa3b7922fecbfa1a8d1c8
4
- data.tar.gz: edc2e433bbf5107177ddd6d90df9f3d856e209316a10ecfa4deeaec85c72378c
3
+ metadata.gz: fe02992338864e110012ee2d463d0922deb06d0e536b246546eae3b3cfd9d5aa
4
+ data.tar.gz: 3ad82e6af98e100ac3186120a5622c87184f759dc669bea4f26a99489b116411
5
5
  SHA512:
6
- metadata.gz: d9e23e858ef9276bfb2ab2d63836170d6e9c21972a4d8fca7be09620d9585b325615168e06d771cf6609320ea6c9471a6145d674f1c997fe074bb02d9cdfcc7c
7
- data.tar.gz: 4da1388834893087d8405e3aeede1130c096b84acba445b6e08f10209d6df51157f63e32bb571e6a611b9db591e9ea5d676136d1fb0f287ed6362802a9e49fc6
6
+ metadata.gz: 180cea8404f6740edbd1c219e21089a40604e5cfdef051e0dfe1f6ba0b624f55bab73d69c8629e22a17e16b6e0008f32b6498694cedb525defd0bc70c6810ccf
7
+ data.tar.gz: c955b172f69deb6afd9d03d215e1ac1aa2d60407d8d8b214bbe381085c55af5659f64a82dab20aa95be4a3254c49abdabf87e2a90ca004d3a9598c32dec37ecc
data/README.md CHANGED
@@ -104,3 +104,19 @@ JSON.parse(completion.text) # { "name": "Ringo" }
104
104
  ```
105
105
 
106
106
  [Anthropic API Reference `control-output-format`](https://docs.anthropic.com/en/docs/control-output-format)
107
+
108
+ ### Computers
109
+
110
+ ```bash
111
+ sudo apt-get install imagemagick # screenshots (provides `convert`)
112
+ sudo apt-get install scrot # screenshots
113
+ sudo apt-get install xdotool # mouse / keyboard
114
+ ```
115
+
116
+ ```ruby
117
+ computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
118
+
119
+ completion = client.chat(tools: [computer]) do |prompt|
120
+ prompt.user('Please signup for reddit')
121
+ end
122
+ ```
@@ -25,12 +25,14 @@ module OmniAI
25
25
  # @param api_key [String] optional - defaults to `OmniAI::Anthropic.config.api_key`
26
26
  # @param host [String] optional - defaults to `OmniAI::Anthropic.config.host`
27
27
  # @param version [String] optional - defaults to `OmniAI::Anthropic.config.version`
28
+ # @param beta [String] optional - defaults to `OmniAI::Anthropic.config.beta`
28
29
  # @param logger [Logger] optional - defaults to `OmniAI::Anthropic.config.logger`
29
30
  # @param timeout [Integer] optional - defaults to `OmniAI::Anthropic.config.timeout`
30
31
  def initialize(
31
32
  api_key: OmniAI::Anthropic.config.api_key,
32
33
  host: OmniAI::Anthropic.config.host,
33
34
  version: OmniAI::Anthropic.config.version,
35
+ beta: OmniAI::Anthropic.config.beta,
34
36
  logger: OmniAI::Anthropic.config.logger,
35
37
  timeout: OmniAI::Anthropic.config.timeout
36
38
  )
@@ -40,6 +42,7 @@ module OmniAI
40
42
 
41
43
  @host = host
42
44
  @version = version
45
+ @beta = beta
43
46
  end
44
47
 
45
48
  # @return [HTTP::Client]
@@ -47,6 +50,7 @@ module OmniAI
47
50
  @connection ||= super.headers({
48
51
  'x-api-key': @api_key,
49
52
  'anthropic-version': @version,
53
+ 'anthropic-beta': @beta,
50
54
  }.compact)
51
55
  end
52
56
 
@@ -0,0 +1,188 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'open3'
4
+
5
+ module OmniAI
6
+ module Anthropic
7
+ # A reference implementation of an OmniAI computer tool using xdotool for mouse / keyboard:
8
+ # https://docs.anthropic.com/en/docs/build-with-claude/computer-use#computer-tool
9
+ #
10
+ # Usage:
11
+ #
12
+ # computer = OmniAI::Anthropic::Computer.new(display_width_px: 1024, display_height_px: 768)
13
+ class Computer
14
+ TYPE = 'computer_20241022'
15
+
16
+ SCREENSHOT_DELAY = 2.0 # seconds
17
+ TYPING_DELAY = 20 # milliseconds
18
+
19
+ module Action
20
+ KEY = 'key'
21
+ TYPE = 'type'
22
+ CURSOR_POSITION = 'cursor_position'
23
+ MOUSE_MOVE = 'mouse_move'
24
+ LEFT_CLICK = 'left_click'
25
+ RIGHT_CLICK = 'right_click'
26
+ MIDDLE_CLICK = 'middle_click'
27
+ LEFT_CLICK_DRAG = 'left_click_drag'
28
+ RIGHT_CLICK_DRAG = 'right_click_drag'
29
+ MIDDLE_CLICK_DRAG = 'middle_click_drag'
30
+ DOUBLE_CLICK = 'double_click'
31
+ SCREENSHOT = 'screenshot'
32
+ end
33
+
34
+ module Button
35
+ LEFT = 1
36
+ MIDDLE = 2
37
+ RIGHT = 3
38
+ end
39
+
40
+ # @param name [String] optional
41
+ # @param display_width_px [Integer]
42
+ # @param display_height_px [Integer]
43
+ # @param display_number [Integer] optional
44
+ def initialize(display_width_px:, display_height_px:, display_number: 1, name: 'computer')
45
+ @name = name
46
+ @display_width_px = display_width_px
47
+ @display_height_px = display_height_px
48
+ @display_number = display_number
49
+ end
50
+
51
+ # @example
52
+ # tool.serialize # =>
53
+ # # {
54
+ # # "type": "computer_20241022",
55
+ # # "name": "computer",
56
+ # # "display_width_px": 1024,
57
+ # # "display_height_px": 768,
58
+ # # "display_number": 1,
59
+ # # }
60
+ #
61
+ # @return [Hash]
62
+ def serialize(*)
63
+ {
64
+ type: TYPE,
65
+ name: @name,
66
+ display_width_px: @display_width_px,
67
+ display_height_px: @display_height_px,
68
+ display_number: @display_number,
69
+ }
70
+ end
71
+
72
+ # @example
73
+ # computer.call({ "action" => 'type', "text" => 'Hello' })
74
+ #
75
+ # @param args [Hash]
76
+ # @return [String, Hash, nil] the result of `perform` for the requested action
77
+ def call(args = {})
78
+ perform(
79
+ action: args['action'],
80
+ text: args['text'],
81
+ coordinate: args['coordinate']
82
+ )
83
+ end
84
+
85
+ # @param action [String]
86
+ # @param coordinate [Array] [x, y] optional
87
+ # @param text [String] optional
88
+ #
89
+ # @return [String, Hash, nil] shell output, a screenshot payload, or nil when the action is not recognized
90
+ def perform(action:, text: nil, coordinate: nil) # rubocop:disable Metrics/CyclomaticComplexity
91
+ case action
92
+ when Action::KEY then key(text: text)
93
+ when Action::TYPE then type(text: text)
94
+ when Action::CURSOR_POSITION then mouse_location
95
+ when Action::LEFT_CLICK then click(button: Button::LEFT)
96
+ when Action::MIDDLE_CLICK then click(button: Button::MIDDLE)
97
+ when Action::RIGHT_CLICK then click(button: Button::RIGHT)
98
+ when Action::LEFT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::LEFT)
99
+ when Action::MIDDLE_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::MIDDLE)
100
+ when Action::RIGHT_CLICK_DRAG then mouse_down_move_up(coordinate:, button: Button::RIGHT)
101
+ when Action::MOUSE_MOVE then mouse_move(coordinate:)
102
+ when Action::DOUBLE_CLICK then double_click(button: Button::LEFT)
103
+ when Action::SCREENSHOT then screenshot
104
+ end
105
+ end
106
+
107
+ # @param cmd [String]
108
+ #
109
+ # @return [String]
110
+ def shell(cmd, ...)
111
+ stdout, stderr, status = Open3.capture3(cmd, ...)
112
+
113
+ "stdout=#{stdout.inspect} stderr=#{stderr.inspect} status=#{status}"
114
+ end
115
+
116
+ # @param args [Array] arguments forwarded to the `xdotool` command
117
+ #
118
+ # @return [String]
119
+ def xdotool(...)
120
+ shell('xdotool', ...)
121
+ end
122
+
123
+ # @param button [Integer]
124
+ #
125
+ # @return [String]
126
+ def click(button:)
127
+ xdotool('click', button)
128
+ end
129
+
130
+ # @param button [Integer]
131
+ #
132
+ # @return [String]
133
+ def double_click(button:)
134
+ xdotool('click', button, '--repeat', 2)
135
+ end
136
+
137
+ # @param coordinate [Array] [x, y]
138
+ #
139
+ # @return [String]
140
+ def mouse_move(coordinate:)
141
+ x, y = coordinate
142
+ xdotool('mousemove', '--sync', x, y)
143
+ end
144
+
145
+ # @param coordinate [Array] [x, y]
146
+ # @param button [Integer]
147
+ #
148
+ # @return [String]
149
+ def mouse_down_move_up(coordinate:, button:)
150
+ x, y = coordinate
151
+ xdotool('mousedown', button, 'mousemove', '--sync', x, y, 'mouseup', button)
152
+ end
153
+
154
+ # @return [String]
155
+ def mouse_location
156
+ xdotool('getmouselocation')
157
+ end
158
+
159
+ # @param text [String]
160
+ # @param delay [Integer] milliseconds
161
+ #
162
+ # @return [String]
163
+ def type(text:, delay: TYPING_DELAY)
164
+ xdotool('type', '--delay', delay, '--', text)
165
+ end
166
+
167
+ # @param text [String]
168
+ #
169
+ # @return [String]
170
+ def key(text:)
171
+ xdotool('key', '--', text)
172
+ end
173
+
174
+ # @return [Hash]
175
+ def screenshot
176
+ tempfile = Tempfile.new(['screenshot', '.png'])
177
+ Kernel.system('gnome-screenshot', '-w', '-f', tempfile.path)
178
+ tempfile.rewind
179
+ data = Base64.encode64(tempfile.read)
180
+
181
+ { type: 'base64', media_type: 'image/png', data: data }
182
+ ensure
183
+ tempfile.close
184
+ tempfile.unlink
185
+ end
186
+ end
187
+ end
188
+ end
@@ -11,20 +11,27 @@ module OmniAI
11
11
  # @return [String, nil] passed as `anthropic-version` if specified
12
12
  attr_accessor :version
13
13
 
14
+ # @!attribute [rw] beta
15
+ # @return [String, nil] passed as `anthropic-beta` if specified
16
+ attr_accessor :beta
17
+
14
18
  # @param api_key [String, nil] optional - defaults to `ENV['ANTHROPIC_API_KEY']`
15
19
  # @param host [String, nil] optional - defaults to `ENV['ANTHROPIC_HOST']` w/ fallback to `DEFAULT_HOST`
16
20
  # @param version [String, nil] optional - defaults to `ENV['ANTHROPIC_VERSION']` w/ fallback to `DEFAULT_VERSION`
21
+ # @param beta [String, nil] optional - defaults to `ENV['ANTHROPIC_BETA']`
17
22
  # @param logger [Logger, nil] optional - defaults to `nil`
18
23
  # @param timeout [Integer, Hash, nil] optional
19
24
  def initialize(
20
25
  api_key: ENV.fetch('ANTHROPIC_API_KEY', nil),
21
26
  host: ENV.fetch('ANTHROPIC_HOST', DEFAULT_HOST),
22
27
  version: ENV.fetch('ANTHROPIC_VERSION', DEFAULT_VERSION),
28
+ beta: ENV.fetch('ANTHROPIC_BETA', nil),
23
29
  logger: nil,
24
30
  timeout: nil
25
31
  )
26
32
  super(api_key:, host:, logger:, timeout:)
27
33
  @version = version
34
+ @beta = beta
28
35
  @chat_options[:max_tokens] = 4096
29
36
  end
30
37
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Anthropic
5
- VERSION = '1.9.2'
5
+ VERSION = '1.9.4'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-anthropic
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.9.2
4
+ version: 1.9.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
@@ -75,6 +75,7 @@ files:
75
75
  - lib/omniai/anthropic/chat/tool_call_serializer.rb
76
76
  - lib/omniai/anthropic/chat/tool_serializer.rb
77
77
  - lib/omniai/anthropic/client.rb
78
+ - lib/omniai/anthropic/computer.rb
78
79
  - lib/omniai/anthropic/config.rb
79
80
  - lib/omniai/anthropic/version.rb
80
81
  homepage: https://github.com/ksylvest/omniai-anthropic