charai 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,393 @@
1
+ module Charai
2
# Hub class for performing actions on the browser.
# Mainly used by AI.
#
# Every action announces itself through the optional callback before it is
# sent to the browsing context.
class InputTool
  # Key names that resolve to Meta on macOS and to Control elsewhere.
  PLATFORM_MODIFIER_KEYS = %w[ControlOrMeta CtrlOrMeta].freeze

  # Key name => key value sent to the browser.
  # ref: https://github.com/puppeteer/puppeteer/blob/puppeteer-v23.5.3/packages/puppeteer-core/src/bidi/Input.ts#L52
  KEY_VALUES = {
    'Cancel' => "\uE001",
    'Help' => "\uE002",
    'Backspace' => "\uE003",
    'Tab' => "\uE004",
    'Clear' => "\uE005",
    'Enter' => "\uE007",
    'Shift' => "\uE008",
    'ShiftLeft' => "\uE008",
    'Control' => "\uE009",
    'ControlLeft' => "\uE009",
    'Ctrl' => "\uE009",
    'Alt' => "\uE00A",
    'AltLeft' => "\uE00A",
    'Pause' => "\uE00B",
    'Escape' => "\uE00C",
    'PageUp' => "\uE00E",
    'PageDown' => "\uE00F",
    'End' => "\uE010",
    'Home' => "\uE011",
    'ArrowLeft' => "\uE012",
    'ArrowUp' => "\uE013",
    'ArrowRight' => "\uE014",
    'ArrowDown' => "\uE015",
    'Insert' => "\uE016",
    'Delete' => "\uE017",
    'NumpadEqual' => "\uE019",
    'Numpad0' => "\uE01A",
    'Numpad1' => "\uE01B",
    'Numpad2' => "\uE01C",
    'Numpad3' => "\uE01D",
    'Numpad4' => "\uE01E",
    'Numpad5' => "\uE01F",
    'Numpad6' => "\uE020",
    'Numpad7' => "\uE021",
    'Numpad8' => "\uE022",
    'Numpad9' => "\uE023",
    'NumpadMultiply' => "\uE024",
    'NumpadAdd' => "\uE025",
    'NumpadSubtract' => "\uE027",
    'NumpadDecimal' => "\uE028",
    'NumpadDivide' => "\uE029",
    'F1' => "\uE031",
    'F2' => "\uE032",
    'F3' => "\uE033",
    'F4' => "\uE034",
    'F5' => "\uE035",
    'F6' => "\uE036",
    'F7' => "\uE037",
    'F8' => "\uE038",
    'F9' => "\uE039",
    'F10' => "\uE03A",
    'F11' => "\uE03B",
    'F12' => "\uE03C",
    'Meta' => "\uE03D",
    'MetaLeft' => "\uE03D",
    'ShiftRight' => "\uE050",
    'ControlRight' => "\uE051",
    'AltRight' => "\uE052",
    'MetaRight' => "\uE053",
    'Semicolon' => ';',
    'Equal' => '=',
    'Comma' => ',',
    'Minus' => '-',
    'Period' => '.',
    'Slash' => '/',
    'Backquote' => '`',
    'BracketLeft' => '[',
    'Backslash' => '\\',
    'BracketRight' => ']',
    'Quote' => '"',
  }.merge(
    # "Digit0".."Digit9" => "0".."9"
    ('0'..'9').to_h { |digit| ["Digit#{digit}", digit] },
    # "KeyA".."KeyZ" => "a".."z"
    ('a'..'z').to_h { |letter| ["Key#{letter.upcase}", letter] },
  ).freeze

  private_constant :PLATFORM_MODIFIER_KEYS, :KEY_VALUES

  # callback
  # - on_assertion_ok(description)
  # - on_assertion_fail(description)
  # - on_action_start(action, params)
  #
  # The callback is either an object responding to those methods or a Hash
  # whose values for those keys are Procs.
  def initialize(browsing_context, callback: nil)
    @browsing_context = browsing_context
    @callback = callback
  end

  # Registers the block that receives Agent::Message objects produced by
  # capture_screenshot and execute_script.
  def on_send_message(&block)
    @message_sender = block
  end

  # Reports a passed assertion through the callback.
  def assertion_ok(description)
    trigger_callback(:on_assertion_ok, description)
  end

  # Reports a failed assertion through the callback, then fails the running
  # test: via RSpec when loaded, otherwise via Minitest, otherwise by raising
  # a RuntimeError carrying the description.
  def assertion_fail(description)
    trigger_callback(:on_assertion_fail, description)

    if defined?(RSpec::Expectations)
      RSpec::Expectations.fail_with(description)
    elsif defined?(Minitest::Assertion)
      raise Minitest::Assertion, description
    elsif defined?(MiniTest::Assertion)
      # Legacy constant name; only present on old Minitest or with its compat shim.
      raise MiniTest::Assertion, description
    else
      raise description
    end
  end

  # Takes a PNG screenshot and returns whatever the browsing context returns.
  # When a message sender is registered, the Base64-encoded image is also sent
  # to it together with the URL read just before the capture.
  def capture_screenshot
    trigger_callback(:on_action_start, 'capture_screenshot', {})

    current_url = @browsing_context.url
    binary = @browsing_context.capture_screenshot(format: 'png')

    if @message_sender
      message = Agent::Message.new(
        text: "Capture of #{current_url}",
        images: [
          { png: Base64.strict_encode64(binary) },
        ],
      )
      @message_sender.call(message)
    end

    binary
  end

  # Moves the pointer to (x, y), presses button 0, pauses for `delay`, then
  # releases it. Coordinates are converted with to_i.
  def click(x:, y:, delay: 50)
    trigger_callback(:on_action_start, 'click', { x: x, y: y, delay: delay })

    @browsing_context.perform_mouse_actions do |q|
      q.pointer_move(x: x.to_i, y: y.to_i)
      q.pointer_down(button: 0)
      q.pause(duration: delay)
      q.pointer_up(button: 0)
    end
  end

  # Evaluates the script in the default realm and returns its result.
  # A ScriptEvaluationError is not raised to the caller; its message becomes
  # the result. Any result with a non-empty string form is forwarded to the
  # message sender.
  def execute_script(script)
    trigger_callback(:on_action_start, 'execute_script', { script: script })

    result =
      begin
        @browsing_context.default_realm.script_evaluate(script)
      rescue BrowsingContext::Realm::ScriptEvaluationError => e
        e.message
      end

    notify_to_sender(result) unless result.to_s.empty?

    result
  end

  # Holds `key` down while the given block runs; the key is released even if
  # the block raises.
  #
  # Raises ArgumentError when called without a block (before any key event).
  def on_pressing_key(key, &block)
    raise ArgumentError, 'on_pressing_key requires a block' unless block

    trigger_callback(:on_action_start, 'key_down', { key: key })

    value = convert_key(key)
    @browsing_context.perform_keyboard_actions do |q|
      q.key_down(value: value)
    end

    begin
      block.call
    ensure
      trigger_callback(:on_action_start, 'key_up', { key: key })

      @browsing_context.perform_keyboard_actions do |q|
        q.key_up(value: value)
      end
    end
  end

  # Presses and releases a single key with a pause of `delay` in between.
  def press_key(key, delay: 50)
    trigger_callback(:on_action_start, 'press_key', { key: key, delay: delay })

    value = convert_key(key)
    @browsing_context.perform_keyboard_actions do |q|
      q.key_down(value: value)
      q.pause(duration: delay)
      q.key_up(value: value)
    end
  end

  # Reports the action, then blocks the current thread with Kernel#sleep.
  def sleep_seconds(seconds)
    trigger_callback(:on_action_start, 'sleep_seconds', { seconds: seconds })

    sleep seconds
  end

  # Types the text one character at a time. `delay` is split between the time
  # a key is held and the gap before the next character.
  def type_text(text, delay: 50)
    trigger_callback(:on_action_start, 'type_text', { text: text, delay: delay })

    hold = delay / 2
    gap = delay - hold

    text.each_char do |c|
      @browsing_context.perform_keyboard_actions do |q|
        q.key_down(value: c)
        q.pause(duration: hold)
        q.key_up(value: c)
        q.pause(duration: gap)
      end
    end
  end

  # velocity:
  #   500 - weak
  #   1000 - normal
  #   2000 - strong
  #
  # Raises ArgumentError unless velocity is positive.
  def scroll_down(x: 0, y: 0, velocity: 1000)
    perform_scroll('scroll_down', x: x, y: y, velocity: velocity, upward: false)
  end

  # velocity:
  #   500 - weak
  #   1000 - normal
  #   2000 - strong
  #
  # Raises ArgumentError unless velocity is positive.
  def scroll_up(x: 0, y: 0, velocity: 1000)
    perform_scroll('scroll_up', x: x, y: y, velocity: velocity, upward: true)
  end

  private

  # Shared body of scroll_down / scroll_up: emits one wheel scroll per value
  # produced by SplineDeceleration#calc until it yields zero, negating the
  # delta when scrolling upward.
  def perform_scroll(action, x:, y:, velocity:, upward:)
    raise ArgumentError, 'velocity must be positive' if velocity <= 0

    trigger_callback(:on_action_start, action, { x: x, y: y, velocity: velocity })

    @browsing_context.perform_mouse_wheel_actions do |q|
      deceleration = SplineDeceleration.new(velocity)
      loop do
        delta = deceleration.calc
        break if delta.zero?

        q.scroll(x: x, y: y, delta_y: upward ? -delta : delta, duration: 16)
      end
    end
  end

  # Sends a script result to the registered message sender, if any.
  def notify_to_sender(message)
    return unless @message_sender

    @message_sender.call(Agent::Message.new(text: "result is `#{message}`", images: []))
  end

  # Dispatches to the callback: a method call when the callback object
  # responds to `method_name`, otherwise a Proc stored under that key of a
  # Hash callback. Anything else (including no callback) is a no-op.
  def trigger_callback(method_name, ...)
    if @callback.respond_to?(method_name)
      @callback.public_send(method_name, ...)
    elsif @callback.is_a?(Hash) && @callback[method_name].is_a?(Proc)
      @callback[method_name].call(...)
    end
  end

  # Translates a key name into the value sent with key events.
  # Single-character keys are passed through untouched.
  #
  # Raises ArgumentError for an unknown key name.
  def convert_key(key)
    return key if key.length == 1

    if PLATFORM_MODIFIER_KEYS.include?(key)
      return Charai::Util.macos? ? "\uE03D" : "\uE009"
    end

    KEY_VALUES.fetch(key) { raise ArgumentError, "Unknown key: \"#{key}\"" }
  end
end
393
+ end
@@ -0,0 +1,163 @@
1
+ require 'base64'
2
+ require 'json'
3
+ require 'net/http'
4
+ require 'uri'
5
+
6
+ module Charai
7
# Chat session that keeps the message history and posts it, together with
# each new question, to the endpoint described by the configuration object.
class OpenaiChat
  # callback
  # - on_chat_start(introduction)
  # - on_chat_question(content: Array|String)
  # - on_chat_answer(answer_text)
  # - on_chat_conversation(content, answer_text)
  #
  # The configuration must provide endpoint_url, decorate_body(payload) and
  # add_auth_header(headers). The introduction, when given, becomes the
  # system message of every conversation.
  def initialize(configuration, introduction: nil, callback: nil)
    @configuration = configuration
    @introduction = introduction
    @callback = callback
    @mutex = Mutex.new
    clear
  end

  # Starts a fresh conversation: fires on_chat_start and resets the history
  # to just the system introduction (or to nothing when there is none).
  def clear
    trigger_callback(:on_chat_start, @introduction)

    @messages = []
    @messages << { role: 'system', content: @introduction } if @introduction
  end

  # .push('How are you?')
  # .push('How many people is here?', images: [ { jpg: 'xXxXxxxxxxxxx' }, { png: 'xXxXxxxxxxxxx' } ])
  #
  # Sends the question with the recorded history and returns the answer text.
  # The exchange is appended to the history only after the request succeeded.
  def push(question, images: [])
    content = build_question(question, images)
    message = {
      role: 'user',
      content: content,
    }

    @mutex.synchronize do
      trigger_callback(:on_chat_question, content)
      fetch_openai(message).tap do |answer|
        trigger_callback(:on_chat_answer, answer)
        trigger_callback(:on_chat_conversation, content, answer)

        @messages << message
        @messages << { role: 'assistant', content: answer }
      end
    end
  end

  # Removes the most recent question/answer pair and returns the question
  # content. Returns nil, leaving the history (including the system
  # introduction) untouched, when there is no such pair to remove.
  def pop
    @mutex.synchronize do
      next nil unless @messages.last(2).map { |m| m[:role] } == %w[user assistant]

      @messages.pop
      @messages.pop[:content]
    end
  end

  private

  # Plain string when there are no images, otherwise a content array with the
  # text part first followed by one part per image.
  def build_question(question, images)
    return question if images.empty?

    [
      {
        type: 'text',
        text: question,
      },
      *images.map { |image| build_image_payload(image) },
    ]
  end

  # Converts `{ jpg|jpeg|png: base64 }` into an image_url content part using
  # a data URL. A String starting with "http" is downloaded first.
  #
  # Raises ArgumentError for any other shape.
  def build_image_payload(image)
    if image.is_a?(String) && image.start_with?('http')
      return build_image_payload(fetch_image_url(image))
    end

    raise ArgumentError, "image must be a Hash, but got #{image.class}" unless image.is_a?(Hash)
    raise ArgumentError, "image must have only one key, but got #{image.keys}" unless image.keys.size == 1
    type = image.keys.first
    raise ArgumentError, "image key must be one of [:jpg, :jpeg, :png], but got #{type}" unless %i[jpg jpeg png].include?(type)

    # "image/jpg" is not a registered media type; JPEG data is "image/jpeg".
    subtype = type == :jpg ? 'jpeg' : type

    {
      type: 'image_url',
      image_url: {
        url: "data:image/#{subtype};base64,#{image[type]}",
      },
    }
  end

  # Downloads the image and returns it as `{ png: base64 }` or
  # `{ jpeg: base64 }` depending on the response Content-Type.
  #
  # Raises RuntimeError on a non-success response or an unsupported type.
  def fetch_image_url(url)
    response = Net::HTTP.get_response(URI.parse(url))

    unless response.is_a?(Net::HTTPSuccess)
      raise "Failed to fetch image: #{response.code} #{response.message}"
    end

    mime_type = response['content-type']
    # The header may carry parameters ("image/png; charset=binary") and is
    # case-insensitive, so compare on the bare lower-cased media type.
    media_type = mime_type.to_s.split(';').first.to_s.strip.downcase
    base64_image = Base64.strict_encode64(response.body)

    case media_type
    when 'image/png'
      { png: base64_image }
    when 'image/jpeg', 'image/jpg'
      { jpeg: base64_image }
    else
      raise "Unsupported image type: #{mime_type}"
    end
  end

  # Posts the history plus `message` as JSON and returns the content of the
  # first choice (nil when the response body has no such field).
  #
  # Raises RuntimeError on a non-success response.
  def fetch_openai(message)
    uri = URI(@configuration.endpoint_url)
    payload = @configuration.decorate_body({
      messages: with_message_history(message),
    })
    headers = @configuration.add_auth_header({
      'Content-Type' => 'application/json',
      'Accept' => 'application/json',
    })

    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: uri.scheme == 'https', read_timeout: 120) do |http|
      http.post(uri, payload.to_json, headers)
    end

    unless response.is_a?(Net::HTTPSuccess)
      raise "Failed to fetch OpenAI: #{response.code} #{response.message}"
    end

    JSON.parse(response.body).dig('choices', 0, 'message', 'content')
  end

  # Returns the recorded messages followed by `new_message`. Messages older
  # than the last `omit_images_except_last` entries that carry a content
  # array are reduced to their text part, so their images are not re-sent.
  def with_message_history(new_message, omit_images_except_last: 3)
    keep_from = @messages.size - omit_images_except_last

    history = @messages.each_with_index.map do |message, i|
      content = message[:content]
      next message unless i < keep_from && content.is_a?(Array)

      message.merge(content: content.find { |c| c[:type] == 'text' }[:text])
    end

    history << new_message
  end

  # Dispatches to the callback: a method call when the callback object
  # responds to `method_name`, otherwise a Proc stored under that key of a
  # Hash callback. Anything else (including no callback) is a no-op.
  def trigger_callback(method_name, ...)
    if @callback.respond_to?(method_name)
      @callback.public_send(method_name, ...)
    elsif @callback.is_a?(Hash) && @callback[method_name].is_a?(Proc)
      @callback[method_name].call(...)
    end
  end
end
163
+ end
@@ -0,0 +1,58 @@
1
+ module Charai
2
# Connection settings for the public OpenAI chat completions API:
# a fixed endpoint, bearer-token authentication and a model name that is
# added to every request body.
class OpenaiConfiguration
  attr_reader :endpoint_url

  def initialize(model:, api_key:)
    @model = model
    @api_key = api_key
    @endpoint_url = 'https://api.openai.com/v1/chat/completions'
  end

  # Adds the Authorization header to the given Hash (in place) and returns it.
  def add_auth_header(headers)
    headers.tap { |h| h['Authorization'] = "Bearer #{@api_key}" }
  end

  # Adds the model name to the given payload (in place) and returns it.
  def decorate_body(payload)
    payload.tap { |body| body[:model] = @model }
  end
end
21
+
22
# Connection settings for an Azure OpenAI deployment: the caller supplies the
# full endpoint URL, and the key is sent in the `api-key` header. The request
# body is passed through unchanged.
class AzureOpenaiConfiguration
  attr_reader :endpoint_url

  def initialize(endpoint_url:, api_key:)
    @api_key = api_key
    @endpoint_url = endpoint_url
  end

  # Adds the api-key header to the given Hash (in place) and returns it.
  def add_auth_header(headers)
    headers.tap { |h| h['api-key'] = @api_key }
  end

  # Returns the payload as is; nothing is added for this configuration.
  def decorate_body(payload)
    payload
  end
end
39
+
40
# Connection settings for an Ollama server: the caller supplies the endpoint
# URL and the model name; no authentication header is sent.
class OllamaConfiguration
  attr_reader :endpoint_url

  def initialize(endpoint_url:, model:)
    @model = model
    @endpoint_url = endpoint_url
  end

  # Returns the headers untouched, because an auth header is not required.
  def add_auth_header(headers)
    headers
  end

  # Adds the model name to the given payload (in place) and returns it.
  def decorate_body(payload)
    payload.tap { |body| body[:model] = @model }
  end
end
58
+ end