omniai 1.6.5 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5d9011a55d57b9dd0923f4d7edd70001c3f9ee5b8211e98d59f9d001687af044
4
- data.tar.gz: 204e2f0ec0a44f65cba11cb49819e7f5c4e933a92d6bc1b48c4d630dee180ecd
3
+ metadata.gz: 24e77ca1695f294daa0762f9c4c9b32a39ba329d8745d10d8b9fdd5c98917838
4
+ data.tar.gz: c6594df5e048bd4c7ea5d04bc3a66a01a2311ff2ff36ec5ee60db22afca7e412
5
5
  SHA512:
6
- metadata.gz: 724827923255adade8f19bd9b43806d1af116d9c4dd6105713f0ba0b16c3d6cd5e5cbafdc2d324912a669523e565ba1ce190ed26035cdc1645a4505f2c6991a9
7
- data.tar.gz: 5944ef3f285359662fc6105fd2a3aba7a44a279c5062ec0c1bdf194ddecd6842a33b1cd9b20cd4f5b36641b67cd8bc046e677260170d06dd2d5e3b37db37f356
6
+ metadata.gz: 274c978038080fbbb760dc3341a6ffdd103bd7ac176511189f1f36d8287084800dd5fdbb098138a3bb2b4405e7272763423b9815f1c2d1ca735a3f55f16271a4
7
+ data.tar.gz: 39e42f5e5ea5031653b2d6474326c767183270f6ca3dd1ff0b873c64fdc1999df1bcc58015457f5c8673b1c4544ef4d4fe39c72e7eb450951a839a5cc27b7382
data/README.md CHANGED
@@ -229,6 +229,29 @@ tempfile.close
229
229
  tempfile.unlink
230
230
  ```
231
231
 
232
+ ### Embeddings
233
+
234
+ Clients that support generating embeddings (e.g. OpenAI, Mistral, etc.) convert text to embeddings via the following:
235
+
236
+ ```ruby
237
+ response = client.embed('The quick brown fox jumps over a lazy dog')
238
+ response.usage # <OmniAI::Embed::Usage prompt_tokens=5 total_tokens=5>
239
+ response.embedding # [0.1, 0.2, ...]
240
+ ```
241
+
242
+ Batches of text can also be converted to embeddings via the following:
243
+
244
+ ```ruby
245
+ response = client.embed([
246
+ '',
247
+ '',
248
+ ])
249
+ response.usage # <OmniAI::Embed::Usage prompt_tokens=5 total_tokens=5>
250
+ response.embeddings.each do |embedding|
251
+ embedding # [0.1, 0.2, ...]
252
+ end
253
+ ```
254
+
232
255
  ## CLI
233
256
 
234
257
  OmniAI packages a basic command line interface (CLI) to allow for exploration of various APIs. Detailed CLI documentation can be found via help:
@@ -263,3 +286,39 @@ Type 'exit' or 'quit' to abort.
263
286
  ```
264
287
  The warmest place on earth is Africa.
265
288
  ```
289
+
290
+ ### Embed
291
+
292
+ #### w/ input
293
+
294
+ ```bash
295
+ omniai embed "The quick brown fox jumps over a lazy dog."
296
+ ```
297
+
298
+ ```
299
+ 0.0
300
+ ...
301
+ ```
302
+
303
+ #### w/o input
304
+
305
+ ```bash
306
+ omniai embed --provider="openai" --model="text-embedding-ada-002"
307
+ ```
308
+
309
+ ```
310
+ Type "exit" or "quit" to leave.
311
+ # The quick brown fox jumps over a lazy dog.
312
+ ```
313
+
314
+ ```
315
+ 0.0
316
+ ...
317
+ ```
318
+
319
+ 0.0
320
+ ...
321
+
322
+ ```
323
+
324
+ ```
@@ -4,6 +4,14 @@ module OmniAI
4
4
  class Chat
5
5
  # A placeholder for parts of a message. Any subclass must implement the serializable interface.
6
6
  class Content
7
+ # @return [String]
8
+ def self.summarize(content)
9
+ return content.map { |entry| summarize(entry) }.join("\n\n") if content.is_a?(Array)
10
+ return content if content.is_a?(String)
11
+
12
+ content.summarize
13
+ end
14
+
7
15
  # @param context [Context] optional
8
16
  #
9
17
  # @return [String]
@@ -15,6 +15,11 @@ module OmniAI
15
15
  @type = type
16
16
  end
17
17
 
18
+ # @return [String]
19
+ def summarize
20
+ "[#{filename}]"
21
+ end
22
+
18
23
  # @return [Boolean]
19
24
  def text?
20
25
  @type.match?(%r{^text/})
@@ -31,6 +31,14 @@ module OmniAI
31
31
  "#<#{self.class} role=#{@role.inspect} content=#{@content.inspect}>"
32
32
  end
33
33
 
34
+ # @return [String]
35
+ def summarize
36
+ <<~TEXT
37
+ #{@role}
38
+ #{Content.summarize(@content)}
39
+ TEXT
40
+ end
41
+
34
42
  # Usage:
35
43
  #
36
44
  # Message.deserialize({ role: :user, content: 'Hello!' }) # => #<Message ...>
@@ -62,6 +62,11 @@ module OmniAI
62
62
  "#<#{self.class.name} messages=#{@messages.inspect}>"
63
63
  end
64
64
 
65
+ # @return [String]
66
+ def summarize
67
+ @messages.map(&:summarize).join("\n\n")
68
+ end
69
+
65
70
  # Usage:
66
71
  #
67
72
  # prompt.serialize # => [{ content: "What is the capital of Canada?", role: :user }]
@@ -18,6 +18,11 @@ module OmniAI
18
18
  "#<#{self.class} text=#{@text.inspect}>"
19
19
  end
20
20
 
21
+ # @return [String]
22
+ def summarize
23
+ @text
24
+ end
25
+
21
26
  # @param data [Hash]
22
27
  def self.deserialize(data, context: nil)
23
28
  deserialize = context&.deserializers&.[](:text)
@@ -16,6 +16,11 @@ module OmniAI
16
16
  @uri = uri
17
17
  end
18
18
 
19
+ # @return [String]
20
+ def summarize
21
+ "[#{filename}]"
22
+ end
23
+
19
24
  # @return [String]
20
25
  def inspect
21
26
  "#<#{self.class} uri=#{@uri.inspect}>"
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ class CLI
5
+ # Used for CLI usage of 'omniai embed'.
6
+ class EmbedHandler < BaseHandler
7
+ # @param argv [Array<String>]
8
+ def handle!(argv:)
9
+ parser.parse!(argv)
10
+
11
+ if argv.empty?
12
+ listen!
13
+ else
14
+ embed(input: argv.join(' '))
15
+ end
16
+ end
17
+
18
+ private
19
+
20
+ def listen!
21
+ @stdout.puts('Type "exit" or "quit" to leave.')
22
+
23
+ loop do
24
+ @stdout.print('# ')
25
+ @stdout.flush
26
+ input = @stdin.gets&.chomp
27
+
28
+ break if input.nil? || input.match?(/\A(exit|quit)\z/i)
29
+
30
+ embed(input:)
31
+ rescue Interrupt
32
+ break
33
+ end
34
+ end
35
+
36
+ # @param input [String]
37
+ def embed(input:)
38
+ response = client.embed(input, **@args)
39
+ @stdout.puts(response.embedding)
40
+ end
41
+
42
+ # @return [OptionParser]
43
+ def parser
44
+ OptionParser.new do |options|
45
+ options.banner = 'usage: omniai embed [options] "<prompt>"'
46
+
47
+ options.on('-h', '--help', 'help') do
48
+ @stdout.puts(options)
49
+ exit
50
+ end
51
+
52
+ options.on('-p', '--provider=PROVIDER', 'provider') { |provider| @provider = provider }
53
+ options.on('-m', '--model=MODEL', 'model') { |model| @args[:model] = model }
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
data/lib/omniai/cli.rb CHANGED
@@ -28,10 +28,14 @@ module OmniAI
28
28
  command = argv.shift
29
29
  return if command.nil?
30
30
 
31
- case command
32
- when 'chat' then ChatHandler.handle!(stdin: @stdin, stdout: @stdout, provider: @provider, argv:)
33
- else raise Error, "unsupported command=#{command.inspect}"
34
- end
31
+ handler =
32
+ case command
33
+ when 'chat' then ChatHandler
34
+ when 'embed' then EmbedHandler
35
+ else raise Error, "unsupported command=#{command.inspect}"
36
+ end
37
+
38
+ handler.handle!(stdin: @stdin, stdout: @stdout, provider: @provider, argv:)
35
39
  end
36
40
 
37
41
  private
data/lib/omniai/client.rb CHANGED
@@ -173,5 +173,15 @@ module OmniAI
173
173
  def speak(input, model:, voice:, speed: nil, format: nil, &stream)
174
174
  raise NotImplementedError, "#{self.class.name}#speak undefined"
175
175
  end
176
+
177
+ # @raise [OmniAI::Error]
178
+ #
179
+ # @param input [String, Array<String>] required
180
+ # @param model [String] required
181
+ #
182
+ # @return [OmniAI::Embed::Response]
183
+ def embed(input, model:)
184
+ raise NotImplementedError, "#{self.class.name}#embed undefined"
185
+ end
176
186
  end
177
187
  end
@@ -0,0 +1,43 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ # Used to handle the setup of serializer / deserializer required per provider (e.g. Anthropic / Google / etc).
5
+ #
6
+ # Usage:
7
+ #
8
+ # OmniAI::Context.build do |context|
9
+ # context.serializers[:prompt] = (prompt, context:) -> { ... }
10
+ # context.serializers[:message] = (prompt, context:) -> { ... }
11
+ # context.serializers[:file] = (prompt, context:) -> { ... }
12
+ # context.serializers[:text] = (prompt, context:) -> { ... }
13
+ # context.serializers[:url] = (prompt, context:) -> { ... }
14
+ # context.deserializers[:prompt] = (data, context:) -> { Prompt.new(...) }
15
+ # context.deserializers[:message] = (data, context:) -> { Message.new(...) }
16
+ # context.deserializers[:file] = (data, context:) -> { File.new(...) }
17
+ # context.deserializers[:text] = (data, context:) -> { Text.new(...) }
18
+ # context.deserializers[:url] = (data, context:) -> { URL.new(...) }
19
+ # end
20
+ class Context
21
+ # @return [Hash]
22
+ attr_accessor :serializers
23
+
24
+ # @return [Hash]
25
+ attr_reader :deserializers
26
+
27
+ # @yield [context]
28
+ # @yieldparam context [Context]
29
+ #
30
+ # @return [Context]
31
+ def self.build(&block)
32
+ new.tap do |context|
33
+ block&.call(context)
34
+ end
35
+ end
36
+
37
+ # @return [Context]
38
+ def initialize
39
+ @serializers = {}
40
+ @deserializers = {}
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ class Embed
5
+ # The response returned by the API.
6
+ class Response
7
+ # @return [Hash]
8
+ attr_accessor :data
9
+
10
+ # @param data [Hash]
11
+ # @param context [OmniAI::Context] optional
12
+ def initialize(data:, context: nil)
13
+ @data = data
14
+ @context = context
15
+ end
16
+
17
+ # @return [String]
18
+ def inspect
19
+ "#<#{self.class.name}>"
20
+ end
21
+
22
+ # @return [Usage]
23
+ def usage
24
+ @usage ||= begin
25
+ deserializer = @context&.deserializers&.[](:usage)
26
+
27
+ if deserializer
28
+ deserializer.call(@data, context: @context)
29
+ else
30
+ prompt_tokens = @data.dig('usage', 'prompt_tokens')
31
+ total_tokens = @data.dig('usage', 'total_tokens')
32
+
33
+ Usage.new(prompt_tokens:, total_tokens:)
34
+ end
35
+ end
36
+ end
37
+
38
+ # @param index [Integer] optional
39
+ #
40
+ # @return [Array<Float>]
41
+ def embedding(index: 0)
42
+ embeddings[index]
43
+ end
44
+
45
+ # @return [Array<Array<Float>>]
46
+ def embeddings
47
+ @embeddings ||= begin
48
+ deserializer = @context&.deserializers&.[](:embeddings)
49
+
50
+ if deserializer
51
+ deserializer.call(@data, context: @context)
52
+ else
53
+ @data['data'].map { |embedding| embedding['embedding'] }
54
+ end
55
+ end
56
+ end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ class Embed
5
+ # Token usage returned by the API.
6
+ class Usage
7
+ # @return [Integer]
8
+ attr_accessor :prompt_tokens
9
+
10
+ # @return [Integer]
11
+ attr_accessor :total_tokens
12
+
13
+ # @param prompt_tokens [Integer]
14
+ # @param total_tokens [Integer]
15
+ def initialize(prompt_tokens:, total_tokens:)
16
+ @prompt_tokens = prompt_tokens
17
+ @total_tokens = total_tokens
18
+ end
19
+
20
+ # @return [String]
21
+ def inspect
22
+ "#<#{self.class.name} prompt_tokens=#{@prompt_tokens} total_tokens=#{@total_tokens}>"
23
+ end
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,80 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OmniAI
4
+ # An abstract class that provides a consistent interface for processing embedding requests.
5
+ #
6
+ # Usage:
7
+ #
8
+ # class OmniAI::OpenAI::Embed < OmniAI::Embed
9
+ # module Model
10
+ # SMALL = "text-embedding-3-small"
11
+ # LARGE = "text-embedding-3-large"
12
+ # ADA = "text-embedding-ada-002"
13
+ # end
14
+ #
15
+ # protected
16
+ #
17
+ # # @return [Hash]
18
+ # def payload
19
+ # { ... }
20
+ # end
21
+ #
22
+ # # @return [String]
23
+ # def path
24
+ # "..."
25
+ # end
26
+ # end
27
+ #
28
+ # client.embed(input, model: "...")
29
+ class Embed
30
+ def self.process!(...)
31
+ new(...).process!
32
+ end
33
+
34
+ # @param input [String, Array<String>] required
35
+ # @param client [Client] the client
36
+ # @param model [String] required
37
+ #
38
+ # @return [Response]
39
+ def initialize(input, client:, model:)
40
+ @input = input
41
+ @client = client
42
+ @model = model
43
+ end
44
+
45
+ # @raise [Error]
46
+ # @return [Response]
47
+ def process!
48
+ response = request!
49
+ raise HTTPError, response.flush unless response.status.ok?
50
+
51
+ parse!(response:)
52
+ end
53
+
54
+ protected
55
+
56
+ # @param response [HTTP::Response]
57
+ # @return [Response]
58
+ def parse!(response:)
59
+ Response.new(data: response.parse)
60
+ end
61
+
62
+ # @return [HTTP::Response]
63
+ def request!
64
+ @client
65
+ .connection
66
+ .accept(:json)
67
+ .post(path, json: payload)
68
+ end
69
+
70
+ # @return [Hash]
71
+ def payload
72
+ raise NotImplementedError, "#{self.class.name}#payload undefined"
73
+ end
74
+
75
+ # @return [String]
76
+ def path
77
+ raise NotImplementedError, "#{self.class.name}#path undefined"
78
+ end
79
+ end
80
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module OmniAI
4
- VERSION = '1.6.5'
4
+ VERSION = '1.7.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.5
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-07-19 00:00:00.000000000 Z
11
+ date: 2024-08-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: event_stream_parser
@@ -69,7 +69,6 @@ files:
69
69
  - lib/omniai.rb
70
70
  - lib/omniai/chat.rb
71
71
  - lib/omniai/chat/content.rb
72
- - lib/omniai/chat/context.rb
73
72
  - lib/omniai/chat/file.rb
74
73
  - lib/omniai/chat/media.rb
75
74
  - lib/omniai/chat/message.rb
@@ -93,8 +92,13 @@ files:
93
92
  - lib/omniai/cli.rb
94
93
  - lib/omniai/cli/base_handler.rb
95
94
  - lib/omniai/cli/chat_handler.rb
95
+ - lib/omniai/cli/embed_handler.rb
96
96
  - lib/omniai/client.rb
97
97
  - lib/omniai/config.rb
98
+ - lib/omniai/context.rb
99
+ - lib/omniai/embed.rb
100
+ - lib/omniai/embed/response.rb
101
+ - lib/omniai/embed/usage.rb
98
102
  - lib/omniai/instrumentation.rb
99
103
  - lib/omniai/speak.rb
100
104
  - lib/omniai/tool.rb
@@ -1,42 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OmniAI
4
- class Chat
5
- # Used to handle the setup of serializer / deserializer methods for each type.
6
- #
7
- # Usage:
8
- #
9
- # OmniAI::Chat::Context.build do |context|
10
- # context.serializers[:prompt] = (prompt, context:) -> { ... }
11
- # context.serializers[:message] = (prompt, context:) -> { ... }
12
- # context.serializers[:file] = (prompt, context:) -> { ... }
13
- # context.serializers[:text] = (prompt, context:) -> { ... }
14
- # context.serializers[:url] = (prompt, context:) -> { ... }
15
- # context.deserializers[:prompt] = (data, context:) -> { Prompt.new(...) }
16
- # context.deserializers[:message] = (data, context:) -> { Message.new(...) }
17
- # context.deserializers[:file] = (data, context:) -> { File.new(...) }
18
- # context.deserializers[:text] = (data, context:) -> { Text.new(...) }
19
- # context.deserializers[:url] = (data, context:) -> { URL.new(...) }
20
- # end
21
- class Context
22
- # @return [Hash]
23
- attr_accessor :serializers
24
-
25
- # @return [Hash]
26
- attr_reader :deserializers
27
-
28
- # @return [Context]
29
- def self.build(&block)
30
- new.tap do |context|
31
- block&.call(context)
32
- end
33
- end
34
-
35
- # @return [Context]
36
- def initialize
37
- @serializers = {}
38
- @deserializers = {}
39
- end
40
- end
41
- end
42
- end