omniai-google 3.6.0 → 3.7.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf6bab2505b42bcb8b61ca4e428b77bbffb9f961e183a759a5ad4c4caa6faded
4
- data.tar.gz: 46a4e0576c2b311ef25bb97384e268d9ae831d9c5eab192bd6192c42578fdffd
3
+ metadata.gz: 74848884db95fef34b6c7306b2814ecf8d50797963e690a93b77e69c93cc7d3a
4
+ data.tar.gz: 9fbb92e5fe4fe36d9086182391ae383180607b5e97b36a17e458dc200bef73e1
5
5
  SHA512:
6
- metadata.gz: b076336eef0202b7a489003f8ac1acf6a75149e5f4c279c94452e3735075f15ef13e1849506fcd4e4bcd47471b1713affdbe794ddd69884c7144e656058fffb5
7
- data.tar.gz: 2fa90885dddc31135e74b6985ac089b2e016150c83c40b1470b29b4f754c90c23387643a902cbaae9951c502e1598284fff84198d022e98d1d8731c619c87279
6
+ metadata.gz: c92c79303ec17e46832115d075aa29760ba8d1f52111286d51d370c49c5266bb2e0ec9956e058a133de69bd0bb28c8ea4fa52efab34edea64f9cdc896397069e
7
+ data.tar.gz: 5fc61a87b13a8bd3ba73bc6b8c150c7da1d77aef938d7c826f9eac541b78ea288fb0698bbaf9a7609d7a19a85607b3d04d97e82cd013a46f64c319c86329ea60
@@ -85,8 +85,9 @@ module OmniAI
85
85
  #
86
86
  # @param input [String, Array<String>, Array<Integer>] required
87
87
  # @param model [String] optional
88
- def embed(input, model: Embed::DEFAULT_MODEL)
89
- Embed.process!(input, model:, client: self)
88
+ # @param options [Hash] provider-specific options (e.g. task_type: "RETRIEVAL_DOCUMENT")
89
+ def embed(input, model: Embed::DEFAULT_MODEL, **)
90
+ Embed.process!(input, model:, client: self, **)
90
91
  end
91
92
 
92
93
  # @raise [OmniAI::Error]
@@ -2,7 +2,7 @@
2
2
 
3
3
  module OmniAI
4
4
  module Google
5
- # An Google embed implementation.
5
+ # A Google embed implementation.
6
6
  #
7
7
  # Usage:
8
8
  #
@@ -14,67 +14,125 @@ module OmniAI
14
14
  TEXT_EMBEDDING_004 = "text-embedding-004"
15
15
  TEXT_EMBEDDING_005 = "text-embedding-005"
16
16
  TEXT_MULTILINGUAL_EMBEDDING_002 = "text-multilingual-embedding-002"
17
+ GEMINI_EMBEDDING_001 = "gemini-embedding-001"
18
+ GEMINI_EMBEDDING_2_PREVIEW = "gemini-embedding-2-preview"
17
19
  EMBEDDING = TEXT_EMBEDDING_004
18
20
  MULTILINGUAL_EMBEDDING = TEXT_MULTILINGUAL_EMBEDDING_002
19
21
  end
20
22
 
21
23
  DEFAULT_MODEL = Model::EMBEDDING
22
24
 
23
- DEFAULT_EMBEDDINGS_DESERIALIZER = proc do |data, *|
25
+ BATCH_EMBED_CONTENTS_DESERIALIZER = proc do |data, *|
24
26
  data["embeddings"].map { |embedding| embedding["values"] }
25
27
  end
26
28
 
27
- VERTEX_EMBEDDINGS_DESERIALIZER = proc do |data, *|
29
+ PREDICT_EMBEDDINGS_DESERIALIZER = proc do |data, *|
28
30
  data["predictions"].map { |prediction| prediction["embeddings"]["values"] }
29
31
  end
30
32
 
31
- VERTEX_USAGE_DESERIALIZER = proc do |data, *|
32
- tokens = data["predictions"].map { |prediction| prediction["embeddings"]["statistics"]["token_count"] }.sum
33
+ PREDICT_USAGE_DESERIALIZER = proc do |data, *|
34
+ tokens = data["predictions"].sum { |prediction| prediction["embeddings"]["statistics"]["token_count"] }
33
35
 
34
36
  Usage.new(prompt_tokens: tokens, total_tokens: tokens)
35
37
  end
36
38
 
39
+ EMBED_CONTENT_DESERIALIZER = proc do |data, *|
40
+ [data["embedding"]["values"]]
41
+ end
42
+
43
+ USAGE_METADATA_DESERIALIZER = proc do |data, *|
44
+ prompt_tokens = data.dig("usageMetadata", "promptTokenCount")
45
+ total_tokens = data.dig("usageMetadata", "totalTokenCount")
46
+
47
+ Usage.new(prompt_tokens:, total_tokens:)
48
+ end
49
+
37
50
  # @return [Context]
38
- DEFAULT_CONTEXT = Context.build do |context|
39
- context.deserializers[:embeddings] = DEFAULT_EMBEDDINGS_DESERIALIZER
51
+ BATCH_EMBED_CONTENTS_CONTEXT = Context.build do |context|
52
+ context.deserializers[:embeddings] = BATCH_EMBED_CONTENTS_DESERIALIZER
53
+ context.deserializers[:usage] = USAGE_METADATA_DESERIALIZER
40
54
  end
41
55
 
42
56
  # @return [Context]
43
- VERTEX_CONTEXT = Context.build do |context|
44
- context.deserializers[:embeddings] = VERTEX_EMBEDDINGS_DESERIALIZER
45
- context.deserializers[:usage] = VERTEX_USAGE_DESERIALIZER
57
+ PREDICT_CONTEXT = Context.build do |context|
58
+ context.deserializers[:embeddings] = PREDICT_EMBEDDINGS_DESERIALIZER
59
+ context.deserializers[:usage] = PREDICT_USAGE_DESERIALIZER
60
+ end
61
+
62
+ # @return [Context]
63
+ EMBED_CONTENT_CONTEXT = Context.build do |context|
64
+ context.deserializers[:embeddings] = EMBED_CONTENT_DESERIALIZER
65
+ context.deserializers[:usage] = USAGE_METADATA_DESERIALIZER
46
66
  end
47
67
 
48
68
  protected
49
69
 
50
- # @return [Boolean]
51
- def vertex?
52
- @client.vertex?
70
+ # Determines which endpoint to use based on client and model configuration.
71
+ # Routes gemini-embedding-2-* models to embedContent on Vertex, as Google's
72
+ # Vertex AI requires this endpoint for newer multimodal embedding models.
73
+ #
74
+ # @return [Symbol] :embed_content, :predict, or :batch_embed_contents
75
+ def endpoint
76
+ @endpoint ||=
77
+ if @client.vertex? && @model.start_with?("gemini-embedding-2")
78
+ :embed_content
79
+ elsif @client.vertex?
80
+ :predict
81
+ else
82
+ :batch_embed_contents
83
+ end
53
84
  end
54
85
 
55
86
  # @return [Context]
56
87
  def context
57
- vertex? ? VERTEX_CONTEXT : DEFAULT_CONTEXT
88
+ case endpoint
89
+ when :embed_content then EMBED_CONTENT_CONTEXT
90
+ when :predict then PREDICT_CONTEXT
91
+ when :batch_embed_contents then BATCH_EMBED_CONTENTS_CONTEXT
92
+ end
58
93
  end
59
94
 
60
- # @return [Array[Hash]]
61
- def instances
62
- arrayify(@input).map { |content| { content: } }
95
+ # @return [Hash]
96
+ def payload
97
+ case endpoint
98
+ when :embed_content then embed_content_payload
99
+ when :predict then predict_payload
100
+ when :batch_embed_contents then batch_embed_contents_payload
101
+ end
63
102
  end
64
103
 
65
- # @return [Array[Hash]]
66
- def requests
67
- arrayify(@input).map do |text|
68
- {
69
- model: "models/#{@model}",
70
- content: { parts: [{ text: }] },
71
- }
72
- end
104
+ # Builds payload for the Vertex embedContent endpoint (gemini-embedding-2-* models).
105
+ # @return [Hash]
106
+ def embed_content_payload
107
+ raise ArgumentError, "embedContent does not support batch input" if @input.is_a?(Array) && @input.length > 1
108
+
109
+ text = @input.is_a?(Array) ? @input.first : @input
110
+ result = { content: { parts: [{ text: }] } }
111
+ result[:taskType] = @options[:task_type] if @options[:task_type]
112
+ result
73
113
  end
74
114
 
115
+ # Builds payload for the Vertex predict endpoint (text-embedding and gemini-embedding-001 models).
75
116
  # @return [Hash]
76
- def payload
77
- vertex? ? { instances: } : { requests: }
117
+ def predict_payload
118
+ inputs = arrayify(@input)
119
+ { instances: inputs.map { |text| { content: text } } }
120
+ end
121
+
122
+ # Builds payload for the Google AI batchEmbedContents endpoint (non-Vertex).
123
+ # @return [Hash]
124
+ def batch_embed_contents_payload
125
+ inputs = arrayify(@input)
126
+ {
127
+ requests: inputs.map do |text|
128
+ request = {
129
+ model: "models/#{@model}",
130
+ content: { parts: [{ text: }] },
131
+ }
132
+ request[:taskType] = @options[:task_type] if @options[:task_type]
133
+ request
134
+ end,
135
+ }
78
136
  end
79
137
 
80
138
  # @return [Hash]
@@ -82,20 +140,15 @@ module OmniAI
82
140
  { key: (@client.api_key unless @client.credentials?) }.compact
83
141
  end
84
142
 
85
- # @return [String]
86
- def path
87
- "/#{@client.path}/models/#{@model}:#{procedure}"
88
- end
143
+ PROCEDURES = {
144
+ embed_content: "embedContent",
145
+ predict: "predict",
146
+ batch_embed_contents: "batchEmbedContents",
147
+ }.freeze
89
148
 
90
149
  # @return [String]
91
- def procedure
92
- vertex? ? "predict" : "batchEmbedContents"
93
- end
94
-
95
- # @param input [Object]
96
- # @return [Array]
97
- def arrayify(input)
98
- input.is_a?(Array) ? input : [input]
150
+ def path
151
+ "/#{@client.path}/models/#{@model}:#{PROCEDURES[endpoint]}"
99
152
  end
100
153
  end
101
154
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Google
5
- VERSION = "3.6.0"
5
+ VERSION = "3.7.1"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-google
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.0
4
+ version: 3.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre