omniai-google 3.6.0 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bf6bab2505b42bcb8b61ca4e428b77bbffb9f961e183a759a5ad4c4caa6faded
4
- data.tar.gz: 46a4e0576c2b311ef25bb97384e268d9ae831d9c5eab192bd6192c42578fdffd
3
+ metadata.gz: 81bb938d26c6804a6942a17e57e1b1393f0e77925467a60659443f28c76d3752
4
+ data.tar.gz: ce70db23296a73089d2cbc6eaf344ae269af945c112dfc0942a17e31dc116dfe
5
5
  SHA512:
6
- metadata.gz: b076336eef0202b7a489003f8ac1acf6a75149e5f4c279c94452e3735075f15ef13e1849506fcd4e4bcd47471b1713affdbe794ddd69884c7144e656058fffb5
7
- data.tar.gz: 2fa90885dddc31135e74b6985ac089b2e016150c83c40b1470b29b4f754c90c23387643a902cbaae9951c502e1598284fff84198d022e98d1d8731c619c87279
6
+ metadata.gz: 0f5101592e2603341f545b3402a0c4b3b3516dd3fee36532d2a5691803e11610124db98d89af4e532bd16ccfff2663a37308d999dee79aea700a3066df559771
7
+ data.tar.gz: 78109e536527bc9853e84111e664c25c3574a0f7c952fadbaf2a025478be0172c7cd1a203c4bdf88aa185b242f2fb74f9b88f6d88205a0ec8c479bfe2391b6a0
@@ -85,8 +85,9 @@ module OmniAI
85
85
  #
86
86
  # @param input [String, Array<String>, Array<Integer>] required
87
87
  # @param model [String] optional
88
- def embed(input, model: Embed::DEFAULT_MODEL)
89
- Embed.process!(input, model:, client: self)
88
+ # @param options [Hash] provider-specific options (e.g. task_type: "RETRIEVAL_DOCUMENT")
89
+ def embed(input, model: Embed::DEFAULT_MODEL, **options)
90
+ Embed.process!(input, model:, client: self, **options)
90
91
  end
91
92
 
92
93
  # @raise [OmniAI::Error]
@@ -2,7 +2,7 @@
2
2
 
3
3
  module OmniAI
4
4
  module Google
5
- # An Google embed implementation.
5
+ # A Google embed implementation.
6
6
  #
7
7
  # Usage:
8
8
  #
@@ -14,67 +14,124 @@ module OmniAI
14
14
  TEXT_EMBEDDING_004 = "text-embedding-004"
15
15
  TEXT_EMBEDDING_005 = "text-embedding-005"
16
16
  TEXT_MULTILINGUAL_EMBEDDING_002 = "text-multilingual-embedding-002"
17
+ GEMINI_EMBEDDING_001 = "gemini-embedding-001"
18
+ GEMINI_EMBEDDING_2_PREVIEW = "gemini-embedding-2-preview"
17
19
  EMBEDDING = TEXT_EMBEDDING_004
18
20
  MULTILINGUAL_EMBEDDING = TEXT_MULTILINGUAL_EMBEDDING_002
19
21
  end
20
22
 
21
23
  DEFAULT_MODEL = Model::EMBEDDING
22
24
 
23
- DEFAULT_EMBEDDINGS_DESERIALIZER = proc do |data, *|
25
+ BATCH_EMBED_CONTENTS_DESERIALIZER = proc do |data, *|
24
26
  data["embeddings"].map { |embedding| embedding["values"] }
25
27
  end
26
28
 
27
- VERTEX_EMBEDDINGS_DESERIALIZER = proc do |data, *|
29
+ PREDICT_EMBEDDINGS_DESERIALIZER = proc do |data, *|
28
30
  data["predictions"].map { |prediction| prediction["embeddings"]["values"] }
29
31
  end
30
32
 
31
- VERTEX_USAGE_DESERIALIZER = proc do |data, *|
32
- tokens = data["predictions"].map { |prediction| prediction["embeddings"]["statistics"]["token_count"] }.sum
33
+ PREDICT_USAGE_DESERIALIZER = proc do |data, *|
34
+ tokens = data["predictions"].sum { |prediction| prediction["embeddings"]["statistics"]["token_count"] }
33
35
 
34
36
  Usage.new(prompt_tokens: tokens, total_tokens: tokens)
35
37
  end
36
38
 
39
+ EMBED_CONTENT_DESERIALIZER = proc do |data, *|
40
+ [data["embedding"]["values"]]
41
+ end
42
+
43
+ USAGE_METADATA_DESERIALIZER = proc do |data, *|
44
+ prompt_tokens = data.dig("usageMetadata", "promptTokenCount")
45
+ total_tokens = data.dig("usageMetadata", "totalTokenCount")
46
+
47
+ Usage.new(prompt_tokens: prompt_tokens, total_tokens: total_tokens)
48
+ end
49
+
37
50
  # @return [Context]
38
- DEFAULT_CONTEXT = Context.build do |context|
39
- context.deserializers[:embeddings] = DEFAULT_EMBEDDINGS_DESERIALIZER
51
+ BATCH_EMBED_CONTENTS_CONTEXT = Context.build do |context|
52
+ context.deserializers[:embeddings] = BATCH_EMBED_CONTENTS_DESERIALIZER
53
+ context.deserializers[:usage] = USAGE_METADATA_DESERIALIZER
40
54
  end
41
55
 
42
56
  # @return [Context]
43
- VERTEX_CONTEXT = Context.build do |context|
44
- context.deserializers[:embeddings] = VERTEX_EMBEDDINGS_DESERIALIZER
45
- context.deserializers[:usage] = VERTEX_USAGE_DESERIALIZER
57
+ PREDICT_CONTEXT = Context.build do |context|
58
+ context.deserializers[:embeddings] = PREDICT_EMBEDDINGS_DESERIALIZER
59
+ context.deserializers[:usage] = PREDICT_USAGE_DESERIALIZER
60
+ end
61
+
62
+ # @return [Context]
63
+ EMBED_CONTENT_CONTEXT = Context.build do |context|
64
+ context.deserializers[:embeddings] = EMBED_CONTENT_DESERIALIZER
65
+ context.deserializers[:usage] = USAGE_METADATA_DESERIALIZER
46
66
  end
47
67
 
48
68
  protected
49
69
 
50
- # @return [Boolean]
51
- def vertex?
52
- @client.vertex?
70
+ # Determines which endpoint to use based on client and model configuration.
71
+ # Routes gemini-embedding-2-* models to embedContent on Vertex, as Google's
72
+ # Vertex AI requires this endpoint for newer multimodal embedding models.
73
+ #
74
+ # @return [Symbol] :embed_content, :predict, or :batch_embed_contents
75
+ def endpoint
76
+ @endpoint ||= if @client.vertex? && @model.start_with?("gemini-embedding-2")
77
+ :embed_content
78
+ elsif @client.vertex?
79
+ :predict
80
+ else
81
+ :batch_embed_contents
82
+ end
53
83
  end
54
84
 
55
85
  # @return [Context]
56
86
  def context
57
- vertex? ? VERTEX_CONTEXT : DEFAULT_CONTEXT
87
+ case endpoint
88
+ when :embed_content then EMBED_CONTENT_CONTEXT
89
+ when :predict then PREDICT_CONTEXT
90
+ when :batch_embed_contents then BATCH_EMBED_CONTENTS_CONTEXT
91
+ end
92
+ end
93
+
94
+ # @return [Hash]
95
+ def payload
96
+ case endpoint
97
+ when :embed_content then embed_content_payload
98
+ when :predict then predict_payload
99
+ when :batch_embed_contents then batch_embed_contents_payload
100
+ end
58
101
  end
59
102
 
60
- # @return [Array[Hash]]
61
- def instances
62
- arrayify(@input).map { |content| { content: } }
103
+ # Builds payload for the Vertex embedContent endpoint (gemini-embedding-2-* models).
104
+ # @return [Hash]
105
+ def embed_content_payload
106
+ raise ArgumentError, "embedContent does not support batch input" if @input.is_a?(Array) && @input.length > 1
107
+
108
+ text = @input.is_a?(Array) ? @input.first : @input
109
+ result = { content: { parts: [{ text: text }] } }
110
+ result[:taskType] = @options[:task_type] if @options[:task_type]
111
+ result
63
112
  end
64
113
 
65
- # @return [Array[Hash]]
66
- def requests
67
- arrayify(@input).map do |text|
68
- {
69
- model: "models/#{@model}",
70
- content: { parts: [{ text: }] },
71
- }
72
- end
114
+ # Builds payload for the Vertex predict endpoint (text-embedding and gemini-embedding-001 models).
115
+ # @return [Hash]
116
+ def predict_payload
117
+ inputs = arrayify(@input)
118
+ { instances: inputs.map { |text| { content: text } } }
73
119
  end
74
120
 
121
+ # Builds payload for the Google AI batchEmbedContents endpoint (non-Vertex).
75
122
  # @return [Hash]
76
- def payload
77
- vertex? ? { instances: } : { requests: }
123
+ def batch_embed_contents_payload
124
+ inputs = arrayify(@input)
125
+ {
126
+ requests: inputs.map do |text|
127
+ request = {
128
+ model: "models/#{@model}",
129
+ content: { parts: [{ text: text }] },
130
+ }
131
+ request[:taskType] = @options[:task_type] if @options[:task_type]
132
+ request
133
+ end
134
+ }
78
135
  end
79
136
 
80
137
  # @return [Hash]
@@ -84,18 +141,13 @@ module OmniAI
84
141
 
85
142
  # @return [String]
86
143
  def path
87
- "/#{@client.path}/models/#{@model}:#{procedure}"
88
- end
89
-
90
- # @return [String]
91
- def procedure
92
- vertex? ? "predict" : "batchEmbedContents"
93
- end
144
+ procedure = case endpoint
145
+ when :embed_content then "embedContent"
146
+ when :predict then "predict"
147
+ when :batch_embed_contents then "batchEmbedContents"
148
+ end
94
149
 
95
- # @param input [Object]
96
- # @return [Array]
97
- def arrayify(input)
98
- input.is_a?(Array) ? input : [input]
150
+ "/#{@client.path}/models/#{@model}:#{procedure}"
99
151
  end
100
152
  end
101
153
  end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module OmniAI
4
4
  module Google
5
- VERSION = "3.6.0"
5
+ VERSION = "3.7.0"
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: omniai-google
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.6.0
4
+ version: 3.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kevin Sylvestre