lex-llm-vertex 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: b1ff87c604774c449138054bd3675a26da6696eaa43993c86b6f567e8861b628
4
+ data.tar.gz: f87648ca25d8317bbacbe84597b6c3abc495ea5f5ede11b2d363b74db04cfdd0
5
+ SHA512:
6
+ metadata.gz: df0217a1da4c3cea8adf36a0bf3fe27fa6932b672b8053f7aab01e5b360a6e6601548244e6f804817a5319c210e9ec05913b39023a260d06ba66f6b9dd9104cd
7
+ data.tar.gz: 2f17a80cdb3e2553ec857ad68d359c9669a93203c9b0b116c6dd547bc22350ad7229b5b201bc3509aba1e747ff20d8bb982d0899a36fdfac9323706d89cc9155
@@ -0,0 +1 @@
1
+ * @LegionIO/maintainers
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: bundler
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+ - package-ecosystem: github-actions
8
+ directory: /
9
+ schedule:
10
+ interval: weekly
@@ -0,0 +1,16 @@
1
+ name: CI
2
+ on:
3
+ push:
4
+ branches: [main]
5
+ pull_request:
6
+
7
+ jobs:
8
+ ci:
9
+ uses: LegionIO/.github/.github/workflows/ci.yml@main
10
+
11
+ release:
12
+ needs: ci
13
+ if: github.event_name == 'push' && github.ref == 'refs/heads/main'
14
+ uses: LegionIO/.github/.github/workflows/release.yml@main
15
+ secrets:
16
+ rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
data/.gitignore ADDED
@@ -0,0 +1,12 @@
1
+ /.bundle/
2
+ /coverage/
3
+ /pkg/
4
+ /tmp/
5
+ Gemfile.lock
6
+ *.gem
7
+
8
+ .rspec_status*
9
+ .env
10
+ .claude
11
+ AGENTS.md
12
+ CLAUDE.md
data/.rubocop.yml ADDED
@@ -0,0 +1,32 @@
1
+ plugins:
2
+ - rubocop-performance
3
+ - rubocop-rake
4
+ - rubocop-rspec
5
+
6
+ AllCops:
7
+ NewCops: enable
8
+ TargetRubyVersion: 3.4
9
+ SuggestExtensions: false
10
+
11
+ Metrics/BlockLength:
12
+ Exclude:
13
+ - "*.gemspec"
14
+ - spec/**/*
15
+ Metrics/MethodLength:
16
+ Enabled: false
17
+ Metrics/ParameterLists:
18
+ Enabled: false
19
+ Metrics/AbcSize:
20
+ Enabled: false
21
+ Metrics/CyclomaticComplexity:
22
+ Enabled: false
23
+ Metrics/PerceivedComplexity:
24
+ Enabled: false
25
+ RSpec/MultipleExpectations:
26
+ Enabled: false
27
+ RSpec/ExampleLength:
28
+ Enabled: false
29
+ RSpec/LeakyConstantDeclaration:
30
+ Enabled: false
31
+ RSpec/InstanceVariable:
32
+ Enabled: false
data/CHANGELOG.md ADDED
@@ -0,0 +1,7 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 - 2026-04-28
4
+
5
+ - Initial Legion::Extensions::Llm Vertex AI provider extension scaffold.
6
+ - Add offline provider defaults, project/location-aware model offering mapping, Vertex publisher model endpoint construction, chat, streaming, embeddings, token-counting metadata, health, and live discovery entrypoints.
7
+ - Add README, gemspec, CI, and stubbed unit specs for Vertex AI routing behavior.
data/Gemfile ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ source 'https://rubygems.org'
4
+
5
+ group :test do
6
+ llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
7
+ gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
8
+ end
9
+
10
+ gemspec
11
+
12
+ group :development do
13
+ gem 'bundler', '>= 2.0'
14
+ gem 'rake', '>= 13.0'
15
+ gem 'rspec', '~> 3.12'
16
+ gem 'rubocop', '>= 1.0'
17
+ gem 'rubocop-performance'
18
+ gem 'rubocop-rake', '>= 0.6'
19
+ gem 'rubocop-rspec'
20
+ end
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 LegionIO
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,79 @@
1
+ # lex-llm-vertex
2
+
3
+ Google Cloud Vertex AI provider extension for `Legion::Extensions::Llm`.
4
+
5
+ This gem adds a hosted Vertex AI provider surface for Legion LLM routing without depending on the old `legion-llm` gem. It keeps discovery offline by default, preserves full Vertex publisher model resource names for routing, and exposes project/location instance metadata for multi-region provider fleets.
6
+
7
+ ## Install
8
+
9
+ ```ruby
10
+ gem 'lex-llm-vertex'
11
+ ```
12
+
13
+ ## Configuration
14
+
15
+ The provider registers the `:vertex` provider family with `Legion::Extensions::Llm::Provider`.
16
+
17
+ ```ruby
18
+ require 'legion/extensions/llm/vertex'
19
+
20
+ Legion::Extensions::Llm.configure do |config|
21
+ config.vertex_project = ENV['GOOGLE_CLOUD_PROJECT']
22
+ config.vertex_location = ENV.fetch('VERTEX_LOCATION', 'us-central1')
23
+ config.vertex_access_token = ENV['VERTEX_ACCESS_TOKEN']
24
+ end
25
+ ```
26
+
27
+ `vertex_access_token` is optional for local routing metadata and tests. For live calls, provide a Google Cloud access token through configuration or use Application Default Credentials in the process that owns HTTP authentication.
28
+
29
+ Default settings expose `env://` references and keep live discovery disabled:
30
+
31
+ ```ruby
32
+ Legion::Extensions::Llm::Vertex.default_settings
33
+ ```
34
+
35
+ ## Provider Surface
36
+
37
+ ```ruby
38
+ provider = Legion::Extensions::Llm::Vertex::Provider.new(Legion::Extensions::Llm.config)
39
+
40
+ provider.discover_offerings(live: false)
41
+ provider.offering_for(model: 'gemini-2.5-flash')
42
+ provider.health(live: false)
43
+ provider.chat(messages, model: model)
44
+ provider.stream(messages, model: model) { |chunk| chunk.content }
45
+ provider.embed('hello', model: 'gemini-embedding-001')
46
+ provider.count_tokens(messages, model: model)
47
+ ```
48
+
49
+ `discover_offerings(live: false)` returns a conservative static catalog for routing defaults and unit tests. `discover_offerings(live: true)` calls the Vertex publisher models listing endpoint and maps returned model data into `Legion::Extensions::Llm::Routing::ModelOffering` records.
50
+
51
+ ## Model Offerings
52
+
53
+ Every offering uses:
54
+
55
+ - `provider_family: :vertex`
56
+ - `transport: :http`
57
+ - the full Vertex publisher model resource name as `model`
58
+ - `metadata[:model_family]` inferred from the publisher/model or accepted from the caller
59
+ - `metadata[:project]` and `metadata[:location]` copied from the provider instance
60
+
61
+ The set of known aliases is intentionally small and can be extended through the `vertex_model_aliases` configuration option. For example, `gemini-flash` resolves to `gemini-2.5-flash`, while the offering preserves `projects/{project}/locations/{location}/publishers/google/models/gemini-2.5-flash`.
62
+
63
+ ## API Contract
64
+
65
+ The implementation is intentionally limited to Vertex AI REST surfaces documented by Google Cloud:
66
+
67
+ - `generateContent` and `streamGenerateContent` for Gemini publisher models
68
+ - `countTokens` for Gemini-style publisher models
69
+ - `predict` for documented text embedding models
70
+ - `rawPredict` and `streamRawPredict` endpoint builders for partner publisher models such as Mistral, Anthropic, and Meta
71
+
72
+ Provider-specific request bodies are not guessed. Partner raw-predict chat requests use the message shape documented for those partner model endpoints; embeddings are only implemented for documented Vertex text embedding models.
73
+
74
+ Google Cloud references:
75
+
76
+ - [Vertex AI GenAI REST API](https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest)
77
+ - [Generate content with the Gemini API in Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference)
78
+ - [Text embeddings API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api)
79
+ - [Mistral AI models on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral)
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/legion/extensions/llm/vertex/version'
4
+
5
+ Gem::Specification.new do |spec|
6
+ spec.name = 'lex-llm-vertex'
7
+ spec.version = Legion::Extensions::Llm::Vertex::VERSION
8
+ spec.authors = ['LegionIO']
9
+ spec.email = ['matthewdiverson@gmail.com']
10
+ spec.summary = 'LegionIO LLM Google Cloud Vertex AI provider extension'
11
+ spec.description = 'Google Cloud Vertex AI provider integration for the LegionIO LLM routing framework.'
12
+ spec.homepage = 'https://github.com/LegionIO/lex-llm-vertex'
13
+ spec.license = 'MIT'
14
+ spec.required_ruby_version = '>= 3.4'
15
+
16
+ spec.metadata['homepage_uri'] = spec.homepage
17
+ spec.metadata['source_code_uri'] = spec.homepage
18
+ spec.metadata['documentation_uri'] = spec.homepage
19
+ spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/main/CHANGELOG.md"
20
+ spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
21
+ spec.metadata['rubygems_mfa_required'] = 'true'
22
+
23
+ spec.files = `git ls-files -z`.split("\x0").reject { |file| file.match(%r{^(spec|test|features|tmp|coverage)/}) }
24
+ spec.require_paths = ['lib']
25
+
26
+ spec.add_dependency 'legion-json', '>= 1.2.1'
27
+ spec.add_dependency 'legion-logging', '>= 1.3.2'
28
+ spec.add_dependency 'legion-settings', '>= 1.3.14'
29
+ spec.add_dependency 'lex-llm', '>= 0.1.3'
30
+ end
@@ -0,0 +1,609 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/json'
4
+ require 'legion/logging'
5
+ require 'legion/settings'
6
+ require 'legion/extensions/llm'
7
+ require 'securerandom'
8
+
9
+ module Legion
10
+ module Extensions
11
+ module Llm
12
+ module Vertex
13
+ # Google Cloud Vertex AI provider implementation for the Legion::Extensions::Llm contract.
14
+ class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
15
+ DEFAULT_LOCATION = 'us-central1'
16
+ DEFAULT_PROJECT = 'env://GOOGLE_CLOUD_PROJECT'
17
+ DEFAULT_PUBLISHER = 'google'
18
+
19
+ STATIC_MODELS = [
20
+ { model: 'gemini-2.5-flash', alias: 'gemini-flash', publisher: 'google', model_family: :gemini },
21
+ { model: 'gemini-2.5-pro', alias: 'gemini-pro', publisher: 'google', model_family: :gemini },
22
+ { model: 'gemini-embedding-001', alias: 'gemini-embedding', publisher: 'google',
23
+ model_family: :gemini, usage_type: :embedding },
24
+ { model: 'text-embedding-005', alias: 'text-embedding', publisher: 'google',
25
+ model_family: :gemini, usage_type: :embedding },
26
+ { model: 'claude-sonnet-4-5', alias: 'claude-sonnet', publisher: 'anthropic',
27
+ model_family: :anthropic, api: :raw_predict },
28
+ { model: 'mistral-medium-3', alias: 'mistral-medium', publisher: 'mistralai',
29
+ model_family: :mistral, api: :raw_predict },
30
+ { model: 'llama-4-maverick', alias: 'llama-4-maverick', publisher: 'meta',
31
+ model_family: :meta, api: :raw_predict }
32
+ ].freeze
33
+
34
+ ALIASES = STATIC_MODELS.to_h { |entry| [entry.fetch(:alias), entry.fetch(:model)] }.freeze
35
+ PUBLISHERS = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:publisher)] }.freeze
36
+ API_MODES = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:api, :generate_content)] }.freeze
37
+ MODEL_FAMILIES = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:model_family)] }.freeze
38
+
39
+ class << self
40
+ def slug = 'vertex'
41
+
42
+ def configuration_options
43
+ %i[
44
+ vertex_project
45
+ vertex_location
46
+ vertex_api_base
47
+ vertex_access_token
48
+ vertex_credentials
49
+ vertex_model_aliases
50
+ vertex_discovery_live
51
+ ]
52
+ end
53
+
54
+ def configuration_requirements = []
55
+ def capabilities = Capabilities
56
+
57
# Resolves a model alias to its canonical Vertex model id.
#
# Built-in ALIASES are merged with the optional `vertex_model_aliases`
# mapping from +config+; configured entries take precedence.
#
# @param model_id [#to_s] alias or model id supplied by the caller
# @param config [#vertex_model_aliases, nil] optional provider configuration
# @return [String] the resolved model id, or the input (as a String) when no alias matches
def resolve_model_id(model_id, config: nil)
  configured_aliases = config.respond_to?(:vertex_model_aliases) ? config.vertex_model_aliases : nil
  # Normalize both sides to Strings so Symbol-based configuration cannot leak
  # a Symbol into the String-keyed lookups performed on the resolved id.
  # Entries with a nil target are ignored instead of resolving to nil.
  overrides = (configured_aliases || {}).compact.to_h { |name, target| [name.to_s, target.to_s] }
  ALIASES.merge(overrides).fetch(model_id.to_s, model_id.to_s)
end
62
+ end
63
+
64
+ # Capability predicates inferred from Vertex publisher model IDs and API modality.
65
module Capabilities
  module_function

  # Any model that is not an embedding model is treated as chat-capable.
  def chat?(model) = !embeddings?(model)

  # Streaming is offered for every chat-capable model.
  def streaming?(model) = chat?(model)

  # Vision is inferred from the model id naming a known multimodal family.
  def vision?(model) = model_id(model).match?(/gemini|claude|mistral|llama/)

  # Function calling is offered for every chat-capable model.
  def functions?(model) = chat?(model)

  # Embedding models are recognized by "embed"/"embedding" in the id.
  def embeddings?(model) = model_id(model).match?(/embedding|embed/)

  # Extracts a model id String from a Hash, an object exposing #id, or any
  # other value.
  #
  # Hashes are read under 'model' then 'id', accepting String or Symbol
  # keys; nil values are skipped so a present-but-empty key falls through
  # to the next candidate instead of crashing the predicates above.
  #
  # @param model [Hash, #id, #to_s]
  # @return [String] the model id ('' when a Hash carries none)
  def model_id(model)
    return [model['model'], model[:model], model['id'], model[:id]].compact.first.to_s if model.is_a?(Hash)

    model.respond_to?(:id) ? model.id.to_s : model.to_s
  end
end
80
+
81
+ def api_base
82
+ config.vertex_api_base || "https://#{location}-aiplatform.googleapis.com/v1"
83
+ end
84
+
85
+ def headers
86
+ { 'Authorization' => bearer_token, 'Content-Type' => 'application/json; charset=utf-8' }.compact
87
+ end
88
+
89
+ def project = config.vertex_project || ENV.fetch('GOOGLE_CLOUD_PROJECT', DEFAULT_PROJECT)
90
+ def location = config.vertex_location || DEFAULT_LOCATION
91
+ def models_url = publisher_parent
92
+ def completion_url = generate_content_url(model: @model || STATIC_MODELS.first.fetch(:model))
93
+ def stream_url = stream_generate_content_url(model: @model || STATIC_MODELS.first.fetch(:model))
94
+ def count_tokens_url(model:) = "#{publisher_model_path(model)}:countTokens"
95
+ def embedding_url(model:) = "#{publisher_model_path(model)}:predict"
96
+
97
+ def generate_content_url(model:)
98
+ "#{publisher_model_path(model)}:generateContent"
99
+ end
100
+
101
+ def stream_generate_content_url(model:)
102
+ "#{publisher_model_path(model)}:streamGenerateContent?alt=sse"
103
+ end
104
+
105
# Builds the partner-model endpoint path for +model+: `:streamRawPredict`
# when +stream+ is truthy, `:rawPredict` otherwise.
def raw_predict_url(model:, stream: false)
  action = 'rawPredict'
  action = 'streamRawPredict' if stream
  [publisher_model_path(model), action].join(':')
end
109
+
110
+ def discover_offerings(live: false, **filters)
111
+ return static_offerings(**filters) unless live
112
+
113
+ response = connection.get(models_url)
114
+ models = response.body['publisherModels'] || response.body['models'] || []
115
+ models.map { |model| offering_from_live_model(model) }
116
+ end
117
+
118
+ def offering_for(model:, model_family: nil, instance_id: :default, **metadata)
119
+ model_id = model_id(model)
120
+ publisher = metadata.delete(:publisher) || publisher_for(model_id)
121
+ family = model_family || metadata.delete(:model_family) || model_family_for(model_id, publisher)
122
+
123
+ build_offering(
124
+ model: resource_name(model_id, publisher:),
125
+ alias_name: alias_for(model_id),
126
+ model_family: family,
127
+ instance_id: instance_id,
128
+ publisher: publisher,
129
+ usage_type: metadata.delete(:usage_type) || usage_type_for(model_id),
130
+ api: metadata.delete(:api) || api_for(model_id),
131
+ metadata: metadata
132
+ )
133
+ end
134
+
135
+ def health(live: false)
136
+ baseline = {
137
+ provider: :vertex,
138
+ project: project,
139
+ location: location,
140
+ configured: configured?,
141
+ ready: configured?,
142
+ live: live,
143
+ credentials: credential_source
144
+ }
145
+ return baseline.merge(checked: false) unless live
146
+
147
+ connection.get(models_url)
148
+ baseline.merge(checked: true)
149
+ rescue StandardError => e
150
+ baseline.merge(checked: true, ready: false, error: e.class.name, message: e.message)
151
+ end
152
+
153
+ def readiness(live: false)
154
+ health(live:).merge(local: false, remote: true, api_base: api_base, endpoints: endpoint_manifest)
155
+ end
156
+
157
+ def chat(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {})
158
+ model_id = model_id(model)
159
+ @model = model_id
160
+ payload = Utils.deep_merge(chat_payload(messages, model: model_id, temperature:, max_tokens:, tools:,
161
+ tool_prefs:, stream: false), params)
162
+ response = connection.post(chat_url(model_id, stream: false), payload)
163
+ parse_chat_response(response, model: model_id)
164
+ end
165
+
166
+ def stream(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {})
167
+ model_id = model_id(model)
168
+ @model = model_id
169
+ payload = Utils.deep_merge(chat_payload(messages, model: model_id, temperature:, max_tokens:, tools:,
170
+ tool_prefs:, stream: true), params)
171
+ response = connection.post(chat_url(model_id, stream: true), payload)
172
+ chunk = build_chunk(response.body, model: model_id)
173
+ yield chunk if block_given? && chunk.content
174
+ parse_chat_response(response, model: model_id)
175
+ end
176
+
177
+ def count_tokens(messages, model:, params: {})
178
+ model_id = model_id(model)
179
+ unless generate_content_model?(model_id)
180
+ return {
181
+ supported: false,
182
+ provider: :vertex,
183
+ model: resource_name(model_id),
184
+ reason: 'Vertex countTokens is standardized for generateContent publisher models'
185
+ }
186
+ end
187
+
188
+ payload = Utils.deep_merge({ contents: format_messages(messages) }, params)
189
+ response = connection.post(count_tokens_url(model: model_id), payload)
190
+ { input_tokens: response.body['totalTokens'], raw: response.body }
191
+ end
192
+
193
+ def embed(text, model:, dimensions: nil, task_type: nil, title: nil, params: {})
194
+ model_id = model_id(model)
195
+ unless Capabilities.embeddings?(model_id)
196
+ raise NotImplementedError, "Vertex embedding payload for #{model_id} is not standardized"
197
+ end
198
+
199
+ instances = Array(text).map { |item| embedding_instance(item, task_type:, title:) }
200
+ parameters = { outputDimensionality: dimensions }.compact
201
+ payload = Utils.deep_merge({ instances: instances, parameters: parameters }, params)
202
+ response = connection.post(embedding_url(model: model_id), payload)
203
+ parse_embedding_response(response, model: model_id)
204
+ end
205
+
206
+ def complete(messages, tools:, temperature:, model:, params: {}, schema: nil, thinking: nil, tool_prefs: nil,
207
+ &)
208
+ payload = params.dup
209
+ payload[:generationConfig] = Utils.deep_merge(payload[:generationConfig] || {},
210
+ generation_config(temperature, schema, thinking))
211
+ if block_given?
212
+ stream(messages, model:, temperature:, tools:, tool_prefs:, params: payload, &)
213
+ else
214
+ chat(messages, model:, temperature:, tools:, tool_prefs:, params: payload)
215
+ end
216
+ end
217
+
218
+ private
219
+
220
+ def static_offerings(**filters)
221
+ STATIC_MODELS.filter_map do |entry|
222
+ next if filters[:model_family] && entry.fetch(:model_family) != filters[:model_family].to_sym
223
+ next if filters[:publisher] && entry.fetch(:publisher) != filters[:publisher].to_s
224
+
225
+ offering_for(**entry.slice(:model, :model_family, :publisher, :usage_type, :api))
226
+ end
227
+ end
228
+
229
+ def offering_from_live_model(model)
230
+ name = model['name'] || model['publisherModelName'] || model['model'] || model['id']
231
+ publisher = publisher_from_resource(name) || model['publisher'] || DEFAULT_PUBLISHER
232
+ id = name.to_s.split('/').last
233
+ offering_for(model: id, publisher:, metadata: model)
234
+ end
235
+
236
+ def build_offering(model:, model_family:, usage_type:, publisher:, api:, instance_id: :default,
237
+ alias_name: nil, metadata: {})
238
+ Legion::Extensions::Llm::Routing::ModelOffering.new(
239
+ provider_family: :vertex,
240
+ instance_id: instance_id,
241
+ transport: :http,
242
+ tier: :frontier,
243
+ model: model,
244
+ usage_type: usage_type,
245
+ capabilities: default_capabilities(model, api:),
246
+ limits: metadata.delete(:limits) || {},
247
+ metadata: metadata.merge(
248
+ model_family: model_family,
249
+ alias: alias_name,
250
+ publisher: publisher,
251
+ project: project,
252
+ location: location,
253
+ api: api
254
+ ).compact
255
+ )
256
+ end
257
+
258
+ def publisher_parent
259
+ "projects/#{project}/locations/#{location}/publishers/#{DEFAULT_PUBLISHER}/models"
260
+ end
261
+
262
# Returns the publisher model resource path (relative to the API base)
# used to build endpoint URLs for +model+.
#
# Accepts a short model id or alias, a full resource name starting with
# `projects/`, or a full URL under +api_base+. The API base prefix is
# stripped before the `projects/` check; doing it afterwards could never
# match, because an id that starts with `projects/` cannot also start
# with the API base URL.
#
# @param model [#id, #to_s] model reference
# @return [String] `projects/{project}/locations/{location}/publishers/{publisher}/models/{id}`
def publisher_model_path(model)
  id = model_id(model).delete_prefix("#{api_base}/")
  return id if id.start_with?('projects/')

  "projects/#{project}/locations/#{location}/publishers/#{publisher_for(id)}/models/#{id}"
end
268
+
269
+ def resource_name(model, publisher: nil)
270
+ id = model_id(model)
271
+ return id if id.start_with?('projects/')
272
+
273
+ "projects/#{project}/locations/#{location}/publishers/#{publisher || publisher_for(id)}/models/#{id}"
274
+ end
275
+
276
+ def chat_url(model, stream:)
277
+ return raw_predict_url(model:, stream:) unless generate_content_model?(model)
278
+
279
+ stream ? stream_generate_content_url(model:) : generate_content_url(model:)
280
+ end
281
+
282
+ def chat_payload(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, stream:)
283
+ if generate_content_model?(model)
284
+ generate_content_payload(messages, temperature:, max_tokens:, tools:, tool_prefs:)
285
+ else
286
+ raw_predict_payload(messages, model:, temperature:, max_tokens:, stream:)
287
+ end
288
+ end
289
+
290
+ def generate_content_payload(messages, temperature:, max_tokens:, tools:, tool_prefs:)
291
+ {
292
+ contents: format_messages(messages.reject { |message| message.role == :system }),
293
+ systemInstruction: system_instruction(messages),
294
+ generationConfig: generation_config(temperature, nil, nil, max_tokens:),
295
+ tools: format_tools(tools),
296
+ toolConfig: tool_config(tool_prefs)
297
+ }.compact
298
+ end
299
+
300
+ def raw_predict_payload(messages, model:, temperature:, max_tokens:, stream:)
301
+ {
302
+ model: model,
303
+ messages: messages.reject { |message| message.role == :system }.map do |message|
304
+ { role: raw_role(message.role), content: content_text(message.content) }
305
+ end,
306
+ temperature: temperature,
307
+ max_tokens: max_tokens,
308
+ stream: stream
309
+ }.compact
310
+ end
311
+
312
+ def generation_config(temperature, schema, thinking, max_tokens: nil)
313
+ {
314
+ temperature: temperature,
315
+ maxOutputTokens: max_tokens,
316
+ responseMimeType: ('application/json' if schema),
317
+ responseSchema: schema_hash(schema),
318
+ thinkingConfig: thinking_config(thinking)
319
+ }.compact
320
+ end
321
+
322
# Unwraps a response schema: nil for a missing schema, the value under
# :schema when the hash form has that key, the hash form itself
# otherwise, and the input untouched when it cannot be converted.
def schema_hash(schema)
  return unless schema
  return schema unless schema.respond_to?(:to_h)

  as_hash = schema.to_h
  as_hash.fetch(:schema) { as_hash }
end
327
+
328
# Translates a thinking option into a config hash with :thinkingBudget.
# The budget comes from #budget when available, otherwise from a Hash's
# :budget / 'budget' key; with no budget the result is an empty hash.
def thinking_config(thinking)
  return unless thinking

  budget = thinking.budget if thinking.respond_to?(:budget)
  budget = thinking[:budget] || thinking['budget'] if thinking.is_a?(Hash) && !budget
  budget.nil? ? {} : { thinkingBudget: budget }
end
335
+
336
+ def system_instruction(messages)
337
+ parts = messages.select { |message| message.role == :system }
338
+ .flat_map { |message| content_parts(message.content) }
339
+ return nil if parts.empty?
340
+
341
+ { parts: parts }
342
+ end
343
+
344
+ def format_messages(messages)
345
+ messages.map { |message| { role: vertex_role(message.role), parts: message_parts(message) } }
346
+ end
347
+
348
# Role string for generateContent payloads: 'model' for :assistant,
# 'user' for every other role.
def vertex_role(role)
  return 'model' if role == :assistant

  'user'
end
351
+
352
# Role string for raw-predict payloads: 'assistant' for :assistant,
# 'user' for every other role.
def raw_role(role)
  return 'assistant' if role == :assistant

  'user'
end
355
+
356
+ def message_parts(message)
357
+ return tool_call_parts(message) if message.tool_call?
358
+ return tool_result_parts(message) if message.tool_result?
359
+
360
+ content_parts(message.content)
361
+ end
362
+
363
+ def content_parts(content)
364
+ return Array(content.value) if content.is_a?(Legion::Extensions::Llm::Content::Raw)
365
+ return [{ text: Legion::JSON.generate(content) }] if content.is_a?(Hash) || content.is_a?(Array)
366
+ return [{ text: content.to_s }] unless content.is_a?(Legion::Extensions::Llm::Content)
367
+
368
+ parts = []
369
+ parts << { text: content.text } if content.text
370
+ content.attachments.each { |attachment| parts << attachment_part(attachment) }
371
+ parts
372
+ end
373
+
374
+ def attachment_part(attachment)
375
+ if attachment.text?
376
+ { text: attachment.for_llm }
377
+ else
378
+ { inlineData: { mimeType: attachment.mime_type, data: attachment.encoded } }
379
+ end
380
+ end
381
+
382
+ def content_text(content)
383
+ return content.text if content.respond_to?(:text)
384
+
385
+ content.to_s
386
+ end
387
+
388
+ def tool_call_parts(message)
389
+ message.tool_calls.values.map do |tool_call|
390
+ { functionCall: { name: tool_call.name, args: tool_call.arguments } }
391
+ end
392
+ end
393
+
394
+ def tool_result_parts(message)
395
+ [{
396
+ functionResponse: {
397
+ name: message.tool_call_id,
398
+ response: { name: message.tool_call_id, content: content_parts(message.content) }
399
+ }
400
+ }]
401
+ end
402
+
403
+ def format_tools(tools)
404
+ return nil if tools.empty?
405
+
406
+ [{
407
+ functionDeclarations: tools.values.map do |tool|
408
+ declaration = { name: tool.name, description: tool.description }
409
+ declaration[:parameters] = tool.params_schema if tool.respond_to?(:params_schema) && tool.params_schema
410
+ declaration
411
+ end
412
+ }]
413
+ end
414
+
415
# Builds the toolConfig payload from tool preferences. Returns nil when
# no preferences are given or they carry no :choice / 'choice' entry.
def tool_config(tool_prefs)
  selected = tool_prefs && (tool_prefs[:choice] || tool_prefs['choice'])
  return unless selected

  { functionCallingConfig: { mode: selected.to_s } }
end
423
+
424
+ def parse_chat_response(response, model:)
425
+ body = response.body
426
+ if generate_content_model?(model)
427
+ parse_generate_content_response(body, model:)
428
+ else
429
+ parse_raw_predict_response(body, model:)
430
+ end
431
+ end
432
+
433
+ def parse_generate_content_response(body, model:)
434
+ parts = response_parts(body)
435
+ usage = body['usageMetadata'] || {}
436
+
437
+ Legion::Extensions::Llm::Message.new(
438
+ role: :assistant,
439
+ content: text_content(parts),
440
+ tool_calls: parse_tool_calls(parts),
441
+ input_tokens: usage['promptTokenCount'],
442
+ output_tokens: output_tokens(usage),
443
+ cached_tokens: usage['cachedContentTokenCount'],
444
+ thinking_tokens: usage['thoughtsTokenCount'],
445
+ model_id: body['modelVersion'] || model,
446
+ raw: body
447
+ )
448
+ end
449
+
450
+ def parse_raw_predict_response(body, model:)
451
+ choice = Array(body['choices']).first || {}
452
+ message = choice['message'] || {}
453
+ usage = body['usage'] || {}
454
+
455
+ Legion::Extensions::Llm::Message.new(
456
+ role: :assistant,
457
+ content: message['content'] || choice['text'],
458
+ input_tokens: usage['prompt_tokens'],
459
+ output_tokens: usage['completion_tokens'],
460
+ model_id: body['model'] || model,
461
+ raw: body
462
+ )
463
+ end
464
+
465
+ def build_chunk(body, model:)
466
+ parts = response_parts(body)
467
+ return raw_chunk(body, model:) if parts.empty?
468
+
469
+ usage = body['usageMetadata'] || {}
470
+ Legion::Extensions::Llm::Chunk.new(
471
+ role: :assistant,
472
+ content: text_content(parts),
473
+ input_tokens: usage['promptTokenCount'],
474
+ output_tokens: output_tokens(usage),
475
+ model_id: body['modelVersion'] || model,
476
+ raw: body
477
+ )
478
+ end
479
+
480
+ def raw_chunk(body, model:)
481
+ delta = Array(body['choices']).first&.dig('delta') || Array(body['choices']).first&.dig('message') || {}
482
+ Legion::Extensions::Llm::Chunk.new(role: :assistant, content: delta['content'],
483
+ model_id: body['model'] || model, raw: body)
484
+ end
485
+
486
# Content parts of the first candidate in a generateContent response
# body, or an empty array when that path is absent.
def response_parts(body)
  found = body.dig('candidates', 0, 'content', 'parts')
  return [] unless found

  found
end
489
+
490
# Concatenates the text of all parts not flagged as 'thought'.
# Returns nil when the resulting text is empty.
def text_content(parts)
  visible = parts.filter_map { |part| part['text'] unless part['thought'] }
  joined = visible.join
  joined unless joined.empty?
end
494
+
495
# Sum of candidate and thought token counts from usage metadata
# (missing counts are treated as 0); nil when the sum is not positive.
def output_tokens(usage)
  combined = %w[candidatesTokenCount thoughtsTokenCount].sum { |field| usage[field] || 0 }
  combined if combined.positive?
end
501
+
502
+ def parse_tool_calls(parts)
503
+ calls = parts.each_with_object({}) do |part, result|
504
+ function_call = part['functionCall']
505
+ next unless function_call
506
+
507
+ id = SecureRandom.uuid
508
+ result[id] = Legion::Extensions::Llm::ToolCall.new(
509
+ id: id,
510
+ name: function_call['name'],
511
+ arguments: function_call['args'] || {}
512
+ )
513
+ end
514
+ calls.empty? ? nil : calls
515
+ end
516
+
517
# Converts a Vertex `predict` embedding response into an Embedding.
#
# Each prediction contributes one vector, read from
# `embeddings.values` or a top-level `values` key. A single prediction
# is unwrapped so `vectors` is that one vector rather than a
# one-element list.
#
# Token usage is summed over every prediction; reading only the first
# prediction's statistics under-reports usage for batch requests.
#
# @param response [#body] response whose body is the parsed JSON Hash
# @param model [String] model id recorded on the result
# @return [Legion::Extensions::Llm::Embedding]
def parse_embedding_response(response, model:)
  predictions = Array(response.body['predictions'])
  vectors = predictions.map { |prediction| prediction.dig('embeddings', 'values') || prediction['values'] }
  vectors = vectors.first if vectors.length == 1
  input_tokens = predictions.sum { |prediction| prediction.dig('embeddings', 'statistics', 'token_count') || 0 }
  Legion::Extensions::Llm::Embedding.new(vectors:, model:, input_tokens:)
end
527
+
528
# One `instances` entry for an embedding request; nil-valued fields are
# left out.
def embedding_instance(text, task_type:, title:)
  { content: text, task_type:, title: }.reject { |_field, value| value.nil? }
end
531
+
532
+ def default_capabilities(model, api:)
533
+ return %i[embedding] if Capabilities.embeddings?(model)
534
+
535
+ capabilities = %i[chat]
536
+ capabilities << :streaming if %i[generate_content raw_predict].include?(api)
537
+ capabilities << :vision if Capabilities.vision?(model)
538
+ capabilities << :functions if generate_content_model?(model)
539
+ capabilities
540
+ end
541
+
542
# Authorization header value built from the configured access token, or
# nil when no token is configured.
def bearer_token
  access_token = config.vertex_access_token
  return nil unless access_token

  "Bearer #{access_token}"
end
546
+
547
# Names the credential source in effect: a configured access token
# wins, then configured credentials, otherwise Application Default
# Credentials.
def credential_source
  if config.vertex_access_token
    :access_token
  elsif config.vertex_credentials
    :credentials_file
  else
    :google_application_default_credentials
  end
end
553
+
554
+ def model_id(model)
555
+ value = model.respond_to?(:id) ? model.id : model
556
+ self.class.resolve_model_id(value, config:)
557
+ end
558
+
559
+ def publisher_for(model)
560
+ id = model_id(model)
561
+ return publisher_from_resource(id) if id.start_with?('projects/')
562
+
563
+ PUBLISHERS.fetch(id, DEFAULT_PUBLISHER)
564
+ end
565
+
566
# Extracts the publisher segment from a Vertex model resource name.
#
# Matches both the project-scoped form
# (`projects/{p}/locations/{l}/publishers/{publisher}/models/{model}`) and
# the bare form (`publishers/{publisher}/models/{model}`). The bare form
# has no leading slash, so the pattern anchors on either the start of the
# string or a `/`.
#
# @param resource [#to_s] resource name (nil is tolerated)
# @return [String, nil] the publisher, or nil when the input has no publisher segment
def publisher_from_resource(resource)
  resource.to_s[%r{(?:\A|/)publishers/([^/]+)/models/}, 1]
end
570
+
571
+ def api_for(model)
572
+ id = model_id(model)
573
+ return API_MODES[id] if API_MODES.key?(id)
574
+ return :raw_predict if publisher_for(id) != DEFAULT_PUBLISHER && !Capabilities.embeddings?(id)
575
+
576
+ :generate_content
577
+ end
578
+
579
+ def generate_content_model?(model)
580
+ api_for(model) == :generate_content
581
+ end
582
+
583
+ def usage_type_for(model)
584
+ Capabilities.embeddings?(model) ? :embedding : :inference
585
+ end
586
+
587
+ def model_family_for(model, publisher = nil)
588
+ id = model_id(model)
589
+ return MODEL_FAMILIES[id] if MODEL_FAMILIES.key?(id)
590
+
591
+ normalized_family(publisher || publisher_for(id))
592
+ end
593
+
594
# Maps a publisher/provider name to a model family symbol: 'google'
# becomes :gemini, 'mistralai' becomes :mistral, and anything else is
# the downcased name with '-' replaced by '_'.
def normalized_family(provider)
  key = provider.to_s.downcase.tr('-', '_')
  case key
  when 'google' then :gemini
  when 'mistralai' then :mistral
  else key.to_sym
  end
end
601
+
602
+ def alias_for(model)
603
+ ALIASES.key(model_id(model))
604
+ end
605
+ end
606
+ end
607
+ end
608
+ end
609
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Legion
4
+ module Extensions
5
+ module Llm
6
+ module Vertex
7
+ VERSION = '0.1.0'
8
+ end
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'legion/extensions/llm'
4
+ require 'legion/extensions/llm/vertex/provider'
5
+ require 'legion/extensions/llm/vertex/version'
6
+
7
module Legion
  module Extensions
    module Llm
      # Namespace of the Google Cloud Vertex AI provider extension.
      module Vertex
        # Core is mixed in only when it is defined directly on Legion::Extensions.
        extend ::Legion::Extensions::Core if ::Legion::Extensions.const_defined?(:Core, false)

        # Family key under which this provider is registered.
        PROVIDER_FAMILY = :vertex

        class << self
          # Builds the default settings for this provider by handing the
          # discovery and instance defaults to Llm.provider_settings.
          #
          # @return [Object] whatever Llm.provider_settings returns
          def default_settings
            discovery = { enabled: true, live: false, locations: %w[us-central1 us-east5 europe-west4] }
            credentials = {
              provider: 'google-application-default-credentials',
              access_token: 'env://VERTEX_ACCESS_TOKEN',
              credentials_file: 'env://GOOGLE_APPLICATION_CREDENTIALS'
            }
            instance = {
              endpoint: 'https://us-central1-aiplatform.googleapis.com/v1',
              project: 'env://GOOGLE_CLOUD_PROJECT',
              location: 'us-central1',
              tier: :frontier,
              transport: :http,
              credentials: credentials,
              usage: { inference: true, embedding: true, token_counting: true },
              limits: { concurrency: 4 }
            }

            ::Legion::Extensions::Llm.provider_settings(family: PROVIDER_FAMILY, discovery:, instance:)
          end

          # @return [Class] the provider implementation for this extension
          def provider_class
            Provider
          end
        end
      end
    end
  end
end
44
+
45
# Register the Vertex provider class under its family key when this file is loaded.
vertex_extension = Legion::Extensions::Llm::Vertex
Legion::Extensions::Llm::Provider.register(vertex_extension::PROVIDER_FAMILY, vertex_extension::Provider)
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: lex-llm-vertex
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - LegionIO
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: legion-json
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: 1.2.1
19
+ type: :runtime
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: 1.2.1
26
+ - !ruby/object:Gem::Dependency
27
+ name: legion-logging
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 1.3.2
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: 1.3.2
40
+ - !ruby/object:Gem::Dependency
41
+ name: legion-settings
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.3.14
47
+ type: :runtime
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: 1.3.14
54
+ - !ruby/object:Gem::Dependency
55
+ name: lex-llm
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: 0.1.3
61
+ type: :runtime
62
+ prerelease: false
63
+ version_requirements: !ruby/object:Gem::Requirement
64
+ requirements:
65
+ - - ">="
66
+ - !ruby/object:Gem::Version
67
+ version: 0.1.3
68
+ description: Google Cloud Vertex AI provider integration for the LegionIO LLM routing
69
+ framework.
70
+ email:
71
+ - matthewdiverson@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".github/CODEOWNERS"
77
+ - ".github/dependabot.yml"
78
+ - ".github/workflows/ci.yml"
79
+ - ".gitignore"
80
+ - ".rubocop.yml"
81
+ - CHANGELOG.md
82
+ - Gemfile
83
+ - LICENSE
84
+ - README.md
85
+ - lex-llm-vertex.gemspec
86
+ - lib/legion/extensions/llm/vertex.rb
87
+ - lib/legion/extensions/llm/vertex/provider.rb
88
+ - lib/legion/extensions/llm/vertex/version.rb
89
+ homepage: https://github.com/LegionIO/lex-llm-vertex
90
+ licenses:
91
+ - MIT
92
+ metadata:
93
+ homepage_uri: https://github.com/LegionIO/lex-llm-vertex
94
+ source_code_uri: https://github.com/LegionIO/lex-llm-vertex
95
+ documentation_uri: https://github.com/LegionIO/lex-llm-vertex
96
+ changelog_uri: https://github.com/LegionIO/lex-llm-vertex/blob/main/CHANGELOG.md
97
+ bug_tracker_uri: https://github.com/LegionIO/lex-llm-vertex/issues
98
+ rubygems_mfa_required: 'true'
99
+ rdoc_options: []
100
+ require_paths:
101
+ - lib
102
+ required_ruby_version: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - ">="
105
+ - !ruby/object:Gem::Version
106
+ version: '3.4'
107
+ required_rubygems_version: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - ">="
110
+ - !ruby/object:Gem::Version
111
+ version: '0'
112
+ requirements: []
113
+ rubygems_version: 3.6.9
114
+ specification_version: 4
115
+ summary: LegionIO LLM Google Cloud Vertex AI provider extension
116
+ test_files: []