lex-llm-vertex 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/CODEOWNERS +1 -0
- data/.github/dependabot.yml +10 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +12 -0
- data/.rubocop.yml +32 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +20 -0
- data/LICENSE +21 -0
- data/README.md +79 -0
- data/lex-llm-vertex.gemspec +30 -0
- data/lib/legion/extensions/llm/vertex/provider.rb +609 -0
- data/lib/legion/extensions/llm/vertex/version.rb +11 -0
- data/lib/legion/extensions/llm/vertex.rb +46 -0
- metadata +116 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: b1ff87c604774c449138054bd3675a26da6696eaa43993c86b6f567e8861b628
|
|
4
|
+
data.tar.gz: f87648ca25d8317bbacbe84597b6c3abc495ea5f5ede11b2d363b74db04cfdd0
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: df0217a1da4c3cea8adf36a0bf3fe27fa6932b672b8053f7aab01e5b360a6e6601548244e6f804817a5319c210e9ec05913b39023a260d06ba66f6b9dd9104cd
|
|
7
|
+
data.tar.gz: 2f17a80cdb3e2553ec857ad68d359c9669a93203c9b0b116c6dd547bc22350ad7229b5b201bc3509aba1e747ff20d8bb982d0899a36fdfac9323706d89cc9155
|
data/.github/CODEOWNERS
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
* @LegionIO/maintainers
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches: [main]
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
ci:
|
|
9
|
+
uses: LegionIO/.github/.github/workflows/ci.yml@main
|
|
10
|
+
|
|
11
|
+
release:
|
|
12
|
+
needs: ci
|
|
13
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
14
|
+
uses: LegionIO/.github/.github/workflows/release.yml@main
|
|
15
|
+
secrets:
|
|
16
|
+
rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
plugins:
|
|
2
|
+
- rubocop-performance
|
|
3
|
+
- rubocop-rake
|
|
4
|
+
- rubocop-rspec
|
|
5
|
+
|
|
6
|
+
AllCops:
|
|
7
|
+
NewCops: enable
|
|
8
|
+
TargetRubyVersion: 3.4
|
|
9
|
+
SuggestExtensions: false
|
|
10
|
+
|
|
11
|
+
Metrics/BlockLength:
|
|
12
|
+
Exclude:
|
|
13
|
+
- "*.gemspec"
|
|
14
|
+
- spec/**/*
|
|
15
|
+
Metrics/MethodLength:
|
|
16
|
+
Enabled: false
|
|
17
|
+
Metrics/ParameterLists:
|
|
18
|
+
Enabled: false
|
|
19
|
+
Metrics/AbcSize:
|
|
20
|
+
Enabled: false
|
|
21
|
+
Metrics/CyclomaticComplexity:
|
|
22
|
+
Enabled: false
|
|
23
|
+
Metrics/PerceivedComplexity:
|
|
24
|
+
Enabled: false
|
|
25
|
+
RSpec/MultipleExpectations:
|
|
26
|
+
Enabled: false
|
|
27
|
+
RSpec/ExampleLength:
|
|
28
|
+
Enabled: false
|
|
29
|
+
RSpec/LeakyConstantDeclaration:
|
|
30
|
+
Enabled: false
|
|
31
|
+
RSpec/InstanceVariable:
|
|
32
|
+
Enabled: false
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 - 2026-04-28
|
|
4
|
+
|
|
5
|
+
- Initial Legion::Extensions::Llm Vertex AI provider extension scaffold.
|
|
6
|
+
- Add offline provider defaults, project/location-aware model offering mapping, Vertex publisher model endpoint construction, chat, streaming, embeddings, token-counting metadata, health, and live discovery entrypoints.
|
|
7
|
+
- Add README, gemspec, CI, and stubbed unit specs for Vertex AI routing behavior.
|
data/Gemfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
source 'https://rubygems.org'
|
|
4
|
+
|
|
5
|
+
group :test do
|
|
6
|
+
llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
|
|
7
|
+
gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
gemspec
|
|
11
|
+
|
|
12
|
+
group :development do
|
|
13
|
+
gem 'bundler', '>= 2.0'
|
|
14
|
+
gem 'rake', '>= 13.0'
|
|
15
|
+
gem 'rspec', '~> 3.12'
|
|
16
|
+
gem 'rubocop', '>= 1.0'
|
|
17
|
+
gem 'rubocop-performance'
|
|
18
|
+
gem 'rubocop-rake', '>= 0.6'
|
|
19
|
+
gem 'rubocop-rspec'
|
|
20
|
+
end
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 LegionIO
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# lex-llm-vertex
|
|
2
|
+
|
|
3
|
+
Google Cloud Vertex AI provider extension for `Legion::Extensions::Llm`.
|
|
4
|
+
|
|
5
|
+
This gem adds a hosted Vertex AI provider surface for Legion LLM routing without depending on the old `legion-llm` gem. It keeps discovery offline by default, preserves full Vertex publisher model resource names for routing, and exposes project/location instance metadata for multi-region provider fleets.
|
|
6
|
+
|
|
7
|
+
## Install
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'lex-llm-vertex'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
## Configuration
|
|
14
|
+
|
|
15
|
+
The provider registers the `:vertex` provider family with `Legion::Extensions::Llm::Provider`.
|
|
16
|
+
|
|
17
|
+
```ruby
|
|
18
|
+
require 'legion/extensions/llm/vertex'
|
|
19
|
+
|
|
20
|
+
Legion::Extensions::Llm.configure do |config|
|
|
21
|
+
config.vertex_project = ENV['GOOGLE_CLOUD_PROJECT']
|
|
22
|
+
config.vertex_location = ENV.fetch('VERTEX_LOCATION', 'us-central1')
|
|
23
|
+
config.vertex_access_token = ENV['VERTEX_ACCESS_TOKEN']
|
|
24
|
+
end
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
`vertex_access_token` is optional for local routing metadata and tests. For live calls, provide a Google Cloud access token through configuration or use Application Default Credentials in the process that owns HTTP authentication.
|
|
28
|
+
|
|
29
|
+
Default settings expose `env://` references and keep live discovery disabled:
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
Legion::Extensions::Llm::Vertex.default_settings
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## Provider Surface
|
|
36
|
+
|
|
37
|
+
```ruby
|
|
38
|
+
provider = Legion::Extensions::Llm::Vertex::Provider.new(Legion::Extensions::Llm.config)
|
|
39
|
+
|
|
40
|
+
provider.discover_offerings(live: false)
|
|
41
|
+
provider.offering_for(model: 'gemini-2.5-flash')
|
|
42
|
+
provider.health(live: false)
|
|
43
|
+
provider.chat(messages, model: model)
|
|
44
|
+
provider.stream(messages, model: model) { |chunk| chunk.content }
|
|
45
|
+
provider.embed('hello', model: 'gemini-embedding-001')
|
|
46
|
+
provider.count_tokens(messages, model: model)
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
`discover_offerings(live: false)` returns a conservative static catalog for routing defaults and unit tests. `discover_offerings(live: true)` calls the Vertex publisher models listing endpoint and maps returned model data into `Legion::Extensions::Llm::Routing::ModelOffering` records.
|
|
50
|
+
|
|
51
|
+
## Model Offerings
|
|
52
|
+
|
|
53
|
+
Every offering uses:
|
|
54
|
+
|
|
55
|
+
- `provider_family: :vertex`
|
|
56
|
+
- `transport: :http`
|
|
57
|
+
- the full Vertex publisher model resource name as `model`
|
|
58
|
+
- `metadata[:model_family]` inferred from the publisher/model or accepted from the caller
|
|
59
|
+
- `metadata[:project]` and `metadata[:location]` copied from the provider instance
|
|
60
|
+
|
|
61
|
+
The built-in alias table is intentionally small and can be extended through the `vertex_model_aliases` configuration option. For example, `gemini-flash` resolves to `gemini-2.5-flash`, while the offering preserves the full resource name `projects/{project}/locations/{location}/publishers/google/models/gemini-2.5-flash`.
|
|
62
|
+
|
|
63
|
+
## API Contract
|
|
64
|
+
|
|
65
|
+
The implementation is intentionally limited to Vertex AI REST surfaces documented by Google Cloud:
|
|
66
|
+
|
|
67
|
+
- `generateContent` and `streamGenerateContent` for Gemini publisher models
|
|
68
|
+
- `countTokens` for Gemini-style publisher models
|
|
69
|
+
- `predict` for documented text embedding models
|
|
70
|
+
- `rawPredict` and `streamRawPredict` endpoint builders for partner publisher models such as Mistral, Anthropic, and Meta
|
|
71
|
+
|
|
72
|
+
Provider-specific request bodies are not guessed. Partner raw-predict chat requests use the message shape documented for those partner model endpoints; embeddings are only implemented for documented Vertex text embedding models.
|
|
73
|
+
|
|
74
|
+
Google Cloud references:
|
|
75
|
+
|
|
76
|
+
- [Vertex AI GenAI REST API](https://cloud.google.com/vertex-ai/generative-ai/docs/reference/rest)
|
|
77
|
+
- [Generate content with the Gemini API in Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/inference)
|
|
78
|
+
- [Text embeddings API](https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api)
|
|
79
|
+
- [Mistral AI models on Vertex AI](https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/mistral)
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/legion/extensions/llm/vertex/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = 'lex-llm-vertex'
|
|
7
|
+
spec.version = Legion::Extensions::Llm::Vertex::VERSION
|
|
8
|
+
spec.authors = ['LegionIO']
|
|
9
|
+
spec.email = ['matthewdiverson@gmail.com']
|
|
10
|
+
spec.summary = 'LegionIO LLM Google Cloud Vertex AI provider extension'
|
|
11
|
+
spec.description = 'Google Cloud Vertex AI provider integration for the LegionIO LLM routing framework.'
|
|
12
|
+
spec.homepage = 'https://github.com/LegionIO/lex-llm-vertex'
|
|
13
|
+
spec.license = 'MIT'
|
|
14
|
+
spec.required_ruby_version = '>= 3.4'
|
|
15
|
+
|
|
16
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
|
17
|
+
spec.metadata['source_code_uri'] = spec.homepage
|
|
18
|
+
spec.metadata['documentation_uri'] = spec.homepage
|
|
19
|
+
spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/main/CHANGELOG.md"
|
|
20
|
+
spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
|
|
21
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
22
|
+
|
|
23
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |file| file.match(%r{^(spec|test|features|tmp|coverage)/}) }
|
|
24
|
+
spec.require_paths = ['lib']
|
|
25
|
+
|
|
26
|
+
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
27
|
+
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
|
+
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
|
+
spec.add_dependency 'lex-llm', '>= 0.1.3'
|
|
30
|
+
end
|
|
@@ -0,0 +1,609 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/json'
|
|
4
|
+
require 'legion/logging'
|
|
5
|
+
require 'legion/settings'
|
|
6
|
+
require 'legion/extensions/llm'
|
|
7
|
+
require 'securerandom'
|
|
8
|
+
|
|
9
|
+
module Legion
|
|
10
|
+
module Extensions
|
|
11
|
+
module Llm
|
|
12
|
+
module Vertex
|
|
13
|
+
# Google Cloud Vertex AI provider implementation for the Legion::Extensions::Llm contract.
|
|
14
|
+
class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
|
|
15
|
+
DEFAULT_LOCATION = 'us-central1'
|
|
16
|
+
DEFAULT_PROJECT = 'env://GOOGLE_CLOUD_PROJECT'
|
|
17
|
+
DEFAULT_PUBLISHER = 'google'
|
|
18
|
+
|
|
19
|
+
STATIC_MODELS = [
|
|
20
|
+
{ model: 'gemini-2.5-flash', alias: 'gemini-flash', publisher: 'google', model_family: :gemini },
|
|
21
|
+
{ model: 'gemini-2.5-pro', alias: 'gemini-pro', publisher: 'google', model_family: :gemini },
|
|
22
|
+
{ model: 'gemini-embedding-001', alias: 'gemini-embedding', publisher: 'google',
|
|
23
|
+
model_family: :gemini, usage_type: :embedding },
|
|
24
|
+
{ model: 'text-embedding-005', alias: 'text-embedding', publisher: 'google',
|
|
25
|
+
model_family: :gemini, usage_type: :embedding },
|
|
26
|
+
{ model: 'claude-sonnet-4-5', alias: 'claude-sonnet', publisher: 'anthropic',
|
|
27
|
+
model_family: :anthropic, api: :raw_predict },
|
|
28
|
+
{ model: 'mistral-medium-3', alias: 'mistral-medium', publisher: 'mistralai',
|
|
29
|
+
model_family: :mistral, api: :raw_predict },
|
|
30
|
+
{ model: 'llama-4-maverick', alias: 'llama-4-maverick', publisher: 'meta',
|
|
31
|
+
model_family: :meta, api: :raw_predict }
|
|
32
|
+
].freeze
|
|
33
|
+
|
|
34
|
+
ALIASES = STATIC_MODELS.to_h { |entry| [entry.fetch(:alias), entry.fetch(:model)] }.freeze
|
|
35
|
+
PUBLISHERS = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:publisher)] }.freeze
|
|
36
|
+
API_MODES = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:api, :generate_content)] }.freeze
|
|
37
|
+
MODEL_FAMILIES = STATIC_MODELS.to_h { |entry| [entry.fetch(:model), entry.fetch(:model_family)] }.freeze
|
|
38
|
+
|
|
39
|
+
# Class-level provider contract: identity, accepted configuration keys, and alias resolution.
class << self
  # Slug identifying this provider family.
  def slug = 'vertex'

  # Configuration keys this provider reads from the shared configuration object.
  def configuration_options
    %i[
      vertex_project
      vertex_location
      vertex_api_base
      vertex_access_token
      vertex_credentials
      vertex_model_aliases
      vertex_discovery_live
    ]
  end

  # No configuration key is mandatory.
  def configuration_requirements = []

  # Module holding the capability predicates for this provider.
  def capabilities = Capabilities

  # Resolves an alias to its model id; unknown ids are returned unchanged.
  #
  # Aliases from `config.vertex_model_aliases` (when the config exposes that
  # reader) take precedence over the built-in ALIASES table. Keys and targets
  # are both normalised to strings so the result is always a String, even when
  # the overrides are written with symbols. Overrides with a nil target are
  # ignored instead of leaking nil to the caller.
  #
  # @param model_id [String, Symbol] alias or model id
  # @param config [Object, nil] optional configuration object
  # @return [String] resolved model id
  def resolve_model_id(model_id, config: nil)
    configured = config.respond_to?(:vertex_model_aliases) ? config.vertex_model_aliases : nil
    overrides = (configured || {}).each_with_object({}) do |(name, target), result|
      result[name.to_s] = target.to_s unless target.nil?
    end
    ALIASES.merge(overrides).fetch(model_id.to_s, model_id.to_s)
  end
end
|
|
63
|
+
|
|
64
|
+
# Capability predicates inferred from Vertex publisher model IDs and API modality.
|
|
65
|
+
module Capabilities
  module_function

  # Anything that is not an embedding model is treated as a chat model.
  def chat?(model) = !embeddings?(model)

  # Streaming is offered for every chat model.
  def streaming?(model) = chat?(model)

  # Vision is limited to chat models of the known multimodal families.
  # Embedding models (e.g. `gemini-embedding-001`) share a family name but
  # do not accept image input, so they are excluded explicitly.
  def vision?(model) = chat?(model) && model_id(model).match?(/gemini|claude|mistral|llama/)

  # Function calling is offered for every chat model.
  def functions?(model) = chat?(model)

  # Embedding models are recognised by their id.
  def embeddings?(model) = model_id(model).match?(/embedding|embed/)

  # Extracts the model id as a String.
  #
  # Accepts a Hash (string or symbol `model` / `id` keys, `model` preferred),
  # any object responding to `id`, or anything convertible with `to_s`.
  # A Hash carrying neither key yields an empty string.
  #
  # @param model [Hash, #id, #to_s]
  # @return [String]
  def model_id(model)
    if model.is_a?(Hash)
      value = model['model'] || model[:model] || model['id'] || model[:id]
      return value.to_s
    end

    model.respond_to?(:id) ? model.id.to_s : model.to_s
  end
end
|
|
80
|
+
|
|
81
|
+
# Base URL for Vertex AI REST calls.
#
# An explicitly configured `vertex_api_base` always wins. Otherwise the URL is
# derived from the location: regional endpoints are served from a
# location-prefixed host, while the `global` location uses the unprefixed host.
#
# @return [String]
def api_base
  configured = config.vertex_api_base
  return configured if configured

  host = location.to_s == 'global' ? 'aiplatform.googleapis.com' : "#{location}-aiplatform.googleapis.com"
  "https://#{host}/v1"
end
|
|
84
|
+
|
|
85
|
+
def headers
|
|
86
|
+
{ 'Authorization' => bearer_token, 'Content-Type' => 'application/json; charset=utf-8' }.compact
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def project = config.vertex_project || ENV.fetch('GOOGLE_CLOUD_PROJECT', DEFAULT_PROJECT)
|
|
90
|
+
def location = config.vertex_location || DEFAULT_LOCATION
|
|
91
|
+
def models_url = publisher_parent
|
|
92
|
+
def completion_url = generate_content_url(model: @model || STATIC_MODELS.first.fetch(:model))
|
|
93
|
+
def stream_url = stream_generate_content_url(model: @model || STATIC_MODELS.first.fetch(:model))
|
|
94
|
+
def count_tokens_url(model:) = "#{publisher_model_path(model)}:countTokens"
|
|
95
|
+
def embedding_url(model:) = "#{publisher_model_path(model)}:predict"
|
|
96
|
+
|
|
97
|
+
def generate_content_url(model:)
|
|
98
|
+
"#{publisher_model_path(model)}:generateContent"
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
def stream_generate_content_url(model:)
|
|
102
|
+
"#{publisher_model_path(model)}:streamGenerateContent?alt=sse"
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
def raw_predict_url(model:, stream: false)
|
|
106
|
+
suffix = stream ? 'streamRawPredict' : 'rawPredict'
|
|
107
|
+
"#{publisher_model_path(model)}:#{suffix}"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
def discover_offerings(live: false, **filters)
|
|
111
|
+
return static_offerings(**filters) unless live
|
|
112
|
+
|
|
113
|
+
response = connection.get(models_url)
|
|
114
|
+
models = response.body['publisherModels'] || response.body['models'] || []
|
|
115
|
+
models.map { |model| offering_from_live_model(model) }
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
def offering_for(model:, model_family: nil, instance_id: :default, **metadata)
|
|
119
|
+
model_id = model_id(model)
|
|
120
|
+
publisher = metadata.delete(:publisher) || publisher_for(model_id)
|
|
121
|
+
family = model_family || metadata.delete(:model_family) || model_family_for(model_id, publisher)
|
|
122
|
+
|
|
123
|
+
build_offering(
|
|
124
|
+
model: resource_name(model_id, publisher:),
|
|
125
|
+
alias_name: alias_for(model_id),
|
|
126
|
+
model_family: family,
|
|
127
|
+
instance_id: instance_id,
|
|
128
|
+
publisher: publisher,
|
|
129
|
+
usage_type: metadata.delete(:usage_type) || usage_type_for(model_id),
|
|
130
|
+
api: metadata.delete(:api) || api_for(model_id),
|
|
131
|
+
metadata: metadata
|
|
132
|
+
)
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
def health(live: false)
|
|
136
|
+
baseline = {
|
|
137
|
+
provider: :vertex,
|
|
138
|
+
project: project,
|
|
139
|
+
location: location,
|
|
140
|
+
configured: configured?,
|
|
141
|
+
ready: configured?,
|
|
142
|
+
live: live,
|
|
143
|
+
credentials: credential_source
|
|
144
|
+
}
|
|
145
|
+
return baseline.merge(checked: false) unless live
|
|
146
|
+
|
|
147
|
+
connection.get(models_url)
|
|
148
|
+
baseline.merge(checked: true)
|
|
149
|
+
rescue StandardError => e
|
|
150
|
+
baseline.merge(checked: true, ready: false, error: e.class.name, message: e.message)
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
def readiness(live: false)
|
|
154
|
+
health(live:).merge(local: false, remote: true, api_base: api_base, endpoints: endpoint_manifest)
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
def chat(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {})
|
|
158
|
+
model_id = model_id(model)
|
|
159
|
+
@model = model_id
|
|
160
|
+
payload = Utils.deep_merge(chat_payload(messages, model: model_id, temperature:, max_tokens:, tools:,
|
|
161
|
+
tool_prefs:, stream: false), params)
|
|
162
|
+
response = connection.post(chat_url(model_id, stream: false), payload)
|
|
163
|
+
parse_chat_response(response, model: model_id)
|
|
164
|
+
end
|
|
165
|
+
|
|
166
|
+
def stream(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {})
|
|
167
|
+
model_id = model_id(model)
|
|
168
|
+
@model = model_id
|
|
169
|
+
payload = Utils.deep_merge(chat_payload(messages, model: model_id, temperature:, max_tokens:, tools:,
|
|
170
|
+
tool_prefs:, stream: true), params)
|
|
171
|
+
response = connection.post(chat_url(model_id, stream: true), payload)
|
|
172
|
+
chunk = build_chunk(response.body, model: model_id)
|
|
173
|
+
yield chunk if block_given? && chunk.content
|
|
174
|
+
parse_chat_response(response, model: model_id)
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
def count_tokens(messages, model:, params: {})
|
|
178
|
+
model_id = model_id(model)
|
|
179
|
+
unless generate_content_model?(model_id)
|
|
180
|
+
return {
|
|
181
|
+
supported: false,
|
|
182
|
+
provider: :vertex,
|
|
183
|
+
model: resource_name(model_id),
|
|
184
|
+
reason: 'Vertex countTokens is standardized for generateContent publisher models'
|
|
185
|
+
}
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
payload = Utils.deep_merge({ contents: format_messages(messages) }, params)
|
|
189
|
+
response = connection.post(count_tokens_url(model: model_id), payload)
|
|
190
|
+
{ input_tokens: response.body['totalTokens'], raw: response.body }
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
def embed(text, model:, dimensions: nil, task_type: nil, title: nil, params: {})
|
|
194
|
+
model_id = model_id(model)
|
|
195
|
+
unless Capabilities.embeddings?(model_id)
|
|
196
|
+
raise NotImplementedError, "Vertex embedding payload for #{model_id} is not standardized"
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
instances = Array(text).map { |item| embedding_instance(item, task_type:, title:) }
|
|
200
|
+
parameters = { outputDimensionality: dimensions }.compact
|
|
201
|
+
payload = Utils.deep_merge({ instances: instances, parameters: parameters }, params)
|
|
202
|
+
response = connection.post(embedding_url(model: model_id), payload)
|
|
203
|
+
parse_embedding_response(response, model: model_id)
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def complete(messages, tools:, temperature:, model:, params: {}, schema: nil, thinking: nil, tool_prefs: nil,
|
|
207
|
+
&)
|
|
208
|
+
payload = params.dup
|
|
209
|
+
payload[:generationConfig] = Utils.deep_merge(payload[:generationConfig] || {},
|
|
210
|
+
generation_config(temperature, schema, thinking))
|
|
211
|
+
if block_given?
|
|
212
|
+
stream(messages, model:, temperature:, tools:, tool_prefs:, params: payload, &)
|
|
213
|
+
else
|
|
214
|
+
chat(messages, model:, temperature:, tools:, tool_prefs:, params: payload)
|
|
215
|
+
end
|
|
216
|
+
end
|
|
217
|
+
|
|
218
|
+
private
|
|
219
|
+
|
|
220
|
+
def static_offerings(**filters)
|
|
221
|
+
STATIC_MODELS.filter_map do |entry|
|
|
222
|
+
next if filters[:model_family] && entry.fetch(:model_family) != filters[:model_family].to_sym
|
|
223
|
+
next if filters[:publisher] && entry.fetch(:publisher) != filters[:publisher].to_s
|
|
224
|
+
|
|
225
|
+
offering_for(**entry.slice(:model, :model_family, :publisher, :usage_type, :api))
|
|
226
|
+
end
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
def offering_from_live_model(model)
|
|
230
|
+
name = model['name'] || model['publisherModelName'] || model['model'] || model['id']
|
|
231
|
+
publisher = publisher_from_resource(name) || model['publisher'] || DEFAULT_PUBLISHER
|
|
232
|
+
id = name.to_s.split('/').last
|
|
233
|
+
offering_for(model: id, publisher:, metadata: model)
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
def build_offering(model:, model_family:, usage_type:, publisher:, api:, instance_id: :default,
|
|
237
|
+
alias_name: nil, metadata: {})
|
|
238
|
+
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
239
|
+
provider_family: :vertex,
|
|
240
|
+
instance_id: instance_id,
|
|
241
|
+
transport: :http,
|
|
242
|
+
tier: :frontier,
|
|
243
|
+
model: model,
|
|
244
|
+
usage_type: usage_type,
|
|
245
|
+
capabilities: default_capabilities(model, api:),
|
|
246
|
+
limits: metadata.delete(:limits) || {},
|
|
247
|
+
metadata: metadata.merge(
|
|
248
|
+
model_family: model_family,
|
|
249
|
+
alias: alias_name,
|
|
250
|
+
publisher: publisher,
|
|
251
|
+
project: project,
|
|
252
|
+
location: location,
|
|
253
|
+
api: api
|
|
254
|
+
).compact
|
|
255
|
+
)
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def publisher_parent
|
|
259
|
+
"projects/#{project}/locations/#{location}/publishers/#{DEFAULT_PUBLISHER}/models"
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
# Builds the `projects/.../publishers/.../models/...` path for a model.
#
# Accepts a bare model id, a full resource name, or a resource name prefixed
# with the API base URL. The API base is stripped before the resource-name
# check: a value that still starts with the base URL can never start with
# `projects/`, so the strip has to happen first to take effect.
#
# @param model [Object] anything accepted by `model_id`
# @return [String] resource path relative to the API base
def publisher_model_path(model)
  id = model_id(model).delete_prefix("#{api_base}/")
  return id if id.start_with?('projects/')

  "projects/#{project}/locations/#{location}/publishers/#{publisher_for(id)}/models/#{id}"
end
|
|
268
|
+
|
|
269
|
+
def resource_name(model, publisher: nil)
|
|
270
|
+
id = model_id(model)
|
|
271
|
+
return id if id.start_with?('projects/')
|
|
272
|
+
|
|
273
|
+
"projects/#{project}/locations/#{location}/publishers/#{publisher || publisher_for(id)}/models/#{id}"
|
|
274
|
+
end
|
|
275
|
+
|
|
276
|
+
def chat_url(model, stream:)
|
|
277
|
+
return raw_predict_url(model:, stream:) unless generate_content_model?(model)
|
|
278
|
+
|
|
279
|
+
stream ? stream_generate_content_url(model:) : generate_content_url(model:)
|
|
280
|
+
end
|
|
281
|
+
|
|
282
|
+
def chat_payload(messages, model:, temperature:, max_tokens:, tools:, tool_prefs:, stream:)
|
|
283
|
+
if generate_content_model?(model)
|
|
284
|
+
generate_content_payload(messages, temperature:, max_tokens:, tools:, tool_prefs:)
|
|
285
|
+
else
|
|
286
|
+
raw_predict_payload(messages, model:, temperature:, max_tokens:, stream:)
|
|
287
|
+
end
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
def generate_content_payload(messages, temperature:, max_tokens:, tools:, tool_prefs:)
|
|
291
|
+
{
|
|
292
|
+
contents: format_messages(messages.reject { |message| message.role == :system }),
|
|
293
|
+
systemInstruction: system_instruction(messages),
|
|
294
|
+
generationConfig: generation_config(temperature, nil, nil, max_tokens:),
|
|
295
|
+
tools: format_tools(tools),
|
|
296
|
+
toolConfig: tool_config(tool_prefs)
|
|
297
|
+
}.compact
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
def raw_predict_payload(messages, model:, temperature:, max_tokens:, stream:)
|
|
301
|
+
{
|
|
302
|
+
model: model,
|
|
303
|
+
messages: messages.reject { |message| message.role == :system }.map do |message|
|
|
304
|
+
{ role: raw_role(message.role), content: content_text(message.content) }
|
|
305
|
+
end,
|
|
306
|
+
temperature: temperature,
|
|
307
|
+
max_tokens: max_tokens,
|
|
308
|
+
stream: stream
|
|
309
|
+
}.compact
|
|
310
|
+
end
|
|
311
|
+
|
|
312
|
+
def generation_config(temperature, schema, thinking, max_tokens: nil)
|
|
313
|
+
{
|
|
314
|
+
temperature: temperature,
|
|
315
|
+
maxOutputTokens: max_tokens,
|
|
316
|
+
responseMimeType: ('application/json' if schema),
|
|
317
|
+
responseSchema: schema_hash(schema),
|
|
318
|
+
thinkingConfig: thinking_config(thinking)
|
|
319
|
+
}.compact
|
|
320
|
+
end
|
|
321
|
+
|
|
322
|
+
def schema_hash(schema)
|
|
323
|
+
return unless schema
|
|
324
|
+
|
|
325
|
+
schema.respond_to?(:to_h) ? schema.to_h.fetch(:schema, schema.to_h) : schema
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
# Builds the `thinkingConfig` fragment of the generation config.
#
# The budget is read from `thinking.budget` when the object responds to it,
# or from the `:budget` / `'budget'` key when a Hash is given. Returns nil
# when no budget is available so the caller's `.compact` drops the key
# instead of sending an empty `thinkingConfig` object. A budget of 0 is kept.
#
# @param thinking [#budget, Hash, nil]
# @return [Hash, nil]
def thinking_config(thinking)
  return nil unless thinking

  budget = thinking.budget if thinking.respond_to?(:budget)
  budget ||= thinking[:budget] || thinking['budget'] if thinking.is_a?(Hash)
  budget.nil? ? nil : { thinkingBudget: budget }
end
|
|
335
|
+
|
|
336
|
+
def system_instruction(messages)
|
|
337
|
+
parts = messages.select { |message| message.role == :system }
|
|
338
|
+
.flat_map { |message| content_parts(message.content) }
|
|
339
|
+
return nil if parts.empty?
|
|
340
|
+
|
|
341
|
+
{ parts: parts }
|
|
342
|
+
end
|
|
343
|
+
|
|
344
|
+
# Converts messages into Vertex `contents` entries (`role` + `parts`).
#
# While walking the conversation it records the function name behind every
# tool-call id, so that later tool-result messages can report the function
# name in `functionResponse.name` rather than the opaque tool-call id.
#
# @param messages [Enumerable] messages responding to `role`, `tool_call?`,
#   `tool_result?`, `tool_calls` and `tool_call_id`
# @return [Array<Hash>]
def format_messages(messages)
  function_names = {}
  messages.map do |message|
    # `tool_calls` is keyed by tool-call id.
    message.tool_calls.each { |id, tool_call| function_names[id.to_s] = tool_call.name } if message.tool_call?
    { role: vertex_role(message.role), parts: function_response_parts(message, function_names) }
  end
end

# Returns the parts for a message, rewriting the function name of tool-result
# parts when the originating tool call was seen earlier in the conversation.
# Messages without a known tool call keep the parts built by `message_parts`.
def function_response_parts(message, function_names)
  parts = message_parts(message)
  return parts if message.tool_call? || !message.tool_result?

  function_name = function_names[message.tool_call_id.to_s]
  return parts unless function_name

  parts.map do |part|
    response = part[:functionResponse]
    next part unless response

    payload = response[:response]
    payload = payload.merge(name: function_name) if payload.is_a?(Hash)
    part.merge(functionResponse: response.merge(name: function_name, response: payload))
  end
end
|
|
347
|
+
|
|
348
|
+
def vertex_role(role)
|
|
349
|
+
role == :assistant ? 'model' : 'user'
|
|
350
|
+
end
|
|
351
|
+
|
|
352
|
+
def raw_role(role)
|
|
353
|
+
role == :assistant ? 'assistant' : 'user'
|
|
354
|
+
end
|
|
355
|
+
|
|
356
|
+
def message_parts(message)
|
|
357
|
+
return tool_call_parts(message) if message.tool_call?
|
|
358
|
+
return tool_result_parts(message) if message.tool_result?
|
|
359
|
+
|
|
360
|
+
content_parts(message.content)
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
def content_parts(content)
|
|
364
|
+
return Array(content.value) if content.is_a?(Legion::Extensions::Llm::Content::Raw)
|
|
365
|
+
return [{ text: Legion::JSON.generate(content) }] if content.is_a?(Hash) || content.is_a?(Array)
|
|
366
|
+
return [{ text: content.to_s }] unless content.is_a?(Legion::Extensions::Llm::Content)
|
|
367
|
+
|
|
368
|
+
parts = []
|
|
369
|
+
parts << { text: content.text } if content.text
|
|
370
|
+
content.attachments.each { |attachment| parts << attachment_part(attachment) }
|
|
371
|
+
parts
|
|
372
|
+
end
|
|
373
|
+
|
|
374
|
+
def attachment_part(attachment)
|
|
375
|
+
if attachment.text?
|
|
376
|
+
{ text: attachment.for_llm }
|
|
377
|
+
else
|
|
378
|
+
{ inlineData: { mimeType: attachment.mime_type, data: attachment.encoded } }
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
def content_text(content)
|
|
383
|
+
return content.text if content.respond_to?(:text)
|
|
384
|
+
|
|
385
|
+
content.to_s
|
|
386
|
+
end
|
|
387
|
+
|
|
388
|
+
def tool_call_parts(message)
|
|
389
|
+
message.tool_calls.values.map do |tool_call|
|
|
390
|
+
{ functionCall: { name: tool_call.name, args: tool_call.arguments } }
|
|
391
|
+
end
|
|
392
|
+
end
|
|
393
|
+
|
|
394
|
+
def tool_result_parts(message)
|
|
395
|
+
[{
|
|
396
|
+
functionResponse: {
|
|
397
|
+
name: message.tool_call_id,
|
|
398
|
+
response: { name: message.tool_call_id, content: content_parts(message.content) }
|
|
399
|
+
}
|
|
400
|
+
}]
|
|
401
|
+
end
|
|
402
|
+
|
|
403
|
+
# Converts the tool registry into a Vertex `tools` array with a single
# `functionDeclarations` entry.
#
# Returns nil when no tools are given (nil or empty) so the caller's
# `.compact` omits the key. `parameters` is only included for tools that
# expose a non-nil `params_schema`; the schema is read once per tool.
#
# @param tools [Hash, nil] tools keyed by name; each responds to `name` and `description`
# @return [Array<Hash>, nil]
def format_tools(tools)
  return nil if tools.nil? || tools.empty?

  declarations = tools.values.map do |tool|
    declaration = { name: tool.name, description: tool.description }
    schema = tool.params_schema if tool.respond_to?(:params_schema)
    declaration[:parameters] = schema if schema
    declaration
  end
  [{ functionDeclarations: declarations }]
end
|
|
414
|
+
|
|
415
|
+
def tool_config(tool_prefs)
|
|
416
|
+
return nil unless tool_prefs
|
|
417
|
+
|
|
418
|
+
choice = tool_prefs[:choice] || tool_prefs['choice']
|
|
419
|
+
return nil unless choice
|
|
420
|
+
|
|
421
|
+
{ functionCallingConfig: { mode: choice.to_s } }
|
|
422
|
+
end
|
|
423
|
+
|
|
424
|
+
def parse_chat_response(response, model:)
|
|
425
|
+
body = response.body
|
|
426
|
+
if generate_content_model?(model)
|
|
427
|
+
parse_generate_content_response(body, model:)
|
|
428
|
+
else
|
|
429
|
+
parse_raw_predict_response(body, model:)
|
|
430
|
+
end
|
|
431
|
+
end
|
|
432
|
+
|
|
433
|
+
def parse_generate_content_response(body, model:)
|
|
434
|
+
parts = response_parts(body)
|
|
435
|
+
usage = body['usageMetadata'] || {}
|
|
436
|
+
|
|
437
|
+
Legion::Extensions::Llm::Message.new(
|
|
438
|
+
role: :assistant,
|
|
439
|
+
content: text_content(parts),
|
|
440
|
+
tool_calls: parse_tool_calls(parts),
|
|
441
|
+
input_tokens: usage['promptTokenCount'],
|
|
442
|
+
output_tokens: output_tokens(usage),
|
|
443
|
+
cached_tokens: usage['cachedContentTokenCount'],
|
|
444
|
+
thinking_tokens: usage['thoughtsTokenCount'],
|
|
445
|
+
model_id: body['modelVersion'] || model,
|
|
446
|
+
raw: body
|
|
447
|
+
)
|
|
448
|
+
end
|
|
449
|
+
|
|
450
|
+
def parse_raw_predict_response(body, model:)
|
|
451
|
+
choice = Array(body['choices']).first || {}
|
|
452
|
+
message = choice['message'] || {}
|
|
453
|
+
usage = body['usage'] || {}
|
|
454
|
+
|
|
455
|
+
Legion::Extensions::Llm::Message.new(
|
|
456
|
+
role: :assistant,
|
|
457
|
+
content: message['content'] || choice['text'],
|
|
458
|
+
input_tokens: usage['prompt_tokens'],
|
|
459
|
+
output_tokens: usage['completion_tokens'],
|
|
460
|
+
model_id: body['model'] || model,
|
|
461
|
+
raw: body
|
|
462
|
+
)
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
def build_chunk(body, model:)
|
|
466
|
+
parts = response_parts(body)
|
|
467
|
+
return raw_chunk(body, model:) if parts.empty?
|
|
468
|
+
|
|
469
|
+
usage = body['usageMetadata'] || {}
|
|
470
|
+
Legion::Extensions::Llm::Chunk.new(
|
|
471
|
+
role: :assistant,
|
|
472
|
+
content: text_content(parts),
|
|
473
|
+
input_tokens: usage['promptTokenCount'],
|
|
474
|
+
output_tokens: output_tokens(usage),
|
|
475
|
+
model_id: body['modelVersion'] || model,
|
|
476
|
+
raw: body
|
|
477
|
+
)
|
|
478
|
+
end
|
|
479
|
+
|
|
480
|
+
# Builds a streaming Chunk from a `choices`-style payload.
#
# @param body [Hash] decoded stream payload; reads `choices` and `model`
# @param model [Object] fallback model identifier used when the body has no `model`
# @return [Legion::Extensions::Llm::Chunk]
def raw_chunk(body, model:)
  first_choice = Array(body['choices']).first
  # The first choice's `delta` wins; `message` is the fallback.
  payload = first_choice&.dig('delta') || first_choice&.dig('message') || {}

  Legion::Extensions::Llm::Chunk.new(
    role: :assistant,
    content: payload['content'],
    model_id: body['model'] || model,
    raw: body
  )
end
|
|
485
|
+
|
|
486
|
+
# Returns the `parts` array of the first candidate, or an empty array when the
# path `candidates[0].content.parts` is missing.
#
# @param body [Hash] decoded response body
# @return [Array]
def response_parts(body)
  first_candidate_parts = body.dig('candidates', 0, 'content', 'parts')
  first_candidate_parts || []
end
|
|
489
|
+
|
|
490
|
+
# Concatenates the `text` of every part that is not flagged as a `thought`.
#
# @param parts [Array<Hash>] candidate parts
# @return [String, nil] the joined text, or nil when nothing remains
def text_content(parts)
  visible_texts = parts.filter_map { |part| part['text'] unless part['thought'] }
  joined = visible_texts.join
  return nil if joined.empty?

  joined
end
|
|
494
|
+
|
|
495
|
+
# Adds `candidatesTokenCount` and `thoughtsTokenCount`, treating a missing
# counter as zero.
#
# @param usage [Hash] usage metadata
# @return [Integer, nil] the combined count, or nil when it is not positive
def output_tokens(usage)
  combined = (usage['candidatesTokenCount'] || 0) + (usage['thoughtsTokenCount'] || 0)
  return nil unless combined.positive?

  combined
end
|
|
501
|
+
|
|
502
|
+
# Extracts tool calls from the `functionCall` entries of the candidate parts.
#
# When a `functionCall` carries its own non-blank `id`, that id is kept so the
# call can be correlated with the provider's payload. Otherwise (or if the id
# was already used by an earlier part) a random UUID is generated.
#
# @param parts [Array<Hash>] candidate parts
# @return [Hash{String => Legion::Extensions::Llm::ToolCall}, nil] tool calls
#   keyed by id, or nil when no part contains a `functionCall`
def parse_tool_calls(parts)
  calls = parts.each_with_object({}) do |part, result|
    function_call = part['functionCall']
    next unless function_call

    id = function_call['id'].to_s
    # Never let a blank or repeated id overwrite a previously collected call.
    id = SecureRandom.uuid if id.empty? || result.key?(id)

    result[id] = Legion::Extensions::Llm::ToolCall.new(
      id: id,
      name: function_call['name'],
      arguments: function_call['args'] || {}
    )
  end
  calls.empty? ? nil : calls
end
|
|
516
|
+
|
|
517
|
+
# Converts an embeddings predict response into an Embedding.
#
# Each prediction contributes one vector, read from `embeddings.values` or,
# failing that, from a top-level `values`. A single prediction is unwrapped so
# `vectors` is that one vector rather than a one-element list.
#
# `input_tokens` is the sum of `embeddings.statistics.token_count` over all
# predictions, so batch requests report their full usage instead of only the
# first input's count.
#
# @param response [#body] HTTP response whose `body` is the decoded Hash
# @param model [Object] model identifier recorded on the result
# @return [Legion::Extensions::Llm::Embedding]
def parse_embedding_response(response, model:)
  predictions = response.body['predictions'] || []

  vectors = predictions.map do |prediction|
    prediction.dig('embeddings', 'values') || prediction['values']
  end
  vectors = vectors.first if vectors.length == 1

  input_tokens = predictions.sum do |prediction|
    prediction.dig('embeddings', 'statistics', 'token_count') || 0
  end

  Legion::Extensions::Llm::Embedding.new(vectors: vectors, model: model, input_tokens: input_tokens)
end
|
|
527
|
+
|
|
528
|
+
# Builds one embedding request instance, omitting every nil-valued field.
#
# @param text [Object] value stored under `:content`
# @param task_type [Object, nil] value stored under `:task_type`
# @param title [Object, nil] value stored under `:title`
# @return [Hash{Symbol => Object}]
def embedding_instance(text, task_type:, title:)
  fields = { content: text, task_type: task_type, title: title }
  fields.reject { |_name, value| value.nil? }
end
|
|
531
|
+
|
|
532
|
+
# Derives the default capability list for a model.
#
# Embedding models get only `:embedding`. Every other model starts with `:chat`
# and gains `:streaming`, `:vision` and `:functions` when the respective check
# passes.
#
# @param model [Object] model identifier
# @param api [Symbol] API mode for the model
# @return [Array<Symbol>]
def default_capabilities(model, api:)
  if Capabilities.embeddings?(model)
    %i[embedding]
  else
    streaming_apis = %i[generate_content raw_predict]

    supported = %i[chat]
    supported.push(:streaming) if streaming_apis.include?(api)
    supported.push(:vision) if Capabilities.vision?(model)
    supported.push(:functions) if generate_content_model?(model)
    supported
  end
end
|
|
541
|
+
|
|
542
|
+
# Formats the configured access token as a bearer credential string.
#
# @return [String, nil] "Bearer <token>", or nil when no access token is configured
def bearer_token
  access_token = config.vertex_access_token
  return nil unless access_token

  "Bearer #{access_token}"
end
|
|
546
|
+
|
|
547
|
+
# Reports which credential mechanism the current configuration selects.
#
# A configured access token takes precedence over a credentials file; with
# neither set the application-default credentials symbol is returned.
#
# @return [Symbol] :access_token, :credentials_file or
#   :google_application_default_credentials
def credential_source
  if config.vertex_access_token
    :access_token
  elsif config.vertex_credentials
    :credentials_file
  else
    :google_application_default_credentials
  end
end
|
|
553
|
+
|
|
554
|
+
# Resolves a model object or raw identifier through the class-level
# `resolve_model_id`.
#
# @param model [#id, Object] an object exposing `id`, or the identifier itself
# @return [Object] whatever `resolve_model_id` returns for that identifier
def model_id(model)
  identifier = model
  identifier = model.id if model.respond_to?(:id)

  self.class.resolve_model_id(identifier, config: config)
end
|
|
558
|
+
|
|
559
|
+
# Determines the publisher for a model.
#
# Identifiers starting with "projects/" are parsed as resource paths; all
# others are looked up in PUBLISHERS with DEFAULT_PUBLISHER as the fallback.
#
# @param model [Object] model object or identifier
# @return [String, nil] nil when a resource path has no publisher segment
def publisher_for(model)
  resolved = model_id(model)

  if resolved.start_with?('projects/')
    publisher_from_resource(resolved)
  else
    PUBLISHERS.fetch(resolved, DEFAULT_PUBLISHER)
  end
end
|
|
565
|
+
|
|
566
|
+
# Extracts the publisher segment from a ".../publishers/<name>/models/..." path.
#
# @param resource [#to_s] resource path
# @return [String, nil] the publisher name, or nil when the pattern is absent
def publisher_from_resource(resource)
  resource.to_s[%r{/publishers/([^/]+)/models/}, 1]
end
|
|
570
|
+
|
|
571
|
+
# Selects the API mode for a model.
#
# An explicit API_MODES entry wins. Otherwise models from a publisher other
# than DEFAULT_PUBLISHER that are not embedding models use :raw_predict, and
# everything else uses :generate_content.
#
# @param model [Object] model object or identifier
# @return [Object] the API_MODES entry, :raw_predict or :generate_content
def api_for(model)
  resolved = model_id(model)
  return API_MODES[resolved] if API_MODES.key?(resolved)

  third_party = publisher_for(resolved) != DEFAULT_PUBLISHER
  if third_party && !Capabilities.embeddings?(resolved)
    :raw_predict
  else
    :generate_content
  end
end
|
|
578
|
+
|
|
579
|
+
# Tells whether the model's API mode is :generate_content.
#
# @param model [Object] model object or identifier
# @return [Boolean]
def generate_content_model?(model)
  selected_api = api_for(model)
  selected_api == :generate_content
end
|
|
582
|
+
|
|
583
|
+
# Classifies a model's usage as embedding or inference.
#
# @param model [Object] model identifier passed to Capabilities.embeddings?
# @return [Symbol] :embedding or :inference
def usage_type_for(model)
  return :embedding if Capabilities.embeddings?(model)

  :inference
end
|
|
586
|
+
|
|
587
|
+
# Determines the model family.
#
# An explicit MODEL_FAMILIES entry wins; otherwise the family is derived from
# the given publisher, or from the model's own publisher when none is passed.
#
# @param model [Object] model object or identifier
# @param publisher [Object, nil] optional publisher override
# @return [Object] the MODEL_FAMILIES entry or the normalized publisher symbol
def model_family_for(model, publisher = nil)
  resolved = model_id(model)

  if MODEL_FAMILIES.key?(resolved)
    MODEL_FAMILIES[resolved]
  else
    source = publisher || publisher_for(resolved)
    normalized_family(source)
  end
end
|
|
593
|
+
|
|
594
|
+
# Normalizes a publisher name into a family symbol.
#
# The name is lower-cased and hyphens become underscores; "google" maps to
# :gemini and "mistralai" to :mistral, any other name is symbolized as is.
#
# @param provider [#to_s] publisher name
# @return [Symbol]
def normalized_family(provider)
  key = provider.to_s.downcase.tr('-', '_')

  case key
  when 'google' then :gemini
  when 'mistralai' then :mistral
  else key.to_sym
  end
end
|
|
601
|
+
|
|
602
|
+
# Looks up the ALIASES key whose value is the model's resolved identifier.
#
# @param model [Object] model object or identifier
# @return [Object, nil] the ALIASES key, or nil when no entry has that value
def alias_for(model)
  resolved = model_id(model)
  ALIASES.key(resolved)
end
|
|
605
|
+
end
|
|
606
|
+
end
|
|
607
|
+
end
|
|
608
|
+
end
|
|
609
|
+
end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/llm'
|
|
4
|
+
require 'legion/extensions/llm/vertex/provider'
|
|
5
|
+
require 'legion/extensions/llm/vertex/version'
|
|
6
|
+
|
|
7
|
+
module Legion
  module Extensions
    module Llm
      # Google Cloud Vertex AI provider extension namespace.
      module Vertex
        # Core is mixed in only when the host has defined it directly on Legion::Extensions.
        extend ::Legion::Extensions::Core if ::Legion::Extensions.const_defined?(:Core, false)

        PROVIDER_FAMILY = :vertex

        class << self
          # Default settings for this provider, built through
          # Legion::Extensions::Llm.provider_settings.
          #
          # @return [Object] whatever `provider_settings` returns
          def default_settings
            discovery = { enabled: true, live: false, locations: %w[us-central1 us-east5 europe-west4] }
            credentials = {
              provider: 'google-application-default-credentials',
              access_token: 'env://VERTEX_ACCESS_TOKEN',
              credentials_file: 'env://GOOGLE_APPLICATION_CREDENTIALS'
            }
            instance = {
              endpoint: 'https://us-central1-aiplatform.googleapis.com/v1',
              project: 'env://GOOGLE_CLOUD_PROJECT',
              location: 'us-central1',
              tier: :frontier,
              transport: :http,
              credentials: credentials,
              usage: { inference: true, embedding: true, token_counting: true },
              limits: { concurrency: 4 }
            }

            ::Legion::Extensions::Llm.provider_settings(
              family: PROVIDER_FAMILY,
              discovery: discovery,
              instance: instance
            )
          end

          # @return [Class] the provider implementation for this extension
          def provider_class
            Provider
          end
        end
      end
    end
  end
end
|
|
44
|
+
|
|
45
|
+
# Register the Vertex provider class under its family key when this file is loaded.
Legion::Extensions::Llm::Provider.register(
  Legion::Extensions::Llm::Vertex::PROVIDER_FAMILY,
  Legion::Extensions::Llm::Vertex::Provider
)
|
metadata
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lex-llm-vertex
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- LegionIO
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: legion-json
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 1.2.1
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 1.2.1
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: legion-logging
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 1.3.2
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 1.3.2
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: legion-settings
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 1.3.14
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 1.3.14
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: lex-llm
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: 0.1.3
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: 0.1.3
|
|
68
|
+
description: Google Cloud Vertex AI provider integration for the LegionIO LLM routing
|
|
69
|
+
framework.
|
|
70
|
+
email:
|
|
71
|
+
- matthewdiverson@gmail.com
|
|
72
|
+
executables: []
|
|
73
|
+
extensions: []
|
|
74
|
+
extra_rdoc_files: []
|
|
75
|
+
files:
|
|
76
|
+
- ".github/CODEOWNERS"
|
|
77
|
+
- ".github/dependabot.yml"
|
|
78
|
+
- ".github/workflows/ci.yml"
|
|
79
|
+
- ".gitignore"
|
|
80
|
+
- ".rubocop.yml"
|
|
81
|
+
- CHANGELOG.md
|
|
82
|
+
- Gemfile
|
|
83
|
+
- LICENSE
|
|
84
|
+
- README.md
|
|
85
|
+
- lex-llm-vertex.gemspec
|
|
86
|
+
- lib/legion/extensions/llm/vertex.rb
|
|
87
|
+
- lib/legion/extensions/llm/vertex/provider.rb
|
|
88
|
+
- lib/legion/extensions/llm/vertex/version.rb
|
|
89
|
+
homepage: https://github.com/LegionIO/lex-llm-vertex
|
|
90
|
+
licenses:
|
|
91
|
+
- MIT
|
|
92
|
+
metadata:
|
|
93
|
+
homepage_uri: https://github.com/LegionIO/lex-llm-vertex
|
|
94
|
+
source_code_uri: https://github.com/LegionIO/lex-llm-vertex
|
|
95
|
+
documentation_uri: https://github.com/LegionIO/lex-llm-vertex
|
|
96
|
+
changelog_uri: https://github.com/LegionIO/lex-llm-vertex/blob/main/CHANGELOG.md
|
|
97
|
+
bug_tracker_uri: https://github.com/LegionIO/lex-llm-vertex/issues
|
|
98
|
+
rubygems_mfa_required: 'true'
|
|
99
|
+
rdoc_options: []
|
|
100
|
+
require_paths:
|
|
101
|
+
- lib
|
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
103
|
+
requirements:
|
|
104
|
+
- - ">="
|
|
105
|
+
- !ruby/object:Gem::Version
|
|
106
|
+
version: '3.4'
|
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
|
+
requirements:
|
|
109
|
+
- - ">="
|
|
110
|
+
- !ruby/object:Gem::Version
|
|
111
|
+
version: '0'
|
|
112
|
+
requirements: []
|
|
113
|
+
rubygems_version: 3.6.9
|
|
114
|
+
specification_version: 4
|
|
115
|
+
summary: LegionIO LLM Google Cloud Vertex AI provider extension
|
|
116
|
+
test_files: []
|