lex-llm-azure-foundry 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/CODEOWNERS +2 -0
- data/.github/dependabot.yml +18 -0
- data/.github/workflows/ci.yml +16 -0
- data/.gitignore +12 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +8 -0
- data/Gemfile +20 -0
- data/LICENSE +21 -0
- data/README.md +125 -0
- data/lex-llm-azure-foundry.gemspec +30 -0
- data/lib/legion/extensions/llm/azure_foundry/provider.rb +357 -0
- data/lib/legion/extensions/llm/azure_foundry/version.rb +11 -0
- data/lib/legion/extensions/llm/azure_foundry.rb +47 -0
- metadata +116 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 33f564df8dc0e10567cb8780f15976c66d399ca545a6303674bd67e11896e404
|
|
4
|
+
data.tar.gz: bd35a164c1adb9c8c1ed3eaa590dc97908b2c2bfca74d09e2784ae41349d0c24
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 861eb2e467c3ef97123dd06705cd5eccbfb708573231e85cc171a10a8b7749bca5001cbe92e34eb5a8fb5a0a275277350f245a45daeba38b3efd843d8ae5c65b
|
|
7
|
+
data.tar.gz: a78aaace8611a9f72beb67f524d7e6c949f7c737698d77770280ab9d8bbee4305720ebed779d65e3df26971eb2ad2c8a457bd3b9640e5dc905e83b8a8136b593
|
data/.github/CODEOWNERS
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
version: 2
|
|
2
|
+
updates:
|
|
3
|
+
- package-ecosystem: bundler
|
|
4
|
+
directory: /
|
|
5
|
+
schedule:
|
|
6
|
+
interval: weekly
|
|
7
|
+
day: monday
|
|
8
|
+
open-pull-requests-limit: 5
|
|
9
|
+
labels:
|
|
10
|
+
- "type:dependencies"
|
|
11
|
+
- package-ecosystem: github-actions
|
|
12
|
+
directory: /
|
|
13
|
+
schedule:
|
|
14
|
+
interval: weekly
|
|
15
|
+
day: monday
|
|
16
|
+
open-pull-requests-limit: 5
|
|
17
|
+
labels:
|
|
18
|
+
- "type:dependencies"
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
on:
|
|
3
|
+
push:
|
|
4
|
+
branches: [main]
|
|
5
|
+
pull_request:
|
|
6
|
+
|
|
7
|
+
jobs:
|
|
8
|
+
ci:
|
|
9
|
+
uses: LegionIO/.github/.github/workflows/ci.yml@main
|
|
10
|
+
|
|
11
|
+
release:
|
|
12
|
+
needs: ci
|
|
13
|
+
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
|
14
|
+
uses: LegionIO/.github/.github/workflows/release.yml@main
|
|
15
|
+
secrets:
|
|
16
|
+
rubygems-api-key: ${{ secrets.RUBYGEMS_API_KEY }}
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
plugins:
|
|
2
|
+
- rubocop-performance
|
|
3
|
+
- rubocop-rake
|
|
4
|
+
- rubocop-rspec
|
|
5
|
+
|
|
6
|
+
AllCops:
|
|
7
|
+
NewCops: enable
|
|
8
|
+
TargetRubyVersion: 3.4
|
|
9
|
+
SuggestExtensions: false
|
|
10
|
+
|
|
11
|
+
Metrics/BlockLength:
|
|
12
|
+
Exclude:
|
|
13
|
+
- "*.gemspec"
|
|
14
|
+
- spec/**/*
|
|
15
|
+
Metrics/MethodLength:
|
|
16
|
+
Enabled: false
|
|
17
|
+
RSpec/MultipleExpectations:
|
|
18
|
+
Enabled: false
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
## 0.1.0 - 2026-04-28
|
|
4
|
+
|
|
5
|
+
- Initial Legion LLM Azure AI Foundry provider extension scaffold.
|
|
6
|
+
- Add Azure AI Foundry model inference and Azure OpenAI v1-compatible endpoint mapping.
|
|
7
|
+
- Add offline deployment-based offering discovery with explicit model-family and canonical-alias metadata.
|
|
8
|
+
- Add chat, streaming, embeddings, health, and token-count placeholder provider methods without requiring live Azure access.
|
data/Gemfile
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
source 'https://rubygems.org'
|
|
4
|
+
|
|
5
|
+
group :test do
|
|
6
|
+
llm_base_path = ENV.fetch('LEX_LLM_PATH', File.expand_path('../lex-llm', __dir__))
|
|
7
|
+
gem 'lex-llm', path: llm_base_path if File.directory?(llm_base_path)
|
|
8
|
+
end
|
|
9
|
+
|
|
10
|
+
gemspec
|
|
11
|
+
|
|
12
|
+
group :development do
|
|
13
|
+
gem 'bundler', '>= 2.0'
|
|
14
|
+
gem 'rake', '>= 13.0'
|
|
15
|
+
gem 'rspec', '~> 3.12'
|
|
16
|
+
gem 'rubocop', '>= 1.0'
|
|
17
|
+
gem 'rubocop-performance'
|
|
18
|
+
gem 'rubocop-rake', '>= 0.6'
|
|
19
|
+
gem 'rubocop-rspec'
|
|
20
|
+
end
|
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Carmine Paolino
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
# lex-llm-azure-foundry
|
|
2
|
+
|
|
3
|
+
LegionIO LLM provider extension for Azure AI Foundry Models and Azure OpenAI hosted deployments.
|
|
4
|
+
|
|
5
|
+
This gem lives under `Legion::Extensions::Llm::AzureFoundry` and depends on `lex-llm` for shared provider-neutral routing, fleet, model-offering, and schema primitives.
|
|
6
|
+
|
|
7
|
+
Load it with `require 'legion/extensions/llm/azure_foundry'`.
|
|
8
|
+
|
|
9
|
+
## What It Provides
|
|
10
|
+
|
|
11
|
+
- `Legion::Extensions::Llm::Provider` registration as `:azure_foundry`
|
|
12
|
+
- Azure AI Foundry model inference chat completions through `POST /models/chat/completions?api-version=...`
|
|
13
|
+
- Azure AI Foundry model inference embeddings through `POST /models/embeddings?api-version=...`
|
|
14
|
+
- Azure AI Foundry model info health check through `GET /models/info?api-version=...` when `live: true`
|
|
15
|
+
- Azure OpenAI v1-compatible endpoint support through `/openai/v1/chat/completions` and `/openai/v1/embeddings`
|
|
16
|
+
- deployment-name-preserving routing offerings for hosted Azure deployments
|
|
17
|
+
- explicit `model_family` and `canonical_model_alias` metadata for deployments whose base model cannot be proven from Azure metadata
|
|
18
|
+
- offline-first discovery from configured deployments
|
|
19
|
+
- shared OpenAI-compatible request and response mapping via `Legion::Extensions::Llm::Provider::OpenAICompatible`
|
|
20
|
+
- conservative token-counting metadata when no portable Azure token-counting REST endpoint is configured
|
|
21
|
+
|
|
22
|
+
## API Contract
|
|
23
|
+
|
|
24
|
+
The implementation follows Microsoft Learn REST documentation for Azure AI Foundry Models:
|
|
25
|
+
|
|
26
|
+
- Azure AI Foundry model inference endpoints use deployment names as the request `model`.
|
|
27
|
+
- The model inference endpoint supports chat completions and embeddings.
|
|
28
|
+
- The documented model-info endpoint is used only for explicit live health checks.
|
|
29
|
+
- Azure deployment metadata is not assumed to reliably prove base model family or version, so routing metadata should be configured explicitly.
|
|
30
|
+
|
|
31
|
+
## Defaults
|
|
32
|
+
|
|
33
|
+
```ruby
|
|
34
|
+
Legion::Extensions::Llm::AzureFoundry.default_settings
|
|
35
|
+
# {
|
|
36
|
+
# provider_family: :azure_foundry,
|
|
37
|
+
# discovery: { enabled: true, live: false },
|
|
38
|
+
# instances: {
|
|
39
|
+
# default: {
|
|
40
|
+
# endpoint: "https://<resource>.services.ai.azure.com",
|
|
41
|
+
# api_version: "2024-05-01-preview",
|
|
42
|
+
# surface: :model_inference,
|
|
43
|
+
# tier: :frontier,
|
|
44
|
+
# transport: :http,
|
|
45
|
+
# credentials: {
|
|
46
|
+
# api_key: "env://AZURE_INFERENCE_CREDENTIAL",
|
|
47
|
+
# bearer_token: "env://AZURE_FOUNDRY_BEARER_TOKEN",
|
|
48
|
+
# entra_scope: "https://cognitiveservices.azure.com/.default"
|
|
49
|
+
# },
|
|
50
|
+
# deployments: [],
|
|
51
|
+
# usage: { inference: true, embedding: true, token_counting: false },
|
|
52
|
+
# limits: { concurrency: 4 }
|
|
53
|
+
# }
|
|
54
|
+
# }
|
|
55
|
+
# }
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
## Configuration
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
Legion::Extensions::Llm.configure do |config|
|
|
62
|
+
config.azure_foundry_endpoint = ENV.fetch("AZURE_FOUNDRY_ENDPOINT")
|
|
63
|
+
config.azure_foundry_api_key = ENV["AZURE_INFERENCE_CREDENTIAL"]
|
|
64
|
+
config.azure_foundry_bearer_token = ENV["AZURE_FOUNDRY_BEARER_TOKEN"]
|
|
65
|
+
config.azure_foundry_api_version = "2024-05-01-preview"
|
|
66
|
+
config.azure_foundry_surface = :model_inference
|
|
67
|
+
config.azure_foundry_deployments = [
|
|
68
|
+
{
|
|
69
|
+
deployment: "gpt-4o-prod",
|
|
70
|
+
model_family: :openai,
|
|
71
|
+
canonical_model_alias: "gpt-4o",
|
|
72
|
+
usage_type: :inference
|
|
73
|
+
},
|
|
74
|
+
{
|
|
75
|
+
deployment: "mistral-large-prod",
|
|
76
|
+
model_family: :mistral,
|
|
77
|
+
canonical_model_alias: "mistral-large",
|
|
78
|
+
usage_type: :inference
|
|
79
|
+
},
|
|
80
|
+
{
|
|
81
|
+
deployment: "embedding-prod",
|
|
82
|
+
model_family: :openai,
|
|
83
|
+
canonical_model_alias: "text-embedding-3-small",
|
|
84
|
+
usage_type: :embedding
|
|
85
|
+
}
|
|
86
|
+
]
|
|
87
|
+
end
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Use `config.azure_foundry_surface = :openai_v1` when the target endpoint should be treated as the OpenAI v1-compatible Azure route. The provider appends `/openai/v1` when the configured endpoint does not already include it.
|
|
91
|
+
|
|
92
|
+
## Provider Methods
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
provider = Legion::Extensions::Llm::AzureFoundry.provider_class.new(Legion::Extensions::Llm.config)
|
|
96
|
+
|
|
97
|
+
provider.discover_offerings(live: false)
|
|
98
|
+
provider.offering_for(model: "gpt-4o-prod", model_family: :openai, canonical_model_alias: "gpt-4o")
|
|
99
|
+
provider.health(live: false)
|
|
100
|
+
provider.chat(messages, model: "gpt-4o-prod")
|
|
101
|
+
provider.stream(messages, model: "gpt-4o-prod") { |chunk| puts chunk.content }
|
|
102
|
+
provider.embed(["hello"], model: "embedding-prod")
|
|
103
|
+
provider.count_tokens(messages, model: "gpt-4o-prod")
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
`discover_offerings(live: false)` never calls Azure. It maps configured deployments into `Legion::Extensions::Llm::Routing::ModelOffering` values with `provider_family: :azure_foundry`.
|
|
107
|
+
|
|
108
|
+
`health(live: true)` calls the documented model-info endpoint for the configured model-inference surface. Keep `live: false` for startup paths and tests that must not require Azure.
|
|
109
|
+
|
|
110
|
+
`count_tokens` returns a structured unsupported result by default because the Microsoft REST contract used here does not define a portable token-counting endpoint across Azure AI Foundry deployments.
|
|
111
|
+
|
|
112
|
+
## Routing Metadata
|
|
113
|
+
|
|
114
|
+
Azure deployments are aliases. A deployment name can hide provider, model, and version details, so this extension preserves the deployment name as `model` and treats `canonical_model_alias` and `model_family` as routing metadata.
|
|
115
|
+
|
|
116
|
+
Supported `model_family` values are intentionally open-ended symbols, including:
|
|
117
|
+
|
|
118
|
+
- `:openai`
|
|
119
|
+
- `:mistral`
|
|
120
|
+
- `:meta`
|
|
121
|
+
- `:xai`
|
|
122
|
+
- `:anthropic`
|
|
123
|
+
- `:microsoft`
|
|
124
|
+
|
|
125
|
+
When `model_family` or `canonical_model_alias` is missing, offerings include `requires_explicit_model_metadata: true`.
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'lib/legion/extensions/llm/azure_foundry/version'
|
|
4
|
+
|
|
5
|
+
Gem::Specification.new do |spec|
|
|
6
|
+
spec.name = 'lex-llm-azure-foundry'
|
|
7
|
+
spec.version = Legion::Extensions::Llm::AzureFoundry::VERSION
|
|
8
|
+
spec.authors = ['LegionIO']
|
|
9
|
+
spec.email = ['matthewdiverson@gmail.com']
|
|
10
|
+
spec.summary = 'LegionIO LLM Azure AI Foundry provider extension'
|
|
11
|
+
spec.description = 'Azure AI Foundry and Azure OpenAI hosted provider integration for LegionIO LLM routing.'
|
|
12
|
+
spec.homepage = 'https://github.com/LegionIO/lex-llm-azure-foundry'
|
|
13
|
+
spec.license = 'MIT'
|
|
14
|
+
spec.required_ruby_version = '>= 3.4'
|
|
15
|
+
|
|
16
|
+
spec.metadata['homepage_uri'] = spec.homepage
|
|
17
|
+
spec.metadata['source_code_uri'] = spec.homepage
|
|
18
|
+
spec.metadata['documentation_uri'] = spec.homepage
|
|
19
|
+
spec.metadata['changelog_uri'] = "#{spec.homepage}/blob/main/CHANGELOG.md"
|
|
20
|
+
spec.metadata['bug_tracker_uri'] = "#{spec.homepage}/issues"
|
|
21
|
+
spec.metadata['rubygems_mfa_required'] = 'true'
|
|
22
|
+
|
|
23
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |file| file.match(%r{^(spec|test|features|tmp|coverage)/}) }
|
|
24
|
+
spec.require_paths = ['lib']
|
|
25
|
+
|
|
26
|
+
spec.add_dependency 'legion-json', '>= 1.2.1'
|
|
27
|
+
spec.add_dependency 'legion-logging', '>= 1.3.2'
|
|
28
|
+
spec.add_dependency 'legion-settings', '>= 1.3.14'
|
|
29
|
+
spec.add_dependency 'lex-llm', '>= 0.1.4'
|
|
30
|
+
end
|
|
@@ -0,0 +1,357 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/json'
|
|
4
|
+
require 'legion/logging'
|
|
5
|
+
require 'legion/settings'
|
|
6
|
+
require 'legion/extensions/llm'
|
|
7
|
+
|
|
8
|
+
module Legion
|
|
9
|
+
module Extensions
|
|
10
|
+
module Llm
|
|
11
|
+
module AzureFoundry
|
|
12
|
+
# Azure AI Foundry and Azure OpenAI hosted provider surface.
|
|
13
|
+
class Provider < Legion::Extensions::Llm::Provider # rubocop:disable Metrics/ClassLength
|
|
14
|
+
include Legion::Extensions::Llm::Provider::OpenAICompatible
|
|
15
|
+
|
|
16
|
+
DEFAULT_API_VERSION = '2024-05-01-preview'
|
|
17
|
+
MODEL_INFERENCE_SURFACE = :model_inference
|
|
18
|
+
OPENAI_V1_SURFACE = :openai_v1
|
|
19
|
+
|
|
20
|
+
class << self
|
|
21
|
+
# Registry slug under which this provider is known.
def slug
  'azure_foundry'
end
|
|
22
|
+
# Configuration keys that must be present for the provider to count as configured.
def configuration_requirements
  [:azure_foundry_endpoint]
end
|
|
23
|
+
|
|
24
|
+
def configuration_options
|
|
25
|
+
%i[
|
|
26
|
+
azure_foundry_endpoint
|
|
27
|
+
azure_foundry_api_key
|
|
28
|
+
azure_foundry_bearer_token
|
|
29
|
+
azure_foundry_api_version
|
|
30
|
+
azure_foundry_surface
|
|
31
|
+
azure_foundry_deployments
|
|
32
|
+
]
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Module holding the capability predicates for this provider.
def capabilities
  Capabilities
end
|
|
36
|
+
|
|
37
|
+
def resolve_model_id(model_id, config: nil)
|
|
38
|
+
deployment = deployment_config(model_id, config:)
|
|
39
|
+
value_for(deployment, :deployment) || value_for(deployment, :model) || model_id.to_s
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def deployment_config(model_id, config:)
|
|
43
|
+
deployments = config&.azure_foundry_deployments
|
|
44
|
+
entries = normalize_deployments(deployments)
|
|
45
|
+
entries.find do |entry|
|
|
46
|
+
[value_for(entry, :deployment), value_for(entry, :model), value_for(entry, :canonical_model_alias)]
|
|
47
|
+
.compact.map(&:to_s).include?(model_id.to_s)
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
# Normalizes the configured deployments into an Array of Hashes.
#
# Accepts either a Hash keyed by deployment name (`{ "gpt-4o-prod" => { ... } }`)
# or anything `Array()` can coerce (an Array of Hashes/names, a single name, nil).
# Hash-form metadata is copied before the name is filled in, so the caller's
# configuration is never mutated and frozen settings do not raise FrozenError.
#
# @param deployments [Hash, Array, String, Symbol, nil] configured deployments
# @return [Array<Hash>] one Hash per deployment
def normalize_deployments(deployments)
  case deployments
  when Hash
    deployments.map do |name, metadata|
      entry = metadata.to_h.dup
      # Honour an explicit deployment under either key style; otherwise fall back to
      # the Hash key, stringified the same way the Array form stringifies bare names.
      entry[:deployment] = name.to_s unless entry[:deployment] || entry['deployment']
      entry
    end
  else
    Array(deployments).map { |deployment| normalize_deployment_entry(deployment) }
  end
end
|
|
63
|
+
|
|
64
|
+
private
|
|
65
|
+
|
|
66
|
+
def normalize_deployment_entry(deployment)
|
|
67
|
+
deployment.is_a?(Hash) ? deployment.dup : { deployment: deployment.to_s }
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
def value_for(hash, key)
|
|
71
|
+
return nil unless hash.respond_to?(:key?)
|
|
72
|
+
|
|
73
|
+
hash[key] || hash[key.to_s]
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# Capability predicates inferred from deployment metadata and model naming.
|
|
78
|
+
module Capabilities
|
|
79
|
+
module_function
|
|
80
|
+
|
|
81
|
+
# A model is treated as a chat model exactly when it is not an embeddings model.
def chat?(model)
  embeddings?(model) ? false : true
end
|
|
82
|
+
# Streaming is reported for every chat model.
def streaming?(model)
  chat?(model)
end
|
|
83
|
+
# Function calling is reported for every chat model.
def functions?(model)
  chat?(model)
end
|
|
84
|
+
# True for chat models whose identifier names a vision-capable family.
# The dot in "phi-3.5" is escaped so it only matches a literal "." (not e.g. "phi-3x5"),
# and the pattern needs no capture group because only match?/no-match is used.
def vision?(model)
  chat?(model) && model_id(model).match?(/gpt-4|gpt-5|llava|vision|phi-3\.5/i)
end
|
|
85
|
+
# True when the metadata declares an :embedding usage type, or failing that when the
# model identifier contains "embed" (case-insensitive).
def embeddings?(model)
  return true if usage_type(model) == :embedding

  model_id(model).match?(/embed/i)
end
|
|
86
|
+
|
|
87
|
+
def critical_capabilities_for(model)
|
|
88
|
+
[
|
|
89
|
+
('streaming' if streaming?(model)),
|
|
90
|
+
('function_calling' if functions?(model)),
|
|
91
|
+
('vision' if vision?(model)),
|
|
92
|
+
('embeddings' if embeddings?(model))
|
|
93
|
+
].compact
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
def model_id(model)
|
|
97
|
+
return hash_model_id(model) if model.is_a?(Hash)
|
|
98
|
+
|
|
99
|
+
model.respond_to?(:id) ? model.id.to_s : model.to_s
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
# Picks the identifier used for capability inference from a deployment Hash.
#
# Keys are tried in priority order (`canonical_model_alias`, `model`, `deployment`),
# each first as a Symbol key and then as a String key.
#
# @param model [Hash] deployment/model metadata
# @return [String] the first present identifier, or "" when none is set, so callers
#   can safely run String predicates such as `match?` on the result
def hash_model_id(model)
  %i[canonical_model_alias model deployment].each do |key|
    value = model[key] || model[key.to_s]
    return value.to_s if value
  end

  # Explicit fallback: without it the method would return the key list `each` evaluates to.
  ''
end
|
|
108
|
+
|
|
109
|
+
def usage_type(model)
|
|
110
|
+
return nil unless model.is_a?(Hash)
|
|
111
|
+
|
|
112
|
+
value = model[:usage_type] || model['usage_type'] || model[:type] || model['type']
|
|
113
|
+
value&.to_sym
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
def api_base
|
|
118
|
+
endpoint = config.azure_foundry_endpoint.to_s.sub(%r{/*\z}, '')
|
|
119
|
+
return "#{endpoint}/openai/v1" if surface == OPENAI_V1_SURFACE && !endpoint.end_with?('/openai/v1')
|
|
120
|
+
return endpoint.delete_suffix('/models') if surface == MODEL_INFERENCE_SURFACE
|
|
121
|
+
|
|
122
|
+
endpoint
|
|
123
|
+
end
|
|
124
|
+
|
|
125
|
+
def headers
|
|
126
|
+
{
|
|
127
|
+
'api-key' => config.azure_foundry_api_key,
|
|
128
|
+
'Authorization' => bearer_header
|
|
129
|
+
}.compact
|
|
130
|
+
end
|
|
131
|
+
|
|
132
|
+
# Request path of the chat-completions endpoint for the configured surface.
def completion_url
  path_for('chat/completions')
end
|
|
133
|
+
# Chat requests use the chat-completions path.
def chat_url
  completion_url
end
|
|
134
|
+
# Streaming requests use the chat-completions path.
def stream_url
  completion_url
end
|
|
135
|
+
# Request path of the model-info endpoint for the configured surface.
def models_url
  path_for('info')
end
|
|
136
|
+
# Request path of the embeddings endpoint; any keyword options are accepted and ignored.
def embedding_url(**_options)
  path_for('embeddings')
end
|
|
137
|
+
# Health checks use the model-info path.
def health_url
  models_url
end
|
|
138
|
+
|
|
139
|
+
# Builds routing offerings from the configured deployments.
#
# With `live: false` (the default) nothing is requested from Azure. With `live: true`
# the model-info endpoint is queried once and its response body is attached to every
# selected offering as `model_info`; if that fails, each selected offering is returned
# marked not ready together with the error.
#
# @param live [Boolean] whether to contact the model-info endpoint
# @param filters [Hash] forwarded to `filter_offerings` (e.g. `model_family:`, `usage_type:`)
# @return [Array] the selected offerings
def discover_offerings(live: false, **filters)
  offerings = configured_deployments.filter_map { |deployment| offering_from_config(deployment) }
  selected = filter_offerings(offerings, **filters)
  return selected if !live || selected.empty?

  begin
    # `models_url` does not depend on the offering, so one request serves all of them.
    model_info = connection.get(models_url).body
    selected.map do |offering|
      with_health(offering, ready: true, checked: true,
                            metadata: offering.metadata.merge(model_info: model_info))
    end
  rescue StandardError => e
    selected.map { |offering| with_health(offering, ready: false, checked: true, error: e) }
  end
end
|
|
149
|
+
|
|
150
|
+
def offering_for(model:, model_family: nil, canonical_model_alias: nil, instance_id: :default, # rubocop:disable Metrics/ParameterLists
|
|
151
|
+
usage_type: nil, **metadata)
|
|
152
|
+
deployment = self.class.deployment_config(model, config:)
|
|
153
|
+
model_id = self.class.resolve_model_id(model, config:)
|
|
154
|
+
configured_family = value_for(deployment, :model_family)
|
|
155
|
+
configured_alias = value_for(deployment, :canonical_model_alias)
|
|
156
|
+
|
|
157
|
+
build_offering(
|
|
158
|
+
model: model_id,
|
|
159
|
+
instance_id: instance_id,
|
|
160
|
+
model_family: normalize_family(model_family || configured_family || infer_model_family(model_id)),
|
|
161
|
+
canonical_model_alias: canonical_model_alias || configured_alias,
|
|
162
|
+
usage_type: usage_type || value_for(deployment, :usage_type) || usage_type_for(model_id),
|
|
163
|
+
metadata: metadata.merge(deployment_metadata(deployment))
|
|
164
|
+
)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
def health(live: false)
|
|
168
|
+
baseline = {
|
|
169
|
+
provider: :azure_foundry,
|
|
170
|
+
configured: configured?,
|
|
171
|
+
ready: configured?,
|
|
172
|
+
live: live,
|
|
173
|
+
api_base: api_base,
|
|
174
|
+
surface: surface
|
|
175
|
+
}
|
|
176
|
+
return baseline.merge(checked: false) unless live
|
|
177
|
+
|
|
178
|
+
response = connection.get(health_url)
|
|
179
|
+
baseline.merge(checked: true, model_info: response.body)
|
|
180
|
+
rescue StandardError => e
|
|
181
|
+
baseline.merge(checked: true, ready: false, error: e.class.name, message: e.message)
|
|
182
|
+
end
|
|
183
|
+
|
|
184
|
+
def readiness(live: false)
|
|
185
|
+
health(live: live).merge(local: false, remote: true, endpoints: endpoint_manifest)
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
def list_models
|
|
189
|
+
discover_offerings(live: false).map do |offering|
|
|
190
|
+
Legion::Extensions::Llm::Model::Info.new(
|
|
191
|
+
id: offering.model,
|
|
192
|
+
name: offering.metadata[:canonical_model_alias] || offering.model,
|
|
193
|
+
provider: :azure_foundry,
|
|
194
|
+
family: offering.metadata[:model_family],
|
|
195
|
+
capabilities: offering.capabilities.map(&:to_s),
|
|
196
|
+
modalities: modalities_for_capabilities(offering.capabilities.map(&:to_s)),
|
|
197
|
+
metadata: offering.to_h
|
|
198
|
+
)
|
|
199
|
+
end
|
|
200
|
+
end
|
|
201
|
+
|
|
202
|
+
def chat(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {}) # rubocop:disable Metrics/ParameterLists
|
|
203
|
+
complete(messages, tools:, temperature:, model: model_info(model, max_tokens:), params:, tool_prefs:)
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
def stream(messages, model:, temperature: nil, max_tokens: nil, tools: {}, tool_prefs: nil, params: {}, &) # rubocop:disable Metrics/ParameterLists
|
|
207
|
+
complete(messages, tools:, temperature:, model: model_info(model, max_tokens:), params:, tool_prefs:, &)
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def embed(text, model:, dimensions: nil, input_type: nil)
|
|
211
|
+
payload = render_embedding_payload(text, model: model_id(model), dimensions:)
|
|
212
|
+
payload[:input_type] = input_type if input_type
|
|
213
|
+
response = connection.post(embedding_url(model:), payload)
|
|
214
|
+
parse_embedding_response(response, model: model_id(model), text:)
|
|
215
|
+
end
|
|
216
|
+
|
|
217
|
+
# Reports that token counting is unsupported and returns a character-based size hint.
#
# No request is made. `messages` may be nil, a single message, or a list; each message
# may expose `content` as a method, carry it under a `:content`/`"content"` Hash key,
# or be plain text.
#
# @param messages [Array, Object, nil] messages to measure
# @param model [Object] model or deployment identifier, resolved through `model_id`
# @return [Hash] structured "unsupported" result including `estimated_input_characters`
def count_tokens(messages, model:, **)
  # Wrap single messages by hand: Array() would split a Hash into key/value pairs.
  list =
    if messages.nil? then []
    elsif messages.is_a?(Hash) || messages.respond_to?(:content) then [messages]
    else Array(messages)
    end

  characters = list.sum do |message|
    content =
      if message.respond_to?(:content) then message.content
      elsif message.is_a?(Hash) then message[:content] || message['content']
      else message
      end
    content.to_s.length
  end

  {
    provider_family: :azure_foundry,
    model: model_id(model),
    supported: false,
    reason: 'Azure AI Foundry REST docs do not define a portable token-counting endpoint for this surface.',
    estimated_input_characters: characters
  }
end
|
|
226
|
+
|
|
227
|
+
private
|
|
228
|
+
|
|
229
|
+
def surface
|
|
230
|
+
(config.azure_foundry_surface || MODEL_INFERENCE_SURFACE).to_sym
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
def api_version
|
|
234
|
+
config.azure_foundry_api_version || DEFAULT_API_VERSION
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
# Builds the request path for an endpoint name such as 'chat/completions'.
# On the model-inference surface the path is prefixed with '/models' and carries the
# `api-version` query string; on any other surface it is just "/<path>".
# NOTE(review): every returned path starts with "/". If `connection` resolves request
# paths against `api_base` the way Faraday does (an absolute path replaces the base
# URL's path), the '/openai/v1' segment added by `api_base` would be dropped on the
# :openai_v1 surface — confirm against the lex-llm connection implementation.
def path_for(path)
|
|
238
|
+
prefix = surface == MODEL_INFERENCE_SURFACE ? '/models' : ''
|
|
239
|
+
suffix = surface == MODEL_INFERENCE_SURFACE ? "?api-version=#{api_version}" : ''
|
|
240
|
+
"#{prefix}/#{path}#{suffix}"
|
|
241
|
+
end
|
|
242
|
+
|
|
243
|
+
# Builds the Authorization header value from the configured bearer token.
#
# @return [String, nil] "Bearer <token>", or nil when no usable token is configured
#   (nil, empty, or whitespace-only), so a blank setting never yields a bare "Bearer " value
def bearer_header
  # Surrounding whitespace (e.g. a trailing newline from an env file) is not part of the token.
  token = config.azure_foundry_bearer_token.to_s.strip
  token.empty? ? nil : "Bearer #{token}"
end
|
|
247
|
+
|
|
248
|
+
def configured_deployments
|
|
249
|
+
self.class.normalize_deployments(config.azure_foundry_deployments)
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
def offering_from_config(deployment)
|
|
253
|
+
deployment_name = value_for(deployment, :deployment) || value_for(deployment, :model)
|
|
254
|
+
return nil if deployment_name.to_s.empty?
|
|
255
|
+
|
|
256
|
+
offering_for(
|
|
257
|
+
model: deployment_name,
|
|
258
|
+
model_family: value_for(deployment, :model_family),
|
|
259
|
+
canonical_model_alias: value_for(deployment, :canonical_model_alias),
|
|
260
|
+
instance_id: value_for(deployment, :instance_id) || :default,
|
|
261
|
+
usage_type: value_for(deployment, :usage_type),
|
|
262
|
+
configured: true
|
|
263
|
+
)
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
def build_offering(model:, model_family:, usage_type:, instance_id:, canonical_model_alias:, metadata:) # rubocop:disable Metrics/ParameterLists
|
|
267
|
+
capabilities = capabilities_for(model, usage_type)
|
|
268
|
+
Legion::Extensions::Llm::Routing::ModelOffering.new(
|
|
269
|
+
provider_family: :azure_foundry,
|
|
270
|
+
instance_id: instance_id,
|
|
271
|
+
transport: :http,
|
|
272
|
+
tier: :frontier,
|
|
273
|
+
model: model,
|
|
274
|
+
usage_type: usage_type.to_sym,
|
|
275
|
+
capabilities: capabilities,
|
|
276
|
+
metadata: metadata.merge(
|
|
277
|
+
model_family: model_family,
|
|
278
|
+
canonical_model_alias: canonical_model_alias,
|
|
279
|
+
requires_explicit_model_metadata: canonical_model_alias.nil? || model_family.nil?
|
|
280
|
+
).compact
|
|
281
|
+
)
|
|
282
|
+
end
|
|
283
|
+
|
|
284
|
+
def with_live_metadata(offering)
|
|
285
|
+
response = connection.get(models_url)
|
|
286
|
+
metadata = offering.metadata.merge(model_info: response.body)
|
|
287
|
+
with_health(offering, ready: true, checked: true, metadata:)
|
|
288
|
+
end
|
|
289
|
+
|
|
290
|
+
def with_health(offering, ready:, checked:, error: nil, metadata: offering.metadata)
|
|
291
|
+
health = { ready: ready, checked: checked }
|
|
292
|
+
health = health.merge(error: error.class.name, message: error.message) if error
|
|
293
|
+
|
|
294
|
+
Legion::Extensions::Llm::Routing::ModelOffering.new(offering.to_h.merge(health:, metadata:))
|
|
295
|
+
end
|
|
296
|
+
|
|
297
|
+
def filter_offerings(offerings, model_family: nil, usage_type: nil, **)
|
|
298
|
+
offerings.select do |offering|
|
|
299
|
+
family_matches = model_family.nil? || offering.metadata[:model_family] == model_family.to_sym
|
|
300
|
+
usage_matches = usage_type.nil? || offering.usage_type == usage_type.to_sym
|
|
301
|
+
family_matches && usage_matches
|
|
302
|
+
end
|
|
303
|
+
end
|
|
304
|
+
|
|
305
|
+
def deployment_metadata(deployment)
|
|
306
|
+
return {} unless deployment
|
|
307
|
+
|
|
308
|
+
deployment.to_h.transform_keys(&:to_sym).except(:deployment, :model_family, :usage_type)
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
def capabilities_for(model, usage_type)
|
|
312
|
+
return %i[embedding] if usage_type.to_sym == :embedding
|
|
313
|
+
|
|
314
|
+
Capabilities.critical_capabilities_for(model).map(&:to_sym)
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
def usage_type_for(model)
|
|
318
|
+
Capabilities.embeddings?(model) ? :embedding : :inference
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
def normalize_family(value)
|
|
322
|
+
value&.to_sym
|
|
323
|
+
end
|
|
324
|
+
|
|
325
|
+
# Best-effort guess of the model family from a deployment or model name.
#
# The name is lower-cased and checked against known vendor markers in a fixed order;
# the first match wins.
#
# @param model [#to_s] deployment or model identifier
# @return [Symbol, nil] inferred family, or nil when no marker matches
def infer_model_family(model)
  id = model.to_s.downcase
  # The o-series marker must start a name segment ("o1-mini", "prod-o3"), so an "o<digit>"
  # inside another word (e.g. "mistral-nemo2407") is not misread as OpenAI.
  return :openai if id.match?(/gpt|(?<![a-z0-9])o\d|text-embedding|dall-e/)
  return :mistral if id.include?('mistral')
  return :meta if id.match?(/llama|meta/)
  return :xai if id.match?(/grok|xai/)
  return :anthropic if id.include?('claude')
  return :microsoft if id.match?(/phi|microsoft/)

  nil
end
|
|
336
|
+
|
|
337
|
+
def value_for(hash, key)
|
|
338
|
+
return nil unless hash.respond_to?(:key?)
|
|
339
|
+
|
|
340
|
+
hash[key] || hash[key.to_s]
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
def model_id(model)
|
|
344
|
+
self.class.resolve_model_id(model.respond_to?(:id) ? model.id : model, config:)
|
|
345
|
+
end
|
|
346
|
+
|
|
347
|
+
def model_info(model, max_tokens: nil)
|
|
348
|
+
return model if model.respond_to?(:id) && max_tokens.nil?
|
|
349
|
+
|
|
350
|
+
Legion::Extensions::Llm::Model::Info.new(id: model_id(model), provider: :azure_foundry,
|
|
351
|
+
max_output_tokens: max_tokens)
|
|
352
|
+
end
|
|
353
|
+
end
|
|
354
|
+
end
|
|
355
|
+
end
|
|
356
|
+
end
|
|
357
|
+
end
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'legion/extensions/llm'
|
|
4
|
+
require 'legion/extensions/llm/azure_foundry/provider'
|
|
5
|
+
require 'legion/extensions/llm/azure_foundry/version'
|
|
6
|
+
|
|
7
|
+
module Legion
|
|
8
|
+
module Extensions
|
|
9
|
+
module Llm
|
|
10
|
+
# Azure AI Foundry provider extension namespace.
|
|
11
|
+
module AzureFoundry
|
|
12
|
+
extend ::Legion::Extensions::Core if ::Legion::Extensions.const_defined?(:Core, false)
|
|
13
|
+
|
|
14
|
+
PROVIDER_FAMILY = :azure_foundry
|
|
15
|
+
|
|
16
|
+
def self.default_settings
|
|
17
|
+
::Legion::Extensions::Llm.provider_settings(
|
|
18
|
+
family: PROVIDER_FAMILY,
|
|
19
|
+
discovery: { enabled: true, live: false },
|
|
20
|
+
instance: {
|
|
21
|
+
endpoint: 'https://<resource>.services.ai.azure.com',
|
|
22
|
+
api_version: '2024-05-01-preview',
|
|
23
|
+
surface: :model_inference,
|
|
24
|
+
tier: :frontier,
|
|
25
|
+
transport: :http,
|
|
26
|
+
credentials: {
|
|
27
|
+
api_key: 'env://AZURE_INFERENCE_CREDENTIAL',
|
|
28
|
+
bearer_token: 'env://AZURE_FOUNDRY_BEARER_TOKEN',
|
|
29
|
+
entra_scope: 'https://cognitiveservices.azure.com/.default'
|
|
30
|
+
},
|
|
31
|
+
deployments: [],
|
|
32
|
+
usage: { inference: true, embedding: true, token_counting: false },
|
|
33
|
+
limits: { concurrency: 4 }
|
|
34
|
+
}
|
|
35
|
+
)
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def self.provider_class
|
|
39
|
+
Provider
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
Legion::Extensions::Llm::Provider.register(Legion::Extensions::Llm::AzureFoundry::PROVIDER_FAMILY,
|
|
47
|
+
Legion::Extensions::Llm::AzureFoundry::Provider)
|
metadata
ADDED
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: lex-llm-azure-foundry
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- LegionIO
|
|
8
|
+
bindir: bin
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: legion-json
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - ">="
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: 1.2.1
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - ">="
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: 1.2.1
|
|
26
|
+
- !ruby/object:Gem::Dependency
|
|
27
|
+
name: legion-logging
|
|
28
|
+
requirement: !ruby/object:Gem::Requirement
|
|
29
|
+
requirements:
|
|
30
|
+
- - ">="
|
|
31
|
+
- !ruby/object:Gem::Version
|
|
32
|
+
version: 1.3.2
|
|
33
|
+
type: :runtime
|
|
34
|
+
prerelease: false
|
|
35
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
36
|
+
requirements:
|
|
37
|
+
- - ">="
|
|
38
|
+
- !ruby/object:Gem::Version
|
|
39
|
+
version: 1.3.2
|
|
40
|
+
- !ruby/object:Gem::Dependency
|
|
41
|
+
name: legion-settings
|
|
42
|
+
requirement: !ruby/object:Gem::Requirement
|
|
43
|
+
requirements:
|
|
44
|
+
- - ">="
|
|
45
|
+
- !ruby/object:Gem::Version
|
|
46
|
+
version: 1.3.14
|
|
47
|
+
type: :runtime
|
|
48
|
+
prerelease: false
|
|
49
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
50
|
+
requirements:
|
|
51
|
+
- - ">="
|
|
52
|
+
- !ruby/object:Gem::Version
|
|
53
|
+
version: 1.3.14
|
|
54
|
+
- !ruby/object:Gem::Dependency
|
|
55
|
+
name: lex-llm
|
|
56
|
+
requirement: !ruby/object:Gem::Requirement
|
|
57
|
+
requirements:
|
|
58
|
+
- - ">="
|
|
59
|
+
- !ruby/object:Gem::Version
|
|
60
|
+
version: 0.1.4
|
|
61
|
+
type: :runtime
|
|
62
|
+
prerelease: false
|
|
63
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
64
|
+
requirements:
|
|
65
|
+
- - ">="
|
|
66
|
+
- !ruby/object:Gem::Version
|
|
67
|
+
version: 0.1.4
|
|
68
|
+
description: Azure AI Foundry and Azure OpenAI hosted provider integration for LegionIO
|
|
69
|
+
LLM routing.
|
|
70
|
+
email:
|
|
71
|
+
- matthewdiverson@gmail.com
|
|
72
|
+
executables: []
|
|
73
|
+
extensions: []
|
|
74
|
+
extra_rdoc_files: []
|
|
75
|
+
files:
|
|
76
|
+
- ".github/CODEOWNERS"
|
|
77
|
+
- ".github/dependabot.yml"
|
|
78
|
+
- ".github/workflows/ci.yml"
|
|
79
|
+
- ".gitignore"
|
|
80
|
+
- ".rubocop.yml"
|
|
81
|
+
- CHANGELOG.md
|
|
82
|
+
- Gemfile
|
|
83
|
+
- LICENSE
|
|
84
|
+
- README.md
|
|
85
|
+
- lex-llm-azure-foundry.gemspec
|
|
86
|
+
- lib/legion/extensions/llm/azure_foundry.rb
|
|
87
|
+
- lib/legion/extensions/llm/azure_foundry/provider.rb
|
|
88
|
+
- lib/legion/extensions/llm/azure_foundry/version.rb
|
|
89
|
+
homepage: https://github.com/LegionIO/lex-llm-azure-foundry
|
|
90
|
+
licenses:
|
|
91
|
+
- MIT
|
|
92
|
+
metadata:
|
|
93
|
+
homepage_uri: https://github.com/LegionIO/lex-llm-azure-foundry
|
|
94
|
+
source_code_uri: https://github.com/LegionIO/lex-llm-azure-foundry
|
|
95
|
+
documentation_uri: https://github.com/LegionIO/lex-llm-azure-foundry
|
|
96
|
+
changelog_uri: https://github.com/LegionIO/lex-llm-azure-foundry/blob/main/CHANGELOG.md
|
|
97
|
+
bug_tracker_uri: https://github.com/LegionIO/lex-llm-azure-foundry/issues
|
|
98
|
+
rubygems_mfa_required: 'true'
|
|
99
|
+
rdoc_options: []
|
|
100
|
+
require_paths:
|
|
101
|
+
- lib
|
|
102
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
103
|
+
requirements:
|
|
104
|
+
- - ">="
|
|
105
|
+
- !ruby/object:Gem::Version
|
|
106
|
+
version: '3.4'
|
|
107
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
108
|
+
requirements:
|
|
109
|
+
- - ">="
|
|
110
|
+
- !ruby/object:Gem::Version
|
|
111
|
+
version: '0'
|
|
112
|
+
requirements: []
|
|
113
|
+
rubygems_version: 3.6.9
|
|
114
|
+
specification_version: 4
|
|
115
|
+
summary: LegionIO LLM Azure AI Foundry provider extension
|
|
116
|
+
test_files: []
|