embedding_util 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +176 -0
- data/Rakefile +10 -0
- data/exe/embedding_util +7 -0
- data/lib/embedding_util/cli.rb +111 -0
- data/lib/embedding_util/configuration.rb +42 -0
- data/lib/embedding_util/profile.rb +22 -0
- data/lib/embedding_util/profiles.rb +42 -0
- data/lib/embedding_util/provider.rb +31 -0
- data/lib/embedding_util/provider_registry.rb +56 -0
- data/lib/embedding_util/providers/endpoint.rb +153 -0
- data/lib/embedding_util/providers/self_hosted.rb +44 -0
- data/lib/embedding_util/result.rb +7 -0
- data/lib/embedding_util/runtime_command.rb +84 -0
- data/lib/embedding_util/server_manager.rb +258 -0
- data/lib/embedding_util/server_model.rb +46 -0
- data/lib/embedding_util/version.rb +5 -0
- data/lib/embedding_util.rb +127 -0
- metadata +83 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 41974235f403d3694132565fa69ecf07ca338789bd197c34e2563eb7bd38ad06
|
|
4
|
+
data.tar.gz: c05864e6cda0f5881d44d676ab8b8cb7dc9d7ceee70aff8394a3ee79f8166a69
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: b3500877638960c73de3ebf04ef07441392b86f22108014e4f27e872b95d9ff52cf1850e423443c51bab16ea4c5149a851f619faa53d7f44480f99ae7ebdd759
|
|
7
|
+
data.tar.gz: e55f3761208466504a9865115d23e0f1ab68bfb146a08ddfcb1c2ae7df4b37a859a80d99aa52139bf6b005fa6ddf4624a96046d58c80f923d183316234042232
|
data/.rspec
ADDED
data/.rubocop.yml
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
AllCops:
|
|
2
|
+
TargetRubyVersion: 3.2
|
|
3
|
+
NewCops: disable
|
|
4
|
+
|
|
5
|
+
Style/StringLiterals:
|
|
6
|
+
EnforcedStyle: double_quotes
|
|
7
|
+
Exclude:
|
|
8
|
+
- "spec/**/*"
|
|
9
|
+
- "embedding_util.gemspec"
|
|
10
|
+
|
|
11
|
+
Style/FrozenStringLiteralComment:
|
|
12
|
+
Enabled: false
|
|
13
|
+
|
|
14
|
+
Style/Documentation:
|
|
15
|
+
Enabled: false
|
|
16
|
+
|
|
17
|
+
Layout/LineLength:
|
|
18
|
+
Max: 170
|
|
19
|
+
|
|
20
|
+
Metrics/BlockLength:
|
|
21
|
+
Enabled: false
|
|
22
|
+
Metrics/ClassLength:
|
|
23
|
+
Enabled: false
|
|
24
|
+
Metrics/MethodLength:
|
|
25
|
+
Enabled: false
|
|
26
|
+
Metrics/AbcSize:
|
|
27
|
+
Enabled: false
|
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
## [0.1.0] - 2026-06-08
|
|
2
|
+
|
|
3
|
+
- Initial release
|
|
4
|
+
- Add local-first embedding and true reranking API
|
|
5
|
+
- Add pinned `small_multilingual_v1` Qwen3 embedding/reranker profile
|
|
6
|
+
- Add endpoint provider for llama.cpp-compatible embedding/reranking APIs
|
|
7
|
+
- Add self-hosted local server management through Ramalama or direct `llama-server`
|
|
8
|
+
- Add `embedding_util` CLI with `support`, `profiles`, `embed`, `rerank`, and `serve`
|
data/LICENSE.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 hmdne
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,176 @@
|
|
|
1
|
+
# embedding_util
|
|
2
|
+
|
|
3
|
+
Local-first text embeddings and reranking for Ruby.
|
|
4
|
+
|
|
5
|
+
`embedding_util` provides a small require-and-use API for computing embedding vectors and true reranking scores through local model runtimes. It can start local model servers on demand, or use explicitly configured llama.cpp/Ramalama-compatible HTTP endpoints.
|
|
6
|
+
|
|
7
|
+
The default model profile is `small_multilingual_v1`, pinned to Qwen3 0.6B GGUF embedding and reranker models.
|
|
8
|
+
|
|
9
|
+
This gem is in the `0.x` series. The API is intentionally unstable until `1.0`, and public method names, configuration options, return shapes, and default profiles may change between minor releases.
|
|
10
|
+
|
|
11
|
+
## Installation
|
|
12
|
+
|
|
13
|
+
Add the gem to your Gemfile:
|
|
14
|
+
|
|
15
|
+
```ruby
|
|
16
|
+
gem "embedding_util"
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
Then install dependencies:
|
|
20
|
+
|
|
21
|
+
```sh
|
|
22
|
+
bundle install
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
## Quick Start
|
|
26
|
+
|
|
27
|
+
Install either `ramalama` or `llama-server`. Ramalama is preferred; direct `llama-server` is used when Ramalama is unavailable.
|
|
28
|
+
|
|
29
|
+
Then call the Ruby API:
|
|
30
|
+
|
|
31
|
+
```ruby
|
|
32
|
+
require "embedding_util"
|
|
33
|
+
|
|
34
|
+
vector = EmbeddingUtil.embed("hello world")
|
|
35
|
+
ranked = EmbeddingUtil.rerank("Which document is about software?", [
|
|
36
|
+
"Ruby is a programming language.",
|
|
37
|
+
"Fresh bread is often served warm.",
|
|
38
|
+
"A command-line tool can automate repetitive work."
|
|
39
|
+
])
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
With the default configuration, `EmbeddingUtil.embed` and `EmbeddingUtil.rerank` reuse an already-running local server when one is available. If not, they start the required local model server automatically. Auto-started servers bind to `127.0.0.1`, prefer ports `18080` for embeddings and `18081` for reranking, and choose the next free local port if the preferred port is already in use. First-time use downloads the pinned `small_multilingual_v1` GGUF files through the selected runtime.
|
|
43
|
+
|
|
44
|
+
## Application-Managed Servers
|
|
45
|
+
|
|
46
|
+
Applications can manage model servers themselves instead of using automatic self-hosting. This is useful when the embedding/reranking servers run as separate processes, system services, containers, or on another machine.
|
|
47
|
+
|
|
48
|
+
Configure the endpoints explicitly:
|
|
49
|
+
|
|
50
|
+
```ruby
|
|
51
|
+
require "embedding_util"
|
|
52
|
+
|
|
53
|
+
EmbeddingUtil.configure do |config|
|
|
54
|
+
config.embedding_endpoint = "http://embedding.internal:18080"
|
|
55
|
+
config.reranker_endpoint = "http://reranker.internal:18081"
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
vector = EmbeddingUtil.embed("hello world")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Configured endpoints take precedence over automatic self-hosting.
|
|
62
|
+
|
|
63
|
+
`embedding_util serve` is one convenient way to run compatible servers yourself, but it is optional. You can also run Ramalama, direct `llama-server`, containers, or service units independently as long as they expose llama.cpp-compatible embedding/reranking HTTP APIs.
|
|
64
|
+
|
|
65
|
+
```sh
|
|
66
|
+
embedding_util serve --model embedding-small_multilingual_v1
|
|
67
|
+
embedding_util serve --model reranker-small_multilingual_v1
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
`serve` starts one model server per command and runs until stopped. Add `--shutdown-idle SECONDS` only when you want that manually managed server to stop itself after that many seconds without stdout/stderr output; omit it, set it to `nil`, or pass `0` to disable idle shutdown.
|
|
71
|
+
|
|
72
|
+
## CLI
|
|
73
|
+
|
|
74
|
+
```sh
|
|
75
|
+
embedding_util support
|
|
76
|
+
embedding_util profiles
|
|
77
|
+
embedding_util serve --model embedding-small_multilingual_v1
|
|
78
|
+
embedding_util embed "hello world"
|
|
79
|
+
embedding_util embed "hello world" --verbose
|
|
80
|
+
embedding_util rerank \
|
|
81
|
+
"Which document is about software?" \
|
|
82
|
+
"Ruby is a programming language." \
|
|
83
|
+
"Fresh bread is often served warm." \
|
|
84
|
+
"A command-line tool can automate repetitive work."
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
`embed` prints a JSON array. `rerank` prints JSON objects with `index`, `document`, `score`, and `metadata`.
|
|
88
|
+
|
|
89
|
+
`serve` starts one local model server. The default model is `embedding-small_multilingual_v1`; use `reranker-small_multilingual_v1` for the reranker server. By default, `serve` uses Ramalama when available and falls back to direct `llama-server`. It runs until stopped unless a positive `--shutdown-idle` value is provided.
|
|
90
|
+
|
|
91
|
+
Explicit `serve --port PORT` requires that exact port to be free. Without `--port`, `serve` prefers the profile default port and chooses the next free local port if needed.
|
|
92
|
+
|
|
93
|
+
Use `--verbose` on `embed` or `rerank` to print self-hosting diagnostics, including the background `serve` command and log path. First-time model downloads are expected to work with the default startup timeout; use `--startup-timeout` only when you explicitly want to shorten or extend that wait.
|
|
94
|
+
|
|
95
|
+
## API
|
|
96
|
+
|
|
97
|
+
- `EmbeddingUtil.embed(text)` returns one embedding array.
|
|
98
|
+
- `EmbeddingUtil.embed_many(texts)` returns one embedding array per input text.
|
|
99
|
+
- `EmbeddingUtil.embed_result(text_or_texts)` returns embeddings plus provider/model metadata.
|
|
100
|
+
- `EmbeddingUtil.rerank(query, documents)` returns ranked `EmbeddingUtil::RankedDocument` objects.
|
|
101
|
+
- `EmbeddingUtil.rerank_result(query, documents)` returns ranked documents plus provider/model metadata.
|
|
102
|
+
- `EmbeddingUtil.support` reports configured provider support.
|
|
103
|
+
- `EmbeddingUtil.profiles` returns known immutable model profiles.
|
|
104
|
+
- `embedding_util support`, `profiles`, `embed`, `rerank`, and `serve` expose the same local-first behavior from the command line.
|
|
105
|
+
|
|
106
|
+
## Default Profile
|
|
107
|
+
|
|
108
|
+
`small_multilingual_v1` is intentionally pinned because embedding vectors are model-output-specific.
|
|
109
|
+
|
|
110
|
+
Embedding model:
|
|
111
|
+
|
|
112
|
+
- repo: `Qwen/Qwen3-Embedding-0.6B-GGUF`
|
|
113
|
+
- file: `Qwen3-Embedding-0.6B-Q8_0.gguf`
|
|
114
|
+
- dimensions: `1024`
|
|
115
|
+
- server flags: `--embedding --pooling last`
|
|
116
|
+
|
|
117
|
+
Reranker model:
|
|
118
|
+
|
|
119
|
+
- repo: `ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF`
|
|
120
|
+
- file: `qwen3-reranker-0.6b-q8_0.gguf`
|
|
121
|
+
- server flags: `--reranking`
|
|
122
|
+
|
|
123
|
+
Do not combine embedding and reranking flags for this profile. Run separate local servers.
|
|
124
|
+
|
|
125
|
+
## Configuration
|
|
126
|
+
|
|
127
|
+
```ruby
|
|
128
|
+
EmbeddingUtil.configure do |config|
|
|
129
|
+
config.profile = :small_multilingual_v1
|
|
130
|
+
config.runtime = :auto
|
|
131
|
+
config.host = "127.0.0.1"
|
|
132
|
+
config.embedding_port = 18080
|
|
133
|
+
config.reranker_port = 18081
|
|
134
|
+
config.startup_timeout = 3600
|
|
135
|
+
config.shutdown_idle = 300
|
|
136
|
+
config.timeout = 60
|
|
137
|
+
end
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
Explicit local endpoints can still be configured when you manage servers yourself:
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
EmbeddingUtil.configure do |config|
|
|
144
|
+
config.embedding_endpoint = "http://127.0.0.1:18080"
|
|
145
|
+
config.reranker_endpoint = "http://127.0.0.1:18081"
|
|
146
|
+
end
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
Environment variables are also supported:
|
|
150
|
+
|
|
151
|
+
- `EMBEDDING_UTIL_ENDPOINT` for one endpoint serving both APIs
|
|
152
|
+
- `EMBEDDING_UTIL_EMBEDDING_ENDPOINT`
|
|
153
|
+
- `EMBEDDING_UTIL_RERANKER_ENDPOINT`
|
|
154
|
+
- `EMBEDDING_UTIL_TIMEOUT`
|
|
155
|
+
- `EMBEDDING_UTIL_STARTUP_TIMEOUT`
|
|
156
|
+
- `EMBEDDING_UTIL_RUNTIME`
|
|
157
|
+
- `EMBEDDING_UTIL_SHUTDOWN_IDLE`
|
|
158
|
+
- `EMBEDDING_UTIL_STATE_DIR`
|
|
159
|
+
- `EMBEDDING_UTIL_VERBOSE`
|
|
160
|
+
- `EMBEDDING_UTIL_EMBEDDING_PORT`
|
|
161
|
+
- `EMBEDDING_UTIL_RERANKER_PORT`
- `EMBEDDING_UTIL_HOST`
|
|
162
|
+
|
|
163
|
+
## Development
|
|
164
|
+
|
|
165
|
+
```sh
|
|
166
|
+
bundle install
|
|
167
|
+
bundle exec rake
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Contributing
|
|
171
|
+
|
|
172
|
+
Bug reports and pull requests are welcome on GitHub at `https://github.com/rbutils/embedding_util`.
|
|
173
|
+
|
|
174
|
+
## License
|
|
175
|
+
|
|
176
|
+
The gem is available as open source under the terms of the MIT License.
|
data/Rakefile
ADDED
data/exe/embedding_util
ADDED
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
require "thor"
|
|
5
|
+
|
|
6
|
+
module EmbeddingUtil
  # Thor-based command-line interface exposing the `support`, `profiles`,
  # `embed`, `rerank`, and `serve` commands.
  #
  # Options shared by every command are declared with `class_option` and are
  # copied onto the global EmbeddingUtil configuration before a command runs.
  class CLI < Thor
    # Shared CLI options that map onto Configuration writers, each paired with
    # the coercion applied to the raw Thor value before it is assigned.
    CONFIG_OPTIONS = {
      profile: :to_sym.to_proc,
      runtime: ->(value) { RuntimeCommand.normalize_runtime(value) },
      endpoint: ->(value) { value },
      embedding_endpoint: ->(value) { value },
      reranker_endpoint: ->(value) { value },
      timeout: ->(value) { value },
      startup_timeout: ->(value) { value },
      shutdown_idle: :to_i.to_proc,
      verbose: ->(value) { value }
    }.freeze

    # Make Thor exit with a non-zero status on usage errors (unknown command,
    # missing argument, bad option). Without this definition Thor prints a
    # deprecation warning and exits 0, which hides failures from scripts.
    def self.exit_on_failure?
      true
    end

    class_option :endpoint, type: :string, desc: "Endpoint serving both embedding and reranking APIs"
    class_option :embedding_endpoint, type: :string, desc: "Endpoint serving /v1/embeddings"
    class_option :reranker_endpoint, type: :string, desc: "Endpoint serving /v1/rerank or /rerank"
    class_option :profile, type: :string, desc: "Model profile"
    class_option :runtime, type: :string, desc: "Self-hosting runtime: auto, ramalama, or llama-server"
    class_option :timeout, type: :numeric, desc: "HTTP timeout in seconds"
    class_option :startup_timeout, type: :numeric, desc: "Seconds to wait for self-hosted server startup"
    class_option :shutdown_idle, type: :numeric, desc: "Stop self-hosted server after this many seconds without stdout/stderr activity"
    class_option :verbose, type: :boolean, desc: "Print self-hosting diagnostics"

    desc "support", "Display configured provider support"
    # Prints one line per provider with its support status, followed by the
    # embedding/reranker endpoints when the provider reports them.
    def support
      configure_embedding_util
      EmbeddingUtil.support.each do |item|
        status = item.fetch(:supported) ? "supported" : "not supported"
        puts "#{item.fetch(:provider)}: #{status}"
        puts "  embedding_endpoint: #{item.fetch(:embedding_endpoint)}" if item[:embedding_endpoint]
        puts "  reranker_endpoint: #{item.fetch(:reranker_endpoint)}" if item[:reranker_endpoint]
      end
    rescue Error => e
      # Same failure style as `embed`/`rerank`: message on stderr, exit 1.
      abort e.message
    end

    desc "profiles", "List known model profiles"
    # Prints each known profile name with the repo/file of both of its models.
    def profiles
      EmbeddingUtil.profiles.each do |profile|
        puts profile.name
        puts "  embedding: #{profile.embedding.fetch(:repo)} / #{profile.embedding.fetch(:file)}"
        puts "  reranker: #{profile.reranker.fetch(:repo)} / #{profile.reranker.fetch(:file)}"
      end
    end

    desc "embed TEXT", "Compute one embedding and print it as JSON"
    # Prints the embedding of TEXT as a compact JSON array; aborts with the
    # error message when EmbeddingUtil raises one of its own errors.
    def embed(text)
      configure_embedding_util
      puts JSON.generate(EmbeddingUtil.embed(text))
    rescue Error => e
      abort e.message
    end

    desc "rerank QUERY DOCUMENT...", "Rerank documents and print ranked results as JSON"
    # Reranks DOCUMENTs against QUERY and prints pretty JSON objects holding
    # index, document, score, and metadata. At least one document is required.
    def rerank(query, *documents)
      configure_embedding_util
      raise Error, "provide at least one document to rerank" if documents.empty?

      results = EmbeddingUtil.rerank(query, documents).map do |result|
        {
          index: result.index,
          document: result.document,
          score: result.score,
          metadata: result.metadata
        }
      end
      puts JSON.pretty_generate(results)
    rescue Error => e
      abort e.message
    end

    desc "serve", "Start one local model server and stop it after stdout/stderr is idle"
    option :model, type: :string, default: "embedding-small_multilingual_v1",
                   desc: "Model server to run, such as embedding-small_multilingual_v1 or reranker-small_multilingual_v1"
    option :port, type: :numeric, desc: "Port for the model server"
    option :host, type: :string, default: "127.0.0.1", desc: "Host for the model server"
    # Hands the selected model, runtime, host, and port to ServerManager#serve.
    # Only an explicit --shutdown-idle is forwarded; the configuration default
    # is deliberately not used here.
    def serve
      configure_embedding_util
      ServerManager.new(config: EmbeddingUtil.configuration).serve(
        model: options[:model],
        runtime: options[:runtime] || EmbeddingUtil.configuration.runtime,
        shutdown_idle: options[:shutdown_idle]&.to_i,
        host: options[:host],
        port: options[:port]&.to_i
      )
    rescue Error => e
      # Report gem errors as a plain message instead of a stack trace,
      # consistent with the other commands.
      abort e.message
    end

    no_commands do
      # Copies every CLI option that was actually supplied onto the global
      # EmbeddingUtil configuration through the matching writer.
      def configure_embedding_util
        EmbeddingUtil.configure do |config|
          cli_config.each do |key, value|
            config.public_send("#{key}=", value)
          end
        end
      end

      # Returns a Hash of the supplied CONFIG_OPTIONS values after coercion.
      # Options that were not given (nil) are left out so existing
      # configuration values are not overwritten.
      def cli_config
        CONFIG_OPTIONS.each_with_object({}) do |(key, coercion), values|
          value = options[key]
          next if value.nil?

          values[key] = coercion.call(value)
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EmbeddingUtil
  # Settings for EmbeddingUtil. Defaults are taken from EMBEDDING_UTIL_*
  # environment variables when the object is created and can be overridden
  # afterwards through the accessors.
  class Configuration
    attr_accessor :profile, :provider, :endpoint, :embedding_endpoint, :reranker_endpoint, :timeout, :startup_timeout, :shutdown_idle, :host,
                  :embedding_port, :reranker_port, :state_dir, :verbose
    attr_reader :runtime

    # Builds a configuration from the environment.
    #
    # Raises ArgumentError when a numeric variable (timeouts, idle shutdown,
    # ports) holds text that Float()/Integer() cannot parse.
    def initialize
      @profile = :small_multilingual_v1
      @provider = :auto
      self.runtime = ENV.fetch("EMBEDDING_UTIL_RUNTIME", "auto")
      @endpoint = ENV["EMBEDDING_UTIL_ENDPOINT"]
      @embedding_endpoint = ENV["EMBEDDING_UTIL_EMBEDDING_ENDPOINT"]
      @reranker_endpoint = ENV["EMBEDDING_UTIL_RERANKER_ENDPOINT"]
      @timeout = Float(ENV.fetch("EMBEDDING_UTIL_TIMEOUT", "60"))
      @startup_timeout = Float(ENV.fetch("EMBEDDING_UTIL_STARTUP_TIMEOUT", "3600"))
      @shutdown_idle = Integer(ENV.fetch("EMBEDDING_UTIL_SHUTDOWN_IDLE", "300"))
      @host = ENV.fetch("EMBEDDING_UTIL_HOST", "127.0.0.1")
      @embedding_port = Integer(ENV.fetch("EMBEDDING_UTIL_EMBEDDING_PORT", "18080"))
      @reranker_port = Integer(ENV.fetch("EMBEDDING_UTIL_RERANKER_PORT", "18081"))
      # Block form: only expand "~" when the variable is absent, so an explicit
      # state dir still works when no home directory can be resolved.
      @state_dir = ENV.fetch("EMBEDDING_UTIL_STATE_DIR") { File.expand_path("~/.local/state/embedding_util") }
      @verbose = ENV.fetch("EMBEDDING_UTIL_VERBOSE", "false").match?(/\A(?:1|true|yes|on)\z/i)
    end

    # Stores the runtime as a symbol with dashes converted to underscores
    # ("llama-server" becomes :llama_server). A nil or blank value falls back
    # to :auto instead of producing the empty symbol.
    def runtime=(value)
      normalized = value.to_s.strip.tr("-", "_")
      @runtime = normalized.empty? ? :auto : normalized.to_sym
    end

    # Returns the profile itself when a Profile was assigned; otherwise looks
    # the configured name up through Profiles.fetch.
    def resolved_profile
      profile.is_a?(Profile) ? profile : Profiles.fetch(profile)
    end

    # Endpoint used for embeddings: the dedicated embedding endpoint when set,
    # otherwise the shared endpoint. Returns nil when neither is set.
    def embedding_endpoint_url
      first_present(embedding_endpoint, endpoint)
    end

    # Endpoint used for reranking: the dedicated reranker endpoint when set,
    # otherwise the shared endpoint. Returns nil when neither is set.
    def reranker_endpoint_url
      first_present(reranker_endpoint, endpoint)
    end

    private

    # Returns the first candidate that is neither nil/false nor blank. A blank
    # string (for example an exported-but-empty environment variable) is
    # treated as "not configured" so it cannot shadow the fallback.
    def first_present(*candidates)
      candidates.find { |candidate| candidate && !candidate.to_s.strip.empty? }
    end
  end
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EmbeddingUtil
  # Immutable description of a model profile: a name plus the settings for its
  # embedding model and its reranker model.
  Profile = Data.define(:name, :embedding, :reranker) do
    # name      - anything responding to #to_sym; stored as a Symbol.
    # embedding - settings for the embedding model (typically a Hash).
    # reranker  - settings for the reranker model (typically a Hash).
    #
    # Both settings values are stored as deeply frozen copies.
    def initialize(name:, embedding:, reranker:)
      super(name: name.to_sym, embedding: deep_freeze(embedding), reranker: deep_freeze(reranker))
    end

    private

    # Returns a frozen version of value, recursing into Hash values and Array
    # elements. Containers are rebuilt, and unfrozen leaves are duplicated
    # before freezing, so the caller's own objects are never frozen as a side
    # effect and later mutation of them cannot leak into the profile.
    def deep_freeze(value)
      case value
      when Hash
        value.transform_values { |item| deep_freeze(item) }.freeze
      when Array
        value.map { |item| deep_freeze(item) }.freeze
      else
        value.frozen? ? value : value.dup.freeze
      end
    end
  end
end
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "profile"
|
|
4
|
+
|
|
5
|
+
module EmbeddingUtil
  # Catalog of the built-in model profiles, addressable by name.
  module Profiles
    # Default profile: Qwen3 0.6B GGUF embedding and reranker models.
    SMALL_MULTILINGUAL_V1 = Profile.new(
      name: :small_multilingual_v1,
      embedding: {
        repo: "Qwen/Qwen3-Embedding-0.6B-GGUF",
        file: "Qwen3-Embedding-0.6B-Q8_0.gguf",
        model: "qwen3-embedding-0.6b",
        dimensions: 1024,
        normalize: true,
        pooling: "last",
        server_flags: %w[--embedding --pooling last]
      },
      reranker: {
        repo: "ggml-org/Qwen3-Reranker-0.6B-Q8_0-GGUF",
        file: "qwen3-reranker-0.6b-q8_0.gguf",
        model: "qwen3-reranker-0.6b",
        server_flags: %w[--reranking]
      }
    )

    # Lookup table from profile name (Symbol) to Profile.
    BY_NAME = [SMALL_MULTILINGUAL_V1].to_h { |known| [known.name, known] }.freeze

    module_function

    # Returns the profile registered under name (anything responding to
    # #to_sym). Raises ArgumentError when no such profile exists.
    def fetch(name)
      key = name.to_sym
      return BY_NAME[key] if BY_NAME.key?(key)

      raise ArgumentError, "unknown embedding_util profile: #{name.inspect}"
    end

    # Returns an Array of every known profile.
    def all
      BY_NAME.values
    end
  end
end
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EmbeddingUtil
  # Base class for providers. As written it reports itself as unsupported;
  # subclasses are expected to override #supported?.
  class Provider
    attr_reader :config

    class << self
      # Symbol derived from the last segment of the class name, converted from
      # CamelCase to snake_case (e.g. "Foo::SelfHosted" gives :self_hosted).
      def provider_name
        short_name = name.split("::").last
        snake_cased = short_name.gsub(/([a-z\d])([A-Z])/, "\\1_\\2")
        snake_cased.downcase.to_sym
      end

      # Builds a provider for config and asks whether it is supported.
      def supported?(config = EmbeddingUtil.configuration)
        instance = new(config: config)
        instance.supported?
      end
    end

    # config - the configuration this provider reads; defaults to the global
    #          EmbeddingUtil configuration.
    def initialize(config: EmbeddingUtil.configuration)
      @config = config
    end

    # Instance-level shortcut for the class-level provider name.
    def provider_name
      self.class.provider_name
    end

    # The base provider is never usable.
    def supported?
      false
    end

    # Hash summary with the provider name and its support status.
    def support
      summary = {}
      summary[:provider] = provider_name
      summary[:supported] = supported?
      summary
    end
  end
end
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module EmbeddingUtil
  # Ordered list of provider classes with lookup helpers. Registration order
  # matters: automatic resolution picks the first supported provider.
  class ProviderRegistry
    attr_reader :providers

    def initialize
      @providers = []
    end

    # Adds provider_class unless it is already registered.
    def register(provider_class)
      @providers << provider_class unless @providers.include?(provider_class)
    end

    # Returns a provider instance for config.
    #
    # When config.provider is nil, false, :auto, or "auto", the first
    # registered provider that supports config is used. Any other value is
    # treated as an explicit provider name.
    #
    # Raises UnsupportedProviderError when no provider is supported, or when
    # the named provider is unknown or unsupported.
    def resolve(config: EmbeddingUtil.configuration)
      selected = config.provider
      return resolve_selected(selected, config) unless auto_selection?(selected)

      provider_class = providers.find { |candidate| candidate.supported?(config) }
      raise UnsupportedProviderError, unsupported_message unless provider_class

      provider_class.new(config: config)
    end

    # Returns the #support summary of every registered provider for config.
    def support(config: EmbeddingUtil.configuration)
      providers.map { |provider_class| provider_class.new(config: config).support }
    end

    private

    # True when selected means "pick automatically". The String "auto" is
    # accepted alongside the Symbol so a string-valued setting is not
    # rejected as an unknown provider.
    def auto_selection?(selected)
      !selected || selected.to_s == "auto"
    end

    # Instantiates the provider whose name matches selected and checks that it
    # is supported for config.
    def resolve_selected(selected, config)
      provider_class = providers.find { |candidate| candidate.provider_name == selected.to_sym }
      raise UnsupportedProviderError, "unknown embedding_util provider: #{selected.inspect}" unless provider_class

      provider = provider_class.new(config: config)
      raise UnsupportedProviderError, unsupported_message(selected) unless provider.supported?

      provider
    end

    # Builds the help text shown when no usable provider is found. With a
    # provider argument the first line names it; otherwise it is generic.
    def unsupported_message(provider = nil)
      target = provider ? "provider #{provider.inspect}" : "a supported local embedding provider"
      <<~MESSAGE.strip
        Could not find #{target}.

        Configure already-running local endpoints:
        EmbeddingUtil.configure { |c| c.embedding_endpoint = "http://127.0.0.1:18080" }
        EmbeddingUtil.configure { |c| c.reranker_endpoint = "http://127.0.0.1:18081" }

        Or install a local runtime for self-hosting:
        ramalama
        llama-server
      MESSAGE
    end
  end
end
|