embedding_util 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +2 -0
- data/.rubocop.yml +27 -0
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +21 -0
- data/README.md +176 -0
- data/Rakefile +10 -0
- data/exe/embedding_util +7 -0
- data/lib/embedding_util/cli.rb +111 -0
- data/lib/embedding_util/configuration.rb +42 -0
- data/lib/embedding_util/profile.rb +22 -0
- data/lib/embedding_util/profiles.rb +42 -0
- data/lib/embedding_util/provider.rb +31 -0
- data/lib/embedding_util/provider_registry.rb +56 -0
- data/lib/embedding_util/providers/endpoint.rb +153 -0
- data/lib/embedding_util/providers/self_hosted.rb +44 -0
- data/lib/embedding_util/result.rb +7 -0
- data/lib/embedding_util/runtime_command.rb +84 -0
- data/lib/embedding_util/server_manager.rb +258 -0
- data/lib/embedding_util/server_model.rb +46 -0
- data/lib/embedding_util/version.rb +5 -0
- data/lib/embedding_util.rb +127 -0
- metadata +83 -0
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "embedding_util/version"
|
|
4
|
+
|
|
5
|
+
module EmbeddingUtil
|
|
6
|
+
# Root of the library's error hierarchy; every error class declared below
# inherits from it, so `rescue Error` catches all of them.
class Error < StandardError; end

# NOTE(review): presumably raised when no registered provider can serve the
# request -- the raising code is not in this file; confirm against the registry.
class UnsupportedProviderError < Error; end

# Endpoint-related failure. In this file it is raised by `embed_result` when
# the provider returns no embeddings for a single input.
class EndpointError < Error; end

# Endpoint failure whose message reports a 404 for the given URI.
class EndpointNotFoundError < EndpointError
  attr_reader :uri, :path, :body

  # @param uri [#to_s] value interpolated into the error message
  # @param path [Object] stored as-is and exposed through {#path}
  # @param body [Object, nil] stored as-is and exposed through {#body}
  def initialize(uri, path:, body: nil)
    @uri = uri
    @path = path
    @body = body
    super("#{uri} returned 404")
  end
end
|
|
20
|
+
|
|
21
|
+
autoload :Configuration, "embedding_util/configuration"
|
|
22
|
+
autoload :CLI, "embedding_util/cli"
|
|
23
|
+
autoload :EmbeddingResult, "embedding_util/result"
|
|
24
|
+
autoload :Profile, "embedding_util/profile"
|
|
25
|
+
autoload :Profiles, "embedding_util/profiles"
|
|
26
|
+
autoload :Provider, "embedding_util/provider"
|
|
27
|
+
autoload :ProviderRegistry, "embedding_util/provider_registry"
|
|
28
|
+
autoload :RankedDocument, "embedding_util/result"
|
|
29
|
+
autoload :RerankResult, "embedding_util/result"
|
|
30
|
+
autoload :ServerManager, "embedding_util/server_manager"
|
|
31
|
+
autoload :ServerModel, "embedding_util/server_model"
|
|
32
|
+
autoload :RuntimeCommand, "embedding_util/runtime_command"
|
|
33
|
+
|
|
34
|
+
module Providers
|
|
35
|
+
autoload :Endpoint, "embedding_util/providers/endpoint"
|
|
36
|
+
autoload :SelfHosted, "embedding_util/providers/self_hosted"
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
module_function
|
|
40
|
+
|
|
41
|
+
# Returns the memoized configuration, creating it with `Configuration.new`
# on first access.
#
# @return [Configuration] the same object on every call until it is reset
def configuration
  @configuration ||= Configuration.new
end
|
|
44
|
+
|
|
45
|
+
# Yields the shared configuration so callers can mutate it in place.
#
# @yieldparam config [Configuration] the memoized configuration
# @return [Object] the block's return value, or the configuration itself
#   when no block is given
def configure
  # A bare `yield` raises LocalJumpError without a block; hand the
  # configuration back instead so `configure` is safe to call either way.
  return configuration unless block_given?

  yield configuration
end
|
|
48
|
+
|
|
49
|
+
# Replaces the memoized configuration with a fresh `Configuration.new` and
# drops the memoized registry so the next `registry` call rebuilds it.
#
# @return [nil]
def reset_configuration!
  @configuration = Configuration.new
  @registry = nil
end
|
|
53
|
+
|
|
54
|
+
# Returns the memoized provider registry. On first access a new
# `ProviderRegistry` is created and the two bundled providers are registered,
# `Providers::Endpoint` first and `Providers::SelfHosted` second.
#
# @return [ProviderRegistry]
def registry
  # `tap` avoids a local variable that would shadow this method's own name.
  @registry ||= ProviderRegistry.new.tap do |fresh|
    fresh.register(Providers::Endpoint)
    fresh.register(Providers::SelfHosted)
  end
end
|
|
62
|
+
|
|
63
|
+
# Adds a provider class to the shared registry.
#
# @param provider_class [Class] forwarded unchanged to `registry.register`
# @return [Object] whatever `registry.register` returns
def register_provider(provider_class)
  registry.register(provider_class)
end
|
|
66
|
+
|
|
67
|
+
# Queries the registry's `support` for the current configuration.
#
# @return [Object] whatever `registry.support(config:)` returns; its shape is
#   defined by ProviderRegistry, which is not visible in this file
def support
  registry.support(config: configuration)
end
|
|
70
|
+
|
|
71
|
+
# Convenience wrapper around `embed_result` that returns only the
# `embedding` of the result.
#
# @param text [Object] forwarded to `embed_result` as its input
# @param options [Hash] keyword arguments forwarded to `embed_result`
# @return [Object] the `embedding` attribute of the result
def embed(text, **options)
  embed_result(text, **options).embedding
end
|
|
74
|
+
|
|
75
|
+
# Embeds a batch of texts and returns the provider result's `embedding`.
#
# @param texts [Array, Object] normalized with `normalize_texts`, so a
#   non-array value is treated as a one-element batch
# @param profile [Profile, Object] a Profile or a key resolved by
#   `resolve_profile`; defaults to `configuration.resolved_profile`
# @param provider [Object, nil] optional per-call provider override
# @param _options [Hash] any other keywords are accepted and ignored
# @return [Object] the `embedding` attribute of the provider's result
def embed_many(texts, profile: configuration.resolved_profile, provider: nil, **_options)
  backend = selected_provider(provider)
  batch = normalize_texts(texts)
  backend.embed(batch, profile: resolve_profile(profile)).embedding
end
|
|
78
|
+
|
|
79
|
+
# Embeds the input and returns a result object.
#
# Array input returns the provider's result unchanged. Any other input is
# embedded as a one-element batch and repackaged into a new EmbeddingResult
# whose `embedding` is the first entry of the batch.
#
# @param input [Array, Object] texts to embed; elements are stringified
# @param profile [Profile, Object] a Profile or a key resolved by
#   `resolve_profile`; defaults to `configuration.resolved_profile`
# @param provider [Object, nil] optional per-call provider override
# @return [Object] the provider's result (array input) or a new
#   EmbeddingResult (single input)
# @raise [EndpointError] when a single input yields an empty embedding list
def embed_result(input, profile: configuration.resolved_profile, provider: nil)
  texts = normalize_texts(input)
  result = selected_provider(provider).embed(texts, profile: resolve_profile(profile))
  # Batch callers get the provider's result as-is.
  return result if input.is_a?(Array)

  raise EndpointError, "server returned no embeddings for the given input" if result.embedding.empty?

  # Unwrap the one-element batch; every other attribute is copied through.
  EmbeddingResult.new(
    embedding: result.embedding.fetch(0),
    model: result.model,
    profile: result.profile,
    provider: result.provider,
    metadata: result.metadata
  )
end
|
|
95
|
+
|
|
96
|
+
# Convenience wrapper around `rerank_result` that returns only the
# `results` of the result.
#
# @param query [Object] forwarded to `rerank_result`
# @param documents [Object] forwarded to `rerank_result`
# @param options [Hash] keyword arguments forwarded to `rerank_result`
# @return [Object] the `results` attribute of the rerank result
def rerank(query, documents, **options)
  rerank_result(query, documents, **options).results
end
|
|
99
|
+
|
|
100
|
+
# Reranks documents against a query through the selected provider.
#
# The query is stringified with `to_s`; documents are wrapped with `Array()`
# and each element stringified, so `nil` becomes an empty list and a single
# value becomes a one-element list.
#
# @param query [#to_s] the query text
# @param documents [Array, Object, nil] candidate documents
# @param profile [Profile, Object] a Profile or a key resolved by
#   `resolve_profile`; defaults to `configuration.resolved_profile`
# @param provider [Object, nil] optional per-call provider override
# @return [Object] whatever the provider's `rerank` returns
def rerank_result(query, documents, profile: configuration.resolved_profile, provider: nil)
  backend = selected_provider(provider)
  backend.rerank(query.to_s, Array(documents).map(&:to_s), profile: resolve_profile(profile))
end
|
|
103
|
+
|
|
104
|
+
# Lists the known profiles by delegating to `Profiles.all`.
#
# @return [Object] whatever `Profiles.all` returns
def profiles
  Profiles.all
end
|
|
107
|
+
|
|
108
|
+
# Resolves a profile through `resolve_profile`.
#
# @param name [Profile, Object] a Profile or a lookup key; defaults to
#   `configuration.profile`
# @return [Object] the result of `resolve_profile(name)`
def profile(name = configuration.profile)
  resolve_profile(name)
end
|
|
111
|
+
|
|
112
|
+
# Coerces the input into an array of strings.
#
# An Array has each element converted with `to_s`; any other value
# (including `nil`, which becomes `""`) is stringified and wrapped in a
# one-element array.
#
# @param input [Array, Object]
# @return [Array<String>]
def normalize_texts(input)
  return [input.to_s] unless input.is_a?(Array)

  input.map(&:to_s)
end
|
|
115
|
+
|
|
116
|
+
# Turns a profile reference into a profile.
#
# @param value [Profile, Object] a Profile is returned untouched; anything
#   else is used as the key for `Profiles.fetch`
# @return [Profile, Object] the given Profile, or the result of
#   `Profiles.fetch(value)`
def resolve_profile(value)
  return value if value.is_a?(Profile)

  Profiles.fetch(value)
end
|
|
119
|
+
|
|
120
|
+
# Resolves the provider to use for one call.
#
# Without an override the registry resolves against the shared configuration.
# With an override, a `dup` of the configuration gets its `provider` set, so
# the shared configuration object itself is not reassigned.
#
# @param provider [Object, nil] per-call provider override
# @return [Object] whatever `registry.resolve(config:)` returns
def selected_provider(provider)
  if provider
    scoped_config = configuration.dup
    scoped_config.provider = provider
    registry.resolve(config: scoped_config)
  else
    registry.resolve(config: configuration)
  end
end
|
|
127
|
+
end
|
metadata
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: embedding_util
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- hmdne
|
|
8
|
+
bindir: exe
|
|
9
|
+
cert_chain: []
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
11
|
+
dependencies:
|
|
12
|
+
- !ruby/object:Gem::Dependency
|
|
13
|
+
name: thor
|
|
14
|
+
requirement: !ruby/object:Gem::Requirement
|
|
15
|
+
requirements:
|
|
16
|
+
- - "~>"
|
|
17
|
+
- !ruby/object:Gem::Version
|
|
18
|
+
version: '1.3'
|
|
19
|
+
type: :runtime
|
|
20
|
+
prerelease: false
|
|
21
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
22
|
+
requirements:
|
|
23
|
+
- - "~>"
|
|
24
|
+
- !ruby/object:Gem::Version
|
|
25
|
+
version: '1.3'
|
|
26
|
+
description: A small rbutils gem for computing embeddings and true reranking through
|
|
27
|
+
local embedding model runtimes.
|
|
28
|
+
email:
|
|
29
|
+
- 54514036+hmdne@users.noreply.github.com
|
|
30
|
+
executables:
|
|
31
|
+
- embedding_util
|
|
32
|
+
extensions: []
|
|
33
|
+
extra_rdoc_files: []
|
|
34
|
+
files:
|
|
35
|
+
- ".rspec"
|
|
36
|
+
- ".rubocop.yml"
|
|
37
|
+
- CHANGELOG.md
|
|
38
|
+
- LICENSE.txt
|
|
39
|
+
- README.md
|
|
40
|
+
- Rakefile
|
|
41
|
+
- exe/embedding_util
|
|
42
|
+
- lib/embedding_util.rb
|
|
43
|
+
- lib/embedding_util/cli.rb
|
|
44
|
+
- lib/embedding_util/configuration.rb
|
|
45
|
+
- lib/embedding_util/profile.rb
|
|
46
|
+
- lib/embedding_util/profiles.rb
|
|
47
|
+
- lib/embedding_util/provider.rb
|
|
48
|
+
- lib/embedding_util/provider_registry.rb
|
|
49
|
+
- lib/embedding_util/providers/endpoint.rb
|
|
50
|
+
- lib/embedding_util/providers/self_hosted.rb
|
|
51
|
+
- lib/embedding_util/result.rb
|
|
52
|
+
- lib/embedding_util/runtime_command.rb
|
|
53
|
+
- lib/embedding_util/server_manager.rb
|
|
54
|
+
- lib/embedding_util/server_model.rb
|
|
55
|
+
- lib/embedding_util/version.rb
|
|
56
|
+
homepage: https://github.com/rbutils/embedding_util
|
|
57
|
+
licenses:
|
|
58
|
+
- MIT
|
|
59
|
+
metadata:
|
|
60
|
+
allowed_push_host: https://rubygems.org
|
|
61
|
+
source_code_uri: https://github.com/rbutils/embedding_util
|
|
62
|
+
changelog_uri: https://github.com/rbutils/embedding_util/blob/master/CHANGELOG.md
|
|
63
|
+
documentation_uri: https://github.com/rbutils/embedding_util#readme
|
|
64
|
+
bug_tracker_uri: https://github.com/rbutils/embedding_util/issues
|
|
65
|
+
rubygems_mfa_required: 'true'
|
|
66
|
+
rdoc_options: []
|
|
67
|
+
require_paths:
|
|
68
|
+
- lib
|
|
69
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
70
|
+
requirements:
|
|
71
|
+
- - ">="
|
|
72
|
+
- !ruby/object:Gem::Version
|
|
73
|
+
version: 3.2.0
|
|
74
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
75
|
+
requirements:
|
|
76
|
+
- - ">="
|
|
77
|
+
- !ruby/object:Gem::Version
|
|
78
|
+
version: '0'
|
|
79
|
+
requirements: []
|
|
80
|
+
rubygems_version: 4.0.6
|
|
81
|
+
specification_version: 4
|
|
82
|
+
summary: Local-first text embeddings and reranking for Ruby
|
|
83
|
+
test_files: []
|