voyageai 1.2.0 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +104 -2
- data/lib/voyageai/client.rb +19 -3
- data/lib/voyageai/embed.rb +1 -1
- data/lib/voyageai/model.rb +7 -0
- data/lib/voyageai/rerank.rb +46 -0
- data/lib/voyageai/reranking.rb +46 -0
- data/lib/voyageai/version.rb +1 -1
- metadata +5 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 651f676fcb2fa068cf44f37fda02d1185b776ca83fc0f6099362d5e4deb7a250
|
4
|
+
data.tar.gz: f3a22402be82fd02ec94628aeab067aec45cfcc6c203f7d75af13a00f95b580b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0b70798b6ee15febae3f263def8f118b4177f73e553bf3463e77f2638259b62d95025dc6336049eedec71fdccdac80512e54c55bd65b70333fc3f29518d97367
|
7
|
+
data.tar.gz: 2aa09dc2ea94771f309af4e40aa4340842b1181e1e82ddc270b75bddd5120044d0dd195bf687e302cf270da12c312261dc6a523e5b69a0184bacf0aafbb10da1
|
data/README.md
CHANGED
@@ -10,7 +10,9 @@ gem install voyageai
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
-
###
|
13
|
+
### Embedding
|
14
|
+
|
15
|
+
#### Generating Single Embedding
|
14
16
|
|
15
17
|
```ruby
|
16
18
|
require 'voyageai'
|
@@ -25,7 +27,7 @@ embed.usage # "#<VoyageAI::Usage total_tokens=...>"
|
|
25
27
|
embed.embedding # [0.0, ...]
|
26
28
|
```
|
27
29
|
|
28
|
-
|
30
|
+
#### Generating Multiple Embeddings
|
29
31
|
|
30
32
|
```ruby
|
31
33
|
require 'voyageai'
|
@@ -45,6 +47,28 @@ embed.usage # "#<VoyageAI::Usage total_tokens=...>"
|
|
45
47
|
embed.embeddings # [[0.0, ...], ...]
|
46
48
|
```
|
47
49
|
|
50
|
+
### Reranking
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
require 'voyageai'
|
54
|
+
|
55
|
+
query = 'Who is the best person to call for a toilet?'
|
56
|
+
|
57
|
+
documents = [
|
58
|
+
'John is a musician.',
|
59
|
+
'Paul is a plumber.',
|
60
|
+
'George is a teacher.',
|
61
|
+
'Ringo is a doctor.',
|
62
|
+
]
|
63
|
+
|
64
|
+
voyageai = VoyageAI::Client.new(api_key: 'pa-...') # or configure ENV['VOYAGEAI_API_KEY']
|
65
|
+
|
66
|
+
rerank = voyageai.rerank(query:, documents:, top_k: 3)
|
67
|
+
rerank.model # "..."
|
68
|
+
rerank.usage # "#<VoyageAI::Usage total_tokens=...>"
|
69
|
+
rerank.results # [#<VoyageAI::Reranking index=0 relevance_score=0.5>]
|
70
|
+
```
|
71
|
+
|
48
72
|
## Configuration
|
49
73
|
|
50
74
|
```ruby
|
@@ -58,3 +82,81 @@ VoyageAI.configure do |config|
|
|
58
82
|
config.logger = Logger.new(STDOUT)
|
59
83
|
end
|
60
84
|
```
|
85
|
+
|
86
|
+
## Examples
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
require "voyageai"
|
90
|
+
|
91
|
+
Entry = Data.define(:document, :embedding)
|
92
|
+
|
93
|
+
VOYAGEAI = VoyageAI::Client.new
|
94
|
+
|
95
|
+
DOCUMENTS = [
|
96
|
+
"John is a musician.",
|
97
|
+
"Paul is a plumber.",
|
98
|
+
"George is a teacher.",
|
99
|
+
"Ringo is a doctor.",
|
100
|
+
"Lisa is a lawyer.",
|
101
|
+
"Stuart is a painter.",
|
102
|
+
"Brian is a writer.",
|
103
|
+
"Jane is a chef.",
|
104
|
+
"Bill is a nurse.",
|
105
|
+
"Susan is a carpenter.",
|
106
|
+
].freeze
|
107
|
+
|
108
|
+
embeddings = VOYAGEAI.embed(DOCUMENTS, input_type: "document").embeddings
|
109
|
+
|
110
|
+
ENTRIES = DOCUMENTS.zip(embeddings).map do |document, embedding|
|
111
|
+
Entry.new(document:, embedding:)
|
112
|
+
end
|
113
|
+
|
114
|
+
# @param src [Array<Float>]
|
115
|
+
# @param dst [Array<Float>]
|
116
|
+
#
|
117
|
+
# @return [Float]
|
118
|
+
def euclidean_distance(src, dst)
|
119
|
+
Math.sqrt(src.zip(dst).map { |a, b| (a - b)**2 }.reduce(:+))
|
120
|
+
end
|
121
|
+
|
122
|
+
# @param query [String]
|
123
|
+
# @param limit [Integer]
|
124
|
+
#
|
125
|
+
# @return [Array<String>]
|
126
|
+
def nearest_documents(query:, limit: 4)
|
127
|
+
embedding = VOYAGEAI.embed(query, input_type: "query").embedding
|
128
|
+
|
129
|
+
ENTRIES
|
130
|
+
.sort_by { |entry| euclidean_distance(entry.embedding, embedding) }
|
131
|
+
.first(limit)
|
132
|
+
.map(&:document)
|
133
|
+
end
|
134
|
+
|
135
|
+
# @param query [String]
|
136
|
+
def search(query:)
|
137
|
+
documents = nearest_documents(query:)
|
138
|
+
|
139
|
+
results = VOYAGEAI.rerank(query:, documents:, top_k: 2).results
|
140
|
+
|
141
|
+
puts "query=#{query.inspect}"
|
142
|
+
results.each do |reranking|
|
143
|
+
document = documents[reranking.index]
|
144
|
+
puts("document=#{document.inspect} relevance_score=#{reranking.relevance_score}")
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
search(query: "What do George and Ringo do?")
|
149
|
+
search(query: "Who works in the medical field?")
|
150
|
+
```
|
151
|
+
|
152
|
+
```
|
153
|
+
query="What do George and Ringo do?"
|
154
|
+
document="Ringo is a doctor." relevance_score=0.67968755
|
155
|
+
document="George is a teacher." relevance_score=0.58593755
|
156
|
+
```
|
157
|
+
|
158
|
+
```
|
159
|
+
query="Who works in the medical field?"
|
160
|
+
document="Bill is a nurse." relevance_score=0.55078125
|
161
|
+
document="Ringo is a doctor." relevance_score=0.50390625
|
162
|
+
```
|
data/lib/voyageai/client.rb
CHANGED
@@ -37,12 +37,14 @@ module VoyageAI
|
|
37
37
|
"#<#{self.class.name} api_key=#{masked_api_key.inspect} host=#{@host.inspect} version=#{@version.inspect}>"
|
38
38
|
end
|
39
39
|
|
40
|
-
# @param model [String] optional (e.g. VoyageAI::Model::VOYAGE or "voyage-3")
|
41
40
|
# @param input [String, Array<String>] required (e.g. "Sample" or ["Sample 1", "Sample 2", ...])
|
41
|
+
# @param model [String] optional (e.g. VoyageAI::Model::VOYAGE or "voyage-3")
|
42
|
+
# @param input_type [Symbol] optional (e.g. :query or :document)
|
43
|
+
# @param truncation [Boolean] optional
|
42
44
|
#
|
43
45
|
# @return [Embedding]
|
44
|
-
def embed(input, model: Model::VOYAGE)
|
45
|
-
payload = { input: arrayify(input), model: model }
|
46
|
+
def embed(input, model: Model::VOYAGE, input_type: nil, truncation: nil)
|
47
|
+
payload = { input: arrayify(input), model: model, truncation:, input_type: }.compact
|
46
48
|
response = http.accept(:json).post("/#{@version}/embeddings", json: payload)
|
47
49
|
|
48
50
|
raise RequestError.new(response:) unless response.status.ok?
|
@@ -50,6 +52,20 @@ module VoyageAI
|
|
50
52
|
Embed.parse(data: response.parse)
|
51
53
|
end
|
52
54
|
|
55
|
+
# @param query [String] required
|
56
|
+
# @param documents [Array<String>] required
|
57
|
+
# @param model [String] optional (e.g. VoyageAI::Model::RERANK or "rerank-2")
|
58
|
+
# @param top_k [Integer] optional
|
59
|
+
# @param truncation [Boolean] optional
|
60
|
+
def rerank(query:, documents:, model: Model::RERANK, top_k: nil, truncation: nil)
|
61
|
+
payload = { query:, documents:, model:, top_k:, truncation: }.compact
|
62
|
+
response = http.accept(:json).post("/#{@version}/rerank", json: payload)
|
63
|
+
|
64
|
+
raise RequestError.new(response:) unless response.status.ok?
|
65
|
+
|
66
|
+
Rerank.parse(data: response.parse)
|
67
|
+
end
|
68
|
+
|
53
69
|
private
|
54
70
|
|
55
71
|
# @return [HTTP::Client]
|
data/lib/voyageai/embed.rb
CHANGED
@@ -25,7 +25,7 @@ module VoyageAI
|
|
25
25
|
usage = Usage.parse(data: data["usage"])
|
26
26
|
embeddings = data["data"].map { |embedding_data| embedding_data["embedding"] }
|
27
27
|
|
28
|
-
|
28
|
+
new(model: model, usage: usage, embeddings: embeddings)
|
29
29
|
end
|
30
30
|
|
31
31
|
# @param model [String]
|
data/lib/voyageai/model.rb
CHANGED
@@ -3,17 +3,24 @@
|
|
3
3
|
module VoyageAI
|
4
4
|
module Model
|
5
5
|
VOYAGE_3 = "voyage-3"
|
6
|
+
VOYAGE_3_LARGE = "voyage-3-large"
|
6
7
|
VOYAGE_3_LITE = "voyage-3-lite"
|
7
8
|
VOYAGE_FINANCE_2 = "voyage-finance-2"
|
8
9
|
VOYAGE_MULTILINGUAL_2 = "voyage-multilingual-2"
|
9
10
|
VOYAGE_LAW_2 = "voyage-law-2"
|
10
11
|
VOYAGE_CODE_2 = "voyage-code-2"
|
11
12
|
|
13
|
+
RERANK_2 = "rerank-2"
|
14
|
+
RERANK_2_LITE = "rerank-2-lite"
|
15
|
+
|
12
16
|
VOYAGE = VOYAGE_3
|
13
17
|
VOYAGE_LITE = VOYAGE_3_LITE
|
14
18
|
VOYAGE_FINANCE = VOYAGE_FINANCE_2
|
15
19
|
VOYAGE_MULTILINGUAL = VOYAGE_MULTILINGUAL_2
|
16
20
|
VOYAGE_LAW = VOYAGE_LAW_2
|
17
21
|
VOYAGE_CODE = VOYAGE_CODE_2
|
22
|
+
|
23
|
+
RERANK = RERANK_2
|
24
|
+
RERANK_LITE = RERANK_2_LITE
|
18
25
|
end
|
19
26
|
end
|
data/lib/voyageai/rerank.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VoyageAI
|
4
|
+
# The response for a rerank request that wraps the model / usage / results.
|
5
|
+
#
|
6
|
+
# @example
|
7
|
+
# VoyageAI::Rerank.new(model: "rerank-2", usage: VoyageAI::Usage.new(total_tokens: 0), results: [])
|
8
|
+
class Rerank
|
9
|
+
# @!attribute [rw] model
|
10
|
+
# @return [String]
|
11
|
+
attr_accessor :model
|
12
|
+
|
13
|
+
# @!attribute [rw] usage
|
14
|
+
# @return [Usage]
|
15
|
+
attr_accessor :usage
|
16
|
+
|
17
|
+
# @!attribute [rw] results
|
18
|
+
# @return [Array<Reranking>]
|
19
|
+
attr_accessor :results
|
20
|
+
|
21
|
+
# @param data [Hash]
|
22
|
+
# @return [Rerank]
|
23
|
+
def self.parse(data:)
|
24
|
+
model = data["model"]
|
25
|
+
usage = Usage.parse(data: data["usage"])
|
26
|
+
|
27
|
+
results = data["data"].map { |entry| Reranking.parse(data: entry) }
|
28
|
+
|
29
|
+
new(model:, usage:, results:)
|
30
|
+
end
|
31
|
+
|
32
|
+
# @param model [String]
|
33
|
+
# @param usage [Usage]
|
34
|
+
# @param results [Array<Reranking>]
|
35
|
+
def initialize(model:, usage:, results:)
|
36
|
+
@model = model
|
37
|
+
@usage = usage
|
38
|
+
@results = results
|
39
|
+
end
|
40
|
+
|
41
|
+
# @return [String]
|
42
|
+
def inspect
|
43
|
+
"#<#{self.class.name} model=#{@model.inspect} usage=#{@usage.inspect}>"
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/voyageai/reranking.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module VoyageAI
|
4
|
+
# The response of an individual re-ranking.
|
5
|
+
#
|
6
|
+
# @example
|
7
|
+
# VoyageAI::Reranking.new(index: 0, document: "Sample", relevance_score: 0.0)
|
8
|
+
class Reranking
|
9
|
+
# @!attribute [rw] index
|
10
|
+
# @return [Integer]
|
11
|
+
attr_accessor :index
|
12
|
+
|
13
|
+
# @!attribute [rw] document
|
14
|
+
# @return [String]
|
15
|
+
attr_accessor :document
|
16
|
+
|
17
|
+
# @!attribute [rw] relevance_score
|
18
|
+
# @return [Float]
|
19
|
+
attr_accessor :relevance_score
|
20
|
+
|
21
|
+
# @param index [Integer]
|
22
|
+
# @param document [String]
|
23
|
+
# @param relevance_score [Float]
|
24
|
+
def initialize(index:, document:, relevance_score:)
|
25
|
+
@index = index
|
26
|
+
@document = document
|
27
|
+
@relevance_score = relevance_score
|
28
|
+
end
|
29
|
+
|
30
|
+
# @return [String]
|
31
|
+
def inspect
|
32
|
+
"#<#{self.class.name} index=#{@index} relevance_score=#{@relevance_score}>"
|
33
|
+
end
|
34
|
+
|
35
|
+
# @param data [Hash]
|
36
|
+
#
|
37
|
+
# @return [Reranking]
|
38
|
+
def self.parse(data:)
|
39
|
+
index = data["index"]
|
40
|
+
document = data["document"]
|
41
|
+
relevance_score = data["relevance_score"]
|
42
|
+
|
43
|
+
new(index: index, document: document, relevance_score: relevance_score)
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
data/lib/voyageai/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: voyageai
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kevin Sylvestre
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: http
|
@@ -55,6 +54,8 @@ files:
|
|
55
54
|
- lib/voyageai/embed.rb
|
56
55
|
- lib/voyageai/instrumentation.rb
|
57
56
|
- lib/voyageai/model.rb
|
57
|
+
- lib/voyageai/rerank.rb
|
58
|
+
- lib/voyageai/reranking.rb
|
58
59
|
- lib/voyageai/usage.rb
|
59
60
|
- lib/voyageai/version.rb
|
60
61
|
homepage: https://github.com/ksylvest/voyageai
|
@@ -64,7 +65,6 @@ metadata:
|
|
64
65
|
homepage_uri: https://github.com/ksylvest/voyageai
|
65
66
|
changelog_uri: https://github.com/ksylvest/voyageai/releases
|
66
67
|
rubygems_mfa_required: 'true'
|
67
|
-
post_install_message:
|
68
68
|
rdoc_options: []
|
69
69
|
require_paths:
|
70
70
|
- lib
|
@@ -79,8 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
79
79
|
- !ruby/object:Gem::Version
|
80
80
|
version: '0'
|
81
81
|
requirements: []
|
82
|
-
rubygems_version: 3.
|
83
|
-
signing_key:
|
82
|
+
rubygems_version: 3.6.2
|
84
83
|
specification_version: 4
|
85
84
|
summary: A client for voyageai.com.
|
86
85
|
test_files: []
|