pgvector 0.3.1 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +62 -4
- data/lib/pgvector/bit.rb +6 -2
- data/lib/pgvector/pg.rb +75 -10
- data/lib/pgvector/sparse_vector.rb +8 -0
- data/lib/pgvector/vector.rb +24 -2
- data/lib/pgvector/version.rb +1 -1
- data/lib/sequel/plugins/pgvector.rb +7 -4
- metadata +3 -7
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7238af114feedc000b706855f9af446a583455a94771ece1e90c3e3082e63c44
|
|
4
|
+
data.tar.gz: f926141da347dc8bc1f3663e271cf3efa6b73b7c84c11e17db92f0d32af1a47c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 6351a9f10f989fb92e8b5f5edea1d66dcf5110b65b748fef654faa9bfe189af64a095dd263be03af8513f552063b828730bbc09b3545f3ccf25bc7b3aef9f47a
|
|
7
|
+
data.tar.gz: 86d146518d40c9af209e9a671db10c024df4c256271de7062c4de657404508d2c0aef599cb5a9a29c6b0190f40b2bedfb91dbb41c6a31bb40b0e0b7242d4957b
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
|
@@ -23,10 +23,17 @@ And follow the instructions for your database library:
|
|
|
23
23
|
|
|
24
24
|
Or check out some examples:
|
|
25
25
|
|
|
26
|
-
- [Embeddings](examples/
|
|
27
|
-
- [
|
|
28
|
-
- [
|
|
29
|
-
- [
|
|
26
|
+
- [Embeddings](examples/openai/example.rb) with OpenAI
|
|
27
|
+
- [Binary embeddings](examples/cohere/example.rb) with Cohere
|
|
28
|
+
- [Sentence embeddings](examples/informers/example.rb) with Informers
|
|
29
|
+
- [Hybrid search](examples/hybrid/example.rb) with Informers (Reciprocal Rank Fusion)
|
|
30
|
+
- [Sparse search](examples/sparse/example.rb) with Transformers.rb
|
|
31
|
+
- [Morgan fingerprints](examples/rdkit/example.rb) with RDKit.rb
|
|
32
|
+
- [Topic modeling](examples/tomoto/example.rb) with tomoto.rb
|
|
33
|
+
- [User-based recommendations](examples/disco/user_recs.rb) with Disco
|
|
34
|
+
- [Item-based recommendations](examples/disco/item_recs.rb) with Disco
|
|
35
|
+
- [Horizontal scaling](examples/citus/example.rb) with Citus
|
|
36
|
+
- [Bulk loading](examples/loading/example.rb) with `COPY`
|
|
30
37
|
|
|
31
38
|
## pg
|
|
32
39
|
|
|
@@ -126,6 +133,48 @@ DB.add_index :items, :embedding, type: "hnsw", opclass: "vector_l2_ops"
|
|
|
126
133
|
|
|
127
134
|
Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
|
|
128
135
|
|
|
136
|
+
## Reference
|
|
137
|
+
|
|
138
|
+
### Sparse Vectors
|
|
139
|
+
|
|
140
|
+
Create a sparse vector from an array
|
|
141
|
+
|
|
142
|
+
```ruby
|
|
143
|
+
vec = Pgvector::SparseVector.new([1, 0, 2, 0, 3, 0])
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
Or a hash of non-zero elements
|
|
147
|
+
|
|
148
|
+
```ruby
|
|
149
|
+
vec = Pgvector::SparseVector.new({0 => 1, 2 => 2, 4 => 3}, 6)
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
Note: Indices start at 0
|
|
153
|
+
|
|
154
|
+
Get the number of dimensions
|
|
155
|
+
|
|
156
|
+
```ruby
|
|
157
|
+
dim = vec.dimensions
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
Get the indices of non-zero elements
|
|
161
|
+
|
|
162
|
+
```ruby
|
|
163
|
+
indices = vec.indices
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
Get the values of non-zero elements
|
|
167
|
+
|
|
168
|
+
```ruby
|
|
169
|
+
values = vec.values
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
Get an array
|
|
173
|
+
|
|
174
|
+
```ruby
|
|
175
|
+
arr = vec.to_a
|
|
176
|
+
```
|
|
177
|
+
|
|
129
178
|
## History
|
|
130
179
|
|
|
131
180
|
View the [changelog](https://github.com/pgvector/pgvector-ruby/blob/master/CHANGELOG.md)
|
|
@@ -148,3 +197,12 @@ createdb pgvector_ruby_test
|
|
|
148
197
|
bundle install
|
|
149
198
|
bundle exec rake test
|
|
150
199
|
```
|
|
200
|
+
|
|
201
|
+
To run an example:
|
|
202
|
+
|
|
203
|
+
```sh
|
|
204
|
+
cd examples/loading
|
|
205
|
+
bundle install
|
|
206
|
+
createdb pgvector_example
|
|
207
|
+
bundle exec ruby example.rb
|
|
208
|
+
```
|
data/lib/pgvector/bit.rb
CHANGED
|
@@ -13,8 +13,8 @@ module Pgvector
|
|
|
13
13
|
end
|
|
14
14
|
|
|
15
15
|
def self.from_binary(string)
|
|
16
|
-
length = string
|
|
17
|
-
Bit.new(
|
|
16
|
+
length, data = string.unpack("l>B*")
|
|
17
|
+
Bit.new(data[...length])
|
|
18
18
|
end
|
|
19
19
|
|
|
20
20
|
def to_s
|
|
@@ -24,5 +24,9 @@ module Pgvector
|
|
|
24
24
|
def to_a
|
|
25
25
|
@data.each_char.map { |v| v != "0" }
|
|
26
26
|
end
|
|
27
|
+
|
|
28
|
+
def to_binary
|
|
29
|
+
[@data.length, @data].pack("l>B*")
|
|
30
|
+
end
|
|
27
31
|
end
|
|
28
32
|
end
|
data/lib/pgvector/pg.rb
CHANGED
|
@@ -3,17 +3,17 @@ require "pg"
|
|
|
3
3
|
module Pgvector
|
|
4
4
|
module PG
|
|
5
5
|
def self.register_vector(registry)
|
|
6
|
-
registry.register_type(0, "vector",
|
|
7
|
-
registry.register_type(1, "vector",
|
|
6
|
+
registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
|
|
7
|
+
registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
|
|
8
8
|
|
|
9
9
|
# no binary decoder for halfvec since unpack does not have directive for half-precision
|
|
10
|
-
registry.register_type(0, "halfvec",
|
|
10
|
+
registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
|
|
11
11
|
|
|
12
|
-
registry.register_type(0, "bit",
|
|
13
|
-
registry.register_type(1, "bit",
|
|
12
|
+
registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
|
|
13
|
+
registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
|
|
14
14
|
|
|
15
|
-
registry.register_type(0, "sparsevec",
|
|
16
|
-
registry.register_type(1, "sparsevec",
|
|
15
|
+
registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
|
|
16
|
+
registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
module BinaryDecoder
|
|
@@ -31,7 +31,36 @@ module Pgvector
|
|
|
31
31
|
|
|
32
32
|
class Sparsevec < ::PG::SimpleDecoder
|
|
33
33
|
def decode(string, tuple = nil, field = nil)
|
|
34
|
-
SparseVector.from_binary(string)
|
|
34
|
+
::Pgvector::SparseVector.from_binary(string)
|
|
35
|
+
end
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
module BinaryEncoder
|
|
40
|
+
# experimental
|
|
41
|
+
def self.type_map
|
|
42
|
+
tm = ::PG::TypeMapByClass.new
|
|
43
|
+
tm[::Pgvector::Vector] = Vector.new
|
|
44
|
+
tm[::Pgvector::Bit] = Bit.new
|
|
45
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
|
46
|
+
tm
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
class Vector < ::PG::SimpleEncoder
|
|
50
|
+
def encode(value)
|
|
51
|
+
value.to_binary
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
class Bit < ::PG::SimpleEncoder
|
|
56
|
+
def encode(value)
|
|
57
|
+
value.to_binary
|
|
58
|
+
end
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
class Sparsevec < ::PG::SimpleEncoder
|
|
62
|
+
def encode(value)
|
|
63
|
+
value.to_binary
|
|
35
64
|
end
|
|
36
65
|
end
|
|
37
66
|
end
|
|
@@ -45,7 +74,7 @@ module Pgvector
|
|
|
45
74
|
|
|
46
75
|
class Halfvec < ::PG::SimpleDecoder
|
|
47
76
|
def decode(string, tuple = nil, field = nil)
|
|
48
|
-
HalfVector.from_text(string).to_a
|
|
77
|
+
::Pgvector::HalfVector.from_text(string).to_a
|
|
49
78
|
end
|
|
50
79
|
end
|
|
51
80
|
|
|
@@ -57,7 +86,43 @@ module Pgvector
|
|
|
57
86
|
|
|
58
87
|
class Sparsevec < ::PG::SimpleDecoder
|
|
59
88
|
def decode(string, tuple = nil, field = nil)
|
|
60
|
-
SparseVector.from_text(string)
|
|
89
|
+
::Pgvector::SparseVector.from_text(string)
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
module TextEncoder
|
|
95
|
+
# experimental
|
|
96
|
+
def self.type_map
|
|
97
|
+
tm = ::PG::TypeMapByClass.new
|
|
98
|
+
tm[::Pgvector::Vector] = Vector.new
|
|
99
|
+
tm[::Pgvector::HalfVector] = Halfvec.new
|
|
100
|
+
tm[::Pgvector::Bit] = Bit.new
|
|
101
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
|
102
|
+
tm
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
class Vector < ::PG::SimpleEncoder
|
|
106
|
+
def encode(value)
|
|
107
|
+
value.to_s
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
class Halfvec < ::PG::SimpleEncoder
|
|
112
|
+
def encode(value)
|
|
113
|
+
value.to_s
|
|
114
|
+
end
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
class Bit < ::PG::SimpleEncoder
|
|
118
|
+
def encode(value)
|
|
119
|
+
value.to_s
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
class Sparsevec < ::PG::SimpleEncoder
|
|
124
|
+
def encode(value)
|
|
125
|
+
value.to_s
|
|
61
126
|
end
|
|
62
127
|
end
|
|
63
128
|
end
|
data/lib/pgvector/sparse_vector.rb
CHANGED
|
@@ -30,6 +30,14 @@ module Pgvector
|
|
|
30
30
|
arr
|
|
31
31
|
end
|
|
32
32
|
|
|
33
|
+
def to_binary
|
|
34
|
+
nnz = @indices.size
|
|
35
|
+
buffer = [dimensions, nnz, 0].pack("l>l>l>")
|
|
36
|
+
@indices.pack("l>#{nnz}", buffer: buffer)
|
|
37
|
+
@values.pack("g#{nnz}", buffer: buffer)
|
|
38
|
+
buffer
|
|
39
|
+
end
|
|
40
|
+
|
|
33
41
|
private
|
|
34
42
|
|
|
35
43
|
def from_hash(data, dimensions)
|
data/lib/pgvector/vector.rb
CHANGED
|
@@ -1,7 +1,13 @@
|
|
|
1
1
|
module Pgvector
|
|
2
2
|
class Vector
|
|
3
3
|
def initialize(data)
|
|
4
|
-
|
|
4
|
+
# keep as NArray when possible for performance
|
|
5
|
+
@data =
|
|
6
|
+
if numo?(data)
|
|
7
|
+
data.cast_to(Numo::SFloat)
|
|
8
|
+
else
|
|
9
|
+
data.to_a.map(&:to_f)
|
|
10
|
+
end
|
|
5
11
|
end
|
|
6
12
|
|
|
7
13
|
def self.from_text(string)
|
|
@@ -19,7 +25,23 @@ module Pgvector
|
|
|
19
25
|
end
|
|
20
26
|
|
|
21
27
|
def to_a
|
|
22
|
-
@data
|
|
28
|
+
@data.to_a
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
def to_binary
|
|
32
|
+
if numo?(@data)
|
|
33
|
+
[@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary.force_encoding(Encoding::BINARY)
|
|
34
|
+
else
|
|
35
|
+
buffer = [@data.size, 0].pack("s>s>")
|
|
36
|
+
@data.pack("g*", buffer: buffer)
|
|
37
|
+
buffer
|
|
38
|
+
end
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
private
|
|
42
|
+
|
|
43
|
+
def numo?(data)
|
|
44
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
|
23
45
|
end
|
|
24
46
|
end
|
|
25
47
|
end
|
data/lib/pgvector/version.rb
CHANGED
data/lib/sequel/plugins/pgvector.rb
CHANGED
|
@@ -10,6 +10,7 @@ module Sequel
|
|
|
10
10
|
|
|
11
11
|
module DatasetMethods
|
|
12
12
|
def nearest_neighbors(column, value, distance:)
|
|
13
|
+
return extension(:null_dataset).nullify if value.nil?
|
|
13
14
|
value = ::Pgvector.encode(value) unless value.is_a?(String)
|
|
14
15
|
quoted_column = quote_identifier(column)
|
|
15
16
|
distance = distance.to_s
|
|
@@ -67,7 +68,7 @@ module Sequel
|
|
|
67
68
|
end
|
|
68
69
|
|
|
69
70
|
def []=(k, v)
|
|
70
|
-
if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
|
|
71
|
+
if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String) && !v.nil?
|
|
71
72
|
super(k, ::Pgvector.encode(v))
|
|
72
73
|
else
|
|
73
74
|
super
|
|
@@ -75,10 +76,12 @@ module Sequel
|
|
|
75
76
|
end
|
|
76
77
|
|
|
77
78
|
def [](k)
|
|
78
|
-
|
|
79
|
-
|
|
79
|
+
v = super
|
|
80
|
+
if self.class.vector_columns.key?(k.to_sym) && !v.nil?
|
|
81
|
+
# to_s needed for JRuby
|
|
82
|
+
::Pgvector.decode(v.to_s)
|
|
80
83
|
else
|
|
81
|
-
|
|
84
|
+
v
|
|
82
85
|
end
|
|
83
86
|
end
|
|
84
87
|
end
|
metadata
CHANGED
|
@@ -1,16 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pgvector
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.3.
|
|
4
|
+
version: 0.3.3
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
|
-
autorequire:
|
|
9
8
|
bindir: bin
|
|
10
9
|
cert_chain: []
|
|
11
|
-
date:
|
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
|
12
11
|
dependencies: []
|
|
13
|
-
description:
|
|
14
12
|
email: andrew@ankane.org
|
|
15
13
|
executables: []
|
|
16
14
|
extensions: []
|
|
@@ -32,7 +30,6 @@ homepage: https://github.com/pgvector/pgvector-ruby
|
|
|
32
30
|
licenses:
|
|
33
31
|
- MIT
|
|
34
32
|
metadata: {}
|
|
35
|
-
post_install_message:
|
|
36
33
|
rdoc_options: []
|
|
37
34
|
require_paths:
|
|
38
35
|
- lib
|
|
@@ -47,8 +44,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
47
44
|
- !ruby/object:Gem::Version
|
|
48
45
|
version: '0'
|
|
49
46
|
requirements: []
|
|
50
|
-
rubygems_version:
|
|
51
|
-
signing_key:
|
|
47
|
+
rubygems_version: 4.0.3
|
|
52
48
|
specification_version: 4
|
|
53
49
|
summary: pgvector support for Ruby
|
|
54
50
|
test_files: []
|