pgvector 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +1 -0
- data/lib/pgvector/bit.rb +6 -2
- data/lib/pgvector/pg.rb +75 -10
- data/lib/pgvector/sparse_vector.rb +8 -0
- data/lib/pgvector/vector.rb +24 -2
- data/lib/pgvector/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
|
4
|
+
data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
|
7
|
+
data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
|
|
24
24
|
Or check out some examples:
|
25
25
|
|
26
26
|
- [Embeddings](examples/openai_embeddings.rb) with OpenAI
|
27
|
+
- [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
|
27
28
|
- [User-based recommendations](examples/disco_user_recs.rb) with Disco
|
28
29
|
- [Item-based recommendations](examples/disco_item_recs.rb) with Disco
|
29
30
|
- [Bulk loading](examples/bulk_loading.rb) with `COPY`
|
data/lib/pgvector/bit.rb
CHANGED
@@ -13,8 +13,8 @@ module Pgvector
|
|
13
13
|
end
|
14
14
|
|
15
15
|
def self.from_binary(string)
|
16
|
-
length = string
|
17
|
-
Bit.new(
|
16
|
+
length, data = string.unpack("l>B*")
|
17
|
+
Bit.new(data[...length])
|
18
18
|
end
|
19
19
|
|
20
20
|
def to_s
|
@@ -24,5 +24,9 @@ module Pgvector
|
|
24
24
|
def to_a
|
25
25
|
@data.each_char.map { |v| v != "0" }
|
26
26
|
end
|
27
|
+
|
28
|
+
def to_binary
|
29
|
+
[@data.length, @data].pack("l>B*")
|
30
|
+
end
|
27
31
|
end
|
28
32
|
end
|
data/lib/pgvector/pg.rb
CHANGED
@@ -3,17 +3,17 @@ require "pg"
|
|
3
3
|
module Pgvector
|
4
4
|
module PG
|
5
5
|
def self.register_vector(registry)
|
6
|
-
registry.register_type(0, "vector",
|
7
|
-
registry.register_type(1, "vector",
|
6
|
+
registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
|
7
|
+
registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
|
8
8
|
|
9
9
|
# no binary decoder for halfvec since unpack does not have directive for half-precision
|
10
|
-
registry.register_type(0, "halfvec",
|
10
|
+
registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
|
11
11
|
|
12
|
-
registry.register_type(0, "bit",
|
13
|
-
registry.register_type(1, "bit",
|
12
|
+
registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
|
13
|
+
registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
|
14
14
|
|
15
|
-
registry.register_type(0, "sparsevec",
|
16
|
-
registry.register_type(1, "sparsevec",
|
15
|
+
registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
|
16
|
+
registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
|
17
17
|
end
|
18
18
|
|
19
19
|
module BinaryDecoder
|
@@ -31,7 +31,36 @@ module Pgvector
|
|
31
31
|
|
32
32
|
class Sparsevec < ::PG::SimpleDecoder
|
33
33
|
def decode(string, tuple = nil, field = nil)
|
34
|
-
SparseVector.from_binary(string)
|
34
|
+
::Pgvector::SparseVector.from_binary(string)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module BinaryEncoder
|
40
|
+
# experimental
|
41
|
+
def self.type_map
|
42
|
+
tm = ::PG::TypeMapByClass.new
|
43
|
+
tm[::Pgvector::Vector] = Vector.new
|
44
|
+
tm[::Pgvector::Bit] = Bit.new
|
45
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
46
|
+
tm
|
47
|
+
end
|
48
|
+
|
49
|
+
class Vector < ::PG::SimpleEncoder
|
50
|
+
def encode(value)
|
51
|
+
value.to_binary
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class Bit < ::PG::SimpleEncoder
|
56
|
+
def encode(value)
|
57
|
+
value.to_binary
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
class Sparsevec < ::PG::SimpleEncoder
|
62
|
+
def encode(value)
|
63
|
+
value.to_binary
|
35
64
|
end
|
36
65
|
end
|
37
66
|
end
|
@@ -45,7 +74,7 @@ module Pgvector
|
|
45
74
|
|
46
75
|
class Halfvec < ::PG::SimpleDecoder
|
47
76
|
def decode(string, tuple = nil, field = nil)
|
48
|
-
HalfVector.from_text(string).to_a
|
77
|
+
::Pgvector::HalfVector.from_text(string).to_a
|
49
78
|
end
|
50
79
|
end
|
51
80
|
|
@@ -57,7 +86,43 @@ module Pgvector
|
|
57
86
|
|
58
87
|
class Sparsevec < ::PG::SimpleDecoder
|
59
88
|
def decode(string, tuple = nil, field = nil)
|
60
|
-
SparseVector.from_text(string)
|
89
|
+
::Pgvector::SparseVector.from_text(string)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
module TextEncoder
|
95
|
+
# experimental
|
96
|
+
def self.type_map
|
97
|
+
tm = ::PG::TypeMapByClass.new
|
98
|
+
tm[::Pgvector::Vector] = Vector.new
|
99
|
+
tm[::Pgvector::HalfVector] = Halfvec.new
|
100
|
+
tm[::Pgvector::Bit] = Bit.new
|
101
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
102
|
+
tm
|
103
|
+
end
|
104
|
+
|
105
|
+
class Vector < ::PG::SimpleEncoder
|
106
|
+
def encode(value)
|
107
|
+
value.to_s
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
class Halfvec < ::PG::SimpleEncoder
|
112
|
+
def encode(value)
|
113
|
+
value.to_s
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
class Bit < ::PG::SimpleEncoder
|
118
|
+
def encode(value)
|
119
|
+
value.to_s
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
class Sparsevec < ::PG::SimpleEncoder
|
124
|
+
def encode(value)
|
125
|
+
value.to_s
|
61
126
|
end
|
62
127
|
end
|
63
128
|
end
|
@@ -30,6 +30,14 @@ module Pgvector
|
|
30
30
|
arr
|
31
31
|
end
|
32
32
|
|
33
|
+
def to_binary
|
34
|
+
nnz = @indices.size
|
35
|
+
buffer = [dimensions, nnz, 0].pack("l>l>l>")
|
36
|
+
@indices.pack("l>#{nnz}", buffer: buffer)
|
37
|
+
@values.pack("g#{nnz}", buffer: buffer)
|
38
|
+
buffer
|
39
|
+
end
|
40
|
+
|
33
41
|
private
|
34
42
|
|
35
43
|
def from_hash(data, dimensions)
|
data/lib/pgvector/vector.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module Pgvector
|
2
2
|
class Vector
|
3
3
|
def initialize(data)
|
4
|
-
|
4
|
+
# keep as NArray when possible for performance
|
5
|
+
@data =
|
6
|
+
if numo?(data)
|
7
|
+
data.cast_to(Numo::SFloat)
|
8
|
+
else
|
9
|
+
data.to_a.map(&:to_f)
|
10
|
+
end
|
5
11
|
end
|
6
12
|
|
7
13
|
def self.from_text(string)
|
@@ -19,7 +25,23 @@ module Pgvector
|
|
19
25
|
end
|
20
26
|
|
21
27
|
def to_a
|
22
|
-
@data
|
28
|
+
@data.to_a
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_binary
|
32
|
+
if numo?(@data)
|
33
|
+
[@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
|
34
|
+
else
|
35
|
+
buffer = [@data.size, 0].pack("s>s>")
|
36
|
+
@data.pack("g*", buffer: buffer)
|
37
|
+
buffer
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def numo?(data)
|
44
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
23
45
|
end
|
24
46
|
end
|
25
47
|
end
|
data/lib/pgvector/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgvector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-07-
|
11
|
+
date: 2024-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|