pgvector 0.3.0 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/lib/pgvector/bit.rb +32 -0
- data/lib/pgvector/pg.rb +88 -8
- data/lib/pgvector/sparse_vector.rb +8 -0
- data/lib/pgvector/vector.rb +24 -2
- data/lib/pgvector/version.rb +1 -1
- data/lib/pgvector.rb +2 -1
- data/lib/sequel/extensions/pgvector.rb +5 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
|
4
|
+
data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
|
7
|
+
data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.3.2 (2024-07-17)
|
2
|
+
|
3
|
+
- Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
|
4
|
+
|
5
|
+
## 0.3.1 (2024-07-10)
|
6
|
+
|
7
|
+
- Added support for `bit` type to pg
|
8
|
+
- Added extension for Sequel
|
9
|
+
|
1
10
|
## 0.3.0 (2024-06-25)
|
2
11
|
|
3
12
|
- Added support for `halfvec` and `sparsevec` types
|
data/README.md
CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
|
|
24
24
|
Or check out some examples:
|
25
25
|
|
26
26
|
- [Embeddings](examples/openai_embeddings.rb) with OpenAI
|
27
|
+
- [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
|
27
28
|
- [User-based recommendations](examples/disco_user_recs.rb) with Disco
|
28
29
|
- [Item-based recommendations](examples/disco_item_recs.rb) with Disco
|
29
30
|
- [Bulk loading](examples/bulk_loading.rb) with `COPY`
|
data/lib/pgvector/bit.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Pgvector
|
2
|
+
class Bit
|
3
|
+
def initialize(data)
|
4
|
+
if data.is_a?(Array)
|
5
|
+
@data = data.map { |v| v ? "1" : "0" }.join
|
6
|
+
else
|
7
|
+
@data = data.to_str
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.from_text(string)
|
12
|
+
Bit.new(string)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.from_binary(string)
|
16
|
+
length, data = string.unpack("l>B*")
|
17
|
+
Bit.new(data[...length])
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_s
|
21
|
+
@data
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_a
|
25
|
+
@data.each_char.map { |v| v != "0" }
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_binary
|
29
|
+
[@data.length, @data].pack("l>B*")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/pgvector/pg.rb
CHANGED
@@ -3,14 +3,17 @@ require "pg"
|
|
3
3
|
module Pgvector
|
4
4
|
module PG
|
5
5
|
def self.register_vector(registry)
|
6
|
-
registry.register_type(0, "vector",
|
7
|
-
registry.register_type(1, "vector",
|
6
|
+
registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
|
7
|
+
registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
|
8
8
|
|
9
9
|
# no binary decoder for halfvec since unpack does not have directive for half-precision
|
10
|
-
registry.register_type(0, "halfvec",
|
10
|
+
registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
|
11
11
|
|
12
|
-
registry.register_type(0, "
|
13
|
-
registry.register_type(1, "
|
12
|
+
registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
|
13
|
+
registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
|
14
|
+
|
15
|
+
registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
|
16
|
+
registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
|
14
17
|
end
|
15
18
|
|
16
19
|
module BinaryDecoder
|
@@ -20,9 +23,44 @@ module Pgvector
|
|
20
23
|
end
|
21
24
|
end
|
22
25
|
|
26
|
+
class Bit < ::PG::SimpleDecoder
|
27
|
+
def decode(string, tuple = nil, field = nil)
|
28
|
+
::Pgvector::Bit.from_binary(string).to_s
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
23
32
|
class Sparsevec < ::PG::SimpleDecoder
|
24
33
|
def decode(string, tuple = nil, field = nil)
|
25
|
-
SparseVector.from_binary(string)
|
34
|
+
::Pgvector::SparseVector.from_binary(string)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module BinaryEncoder
|
40
|
+
# experimental
|
41
|
+
def self.type_map
|
42
|
+
tm = ::PG::TypeMapByClass.new
|
43
|
+
tm[::Pgvector::Vector] = Vector.new
|
44
|
+
tm[::Pgvector::Bit] = Bit.new
|
45
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
46
|
+
tm
|
47
|
+
end
|
48
|
+
|
49
|
+
class Vector < ::PG::SimpleEncoder
|
50
|
+
def encode(value)
|
51
|
+
value.to_binary
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class Bit < ::PG::SimpleEncoder
|
56
|
+
def encode(value)
|
57
|
+
value.to_binary
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
class Sparsevec < ::PG::SimpleEncoder
|
62
|
+
def encode(value)
|
63
|
+
value.to_binary
|
26
64
|
end
|
27
65
|
end
|
28
66
|
end
|
@@ -36,13 +74,55 @@ module Pgvector
|
|
36
74
|
|
37
75
|
class Halfvec < ::PG::SimpleDecoder
|
38
76
|
def decode(string, tuple = nil, field = nil)
|
39
|
-
HalfVector.from_text(string).to_a
|
77
|
+
::Pgvector::HalfVector.from_text(string).to_a
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
class Bit < ::PG::SimpleDecoder
|
82
|
+
def decode(string, tuple = nil, field = nil)
|
83
|
+
::Pgvector::Bit.from_text(string).to_s
|
40
84
|
end
|
41
85
|
end
|
42
86
|
|
43
87
|
class Sparsevec < ::PG::SimpleDecoder
|
44
88
|
def decode(string, tuple = nil, field = nil)
|
45
|
-
SparseVector.from_text(string)
|
89
|
+
::Pgvector::SparseVector.from_text(string)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
module TextEncoder
|
95
|
+
# experimental
|
96
|
+
def self.type_map
|
97
|
+
tm = ::PG::TypeMapByClass.new
|
98
|
+
tm[::Pgvector::Vector] = Vector.new
|
99
|
+
tm[::Pgvector::HalfVector] = Halfvec.new
|
100
|
+
tm[::Pgvector::Bit] = Bit.new
|
101
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
102
|
+
tm
|
103
|
+
end
|
104
|
+
|
105
|
+
class Vector < ::PG::SimpleEncoder
|
106
|
+
def encode(value)
|
107
|
+
value.to_s
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
class Halfvec < ::PG::SimpleEncoder
|
112
|
+
def encode(value)
|
113
|
+
value.to_s
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
class Bit < ::PG::SimpleEncoder
|
118
|
+
def encode(value)
|
119
|
+
value.to_s
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
class Sparsevec < ::PG::SimpleEncoder
|
124
|
+
def encode(value)
|
125
|
+
value.to_s
|
46
126
|
end
|
47
127
|
end
|
48
128
|
end
|
data/lib/pgvector/sparse_vector.rb
CHANGED
@@ -30,6 +30,14 @@ module Pgvector
|
|
30
30
|
arr
|
31
31
|
end
|
32
32
|
|
33
|
+
def to_binary
|
34
|
+
nnz = @indices.size
|
35
|
+
buffer = [dimensions, nnz, 0].pack("l>l>l>")
|
36
|
+
@indices.pack("l>#{nnz}", buffer: buffer)
|
37
|
+
@values.pack("g#{nnz}", buffer: buffer)
|
38
|
+
buffer
|
39
|
+
end
|
40
|
+
|
33
41
|
private
|
34
42
|
|
35
43
|
def from_hash(data, dimensions)
|
data/lib/pgvector/vector.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module Pgvector
|
2
2
|
class Vector
|
3
3
|
def initialize(data)
|
4
|
-
|
4
|
+
# keep as NArray when possible for performance
|
5
|
+
@data =
|
6
|
+
if numo?(data)
|
7
|
+
data.cast_to(Numo::SFloat)
|
8
|
+
else
|
9
|
+
data.to_a.map(&:to_f)
|
10
|
+
end
|
5
11
|
end
|
6
12
|
|
7
13
|
def self.from_text(string)
|
@@ -19,7 +25,23 @@ module Pgvector
|
|
19
25
|
end
|
20
26
|
|
21
27
|
def to_a
|
22
|
-
@data
|
28
|
+
@data.to_a
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_binary
|
32
|
+
if numo?(@data)
|
33
|
+
[@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
|
34
|
+
else
|
35
|
+
buffer = [@data.size, 0].pack("s>s>")
|
36
|
+
@data.pack("g*", buffer: buffer)
|
37
|
+
buffer
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def numo?(data)
|
44
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
23
45
|
end
|
24
46
|
end
|
25
47
|
end
|
data/lib/pgvector/version.rb
CHANGED
data/lib/pgvector.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# modules
|
2
|
+
require_relative "pgvector/bit"
|
2
3
|
require_relative "pgvector/half_vector"
|
3
4
|
require_relative "pgvector/sparse_vector"
|
4
5
|
require_relative "pgvector/vector"
|
@@ -8,7 +9,7 @@ module Pgvector
|
|
8
9
|
autoload :PG, "pgvector/pg"
|
9
10
|
|
10
11
|
def self.encode(data)
|
11
|
-
if data.is_a?(SparseVector)
|
12
|
+
if data.is_a?(Vector) || data.is_a?(HalfVector) || data.is_a?(SparseVector)
|
12
13
|
data.to_s
|
13
14
|
else
|
14
15
|
Vector.new(data).to_s
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgvector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -20,11 +20,13 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- lib/pgvector.rb
|
23
|
+
- lib/pgvector/bit.rb
|
23
24
|
- lib/pgvector/half_vector.rb
|
24
25
|
- lib/pgvector/pg.rb
|
25
26
|
- lib/pgvector/sparse_vector.rb
|
26
27
|
- lib/pgvector/vector.rb
|
27
28
|
- lib/pgvector/version.rb
|
29
|
+
- lib/sequel/extensions/pgvector.rb
|
28
30
|
- lib/sequel/plugins/pgvector.rb
|
29
31
|
homepage: https://github.com/pgvector/pgvector-ruby
|
30
32
|
licenses:
|