pgvector 0.3.0 → 0.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -0
- data/lib/pgvector/bit.rb +32 -0
- data/lib/pgvector/pg.rb +88 -8
- data/lib/pgvector/sparse_vector.rb +8 -0
- data/lib/pgvector/vector.rb +24 -2
- data/lib/pgvector/version.rb +1 -1
- data/lib/pgvector.rb +2 -1
- data/lib/sequel/extensions/pgvector.rb +5 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
|
4
|
+
data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
|
7
|
+
data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
## 0.3.2 (2024-07-17)
|
2
|
+
|
3
|
+
- Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
|
4
|
+
|
5
|
+
## 0.3.1 (2024-07-10)
|
6
|
+
|
7
|
+
- Added support for `bit` type to pg
|
8
|
+
- Added extension for Sequel
|
9
|
+
|
1
10
|
## 0.3.0 (2024-06-25)
|
2
11
|
|
3
12
|
- Added support for `halfvec` and `sparsevec` types
|
data/README.md
CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
|
|
24
24
|
Or check out some examples:
|
25
25
|
|
26
26
|
- [Embeddings](examples/openai_embeddings.rb) with OpenAI
|
27
|
+
- [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
|
27
28
|
- [User-based recommendations](examples/disco_user_recs.rb) with Disco
|
28
29
|
- [Item-based recommendations](examples/disco_item_recs.rb) with Disco
|
29
30
|
- [Bulk loading](examples/bulk_loading.rb) with `COPY`
|
data/lib/pgvector/bit.rb
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
module Pgvector
|
2
|
+
class Bit
|
3
|
+
def initialize(data)
|
4
|
+
if data.is_a?(Array)
|
5
|
+
@data = data.map { |v| v ? "1" : "0" }.join
|
6
|
+
else
|
7
|
+
@data = data.to_str
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.from_text(string)
|
12
|
+
Bit.new(string)
|
13
|
+
end
|
14
|
+
|
15
|
+
def self.from_binary(string)
|
16
|
+
length, data = string.unpack("l>B*")
|
17
|
+
Bit.new(data[...length])
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_s
|
21
|
+
@data
|
22
|
+
end
|
23
|
+
|
24
|
+
def to_a
|
25
|
+
@data.each_char.map { |v| v != "0" }
|
26
|
+
end
|
27
|
+
|
28
|
+
def to_binary
|
29
|
+
[@data.length, @data].pack("l>B*")
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/pgvector/pg.rb
CHANGED
@@ -3,14 +3,17 @@ require "pg"
|
|
3
3
|
module Pgvector
|
4
4
|
module PG
|
5
5
|
def self.register_vector(registry)
|
6
|
-
registry.register_type(0, "vector", nil, TextDecoder::Vector)
|
7
|
-
registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
|
6
|
+
registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
|
7
|
+
registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
|
8
8
|
|
9
9
|
# no binary decoder for halfvec since unpack does not have directive for half-precision
|
10
|
-
registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
|
10
|
+
registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
|
11
11
|
|
12
|
-
registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
|
13
|
-
registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
|
12
|
+
registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
|
13
|
+
registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
|
14
|
+
|
15
|
+
registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
|
16
|
+
registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
|
14
17
|
end
|
15
18
|
|
16
19
|
module BinaryDecoder
|
@@ -20,9 +23,44 @@ module Pgvector
|
|
20
23
|
end
|
21
24
|
end
|
22
25
|
|
26
|
+
class Bit < ::PG::SimpleDecoder
|
27
|
+
def decode(string, tuple = nil, field = nil)
|
28
|
+
::Pgvector::Bit.from_binary(string).to_s
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
23
32
|
class Sparsevec < ::PG::SimpleDecoder
|
24
33
|
def decode(string, tuple = nil, field = nil)
|
25
|
-
SparseVector.from_binary(string)
|
34
|
+
::Pgvector::SparseVector.from_binary(string)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
module BinaryEncoder
|
40
|
+
# experimental
|
41
|
+
def self.type_map
|
42
|
+
tm = ::PG::TypeMapByClass.new
|
43
|
+
tm[::Pgvector::Vector] = Vector.new
|
44
|
+
tm[::Pgvector::Bit] = Bit.new
|
45
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
46
|
+
tm
|
47
|
+
end
|
48
|
+
|
49
|
+
class Vector < ::PG::SimpleEncoder
|
50
|
+
def encode(value)
|
51
|
+
value.to_binary
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
class Bit < ::PG::SimpleEncoder
|
56
|
+
def encode(value)
|
57
|
+
value.to_binary
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
class Sparsevec < ::PG::SimpleEncoder
|
62
|
+
def encode(value)
|
63
|
+
value.to_binary
|
26
64
|
end
|
27
65
|
end
|
28
66
|
end
|
@@ -36,13 +74,55 @@ module Pgvector
|
|
36
74
|
|
37
75
|
class Halfvec < ::PG::SimpleDecoder
|
38
76
|
def decode(string, tuple = nil, field = nil)
|
39
|
-
HalfVector.from_text(string).to_a
|
77
|
+
::Pgvector::HalfVector.from_text(string).to_a
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
class Bit < ::PG::SimpleDecoder
|
82
|
+
def decode(string, tuple = nil, field = nil)
|
83
|
+
::Pgvector::Bit.from_text(string).to_s
|
40
84
|
end
|
41
85
|
end
|
42
86
|
|
43
87
|
class Sparsevec < ::PG::SimpleDecoder
|
44
88
|
def decode(string, tuple = nil, field = nil)
|
45
|
-
SparseVector.from_text(string)
|
89
|
+
::Pgvector::SparseVector.from_text(string)
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
93
|
+
|
94
|
+
module TextEncoder
|
95
|
+
# experimental
|
96
|
+
def self.type_map
|
97
|
+
tm = ::PG::TypeMapByClass.new
|
98
|
+
tm[::Pgvector::Vector] = Vector.new
|
99
|
+
tm[::Pgvector::HalfVector] = Halfvec.new
|
100
|
+
tm[::Pgvector::Bit] = Bit.new
|
101
|
+
tm[::Pgvector::SparseVector] = Sparsevec.new
|
102
|
+
tm
|
103
|
+
end
|
104
|
+
|
105
|
+
class Vector < ::PG::SimpleEncoder
|
106
|
+
def encode(value)
|
107
|
+
value.to_s
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
class Halfvec < ::PG::SimpleEncoder
|
112
|
+
def encode(value)
|
113
|
+
value.to_s
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
class Bit < ::PG::SimpleEncoder
|
118
|
+
def encode(value)
|
119
|
+
value.to_s
|
120
|
+
end
|
121
|
+
end
|
122
|
+
|
123
|
+
class Sparsevec < ::PG::SimpleEncoder
|
124
|
+
def encode(value)
|
125
|
+
value.to_s
|
46
126
|
end
|
47
127
|
end
|
48
128
|
end
|
data/lib/pgvector/sparse_vector.rb
CHANGED
@@ -30,6 +30,14 @@ module Pgvector
|
|
30
30
|
arr
|
31
31
|
end
|
32
32
|
|
33
|
+
def to_binary
|
34
|
+
nnz = @indices.size
|
35
|
+
buffer = [dimensions, nnz, 0].pack("l>l>l>")
|
36
|
+
@indices.pack("l>#{nnz}", buffer: buffer)
|
37
|
+
@values.pack("g#{nnz}", buffer: buffer)
|
38
|
+
buffer
|
39
|
+
end
|
40
|
+
|
33
41
|
private
|
34
42
|
|
35
43
|
def from_hash(data, dimensions)
|
data/lib/pgvector/vector.rb
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
module Pgvector
|
2
2
|
class Vector
|
3
3
|
def initialize(data)
|
4
|
-
|
4
|
+
# keep as NArray when possible for performance
|
5
|
+
@data =
|
6
|
+
if numo?(data)
|
7
|
+
data.cast_to(Numo::SFloat)
|
8
|
+
else
|
9
|
+
data.to_a.map(&:to_f)
|
10
|
+
end
|
5
11
|
end
|
6
12
|
|
7
13
|
def self.from_text(string)
|
@@ -19,7 +25,23 @@ module Pgvector
|
|
19
25
|
end
|
20
26
|
|
21
27
|
def to_a
|
22
|
-
@data
|
28
|
+
@data.to_a
|
29
|
+
end
|
30
|
+
|
31
|
+
def to_binary
|
32
|
+
if numo?(@data)
|
33
|
+
[@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
|
34
|
+
else
|
35
|
+
buffer = [@data.size, 0].pack("s>s>")
|
36
|
+
@data.pack("g*", buffer: buffer)
|
37
|
+
buffer
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
private
|
42
|
+
|
43
|
+
def numo?(data)
|
44
|
+
defined?(Numo::NArray) && data.is_a?(Numo::NArray)
|
23
45
|
end
|
24
46
|
end
|
25
47
|
end
|
data/lib/pgvector/version.rb
CHANGED
data/lib/pgvector.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# modules
|
2
|
+
require_relative "pgvector/bit"
|
2
3
|
require_relative "pgvector/half_vector"
|
3
4
|
require_relative "pgvector/sparse_vector"
|
4
5
|
require_relative "pgvector/vector"
|
@@ -8,7 +9,7 @@ module Pgvector
|
|
8
9
|
autoload :PG, "pgvector/pg"
|
9
10
|
|
10
11
|
def self.encode(data)
|
11
|
-
if data.is_a?(SparseVector)
|
12
|
+
if data.is_a?(Vector) || data.is_a?(HalfVector) || data.is_a?(SparseVector)
|
12
13
|
data.to_s
|
13
14
|
else
|
14
15
|
Vector.new(data).to_s
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgvector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.3.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -20,11 +20,13 @@ files:
|
|
20
20
|
- LICENSE.txt
|
21
21
|
- README.md
|
22
22
|
- lib/pgvector.rb
|
23
|
+
- lib/pgvector/bit.rb
|
23
24
|
- lib/pgvector/half_vector.rb
|
24
25
|
- lib/pgvector/pg.rb
|
25
26
|
- lib/pgvector/sparse_vector.rb
|
26
27
|
- lib/pgvector/vector.rb
|
27
28
|
- lib/pgvector/version.rb
|
29
|
+
- lib/sequel/extensions/pgvector.rb
|
28
30
|
- lib/sequel/plugins/pgvector.rb
|
29
31
|
homepage: https://github.com/pgvector/pgvector-ruby
|
30
32
|
licenses:
|