pgvector 0.3.1 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d3ef6a53417383ff7f8a2a514df78513dfe9029e524f0f624df8828fb99dba0
4
- data.tar.gz: f326a21d3942079cdadc22d9a61367e7a6651ae37c5eb07a3955182f9f569ad0
3
+ metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
4
+ data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
5
5
  SHA512:
6
- metadata.gz: fa4d6519685d179e3d5712cbcf1e6989ca4f98e5b0902f5061eca2be55451162f199fcc75f19841dbd8ada8c18de69ac6fa39cad545d4b5a6c542e9cdd0109ea
7
- data.tar.gz: fd71245492b2ff8a06a0af60dbd12e57e44025f2662d4ea77eb7176aaaa4c4cb3b39c1e159edce92944da5b54fddbb19d1b504acd76a12c4efdb9a0fb6e797da
6
+ metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
7
+ data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.2 (2024-07-17)
2
+
3
+ - Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
4
+
1
5
  ## 0.3.1 (2024-07-10)
2
6
 
3
7
  - Added support for `bit` type to pg
data/README.md CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
24
24
  Or check out some examples:
25
25
 
26
26
  - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
27
28
  - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
29
  - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
29
30
  - [Bulk loading](examples/bulk_loading.rb) with `COPY`
data/lib/pgvector/bit.rb CHANGED
@@ -13,8 +13,8 @@ module Pgvector
13
13
  end
14
14
 
15
15
  def self.from_binary(string)
16
- length = string[..3].unpack1("l>")
17
- Bit.new(string[4..].unpack("B*").join[...length])
16
+ length, data = string.unpack("l>B*")
17
+ Bit.new(data[...length])
18
18
  end
19
19
 
20
20
  def to_s
@@ -24,5 +24,9 @@ module Pgvector
24
24
  def to_a
25
25
  @data.each_char.map { |v| v != "0" }
26
26
  end
27
+
28
+ def to_binary
29
+ [@data.length, @data].pack("l>B*")
30
+ end
27
31
  end
28
32
  end
data/lib/pgvector/pg.rb CHANGED
@@ -3,17 +3,17 @@ require "pg"
3
3
  module Pgvector
4
4
  module PG
5
5
  def self.register_vector(registry)
6
- registry.register_type(0, "vector", nil, TextDecoder::Vector)
7
- registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
6
+ registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
7
+ registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
8
8
 
9
9
  # no binary decoder for halfvec since unpack does not have directive for half-precision
10
- registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
10
+ registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
11
11
 
12
- registry.register_type(0, "bit", nil, TextDecoder::Bit)
13
- registry.register_type(1, "bit", nil, BinaryDecoder::Bit)
12
+ registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
13
+ registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
14
14
 
15
- registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
16
- registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
15
+ registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
16
+ registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
17
17
  end
18
18
 
19
19
  module BinaryDecoder
@@ -31,7 +31,36 @@ module Pgvector
31
31
 
32
32
  class Sparsevec < ::PG::SimpleDecoder
33
33
  def decode(string, tuple = nil, field = nil)
34
- SparseVector.from_binary(string)
34
+ ::Pgvector::SparseVector.from_binary(string)
35
+ end
36
+ end
37
+ end
38
+
39
+ module BinaryEncoder
40
+ # experimental
41
+ def self.type_map
42
+ tm = ::PG::TypeMapByClass.new
43
+ tm[::Pgvector::Vector] = Vector.new
44
+ tm[::Pgvector::Bit] = Bit.new
45
+ tm[::Pgvector::SparseVector] = Sparsevec.new
46
+ tm
47
+ end
48
+
49
+ class Vector < ::PG::SimpleEncoder
50
+ def encode(value)
51
+ value.to_binary
52
+ end
53
+ end
54
+
55
+ class Bit < ::PG::SimpleEncoder
56
+ def encode(value)
57
+ value.to_binary
58
+ end
59
+ end
60
+
61
+ class Sparsevec < ::PG::SimpleEncoder
62
+ def encode(value)
63
+ value.to_binary
35
64
  end
36
65
  end
37
66
  end
@@ -45,7 +74,7 @@ module Pgvector
45
74
 
46
75
  class Halfvec < ::PG::SimpleDecoder
47
76
  def decode(string, tuple = nil, field = nil)
48
- HalfVector.from_text(string).to_a
77
+ ::Pgvector::HalfVector.from_text(string).to_a
49
78
  end
50
79
  end
51
80
 
@@ -57,7 +86,43 @@ module Pgvector
57
86
 
58
87
  class Sparsevec < ::PG::SimpleDecoder
59
88
  def decode(string, tuple = nil, field = nil)
60
- SparseVector.from_text(string)
89
+ ::Pgvector::SparseVector.from_text(string)
90
+ end
91
+ end
92
+ end
93
+
94
+ module TextEncoder
95
+ # experimental
96
+ def self.type_map
97
+ tm = ::PG::TypeMapByClass.new
98
+ tm[::Pgvector::Vector] = Vector.new
99
+ tm[::Pgvector::HalfVector] = Halfvec.new
100
+ tm[::Pgvector::Bit] = Bit.new
101
+ tm[::Pgvector::SparseVector] = Sparsevec.new
102
+ tm
103
+ end
104
+
105
+ class Vector < ::PG::SimpleEncoder
106
+ def encode(value)
107
+ value.to_s
108
+ end
109
+ end
110
+
111
+ class Halfvec < ::PG::SimpleEncoder
112
+ def encode(value)
113
+ value.to_s
114
+ end
115
+ end
116
+
117
+ class Bit < ::PG::SimpleEncoder
118
+ def encode(value)
119
+ value.to_s
120
+ end
121
+ end
122
+
123
+ class Sparsevec < ::PG::SimpleEncoder
124
+ def encode(value)
125
+ value.to_s
61
126
  end
62
127
  end
63
128
  end
data/lib/pgvector/sparse_vector.rb CHANGED
@@ -30,6 +30,14 @@ module Pgvector
30
30
  arr
31
31
  end
32
32
 
33
+ def to_binary
34
+ nnz = @indices.size
35
+ buffer = [dimensions, nnz, 0].pack("l>l>l>")
36
+ @indices.pack("l>#{nnz}", buffer: buffer)
37
+ @values.pack("g#{nnz}", buffer: buffer)
38
+ buffer
39
+ end
40
+
33
41
  private
34
42
 
35
43
  def from_hash(data, dimensions)
data/lib/pgvector/vector.rb CHANGED
@@ -1,7 +1,13 @@
1
1
  module Pgvector
2
2
  class Vector
3
3
  def initialize(data)
4
- @data = data.to_a.map(&:to_f)
4
+ # keep as NArray when possible for performance
5
+ @data =
6
+ if numo?(data)
7
+ data.cast_to(Numo::SFloat)
8
+ else
9
+ data.to_a.map(&:to_f)
10
+ end
5
11
  end
6
12
 
7
13
  def self.from_text(string)
@@ -19,7 +25,23 @@ module Pgvector
19
25
  end
20
26
 
21
27
  def to_a
22
- @data
28
+ @data.to_a
29
+ end
30
+
31
+ def to_binary
32
+ if numo?(@data)
33
+ [@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
34
+ else
35
+ buffer = [@data.size, 0].pack("s>s>")
36
+ @data.pack("g*", buffer: buffer)
37
+ buffer
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def numo?(data)
44
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
23
45
  end
24
46
  end
25
47
  end
data/lib/pgvector/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org