pgvector 0.3.1 → 0.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d3ef6a53417383ff7f8a2a514df78513dfe9029e524f0f624df8828fb99dba0
4
- data.tar.gz: f326a21d3942079cdadc22d9a61367e7a6651ae37c5eb07a3955182f9f569ad0
3
+ metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
4
+ data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
5
5
  SHA512:
6
- metadata.gz: fa4d6519685d179e3d5712cbcf1e6989ca4f98e5b0902f5061eca2be55451162f199fcc75f19841dbd8ada8c18de69ac6fa39cad545d4b5a6c542e9cdd0109ea
7
- data.tar.gz: fd71245492b2ff8a06a0af60dbd12e57e44025f2662d4ea77eb7176aaaa4c4cb3b39c1e159edce92944da5b54fddbb19d1b504acd76a12c4efdb9a0fb6e797da
6
+ metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
7
+ data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,7 @@
1
+ ## 0.3.2 (2024-07-17)
2
+
3
+ - Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
4
+
1
5
  ## 0.3.1 (2024-07-10)
2
6
 
3
7
  - Added support for `bit` type to pg
data/README.md CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
24
24
  Or check out some examples:
25
25
 
26
26
  - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
27
28
  - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
29
  - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
29
30
  - [Bulk loading](examples/bulk_loading.rb) with `COPY`
data/lib/pgvector/bit.rb CHANGED
@@ -13,8 +13,8 @@ module Pgvector
13
13
  end
14
14
 
15
15
  def self.from_binary(string)
16
- length = string[..3].unpack1("l>")
17
- Bit.new(string[4..].unpack("B*").join[...length])
16
+ length, data = string.unpack("l>B*")
17
+ Bit.new(data[...length])
18
18
  end
19
19
 
20
20
  def to_s
@@ -24,5 +24,9 @@ module Pgvector
24
24
  def to_a
25
25
  @data.each_char.map { |v| v != "0" }
26
26
  end
27
+
28
+ def to_binary
29
+ [@data.length, @data].pack("l>B*")
30
+ end
27
31
  end
28
32
  end
data/lib/pgvector/pg.rb CHANGED
@@ -3,17 +3,17 @@ require "pg"
3
3
  module Pgvector
4
4
  module PG
5
5
  def self.register_vector(registry)
6
- registry.register_type(0, "vector", nil, TextDecoder::Vector)
7
- registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
6
+ registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
7
+ registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
8
8
 
9
9
  # no binary decoder for halfvec since unpack does not have directive for half-precision
10
- registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
10
+ registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
11
11
 
12
- registry.register_type(0, "bit", nil, TextDecoder::Bit)
13
- registry.register_type(1, "bit", nil, BinaryDecoder::Bit)
12
+ registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
13
+ registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
14
14
 
15
- registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
16
- registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
15
+ registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
16
+ registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
17
17
  end
18
18
 
19
19
  module BinaryDecoder
@@ -31,7 +31,36 @@ module Pgvector
31
31
 
32
32
  class Sparsevec < ::PG::SimpleDecoder
33
33
  def decode(string, tuple = nil, field = nil)
34
- SparseVector.from_binary(string)
34
+ ::Pgvector::SparseVector.from_binary(string)
35
+ end
36
+ end
37
+ end
38
+
39
+ module BinaryEncoder
40
+ # experimental
41
+ def self.type_map
42
+ tm = ::PG::TypeMapByClass.new
43
+ tm[::Pgvector::Vector] = Vector.new
44
+ tm[::Pgvector::Bit] = Bit.new
45
+ tm[::Pgvector::SparseVector] = Sparsevec.new
46
+ tm
47
+ end
48
+
49
+ class Vector < ::PG::SimpleEncoder
50
+ def encode(value)
51
+ value.to_binary
52
+ end
53
+ end
54
+
55
+ class Bit < ::PG::SimpleEncoder
56
+ def encode(value)
57
+ value.to_binary
58
+ end
59
+ end
60
+
61
+ class Sparsevec < ::PG::SimpleEncoder
62
+ def encode(value)
63
+ value.to_binary
35
64
  end
36
65
  end
37
66
  end
@@ -45,7 +74,7 @@ module Pgvector
45
74
 
46
75
  class Halfvec < ::PG::SimpleDecoder
47
76
  def decode(string, tuple = nil, field = nil)
48
- HalfVector.from_text(string).to_a
77
+ ::Pgvector::HalfVector.from_text(string).to_a
49
78
  end
50
79
  end
51
80
 
@@ -57,7 +86,43 @@ module Pgvector
57
86
 
58
87
  class Sparsevec < ::PG::SimpleDecoder
59
88
  def decode(string, tuple = nil, field = nil)
60
- SparseVector.from_text(string)
89
+ ::Pgvector::SparseVector.from_text(string)
90
+ end
91
+ end
92
+ end
93
+
94
+ module TextEncoder
95
+ # experimental
96
+ def self.type_map
97
+ tm = ::PG::TypeMapByClass.new
98
+ tm[::Pgvector::Vector] = Vector.new
99
+ tm[::Pgvector::HalfVector] = Halfvec.new
100
+ tm[::Pgvector::Bit] = Bit.new
101
+ tm[::Pgvector::SparseVector] = Sparsevec.new
102
+ tm
103
+ end
104
+
105
+ class Vector < ::PG::SimpleEncoder
106
+ def encode(value)
107
+ value.to_s
108
+ end
109
+ end
110
+
111
+ class Halfvec < ::PG::SimpleEncoder
112
+ def encode(value)
113
+ value.to_s
114
+ end
115
+ end
116
+
117
+ class Bit < ::PG::SimpleEncoder
118
+ def encode(value)
119
+ value.to_s
120
+ end
121
+ end
122
+
123
+ class Sparsevec < ::PG::SimpleEncoder
124
+ def encode(value)
125
+ value.to_s
61
126
  end
62
127
  end
63
128
  end
@@ -30,6 +30,14 @@ module Pgvector
30
30
  arr
31
31
  end
32
32
 
33
+ def to_binary
34
+ nnz = @indices.size
35
+ buffer = [dimensions, nnz, 0].pack("l>l>l>")
36
+ @indices.pack("l>#{nnz}", buffer: buffer)
37
+ @values.pack("g#{nnz}", buffer: buffer)
38
+ buffer
39
+ end
40
+
33
41
  private
34
42
 
35
43
  def from_hash(data, dimensions)
@@ -1,7 +1,13 @@
1
1
  module Pgvector
2
2
  class Vector
3
3
  def initialize(data)
4
- @data = data.to_a.map(&:to_f)
4
+ # keep as NArray when possible for performance
5
+ @data =
6
+ if numo?(data)
7
+ data.cast_to(Numo::SFloat)
8
+ else
9
+ data.to_a.map(&:to_f)
10
+ end
5
11
  end
6
12
 
7
13
  def self.from_text(string)
@@ -19,7 +25,23 @@ module Pgvector
19
25
  end
20
26
 
21
27
  def to_a
22
- @data
28
+ @data.to_a
29
+ end
30
+
31
+ def to_binary
32
+ if numo?(@data)
33
+ [@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
34
+ else
35
+ buffer = [@data.size, 0].pack("s>s>")
36
+ @data.pack("g*", buffer: buffer)
37
+ buffer
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def numo?(data)
44
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
23
45
  end
24
46
  end
25
47
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org