pgvector 0.3.0 → 0.3.2

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07a80636c13841d2fa97f8b740feb095c1c2f94fa3691b374d1472bba918bddf
4
- data.tar.gz: 50c9e67781fbbe23fa3dbe2f87790025f18d3686e412b46f62d1cb0d2995ecb3
3
+ metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
4
+ data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
5
5
  SHA512:
6
- metadata.gz: f0aa5b733e1bc4022c2052be6b04aa0656bf306f588baf2907827674aeadc7dc52507895043049246634999dea37de629c19a164bd289fbd979eb628aaab0c33
7
- data.tar.gz: f52e2e9c1c074c4f809fe78d6926bcb630694d79199e288ebb33d136a05191c4b2fff6038f8d7fc56d4291f949caa1b536d942aa7d0912be2daccf5a7ee23f83
6
+ metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
7
+ data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.3.2 (2024-07-17)
2
+
3
+ - Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
4
+
5
+ ## 0.3.1 (2024-07-10)
6
+
7
+ - Added support for `bit` type to pg
8
+ - Added extension for Sequel
9
+
1
10
  ## 0.3.0 (2024-06-25)
2
11
 
3
12
  - Added support for `halfvec` and `sparsevec` types
data/README.md CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
24
24
  Or check out some examples:
25
25
 
26
26
  - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
27
28
  - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
29
  - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
29
30
  - [Bulk loading](examples/bulk_loading.rb) with `COPY`
@@ -0,0 +1,32 @@
1
+ module Pgvector
2
+ class Bit
3
+ def initialize(data)
4
+ if data.is_a?(Array)
5
+ @data = data.map { |v| v ? "1" : "0" }.join
6
+ else
7
+ @data = data.to_str
8
+ end
9
+ end
10
+
11
+ def self.from_text(string)
12
+ Bit.new(string)
13
+ end
14
+
15
+ def self.from_binary(string)
16
+ length, data = string.unpack("l>B*")
17
+ Bit.new(data[...length])
18
+ end
19
+
20
+ def to_s
21
+ @data
22
+ end
23
+
24
+ def to_a
25
+ @data.each_char.map { |v| v != "0" }
26
+ end
27
+
28
+ def to_binary
29
+ [@data.length, @data].pack("l>B*")
30
+ end
31
+ end
32
+ end
data/lib/pgvector/pg.rb CHANGED
@@ -3,14 +3,17 @@ require "pg"
3
3
  module Pgvector
4
4
  module PG
5
5
  def self.register_vector(registry)
6
- registry.register_type(0, "vector", nil, TextDecoder::Vector)
7
- registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
6
+ registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
7
+ registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
8
8
 
9
9
  # no binary decoder for halfvec since unpack does not have directive for half-precision
10
- registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
10
+ registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
11
11
 
12
- registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
13
- registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
12
+ registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
13
+ registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
14
+
15
+ registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
16
+ registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
14
17
  end
15
18
 
16
19
  module BinaryDecoder
@@ -20,9 +23,44 @@ module Pgvector
20
23
  end
21
24
  end
22
25
 
26
+ class Bit < ::PG::SimpleDecoder
27
+ def decode(string, tuple = nil, field = nil)
28
+ ::Pgvector::Bit.from_binary(string).to_s
29
+ end
30
+ end
31
+
23
32
  class Sparsevec < ::PG::SimpleDecoder
24
33
  def decode(string, tuple = nil, field = nil)
25
- SparseVector.from_binary(string)
34
+ ::Pgvector::SparseVector.from_binary(string)
35
+ end
36
+ end
37
+ end
38
+
39
+ module BinaryEncoder
40
+ # experimental
41
+ def self.type_map
42
+ tm = ::PG::TypeMapByClass.new
43
+ tm[::Pgvector::Vector] = Vector.new
44
+ tm[::Pgvector::Bit] = Bit.new
45
+ tm[::Pgvector::SparseVector] = Sparsevec.new
46
+ tm
47
+ end
48
+
49
+ class Vector < ::PG::SimpleEncoder
50
+ def encode(value)
51
+ value.to_binary
52
+ end
53
+ end
54
+
55
+ class Bit < ::PG::SimpleEncoder
56
+ def encode(value)
57
+ value.to_binary
58
+ end
59
+ end
60
+
61
+ class Sparsevec < ::PG::SimpleEncoder
62
+ def encode(value)
63
+ value.to_binary
26
64
  end
27
65
  end
28
66
  end
@@ -36,13 +74,55 @@ module Pgvector
36
74
 
37
75
  class Halfvec < ::PG::SimpleDecoder
38
76
  def decode(string, tuple = nil, field = nil)
39
- HalfVector.from_text(string).to_a
77
+ ::Pgvector::HalfVector.from_text(string).to_a
78
+ end
79
+ end
80
+
81
+ class Bit < ::PG::SimpleDecoder
82
+ def decode(string, tuple = nil, field = nil)
83
+ ::Pgvector::Bit.from_text(string).to_s
40
84
  end
41
85
  end
42
86
 
43
87
  class Sparsevec < ::PG::SimpleDecoder
44
88
  def decode(string, tuple = nil, field = nil)
45
- SparseVector.from_text(string)
89
+ ::Pgvector::SparseVector.from_text(string)
90
+ end
91
+ end
92
+ end
93
+
94
+ module TextEncoder
95
+ # experimental
96
+ def self.type_map
97
+ tm = ::PG::TypeMapByClass.new
98
+ tm[::Pgvector::Vector] = Vector.new
99
+ tm[::Pgvector::HalfVector] = Halfvec.new
100
+ tm[::Pgvector::Bit] = Bit.new
101
+ tm[::Pgvector::SparseVector] = Sparsevec.new
102
+ tm
103
+ end
104
+
105
+ class Vector < ::PG::SimpleEncoder
106
+ def encode(value)
107
+ value.to_s
108
+ end
109
+ end
110
+
111
+ class Halfvec < ::PG::SimpleEncoder
112
+ def encode(value)
113
+ value.to_s
114
+ end
115
+ end
116
+
117
+ class Bit < ::PG::SimpleEncoder
118
+ def encode(value)
119
+ value.to_s
120
+ end
121
+ end
122
+
123
+ class Sparsevec < ::PG::SimpleEncoder
124
+ def encode(value)
125
+ value.to_s
46
126
  end
47
127
  end
48
128
  end
@@ -30,6 +30,14 @@ module Pgvector
30
30
  arr
31
31
  end
32
32
 
33
+ def to_binary
34
+ nnz = @indices.size
35
+ buffer = [dimensions, nnz, 0].pack("l>l>l>")
36
+ @indices.pack("l>#{nnz}", buffer: buffer)
37
+ @values.pack("g#{nnz}", buffer: buffer)
38
+ buffer
39
+ end
40
+
33
41
  private
34
42
 
35
43
  def from_hash(data, dimensions)
@@ -1,7 +1,13 @@
1
1
  module Pgvector
2
2
  class Vector
3
3
  def initialize(data)
4
- @data = data.to_a.map(&:to_f)
4
+ # keep as NArray when possible for performance
5
+ @data =
6
+ if numo?(data)
7
+ data.cast_to(Numo::SFloat)
8
+ else
9
+ data.to_a.map(&:to_f)
10
+ end
5
11
  end
6
12
 
7
13
  def self.from_text(string)
@@ -19,7 +25,23 @@ module Pgvector
19
25
  end
20
26
 
21
27
  def to_a
22
- @data
28
+ @data.to_a
29
+ end
30
+
31
+ def to_binary
32
+ if numo?(@data)
33
+ [@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
34
+ else
35
+ buffer = [@data.size, 0].pack("s>s>")
36
+ @data.pack("g*", buffer: buffer)
37
+ buffer
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def numo?(data)
44
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
23
45
  end
24
46
  end
25
47
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
data/lib/pgvector.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # modules
2
+ require_relative "pgvector/bit"
2
3
  require_relative "pgvector/half_vector"
3
4
  require_relative "pgvector/sparse_vector"
4
5
  require_relative "pgvector/vector"
@@ -8,7 +9,7 @@ module Pgvector
8
9
  autoload :PG, "pgvector/pg"
9
10
 
10
11
  def self.encode(data)
11
- if data.is_a?(SparseVector)
12
+ if data.is_a?(Vector) || data.is_a?(HalfVector) || data.is_a?(SparseVector)
12
13
  data.to_s
13
14
  else
14
15
  Vector.new(data).to_s
@@ -0,0 +1,5 @@
1
+ require_relative "../plugins/pgvector"
2
+
3
+ module Sequel
4
+ Dataset.register_extension(:pgvector, Plugins::Pgvector::DatasetMethods)
5
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-26 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -20,11 +20,13 @@ files:
20
20
  - LICENSE.txt
21
21
  - README.md
22
22
  - lib/pgvector.rb
23
+ - lib/pgvector/bit.rb
23
24
  - lib/pgvector/half_vector.rb
24
25
  - lib/pgvector/pg.rb
25
26
  - lib/pgvector/sparse_vector.rb
26
27
  - lib/pgvector/vector.rb
27
28
  - lib/pgvector/version.rb
29
+ - lib/sequel/extensions/pgvector.rb
28
30
  - lib/sequel/plugins/pgvector.rb
29
31
  homepage: https://github.com/pgvector/pgvector-ruby
30
32
  licenses: