pgvector 0.3.0 → 0.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 07a80636c13841d2fa97f8b740feb095c1c2f94fa3691b374d1472bba918bddf
4
- data.tar.gz: 50c9e67781fbbe23fa3dbe2f87790025f18d3686e412b46f62d1cb0d2995ecb3
3
+ metadata.gz: fbdde0af357aaae0f727dff71e8678c705b1b347ecca7b49a8ca097189b42db0
4
+ data.tar.gz: d9d8bee13760ca165905f888e79507350565425ae819c452bf723ff775899235
5
5
  SHA512:
6
- metadata.gz: f0aa5b733e1bc4022c2052be6b04aa0656bf306f588baf2907827674aeadc7dc52507895043049246634999dea37de629c19a164bd289fbd979eb628aaab0c33
7
- data.tar.gz: f52e2e9c1c074c4f809fe78d6926bcb630694d79199e288ebb33d136a05191c4b2fff6038f8d7fc56d4291f949caa1b536d942aa7d0912be2daccf5a7ee23f83
6
+ metadata.gz: '04629b78419ba4fd5325792d2b47f60a6a42fe2e5f8c681bcbd53d2f769b0c8db74ba7e53b9ef84c579b1c98ded45b51e3fe58347394b5f7b707efc81a6500f3'
7
+ data.tar.gz: 995963de5eb7bd7587bff1f70a709cb9b4e7f0791ef6f23b6930104c5a0ba61514a684b2dbf784b2e5b62464e84f5c6edbf569681578d75c5db860500c46bbae
data/CHANGELOG.md CHANGED
@@ -1,3 +1,12 @@
1
+ ## 0.3.2 (2024-07-17)
2
+
3
+ - Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
4
+
5
+ ## 0.3.1 (2024-07-10)
6
+
7
+ - Added support for `bit` type to pg
8
+ - Added extension for Sequel
9
+
1
10
  ## 0.3.0 (2024-06-25)
2
11
 
3
12
  - Added support for `halfvec` and `sparsevec` types
data/README.md CHANGED
@@ -24,6 +24,7 @@ And follow the instructions for your database library:
24
24
  Or check out some examples:
25
25
 
26
26
  - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [Binary embeddings](examples/cohere_embeddings.rb) with Cohere
27
28
  - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
29
  - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
29
30
  - [Bulk loading](examples/bulk_loading.rb) with `COPY`
@@ -0,0 +1,32 @@
1
+ module Pgvector
2
+ class Bit
3
+ def initialize(data)
4
+ if data.is_a?(Array)
5
+ @data = data.map { |v| v ? "1" : "0" }.join
6
+ else
7
+ @data = data.to_str
8
+ end
9
+ end
10
+
11
+ def self.from_text(string)
12
+ Bit.new(string)
13
+ end
14
+
15
+ def self.from_binary(string)
16
+ length, data = string.unpack("l>B*")
17
+ Bit.new(data[...length])
18
+ end
19
+
20
+ def to_s
21
+ @data
22
+ end
23
+
24
+ def to_a
25
+ @data.each_char.map { |v| v != "0" }
26
+ end
27
+
28
+ def to_binary
29
+ [@data.length, @data].pack("l>B*")
30
+ end
31
+ end
32
+ end
data/lib/pgvector/pg.rb CHANGED
@@ -3,14 +3,17 @@ require "pg"
3
3
  module Pgvector
4
4
  module PG
5
5
  def self.register_vector(registry)
6
- registry.register_type(0, "vector", nil, TextDecoder::Vector)
7
- registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
6
+ registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
7
+ registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
8
8
 
9
9
  # no binary decoder for halfvec since unpack does not have directive for half-precision
10
- registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
10
+ registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
11
11
 
12
- registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
13
- registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
12
+ registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
13
+ registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
14
+
15
+ registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
16
+ registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
14
17
  end
15
18
 
16
19
  module BinaryDecoder
@@ -20,9 +23,44 @@ module Pgvector
20
23
  end
21
24
  end
22
25
 
26
+ class Bit < ::PG::SimpleDecoder
27
+ def decode(string, tuple = nil, field = nil)
28
+ ::Pgvector::Bit.from_binary(string).to_s
29
+ end
30
+ end
31
+
23
32
  class Sparsevec < ::PG::SimpleDecoder
24
33
  def decode(string, tuple = nil, field = nil)
25
- SparseVector.from_binary(string)
34
+ ::Pgvector::SparseVector.from_binary(string)
35
+ end
36
+ end
37
+ end
38
+
39
+ module BinaryEncoder
40
+ # experimental
41
+ def self.type_map
42
+ tm = ::PG::TypeMapByClass.new
43
+ tm[::Pgvector::Vector] = Vector.new
44
+ tm[::Pgvector::Bit] = Bit.new
45
+ tm[::Pgvector::SparseVector] = Sparsevec.new
46
+ tm
47
+ end
48
+
49
+ class Vector < ::PG::SimpleEncoder
50
+ def encode(value)
51
+ value.to_binary
52
+ end
53
+ end
54
+
55
+ class Bit < ::PG::SimpleEncoder
56
+ def encode(value)
57
+ value.to_binary
58
+ end
59
+ end
60
+
61
+ class Sparsevec < ::PG::SimpleEncoder
62
+ def encode(value)
63
+ value.to_binary
26
64
  end
27
65
  end
28
66
  end
@@ -36,13 +74,55 @@ module Pgvector
36
74
 
37
75
  class Halfvec < ::PG::SimpleDecoder
38
76
  def decode(string, tuple = nil, field = nil)
39
- HalfVector.from_text(string).to_a
77
+ ::Pgvector::HalfVector.from_text(string).to_a
78
+ end
79
+ end
80
+
81
+ class Bit < ::PG::SimpleDecoder
82
+ def decode(string, tuple = nil, field = nil)
83
+ ::Pgvector::Bit.from_text(string).to_s
40
84
  end
41
85
  end
42
86
 
43
87
  class Sparsevec < ::PG::SimpleDecoder
44
88
  def decode(string, tuple = nil, field = nil)
45
- SparseVector.from_text(string)
89
+ ::Pgvector::SparseVector.from_text(string)
90
+ end
91
+ end
92
+ end
93
+
94
+ module TextEncoder
95
+ # experimental
96
+ def self.type_map
97
+ tm = ::PG::TypeMapByClass.new
98
+ tm[::Pgvector::Vector] = Vector.new
99
+ tm[::Pgvector::HalfVector] = Halfvec.new
100
+ tm[::Pgvector::Bit] = Bit.new
101
+ tm[::Pgvector::SparseVector] = Sparsevec.new
102
+ tm
103
+ end
104
+
105
+ class Vector < ::PG::SimpleEncoder
106
+ def encode(value)
107
+ value.to_s
108
+ end
109
+ end
110
+
111
+ class Halfvec < ::PG::SimpleEncoder
112
+ def encode(value)
113
+ value.to_s
114
+ end
115
+ end
116
+
117
+ class Bit < ::PG::SimpleEncoder
118
+ def encode(value)
119
+ value.to_s
120
+ end
121
+ end
122
+
123
+ class Sparsevec < ::PG::SimpleEncoder
124
+ def encode(value)
125
+ value.to_s
46
126
  end
47
127
  end
48
128
  end
@@ -30,6 +30,14 @@ module Pgvector
30
30
  arr
31
31
  end
32
32
 
33
+ def to_binary
34
+ nnz = @indices.size
35
+ buffer = [dimensions, nnz, 0].pack("l>l>l>")
36
+ @indices.pack("l>#{nnz}", buffer: buffer)
37
+ @values.pack("g#{nnz}", buffer: buffer)
38
+ buffer
39
+ end
40
+
33
41
  private
34
42
 
35
43
  def from_hash(data, dimensions)
@@ -1,7 +1,13 @@
1
1
  module Pgvector
2
2
  class Vector
3
3
  def initialize(data)
4
- @data = data.to_a.map(&:to_f)
4
+ # keep as NArray when possible for performance
5
+ @data =
6
+ if numo?(data)
7
+ data.cast_to(Numo::SFloat)
8
+ else
9
+ data.to_a.map(&:to_f)
10
+ end
5
11
  end
6
12
 
7
13
  def self.from_text(string)
@@ -19,7 +25,23 @@ module Pgvector
19
25
  end
20
26
 
21
27
  def to_a
22
- @data
28
+ @data.to_a
29
+ end
30
+
31
+ def to_binary
32
+ if numo?(@data)
33
+ [@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary
34
+ else
35
+ buffer = [@data.size, 0].pack("s>s>")
36
+ @data.pack("g*", buffer: buffer)
37
+ buffer
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def numo?(data)
44
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
23
45
  end
24
46
  end
25
47
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.3.0"
2
+ VERSION = "0.3.2"
3
3
  end
data/lib/pgvector.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # modules
2
+ require_relative "pgvector/bit"
2
3
  require_relative "pgvector/half_vector"
3
4
  require_relative "pgvector/sparse_vector"
4
5
  require_relative "pgvector/vector"
@@ -8,7 +9,7 @@ module Pgvector
8
9
  autoload :PG, "pgvector/pg"
9
10
 
10
11
  def self.encode(data)
11
- if data.is_a?(SparseVector)
12
+ if data.is_a?(Vector) || data.is_a?(HalfVector) || data.is_a?(SparseVector)
12
13
  data.to_s
13
14
  else
14
15
  Vector.new(data).to_s
@@ -0,0 +1,5 @@
1
+ require_relative "../plugins/pgvector"
2
+
3
+ module Sequel
4
+ Dataset.register_extension(:pgvector, Plugins::Pgvector::DatasetMethods)
5
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-06-26 00:00:00.000000000 Z
11
+ date: 2024-07-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -20,11 +20,13 @@ files:
20
20
  - LICENSE.txt
21
21
  - README.md
22
22
  - lib/pgvector.rb
23
+ - lib/pgvector/bit.rb
23
24
  - lib/pgvector/half_vector.rb
24
25
  - lib/pgvector/pg.rb
25
26
  - lib/pgvector/sparse_vector.rb
26
27
  - lib/pgvector/vector.rb
27
28
  - lib/pgvector/version.rb
29
+ - lib/sequel/extensions/pgvector.rb
28
30
  - lib/sequel/plugins/pgvector.rb
29
31
  homepage: https://github.com/pgvector/pgvector-ruby
30
32
  licenses: