pgvector 0.3.1 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3d3ef6a53417383ff7f8a2a514df78513dfe9029e524f0f624df8828fb99dba0
4
- data.tar.gz: f326a21d3942079cdadc22d9a61367e7a6651ae37c5eb07a3955182f9f569ad0
3
+ metadata.gz: 7238af114feedc000b706855f9af446a583455a94771ece1e90c3e3082e63c44
4
+ data.tar.gz: f926141da347dc8bc1f3663e271cf3efa6b73b7c84c11e17db92f0d32af1a47c
5
5
  SHA512:
6
- metadata.gz: fa4d6519685d179e3d5712cbcf1e6989ca4f98e5b0902f5061eca2be55451162f199fcc75f19841dbd8ada8c18de69ac6fa39cad545d4b5a6c542e9cdd0109ea
7
- data.tar.gz: fd71245492b2ff8a06a0af60dbd12e57e44025f2662d4ea77eb7176aaaa4c4cb3b39c1e159edce92944da5b54fddbb19d1b504acd76a12c4efdb9a0fb6e797da
6
+ metadata.gz: 6351a9f10f989fb92e8b5f5edea1d66dcf5110b65b748fef654faa9bfe189af64a095dd263be03af8513f552063b828730bbc09b3545f3ccf25bc7b3aef9f47a
7
+ data.tar.gz: 86d146518d40c9af209e9a671db10c024df4c256271de7062c4de657404508d2c0aef599cb5a9a29c6b0190f40b2bedfb91dbb41c6a31bb40b0e0b7242d4957b
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.3.3 (2026-03-18)
2
+
3
+ - Fixed error with `nil` values for Sequel
4
+
5
+ ## 0.3.2 (2024-07-17)
6
+
7
+ - Added `to_binary` method to `Vector`, `Bit`, and `SparseVector`
8
+
1
9
  ## 0.3.1 (2024-07-10)
2
10
 
3
11
  - Added support for `bit` type to pg
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2022-2024 Andrew Kane
3
+ Copyright (c) 2022-2025 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -23,10 +23,17 @@ And follow the instructions for your database library:
23
23
 
24
24
  Or check out some examples:
25
25
 
26
- - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
- - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
- - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
29
- - [Bulk loading](examples/bulk_loading.rb) with `COPY`
26
+ - [Embeddings](examples/openai/example.rb) with OpenAI
27
+ - [Binary embeddings](examples/cohere/example.rb) with Cohere
28
+ - [Sentence embeddings](examples/informers/example.rb) with Informers
29
+ - [Hybrid search](examples/hybrid/example.rb) with Informers (Reciprocal Rank Fusion)
30
+ - [Sparse search](examples/sparse/example.rb) with Transformers.rb
31
+ - [Morgan fingerprints](examples/rdkit/example.rb) with RDKit.rb
32
+ - [Topic modeling](examples/tomoto/example.rb) with tomoto.rb
33
+ - [User-based recommendations](examples/disco/user_recs.rb) with Disco
34
+ - [Item-based recommendations](examples/disco/item_recs.rb) with Disco
35
+ - [Horizontal scaling](examples/citus/example.rb) with Citus
36
+ - [Bulk loading](examples/loading/example.rb) with `COPY`
30
37
 
31
38
  ## pg
32
39
 
@@ -126,6 +133,48 @@ DB.add_index :items, :embedding, type: "hnsw", opclass: "vector_l2_ops"
126
133
 
127
134
  Use `vector_ip_ops` for inner product and `vector_cosine_ops` for cosine distance
128
135
 
136
+ ## Reference
137
+
138
+ ### Sparse Vectors
139
+
140
+ Create a sparse vector from an array
141
+
142
+ ```ruby
143
+ vec = Pgvector::SparseVector.new([1, 0, 2, 0, 3, 0])
144
+ ```
145
+
146
+ Or a hash of non-zero elements
147
+
148
+ ```ruby
149
+ vec = Pgvector::SparseVector.new({0 => 1, 2 => 2, 4 => 3}, 6)
150
+ ```
151
+
152
+ Note: Indices start at 0
153
+
154
+ Get the number of dimensions
155
+
156
+ ```ruby
157
+ dim = vec.dimensions
158
+ ```
159
+
160
+ Get the indices of non-zero elements
161
+
162
+ ```ruby
163
+ indices = vec.indices
164
+ ```
165
+
166
+ Get the values of non-zero elements
167
+
168
+ ```ruby
169
+ values = vec.values
170
+ ```
171
+
172
+ Get an array
173
+
174
+ ```ruby
175
+ arr = vec.to_a
176
+ ```
177
+
129
178
  ## History
130
179
 
131
180
  View the [changelog](https://github.com/pgvector/pgvector-ruby/blob/master/CHANGELOG.md)
@@ -148,3 +197,12 @@ createdb pgvector_ruby_test
148
197
  bundle install
149
198
  bundle exec rake test
150
199
  ```
200
+
201
+ To run an example:
202
+
203
+ ```sh
204
+ cd examples/loading
205
+ bundle install
206
+ createdb pgvector_example
207
+ bundle exec ruby example.rb
208
+ ```
data/lib/pgvector/bit.rb CHANGED
@@ -13,8 +13,8 @@ module Pgvector
13
13
  end
14
14
 
15
15
  def self.from_binary(string)
16
- length = string[..3].unpack1("l>")
17
- Bit.new(string[4..].unpack("B*").join[...length])
16
+ length, data = string.unpack("l>B*")
17
+ Bit.new(data[...length])
18
18
  end
19
19
 
20
20
  def to_s
@@ -24,5 +24,9 @@ module Pgvector
24
24
  def to_a
25
25
  @data.each_char.map { |v| v != "0" }
26
26
  end
27
+
28
+ def to_binary
29
+ [@data.length, @data].pack("l>B*")
30
+ end
27
31
  end
28
32
  end
data/lib/pgvector/pg.rb CHANGED
@@ -3,17 +3,17 @@ require "pg"
3
3
  module Pgvector
4
4
  module PG
5
5
  def self.register_vector(registry)
6
- registry.register_type(0, "vector", nil, TextDecoder::Vector)
7
- registry.register_type(1, "vector", nil, BinaryDecoder::Vector)
6
+ registry.register_type(0, "vector", TextEncoder::Vector, TextDecoder::Vector)
7
+ registry.register_type(1, "vector", BinaryEncoder::Vector, BinaryDecoder::Vector)
8
8
 
9
9
  # no binary decoder for halfvec since unpack does not have directive for half-precision
10
- registry.register_type(0, "halfvec", nil, TextDecoder::Halfvec)
10
+ registry.register_type(0, "halfvec", TextEncoder::Halfvec, TextDecoder::Halfvec)
11
11
 
12
- registry.register_type(0, "bit", nil, TextDecoder::Bit)
13
- registry.register_type(1, "bit", nil, BinaryDecoder::Bit)
12
+ registry.register_type(0, "bit", TextEncoder::Bit, TextDecoder::Bit)
13
+ registry.register_type(1, "bit", BinaryEncoder::Bit, BinaryDecoder::Bit)
14
14
 
15
- registry.register_type(0, "sparsevec", nil, TextDecoder::Sparsevec)
16
- registry.register_type(1, "sparsevec", nil, BinaryDecoder::Sparsevec)
15
+ registry.register_type(0, "sparsevec", TextEncoder::Sparsevec, TextDecoder::Sparsevec)
16
+ registry.register_type(1, "sparsevec", BinaryEncoder::Sparsevec, BinaryDecoder::Sparsevec)
17
17
  end
18
18
 
19
19
  module BinaryDecoder
@@ -31,7 +31,36 @@ module Pgvector
31
31
 
32
32
  class Sparsevec < ::PG::SimpleDecoder
33
33
  def decode(string, tuple = nil, field = nil)
34
- SparseVector.from_binary(string)
34
+ ::Pgvector::SparseVector.from_binary(string)
35
+ end
36
+ end
37
+ end
38
+
39
+ module BinaryEncoder
40
+ # experimental
41
+ def self.type_map
42
+ tm = ::PG::TypeMapByClass.new
43
+ tm[::Pgvector::Vector] = Vector.new
44
+ tm[::Pgvector::Bit] = Bit.new
45
+ tm[::Pgvector::SparseVector] = Sparsevec.new
46
+ tm
47
+ end
48
+
49
+ class Vector < ::PG::SimpleEncoder
50
+ def encode(value)
51
+ value.to_binary
52
+ end
53
+ end
54
+
55
+ class Bit < ::PG::SimpleEncoder
56
+ def encode(value)
57
+ value.to_binary
58
+ end
59
+ end
60
+
61
+ class Sparsevec < ::PG::SimpleEncoder
62
+ def encode(value)
63
+ value.to_binary
35
64
  end
36
65
  end
37
66
  end
@@ -45,7 +74,7 @@ module Pgvector
45
74
 
46
75
  class Halfvec < ::PG::SimpleDecoder
47
76
  def decode(string, tuple = nil, field = nil)
48
- HalfVector.from_text(string).to_a
77
+ ::Pgvector::HalfVector.from_text(string).to_a
49
78
  end
50
79
  end
51
80
 
@@ -57,7 +86,43 @@ module Pgvector
57
86
 
58
87
  class Sparsevec < ::PG::SimpleDecoder
59
88
  def decode(string, tuple = nil, field = nil)
60
- SparseVector.from_text(string)
89
+ ::Pgvector::SparseVector.from_text(string)
90
+ end
91
+ end
92
+ end
93
+
94
+ module TextEncoder
95
+ # experimental
96
+ def self.type_map
97
+ tm = ::PG::TypeMapByClass.new
98
+ tm[::Pgvector::Vector] = Vector.new
99
+ tm[::Pgvector::HalfVector] = Halfvec.new
100
+ tm[::Pgvector::Bit] = Bit.new
101
+ tm[::Pgvector::SparseVector] = Sparsevec.new
102
+ tm
103
+ end
104
+
105
+ class Vector < ::PG::SimpleEncoder
106
+ def encode(value)
107
+ value.to_s
108
+ end
109
+ end
110
+
111
+ class Halfvec < ::PG::SimpleEncoder
112
+ def encode(value)
113
+ value.to_s
114
+ end
115
+ end
116
+
117
+ class Bit < ::PG::SimpleEncoder
118
+ def encode(value)
119
+ value.to_s
120
+ end
121
+ end
122
+
123
+ class Sparsevec < ::PG::SimpleEncoder
124
+ def encode(value)
125
+ value.to_s
61
126
  end
62
127
  end
63
128
  end
@@ -30,6 +30,14 @@ module Pgvector
30
30
  arr
31
31
  end
32
32
 
33
+ def to_binary
34
+ nnz = @indices.size
35
+ buffer = [dimensions, nnz, 0].pack("l>l>l>")
36
+ @indices.pack("l>#{nnz}", buffer: buffer)
37
+ @values.pack("g#{nnz}", buffer: buffer)
38
+ buffer
39
+ end
40
+
33
41
  private
34
42
 
35
43
  def from_hash(data, dimensions)
@@ -1,7 +1,13 @@
1
1
  module Pgvector
2
2
  class Vector
3
3
  def initialize(data)
4
- @data = data.to_a.map(&:to_f)
4
+ # keep as NArray when possible for performance
5
+ @data =
6
+ if numo?(data)
7
+ data.cast_to(Numo::SFloat)
8
+ else
9
+ data.to_a.map(&:to_f)
10
+ end
5
11
  end
6
12
 
7
13
  def self.from_text(string)
@@ -19,7 +25,23 @@ module Pgvector
19
25
  end
20
26
 
21
27
  def to_a
22
- @data
28
+ @data.to_a
29
+ end
30
+
31
+ def to_binary
32
+ if numo?(@data)
33
+ [@data.shape[0], 0].pack("s>s>") + @data.to_network.to_binary.force_encoding(Encoding::BINARY)
34
+ else
35
+ buffer = [@data.size, 0].pack("s>s>")
36
+ @data.pack("g*", buffer: buffer)
37
+ buffer
38
+ end
39
+ end
40
+
41
+ private
42
+
43
+ def numo?(data)
44
+ defined?(Numo::NArray) && data.is_a?(Numo::NArray)
23
45
  end
24
46
  end
25
47
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.3.1"
2
+ VERSION = "0.3.3"
3
3
  end
@@ -10,6 +10,7 @@ module Sequel
10
10
 
11
11
  module DatasetMethods
12
12
  def nearest_neighbors(column, value, distance:)
13
+ return extension(:null_dataset).nullify if value.nil?
13
14
  value = ::Pgvector.encode(value) unless value.is_a?(String)
14
15
  quoted_column = quote_identifier(column)
15
16
  distance = distance.to_s
@@ -67,7 +68,7 @@ module Sequel
67
68
  end
68
69
 
69
70
  def []=(k, v)
70
- if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
71
+ if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String) && !v.nil?
71
72
  super(k, ::Pgvector.encode(v))
72
73
  else
73
74
  super
@@ -75,10 +76,12 @@ module Sequel
75
76
  end
76
77
 
77
78
  def [](k)
78
- if self.class.vector_columns.key?(k.to_sym)
79
- ::Pgvector.decode(super)
79
+ v = super
80
+ if self.class.vector_columns.key?(k.to_sym) && !v.nil?
81
+ # to_s needed for JRuby
82
+ ::Pgvector.decode(v.to_s)
80
83
  else
81
- super
84
+ v
82
85
  end
83
86
  end
84
87
  end
metadata CHANGED
@@ -1,16 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
- autorequire:
9
8
  bindir: bin
10
9
  cert_chain: []
11
- date: 2024-07-11 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
12
11
  dependencies: []
13
- description:
14
12
  email: andrew@ankane.org
15
13
  executables: []
16
14
  extensions: []
@@ -32,7 +30,6 @@ homepage: https://github.com/pgvector/pgvector-ruby
32
30
  licenses:
33
31
  - MIT
34
32
  metadata: {}
35
- post_install_message:
36
33
  rdoc_options: []
37
34
  require_paths:
38
35
  - lib
@@ -47,8 +44,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
44
  - !ruby/object:Gem::Version
48
45
  version: '0'
49
46
  requirements: []
50
- rubygems_version: 3.5.11
51
- signing_key:
47
+ rubygems_version: 4.0.3
52
48
  specification_version: 4
53
49
  summary: pgvector support for Ruby
54
50
  test_files: []