pgvector 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0729ce3ec22ea08be365fc6c7a5c5e0b038860d5c9a57ccd2554d9d8b57a3f2
4
- data.tar.gz: c36eabc18ca6db3dd47204df3c48b58f7ac635e230bb9039269506d7d8afa834
3
+ metadata.gz: 7485ea4be0d5be0177a972db911c696daf3438a661ddac61b08f4e8b2da3ac51
4
+ data.tar.gz: 2532ef79f5db88aecb681d9455e38f3e5fc1d30bde015d0a1e9daaa9fe82635e
5
5
  SHA512:
6
- metadata.gz: ff4dc7d6a7a26b1ff3502d2305b62fc25fff38f3bf314261dcb6aa0f40cc520963cfd9562af03b11678cad7819aef9951b480a61e138ae2ae02459ed668a5298
7
- data.tar.gz: 6aa39019d5566f83331f3c95bb6404b065b7f7a29be62e84be2f2b6ea58bc27d20002543d93dfec7a17256c970e32b9676e2d319d07758cf49fc31b89d3d4d7b
6
+ metadata.gz: be40e4c3e16dd904a200115794a8ffaa850b40c5055330bd873ec7a707164a53b29d22040defbb4dd8a9cff597d4e1ad5c659d37a580ccc201ce30a9eb17fef9
7
+ data.tar.gz: f5d36289b043d987920911ab08d85d7d1066039f84dcc2a24436701c06b246adcb8fd3c32e3f76f3e1604001403574c14818f91a64357ce4ebd675458e57184c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.2 (2023-10-03)
2
+
3
+ - Added `nearest_neighbors` method to datasets with Sequel
4
+
5
+ ## 0.2.1 (2023-06-04)
6
+
7
+ - Added support for Sequel
8
+
1
9
  ## 0.2.0 (2023-05-11)
2
10
 
3
11
  - Dropped support for Ruby < 3
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [pgvector](https://github.com/pgvector/pgvector) support for Ruby
4
4
 
5
- Supports the [pg](https://github.com/ged/ruby-pg) gem
5
+ Supports [pg](https://github.com/ged/ruby-pg) and [Sequel](https://github.com/jeremyevans/sequel)
6
6
 
7
7
  For Rails, check out [Neighbor](https://github.com/ankane/neighbor)
8
8
 
@@ -19,9 +19,22 @@ gem "pgvector"
19
19
  And follow the instructions for your database library:
20
20
 
21
21
  - [pg](#pg)
22
+ - [Sequel](#sequel)
23
+
24
+ Or check out some examples:
25
+
26
+ - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
+ - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
22
29
 
23
30
  ## pg
24
31
 
32
+ Enable the extension
33
+
34
+ ```ruby
35
+ conn.exec("CREATE EXTENSION IF NOT EXISTS vector")
36
+ ```
37
+
25
38
  Register the vector type with your connection
26
39
 
27
40
  ```ruby
@@ -43,6 +56,51 @@ Get the nearest neighbors to a vector
43
56
  conn.exec_params("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5", [embedding]).to_a
44
57
  ```
45
58
 
59
+ ## Sequel
60
+
61
+ Enable the extension
62
+
63
+ ```ruby
64
+ DB.run("CREATE EXTENSION IF NOT EXISTS vector")
65
+ ```
66
+
67
+ Create a table
68
+
69
+ ```ruby
70
+ DB.create_table :items do
71
+ primary_key :id
72
+ column :embedding, "vector(3)"
73
+ end
74
+ ```
75
+
76
+ Add the plugin to your model
77
+
78
+ ```ruby
79
+ class Item < Sequel::Model
80
+ plugin :pgvector, :embedding
81
+ end
82
+ ```
83
+
84
+ Insert a vector
85
+
86
+ ```ruby
87
+ Item.create(embedding: [1, 1, 1])
88
+ ```
89
+
90
+ Get the nearest neighbors to a record
91
+
92
+ ```ruby
93
+ item.nearest_neighbors(:embedding, distance: "euclidean").limit(5)
94
+ ```
95
+
96
+ Also supports `inner_product` and `cosine` distance
97
+
98
+ Get the nearest neighbors to a vector
99
+
100
+ ```ruby
101
+ Item.nearest_neighbors(:embedding, [1, 1, 1], distance: "euclidean").limit(5)
102
+ ```
103
+
46
104
  ## History
47
105
 
48
106
  View the [changelog](https://github.com/pgvector/pgvector-ruby/blob/master/CHANGELOG.md)
data/lib/pgvector/pg.rb CHANGED
@@ -20,7 +20,7 @@ module Pgvector
20
20
  module TextDecoder
21
21
  class Vector < ::PG::SimpleDecoder
22
22
  def decode(string, tuple = nil, field = nil)
23
- string[1..-2].split(",").map(&:to_f)
23
+ Pgvector.decode(string)
24
24
  end
25
25
  end
26
26
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.2"
3
3
  end
data/lib/pgvector.rb CHANGED
@@ -1,5 +1,14 @@
1
- require "pgvector/version"
1
+ # modules
2
+ require_relative "pgvector/version"
2
3
 
3
4
  module Pgvector
4
5
  autoload :PG, "pgvector/pg"
6
+
7
+ def self.encode(data)
8
+ "[#{data.to_a.map(&:to_f).join(",")}]"
9
+ end
10
+
11
+ def self.decode(string)
12
+ string[1..-2].split(",").map(&:to_f)
13
+ end
5
14
  end
@@ -0,0 +1,81 @@
1
+ module Sequel
2
+ module Plugins
3
+ module Pgvector
4
+ def self.configure(model, *columns)
5
+ model.vector_columns ||= {}
6
+ columns.each do |column|
7
+ model.vector_columns[column.to_sym] = {}
8
+ end
9
+ end
10
+
11
+ module DatasetMethods
12
+ def nearest_neighbors(column, value, distance:)
13
+ value = ::Pgvector.encode(value) unless value.is_a?(String)
14
+ quoted_column = quote_identifier(column)
15
+ distance = distance.to_s
16
+
17
+ operator =
18
+ case distance
19
+ when "inner_product"
20
+ "<#>"
21
+ when "cosine"
22
+ "<=>"
23
+ when "euclidean"
24
+ "<->"
25
+ end
26
+
27
+ raise ArgumentError, "Invalid distance: #{distance}" unless operator
28
+
29
+ order = "#{quoted_column} #{operator} ?"
30
+
31
+ neighbor_distance =
32
+ if distance == "inner_product"
33
+ "(#{order}) * -1"
34
+ else
35
+ order
36
+ end
37
+
38
+ select_append(Sequel.lit("#{neighbor_distance} AS neighbor_distance", value))
39
+ .exclude(column => nil)
40
+ .order(Sequel.lit(order, value))
41
+ end
42
+ end
43
+
44
+ module ClassMethods
45
+ attr_accessor :vector_columns
46
+
47
+ Sequel::Plugins.def_dataset_methods(self, :nearest_neighbors)
48
+
49
+ Plugins.inherited_instance_variables(self, :@vector_columns => :dup)
50
+ end
51
+
52
+ module InstanceMethods
53
+ def nearest_neighbors(column, **options)
54
+ column = column.to_sym
55
+ # important! check if neighbor attribute before calling send
56
+ raise ArgumentError, "Invalid column" unless self.class.vector_columns[column]
57
+
58
+ self.class
59
+ .nearest_neighbors(column, self[column], **options)
60
+ .exclude(primary_key => self[primary_key])
61
+ end
62
+
63
+ def []=(k, v)
64
+ if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
65
+ super(k, ::Pgvector.encode(v))
66
+ else
67
+ super
68
+ end
69
+ end
70
+
71
+ def [](k)
72
+ if self.class.vector_columns.key?(k.to_sym)
73
+ ::Pgvector.decode(super)
74
+ else
75
+ super
76
+ end
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-12 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -22,6 +22,7 @@ files:
22
22
  - lib/pgvector.rb
23
23
  - lib/pgvector/pg.rb
24
24
  - lib/pgvector/version.rb
25
+ - lib/sequel/plugins/pgvector.rb
25
26
  homepage: https://github.com/pgvector/pgvector-ruby
26
27
  licenses:
27
28
  - MIT