pgvector 0.2.0 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0729ce3ec22ea08be365fc6c7a5c5e0b038860d5c9a57ccd2554d9d8b57a3f2
4
- data.tar.gz: c36eabc18ca6db3dd47204df3c48b58f7ac635e230bb9039269506d7d8afa834
3
+ metadata.gz: 7485ea4be0d5be0177a972db911c696daf3438a661ddac61b08f4e8b2da3ac51
4
+ data.tar.gz: 2532ef79f5db88aecb681d9455e38f3e5fc1d30bde015d0a1e9daaa9fe82635e
5
5
  SHA512:
6
- metadata.gz: ff4dc7d6a7a26b1ff3502d2305b62fc25fff38f3bf314261dcb6aa0f40cc520963cfd9562af03b11678cad7819aef9951b480a61e138ae2ae02459ed668a5298
7
- data.tar.gz: 6aa39019d5566f83331f3c95bb6404b065b7f7a29be62e84be2f2b6ea58bc27d20002543d93dfec7a17256c970e32b9676e2d319d07758cf49fc31b89d3d4d7b
6
+ metadata.gz: be40e4c3e16dd904a200115794a8ffaa850b40c5055330bd873ec7a707164a53b29d22040defbb4dd8a9cff597d4e1ad5c659d37a580ccc201ce30a9eb17fef9
7
+ data.tar.gz: f5d36289b043d987920911ab08d85d7d1066039f84dcc2a24436701c06b246adcb8fd3c32e3f76f3e1604001403574c14818f91a64357ce4ebd675458e57184c
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.2 (2023-10-03)
2
+
3
+ - Added `nearest_neighbors` method to datasets with Sequel
4
+
5
+ ## 0.2.1 (2023-06-04)
6
+
7
+ - Added support for Sequel
8
+
1
9
  ## 0.2.0 (2023-05-11)
2
10
 
3
11
  - Dropped support for Ruby < 3
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [pgvector](https://github.com/pgvector/pgvector) support for Ruby
4
4
 
5
- Supports the [pg](https://github.com/ged/ruby-pg) gem
5
+ Supports [pg](https://github.com/ged/ruby-pg) and [Sequel](https://github.com/jeremyevans/sequel)
6
6
 
7
7
  For Rails, check out [Neighbor](https://github.com/ankane/neighbor)
8
8
 
@@ -19,9 +19,22 @@ gem "pgvector"
19
19
  And follow the instructions for your database library:
20
20
 
21
21
  - [pg](#pg)
22
+ - [Sequel](#sequel)
23
+
24
+ Or check out some examples:
25
+
26
+ - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
+ - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
22
29
 
23
30
  ## pg
24
31
 
32
+ Enable the extension
33
+
34
+ ```ruby
35
+ conn.exec("CREATE EXTENSION IF NOT EXISTS vector")
36
+ ```
37
+
25
38
  Register the vector type with your connection
26
39
 
27
40
  ```ruby
@@ -43,6 +56,51 @@ Get the nearest neighbors to a vector
43
56
  conn.exec_params("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5", [embedding]).to_a
44
57
  ```
45
58
 
59
+ ## Sequel
60
+
61
+ Enable the extension
62
+
63
+ ```ruby
64
+ DB.run("CREATE EXTENSION IF NOT EXISTS vector")
65
+ ```
66
+
67
+ Create a table
68
+
69
+ ```ruby
70
+ DB.create_table :items do
71
+ primary_key :id
72
+ column :embedding, "vector(3)"
73
+ end
74
+ ```
75
+
76
+ Add the plugin to your model
77
+
78
+ ```ruby
79
+ class Item < Sequel::Model
80
+ plugin :pgvector, :embedding
81
+ end
82
+ ```
83
+
84
+ Insert a vector
85
+
86
+ ```ruby
87
+ Item.create(embedding: [1, 1, 1])
88
+ ```
89
+
90
+ Get the nearest neighbors to a record
91
+
92
+ ```ruby
93
+ item.nearest_neighbors(:embedding, distance: "euclidean").limit(5)
94
+ ```
95
+
96
+ Also supports `inner_product` and `cosine` distance
97
+
98
+ Get the nearest neighbors to a vector
99
+
100
+ ```ruby
101
+ Item.nearest_neighbors(:embedding, [1, 1, 1], distance: "euclidean").limit(5)
102
+ ```
103
+
46
104
  ## History
47
105
 
48
106
  View the [changelog](https://github.com/pgvector/pgvector-ruby/blob/master/CHANGELOG.md)
data/lib/pgvector/pg.rb CHANGED
@@ -20,7 +20,7 @@ module Pgvector
20
20
  module TextDecoder
21
21
  class Vector < ::PG::SimpleDecoder
22
22
  def decode(string, tuple = nil, field = nil)
23
- string[1..-2].split(",").map(&:to_f)
23
+ Pgvector.decode(string)
24
24
  end
25
25
  end
26
26
  end
data/lib/pgvector/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.2"
3
3
  end
data/lib/pgvector.rb CHANGED
@@ -1,5 +1,14 @@
1
- require "pgvector/version"
1
+ # modules
2
+ require_relative "pgvector/version"
2
3
 
3
4
  module Pgvector
4
5
  autoload :PG, "pgvector/pg"
6
+
7
+ def self.encode(data)
8
+ "[#{data.to_a.map(&:to_f).join(",")}]"
9
+ end
10
+
11
+ def self.decode(string)
12
+ string[1..-2].split(",").map(&:to_f)
13
+ end
5
14
  end
data/lib/sequel/plugins/pgvector.rb ADDED
@@ -0,0 +1,81 @@
1
+ module Sequel
2
+ module Plugins
3
+ module Pgvector
4
+ def self.configure(model, *columns)
5
+ model.vector_columns ||= {}
6
+ columns.each do |column|
7
+ model.vector_columns[column.to_sym] = {}
8
+ end
9
+ end
10
+
11
+ module DatasetMethods
12
+ def nearest_neighbors(column, value, distance:)
13
+ value = ::Pgvector.encode(value) unless value.is_a?(String)
14
+ quoted_column = quote_identifier(column)
15
+ distance = distance.to_s
16
+
17
+ operator =
18
+ case distance
19
+ when "inner_product"
20
+ "<#>"
21
+ when "cosine"
22
+ "<=>"
23
+ when "euclidean"
24
+ "<->"
25
+ end
26
+
27
+ raise ArgumentError, "Invalid distance: #{distance}" unless operator
28
+
29
+ order = "#{quoted_column} #{operator} ?"
30
+
31
+ neighbor_distance =
32
+ if distance == "inner_product"
33
+ "(#{order}) * -1"
34
+ else
35
+ order
36
+ end
37
+
38
+ select_append(Sequel.lit("#{neighbor_distance} AS neighbor_distance", value))
39
+ .exclude(column => nil)
40
+ .order(Sequel.lit(order, value))
41
+ end
42
+ end
43
+
44
+ module ClassMethods
45
+ attr_accessor :vector_columns
46
+
47
+ Sequel::Plugins.def_dataset_methods(self, :nearest_neighbors)
48
+
49
+ Plugins.inherited_instance_variables(self, :@vector_columns => :dup)
50
+ end
51
+
52
+ module InstanceMethods
53
+ def nearest_neighbors(column, **options)
54
+ column = column.to_sym
55
+ # important! check if neighbor attribute before calling send
56
+ raise ArgumentError, "Invalid column" unless self.class.vector_columns[column]
57
+
58
+ self.class
59
+ .nearest_neighbors(column, self[column], **options)
60
+ .exclude(primary_key => self[primary_key])
61
+ end
62
+
63
+ def []=(k, v)
64
+ if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
65
+ super(k, ::Pgvector.encode(v))
66
+ else
67
+ super
68
+ end
69
+ end
70
+
71
+ def [](k)
72
+ if self.class.vector_columns.key?(k.to_sym)
73
+ ::Pgvector.decode(super)
74
+ else
75
+ super
76
+ end
77
+ end
78
+ end
79
+ end
80
+ end
81
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-05-12 00:00:00.000000000 Z
11
+ date: 2023-10-04 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -22,6 +22,7 @@ files:
22
22
  - lib/pgvector.rb
23
23
  - lib/pgvector/pg.rb
24
24
  - lib/pgvector/version.rb
25
+ - lib/sequel/plugins/pgvector.rb
25
26
  homepage: https://github.com/pgvector/pgvector-ruby
26
27
  licenses:
27
28
  - MIT