pgvector 0.1.1 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/LICENSE.txt +1 -1
- data/README.md +50 -4
- data/lib/pgvector/pg.rb +1 -1
- data/lib/pgvector/version.rb +1 -1
- data/lib/pgvector.rb +10 -1
- data/lib/sequel/plugins/pgvector.rb +77 -0
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 215f975cafa2f782f3777ec65dd2fea587b420286f7f5ccf63b6b68376756dfd
|
4
|
+
data.tar.gz: e1979a1aa4fd4157cb04ae73c0c62b378046ca97154a25d4355e827be5abc53b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1e54d7c41f8750b99262021e402f329252b428678ee99510dd52a1a113aeffdf35628a23b6ca9408ce55350a237cb2d224e66dfe7aafefd645612a29b74529ff
|
7
|
+
data.tar.gz: c8a014bb3708690cf3e463954aa7a846ce79075da039e23b3f94644e50e3c28d05376539699fd8ffe3c3b93d54eecb393fed472bddb60619ba897a5204bc511a
|
data/CHANGELOG.md
CHANGED
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[pgvector](https://github.com/pgvector/pgvector) support for Ruby
|
4
4
|
|
5
|
-
Supports
|
5
|
+
Supports [pg](https://github.com/ged/ruby-pg) and [Sequel](https://github.com/jeremyevans/sequel)
|
6
6
|
|
7
7
|
For Rails, check out [Neighbor](https://github.com/ankane/neighbor)
|
8
8
|
|
@@ -19,6 +19,13 @@ gem "pgvector"
|
|
19
19
|
And follow the instructions for your database library:
|
20
20
|
|
21
21
|
- [pg](#pg)
|
22
|
+
- [Sequel](#sequel)
|
23
|
+
|
24
|
+
Or check out some examples:
|
25
|
+
|
26
|
+
- [Embeddings](examples/openai_embeddings.rb) with OpenAI
|
27
|
+
- [User-based recommendations](examples/disco_user_recs.rb) with Disco
|
28
|
+
- [Item-based recommendations](examples/disco_item_recs.rb) with Disco
|
22
29
|
|
23
30
|
## pg
|
24
31
|
|
@@ -33,14 +40,53 @@ conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn, registry: regis
|
|
33
40
|
Insert a vector
|
34
41
|
|
35
42
|
```ruby
|
36
|
-
|
37
|
-
conn.exec_params("INSERT INTO items (
|
43
|
+
embedding = [1, 2, 3]
|
44
|
+
conn.exec_params("INSERT INTO items (embedding) VALUES ($1)", [embedding])
|
45
|
+
```
|
46
|
+
|
47
|
+
Get the nearest neighbors to a vector
|
48
|
+
|
49
|
+
```ruby
|
50
|
+
conn.exec_params("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5", [embedding]).to_a
|
51
|
+
```
|
52
|
+
|
53
|
+
## Sequel
|
54
|
+
|
55
|
+
Create a table
|
56
|
+
|
57
|
+
```ruby
|
58
|
+
DB.create_table :items do
|
59
|
+
primary_key :id
|
60
|
+
column :embedding, "vector(3)"
|
61
|
+
end
|
62
|
+
```
|
63
|
+
|
64
|
+
Add the plugin to your model
|
65
|
+
|
66
|
+
```ruby
|
67
|
+
class Item < Sequel::Model
|
68
|
+
plugin :pgvector, :embedding
|
69
|
+
end
|
70
|
+
```
|
71
|
+
|
72
|
+
Insert a vector
|
73
|
+
|
74
|
+
```ruby
|
75
|
+
Item.create(embedding: [1, 1, 1])
|
76
|
+
```
|
77
|
+
|
78
|
+
Get the nearest neighbors to a record
|
79
|
+
|
80
|
+
```ruby
|
81
|
+
item.nearest_neighbors(:embedding, distance: "euclidean").limit(5)
|
38
82
|
```
|
39
83
|
|
84
|
+
Also supports `inner_product` and `cosine` distance
|
85
|
+
|
40
86
|
Get the nearest neighbors to a vector
|
41
87
|
|
42
88
|
```ruby
|
43
|
-
|
89
|
+
Item.nearest_neighbors(:embedding, [1, 1, 1], distance: "euclidean").limit(5)
|
44
90
|
```
|
45
91
|
|
46
92
|
## History
|
data/lib/pgvector/pg.rb
CHANGED
data/lib/pgvector/version.rb
CHANGED
data/lib/pgvector.rb
CHANGED
@@ -1,5 +1,14 @@
|
|
1
|
-
|
1
|
+
# modules
|
2
|
+
require_relative "pgvector/version"
|
2
3
|
|
3
4
|
module Pgvector
|
4
5
|
autoload :PG, "pgvector/pg"
|
6
|
+
|
7
|
+
def self.encode(data)
|
8
|
+
"[#{data.to_a.map(&:to_f).join(",")}]"
|
9
|
+
end
|
10
|
+
|
11
|
+
def self.decode(string)
|
12
|
+
string[1..-2].split(",").map(&:to_f)
|
13
|
+
end
|
5
14
|
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Sequel
|
2
|
+
module Plugins
|
3
|
+
module Pgvector
|
4
|
+
def self.configure(model, *columns)
|
5
|
+
model.vector_columns ||= {}
|
6
|
+
columns.each do |column|
|
7
|
+
model.vector_columns[column.to_sym] = {}
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
module ClassMethods
|
12
|
+
attr_accessor :vector_columns
|
13
|
+
|
14
|
+
def nearest_neighbors(column, value, distance:)
|
15
|
+
value = ::Pgvector.encode(value) unless value.is_a?(String)
|
16
|
+
quoted_column = dataset.quote_identifier(column)
|
17
|
+
distance = distance.to_s
|
18
|
+
|
19
|
+
operator =
|
20
|
+
case distance
|
21
|
+
when "inner_product"
|
22
|
+
"<#>"
|
23
|
+
when "cosine"
|
24
|
+
"<=>"
|
25
|
+
when "euclidean"
|
26
|
+
"<->"
|
27
|
+
end
|
28
|
+
|
29
|
+
raise ArgumentError, "Invalid distance: #{distance}" unless operator
|
30
|
+
|
31
|
+
order = "#{quoted_column} #{operator} ?"
|
32
|
+
|
33
|
+
neighbor_distance =
|
34
|
+
if distance == "inner_product"
|
35
|
+
"(#{order}) * -1"
|
36
|
+
else
|
37
|
+
order
|
38
|
+
end
|
39
|
+
|
40
|
+
select_append(Sequel.lit("#{neighbor_distance} AS neighbor_distance", value))
|
41
|
+
.exclude(column => nil)
|
42
|
+
.order(Sequel.lit(order, value))
|
43
|
+
end
|
44
|
+
|
45
|
+
Plugins.inherited_instance_variables(self, :@vector_columns => :dup)
|
46
|
+
end
|
47
|
+
|
48
|
+
module InstanceMethods
|
49
|
+
def nearest_neighbors(column, **options)
|
50
|
+
column = column.to_sym
|
51
|
+
# important! check if neighbor attribute before calling send
|
52
|
+
raise ArgumentError, "Invalid column" unless self.class.vector_columns[column]
|
53
|
+
|
54
|
+
self.class
|
55
|
+
.nearest_neighbors(column, self[column], **options)
|
56
|
+
.exclude(primary_key => self[primary_key])
|
57
|
+
end
|
58
|
+
|
59
|
+
def []=(k, v)
|
60
|
+
if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
|
61
|
+
super(k, ::Pgvector.encode(v))
|
62
|
+
else
|
63
|
+
super
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
def [](k)
|
68
|
+
if self.class.vector_columns.key?(k.to_sym)
|
69
|
+
::Pgvector.decode(super)
|
70
|
+
else
|
71
|
+
super
|
72
|
+
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pgvector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Andrew Kane
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email: andrew@ankane.org
|
@@ -22,6 +22,7 @@ files:
|
|
22
22
|
- lib/pgvector.rb
|
23
23
|
- lib/pgvector/pg.rb
|
24
24
|
- lib/pgvector/version.rb
|
25
|
+
- lib/sequel/plugins/pgvector.rb
|
25
26
|
homepage: https://github.com/pgvector/pgvector-ruby
|
26
27
|
licenses:
|
27
28
|
- MIT
|
@@ -34,14 +35,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
34
35
|
requirements:
|
35
36
|
- - ">="
|
36
37
|
- !ruby/object:Gem::Version
|
37
|
-
version: '
|
38
|
+
version: '3'
|
38
39
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
39
40
|
requirements:
|
40
41
|
- - ">="
|
41
42
|
- !ruby/object:Gem::Version
|
42
43
|
version: '0'
|
43
44
|
requirements: []
|
44
|
-
rubygems_version: 3.
|
45
|
+
rubygems_version: 3.4.10
|
45
46
|
signing_key:
|
46
47
|
specification_version: 4
|
47
48
|
summary: pgvector support for Ruby
|