pgvector 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +59 -1
- data/lib/pgvector/pg.rb +1 -1
- data/lib/pgvector/version.rb +1 -1
- data/lib/pgvector.rb +10 -1
- data/lib/sequel/plugins/pgvector.rb +81 -0
- metadata +3 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7485ea4be0d5be0177a972db911c696daf3438a661ddac61b08f4e8b2da3ac51
|
|
4
|
+
data.tar.gz: 2532ef79f5db88aecb681d9455e38f3e5fc1d30bde015d0a1e9daaa9fe82635e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: be40e4c3e16dd904a200115794a8ffaa850b40c5055330bd873ec7a707164a53b29d22040defbb4dd8a9cff597d4e1ad5c659d37a580ccc201ce30a9eb17fef9
|
|
7
|
+
data.tar.gz: f5d36289b043d987920911ab08d85d7d1066039f84dcc2a24436701c06b246adcb8fd3c32e3f76f3e1604001403574c14818f91a64357ce4ebd675458e57184c
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
[pgvector](https://github.com/pgvector/pgvector) support for Ruby
|
|
4
4
|
|
|
5
|
-
Supports [pg](https://github.com/ged/ruby-pg)
|
|
5
|
+
Supports [pg](https://github.com/ged/ruby-pg) and [Sequel](https://github.com/jeremyevans/sequel)
|
|
6
6
|
|
|
7
7
|
For Rails, check out [Neighbor](https://github.com/ankane/neighbor)
|
|
8
8
|
|
|
@@ -19,9 +19,22 @@ gem "pgvector"
|
|
|
19
19
|
And follow the instructions for your database library:
|
|
20
20
|
|
|
21
21
|
- [pg](#pg)
|
|
22
|
+
- [Sequel](#sequel)
|
|
23
|
+
|
|
24
|
+
Or check out some examples:
|
|
25
|
+
|
|
26
|
+
- [Embeddings](examples/openai_embeddings.rb) with OpenAI
|
|
27
|
+
- [User-based recommendations](examples/disco_user_recs.rb) with Disco
|
|
28
|
+
- [Item-based recommendations](examples/disco_item_recs.rb) with Disco
|
|
22
29
|
|
|
23
30
|
## pg
|
|
24
31
|
|
|
32
|
+
Enable the extension
|
|
33
|
+
|
|
34
|
+
```ruby
|
|
35
|
+
conn.exec("CREATE EXTENSION IF NOT EXISTS vector")
|
|
36
|
+
```
|
|
37
|
+
|
|
25
38
|
Register the vector type with your connection
|
|
26
39
|
|
|
27
40
|
```ruby
|
|
@@ -43,6 +56,51 @@ Get the nearest neighbors to a vector
|
|
|
43
56
|
conn.exec_params("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5", [embedding]).to_a
|
|
44
57
|
```
|
|
45
58
|
|
|
59
|
+
## Sequel
|
|
60
|
+
|
|
61
|
+
Enable the extension
|
|
62
|
+
|
|
63
|
+
```ruby
|
|
64
|
+
DB.run("CREATE EXTENSION IF NOT EXISTS vector")
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
Create a table
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
DB.create_table :items do
|
|
71
|
+
primary_key :id
|
|
72
|
+
column :embedding, "vector(3)"
|
|
73
|
+
end
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
Add the plugin to your model
|
|
77
|
+
|
|
78
|
+
```ruby
|
|
79
|
+
class Item < Sequel::Model
|
|
80
|
+
plugin :pgvector, :embedding
|
|
81
|
+
end
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Insert a vector
|
|
85
|
+
|
|
86
|
+
```ruby
|
|
87
|
+
Item.create(embedding: [1, 1, 1])
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
Get the nearest neighbors to a record
|
|
91
|
+
|
|
92
|
+
```ruby
|
|
93
|
+
item.nearest_neighbors(:embedding, distance: "euclidean").limit(5)
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Also supports `inner_product` and `cosine` distance
|
|
97
|
+
|
|
98
|
+
Get the nearest neighbors to a vector
|
|
99
|
+
|
|
100
|
+
```ruby
|
|
101
|
+
Item.nearest_neighbors(:embedding, [1, 1, 1], distance: "euclidean").limit(5)
|
|
102
|
+
```
|
|
103
|
+
|
|
46
104
|
## History
|
|
47
105
|
|
|
48
106
|
View the [changelog](https://github.com/pgvector/pgvector-ruby/blob/master/CHANGELOG.md)
|
data/lib/pgvector/pg.rb
CHANGED
data/lib/pgvector/version.rb
CHANGED
data/lib/pgvector.rb
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
|
-
|
|
1
|
+
# modules
|
|
2
|
+
require_relative "pgvector/version"
|
|
2
3
|
|
|
3
4
|
module Pgvector
|
|
4
5
|
autoload :PG, "pgvector/pg"
|
|
6
|
+
|
|
7
|
+
def self.encode(data)
|
|
8
|
+
"[#{data.to_a.map(&:to_f).join(",")}]"
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def self.decode(string)
|
|
12
|
+
string[1..-2].split(",").map(&:to_f)
|
|
13
|
+
end
|
|
5
14
|
end
|
|
data/lib/sequel/plugins/pgvector.rb
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
module Sequel
|
|
2
|
+
module Plugins
|
|
3
|
+
module Pgvector
|
|
4
|
+
def self.configure(model, *columns)
|
|
5
|
+
model.vector_columns ||= {}
|
|
6
|
+
columns.each do |column|
|
|
7
|
+
model.vector_columns[column.to_sym] = {}
|
|
8
|
+
end
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
module DatasetMethods
|
|
12
|
+
def nearest_neighbors(column, value, distance:)
|
|
13
|
+
value = ::Pgvector.encode(value) unless value.is_a?(String)
|
|
14
|
+
quoted_column = quote_identifier(column)
|
|
15
|
+
distance = distance.to_s
|
|
16
|
+
|
|
17
|
+
operator =
|
|
18
|
+
case distance
|
|
19
|
+
when "inner_product"
|
|
20
|
+
"<#>"
|
|
21
|
+
when "cosine"
|
|
22
|
+
"<=>"
|
|
23
|
+
when "euclidean"
|
|
24
|
+
"<->"
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
raise ArgumentError, "Invalid distance: #{distance}" unless operator
|
|
28
|
+
|
|
29
|
+
order = "#{quoted_column} #{operator} ?"
|
|
30
|
+
|
|
31
|
+
neighbor_distance =
|
|
32
|
+
if distance == "inner_product"
|
|
33
|
+
"(#{order}) * -1"
|
|
34
|
+
else
|
|
35
|
+
order
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
select_append(Sequel.lit("#{neighbor_distance} AS neighbor_distance", value))
|
|
39
|
+
.exclude(column => nil)
|
|
40
|
+
.order(Sequel.lit(order, value))
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
module ClassMethods
|
|
45
|
+
attr_accessor :vector_columns
|
|
46
|
+
|
|
47
|
+
Sequel::Plugins.def_dataset_methods(self, :nearest_neighbors)
|
|
48
|
+
|
|
49
|
+
Plugins.inherited_instance_variables(self, :@vector_columns => :dup)
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
module InstanceMethods
|
|
53
|
+
def nearest_neighbors(column, **options)
|
|
54
|
+
column = column.to_sym
|
|
55
|
+
# important! check if neighbor attribute before calling send
|
|
56
|
+
raise ArgumentError, "Invalid column" unless self.class.vector_columns[column]
|
|
57
|
+
|
|
58
|
+
self.class
|
|
59
|
+
.nearest_neighbors(column, self[column], **options)
|
|
60
|
+
.exclude(primary_key => self[primary_key])
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def []=(k, v)
|
|
64
|
+
if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
|
|
65
|
+
super(k, ::Pgvector.encode(v))
|
|
66
|
+
else
|
|
67
|
+
super
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def [](k)
|
|
72
|
+
if self.class.vector_columns.key?(k.to_sym)
|
|
73
|
+
::Pgvector.decode(super)
|
|
74
|
+
else
|
|
75
|
+
super
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: pgvector
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.2.2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Andrew Kane
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-
|
|
11
|
+
date: 2023-10-04 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description:
|
|
14
14
|
email: andrew@ankane.org
|
|
@@ -22,6 +22,7 @@ files:
|
|
|
22
22
|
- lib/pgvector.rb
|
|
23
23
|
- lib/pgvector/pg.rb
|
|
24
24
|
- lib/pgvector/version.rb
|
|
25
|
+
- lib/sequel/plugins/pgvector.rb
|
|
25
26
|
homepage: https://github.com/pgvector/pgvector-ruby
|
|
26
27
|
licenses:
|
|
27
28
|
- MIT
|