pgvector 0.1.1 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4e1249b9ced41c051b06654cea7539a58e2816d794461788bec605eab464cca8
4
- data.tar.gz: fc65c3c883a39cd1b63c804eaae65897c2a332836b5d91ba5cf5248dc660fb8f
3
+ metadata.gz: 215f975cafa2f782f3777ec65dd2fea587b420286f7f5ccf63b6b68376756dfd
4
+ data.tar.gz: e1979a1aa4fd4157cb04ae73c0c62b378046ca97154a25d4355e827be5abc53b
5
5
  SHA512:
6
- metadata.gz: b37957502d147a221b3678c373b648090de7f9101fa1199e1721ffc1ac0e04d99318935866f12295a160c2f1d1d7648e3103355865eb1625b11619b9e52981d6
7
- data.tar.gz: abdb670ea89993a92982c3913b192c6fed4ac6329beb2c0f1abb5aef39ef47993198ecdcdd5431ea057b84aa3931a532a96f5e85d0ab8a6c4faec48a98fc74de
6
+ metadata.gz: 1e54d7c41f8750b99262021e402f329252b428678ee99510dd52a1a113aeffdf35628a23b6ca9408ce55350a237cb2d224e66dfe7aafefd645612a29b74529ff
7
+ data.tar.gz: c8a014bb3708690cf3e463954aa7a846ce79075da039e23b3f94644e50e3c28d05376539699fd8ffe3c3b93d54eecb393fed472bddb60619ba897a5204bc511a
data/CHANGELOG.md CHANGED
@@ -1,3 +1,11 @@
1
+ ## 0.2.1 (2023-06-04)
2
+
3
+ - Added support for Sequel
4
+
5
+ ## 0.2.0 (2023-05-11)
6
+
7
+ - Dropped support for Ruby < 3
8
+
1
9
  ## 0.1.1 (2022-02-08)
2
10
 
3
11
  - Added autoloading
data/LICENSE.txt CHANGED
@@ -1,6 +1,6 @@
1
1
  The MIT License (MIT)
2
2
 
3
- Copyright (c) 2022 Andrew Kane
3
+ Copyright (c) 2022-2023 Andrew Kane
4
4
 
5
5
  Permission is hereby granted, free of charge, to any person obtaining a copy
6
6
  of this software and associated documentation files (the "Software"), to deal
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [pgvector](https://github.com/pgvector/pgvector) support for Ruby
4
4
 
5
- Supports the [pg](https://github.com/ged/ruby-pg) gem
5
+ Supports [pg](https://github.com/ged/ruby-pg) and [Sequel](https://github.com/jeremyevans/sequel)
6
6
 
7
7
  For Rails, check out [Neighbor](https://github.com/ankane/neighbor)
8
8
 
@@ -19,6 +19,13 @@ gem "pgvector"
19
19
  And follow the instructions for your database library:
20
20
 
21
21
  - [pg](#pg)
22
+ - [Sequel](#sequel)
23
+
24
+ Or check out some examples:
25
+
26
+ - [Embeddings](examples/openai_embeddings.rb) with OpenAI
27
+ - [User-based recommendations](examples/disco_user_recs.rb) with Disco
28
+ - [Item-based recommendations](examples/disco_item_recs.rb) with Disco
22
29
 
23
30
  ## pg
24
31
 
@@ -33,14 +40,53 @@ conn.type_map_for_results = PG::BasicTypeMapForResults.new(conn, registry: regis
33
40
  Insert a vector
34
41
 
35
42
  ```ruby
36
- factors = [1, 2, 3]
37
- conn.exec_params("INSERT INTO items (factors) VALUES ($1)", [factors])
43
+ embedding = [1, 2, 3]
44
+ conn.exec_params("INSERT INTO items (embedding) VALUES ($1)", [embedding])
45
+ ```
46
+
47
+ Get the nearest neighbors to a vector
48
+
49
+ ```ruby
50
+ conn.exec_params("SELECT * FROM items ORDER BY embedding <-> $1 LIMIT 5", [embedding]).to_a
51
+ ```
52
+
53
+ ## Sequel
54
+
55
+ Create a table
56
+
57
+ ```ruby
58
+ DB.create_table :items do
59
+ primary_key :id
60
+ column :embedding, "vector(3)"
61
+ end
62
+ ```
63
+
64
+ Add the plugin to your model
65
+
66
+ ```ruby
67
+ class Item < Sequel::Model
68
+ plugin :pgvector, :embedding
69
+ end
70
+ ```
71
+
72
+ Insert a vector
73
+
74
+ ```ruby
75
+ Item.create(embedding: [1, 1, 1])
76
+ ```
77
+
78
+ Get the nearest neighbors to a record
79
+
80
+ ```ruby
81
+ item.nearest_neighbors(:embedding, distance: "euclidean").limit(5)
38
82
  ```
39
83
 
84
+ Also supports `inner_product` and `cosine` distance
85
+
40
86
  Get the nearest neighbors to a vector
41
87
 
42
88
  ```ruby
43
- conn.exec_params("SELECT * FROM items ORDER BY factors <-> $1 LIMIT 5", [factors]).to_a
89
+ Item.nearest_neighbors(:embedding, [1, 1, 1], distance: "euclidean").limit(5)
44
90
  ```
45
91
 
46
92
  ## History
data/lib/pgvector/pg.rb CHANGED
@@ -20,7 +20,7 @@ module Pgvector
20
20
  module TextDecoder
21
21
  class Vector < ::PG::SimpleDecoder
22
22
  def decode(string, tuple = nil, field = nil)
23
- string[1..-2].split(",").map(&:to_f)
23
+ Pgvector.decode(string)
24
24
  end
25
25
  end
26
26
  end
@@ -1,3 +1,3 @@
1
1
  module Pgvector
2
- VERSION = "0.1.1"
2
+ VERSION = "0.2.1"
3
3
  end
data/lib/pgvector.rb CHANGED
@@ -1,5 +1,14 @@
1
- require "pgvector/version"
1
+ # modules
2
+ require_relative "pgvector/version"
2
3
 
3
4
  module Pgvector
4
5
  autoload :PG, "pgvector/pg"
6
+
7
+ def self.encode(data)
8
+ "[#{data.to_a.map(&:to_f).join(",")}]"
9
+ end
10
+
11
+ def self.decode(string)
12
+ string[1..-2].split(",").map(&:to_f)
13
+ end
5
14
  end
@@ -0,0 +1,77 @@
1
+ module Sequel
2
+ module Plugins
3
+ module Pgvector
4
+ def self.configure(model, *columns)
5
+ model.vector_columns ||= {}
6
+ columns.each do |column|
7
+ model.vector_columns[column.to_sym] = {}
8
+ end
9
+ end
10
+
11
+ module ClassMethods
12
+ attr_accessor :vector_columns
13
+
14
+ def nearest_neighbors(column, value, distance:)
15
+ value = ::Pgvector.encode(value) unless value.is_a?(String)
16
+ quoted_column = dataset.quote_identifier(column)
17
+ distance = distance.to_s
18
+
19
+ operator =
20
+ case distance
21
+ when "inner_product"
22
+ "<#>"
23
+ when "cosine"
24
+ "<=>"
25
+ when "euclidean"
26
+ "<->"
27
+ end
28
+
29
+ raise ArgumentError, "Invalid distance: #{distance}" unless operator
30
+
31
+ order = "#{quoted_column} #{operator} ?"
32
+
33
+ neighbor_distance =
34
+ if distance == "inner_product"
35
+ "(#{order}) * -1"
36
+ else
37
+ order
38
+ end
39
+
40
+ select_append(Sequel.lit("#{neighbor_distance} AS neighbor_distance", value))
41
+ .exclude(column => nil)
42
+ .order(Sequel.lit(order, value))
43
+ end
44
+
45
+ Plugins.inherited_instance_variables(self, :@vector_columns => :dup)
46
+ end
47
+
48
+ module InstanceMethods
49
+ def nearest_neighbors(column, **options)
50
+ column = column.to_sym
51
+ # important! check if neighbor attribute before calling send
52
+ raise ArgumentError, "Invalid column" unless self.class.vector_columns[column]
53
+
54
+ self.class
55
+ .nearest_neighbors(column, self[column], **options)
56
+ .exclude(primary_key => self[primary_key])
57
+ end
58
+
59
+ def []=(k, v)
60
+ if self.class.vector_columns.key?(k.to_sym) && !v.is_a?(String)
61
+ super(k, ::Pgvector.encode(v))
62
+ else
63
+ super
64
+ end
65
+ end
66
+
67
+ def [](k)
68
+ if self.class.vector_columns.key?(k.to_sym)
69
+ ::Pgvector.decode(super)
70
+ else
71
+ super
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
77
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pgvector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Andrew Kane
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-02-08 00:00:00.000000000 Z
11
+ date: 2023-06-05 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: andrew@ankane.org
@@ -22,6 +22,7 @@ files:
22
22
  - lib/pgvector.rb
23
23
  - lib/pgvector/pg.rb
24
24
  - lib/pgvector/version.rb
25
+ - lib/sequel/plugins/pgvector.rb
25
26
  homepage: https://github.com/pgvector/pgvector-ruby
26
27
  licenses:
27
28
  - MIT
@@ -34,14 +35,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
34
35
  requirements:
35
36
  - - ">="
36
37
  - !ruby/object:Gem::Version
37
- version: '2.6'
38
+ version: '3'
38
39
  required_rubygems_version: !ruby/object:Gem::Requirement
39
40
  requirements:
40
41
  - - ">="
41
42
  - !ruby/object:Gem::Version
42
43
  version: '0'
43
44
  requirements: []
44
- rubygems_version: 3.3.3
45
+ rubygems_version: 3.4.10
45
46
  signing_key:
46
47
  specification_version: 4
47
48
  summary: pgvector support for Ruby