neighbor 0.2.3 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/README.md +19 -11
- data/lib/neighbor/model.rb +32 -10
- data/lib/neighbor/type/cube.rb +38 -0
- data/lib/neighbor/type/vector.rb +14 -0
- data/lib/neighbor/version.rb +1 -1
- data/lib/neighbor.rb +9 -16
- metadata +7 -5
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 0c8b5d19222742f33f51f2c30f9d03108ebd3ed99908a7e9dd5f4e49caa2e225
         | 
| 4 | 
            +
              data.tar.gz: c9cfa942f2cdd8b9757c9ecfe5e89d0aced11263f8a559004ee15fa0c8adb3f4
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: e9e0050031ce7691baa9242b3b6b5aa76afb1fe7c63575129e68b2f5c027143b3c08f68a7babfcf2a9b02f1d9327679f75e9c40b95ac2245ea7c8dd3025d3cdb
         | 
| 7 | 
            +
              data.tar.gz: a9c505740cba454437617733d4025360848a16ef9a4c9c83fc16d5bc82a3e5521c77e3cba874ef3cf318cf3a1e319567958a6156481f7fd82ef72ebaa87d97eb
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,3 +1,17 @@ | |
| 1 | 
            +
            ## 0.3.2 (2023-12-12)
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            - Added deprecation warning for `has_neighbors` without an attribute name
         | 
| 4 | 
            +
            - Added deprecation warning for `nearest_neighbors` without an attribute name
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            ## 0.3.1 (2023-09-25)
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            - Added support for passing multiple attributes to `has_neighbors`
         | 
| 9 | 
            +
            - Fixed error with `nearest_neighbors` scope with Ruby 3.2 and Active Record 6.1
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            ## 0.3.0 (2023-07-24)
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            - Dropped support for Ruby < 3 and Active Record < 6.1
         | 
| 14 | 
            +
             | 
| 1 15 | 
             
            ## 0.2.3 (2023-04-02)
         | 
| 2 16 |  | 
| 3 17 | 
             
            - Added support for dimensions to model generator
         | 
    
        data/README.md
    CHANGED
    
    | @@ -14,7 +14,7 @@ gem "neighbor" | |
| 14 14 |  | 
| 15 15 | 
             
            ## Choose An Extension
         | 
| 16 16 |  | 
| 17 | 
            -
            Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports approximate nearest neighbor search.
         | 
| 17 | 
            +
            Neighbor supports two extensions: [cube](https://www.postgresql.org/docs/current/cube.html) and [vector](https://github.com/pgvector/pgvector). cube ships with Postgres, while vector supports more dimensions and approximate nearest neighbor search.
         | 
| 18 18 |  | 
| 19 19 | 
             
            For cube, run:
         | 
| 20 20 |  | 
| @@ -35,7 +35,7 @@ rails db:migrate | |
| 35 35 | 
             
            Create a migration
         | 
| 36 36 |  | 
| 37 37 | 
             
            ```ruby
         | 
| 38 | 
            -
            class  | 
| 38 | 
            +
            class AddEmbeddingToItems < ActiveRecord::Migration[7.1]
         | 
| 39 39 | 
             
              def change
         | 
| 40 40 | 
             
                add_column :items, :embedding, :cube
         | 
| 41 41 | 
             
                # or
         | 
| @@ -114,21 +114,29 @@ end | |
| 114 114 | 
             
            For vector, add an approximate index to speed up queries. Create a migration with:
         | 
| 115 115 |  | 
| 116 116 | 
             
            ```ruby
         | 
| 117 | 
            -
            class  | 
| 117 | 
            +
            class AddIndexToItemsEmbedding < ActiveRecord::Migration[7.1]
         | 
| 118 118 | 
             
              def change
         | 
| 119 119 | 
             
                add_index :items, :embedding, using: :ivfflat, opclass: :vector_l2_ops
         | 
| 120 | 
            +
                # or with pgvector 0.5.0+
         | 
| 121 | 
            +
                add_index :items, :embedding, using: :hnsw, opclass: :vector_l2_ops
         | 
| 120 122 | 
             
              end
         | 
| 121 123 | 
             
            end
         | 
| 122 124 | 
             
            ```
         | 
| 123 125 |  | 
| 124 126 | 
             
            Use `:vector_cosine_ops` for cosine distance and `:vector_ip_ops` for inner product.
         | 
| 125 127 |  | 
| 126 | 
            -
            Set the number of probes
         | 
| 128 | 
            +
            Set the number of probes with IVFFlat
         | 
| 127 129 |  | 
| 128 130 | 
             
            ```ruby
         | 
| 129 131 | 
             
            Item.connection.execute("SET ivfflat.probes = 3")
         | 
| 130 132 | 
             
            ```
         | 
| 131 133 |  | 
| 134 | 
            +
            Or the size of the dynamic candidate list with HNSW
         | 
| 135 | 
            +
             | 
| 136 | 
            +
            ```ruby
         | 
| 137 | 
            +
            Item.connection.execute("SET hnsw.ef_search = 100")
         | 
| 138 | 
            +
            ```
         | 
| 139 | 
            +
             | 
| 132 140 | 
             
            ## Examples
         | 
| 133 141 |  | 
| 134 142 | 
             
            - [OpenAI Embeddings](#openai-embeddings)
         | 
| @@ -139,14 +147,14 @@ Item.connection.execute("SET ivfflat.probes = 3") | |
| 139 147 | 
             
            Generate a model
         | 
| 140 148 |  | 
| 141 149 | 
             
            ```sh
         | 
| 142 | 
            -
            rails generate model  | 
| 150 | 
            +
            rails generate model Document content:text embedding:vector{1536}
         | 
| 143 151 | 
             
            rails db:migrate
         | 
| 144 152 | 
             
            ```
         | 
| 145 153 |  | 
| 146 154 | 
             
            And add `has_neighbors`
         | 
| 147 155 |  | 
| 148 156 | 
             
            ```ruby
         | 
| 149 | 
            -
            class  | 
| 157 | 
            +
            class Document < ApplicationRecord
         | 
| 150 158 | 
             
              has_neighbors :embedding
         | 
| 151 159 | 
             
            end
         | 
| 152 160 | 
             
            ```
         | 
| @@ -184,18 +192,18 @@ embeddings = fetch_embeddings(input) | |
| 184 192 | 
             
            Store the embeddings
         | 
| 185 193 |  | 
| 186 194 | 
             
            ```ruby
         | 
| 187 | 
            -
             | 
| 195 | 
            +
            documents = []
         | 
| 188 196 | 
             
            input.zip(embeddings) do |content, embedding|
         | 
| 189 | 
            -
               | 
| 197 | 
            +
              documents << {content: content, embedding: embedding}
         | 
| 190 198 | 
             
            end
         | 
| 191 | 
            -
             | 
| 199 | 
            +
            Document.insert_all!(documents)
         | 
| 192 200 | 
             
            ```
         | 
| 193 201 |  | 
| 194 202 | 
             
            And get similar articles
         | 
| 195 203 |  | 
| 196 204 | 
             
            ```ruby
         | 
| 197 | 
            -
             | 
| 198 | 
            -
             | 
| 205 | 
            +
            document = Document.first
         | 
| 206 | 
            +
            document.nearest_neighbors(:embedding, distance: "cosine").first(5).map(&:content)
         | 
| 199 207 | 
             
            ```
         | 
| 200 208 |  | 
| 201 209 | 
             
            See the [complete code](examples/openai_embeddings.rb)
         | 
    
        data/lib/neighbor/model.rb
    CHANGED
    
    | @@ -1,7 +1,12 @@ | |
| 1 1 | 
             
            module Neighbor
         | 
| 2 2 | 
             
              module Model
         | 
| 3 | 
            -
                def has_neighbors( | 
| 4 | 
            -
                   | 
| 3 | 
            +
                def has_neighbors(*attribute_names, dimensions: nil, normalize: nil)
         | 
| 4 | 
            +
                  if attribute_names.empty?
         | 
| 5 | 
            +
                    warn "[neighbor] has_neighbors without an attribute name is deprecated"
         | 
| 6 | 
            +
                    attribute_names << :neighbor_vector
         | 
| 7 | 
            +
                  else
         | 
| 8 | 
            +
                    attribute_names.map!(&:to_sym)
         | 
| 9 | 
            +
                  end
         | 
| 5 10 |  | 
| 6 11 | 
             
                  class_eval do
         | 
| 7 12 | 
             
                    @neighbor_attributes ||= {}
         | 
| @@ -19,15 +24,28 @@ module Neighbor | |
| 19 24 | 
             
                      end
         | 
| 20 25 | 
             
                    end
         | 
| 21 26 |  | 
| 22 | 
            -
                     | 
| 23 | 
            -
             | 
| 27 | 
            +
                    attribute_names.each do |attribute_name|
         | 
| 28 | 
            +
                      raise Error, "has_neighbors already called for #{attribute_name.inspect}" if neighbor_attributes[attribute_name]
         | 
| 29 | 
            +
                      @neighbor_attributes[attribute_name] = {dimensions: dimensions, normalize: normalize}
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                      attribute attribute_name, Neighbor::Vector.new(dimensions: dimensions, normalize: normalize, model: self, attribute_name: attribute_name)
         | 
| 32 | 
            +
                    end
         | 
| 24 33 |  | 
| 25 | 
            -
                     | 
| 34 | 
            +
                    return if @neighbor_attributes.size != attribute_names.size
         | 
| 26 35 |  | 
| 27 | 
            -
                     | 
| 36 | 
            +
                    scope :nearest_neighbors, ->(attribute_name, vector = nil, options = nil) {
         | 
| 37 | 
            +
                      # cannot use keyword arguments with scope with Ruby 3.2 and Active Record 6.1
         | 
| 38 | 
            +
                      # https://github.com/rails/rails/issues/46934
         | 
| 39 | 
            +
                      if options.nil? && vector.is_a?(Hash)
         | 
| 40 | 
            +
                        options = vector
         | 
| 41 | 
            +
                        vector = nil
         | 
| 42 | 
            +
                      end
         | 
| 43 | 
            +
                      raise ArgumentError, "missing keyword: :distance" unless options.is_a?(Hash) && options.key?(:distance)
         | 
| 44 | 
            +
                      distance = options.delete(:distance)
         | 
| 45 | 
            +
                      raise ArgumentError, "unknown keywords: #{options.keys.map(&:inspect).join(", ")}" if options.any?
         | 
| 28 46 |  | 
| 29 | 
            -
                    scope :nearest_neighbors, ->(attribute_name, vector = nil, distance:) {
         | 
| 30 47 | 
             
                      if vector.nil? && !attribute_name.nil? && attribute_name.respond_to?(:to_a)
         | 
| 48 | 
            +
                        warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
         | 
| 31 49 | 
             
                        vector = attribute_name
         | 
| 32 50 | 
             
                        attribute_name = :neighbor_vector
         | 
| 33 51 | 
             
                      end
         | 
| @@ -107,14 +125,18 @@ module Neighbor | |
| 107 125 | 
             
                        .order(Arel.sql(order))
         | 
| 108 126 | 
             
                    }
         | 
| 109 127 |  | 
| 110 | 
            -
                    def nearest_neighbors(attribute_name =  | 
| 128 | 
            +
                    def nearest_neighbors(attribute_name = nil, **options)
         | 
| 129 | 
            +
                      if attribute_name.nil?
         | 
| 130 | 
            +
                        warn "[neighbor] nearest_neighbors without an attribute name is deprecated"
         | 
| 131 | 
            +
                        attribute_name = :neighbor_vector
         | 
| 132 | 
            +
                      end
         | 
| 111 133 | 
             
                      attribute_name = attribute_name.to_sym
         | 
| 112 134 | 
             
                      # important! check if neighbor attribute before calling send
         | 
| 113 135 | 
             
                      raise ArgumentError, "Invalid attribute" unless self.class.neighbor_attributes[attribute_name]
         | 
| 114 136 |  | 
| 115 137 | 
             
                      self.class
         | 
| 116 | 
            -
                        .where.not(self.class.primary_key =>  | 
| 117 | 
            -
                        .nearest_neighbors(attribute_name,  | 
| 138 | 
            +
                        .where.not(self.class.primary_key => self[self.class.primary_key])
         | 
| 139 | 
            +
                        .nearest_neighbors(attribute_name, self[attribute_name], **options)
         | 
| 118 140 | 
             
                    end
         | 
| 119 141 | 
             
                  end
         | 
| 120 142 | 
             
                end
         | 
| @@ -0,0 +1,38 @@ | |
| 1 | 
            +
            module Neighbor
         | 
| 2 | 
            +
              module Type
         | 
| 3 | 
            +
                class Cube < ActiveRecord::Type::String
         | 
| 4 | 
            +
                  def type
         | 
| 5 | 
            +
                    :cube
         | 
| 6 | 
            +
                  end
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                  def cast(value)
         | 
| 9 | 
            +
                    if value.is_a?(Array)
         | 
| 10 | 
            +
                      if value.first.is_a?(Array)
         | 
| 11 | 
            +
                        value.map { |v| cast_point(v) }.join(", ")
         | 
| 12 | 
            +
                      else
         | 
| 13 | 
            +
                        cast_point(value)
         | 
| 14 | 
            +
                      end
         | 
| 15 | 
            +
                    else
         | 
| 16 | 
            +
                      super
         | 
| 17 | 
            +
                    end
         | 
| 18 | 
            +
                  end
         | 
| 19 | 
            +
             | 
| 20 | 
            +
                  # TODO uncomment in 0.4.0
         | 
| 21 | 
            +
                  # def deserialize(value)
         | 
| 22 | 
            +
                  #   if value.nil?
         | 
| 23 | 
            +
                  #     super
         | 
| 24 | 
            +
                  #   elsif value.include?("),(")
         | 
| 25 | 
            +
                  #     value[1..-1].split("),(").map { |v| v.split(",").map(&:to_f) }
         | 
| 26 | 
            +
                  #   else
         | 
| 27 | 
            +
                  #     value[1..-1].split(",").map(&:to_f)
         | 
| 28 | 
            +
                  #   end
         | 
| 29 | 
            +
                  # end
         | 
| 30 | 
            +
             | 
| 31 | 
            +
                  private
         | 
| 32 | 
            +
             | 
| 33 | 
            +
                  def cast_point(value)
         | 
| 34 | 
            +
                    "(#{value.map(&:to_f).join(", ")})"
         | 
| 35 | 
            +
                  end
         | 
| 36 | 
            +
                end
         | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
            end
         | 
    
        data/lib/neighbor/version.rb
    CHANGED
    
    
    
        data/lib/neighbor.rb
    CHANGED
    
    | @@ -2,7 +2,7 @@ | |
| 2 2 | 
             
            require "active_support"
         | 
| 3 3 |  | 
| 4 4 | 
             
            # modules
         | 
| 5 | 
            -
             | 
| 5 | 
            +
            require_relative "neighbor/version"
         | 
| 6 6 |  | 
| 7 7 | 
             
            module Neighbor
         | 
| 8 8 | 
             
              class Error < StandardError; end
         | 
| @@ -10,18 +10,20 @@ module Neighbor | |
| 10 10 | 
             
              module RegisterTypes
         | 
| 11 11 | 
             
                def initialize_type_map(m = type_map)
         | 
| 12 12 | 
             
                  super
         | 
| 13 | 
            -
                  m.register_type "cube",  | 
| 13 | 
            +
                  m.register_type "cube", Type::Cube.new
         | 
| 14 14 | 
             
                  m.register_type "vector" do |_, _, sql_type|
         | 
| 15 15 | 
             
                    limit = extract_limit(sql_type)
         | 
| 16 | 
            -
                     | 
| 16 | 
            +
                    Type::Vector.new(limit: limit)
         | 
| 17 17 | 
             
                  end
         | 
| 18 18 | 
             
                end
         | 
| 19 19 | 
             
              end
         | 
| 20 20 | 
             
            end
         | 
| 21 21 |  | 
| 22 22 | 
             
            ActiveSupport.on_load(:active_record) do
         | 
| 23 | 
            -
               | 
| 24 | 
            -
               | 
| 23 | 
            +
              require_relative "neighbor/model"
         | 
| 24 | 
            +
              require_relative "neighbor/vector"
         | 
| 25 | 
            +
              require_relative "neighbor/type/cube"
         | 
| 26 | 
            +
              require_relative "neighbor/type/vector"
         | 
| 25 27 |  | 
| 26 28 | 
             
              extend Neighbor::Model
         | 
| 27 29 |  | 
| @@ -32,16 +34,7 @@ ActiveSupport.on_load(:active_record) do | |
| 32 34 | 
             
              ActiveRecord::ConnectionAdapters::PostgreSQLAdapter::NATIVE_DATABASE_TYPES[:vector] = {name: "vector"}
         | 
| 33 35 |  | 
| 34 36 | 
             
              # ensure schema can be loaded
         | 
| 35 | 
            -
               | 
| 36 | 
            -
                ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube, :vector)
         | 
| 37 | 
            -
              else
         | 
| 38 | 
            -
                ActiveRecord::ConnectionAdapters::TableDefinition.define_method :cube do |*args, **options|
         | 
| 39 | 
            -
                  args.each { |name| column(name, :cube, options) }
         | 
| 40 | 
            -
                end
         | 
| 41 | 
            -
                ActiveRecord::ConnectionAdapters::TableDefinition.define_method :vector do |*args, **options|
         | 
| 42 | 
            -
                  args.each { |name| column(name, :vector, options) }
         | 
| 43 | 
            -
                end
         | 
| 44 | 
            -
              end
         | 
| 37 | 
            +
              ActiveRecord::ConnectionAdapters::TableDefinition.send(:define_column_methods, :cube, :vector)
         | 
| 45 38 |  | 
| 46 39 | 
             
              # prevent unknown OID warning
         | 
| 47 40 | 
             
              if ActiveRecord::VERSION::MAJOR >= 7
         | 
| @@ -51,4 +44,4 @@ ActiveSupport.on_load(:active_record) do | |
| 51 44 | 
             
              end
         | 
| 52 45 | 
             
            end
         | 
| 53 46 |  | 
| 54 | 
            -
             | 
| 47 | 
            +
            require_relative "neighbor/railtie" if defined?(Rails::Railtie)
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: neighbor
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.2 | 
| 4 | 
            +
              version: 0.3.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrew Kane
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023- | 
| 11 | 
            +
            date: 2023-12-12 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: activerecord
         | 
| @@ -16,14 +16,14 @@ dependencies: | |
| 16 16 | 
             
                requirements:
         | 
| 17 17 | 
             
                - - ">="
         | 
| 18 18 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            -
                    version: ' | 
| 19 | 
            +
                    version: '6.1'
         | 
| 20 20 | 
             
              type: :runtime
         | 
| 21 21 | 
             
              prerelease: false
         | 
| 22 22 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 23 | 
             
                requirements:
         | 
| 24 24 | 
             
                - - ">="
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            -
                    version: ' | 
| 26 | 
            +
                    version: '6.1'
         | 
| 27 27 | 
             
            description:
         | 
| 28 28 | 
             
            email: andrew@ankane.org
         | 
| 29 29 | 
             
            executables: []
         | 
| @@ -40,6 +40,8 @@ files: | |
| 40 40 | 
             
            - lib/neighbor.rb
         | 
| 41 41 | 
             
            - lib/neighbor/model.rb
         | 
| 42 42 | 
             
            - lib/neighbor/railtie.rb
         | 
| 43 | 
            +
            - lib/neighbor/type/cube.rb
         | 
| 44 | 
            +
            - lib/neighbor/type/vector.rb
         | 
| 43 45 | 
             
            - lib/neighbor/vector.rb
         | 
| 44 46 | 
             
            - lib/neighbor/version.rb
         | 
| 45 47 | 
             
            homepage: https://github.com/ankane/neighbor
         | 
| @@ -54,7 +56,7 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 54 56 | 
             
              requirements:
         | 
| 55 57 | 
             
              - - ">="
         | 
| 56 58 | 
             
                - !ruby/object:Gem::Version
         | 
| 57 | 
            -
                  version: ' | 
| 59 | 
            +
                  version: '3'
         | 
| 58 60 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 59 61 | 
             
              requirements:
         | 
| 60 62 | 
             
              - - ">="
         |