langchainrb 0.6.8 → 0.6.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 4d046a7cae545da3092694fee60b0fa3caa0852492c9bf7dbbe35001d96106e4
         | 
| 4 | 
            +
              data.tar.gz: 8ed841d2359400f9a4700a49b82030fbe3dd3563545eca31b487914e8fb236f7
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: eed2f03cbdd74ef42fafd07524dc6bf5c7ac2278e5e55dd1aad5b2cd87f319a91c8719a6e9ac43ac50cda5525f3defab14cfecc749bdec81075d52b3935b23dd
         | 
| 7 | 
            +
              data.tar.gz: cd4fce88b0f9f545f6d32660879e6fa5044ec74b41dcb41bc29eafb78b307d8b2cb8204856098657d8c9707331c8629c8a707a227fe4d20a5816347f8a66991c
         | 
    
        data/CHANGELOG.md
    CHANGED
    
    
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                langchainrb (0.6. | 
| 4 | 
            +
                langchainrb (0.6.9)
         | 
| 5 5 | 
             
                  baran (~> 0.1.6)
         | 
| 6 6 | 
             
                  colorize (~> 0.8.1)
         | 
| 7 7 | 
             
                  json-schema (~> 4.0.0)
         | 
| @@ -34,7 +34,7 @@ GEM | |
| 34 34 | 
             
                afm (0.2.2)
         | 
| 35 35 | 
             
                ai21 (0.2.1)
         | 
| 36 36 | 
             
                ast (2.4.2)
         | 
| 37 | 
            -
                baran (0.1. | 
| 37 | 
            +
                baran (0.1.7)
         | 
| 38 38 | 
             
                builder (3.2.4)
         | 
| 39 39 | 
             
                byebug (11.1.3)
         | 
| 40 40 | 
             
                childprocess (4.1.0)
         | 
| @@ -189,7 +189,7 @@ GEM | |
| 189 189 | 
             
                  ruby-rc4
         | 
| 190 190 | 
             
                  ttfunk
         | 
| 191 191 | 
             
                pg (1.5.3)
         | 
| 192 | 
            -
                pgvector (0.2. | 
| 192 | 
            +
                pgvector (0.2.1)
         | 
| 193 193 | 
             
                pinecone (0.1.71)
         | 
| 194 194 | 
             
                  dry-struct (~> 1.6.0)
         | 
| 195 195 | 
             
                  dry-validation (~> 1.10.0)
         | 
| @@ -334,7 +334,7 @@ DEPENDENCIES | |
| 334 334 | 
             
              open-weather-ruby-client (~> 0.3.0)
         | 
| 335 335 | 
             
              pdf-reader (~> 1.4)
         | 
| 336 336 | 
             
              pg (~> 1.5)
         | 
| 337 | 
            -
              pgvector (~> 0.2)
         | 
| 337 | 
            +
              pgvector (~> 0.2.1)
         | 
| 338 338 | 
             
              pinecone (~> 0.1.6)
         | 
| 339 339 | 
             
              pry-byebug (~> 3.10.0)
         | 
| 340 340 | 
             
              qdrant-ruby (~> 0.9.0)
         | 
| @@ -353,4 +353,4 @@ DEPENDENCIES | |
| 353 353 | 
             
              yard
         | 
| 354 354 |  | 
| 355 355 | 
             
            BUNDLED WITH
         | 
| 356 | 
            -
               2. | 
| 356 | 
            +
               2.3.22
         | 
| @@ -22,7 +22,7 @@ module Langchain | |
| 22 22 | 
             
                      leftover_tokens = token_limit(model_name) - text_token_length
         | 
| 23 23 |  | 
| 24 24 | 
             
                      # Raise an error only when the prompt exceeds the model's token limit (leftover_tokens < 0)
         | 
| 25 | 
            -
                      if leftover_tokens  | 
| 25 | 
            +
                      if leftover_tokens < 0
         | 
| 26 26 | 
             
                        raise limit_exceeded_exception(token_limit(model_name), text_token_length)
         | 
| 27 27 | 
             
                      end
         | 
| 28 28 |  | 
| @@ -8,38 +8,45 @@ module Langchain::Vectorsearch | |
| 8 8 | 
             
                # Gem requirements: gem "sequel" and gem "pgvector", "~> 0.2"
         | 
| 9 9 | 
             
                #
         | 
| 10 10 | 
             
                # Usage:
         | 
| 11 | 
            -
                # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:,  | 
| 11 | 
            +
                # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
         | 
| 12 12 | 
             
                #
         | 
| 13 13 |  | 
| 14 14 | 
             
                # The operators supported by the PostgreSQL vector search adapter
         | 
| 15 15 | 
             
                OPERATORS = {
         | 
| 16 | 
            -
                  "cosine_distance" => " | 
| 17 | 
            -
                  "euclidean_distance" => " | 
| 16 | 
            +
                  "cosine_distance" => "cosine",
         | 
| 17 | 
            +
                  "euclidean_distance" => "euclidean"
         | 
| 18 18 | 
             
                }
         | 
| 19 19 | 
             
                DEFAULT_OPERATOR = "cosine_distance"
         | 
| 20 20 |  | 
| 21 | 
            -
                attr_reader :operator, : | 
| 21 | 
            +
                attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
         | 
| 22 22 |  | 
| 23 23 | 
             
                # @param url [String] The URL of the PostgreSQL database
         | 
| 24 24 | 
             
                # @param index_name [String] The name of the table to use for the index
         | 
| 25 25 | 
             
                # @param llm [Object] The LLM client to use
         | 
| 26 | 
            -
                # @param  | 
| 27 | 
            -
                def initialize(url:, index_name:, llm:,  | 
| 28 | 
            -
                   | 
| 26 | 
            +
                # @param namespace [String] The namespace to use for the index when inserting/querying
         | 
| 27 | 
            +
                def initialize(url:, index_name:, llm:, namespace: nil)
         | 
| 28 | 
            +
                  depends_on "sequel"
         | 
| 29 | 
            +
                  require "sequel"
         | 
| 30 | 
            +
                  depends_on "pgvector"
         | 
| 29 31 | 
             
                  require "pgvector"
         | 
| 30 32 |  | 
| 31 | 
            -
                  @ | 
| 32 | 
            -
                  registry = ::PG::BasicTypeRegistry.new.define_default_types
         | 
| 33 | 
            -
                  ::Pgvector::PG.register_vector(registry)
         | 
| 34 | 
            -
                  @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
         | 
| 33 | 
            +
                  @db = Sequel.connect(url)
         | 
| 35 34 |  | 
| 36 | 
            -
                  @ | 
| 37 | 
            -
             | 
| 35 | 
            +
                  @table_name = index_name
         | 
| 36 | 
            +
             | 
| 37 | 
            +
                  @namespace_column = "namespace"
         | 
| 38 | 
            +
                  @namespace = namespace
         | 
| 38 39 | 
             
                  @operator = OPERATORS[DEFAULT_OPERATOR]
         | 
| 39 40 |  | 
| 40 41 | 
             
                  super(llm: llm)
         | 
| 41 42 | 
             
                end
         | 
| 42 43 |  | 
| 44 | 
            +
                def documents_model
         | 
| 45 | 
            +
                  Class.new(Sequel::Model(table_name.to_sym)) do
         | 
| 46 | 
            +
                    plugin :pgvector, :vectors
         | 
| 47 | 
            +
                  end
         | 
| 48 | 
            +
                end
         | 
| 49 | 
            +
             | 
| 43 50 | 
             
                # Upsert a list of texts to the index
         | 
| 44 51 | 
             
                # @param texts [Array<String>] The texts to add to the index
         | 
| 45 52 | 
             
                # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
         | 
| @@ -47,32 +54,28 @@ module Langchain::Vectorsearch | |
| 47 54 | 
             
                # the added or updated texts.
         | 
| 48 55 | 
             
                def upsert_texts(texts:, ids:)
         | 
| 49 56 | 
             
                  data = texts.zip(ids).flat_map do |(text, id)|
         | 
| 50 | 
            -
                     | 
| 57 | 
            +
                    {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
         | 
| 51 58 | 
             
                  end
         | 
| 52 | 
            -
                   | 
| 53 | 
            -
                   | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
                     | 
| 58 | 
            -
             | 
| 59 | 
            +
                  # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
         | 
| 60 | 
            +
                  @db[table_name.to_sym]
         | 
| 61 | 
            +
                    .insert_conflict(
         | 
| 62 | 
            +
                      target: :id,
         | 
| 63 | 
            +
                      update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
         | 
| 64 | 
            +
                    )
         | 
| 65 | 
            +
                    .multi_insert(data, return: :primary_key)
         | 
| 59 66 | 
             
                end
         | 
| 60 67 |  | 
| 61 68 | 
             
                # Add a list of texts to the index
         | 
| 62 69 | 
             
                # @param texts [Array<String>] The texts to add to the index
         | 
| 63 70 | 
             
                # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
         | 
| 64 | 
            -
                # @return [ | 
| 65 | 
            -
                # the added texts.
         | 
| 71 | 
            +
                # @return [Array<Integer>] The ids of the added texts.
         | 
| 66 72 | 
             
                def add_texts(texts:, ids: nil)
         | 
| 67 73 | 
             
                  if ids.nil? || ids.empty?
         | 
| 68 | 
            -
                    data = texts. | 
| 69 | 
            -
                       | 
| 74 | 
            +
                    data = texts.map do |text|
         | 
| 75 | 
            +
                      {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
         | 
| 70 76 | 
             
                    end
         | 
| 71 | 
            -
             | 
| 72 | 
            -
                     | 
| 73 | 
            -
                      "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
         | 
| 74 | 
            -
                      data
         | 
| 75 | 
            -
                    )
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    @db[table_name.to_sym].multi_insert(data, return: :primary_key)
         | 
| 76 79 | 
             
                  else
         | 
| 77 80 | 
             
                    upsert_texts(texts: texts, ids: ids)
         | 
| 78 81 | 
             
                  end
         | 
| @@ -81,8 +84,7 @@ module Langchain::Vectorsearch | |
| 81 84 | 
             
                # Update a list of ids and corresponding texts to the index
         | 
| 82 85 | 
             
                # @param texts [Array<String>] The texts to add to the index
         | 
| 83 86 | 
             
                # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
         | 
| 84 | 
            -
                # @return [ | 
| 85 | 
            -
                # the updated texts.
         | 
| 87 | 
            +
                # @return [Array<Integer>] The ids of the updated texts.
         | 
| 86 88 | 
             
                def update_texts(texts:, ids:)
         | 
| 87 89 | 
             
                  upsert_texts(texts: texts, ids: ids)
         | 
| 88 90 | 
             
                end
         | 
| @@ -90,16 +92,15 @@ module Langchain::Vectorsearch | |
| 90 92 | 
             
                # Create default schema
         | 
| 91 93 | 
             
                # @return [void] Enables the vector extension and creates the table if it does not exist
         | 
| 92 94 | 
             
                def create_default_schema
         | 
| 93 | 
            -
                   | 
| 94 | 
            -
                   | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 102 | 
            -
                  )
         | 
| 95 | 
            +
                  db.run "CREATE EXTENSION IF NOT EXISTS vector"
         | 
| 96 | 
            +
                  namespace = namespace_column
         | 
| 97 | 
            +
                  vector_dimension = default_dimension
         | 
| 98 | 
            +
                  db.create_table? table_name.to_sym do
         | 
| 99 | 
            +
                    primary_key :id
         | 
| 100 | 
            +
                    text :content
         | 
| 101 | 
            +
                    column :vectors, "vector(#{vector_dimension})"
         | 
| 102 | 
            +
                    text namespace.to_sym, default: nil
         | 
| 103 | 
            +
                  end
         | 
| 103 104 | 
             
                end
         | 
| 104 105 |  | 
| 105 106 | 
             
                # TODO: Add destroy_default_schema method
         | 
| @@ -123,15 +124,11 @@ module Langchain::Vectorsearch | |
| 123 124 | 
             
                # @param k [Integer] The number of top results to return
         | 
| 124 125 | 
             
                # @return [Array<Hash>] The results of the search
         | 
| 125 126 | 
             
                def similarity_search_by_vector(embedding:, k: 4)
         | 
| 126 | 
            -
                   | 
| 127 | 
            -
                     | 
| 128 | 
            -
             | 
| 129 | 
            -
                       | 
| 130 | 
            -
                    SQL
         | 
| 131 | 
            -
                    conn.exec_params(query, [embedding, k])
         | 
| 127 | 
            +
                  db.transaction do # BEGIN
         | 
| 128 | 
            +
                    documents_model
         | 
| 129 | 
            +
                      .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
         | 
| 130 | 
            +
                      .where(namespace_column.to_sym => namespace)
         | 
| 132 131 | 
             
                  end
         | 
| 133 | 
            -
             | 
| 134 | 
            -
                  result.to_a
         | 
| 135 132 | 
             
                end
         | 
| 136 133 |  | 
| 137 134 | 
             
                # Ask a question and return the answer
         | 
| @@ -142,7 +139,7 @@ module Langchain::Vectorsearch | |
| 142 139 | 
             
                  search_results = similarity_search(query: question)
         | 
| 143 140 |  | 
| 144 141 | 
             
                  context = search_results.map do |result|
         | 
| 145 | 
            -
                    result | 
| 142 | 
            +
                    result.content.to_s
         | 
| 146 143 | 
             
                  end
         | 
| 147 144 | 
             
                  context = context.join("\n---\n")
         | 
| 148 145 |  | 
    
        data/lib/langchain/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: langchainrb
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.6. | 
| 4 | 
            +
              version: 0.6.9
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Andrei Bondarev
         | 
| 8 8 | 
             
            autorequire:
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2023-07- | 
| 11 | 
            +
            date: 2023-07-29 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: baran
         | 
| @@ -324,14 +324,14 @@ dependencies: | |
| 324 324 | 
             
                requirements:
         | 
| 325 325 | 
             
                - - "~>"
         | 
| 326 326 | 
             
                  - !ruby/object:Gem::Version
         | 
| 327 | 
            -
                    version:  | 
| 327 | 
            +
                    version: 0.2.1
         | 
| 328 328 | 
             
              type: :development
         | 
| 329 329 | 
             
              prerelease: false
         | 
| 330 330 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 331 331 | 
             
                requirements:
         | 
| 332 332 | 
             
                - - "~>"
         | 
| 333 333 | 
             
                  - !ruby/object:Gem::Version
         | 
| 334 | 
            -
                    version:  | 
| 334 | 
            +
                    version: 0.2.1
         | 
| 335 335 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 336 336 | 
             
              name: pdf-reader
         | 
| 337 337 | 
             
              requirement: !ruby/object:Gem::Requirement
         |