RubyGems - langchainrb - Versions diffs - 0.6.8 → 0.6.10 - Mend

langchainrb 0.6.8 → 0.6.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

checksums.yaml +4 -4
data/.env.example +1 -0
data/CHANGELOG.md +8 -0
data/Gemfile.lock +9 -5
data/README.md +7 -1
data/lib/langchain/llm/anthropic.rb +62 -0
data/lib/langchain/utils/token_length/base_validator.rb +1 -1
data/lib/langchain/utils/token_length/openai_validator.rb +1 -0
data/lib/langchain/vectorsearch/milvus.rb +1 -1
data/lib/langchain/vectorsearch/pgvector.rb +48 -51
data/lib/langchain/version.rb +1 -1
data/lib/langchain.rb +1 -0
metadata +19 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e8446cff4207c4784ce29233712430d50c2cbaa551cfe8ad27f413f27ad8346b
-  data.tar.gz: 8c7105302b673479be9a840ed1a19c815cf5a0415195db5373ce739c472dc2f3
+  metadata.gz: ba1fb0e3fbc05e4279fe3a698ad8fb1a25e02788991a8e6a7b27b411771096f3
+  data.tar.gz: 309cda1c8c7a4982b22c6ad2f82c20fb12ca3bdfdc3e8c0ebeaa9687a7f71ce0
 SHA512:
-  metadata.gz: d7bd52c8d161a2d26a1e717711053267ed3b78def7efa2e9411017d324e278baceb1d88eb709cb701986201b785e1ad8ee455948b1bba59d6cb478226b1b5df4
-  data.tar.gz: eeb639084a8990e1d36d035e6cd6d725377cecc61b3116f70254787f5d591a387c6f53e308dd482d2a96257f231866cffdf6326c516dcf78e60017817d8ebb93
+  metadata.gz: 3457cbad7efbc5504f4cb3b684e3837984be5d486c1ee21a718508d606dd63ccc2223dffc55f3cc9c52f5fd0a533b364407bbb4c3208515e2bab8ca2af9ea60a
+  data.tar.gz: 1ff3cded239c286ee87d7a2f0a1cfa45c734b185e35b6e6fbc63fd2951f7c7397e8562fa145a16f545ef07b2e983f40715010e9c96653b5f2bf128325a5a7577

data/.env.example CHANGED Viewed

@@ -1,4 +1,5 @@
 AI21_API_KEY=
+ANTHROPIC_API_KEY=
 CHROMA_URL=
 COHERE_API_KEY=
 GOOGLE_PALM_API_KEY=

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,13 @@
 ## [Unreleased]
+## [0.6.10] - 2023-08-01
+- 🗣️ LLMs
+  - Introducing Anthropic support
+## [0.6.9] - 2023-07-29
+## [0.6.8] - 2023-07-21
 ## [0.6.7] - 2023-07-19
 - Support for OpenAI functions
 - Streaming vectorsearch ask() responses

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.6.8)
+    langchainrb (0.6.10)
       baran (~> 0.1.6)
       colorize (~> 0.8.1)
       json-schema (~> 4.0.0)
@@ -33,8 +33,11 @@ GEM
       public_suffix (>= 2.0.2, < 6.0)
     afm (0.2.2)
     ai21 (0.2.1)
+    anthropic (0.1.0)
+      faraday (>= 1)
+      faraday-multipart (>= 1)
     ast (2.4.2)
-    baran (0.1.6)
+    baran (0.1.7)
     builder (3.2.4)
     byebug (11.1.3)
     childprocess (4.1.0)
@@ -189,7 +192,7 @@ GEM
       ruby-rc4
       ttfunk
     pg (1.5.3)
-    pgvector (0.2.0)
+    pgvector (0.2.1)
     pinecone (0.1.71)
       dry-struct (~> 1.6.0)
       dry-validation (~> 1.10.0)
@@ -318,6 +321,7 @@ PLATFORMS
 DEPENDENCIES
   ai21 (~> 0.2.1)
+  anthropic (~> 0.1.0)
   chroma-db (~> 0.3.0)
   cohere-ruby (~> 0.9.5)
   docx (~> 0.8.0)
@@ -334,7 +338,7 @@ DEPENDENCIES
   open-weather-ruby-client (~> 0.3.0)
   pdf-reader (~> 1.4)
   pg (~> 1.5)
-  pgvector (~> 0.2)
+  pgvector (~> 0.2.1)
   pinecone (~> 0.1.6)
   pry-byebug (~> 3.10.0)
   qdrant-ruby (~> 0.9.0)
@@ -353,4 +357,4 @@ DEPENDENCIES
   yard
 BUNDLED WITH
-   2.4.0
+   2.3.22

data/README.md CHANGED Viewed

@@ -203,6 +203,12 @@ Add `gem "ai21", "~> 0.2.1"` to your Gemfile.
 ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
 ```
+#### Anthropic
+Add `gem "anthropic", "~> 0.1.0"` to your Gemfile.
+```ruby
+anthropic = Langchain::LLM::Anthropic.new(api_key: ENV["ANTHROPIC_API_KEY"])
+```
 ### Using Prompts 📋
 #### Prompt Templates
@@ -537,7 +543,7 @@ Join us in the [Langchain.rb](https://discord.gg/WDARp7J2n8) Discord server.
 ## Contributing
-Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchain.
+Bug reports and pull requests are welcome on GitHub at https://github.com/andreibondarev/langchainrb.
 ## License

data/lib/langchain/llm/anthropic.rb ADDED Viewed

@@ -0,0 +1,62 @@
+# frozen_string_literal: true
+module Langchain::LLM
+  #
+  # Wrapper around Anthropic APIs.
+  #
+  # Gem requirements:
+  #   gem "anthropic", "~> 0.1.0"
+  #
+  # Usage:
+    #     anthropic = Langchain::LLM::Anthropic.new(api_key:)
+  #
+  class Anthropic < Base
+    DEFAULTS = {
+      temperature: 0.0,
+      completion_model_name: "claude-2"
+    }.freeze
+    # TODO: Implement token length validator for Anthropic
+    # LENGTH_VALIDATOR = Langchain::Utils::TokenLength::AnthropicValidator
+    def initialize(api_key:, llm_options: {}, default_options: {})
+      depends_on "anthropic"
+      require "anthropic"
+      @client = ::Anthropic::Client.new(access_token: api_key, **llm_options)
+      @defaults = DEFAULTS.merge(default_options)
+    end
+    #
+    # Generate a completion for a given prompt
+    #
+    # @param prompt [String] The prompt to generate a completion for
+    # @param params [Hash] extra parameters passed to Anthropic::Client#complete
+    # @return [String] The completion
+    #
+    def complete(prompt:, **params)
+      parameters = compose_parameters @defaults[:completion_model_name], params
+      parameters[:prompt] = prompt
+      # TODO: Implement token length validator for Anthropic
+      # parameters[:max_tokens_to_sample] = validate_max_tokens(prompt, parameters[:completion_model_name])
+      response = client.complete(parameters: parameters)
+      response.dig("completion")
+    end
+    private
+    def compose_parameters(model, params)
+      default_params = {model: model, temperature: @defaults[:temperature]}
+      default_params.merge(params)
+    end
+    # TODO: Implement token length validator for Anthropic
+    # def validate_max_tokens(messages, model)
+    #   LENGTH_VALIDATOR.validate_max_tokens!(messages, model)
+    # end
+  end
+end

data/lib/langchain/utils/token_length/base_validator.rb CHANGED Viewed

@@ -22,7 +22,7 @@ module Langchain
           leftover_tokens = token_limit(model_name) - text_token_length
           # Raise an error only if the prompt exceeds the model's token limit (leftover_tokens < 0)
-          if leftover_tokens <= 0
+          if leftover_tokens < 0
             raise limit_exceeded_exception(token_limit(model_name), text_token_length)
           end

data/lib/langchain/utils/token_length/openai_validator.rb CHANGED Viewed

@@ -25,6 +25,7 @@ module Langchain
           "code-davinci-002" => 8001,
           "gpt-4" => 8192,
           "gpt-4-0314" => 8192,
+          "gpt-4-0613" => 8192,
           "gpt-4-32k" => 32768,
           "gpt-4-32k-0314" => 32768,
           "gpt-4-32k-0613" => 32768,

data/lib/langchain/vectorsearch/milvus.rb CHANGED Viewed

@@ -8,7 +8,7 @@ module Langchain::Vectorsearch
     # Gem requirements: gem "milvus", "~> 0.9.0"
     #
     # Usage:
-    # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, llm_api_key:)
+    # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
     #
     def initialize(url:, index_name:, llm:, api_key: nil)

data/lib/langchain/vectorsearch/pgvector.rb CHANGED Viewed

@@ -8,38 +8,45 @@ module Langchain::Vectorsearch
     # Gem requirements: gem "pgvector", "~> 0.2"
     #
     # Usage:
-    # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, llm_api_key:)
+    # pgvector = Langchain::Vectorsearch::Pgvector.new(url:, index_name:, llm:, namespace: nil)
     #
     # The operators supported by the PostgreSQL vector search adapter
     OPERATORS = {
-      "cosine_distance" => "<=>",
-      "euclidean_distance" => "<->"
+      "cosine_distance" => "cosine",
+      "euclidean_distance" => "euclidean"
     }
     DEFAULT_OPERATOR = "cosine_distance"
-    attr_reader :operator, :quoted_table_name
+    attr_reader :db, :operator, :table_name, :namespace_column, :namespace, :documents_table
     # @param url [String] The URL of the PostgreSQL database
     # @param index_name [String] The name of the table to use for the index
     # @param llm [Object] The LLM client to use
-    # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
-    def initialize(url:, index_name:, llm:, api_key: nil)
-      require "pg"
+    # @param namespace [String] The namespace to use for the index when inserting/querying
+    def initialize(url:, index_name:, llm:, namespace: nil)
+      depends_on "sequel"
+      require "sequel"
+      depends_on "pgvector"
       require "pgvector"
-      @client = ::PG.connect(url)
-      registry = ::PG::BasicTypeRegistry.new.define_default_types
-      ::Pgvector::PG.register_vector(registry)
-      @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
+      @db = Sequel.connect(url)
-      @index_name = index_name
-      @quoted_table_name = @client.quote_ident(index_name)
+      @table_name = index_name
+      @namespace_column = "namespace"
+      @namespace = namespace
       @operator = OPERATORS[DEFAULT_OPERATOR]
       super(llm: llm)
     end
+    def documents_model
+      Class.new(Sequel::Model(table_name.to_sym)) do
+        plugin :pgvector, :vectors
+      end
+    end
     # Upsert a list of texts to the index
     # @param texts [Array<String>] The texts to add to the index
     # @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
@@ -47,32 +54,28 @@ module Langchain::Vectorsearch
     # the added or updated texts.
     def upsert_texts(texts:, ids:)
       data = texts.zip(ids).flat_map do |(text, id)|
-        [id, text, llm.embed(text: text)]
+        {id: id, content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
       end
-      values = texts.length.times.map { |i| "($#{3 * i + 1}, $#{3 * i + 2}, $#{3 * i + 3})" }.join(",")
-      # see https://github.com/pgvector/pgvector#storing
-      client.exec_params(
-        "INSERT INTO #{quoted_table_name} (id, content, vectors) VALUES
-#{values} ON CONFLICT (id) DO UPDATE SET content = EXCLUDED.content, vectors = EXCLUDED.vectors RETURNING id;",
-        data
-      )
+      # @db[table_name.to_sym].multi_insert(data, return: :primary_key)
+      @db[table_name.to_sym]
+        .insert_conflict(
+          target: :id,
+          update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
+        )
+        .multi_insert(data, return: :primary_key)
     end
     # Add a list of texts to the index
     # @param texts [Array<String>] The texts to add to the index
     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
-    # @return [PG::Result] The response from the database including the ids of
-    # the added texts.
+    # @return [Array<Integer>] The ids of the added texts.
     def add_texts(texts:, ids: nil)
       if ids.nil? || ids.empty?
-        data = texts.flat_map do |text|
-          [text, llm.embed(text: text)]
+        data = texts.map do |text|
+          {content: text, vectors: llm.embed(text: text).to_s, namespace: namespace}
         end
-        values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
-        client.exec_params(
-          "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values} RETURNING id;",
-          data
-        )
+        @db[table_name.to_sym].multi_insert(data, return: :primary_key)
       else
         upsert_texts(texts: texts, ids: ids)
       end
@@ -81,8 +84,7 @@ module Langchain::Vectorsearch
     # Update a list of ids and corresponding texts to the index
     # @param texts [Array<String>] The texts to add to the index
     # @param ids [Array<String>] The ids to add to the index, in the same order as the texts
-    # @return [PG::Result] The response from the database including the ids of
-    # the updated texts.
+    # @return [Array<Integer>] The ids of the updated texts.
     def update_texts(texts:, ids:)
       upsert_texts(texts: texts, ids: ids)
     end
@@ -90,16 +92,15 @@ module Langchain::Vectorsearch
     # Create default schema
     # @return [PG::Result] The response from the database
     def create_default_schema
-      client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
-      client.exec(
-        <<~SQL
-          CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
-            id serial PRIMARY KEY,
-            content TEXT,
-            vectors VECTOR(#{default_dimension})
-          );
-        SQL
-      )
+      db.run "CREATE EXTENSION IF NOT EXISTS vector"
+      namespace = namespace_column
+      vector_dimension = default_dimension
+      db.create_table? table_name.to_sym do
+        primary_key :id
+        text :content
+        column :vectors, "vector(#{vector_dimension})"
+        text namespace.to_sym, default: nil
+      end
     end
     # TODO: Add destroy_default_schema method
@@ -123,15 +124,11 @@ module Langchain::Vectorsearch
     # @param k [Integer] The number of top results to return
     # @return [Array<Hash>] The results of the search
     def similarity_search_by_vector(embedding:, k: 4)
-      result = client.transaction do |conn|
-        conn.exec("SET LOCAL ivfflat.probes = 10;")
-        query = <<~SQL
-          SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
-        SQL
-        conn.exec_params(query, [embedding, k])
+      db.transaction do # BEGIN
+        documents_model
+          .nearest_neighbors(:vectors, embedding, distance: operator).limit(k)
+          .where(namespace_column.to_sym => namespace)
       end
-      result.to_a
     end
     # Ask a question and return the answer
@@ -142,7 +139,7 @@ module Langchain::Vectorsearch
       search_results = similarity_search(query: question)
       context = search_results.map do |result|
-        result["content"].to_s
+        result.content.to_s
       end
       context = context.join("\n---\n")

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.6.8"
+  VERSION = "0.6.10"
 end

data/lib/langchain.rb CHANGED Viewed

@@ -131,6 +131,7 @@ module Langchain
   module LLM
     autoload :AI21, "langchain/llm/ai21"
+    autoload :Anthropic, "langchain/llm/anthropic"
     autoload :Base, "langchain/llm/base"
     autoload :Cohere, "langchain/llm/cohere"
     autoload :GooglePalm, "langchain/llm/google_palm"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.6.8
+  version: 0.6.10
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-07-21 00:00:00.000000000 Z
+date: 2023-08-01 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -136,6 +136,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.2.1
+- !ruby/object:Gem::Dependency
+  name: anthropic
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.1.0
 - !ruby/object:Gem::Dependency
   name: chroma-db
   requirement: !ruby/object:Gem::Requirement
@@ -324,14 +338,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.2'
+        version: 0.2.1
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '0.2'
+        version: 0.2.1
 - !ruby/object:Gem::Dependency
   name: pdf-reader
   requirement: !ruby/object:Gem::Requirement
@@ -518,6 +532,7 @@ files:
 - lib/langchain/data.rb
 - lib/langchain/dependency_helper.rb
 - lib/langchain/llm/ai21.rb
+- lib/langchain/llm/anthropic.rb
 - lib/langchain/llm/base.rb
 - lib/langchain/llm/cohere.rb
 - lib/langchain/llm/google_palm.rb