langchainrb 0.9.0 → 0.9.1
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/README.md +4 -2
- data/lib/langchain/llm/ollama.rb +167 -27
- data/lib/langchain/llm/response/base_response.rb +7 -0
- data/lib/langchain/llm/response/ollama_response.rb +18 -0
- data/lib/langchain/utils/token_length/openai_validator.rb +11 -0
- data/lib/langchain/utils/token_length/token_limit_exceeded.rb +1 -1
- data/lib/langchain/vectorsearch/pinecone.rb +2 -1
- data/lib/langchain/version.rb +1 -1
- metadata +18 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 4c9d0655d58ddff57b9c9163065908dd17d91c6bffc5b146bf7fc01b4c9fb96d
+  data.tar.gz: ee82c644b7e38503fa0587ade2af0447819863303e9fa3755dce1676d68ad5f7
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 05faddd31c819e6d351ed99e05353e462341ab1744769a1b3a9932c37de4c68907b54f79bcd65f6b652d954d5301e80055c5ee8c57b66a3917256918c51cc61f
+  data.tar.gz: c2fed05da349fdc9ebd9990ea5c2d5c70a68241c491c903c631eb0584bce01da17bab7b04c59fa9fded8282547798219b2a0b951c1c5a8d1062d08f2a930062c
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,10 @@
 ## [Unreleased]
 
+## [0.9.1]
+- Add support for new OpenAI models
+- Add Ollama#chat method
+- Fix and refactor of `Langchain::LLM::Ollama`, responses can now be streamed.
+
 ## [0.9.0]
 - Introducing new `Langchain::Assistant` that will be replacing `Langchain::Conversation` and `Langchain::Agent`s.
 - `Langchain::Conversation` is deprecated.
data/README.md
CHANGED
@@ -42,6 +42,8 @@ If bundler is not being used to manage dependencies, install the gem by executin
 
 gem install langchainrb
 
+Additional gems may be required when loading LLM Providers. These are not included by default so you can include only what you need.
+
 ## Usage
 
 ```ruby
@@ -62,7 +64,7 @@ Langchain.rb wraps all supported LLMs in a unified interface allowing you to eas
 | [GooglePalm](https://ai.google/discover/palm2?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 | [Google Vertex AI](https://cloud.google.com/vertex-ai?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ❌ | ✅ | |
 | [HuggingFace](https://huggingface.co/?utm_source=langchainrb&utm_medium=github) | ✅ | ❌ | ❌ | ❌ | |
-| [Ollama](https://ollama.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ |
+| [Ollama](https://ollama.ai/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ❌ | |
 | [Replicate](https://replicate.com/?utm_source=langchainrb&utm_medium=github) | ✅ | ✅ | ✅ | ✅ | |
 
 #### Using standalone LLMs:
@@ -91,7 +93,7 @@ llm.complete(prompt: "What is the meaning of life?").completion
 
 Generate a chat completion:
 ```ruby
-llm.chat(
+llm.chat(messages: [{role: "user", content: "What is the meaning of life?"}]).completion
 ```
 
 Summarize the text:
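The README example above now passes an explicit `messages:` array to `chat`. A minimal sketch of the same call against a local Ollama server (the URL is an assumption; `chat_completion` is the reader this release adds to `OllamaResponse`):

```ruby
require "langchain"

# Hypothetical local endpoint; point this at your own Ollama instance.
llm = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

response = llm.chat(messages: [{role: "user", content: "What is the meaning of life?"}])
puts response.chat_completion
```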
data/lib/langchain/llm/ollama.rb
CHANGED
@@ -5,21 +5,26 @@ module Langchain::LLM
   # Available models: https://ollama.ai/library
   #
   # Usage:
-  #    ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
+  #    ollama = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"], default_options: {})
   #
   class Ollama < Base
-    attr_reader :url
+    attr_reader :url, :defaults
 
     DEFAULTS = {
-      temperature: 0.
+      temperature: 0.8,
       completion_model_name: "llama2",
-      embeddings_model_name: "llama2"
+      embeddings_model_name: "llama2",
+      chat_completion_model_name: "llama2"
     }.freeze
 
     # Initialize the Ollama client
     # @param url [String] The URL of the Ollama instance
-    def initialize(url:)
+    # @param default_options [Hash] The default options to use
+    #
+    def initialize(url:, default_options: {})
+      depends_on "faraday"
       @url = url
+      @defaults = DEFAULTS.merge(default_options)
     end
 
     #
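A short sketch of how the new `default_options:` keyword might be used; the merged hash is exposed through the new `defaults` reader, so an instance overrides only the keys it passes (the override values below are illustrative):

```ruby
require "langchain"

ollama = Langchain::LLM::Ollama.new(
  url: ENV["OLLAMA_URL"],
  default_options: {
    temperature: 0.2,                      # overrides the 0.8 default
    chat_completion_model_name: "mistral"  # illustrative model name
  }
)

ollama.defaults[:temperature]            # => 0.2
ollama.defaults[:completion_model_name]  # => "llama2" (untouched default)
```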
@@ -27,32 +32,128 @@ module Langchain::LLM
     #
     # @param prompt [String] The prompt to complete
     # @param model [String] The model to use
-    #
+    # For a list of valid parameters and values, see:
+    # https://github.com/jmorganca/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values
     # @return [Langchain::LLM::OllamaResponse] Response object
     #
-    def complete(
-
+    def complete(
+      prompt:,
+      model: defaults[:completion_model_name],
+      images: nil,
+      format: nil,
+      system: nil,
+      template: nil,
+      context: nil,
+      stream: nil,
+      raw: nil,
+      mirostat: nil,
+      mirostat_eta: nil,
+      mirostat_tau: nil,
+      num_ctx: nil,
+      num_gqa: nil,
+      num_gpu: nil,
+      num_thread: nil,
+      repeat_last_n: nil,
+      repeat_penalty: nil,
+      temperature: defaults[:temperature],
+      seed: nil,
+      stop: nil,
+      tfs_z: nil,
+      num_predict: nil,
+      top_k: nil,
+      top_p: nil,
+      stop_sequences: nil,
+      &block
+    )
+      if stop_sequences
+        stop = stop_sequences
+      end
 
-
+      parameters = {
+        prompt: prompt,
+        model: model,
+        images: images,
+        format: format,
+        system: system,
+        template: template,
+        context: context,
+        stream: stream,
+        raw: raw
+      }.compact
+
+      llm_parameters = {
+        mirostat: mirostat,
+        mirostat_eta: mirostat_eta,
+        mirostat_tau: mirostat_tau,
+        num_ctx: num_ctx,
+        num_gqa: num_gqa,
+        num_gpu: num_gpu,
+        num_thread: num_thread,
+        repeat_last_n: repeat_last_n,
+        repeat_penalty: repeat_penalty,
+        temperature: temperature,
+        seed: seed,
+        stop: stop,
+        tfs_z: tfs_z,
+        num_predict: num_predict,
+        top_k: top_k,
+        top_p: top_p
+      }
+
+      parameters[:options] = llm_parameters.compact
+
+      response = ""
 
       client.post("api/generate") do |req|
-        req.body =
-        req.body["prompt"] = prompt
-        req.body["model"] = model_name
-
-        req.body["options"] = options if options.any?
+        req.body = parameters
 
-        # TODO: Implement streaming support when a &block is passed in
         req.options.on_data = proc do |chunk, size|
           json_chunk = JSON.parse(chunk)
 
-
-
-
+          response += json_chunk.dig("response")
+
+          yield json_chunk, size if block
         end
       end
 
-      Langchain::LLM::OllamaResponse.new(response, model:
+      Langchain::LLM::OllamaResponse.new(response, model: parameters[:model])
+    end
+
+    # Generate a chat completion
+    #
+    # @param model [String] Model name
+    # @param messages [Array<Hash>] Array of messages
+    # @param format [String] Format to return a response in. Currently the only accepted value is `json`
+    # @param temperature [Float] The temperature to use
+    # @param template [String] The prompt template to use (overrides what is defined in the `Modelfile`)
+    # @param stream [Boolean] Streaming the response. If false the response will be returned as a single response object, rather than a stream of objects
+    #
+    # The message object has the following fields:
+    #   role: the role of the message, either system, user or assistant
+    #   content: the content of the message
+    #   images (optional): a list of images to include in the message (for multimodal models such as llava)
+    def chat(
+      model: defaults[:chat_completion_model_name],
+      messages: [],
+      format: nil,
+      temperature: defaults[:temperature],
+      template: nil,
+      stream: false # TODO: Fix streaming.
+    )
+      parameters = {
+        model: model,
+        messages: messages,
+        format: format,
+        temperature: temperature,
+        template: template,
+        stream: stream
+      }.compact
+
+      response = client.post("api/chat") do |req|
+        req.body = parameters
+      end
+
+      Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
     end
 
     #
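With the refactor above, `complete` streams each parsed JSON chunk from `api/generate` to an optional block while accumulating the full text, and the new `chat` method posts to `api/chat`. A rough usage sketch, assuming a local Ollama server and that `OllamaResponse#completion` returns the accumulated string:

```ruby
require "langchain"

ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434") # assumed local server

# Each parsed chunk from api/generate is yielded as it arrives; the full text is
# accumulated internally and wrapped in an OllamaResponse at the end.
streamed = ollama.complete(prompt: "Write a haiku about Ruby") do |chunk, _size|
  print chunk["response"]
end
puts
puts streamed.completion # assuming the accumulated string is exposed as the completion

# The new chat endpoint; streaming here is still marked TODO, so leave stream: false.
reply = ollama.chat(messages: [{role: "user", content: "Why is the sky blue?"}])
puts reply.chat_completion
```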
@@ -63,18 +164,57 @@ module Langchain::LLM
     # @param options [Hash] The options to use
     # @return [Langchain::LLM::OllamaResponse] Response object
     #
-    def embed(
-
+    def embed(
+      text:,
+      model: defaults[:embeddings_model_name],
+      mirostat: nil,
+      mirostat_eta: nil,
+      mirostat_tau: nil,
+      num_ctx: nil,
+      num_gqa: nil,
+      num_gpu: nil,
+      num_thread: nil,
+      repeat_last_n: nil,
+      repeat_penalty: nil,
+      temperature: defaults[:temperature],
+      seed: nil,
+      stop: nil,
+      tfs_z: nil,
+      num_predict: nil,
+      top_k: nil,
+      top_p: nil
+    )
+      parameters = {
+        prompt: text,
+        model: model
+      }.compact
+
+      llm_parameters = {
+        mirostat: mirostat,
+        mirostat_eta: mirostat_eta,
+        mirostat_tau: mirostat_tau,
+        num_ctx: num_ctx,
+        num_gqa: num_gqa,
+        num_gpu: num_gpu,
+        num_thread: num_thread,
+        repeat_last_n: repeat_last_n,
+        repeat_penalty: repeat_penalty,
+        temperature: temperature,
+        seed: seed,
+        stop: stop,
+        tfs_z: tfs_z,
+        num_predict: num_predict,
+        top_k: top_k,
+        top_p: top_p
+      }
+
+      parameters[:options] = llm_parameters.compact
 
       response = client.post("api/embeddings") do |req|
-        req.body =
-        req.body["prompt"] = text
-        req.body["model"] = model_name
-
-        req.body["options"] = options if options.any?
+        req.body = parameters
       end
 
-      Langchain::LLM::OllamaResponse.new(response.body, model:
+      Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
     end
 
     private
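A corresponding sketch for the reworked `embed`, which builds the same `parameters` plus `options` split before posting to `api/embeddings`. The `embedding` reader on the response is an assumption, not part of this diff:

```ruby
require "langchain"

ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434") # assumed local server

# Model options such as temperature or seed go into parameters[:options];
# omitted keys are compacted away before the request is sent.
response = ollama.embed(text: "Ruby is a programmer's best friend", temperature: 0.0)
vector = response.embedding # assumed reader returning the embedding array
puts vector.size
```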
data/lib/langchain/llm/response/ollama_response.rb
CHANGED
@@ -7,6 +7,16 @@ module Langchain::LLM
       super(raw_response, model: model)
     end
 
+    def created_at
+      if raw_response.dig("created_at")
+        Time.parse(raw_response.dig("created_at"))
+      end
+    end
+
+    def chat_completion
+      raw_response.dig("message", "content")
+    end
+
     def completion
       completions.first
     end
@@ -26,5 +36,13 @@ module Langchain::LLM
     def role
       "assistant"
     end
+
+    def prompt_tokens
+      raw_response.dig("prompt_eval_count")
+    end
+
+    def completion_tokens
+      raw_response.dig("eval_count")
+    end
   end
 end
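Taken together, the new readers expose the chat text and usage metadata directly on the response object. A hedged sketch, with each reader mapped to the raw field it digs out:

```ruby
require "langchain"

ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434") # assumed local server
response = ollama.chat(messages: [{role: "user", content: "Hello!"}])

response.chat_completion    # digs "message" -> "content" out of the raw body
response.created_at         # Time parsed from "created_at", or nil when absent
response.prompt_tokens      # "prompt_eval_count"
response.completion_tokens  # "eval_count"
```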
data/lib/langchain/utils/token_length/openai_validator.rb
CHANGED
@@ -19,10 +19,14 @@ module Langchain
           "gpt-3.5-turbo-1106" => 4096
         }
 
+        # NOTE: The gpt-4-turbo-preview is an alias that will always point to the latest GPT 4 Turbo preview
+        #   the future previews may have a different token limit!
         TOKEN_LIMITS = {
           # Source:
           # https://platform.openai.com/docs/api-reference/embeddings
           # https://platform.openai.com/docs/models/gpt-4
+          "text-embedding-3-large" => 8191,
+          "text-embedding-3-small" => 8191,
           "text-embedding-ada-002" => 8191,
           "gpt-3.5-turbo" => 4096,
           "gpt-3.5-turbo-0301" => 4096,
@@ -40,6 +44,8 @@ module Langchain
           "gpt-4-32k-0314" => 32768,
           "gpt-4-32k-0613" => 32768,
           "gpt-4-1106-preview" => 128000,
+          "gpt-4-turbo-preview" => 128000,
+          "gpt-4-0125-preview" => 128000,
           "gpt-4-vision-preview" => 128000,
           "text-curie-001" => 2049,
           "text-babbage-001" => 2049,
@@ -58,6 +64,11 @@ module Langchain
         # @return [Integer] The token length of the text
         #
         def self.token_length(text, model_name, options = {})
+          # tiktoken-ruby doesn't support text-embedding-3-large or text-embedding-3-small yet
+          if ["text-embedding-3-large", "text-embedding-3-small"].include?(model_name)
+            model_name = "text-embedding-ada-002"
+          end
+
           encoder = Tiktoken.encoding_for_model(model_name)
           encoder.encode(text).length
         end
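A brief sketch of the validator behaviour above, assuming the class constant is `Langchain::Utils::TokenLength::OpenAIValidator` as the file path suggests:

```ruby
require "langchain"

validator = Langchain::Utils::TokenLength::OpenAIValidator

# The new text-embedding-3-* models are counted with the ada-002 encoding
# until tiktoken-ruby supports them directly.
puts validator.token_length("The quick brown fox", "text-embedding-3-small")

# gpt-4-turbo-preview is an alias; its limit is registered as 128000 today,
# but future previews may differ, per the NOTE added in the diff.
puts Langchain::Utils::TokenLength::OpenAIValidator::TOKEN_LIMITS["gpt-4-turbo-preview"]
```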
data/lib/langchain/vectorsearch/pinecone.rb
CHANGED
@@ -17,12 +17,13 @@ module Langchain::Vectorsearch
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
     # @param llm [Object] The LLM client to use
-    def initialize(environment:, api_key:, index_name:, llm:)
+    def initialize(environment:, api_key:, index_name:, llm:, base_uri: nil)
       depends_on "pinecone"
 
       ::Pinecone.configure do |config|
         config.api_key = api_key
         config.environment = environment
+        config.base_uri = base_uri if base_uri
       end
 
       @client = ::Pinecone::Client.new
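A hedged sketch of the new `base_uri:` option on the Pinecone wrapper; everything other than the keyword itself (index name, host, choice of LLM) is illustrative:

```ruby
require "langchain"

pinecone = Langchain::Vectorsearch::Pinecone.new(
  environment: ENV["PINECONE_ENVIRONMENT"],
  api_key: ENV["PINECONE_API_KEY"],
  index_name: "recipes", # illustrative index name
  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]),
  # Only forwarded to Pinecone.configure when present, so existing callers are unaffected.
  base_uri: "https://my-index-abc123.svc.us-east-1-aws.pinecone.io" # hypothetical host
)
```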
data/lib/langchain/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.9.0
+  version: 0.9.1
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2024-
+date: 2024-02-06 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: baran
@@ -576,14 +576,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.8.
+        version: 0.8.10
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
      - !ruby/object:Gem::Version
-        version: 0.8.
+        version: 0.8.10
 - !ruby/object:Gem::Dependency
   name: wikipedia-client
   requirement: !ruby/object:Gem::Requirement
@@ -598,6 +598,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.17.0
+- !ruby/object:Gem::Dependency
+  name: faraday
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: Build LLM-backed Ruby applications with Ruby's LangChain
 email:
 - andrei.bondarev13@gmail.com