langchainrb 0.5.1 → 0.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 2673339c5bbe874a8bdf1722a2556f26d9fe13394875af914b5203632714f2f0
-  data.tar.gz: 216ab880c2c6094b267cbf3efcaf19ce74bea7cc665442fbf2b23108a9cb087b
+  metadata.gz: d36de4206b792714ba9b6773c03272e9638b14caf7140e0bc00c3e767aa5fdef
+  data.tar.gz: 819fab9de55a34e4e6dc865febc19bb9979df55fa8fc6a753774cf1961c40103
 SHA512:
-  metadata.gz: 408cf6194d85a4af076adbfd8be4a360d094200d127672f218d8914fbcd67d1a8a803645219532f66d4e79214571d61b629570df071de871b013e2d9d6c0d3a5
-  data.tar.gz: 123016bd42d1d2539c13f7d68074ddc19dc8a5880ae0b02b103e20bf7f058adfe2659beb90263a852bbf38b4b169622a1b7ac8a245c3791ab9b9ae8f8fc4e3cb
+  metadata.gz: 6e180b41bbca96bd5523c276923f223bbebe470314086c6a909df440890793bcc70dbd66ecf59bf5d0fd52426650cc5d2684c56cc8fc643209cc1679527cbef4
+  data.tar.gz: af5db76c2b22b5c7bdc1170de437921e8464a16566f46a5cad465d69e6da47c97a82f7331a5ea5747840e58acc71463aa8456b03e9bc8851efda7b734e5d23cc
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
 ## [Unreleased]
 
+## [0.5.2] - 2023-06-07
+- 🗣️ LLMs
+  - Auto-calculate the max_tokens: setting to be passed on to OpenAI
+
 ## [0.5.1] - 2023-06-06
 - 🛠️ Tools
   - Modified Tool usage. Agents now accept Tools instances instead of Tool strings.
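The headline change in 0.5.2 is the automatic `max_tokens:` calculation shown in the `Langchain::LLM::OpenAI` and `TokenLengthValidator` hunks further down. A minimal sketch of what this means at a call site, assuming an OpenAI key is configured (the prompt text is illustrative):

```ruby
require "langchain"

llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# Before 0.5.2, call sites passed max_tokens: explicitly (500 or 2048 in the
# hunks below). As of this release the gem derives it from the model's context
# window minus the prompt's token count, so a bare call suffices:
llm.complete(prompt: "Write a haiku about Ruby.")
```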
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.5.1)
+    langchainrb (0.5.2)
       colorize (~> 0.8.1)
       tiktoken_ruby (~> 0.0.5)
 
data/README.md CHANGED
@@ -281,11 +281,10 @@ Add `gem "sequel"` to your Gemfile
 ```ruby
 database = Langchain::Tool::Database.new(connection_string: "postgres://user:password@localhost:5432/db_name")
 
-agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), tools: [database])
-
+agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), db: database)
 ```
 ```ruby
-agent.ask(question: "How many users have a name with length greater than 5 in the users table?")
+agent.run(question: "How many users have a name with length greater than 5 in the users table?")
 #=> "14 users have a name with length greater than 5 in the users table."
 ```
 
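For upgraders, the README hunk above amounts to the following migration. A sketch only; `llm` and `database` stand for the `Langchain::LLM::OpenAI` and `Langchain::Tool::Database` instances from the snippet, and the question is illustrative:

```ruby
# 0.5.1
agent = Langchain::Agent::SQLQueryAgent.new(llm: llm, tools: [database])
agent.ask(question: "How many users are there?")

# 0.5.2: the database moves to the db: keyword, and ask is renamed to run
agent = Langchain::Agent::SQLQueryAgent.new(llm: llm, db: database)
agent.run(question: "How many users are there?")
```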
@@ -1,10 +1,10 @@
 require "langchain"
 
 # Create a prompt with a few shot examples
-prompt = Prompt::FewShotPromptTemplate.new(
+prompt = Langchain::Prompt::FewShotPromptTemplate.new(
   prefix: "Write antonyms for the following words.",
   suffix: "Input: {adjective}\nOutput:",
-  example_prompt: Prompt::PromptTemplate.new(
+  example_prompt: Langchain::Prompt::PromptTemplate.new(
     input_variables: ["input", "output"],
     template: "Input: {input}\nOutput: {output}"
   ),
@@ -32,5 +32,5 @@ prompt.format(adjective: "good")
 prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
 
 # Loading a new prompt template using a JSON file
-prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
 prompt.prefix # "Write antonyms for the following words."
@@ -1,15 +1,15 @@
 require "langchain"
 
 # Create a prompt with one input variable
-prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
 prompt.format(adjective: "funny") # "Tell me a funny joke."
 
 # Create a prompt with multiple input variables
-prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
 prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
 
 # Creating a PromptTemplate using just a prompt and no input_variables
-prompt = Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
+prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
 prompt.input_variables # ["adjective", "content"]
 prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
 
@@ -17,5 +17,9 @@ prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke a
 prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
 
 # Loading a new prompt template using a JSON file
-prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
+prompt.input_variables # ["adjective", "content"]
+
+# Loading a new prompt template using a YAML file
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
 prompt.input_variables # ["adjective", "content"]
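The new lines above add YAML loading alongside JSON. A round-trip sketch under the assumption that `save` also infers the serializer from a `.yaml` extension; the diff itself only demonstrates loading, so treat the save call as hypothetical:

```ruby
prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke.")

# Assumption: save() picks the format from the file extension the way
# load_from_path does. If saving only supports JSON, write the JSON file
# and convert it to YAML by hand instead.
prompt.save(file_path: "prompt_template.yaml")

reloaded = Langchain::Prompt.load_from_path(file_path: "prompt_template.yaml")
reloaded.input_variables # ["adjective"]
```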
@@ -4,7 +4,7 @@ require "langchain"
 # or add `gem "chroma-db", "~> 0.3.0"` to your Gemfile
 
 # Instantiate the Chroma client
-chroma = Vectorsearch::Chroma.new(
+chroma = Langchain::Vectorsearch::Chroma.new(
   url: ENV["CHROMA_URL"],
   index_name: "documents",
   llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
@@ -4,7 +4,7 @@ require "langchain"
 # or add `gem "pinecone"` to your Gemfile
 
 # Instantiate the Pinecone client
-pinecone = Vectorsearch::Pinecone.new(
+pinecone = Langchain::Vectorsearch::Pinecone.new(
   environment: ENV["PINECONE_ENVIRONMENT"],
   api_key: ENV["PINECONE_API_KEY"],
   index_name: "recipes",
@@ -37,7 +37,7 @@ pinecone.ask(
 )
 
 # Generate an embedding and search by it
-openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 embedding = openai.embed(text: "veggie")
 
 pinecone.similarity_search_by_vector(
@@ -4,7 +4,7 @@ require "langchain"
 # or add `gem "qdrant-ruby"` to your Gemfile
 
 # Instantiate the Qdrant client
-qdrant = Vectorsearch::Qdrant.new(
+qdrant = Langchain::Vectorsearch::Qdrant.new(
   url: ENV["QDRANT_URL"],
   api_key: ENV["QDRANT_API_KEY"],
   index_name: "recipes",
@@ -4,7 +4,7 @@ require "langchain"
 # or add `gem "weaviate-ruby"` to your Gemfile
 
 # Instantiate the Weaviate client
-weaviate = Vectorsearch::Weaviate.new(
+weaviate = Langchain::Vectorsearch::Weaviate.new(
   url: ENV["WEAVIATE_URL"],
   api_key: ENV["WEAVIATE_API_KEY"],
   index_name: "Recipes",
@@ -39,11 +39,8 @@ module Langchain::Agent
 
     loop do
       Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
-      response = llm.complete(
-        prompt: prompt,
-        stop_sequences: ["Observation:"],
-        max_tokens: 500
-      )
+
+      response = llm.complete(prompt: prompt, stop_sequences: ["Observation:"])
 
       # Append the response to the prompt
       prompt += response
@@ -22,12 +22,12 @@ module Langchain::Agent
     # @param question [String] Question to ask the LLM/Database
     # @return [String] Answer to the question
     #
-    def ask(question:)
+    def run(question:)
       prompt = create_prompt_for_sql(question: question)
 
       # Get the SQL string to execute
       Langchain.logger.info("[#{self.class.name}]".red + ": Passing the initial prompt to the #{llm.class} LLM")
-      sql_string = llm.complete(prompt: prompt, max_tokens: 500)
+      sql_string = llm.complete(prompt: prompt)
 
       # Execute the SQL string and collect the results
       Langchain.logger.info("[#{self.class.name}]".red + ": Passing the SQL to the Database: #{sql_string}")
@@ -36,7 +36,7 @@ module Langchain::Agent
       # Pass the results and get the LLM to synthesize the answer to the question
       Langchain.logger.info("[#{self.class.name}]".red + ": Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
       prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
-      llm.complete(prompt: prompt2, max_tokens: 500)
+      llm.complete(prompt: prompt2)
     end
 
     private
@@ -35,7 +35,7 @@ module Langchain::LLM
     def embed(text:, **params)
       parameters = {model: DEFAULTS[:embeddings_model_name], input: text}
 
-      Langchain::Utils::TokenLengthValidator.validate!(text, parameters[:model])
+      Langchain::Utils::TokenLengthValidator.validate_max_tokens!(text, parameters[:model])
 
       response = client.embeddings(parameters: parameters.merge(params))
       response.dig("data").first.dig("embedding")
@@ -50,9 +50,8 @@ module Langchain::LLM
     def complete(prompt:, **params)
       parameters = compose_parameters DEFAULTS[:completion_model_name], params
 
-      Langchain::Utils::TokenLengthValidator.validate!(prompt, parameters[:model])
-
       parameters[:prompt] = prompt
+      parameters[:max_tokens] = Langchain::Utils::TokenLengthValidator.validate_max_tokens!(prompt, parameters[:model])
 
       response = client.completions(parameters: parameters)
       response.dig("choices", 0, "text")
@@ -67,9 +66,8 @@ module Langchain::LLM
     def chat(prompt:, **params)
       parameters = compose_parameters DEFAULTS[:chat_completion_model_name], params
 
-      Langchain::Utils::TokenLengthValidator.validate!(prompt, parameters[:model])
-
       parameters[:messages] = [{role: "user", content: prompt}]
+      parameters[:max_tokens] = Langchain::Utils::TokenLengthValidator.validate_max_tokens!(prompt, parameters[:model])
 
       response = client.chat(parameters: parameters)
       response.dig("choices", 0, "message", "content")
@@ -87,12 +85,7 @@ module Langchain::LLM
       )
       prompt = prompt_template.format(text: text)
 
-      complete(
-        prompt: prompt,
-        temperature: DEFAULTS[:temperature],
-        # Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
-        max_tokens: 2048
-      )
+      complete(prompt: prompt, temperature: DEFAULTS[:temperature])
     end
 
     private
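One consequence worth noting in the `complete` and `chat` hunks: `parameters[:max_tokens]` is assigned after `compose_parameters` has merged the caller's `**params`, so on a plain reading of the diff an explicit `max_tokens:` argument is now overwritten by the auto-calculated value:

```ruby
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])

# The validator's return value is assigned last, so this 10 appears to be
# superseded by (context length - prompt tokens) from validate_max_tokens!.
llm.chat(prompt: "Hello!", max_tokens: 10)
```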
@@ -38,7 +38,8 @@ module Langchain::Tool
       hash_results = Langchain::Tool::SerpApi
         .new(api_key: ENV["SERPAPI_API_KEY"])
         .execute_search(input: input)
-      hash_results.dig(:answer_box, :to)
+      hash_results.dig(:answer_box, :to) ||
+        hash_results.dig(:answer_box, :result)
     end
   end
 end
@@ -34,23 +34,50 @@ module Langchain
       "ada" => 2049
     }.freeze
 
+    # GOOGLE_PALM_TOKEN_LIMITS = {
+    #   "chat-bison-001" => {
+    #     "inputTokenLimit"=>4096,
+    #     "outputTokenLimit"=>1024
+    #   },
+    #   "text-bison-001" => {
+    #     "inputTokenLimit"=>8196,
+    #     "outputTokenLimit"=>1024
+    #   },
+    #   "embedding-gecko-001" => {
+    #     "inputTokenLimit"=>1024
+    #   }
+    # }.freeze
+
     #
-    # Validate the length of the text passed in to OpenAI's API
+    # Calculate the `max_tokens:` parameter to be set by subtracting the prompt's token length from the model's context length
     #
     # @param text [String] The text to validate
     # @param model_name [String] The model name to validate against
-    # @return [Boolean] Whether the text is valid or not
+    # @return [Integer] The `max_tokens:` value still available for the completion
     # @raise [TokenLimitExceeded] If the text is too long
     #
-    def self.validate!(text, model_name)
-      encoder = Tiktoken.encoding_for_model(model_name)
-      token_length = encoder.encode(text).length
+    def self.validate_max_tokens!(text, model_name)
+      text_token_length = token_length(text, model_name)
+      max_tokens = TOKEN_LIMITS[model_name] - text_token_length
 
-      if token_length > TOKEN_LIMITS[model_name]
-        raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{token_length} tokens long."
+      # Raise an error even if the whole prompt exactly equals the model's token limit (max_tokens == 0), since no response could be returned
+      if max_tokens <= 0
+        raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{text_token_length} tokens long."
       end
 
-      true
+      max_tokens
+    end
+
+    #
+    # Calculate token length for a given text and model name
+    #
+    # @param text [String] The text to validate
+    # @param model_name [String] The model name to validate against
+    # @return [Integer] The token length of the text
+    #
+    def self.token_length(text, model_name)
+      encoder = Tiktoken.encoding_for_model(model_name)
+      encoder.encode(text).length
     end
   end
 end
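A worked example of the new arithmetic, using the `"ada" => 2049` entry visible in the `TOKEN_LIMITS` context above; the token counts are illustrative:

```ruby
# Suppose `prompt` encodes to 2,000 tokens under the model's Tiktoken encoding.
Langchain::Utils::TokenLengthValidator.validate_max_tokens!(prompt, "ada")
# => 49 (2049 - 2000), the budget left for the completion

# A prompt of 2,049 tokens or more raises, since max_tokens <= 0 would leave
# no room for a response:
#   TokenLimitExceeded: This model's maximum context length is 2049 tokens...
```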
@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 
 module Langchain
-  VERSION = "0.5.1"
+  VERSION = "0.5.2"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.5.1
+  version: 0.5.2
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-06 00:00:00.000000000 Z
+date: 2023-06-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: tiktoken_ruby