RubyGems - langchainrb - Versions diffs - 0.3.15 → 0.4.0 - Mend

langchainrb 0.3.15 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/CHANGELOG.md +5 -0
data/Gemfile.lock +8 -1
data/README.md +20 -20
data/lib/{agent → langchain/agent}/base.rb +1 -1
data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent.rb +10 -10
data/lib/{llm → langchain/llm}/base.rb +3 -3
data/lib/{llm → langchain/llm}/cohere.rb +1 -1
data/lib/{llm → langchain/llm}/google_palm.rb +3 -3
data/lib/{llm → langchain/llm}/hugging_face.rb +1 -1
data/lib/{llm → langchain/llm}/openai.rb +18 -6
data/lib/{llm → langchain/llm}/replicate.rb +3 -3
data/lib/{prompt → langchain/prompt}/base.rb +2 -2
data/lib/{prompt → langchain/prompt}/few_shot_prompt_template.rb +1 -1
data/lib/{prompt → langchain/prompt}/loading.rb +3 -3
data/lib/{prompt → langchain/prompt}/prompt_template.rb +1 -1
data/lib/{tool → langchain/tool}/base.rb +5 -5
data/lib/{tool → langchain/tool}/calculator.rb +2 -2
data/lib/{tool → langchain/tool}/serp_api.rb +1 -1
data/lib/{tool → langchain/tool}/wikipedia.rb +1 -1
data/lib/langchain/utils/token_length_validator.rb +57 -0
data/lib/{vectorsearch → langchain/vectorsearch}/base.rb +5 -5
data/lib/{vectorsearch → langchain/vectorsearch}/chroma.rb +1 -1
data/lib/{vectorsearch → langchain/vectorsearch}/milvus.rb +1 -1
data/lib/{vectorsearch → langchain/vectorsearch}/pgvector.rb +15 -4
data/lib/{vectorsearch → langchain/vectorsearch}/pinecone.rb +1 -1
data/lib/{vectorsearch → langchain/vectorsearch}/qdrant.rb +1 -1
data/lib/{vectorsearch → langchain/vectorsearch}/weaviate.rb +1 -1
data/lib/langchain.rb +39 -34
data/lib/version.rb +1 -1
metadata +43 -28
/data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent_prompt.json +0 -0
/data/lib/{llm → langchain/llm}/prompts/summarize_template.json +0 -0

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 4f855e3c0e1f0d7b59e0255004a1a806c7048da6d3fe0a8ddf10be68e36ed9ba
-  data.tar.gz: 5758c90205c3e2bea420cf7fa0dec07638917beced60bd0482e1d803ced96c07
+  metadata.gz: ce8728ec2208577809174e154642db161121cb9dd49e0ec5d190d080e68b1d78
+  data.tar.gz: bb0e0ccc4558ca849549f495a4adfacc5f7851c786869974afdaef29f0cde3ca
 SHA512:
-  metadata.gz: ec26f8c4257a6949d829d7f68d2175943b80c2837739bdbead8e6b61891a6738cf84ec1934caff777b63dee39f3d0111e8f29a81c37c125be1cd3ae8137b6968
-  data.tar.gz: 31bb1aa0296dbbc8e1a1c6c2bc7236c92bf3e627935c752bd527c7f7d769da69f11c6bb63de6e0b4e0d1d8d0c739744045669220c35e253ae83eaa0e23482e6a
+  metadata.gz: b2b4c27e31d730563aeca70a0aa3c4cf129e69773e34f397ba057faa8298a4368c1b9f66f925188f867f1feb47b4e07f77df702fa7c6cb76ad1e1a8464b895f6
+  data.tar.gz: 55dd3fbc21e2cdf9bd84afcd6bb4de0f72c960dec0c6b1d2efff1f9492b3d5c7399f2d14c323597045e64eafb6f2f20992348d640317c64721fb0556f8a64126

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,10 @@
 ## [Unreleased]
+## [0.4.0] - 2023-06-01
+- [BREAKING] Everything is namespaced under `Langchain::` now
+- Pgvector similarity search uses the cosine distance by default now
+- OpenAI token length validation using tiktoken_ruby
 ## [0.3.15] - 2023-05-30
 - Drop Ruby 2.7 support. It had reached EOL.
 - Bump pgvector-ruby to 0.2

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,8 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.3.15)
+    langchainrb (0.4.0)
+      tiktoken_ruby (~> 0.0.5)
 GEM
   remote: https://rubygems.org/
@@ -205,6 +206,7 @@ GEM
       zeitwerk (~> 2.5)
     rainbow (3.1.1)
     rake (13.0.6)
+    rb_sys (0.9.78)
     regexp_parser (2.8.0)
     replicate-ruby (0.2.2)
       addressable
@@ -262,6 +264,11 @@ GEM
     standardrb (1.0.1)
       standard
     thor (1.2.1)
+    tiktoken_ruby (0.0.5)
+      rb_sys (~> 0.9.68)
+    tiktoken_ruby (0.0.5-arm64-darwin)
+    tiktoken_ruby (0.0.5-x86_64-darwin)
+    tiktoken_ruby (0.0.5-x86_64-linux)
     treetop (1.6.12)
       polyglot (~> 0.3)
     ttfunk (1.7.0)

data/README.md CHANGED Viewed

@@ -44,7 +44,7 @@ Add `gem "weaviate-ruby", "~> 0.8.0"`  to your Gemfile.
 Pick the vector search database you'll be using and instantiate the client:
 ```ruby
-client = Vectorsearch::Weaviate.new(
+client = Langchain::Vectorsearch::Weaviate.new(
     url: ENV["WEAVIATE_URL"],
     api_key: ENV["WEAVIATE_API_KEY"],
     llm: :openai, # or :cohere
@@ -52,10 +52,10 @@ client = Vectorsearch::Weaviate.new(
 )
 # You can instantiate any other supported vector search database:
-client = Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
-client = Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
-client = Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
-client = Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
+client = Langchain::Vectorsearch::Milvus.new(...) # `gem "milvus", "~> 0.9.0"`
+client = Langchain::Vectorsearch::Qdrant.new(...) # `gem"qdrant-ruby", "~> 0.9.0"`
+client = Langchain::Vectorsearch::Pinecone.new(...) # `gem "pinecone", "~> 0.1.6"`
+client = Langchain::Vectorsearch::Chroma.new(...) # `gem "chroma-db", "~> 0.3.0"`
 ```
 ```ruby
@@ -107,7 +107,7 @@ Add `gem "ruby-openai", "~> 4.0.0"` to your Gemfile.
 #### OpenAI
 ```ruby
-openai = LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
+openai = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 ```
 ```ruby
 openai.embed(text: "foo bar")
@@ -120,7 +120,7 @@ openai.complete(prompt: "What is the meaning of life?")
 Add `gem "cohere-ruby", "~> 0.9.3"` to your Gemfile.
 ```ruby
-cohere = LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
+cohere = Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
 ```
 ```ruby
 cohere.embed(text: "foo bar")
@@ -132,19 +132,19 @@ cohere.complete(prompt: "What is the meaning of life?")
 #### HuggingFace
 Add `gem "hugging-face", "~> 0.3.2"` to your Gemfile.
 ```ruby
-cohere = LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
+cohere = Langchain::LLM::HuggingFace.new(api_key: ENV["HUGGING_FACE_API_KEY"])
 ```
 #### Replicate
 Add `gem "replicate-ruby", "~> 0.2.2"` to your Gemfile.
 ```ruby
-cohere = LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
+cohere = Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
 ```
 #### Google PaLM (Pathways Language Model)
 Add `gem "google_palm_api", "~> 0.1.0"` to your Gemfile.
 ```ruby
-google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
+google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
 ```
 ### Using Prompts 📋
@@ -154,21 +154,21 @@ google_palm = LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
 Create a prompt with one input variable:
 ```ruby
-prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke.", input_variables: ["adjective"])
 prompt.format(adjective: "funny") # "Tell me a funny joke."
 ```
 Create a prompt with multiple input variables:
 ```ruby
-prompt = Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
+prompt = Langchain::Prompt::PromptTemplate.new(template: "Tell me a {adjective} joke about {content}.", input_variables: ["adjective", "content"])
 prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
 ```
 Creating a PromptTemplate using just a prompt and no input_variables:
 ```ruby
-prompt = Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
+prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
 prompt.input_variables # ["adjective", "content"]
 prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
 ```
@@ -182,7 +182,7 @@ prompt.save(file_path: "spec/fixtures/prompt/prompt_template.json")
 Loading a new prompt template using a JSON file:
 ```ruby
-prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.json")
 prompt.input_variables # ["adjective", "content"]
 ```
@@ -191,10 +191,10 @@ prompt.input_variables # ["adjective", "content"]
 Create a prompt with a few shot examples:
 ```ruby
-prompt = Prompt::FewShotPromptTemplate.new(
+prompt = Langchain::Prompt::FewShotPromptTemplate.new(
   prefix: "Write antonyms for the following words.",
   suffix: "Input: {adjective}\nOutput:",
-  example_prompt: Prompt::PromptTemplate.new(
+  example_prompt: Langchain::Prompt::PromptTemplate.new(
     input_variables: ["input", "output"],
     template: "Input: {input}\nOutput: {output}"
   ),
@@ -228,7 +228,7 @@ prompt.save(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
 Loading a new prompt template using a JSON file:
 ```ruby
-prompt = Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_shot_prompt_template.json")
 prompt.prefix # "Write antonyms for the following words."
 ```
@@ -237,10 +237,10 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
 #### Chain-of-Thought Agent
-Add `gem "openai-ruby"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
+Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
 ```ruby
-agent = Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
+agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
 agent.tools
 # => ["search", "calculator"]
@@ -273,7 +273,7 @@ Need to read data from various sources? Load it up.
 Just call `Langchain::Loader.load` with the path to the file or a URL you want to load.
 ```ruby
-Langchaing::Loader.load('/path/to/file.pdf')
+Langchain::Loader.load('/path/to/file.pdf')
 ```
 or

data/lib/{agent → langchain/agent}/base.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Agent
+module Langchain::Agent
   class Base
   end
 end

data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Agent
+module Langchain::Agent
   class ChainOfThoughtAgent < Base
     attr_reader :llm, :llm_api_key, :llm_client, :tools
@@ -11,14 +11,14 @@ module Agent
     # @param tools [Array] The tools to use
     # @return [ChainOfThoughtAgent] The Langchain::Agent::ChainOfThoughtAgent instance
     def initialize(llm:, llm_api_key:, tools: [])
-      LLM::Base.validate_llm!(llm: llm)
-      Tool::Base.validate_tools!(tools: tools)
+      Langchain::LLM::Base.validate_llm!(llm: llm)
+      Langchain::Tool::Base.validate_tools!(tools: tools)
       @llm = llm
       @llm_api_key = llm_api_key
       @tools = tools
-      @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
+      @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
     end
     # Validate tools when they're re-assigned
@@ -26,7 +26,7 @@ module Agent
     # @param value [Array] The tools to use
     # @return [Array] The tools that will be used
     def tools=(value)
-      Tool::Base.validate_tools!(tools: value)
+      Langchain::Tool::Base.validate_tools!(tools: value)
       @tools = value
     end
@@ -62,8 +62,8 @@ module Agent
           Langchain.logger.info("Agent: Using the \"#{action}\" Tool with \"#{action_input}\"")
           # Retrieve the Langchain::Tool::[ToolName] class and call `execute` with action_input as the input
-          result = Tool
-            .const_get(Tool::Base::TOOLS[action.strip])
+          result = Langchain::Tool
+            .const_get(Langchain::Tool::Base::TOOLS[action.strip])
             .execute(input: action_input)
           # Append the Observation to the prompt
@@ -91,7 +91,7 @@ module Agent
         question: question,
         tool_names: "[#{tools.join(", ")}]",
         tools: tools.map do |tool|
-          "#{tool}: #{Tool.const_get(Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
+          "#{tool}: #{Langchain::Tool.const_get(Langchain::Tool::Base::TOOLS[tool]).const_get(:DESCRIPTION)}"
         end.join("\n")
       )
     end
@@ -99,8 +99,8 @@ module Agent
     # Load the PromptTemplate from the JSON file
     # @return [PromptTemplate] PromptTemplate instance
     def prompt_template
-      @template ||= Prompt.load_from_path(
-        file_path: Langchain.root.join("agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
+      @template ||= Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json")
       )
     end
   end

data/lib/{llm → langchain/llm}/base.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class Base
     attr_reader :client
@@ -42,8 +42,8 @@ module LLM
     # @param llm [Symbol] The LLM to use
     def self.validate_llm!(llm:)
       # TODO: Fix so this works when `llm` value is a string instead of a symbol
-      unless LLM::Base::LLMS.key?(llm)
-        raise ArgumentError, "LLM must be one of #{LLM::Base::LLMS.keys}"
+      unless Langchain::LLM::Base::LLMS.key?(llm)
+        raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
       end
     end
   end

data/lib/{llm → langchain/llm}/cohere.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class Cohere < Base
     DEFAULTS = {
       temperature: 0.0,

data/lib/{llm → langchain/llm}/google_palm.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class GooglePalm < Base
     # Wrapper around the Google PaLM (Pathways Language Model) APIs.
@@ -89,8 +89,8 @@ module LLM
     # @return [String] The summarization
     #
     def summarize(text:)
-      prompt_template = Prompt.load_from_path(
-        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      prompt_template = Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
       )
       prompt = prompt_template.format(text: text)

data/lib/{llm → langchain/llm}/hugging_face.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class HuggingFace < Base
     # The gem does not currently accept other models:
     # https://github.com/alchaplinsky/hugging-face/blob/main/lib/hugging_face/inference_api.rb#L32-L34

data/lib/{llm → langchain/llm}/openai.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class OpenAI < Base
     DEFAULTS = {
       temperature: 0.0,
@@ -25,9 +25,13 @@ module LLM
     # @return [Array] The embedding
     #
     def embed(text:)
+      model = DEFAULTS[:embeddings_model_name]
+      Langchain::Utils::TokenLengthValidator.validate!(text, model)
       response = client.embeddings(
         parameters: {
-          model: DEFAULTS[:embeddings_model_name],
+          model: model,
           input: text
         }
       )
@@ -41,8 +45,12 @@ module LLM
     # @return [String] The completion
     #
     def complete(prompt:, **params)
+      model = DEFAULTS[:completion_model_name]
+      Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
       default_params = {
-        model: DEFAULTS[:completion_model_name],
+        model: model,
         temperature: DEFAULTS[:temperature],
         prompt: prompt
       }
@@ -64,8 +72,12 @@ module LLM
     # @return [String] The chat completion
     #
     def chat(prompt:, **params)
+      model = DEFAULTS[:chat_completion_model_name]
+      Langchain::Utils::TokenLengthValidator.validate!(prompt, model)
       default_params = {
-        model: DEFAULTS[:chat_completion_model_name],
+        model: model,
         temperature: DEFAULTS[:temperature],
         # TODO: Figure out how to introduce persisted conversations
         messages: [{role: "user", content: prompt}]
@@ -88,8 +100,8 @@ module LLM
     # @return [String] The summary
     #
     def summarize(text:)
-      prompt_template = Prompt.load_from_path(
-        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      prompt_template = Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
       )
       prompt = prompt_template.format(text: text)

data/lib/{llm → langchain/llm}/replicate.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module LLM
+module Langchain::LLM
   class Replicate < Base
     # Wrapper around Replicate.com LLM provider
     # Use it directly:
@@ -89,8 +89,8 @@ module LLM
     # @return [String] The summary
     #
     def summarize(text:)
-      prompt_template = Prompt.load_from_path(
-        file_path: Langchain.root.join("llm/prompts/summarize_template.json")
+      prompt_template = Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/llm/prompts/summarize_template.json")
       )
       prompt = prompt_template.format(text: text)

data/lib/{prompt → langchain/prompt}/base.rb RENAMED Viewed

@@ -3,7 +3,7 @@
 require "strscan"
 require "json"
-module Prompt
+module Langchain::Prompt
   class Base
     def format(**kwargs)
       raise NotImplementedError
@@ -29,7 +29,7 @@ module Prompt
     #
     def validate(template:, input_variables:)
       input_variables_set = @input_variables.uniq
-      variables_from_template = Prompt::Base.extract_variables_from_template(template)
+      variables_from_template = Langchain::Prompt::Base.extract_variables_from_template(template)
       missing_variables = variables_from_template - input_variables_set
       extra_variables = input_variables_set - variables_from_template

data/lib/{prompt → langchain/prompt}/few_shot_prompt_template.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Prompt
+module Langchain::Prompt
   class FewShotPromptTemplate < Base
     attr_reader :examples, :example_prompt, :input_variables, :prefix, :suffix, :example_separator

data/lib/{prompt → langchain/prompt}/loading.rb RENAMED Viewed

@@ -3,10 +3,10 @@
 require "strscan"
 require "pathname"
-module Prompt
+module Langchain::Prompt
   TYPE_TO_LOADER = {
-    "prompt" => ->(config) { Prompt.load_prompt(config) },
-    "few_shot" => ->(config) { Prompt.load_few_shot_prompt(config) }
+    "prompt" => ->(config) { load_prompt(config) },
+    "few_shot" => ->(config) { load_few_shot_prompt(config) }
   }
   class << self

data/lib/{prompt → langchain/prompt}/prompt_template.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Prompt
+module Langchain::Prompt
   class PromptTemplate < Base
     attr_reader :template, :input_variables, :validate_template

data/lib/{tool → langchain/tool}/base.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Tool
+module Langchain::Tool
   class Base
     # How to add additional Tools?
     # 1. Create a new file in lib/langchain/tool/your_tool_name.rb
@@ -10,9 +10,9 @@ module Tool
     # 4. Add your tool to the README.md
     TOOLS = {
-      "calculator" => "Tool::Calculator",
-      "search" => "Tool::SerpApi",
-      "wikipedia" => "Tool::Wikipedia"
+      "calculator" => "Langchain::Tool::Calculator",
+      "search" => "Langchain::Tool::SerpApi",
+      "wikipedia" => "Langchain::Tool::Wikipedia"
     }
     def self.description(value)
@@ -40,7 +40,7 @@ module Tool
     # @raise [ArgumentError] If any of the tools are not supported
     #
     def self.validate_tools!(tools:)
-      unrecognized_tools = tools - Tool::Base::TOOLS.keys
+      unrecognized_tools = tools - Langchain::Tool::Base::TOOLS.keys
       if unrecognized_tools.any?
         raise ArgumentError, "Unrecognized Tools: #{unrecognized_tools}"

data/lib/{tool → langchain/tool}/calculator.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Tool
+module Langchain::Tool
   class Calculator < Base
     description <<~DESC
       Useful for getting the result of a math expression.
@@ -22,7 +22,7 @@ module Tool
     rescue Eqn::ParseError, Eqn::NoVariableValueError
       # Sometimes the input is not a pure math expression, e.g: "12F in Celsius"
       # We can use the google answer box to evaluate this expression
-      hash_results = Tool::SerpApi.execute_search(input: input)
+      hash_results = Langchain::Tool::SerpApi.execute_search(input: input)
       hash_results.dig(:answer_box, :to)
     end
   end

data/lib/{tool → langchain/tool}/serp_api.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Tool
+module Langchain::Tool
   class SerpApi < Base
     # Wrapper around SerpAPI
     # Set ENV["SERPAPI_API_KEY"] to use it

data/lib/{tool → langchain/tool}/wikipedia.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Tool
+module Langchain::Tool
   class Wikipedia < Base
     # Tool that adds the capability to search using the Wikipedia API

data/lib/langchain/utils/token_length_validator.rb ADDED Viewed

@@ -0,0 +1,57 @@
+# frozen_string_literal: true
+require "tiktoken_ruby"
+module Langchain
+  module Utils
+    class TokenLimitExceeded < StandardError; end
+    class TokenLengthValidator
+      #
+      # This class is meant to validate the length of the text passed in to OpenAI's API.
+      # It is used to validate the token length before the API call is made
+      #
+      TOKEN_LIMITS = {
+        # Source:
+        # https://platform.openai.com/docs/api-reference/embeddings
+        # https://platform.openai.com/docs/models/gpt-4
+        "text-embedding-ada-002" => 8191,
+        "gpt-3.5-turbo" => 4096,
+        "gpt-3.5-turbo-0301" => 4096,
+        "text-davinci-003" => 4097,
+        "text-davinci-002" => 4097,
+        "code-davinci-002" => 8001,
+        "gpt-4" => 8192,
+        "gpt-4-0314" => 8192,
+        "gpt-4-32k" => 32768,
+        "gpt-4-32k-0314" => 32768,
+        "text-curie-001" => 2049,
+        "text-babbage-001" => 2049,
+        "text-ada-001" => 2049,
+        "davinci" => 2049,
+        "curie" => 2049,
+        "babbage" => 2049,
+        "ada" => 2049
+      }.freeze
+      #
+      # Validate the length of the text passed in to OpenAI's API
+      #
+      # @param text [String] The text to validate
+      # @param model_name [String] The model name to validate against
+      # @return [Boolean] Whether the text is valid or not
+      # @raise [TokenLimitExceeded] If the text is too long
+      #
+      def self.validate!(text, model_name)
+        encoder = Tiktoken.encoding_for_model(model_name)
+        token_length = encoder.encode(text).length
+        if token_length > TOKEN_LIMITS[model_name]
+          raise TokenLimitExceeded, "This model's maximum context length is #{TOKEN_LIMITS[model_name]} tokens, but the given text is #{token_length} tokens long."
+        end
+        true
+      end
+    end
+  end
+end

data/lib/{vectorsearch → langchain/vectorsearch}/base.rb RENAMED Viewed

@@ -2,7 +2,7 @@
 require "forwardable"
-module Vectorsearch
+module Langchain::Vectorsearch
   class Base
     extend Forwardable
@@ -13,12 +13,12 @@ module Vectorsearch
     # @param llm [Symbol] The LLM to use
     # @param llm_api_key [String] The API key for the LLM
     def initialize(llm:, llm_api_key:)
-      LLM::Base.validate_llm!(llm: llm)
+      Langchain::LLM::Base.validate_llm!(llm: llm)
       @llm = llm
       @llm_api_key = llm_api_key
-      @llm_client = LLM.const_get(LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
+      @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
     end
     # Method supported by Vectorsearch DB to create a default schema
@@ -51,10 +51,10 @@ module Vectorsearch
       :default_dimension
     def generate_prompt(question:, context:)
-      prompt_template = Prompt::FewShotPromptTemplate.new(
+      prompt_template = Langchain::Prompt::FewShotPromptTemplate.new(
         prefix: "Context:",
         suffix: "---\nQuestion: {question}\n---\nAnswer:",
-        example_prompt: Prompt::PromptTemplate.new(
+        example_prompt: Langchain::Prompt::PromptTemplate.new(
           template: "{context}",
           input_variables: ["context"]
         ),

data/lib/{vectorsearch → langchain/vectorsearch}/chroma.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   class Chroma < Base
     # Initialize the Chroma client
     # @param url [String] The URL of the Chroma server

data/lib/{vectorsearch → langchain/vectorsearch}/milvus.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   class Milvus < Base
     def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
       depends_on "milvus"

data/lib/{vectorsearch → langchain/vectorsearch}/pgvector.rb RENAMED Viewed

@@ -1,8 +1,17 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   # The PostgreSQL vector search adapter
   class Pgvector < Base
+    # The operators supported by the PostgreSQL vector search adapter
+    OPERATORS = {
+      "cosine_distance" => "<=>",
+      "euclidean_distance" => "<->"
+    }
+    DEFAULT_OPERATOR = "cosine_distance"
+    attr_reader :operator, :quoted_table_name
     # @param url [String] The URL of the PostgreSQL database
     # @param index_name [String] The name of the table to use for the index
     # @param llm [String] The URL of the Language Layer API
@@ -18,6 +27,8 @@ module Vectorsearch
       @client.type_map_for_results = PG::BasicTypeMapForResults.new(@client, registry: registry)
       @index_name = index_name
+      @quoted_table_name = @client.quote_ident(index_name)
+      @operator = OPERATORS[DEFAULT_OPERATOR]
       super(llm: llm, llm_api_key: llm_api_key)
     end
@@ -31,7 +42,7 @@ module Vectorsearch
       end
       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
       client.exec_params(
-        "INSERT INTO #{@index_name} (content, vectors) VALUES #{values};",
+        "INSERT INTO #{quoted_table_name} (content, vectors) VALUES #{values};",
         data
       )
     end
@@ -42,7 +53,7 @@ module Vectorsearch
       client.exec("CREATE EXTENSION IF NOT EXISTS vector;")
       client.exec(
         <<~SQL
-          CREATE TABLE IF NOT EXISTS #{@index_name} (
+          CREATE TABLE IF NOT EXISTS #{quoted_table_name} (
             id serial PRIMARY KEY,
             content TEXT,
             vectors VECTOR(#{default_dimension})
@@ -73,7 +84,7 @@ module Vectorsearch
       result = client.transaction do |conn|
         conn.exec("SET LOCAL ivfflat.probes = 10;")
         query = <<~SQL
-          SELECT id, content FROM #{@index_name} ORDER BY vectors <-> $1 ASC LIMIT $2;
+          SELECT id, content FROM #{quoted_table_name} ORDER BY vectors #{operator} $1 ASC LIMIT $2;
         SQL
         conn.exec_params(query, [embedding, k])
       end

data/lib/{vectorsearch → langchain/vectorsearch}/pinecone.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   class Pinecone < Base
     # Initialize the Pinecone client
     # @param environment [String] The environment to use

data/lib/{vectorsearch → langchain/vectorsearch}/qdrant.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   class Qdrant < Base
     # Initialize the Qdrant client
     # @param url [String] The URL of the Qdrant server

data/lib/{vectorsearch → langchain/vectorsearch}/weaviate.rb RENAMED Viewed

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
-module Vectorsearch
+module Langchain::Vectorsearch
   class Weaviate < Base
     # Initialize the Weaviate adapter
     # @param url [String] The URL of the Weaviate instance

data/lib/langchain.rb CHANGED Viewed

@@ -5,6 +5,7 @@ require "pathname"
 require_relative "./version"
 require_relative "./dependency_helper"
 module Langchain
   class << self
     attr_accessor :logger
@@ -19,6 +20,18 @@ module Langchain
   autoload :Loader, "langchain/loader"
   autoload :Data, "langchain/data"
+  module Agent
+    autoload :Base, "langchain/agent/base"
+    autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
+  end
+  module Tool
+    autoload :Base, "langchain/tool/base"
+    autoload :Calculator, "langchain/tool/calculator"
+    autoload :SerpApi, "langchain/tool/serp_api"
+    autoload :Wikipedia, "langchain/tool/wikipedia"
+  end
   module Processors
     autoload :Base, "langchain/processors/base"
     autoload :CSV, "langchain/processors/csv"
@@ -29,43 +42,35 @@ module Langchain
     autoload :PDF, "langchain/processors/pdf"
     autoload :Text, "langchain/processors/text"
   end
-end
-module Agent
-  autoload :Base, "agent/base"
-  autoload :ChainOfThoughtAgent, "agent/chain_of_thought_agent/chain_of_thought_agent.rb"
-end
-module Vectorsearch
-  autoload :Base, "vectorsearch/base"
-  autoload :Chroma, "vectorsearch/chroma"
-  autoload :Milvus, "vectorsearch/milvus"
-  autoload :Pinecone, "vectorsearch/pinecone"
-  autoload :Pgvector, "vectorsearch/pgvector"
-  autoload :Qdrant, "vectorsearch/qdrant"
-  autoload :Weaviate, "vectorsearch/weaviate"
-end
+  module Utils
+    autoload :TokenLengthValidator, "langchain/utils/token_length_validator"
+  end
-module LLM
-  autoload :Base, "llm/base"
-  autoload :Cohere, "llm/cohere"
-  autoload :GooglePalm, "llm/google_palm"
-  autoload :HuggingFace, "llm/hugging_face"
-  autoload :OpenAI, "llm/openai"
-  autoload :Replicate, "llm/replicate"
-end
+  module Vectorsearch
+    autoload :Base, "langchain/vectorsearch/base"
+    autoload :Chroma, "langchain/vectorsearch/chroma"
+    autoload :Milvus, "langchain/vectorsearch/milvus"
+    autoload :Pinecone, "langchain/vectorsearch/pinecone"
+    autoload :Pgvector, "langchain/vectorsearch/pgvector"
+    autoload :Qdrant, "langchain/vectorsearch/qdrant"
+    autoload :Weaviate, "langchain/vectorsearch/weaviate"
+  end
-module Prompt
-  require_relative "prompt/loading"
+  module LLM
+    autoload :Base, "langchain/llm/base"
+    autoload :Cohere, "langchain/llm/cohere"
+    autoload :GooglePalm, "langchain/llm/google_palm"
+    autoload :HuggingFace, "langchain/llm/hugging_face"
+    autoload :OpenAI, "langchain/llm/openai"
+    autoload :Replicate, "langchain/llm/replicate"
+  end
-  autoload :Base, "prompt/base"
-  autoload :PromptTemplate, "prompt/prompt_template"
-  autoload :FewShotPromptTemplate, "prompt/few_shot_prompt_template"
-end
+  module Prompt
+    require_relative "langchain/prompt/loading"
-module Tool
-  autoload :Base, "tool/base"
-  autoload :Calculator, "tool/calculator"
-  autoload :SerpApi, "tool/serp_api"
-  autoload :Wikipedia, "tool/wikipedia"
+    autoload :Base, "langchain/prompt/base"
+    autoload :PromptTemplate, "langchain/prompt/prompt_template"
+    autoload :FewShotPromptTemplate, "langchain/prompt/few_shot_prompt_template"
+  end
 end

data/lib/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.3.15"
+  VERSION = "0.4.0"
 end

metadata CHANGED Viewed

@@ -1,15 +1,29 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.3.15
+  version: 0.4.0
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-05-30 00:00:00.000000000 Z
+date: 2023-06-01 00:00:00.000000000 Z
 dependencies:
+- !ruby/object:Gem::Dependency
+  name: tiktoken_ruby
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.0.5
+  type: :runtime
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.0.5
 - !ruby/object:Gem::Dependency
   name: dotenv-rails
   requirement: !ruby/object:Gem::Requirement
@@ -311,12 +325,19 @@ files:
 - examples/store_and_query_with_pinecone.rb
 - examples/store_and_query_with_qdrant.rb
 - examples/store_and_query_with_weaviate.rb
-- lib/agent/base.rb
-- lib/agent/chain_of_thought_agent/chain_of_thought_agent.rb
-- lib/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
 - lib/dependency_helper.rb
 - lib/langchain.rb
+- lib/langchain/agent/base.rb
+- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
+- lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
 - lib/langchain/data.rb
+- lib/langchain/llm/base.rb
+- lib/langchain/llm/cohere.rb
+- lib/langchain/llm/google_palm.rb
+- lib/langchain/llm/hugging_face.rb
+- lib/langchain/llm/openai.rb
+- lib/langchain/llm/prompts/summarize_template.json
+- lib/langchain/llm/replicate.rb
 - lib/langchain/loader.rb
 - lib/langchain/processors/base.rb
 - lib/langchain/processors/csv.rb
@@ -326,29 +347,23 @@ files:
 - lib/langchain/processors/jsonl.rb
 - lib/langchain/processors/pdf.rb
 - lib/langchain/processors/text.rb
+- lib/langchain/prompt/base.rb
+- lib/langchain/prompt/few_shot_prompt_template.rb
+- lib/langchain/prompt/loading.rb
+- lib/langchain/prompt/prompt_template.rb
+- lib/langchain/tool/base.rb
+- lib/langchain/tool/calculator.rb
+- lib/langchain/tool/serp_api.rb
+- lib/langchain/tool/wikipedia.rb
+- lib/langchain/utils/token_length_validator.rb
+- lib/langchain/vectorsearch/base.rb
+- lib/langchain/vectorsearch/chroma.rb
+- lib/langchain/vectorsearch/milvus.rb
+- lib/langchain/vectorsearch/pgvector.rb
+- lib/langchain/vectorsearch/pinecone.rb
+- lib/langchain/vectorsearch/qdrant.rb
+- lib/langchain/vectorsearch/weaviate.rb
 - lib/langchainrb.rb
-- lib/llm/base.rb
-- lib/llm/cohere.rb
-- lib/llm/google_palm.rb
-- lib/llm/hugging_face.rb
-- lib/llm/openai.rb
-- lib/llm/prompts/summarize_template.json
-- lib/llm/replicate.rb
-- lib/prompt/base.rb
-- lib/prompt/few_shot_prompt_template.rb
-- lib/prompt/loading.rb
-- lib/prompt/prompt_template.rb
-- lib/tool/base.rb
-- lib/tool/calculator.rb
-- lib/tool/serp_api.rb
-- lib/tool/wikipedia.rb
-- lib/vectorsearch/base.rb
-- lib/vectorsearch/chroma.rb
-- lib/vectorsearch/milvus.rb
-- lib/vectorsearch/pgvector.rb
-- lib/vectorsearch/pinecone.rb
-- lib/vectorsearch/qdrant.rb
-- lib/vectorsearch/weaviate.rb
 - lib/version.rb
 - sig/langchain.rbs
 homepage: https://rubygems.org/gems/langchainrb
@@ -373,7 +388,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.3.7
+rubygems_version: 3.2.3
 signing_key:
 specification_version: 4
 summary: Build LLM-backed Ruby applications with Ruby's LangChain

/data/lib/{agent → langchain/agent}/chain_of_thought_agent/chain_of_thought_agent_prompt.json RENAMED Viewed

File without changes

/data/lib/{llm → langchain/llm}/prompts/summarize_template.json RENAMED Viewed

File without changes