RubyGems - langchainrb - Versions diffs - 0.4.2 → 0.5.0 - Mend

langchainrb 0.4.2 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

checksums.yaml +4 -4
data/.env.example +2 -1
data/.rubocop.yml +11 -0
data/CHANGELOG.md +7 -0
data/Gemfile +2 -0
data/Gemfile.lock +8 -1
data/README.md +34 -6
data/examples/pdf_store_and_query_with_chroma.rb +1 -2
data/examples/store_and_query_with_pinecone.rb +1 -2
data/examples/store_and_query_with_qdrant.rb +1 -2
data/examples/store_and_query_with_weaviate.rb +1 -2
data/lefthook.yml +5 -0
data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb +6 -10
data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb +78 -0
data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json +10 -0
data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json +10 -0
data/lib/langchain/llm/base.rb +0 -18
data/lib/langchain/llm/replicate.rb +1 -2
data/lib/langchain/processors/xlsx.rb +27 -0
data/lib/langchain/prompt/base.rb +6 -2
data/lib/langchain/prompt/loading.rb +5 -1
data/lib/langchain/tool/base.rb +2 -1
data/lib/langchain/tool/database.rb +45 -0
data/lib/langchain/vectorsearch/base.rb +7 -14
data/lib/langchain/vectorsearch/chroma.rb +6 -7
data/lib/langchain/vectorsearch/milvus.rb +4 -4
data/lib/langchain/vectorsearch/pgvector.rb +6 -7
data/lib/langchain/vectorsearch/pinecone.rb +6 -7
data/lib/langchain/vectorsearch/qdrant.rb +6 -7
data/lib/langchain/vectorsearch/weaviate.rb +6 -7
data/lib/langchain/version.rb +1 -1
data/lib/langchain.rb +3 -0
metadata +37 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: e6e84f50b6e12bd94f5fa8de956549537f5d34b8a901bc6af3fbc5d392fc2e0a
-  data.tar.gz: '08c01f481d64b0c35f7e86491d1115d975497c8561f50408516fad388f084c3e'
+  metadata.gz: b0a2fe8026e861c9d97465bce7da08a0b077492d6f7cf8fb42c45dbfdfe6749f
+  data.tar.gz: c04099c44a847bd9c05e8594859f92ca1f54d338c463ce59a375c2cb9731b1ad
 SHA512:
-  metadata.gz: 8a1d29180f3c0cf89307413bc99e22accc6875d458b3ae12ea72d30146cf5ff172fba7047fe00b385d324057638115254c8dcc6f01459a75f96dccb9a99a301b
-  data.tar.gz: 070a0b6836cdb7dd356c99186964f61926498d5f585910bbb30449d4b2a12d50797797dc34c6fe3eb6a4e64c156230d23606c2ac0cf131eeccc942c6231ab3c9
+  metadata.gz: dec375b2b7cae377cf31f3f8ed0a6ac9d79215c945e7c0da78ed1fbad3c502ecfcc5ce5318c55a9a634db88fea1f5fbbeed7a0f7dc6ab8096c909e0a3ff02154
+  data.tar.gz: 558a0f6ddf90ad044f9e2cc7c6ca678958472748d2e430a3cbb4308290898b9b22a204a94a5b5943f06137f7da5238d038a65b8c79d96f8a3705499d95cfb597

data/.env.example CHANGED Viewed

@@ -1,3 +1,4 @@
+AI21_API_KEY=
 CHROMA_URL=
 COHERE_API_KEY=
 HUGGING_FACE_API_KEY=
@@ -6,10 +7,10 @@ OPENAI_API_KEY=
 GOOGLE_PALM_API_KEY=
 PINECONE_API_KEY=
 PINECONE_ENVIRONMENT=
+POSTGRES_URL=
 REPLICATE_API_KEY=
 QDRANT_API_KEY=
 QDRANT_URL=
 SERPAPI_API_KEY=
 WEAVIATE_API_KEY=
 WEAVIATE_URL=
-POSTGRES_URL=

data/.rubocop.yml ADDED Viewed

@@ -0,0 +1,11 @@
+AllCops:
+  SuggestExtensions: false
+require:
+  - standard
+  - rubocop-performance
+inherit_gem:
+  standard: config/base.yml
+  standard-performance: config/base.yml
+  standard-custom: config/base.yml

data/CHANGELOG.md CHANGED Viewed

@@ -1,5 +1,12 @@
 ## [Unreleased]
+## [0.5.0] - 2023-06-05
+- [BREAKING] LLMs are now passed as objects to Vectorsearch classes instead of `llm: :name, llm_api_key:` previously
+- 📋 Prompts
+  - YAML prompt templates are now supported
+- 🚚 Loaders
+  - Introduce `Langchain::Processors::Xlsx` to parse .xlsx files
 ## [0.4.2] - 2023-06-03
 - 🗣️ LLMs
   - Introducing support for AI21

data/Gemfile CHANGED Viewed

@@ -10,3 +10,5 @@ gem "rake", "~> 13.0"
 gem "rspec", "~> 3.0"
 gem "standardrb"
+# Let's add rubocop explicitly here; we are using only standardrb rules in .rubocop.yml
+gem "rubocop"

data/Gemfile.lock CHANGED Viewed

@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    langchainrb (0.4.2)
+    langchainrb (0.5.0)
       colorize (~> 0.8.1)
       tiktoken_ruby (~> 0.0.5)
@@ -219,6 +219,9 @@ GEM
       faraday-multipart
       faraday-retry
     rexml (3.2.5)
+    roo (2.10.0)
+      nokogiri (~> 1)
+      rubyzip (>= 1.3.0, < 3.0.0)
     rspec (3.12.0)
       rspec-core (~> 3.12.0)
       rspec-expectations (~> 3.12.0)
@@ -257,6 +260,7 @@ GEM
     rubyzip (2.3.2)
     safe_ruby (1.0.4)
       childprocess (>= 0.3.9)
+    sequel (5.68.0)
     standard (1.28.2)
       language_server-protocol (~> 3.17.0.2)
       lint_roller (~> 1.0)
@@ -321,9 +325,12 @@ DEPENDENCIES
   rake (~> 13.0)
   rdiscount
   replicate-ruby (~> 0.2.2)
+  roo (~> 2.10.0)
   rspec (~> 3.0)
+  rubocop
   ruby-openai (~> 4.0.0)
   safe_ruby (~> 1.0.4)
+  sequel (~> 5.68.0)
   standardrb
   weaviate-ruby (~> 0.8.0)
   wikipedia-client (~> 1.17.0)

data/README.md CHANGED Viewed

@@ -47,8 +47,7 @@ Pick the vector search database you'll be using and instantiate the client:
 client = Langchain::Vectorsearch::Weaviate.new(
     url: ENV["WEAVIATE_URL"],
     api_key: ENV["WEAVIATE_API_KEY"],
-    llm: :openai, # or :cohere
-    llm_api_key: ENV["OPENAI_API_KEY"]
+    llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 )
 # You can instantiate any other supported vector search database:
@@ -151,6 +150,12 @@ Add `"google_palm_api", "~> 0.1.0"` to your Gemfile.
 google_palm = Langchain::LLM::GooglePalm.new(api_key: ENV["GOOGLE_PALM_API_KEY"])
 ```
+#### AI21
+Add `gem "ai21", "~> 0.2.0"` to your Gemfile.
+```ruby
+ai21 = Langchain::LLM::AI21.new(api_key: ENV["AI21_API_KEY"])
+```
 ### Using Prompts 📋
 #### Prompt Templates
@@ -172,9 +177,9 @@ prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke a
 Creating a PromptTemplate using just a prompt and no input_variables:
 ```ruby
-prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a {adjective} joke about {content}.")
-prompt.input_variables # ["adjective", "content"]
-prompt.format(adjective: "funny", content: "chickens") # "Tell me a funny joke about chickens."
+prompt = Langchain::Prompt::PromptTemplate.from_template("Tell me a funny joke about chickens.")
+prompt.input_variables # []
+prompt.format # "Tell me a funny joke about chickens."
 ```
 Save prompt template to JSON file:
@@ -236,6 +241,13 @@ prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/few_s
 prompt.prefix # "Write antonyms for the following words."
 ```
+Loading a new prompt template using a YAML file:
+```ruby
+prompt = Langchain::Prompt.load_from_path(file_path: "spec/fixtures/prompt/prompt_template.yaml")
+prompt.input_variables #=> ["adjective", "content"]
+```
 ### Using Agents 🤖
 Agents are semi-autonomous bots that can respond to user questions and use the Tools available to them to provide informed replies. They break down problems into a series of steps and define Actions (and Action Inputs) along the way that are executed and fed back to them as additional information. Once an Agent decides that it has the Final Answer, it responds with it.
@@ -244,7 +256,7 @@ Agents are semi-autonomous bots that can respond to user questions and use avail
 Add `gem "ruby-openai"`, `gem "eqn"`, and `gem "google_search_results"` to your Gemfile
 ```ruby
-agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: :openai, llm_api_key: ENV["OPENAI_API_KEY"], tools: ['search', 'calculator'])
+agent = Langchain::Agent::ChainOfThoughtAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), tools: ['search', 'calculator'])
 agent.tools
 # => ["search", "calculator"]
@@ -254,6 +266,19 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
 #=> "Approximately 2,945 soccer fields would be needed to cover the distance between NYC and DC in a straight line."
 ```
+#### SQL-Query Agent
+Add `gem "sequel"` to your Gemfile
+```ruby
+agent = Langchain::Agent::SQLQueryAgent.new(llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"]), db_connection_string: "postgres://user:password@localhost:5432/db_name")
+```
+```ruby
+agent.ask(question: "How many users have a name with length greater than 5 in the users table?")
+#=> "14 users have a name with length greater than 5 in the users table."
+```
 #### Demo
 ![May-12-2023 13-09-13](https://github.com/andreibondarev/langchainrb/assets/541665/6bad4cd9-976c-420f-9cf9-b85bf84f7eaf)
@@ -264,6 +289,7 @@ agent.run(question: "How many full soccer fields would be needed to cover the di
 | Name         | Description                                        | ENV Requirements                                              | Gem Requirements                          |
 | ------------ | :------------------------------------------------: | :-----------------------------------------------------------: | :---------------------------------------: |
 | "calculator" | Useful for getting the result of a math expression |                                                               | `gem "eqn", "~> 1.6.5"`                   |
+| "database"   | Useful for querying a SQL database |                                                               | `gem "sequel", "~> 5.68.0"`                   |
 | "ruby_code_interpreter" | Interprets Ruby expressions             |                                                               | `gem "safe_ruby", "~> 1.0.4"`             |
 | "search"     | A wrapper around Google Search                     | `ENV["SERPAPI_API_KEY"]` (https://serpapi.com/manage-api-key) | `gem "google_search_results", "~> 2.0.0"` |
 | "wikipedia"  | Calls Wikipedia API to retrieve the summary        |                                                               | `gem "wikipedia-client", "~> 1.17.0"`     |
@@ -298,6 +324,7 @@ Langchain::Loader.load('https://www.example.com/file.pdf')
 | JSON   | Langchain::Processors::JSON  |                              |
 | JSONL  | Langchain::Processors::JSONL |                              |
 | csv    | Langchain::Processors::CSV   |                              |
+| xlsx   | Langchain::Processors::Xlsx  |   `gem "roo", "~> 2.10.0"`   |
 ## Examples
 Additional examples available: [/examples](https://github.com/andreibondarev/langchainrb/tree/main/examples)
@@ -317,6 +344,7 @@ Langchain.logger.level = :info
 2. `cp .env.example .env`, then fill out the environment variables in `.env`
 3. `bundle exec rake` to ensure that the tests pass and to run standardrb
 4. `bin/console` to load the gem in a REPL session. Feel free to add your own instances of LLMs, Tools, Agents, etc. and experiment with them.
+5. Optionally, install lefthook git hooks for pre-commit to auto lint: `gem install lefthook && lefthook install -f`
 ## Community
 Join us in the [Ruby AI Builders](https://discord.gg/SBmjAnKT) Discord community in #langchainrb

data/examples/pdf_store_and_query_with_chroma.rb CHANGED Viewed

@@ -7,8 +7,7 @@ require "langchain"
 chroma = Vectorsearch::Chroma.new(
   url: ENV["CHROMA_URL"],
   index_name: "documents",
-  llm: :openai,
-  llm_api_key: ENV["OPENAI_API_KEY"]
+  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 )
 # Create the default schema.

data/examples/store_and_query_with_pinecone.rb CHANGED Viewed

@@ -8,8 +8,7 @@ pinecone = Vectorsearch::Pinecone.new(
   environment: ENV["PINECONE_ENVIRONMENT"],
   api_key: ENV["PINECONE_API_KEY"],
   index_name: "recipes",
-  llm: :openai,
-  llm_api_key: ENV["OPENAI_API_KEY"]
+  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 )
 # Create the default schema.

data/examples/store_and_query_with_qdrant.rb CHANGED Viewed

@@ -8,8 +8,7 @@ qdrant = Vectorsearch::Qdrant.new(
   url: ENV["QDRANT_URL"],
   api_key: ENV["QDRANT_API_KEY"],
   index_name: "recipes",
-  llm: :cohere,
-  llm_api_key: ENV["COHERE_API_KEY"]
+  llm: Langchain::LLM::Cohere.new(api_key: ENV["COHERE_API_KEY"])
 )
 # Create the default schema.

data/examples/store_and_query_with_weaviate.rb CHANGED Viewed

@@ -8,8 +8,7 @@ weaviate = Vectorsearch::Weaviate.new(
   url: ENV["WEAVIATE_URL"],
   api_key: ENV["WEAVIATE_API_KEY"],
   index_name: "Recipes",
-  llm: :openai,
-  llm_api_key: ENV["OPENAI_API_KEY"]
+  llm: Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
 )
 # Create the default schema. A text field `content` will be used.

data/lefthook.yml ADDED Viewed

@@ -0,0 +1,5 @@
+pre-commit:
+  commands:
+    lint:
+      run: standardrb --fix
+      stage_fixed: true

data/lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb CHANGED Viewed

@@ -2,23 +2,19 @@
 module Langchain::Agent
   class ChainOfThoughtAgent < Base
-    attr_reader :llm, :llm_api_key, :llm_client, :tools
+    attr_reader :llm, :tools
     # Initializes the Agent
     #
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
+    # @param llm [Object] The LLM client to use
     # @param tools [Array] The tools to use
     # @return [ChainOfThoughtAgent] The Agent::ChainOfThoughtAgent instance
-    def initialize(llm:, llm_api_key:, tools: [])
-      Langchain::LLM::Base.validate_llm!(llm: llm)
+    def initialize(llm:, tools: [])
       Langchain::Tool::Base.validate_tools!(tools: tools)
-      @llm = llm
-      @llm_api_key = llm_api_key
       @tools = tools
-      @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
+      @llm = llm
     end
     # Validate tools when they're re-assigned
@@ -42,8 +38,8 @@ module Langchain::Agent
       )
       loop do
-        Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm} LLM")
-        response = llm_client.complete(
+        Langchain.logger.info("[#{self.class.name}]".red + ": Sending the prompt to the #{llm.class} LLM")
+        response = llm.complete(
           prompt: prompt,
           stop_sequences: ["Observation:"],
           max_tokens: 500

data/lib/langchain/agent/sql_query_agent/sql_query_agent.rb ADDED Viewed

@@ -0,0 +1,78 @@
+# frozen_string_literal: true
+module Langchain::Agent
+  class SQLQueryAgent < Base
+    attr_reader :llm, :db, :schema
+    # Initializes the Agent
+    #
+    # @param llm [Object] The LLM client to use
+    # @param db_connection_string [String] Database connection info
+    def initialize(llm:, db_connection_string:)
+      @llm = llm
+      @db = Langchain::Tool::Database.new(db_connection_string)
+      @schema = @db.schema
+    end
+    # Ask a question and get an answer
+    #
+    # @param question [String] Question to ask the LLM/Database
+    # @return [String] Answer to the question
+    def ask(question:)
+      prompt = create_prompt_for_sql(question: question)
+      # Get the SQL string to execute
+      Langchain.logger.info("[#{self.class.name}]".red + ":  Passing the inital prompt to the #{llm.class} LLM")
+      sql_string = llm.complete(prompt: prompt, max_tokens: 500)
+      # Execute the SQL string and collect the results
+      Langchain.logger.info("[#{self.class.name}]".red + ":  Passing the SQL to the Database: #{sql_string}")
+      results = db.execute(input: sql_string)
+      # Pass the results and get the LLM to synthesize the answer to the question
+      Langchain.logger.info("[#{self.class.name}]".red + ":  Passing the synthesize prompt to the #{llm.class} LLM with results: #{results}")
+      prompt2 = create_prompt_for_answer(question: question, sql_query: sql_string, results: results)
+      llm.complete(prompt: prompt2, max_tokens: 500)
+    end
+    private
+    # Create the initial prompt to pass to the LLM
+    # @param question [String] Question to ask
+    # @return [String] Prompt
+    def create_prompt_for_sql(question:)
+      prompt_template_sql.format(
+        dialect: "standard SQL",
+        schema: schema,
+        question: question
+      )
+    end
+    # Load the PromptTemplate from the JSON file
+    # @return [PromptTemplate] PromptTemplate instance
+    def prompt_template_sql
+      Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json")
+      )
+    end
+    # Create the second prompt to pass to the LLM
+    # @param question [String] Question to ask
+    # @return [String] Prompt
+    def create_prompt_for_answer(question:, sql_query:, results:)
+      prompt_template_answer.format(
+        question: question,
+        sql_query: sql_query,
+        results: results
+      )
+    end
+    # Load the PromptTemplate from the JSON file
+    # @return [PromptTemplate] PromptTemplate instance
+    def prompt_template_answer
+      Langchain::Prompt.load_from_path(
+        file_path: Langchain.root.join("langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json")
+      )
+    end
+  end
+end

data/lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "_type": "prompt",
+  "template":
+    "Given an input question and results of a SQL query, look at the results and return the answer. Use the following format:\nQuestion: {question}\nThe SQL query: {sql_query}\nResult of the SQLQuery: {results}\nFinal answer: Final answer here",
+  "input_variables": [
+    "question",
+    "sql_query",
+    "results"
+  ]
+}

data/lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json ADDED Viewed

@@ -0,0 +1,10 @@
+{
+  "_type": "prompt",
+   "template":
+    "Given an input question, create a syntactically correct {dialect} query to run, then return the query in valid SQL.\nNever query for all the columns from a specific table, only ask for the few relevant columns given the question.\nPay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Pay attention to which column is in which table. Also, qualify column names with the table name when needed.\nOnly use the tables listed below.\n{schema}\nUse the following format:\nQuestion: {question}\nSQLQuery:",
+  "input_variables": [
+    "dialect",
+    "schema",
+    "question"
+  ]
+}

data/lib/langchain/llm/base.rb CHANGED Viewed

@@ -6,15 +6,6 @@ module Langchain::LLM
     attr_reader :client
-    # Currently supported LLMs
-    LLMS = {
-      cohere: "Cohere",
-      google_palm: "GooglePalm",
-      huggingface: "HuggingFace",
-      openai: "OpenAI",
-      replicate: "Replicate"
-    }.freeze
     def default_dimension
       self.class.const_get(:DEFAULTS).dig(:dimension)
     end
@@ -38,14 +29,5 @@ module Langchain::LLM
     def summarize(...)
       raise NotImplementedError, "#{self.class.name} does not support summarization"
     end
-    # Ensure that the LLM value passed in is supported
-    # @param llm [Symbol] The LLM to use
-    def self.validate_llm!(llm:)
-      # TODO: Fix so this works when `llm` value is a string instead of a symbol
-      unless Langchain::LLM::Base::LLMS.key?(llm)
-        raise ArgumentError, "LLM must be one of #{Langchain::LLM::Base::LLMS.keys}"
-      end
-    end
   end
 end

data/lib/langchain/llm/replicate.rb CHANGED Viewed

@@ -14,8 +14,7 @@ module Langchain::LLM
     # chroma = Vectorsearch::Chroma.new(
     #   url: ENV["CHROMA_URL"],
     #   index_name: "...",
-    #   llm: :replicate,
-    #   llm_api_key: ENV["REPLICATE_API_KEY"],
+    #   llm: Langchain::LLM::Replicate.new(api_key: ENV["REPLICATE_API_KEY"])
     # )
     DEFAULTS = {

data/lib/langchain/processors/xlsx.rb ADDED Viewed

@@ -0,0 +1,27 @@
+# frozen_string_literal: true
+module Langchain
+  module Processors
+    class Xlsx < Base
+      EXTENSIONS = [".xlsx", ".xlsm"].freeze
+      CONTENT_TYPES = ["application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"].freeze
+      def initialize(*)
+        depends_on "roo"
+        require "roo"
+      end
+      # Parse the document and return the text
+      # @param [File] data
+      # @return [Array<Array<String>>] Array of rows, each row is an array of cells
+      def parse(data)
+        xlsx_file = Roo::Spreadsheet.open(data)
+        xlsx_file.each_with_pagename.flat_map do |_, sheet|
+          sheet.map do |row|
+            row.map { |i| i.to_s.strip }
+          end
+        end
+      end
+    end
+  end
+end

data/lib/langchain/prompt/base.rb CHANGED Viewed

@@ -2,6 +2,7 @@
 require "strscan"
 require "json"
+require "yaml"
 module Langchain::Prompt
   class Base
@@ -52,10 +53,13 @@ module Langchain::Prompt
       directory_path = save_path.dirname
       FileUtils.mkdir_p(directory_path) unless directory_path.directory?
-      if save_path.extname == ".json"
+      case save_path.extname
+      when ".json"
         File.write(file_path, to_h.to_json)
+      when ".yaml", ".yml"
+        File.write(file_path, to_h.to_yaml)
       else
-        raise ArgumentError, "#{file_path} must be json"
+        raise ArgumentError, "#{file_path} must be json or yaml file"
       end
     end

data/lib/langchain/prompt/loading.rb CHANGED Viewed

@@ -3,6 +3,7 @@
 require "strscan"
 require "pathname"
 require "json"
+require "yaml"
 module Langchain::Prompt
   TYPE_TO_LOADER = {
@@ -23,8 +24,11 @@ module Langchain::Prompt
     def load_from_path(file_path:)
       file_path = file_path.is_a?(String) ? Pathname.new(file_path) : file_path
-      if file_path.extname == ".json"
+      case file_path.extname
+      when ".json"
         config = JSON.parse(File.read(file_path))
+      when ".yaml", ".yml"
+        config = YAML.safe_load(File.read(file_path))
       else
         raise ArgumentError, "Got unsupported file type #{file_path.extname}"
       end

data/lib/langchain/tool/base.rb CHANGED Viewed

@@ -14,7 +14,8 @@ module Langchain::Tool
     TOOLS = {
       "calculator" => "Langchain::Tool::Calculator",
       "search" => "Langchain::Tool::SerpApi",
-      "wikipedia" => "Langchain::Tool::Wikipedia"
+      "wikipedia" => "Langchain::Tool::Wikipedia",
+      "database" => "Langchain::Tool::Database"
     }
     def self.description(value)

data/lib/langchain/tool/database.rb ADDED Viewed

@@ -0,0 +1,45 @@
+module Langchain::Tool
+  class Database < Base
+    #
+    # Connects to a database, executes SQL queries, and outputs DB schema for Agents to use
+    #
+    # Gem requirements: gem "sequel", "~> 5.68.0"
+    #
+    description <<~DESC
+      Useful for getting the result of a database query.
+      The input to this tool should be valid SQL.
+    DESC
+    # Establish a database connection
+    # @param db_connection_string [String] Database connection info, e.g. 'postgres://user:password@localhost:5432/db_name'
+    def initialize(db_connection_string)
+      depends_on "sequel"
+      require "sequel"
+      require "sequel/extensions/schema_dumper"
+      raise StandardError, "db_connection_string parameter cannot be blank" if db_connection_string.empty?
+      @db = Sequel.connect(db_connection_string)
+      @db.extension :schema_dumper
+    end
+    def schema
+      Langchain.logger.info("[#{self.class.name}]".light_blue + ": Dumping schema")
+      @db.dump_schema_migration(same_db: true, indexes: false) unless @db.adapter_scheme == :mock
+    end
+    # Evaluates a sql expression
+    # @param input [String] sql expression
+    # @return [Array] results
+    def execute(input:)
+      Langchain.logger.info("[#{self.class.name}]".light_blue + ": Executing \"#{input}\"")
+      begin
+        @db[input].to_a
+      rescue Sequel::DatabaseError => e
+        Langchain.logger.error("[#{self.class.name}]".light_red + ": #{e.message}")
+      end
+    end
+  end
+end

data/lib/langchain/vectorsearch/base.rb CHANGED Viewed

@@ -7,19 +7,13 @@ module Langchain::Vectorsearch
     include Langchain::DependencyHelper
     extend Forwardable
-    attr_reader :client, :index_name, :llm, :llm_api_key, :llm_client
+    attr_reader :client, :index_name, :llm
     DEFAULT_METRIC = "cosine"
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
-    def initialize(llm:, llm_api_key:)
-      Langchain::LLM::Base.validate_llm!(llm: llm)
+    # @param llm [Object] The LLM client to use
+    def initialize(llm:)
       @llm = llm
-      @llm_api_key = llm_api_key
-      @llm_client = Langchain::LLM.const_get(Langchain::LLM::Base::LLMS.fetch(llm)).new(api_key: llm_api_key)
     end
     # Method supported by Vectorsearch DB to create a default schema
@@ -48,7 +42,7 @@ module Langchain::Vectorsearch
       raise NotImplementedError, "#{self.class.name} does not support asking questions"
     end
-    def_delegators :llm_client,
+    def_delegators :llm,
       :default_dimension
     def generate_prompt(question:, context:)
@@ -69,11 +63,10 @@ module Langchain::Vectorsearch
       prompt_template.format(question: question)
     end
-    def add_data(path: nil, paths: nil)
-      raise ArgumentError, "Either path or paths must be provided" if path.nil? && paths.nil?
-      raise ArgumentError, "Either path or paths must be provided, not both" if !path.nil? && !paths.nil?
+    def add_data(paths:)
+      raise ArgumentError, "Paths must be provided" if paths.to_a.empty?
-      texts = Array(path || paths)
+      texts = Array(paths)
         .flatten
         .map { |path| Langchain::Loader.new(path)&.load&.value }
         .compact

data/lib/langchain/vectorsearch/chroma.rb CHANGED Viewed

@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
     # @param url [String] The URL of the Chroma server
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
-    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+    # @param llm [Object] The LLM client to use
+    def initialize(url:, index_name:, llm:, api_key: nil)
       depends_on "chroma-db"
       require "chroma-db"
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
       @index_name = index_name
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
         ::Chroma::Resources::Embedding.new(
           # TODO: Add support for passing your own IDs
           id: SecureRandom.uuid,
-          embedding: llm_client.embed(text: text),
+          embedding: llm.embed(text: text),
           # TODO: Add support for passing metadata
           metadata: [], # metadatas[index],
           document: text # Do we actually need to store the whole original document?
@@ -63,7 +62,7 @@ module Langchain::Vectorsearch
       query:,
       k: 4
     )
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,
@@ -101,7 +100,7 @@ module Langchain::Vectorsearch
       prompt = generate_prompt(question: question, context: context)
-      llm_client.chat(prompt: prompt)
+      llm.chat(prompt: prompt)
     end
     private

data/lib/langchain/vectorsearch/milvus.rb CHANGED Viewed

@@ -11,14 +11,14 @@ module Langchain::Vectorsearch
     # milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:)
     #
-    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+    def initialize(url:, index_name:, llm:, api_key: nil)
       depends_on "milvus"
       require "milvus"
       @client = ::Milvus::Client.new(url: url)
       @index_name = index_name
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     def add_texts(texts:)
@@ -33,7 +33,7 @@ module Langchain::Vectorsearch
           }, {
             field_name: "vectors",
             type: ::Milvus::DATA_TYPES["binary_vector"],
-            field: Array(texts).map { |text| llm_client.embed(text: text) }
+            field: Array(texts).map { |text| llm.embed(text: text) }
           }
         ]
       )
@@ -78,7 +78,7 @@ module Langchain::Vectorsearch
     end
     def similarity_search(query:, k: 4)
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,

data/lib/langchain/vectorsearch/pgvector.rb CHANGED Viewed

@@ -22,10 +22,9 @@ module Langchain::Vectorsearch
     # @param url [String] The URL of the PostgreSQL database
     # @param index_name [String] The name of the table to use for the index
-    # @param llm [String] The URL of the Language Layer API
-    # @param llm_api_key [String] The API key for the Language Layer API
+    # @param llm [Object] The LLM client to use
     # @param api_key [String] The API key for the Vectorsearch DB (not used for PostgreSQL)
-    def initialize(url:, index_name:, llm:, llm_api_key:, api_key: nil)
+    def initialize(url:, index_name:, llm:, api_key: nil)
       require "pg"
       require "pgvector"
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
       @quoted_table_name = @client.quote_ident(index_name)
       @operator = OPERATORS[DEFAULT_OPERATOR]
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     # Add a list of texts to the index
@@ -46,7 +45,7 @@ module Langchain::Vectorsearch
     # @return [PG::Result] The response from the database
     def add_texts(texts:)
       data = texts.flat_map do |text|
-        [text, llm_client.embed(text: text)]
+        [text, llm.embed(text: text)]
       end
       values = texts.length.times.map { |i| "($#{2 * i + 1}, $#{2 * i + 2})" }.join(",")
       client.exec_params(
@@ -75,7 +74,7 @@ module Langchain::Vectorsearch
     # @param k [Integer] The number of top results to return
     # @return [Array<Hash>] The results of the search
     def similarity_search(query:, k: 4)
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,
@@ -113,7 +112,7 @@ module Langchain::Vectorsearch
       prompt = generate_prompt(question: question, context: context)
-      llm_client.chat(prompt: prompt)
+      llm.chat(prompt: prompt)
     end
   end
 end

data/lib/langchain/vectorsearch/pinecone.rb CHANGED Viewed

@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
     # @param environment [String] The environment to use
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
-    def initialize(environment:, api_key:, index_name:, llm:, llm_api_key:)
+    # @param llm [Object] The LLM client to use
+    def initialize(environment:, api_key:, index_name:, llm:)
       depends_on "pinecone"
       require "pinecone"
@@ -29,7 +28,7 @@ module Langchain::Vectorsearch
       @client = ::Pinecone::Client.new
       @index_name = index_name
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     # Add a list of texts to the index
@@ -43,7 +42,7 @@ module Langchain::Vectorsearch
           # TODO: Allows passing in your own IDs
           id: SecureRandom.uuid,
           metadata: metadata || {content: text},
-          values: llm_client.embed(text: text)
+          values: llm.embed(text: text)
         }
       end
@@ -74,7 +73,7 @@ module Langchain::Vectorsearch
       namespace: "",
       filter: nil
     )
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,
@@ -121,7 +120,7 @@ module Langchain::Vectorsearch
       prompt = generate_prompt(question: question, context: context)
-      llm_client.chat(prompt: prompt)
+      llm.chat(prompt: prompt)
     end
   end
 end

data/lib/langchain/vectorsearch/qdrant.rb CHANGED Viewed

@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
     # @param url [String] The URL of the Qdrant server
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
-    def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
+    # @param llm [Object] The LLM client to use
+    def initialize(url:, api_key:, index_name:, llm:)
       depends_on "qdrant-ruby"
       require "qdrant"
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
       )
       @index_name = index_name
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
       Array(texts).each do |text|
         batch[:ids].push(SecureRandom.uuid)
-        batch[:vectors].push(llm_client.embed(text: text))
+        batch[:vectors].push(llm.embed(text: text))
         batch[:payloads].push({content: text})
       end
@@ -68,7 +67,7 @@ module Langchain::Vectorsearch
       query:,
       k: 4
     )
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(
         embedding: embedding,
@@ -105,7 +104,7 @@ module Langchain::Vectorsearch
       prompt = generate_prompt(question: question, context: context)
-      llm_client.chat(prompt: prompt)
+      llm.chat(prompt: prompt)
     end
   end
 end

data/lib/langchain/vectorsearch/weaviate.rb CHANGED Viewed

@@ -15,9 +15,8 @@ module Langchain::Vectorsearch
     # @param url [String] The URL of the Weaviate instance
     # @param api_key [String] The API key to use
     # @param index_name [String] The name of the index to use
-    # @param llm [Symbol] The LLM to use
-    # @param llm_api_key [String] The API key for the LLM
-    def initialize(url:, api_key:, index_name:, llm:, llm_api_key:)
+    # @param llm [Object] The LLM client to use
+    def initialize(url:, api_key:, index_name:, llm:)
       depends_on "weaviate-ruby"
       require "weaviate"
@@ -27,7 +26,7 @@ module Langchain::Vectorsearch
       )
       @index_name = index_name
-      super(llm: llm, llm_api_key: llm_api_key)
+      super(llm: llm)
     end
     # Add a list of texts to the index
@@ -38,7 +37,7 @@ module Langchain::Vectorsearch
         {
           class: index_name,
           properties: {content: text},
-          vector: llm_client.embed(text: text)
+          vector: llm.embed(text: text)
         }
       end
@@ -67,7 +66,7 @@ module Langchain::Vectorsearch
     # @param k [Integer|String] The number of results to return
     # @return [Hash] The search results
     def similarity_search(query:, k: 4)
-      embedding = llm_client.embed(text: query)
+      embedding = llm.embed(text: query)
       similarity_search_by_vector(embedding: embedding, k: k)
     end
@@ -100,7 +99,7 @@ module Langchain::Vectorsearch
       prompt = generate_prompt(question: question, context: context)
-      llm_client.chat(prompt: prompt)
+      llm.chat(prompt: prompt)
     end
   end
 end

data/lib/langchain/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Langchain
-  VERSION = "0.4.2"
+  VERSION = "0.5.0"
 end

data/lib/langchain.rb CHANGED Viewed

@@ -24,6 +24,7 @@ module Langchain
   module Agent
     autoload :Base, "langchain/agent/base"
     autoload :ChainOfThoughtAgent, "langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb"
+    autoload :SQLQueryAgent, "langchain/agent/sql_query_agent/sql_query_agent.rb"
   end
   module Tool
@@ -32,6 +33,7 @@ module Langchain
     autoload :RubyCodeInterpreter, "langchain/tool/ruby_code_interpreter"
     autoload :SerpApi, "langchain/tool/serp_api"
     autoload :Wikipedia, "langchain/tool/wikipedia"
+    autoload :Database, "langchain/tool/database"
   end
   module Processors
@@ -43,6 +45,7 @@ module Langchain
     autoload :JSONL, "langchain/processors/jsonl"
     autoload :PDF, "langchain/processors/pdf"
     autoload :Text, "langchain/processors/text"
+    autoload :Xlsx, "langchain/processors/xlsx"
   end
   module Utils

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: langchainrb
 version: !ruby/object:Gem::Version
-  version: 0.4.2
+  version: 0.5.0
 platform: ruby
 authors:
 - Andrei Bondarev
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2023-06-03 00:00:00.000000000 Z
+date: 2023-06-05 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: tiktoken_ruby
@@ -318,6 +318,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 0.9.0
+- !ruby/object:Gem::Dependency
+  name: roo
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.10.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 2.10.0
 - !ruby/object:Gem::Dependency
   name: ruby-openai
   requirement: !ruby/object:Gem::Requirement
@@ -346,6 +360,20 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: 1.0.4
+- !ruby/object:Gem::Dependency
+  name: sequel
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 5.68.0
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 5.68.0
 - !ruby/object:Gem::Dependency
   name: weaviate-ruby
   requirement: !ruby/object:Gem::Requirement
@@ -383,6 +411,7 @@ extra_rdoc_files: []
 files:
 - ".env.example"
 - ".rspec"
+- ".rubocop.yml"
 - CHANGELOG.md
 - Gemfile
 - Gemfile.lock
@@ -395,10 +424,14 @@ files:
 - examples/store_and_query_with_pinecone.rb
 - examples/store_and_query_with_qdrant.rb
 - examples/store_and_query_with_weaviate.rb
+- lefthook.yml
 - lib/langchain.rb
 - lib/langchain/agent/base.rb
 - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent.rb
 - lib/langchain/agent/chain_of_thought_agent/chain_of_thought_agent_prompt.json
+- lib/langchain/agent/sql_query_agent/sql_query_agent.rb
+- lib/langchain/agent/sql_query_agent/sql_query_agent_answer_prompt.json
+- lib/langchain/agent/sql_query_agent/sql_query_agent_sql_prompt.json
 - lib/langchain/data.rb
 - lib/langchain/dependency_helper.rb
 - lib/langchain/llm/ai21.rb
@@ -418,12 +451,14 @@ files:
 - lib/langchain/processors/jsonl.rb
 - lib/langchain/processors/pdf.rb
 - lib/langchain/processors/text.rb
+- lib/langchain/processors/xlsx.rb
 - lib/langchain/prompt/base.rb
 - lib/langchain/prompt/few_shot_prompt_template.rb
 - lib/langchain/prompt/loading.rb
 - lib/langchain/prompt/prompt_template.rb
 - lib/langchain/tool/base.rb
 - lib/langchain/tool/calculator.rb
+- lib/langchain/tool/database.rb
 - lib/langchain/tool/ruby_code_interpreter.rb
 - lib/langchain/tool/serp_api.rb
 - lib/langchain/tool/wikipedia.rb